[llvm] [llvm-profdata][llvm-cov]Fix double-counted coverage for same-named functions across binaries (PR #153679)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 14 14:21:55 PDT 2025


https://github.com/awearden updated https://github.com/llvm/llvm-project/pull/153679

>From 40e13df500f6df6e9b02a4f648cbaf3f6f59b910 Mon Sep 17 00:00:00 2001
From: Andres Wearden <andreswearden5 at gmail.com>
Date: Thu, 14 Aug 2025 13:15:38 -0700
Subject: [PATCH] [llvm-profdata][llvm-cov]Fix double-counted coverage for
 same-named functions across binaries

---
 .../ProfileData/Coverage/CoverageMapping.h    | 12 ++-
 llvm/include/llvm/ProfileData/InstrProf.h     | 19 +++-
 .../llvm/ProfileData/InstrProfWriter.h        |  6 +-
 .../ProfileData/Coverage/CoverageMapping.cpp  | 91 +++++++++++++++----
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 14 ++-
 .../llvm-cov/Inputs/merge-same-func-bin1-2.c  |  5 +
 .../llvm-cov/Inputs/merge-same-func-bin1.c    |  8 ++
 .../llvm-cov/Inputs/merge-same-func-bin2.c    |  2 +
 .../tools/llvm-cov/merge-same-func-diff-bin.c | 38 ++++++++
 llvm/tools/llvm-cov/CodeCoverage.cpp          | 14 ++-
 llvm/tools/llvm-cov/CoverageViewOptions.h     |  1 +
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 69 +++++++++-----
 12 files changed, 220 insertions(+), 59 deletions(-)
 create mode 100644 llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1-2.c
 create mode 100644 llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1.c
 create mode 100644 llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin2.c
 create mode 100644 llvm/test/tools/llvm-cov/merge-same-func-diff-bin.c

diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 7d1a85ba528fc..8b87b4337a75e 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -993,6 +993,7 @@ class CoverageMapping {
   std::vector<FunctionRecord> Functions;
   DenseMap<size_t, SmallVector<unsigned, 0>> FilenameHash2RecordIndices;
   std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches;
+  DenseMap<std::pair<size_t, hash_code>, unsigned> RecordIndices;
 
   std::optional<bool> SingleByteCoverage;
 
@@ -1003,7 +1004,8 @@ class CoverageMapping {
       ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
       std::optional<std::reference_wrapper<IndexedInstrProfReader>>
           &ProfileReader,
-      CoverageMapping &Coverage);
+      CoverageMapping &Coverage, StringRef ObjectFilename = "",
+      bool MergeBinaryCoverage = false);
 
   // Load coverage records from file.
   static Error
@@ -1011,13 +1013,15 @@ class CoverageMapping {
                std::optional<std::reference_wrapper<IndexedInstrProfReader>>
                    &ProfileReader,
                CoverageMapping &Coverage, bool &DataFound,
-               SmallVectorImpl<object::BuildID> *FoundBinaryIDs = nullptr);
+               SmallVectorImpl<object::BuildID> *FoundBinaryIDs = nullptr,
+               StringRef ObjectFilename = "", bool MergeBinaryCoverage = false);
 
   /// Add a function record corresponding to \p Record.
   Error loadFunctionRecord(
       const CoverageMappingRecord &Record,
       const std::optional<std::reference_wrapper<IndexedInstrProfReader>>
-          &ProfileReader);
+          &ProfileReader,
+      StringRef ObjectFilename = "", bool MergeBinaryCoverage = false);
 
   /// Look up the indices for function records which are at least partially
   /// defined in the specified file. This is guaranteed to return a superset of
@@ -1044,7 +1048,7 @@ class CoverageMapping {
        std::optional<StringRef> ProfileFilename, vfs::FileSystem &FS,
        ArrayRef<StringRef> Arches = {}, StringRef CompilationDir = "",
        const object::BuildIDFetcher *BIDFetcher = nullptr,
-       bool CheckBinaryIDs = false);
+       bool CheckBinaryIDs = false, bool MergeBinaryCoverage = false);
 
   /// The number of functions that couldn't have their profiles mapped.
   ///
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 85a9efe73855b..a41756e7d0dff 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -513,6 +513,7 @@ class InstrProfSymtab {
   LLVM_ABI static StringRef getCanonicalName(StringRef PGOName);
 
 private:
+  StringRef ObjectFilename;
   using AddrIntervalMap =
       IntervalMap<uint64_t, uint64_t, 4, IntervalMapHalfOpenInfo<uint64_t>>;
   StringRef Data;
@@ -640,10 +641,18 @@ class InstrProfSymtab {
 
     // Insert into NameTab so that MD5NameMap (a vector that will be sorted)
     // won't have duplicated entries in the first place.
+    uint64_t HashValue = IndexedInstrProf::ComputeHash(SymbolName);
+    std::string HashStr(std::to_string(HashValue));
+    // If --object-aware-hashing supplied an object filename, add it to the
+    // hash context.
+    if (!ObjectFilename.empty()) {
+      std::string CombinedStr = HashStr + ":" + ObjectFilename.str();
+      StringRef HashRef = CombinedStr;
+      HashValue = IndexedInstrProf::ComputeHash(HashRef);
+    }
     auto Ins = NameTab.insert(SymbolName);
     if (Ins.second) {
-      MD5NameMap.push_back(std::make_pair(
-          IndexedInstrProf::ComputeHash(SymbolName), Ins.first->getKey()));
+      MD5NameMap.push_back(std::make_pair(HashValue, Ins.first->getKey()));
       Sorted = false;
     }
     return Error::success();
@@ -777,6 +786,12 @@ StringRef InstrProfSymtab::getFuncOrVarNameIfDefined(uint64_t MD5Hash) const {
 
 StringRef InstrProfSymtab::getFuncOrVarName(uint64_t MD5Hash) const {
   finalizeSymtab();
+  std::string TempMD5HashStr = std::to_string(MD5Hash);
+  if (!ObjectFilename.empty()) {
+    std::string CombinedHashStr = TempMD5HashStr + ":" + ObjectFilename.str();
+    llvm::StringRef CombinedHashRef(CombinedHashStr);
+    MD5Hash = IndexedInstrProf::ComputeHash(CombinedHashRef);
+  }
   auto Result = llvm::lower_bound(MD5NameMap, MD5Hash,
                                   [](const std::pair<uint64_t, StringRef> &LHS,
                                      uint64_t RHS) { return LHS.first < RHS; });
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 1b24425e68a9e..4375b280e50d3 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -113,7 +113,8 @@ class InstrProfWriter {
   /// for this function and the hash and number of counts match, each counter is
   /// summed. Optionally scale counts by \p Weight.
   LLVM_ABI void addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
-                          function_ref<void(Error)> Warn);
+                          function_ref<void(Error)> Warn,
+                          StringRef ObjectFilename = "");
   void addRecord(NamedInstrProfRecord &&I, function_ref<void(Error)> Warn) {
     addRecord(std::move(I), 1, Warn);
   }
@@ -224,7 +225,8 @@ class InstrProfWriter {
 
 private:
   void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
-                 uint64_t Weight, function_ref<void(Error)> Warn);
+                 uint64_t Weight, function_ref<void(Error)> Warn,
+                 StringRef ObjectFilename = "");
   bool shouldEncodeData(const ProfilingData &PD);
 
   /// Add a memprof record for a function identified by its \p Id.
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 429ec5c19f1f8..8476800e443e3 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -824,7 +824,8 @@ class MCDCDecisionRecorder {
 Error CoverageMapping::loadFunctionRecord(
     const CoverageMappingRecord &Record,
     const std::optional<std::reference_wrapper<IndexedInstrProfReader>>
-        &ProfileReader) {
+        &ProfileReader,
+    StringRef ObjectFilename, bool MergeBinaryCoverage) {
   StringRef OrigFuncName = Record.FunctionName;
   if (OrigFuncName.empty())
     return make_error<CoverageMapError>(coveragemap_error::malformed,
@@ -837,14 +838,21 @@ Error CoverageMapping::loadFunctionRecord(
 
   CounterMappingContext Ctx(Record.Expressions);
 
+  uint64_t FuncObjectHash = Record.FunctionHash;
+  if (!ObjectFilename.empty() && MergeBinaryCoverage) {
+    std::string HashStr =
+        std::to_string(Record.FunctionHash) + ":" + ObjectFilename.str();
+    llvm::StringRef HashRef(HashStr);
+    FuncObjectHash = IndexedInstrProf::ComputeHash(HashRef);
+  }
   std::vector<uint64_t> Counts;
   if (ProfileReader) {
     if (Error E = ProfileReader.value().get().getFunctionCounts(
-            Record.FunctionName, Record.FunctionHash, Counts)) {
+            Record.FunctionName, FuncObjectHash, Counts)) {
       instrprof_error IPE = std::get<0>(InstrProfError::take(std::move(E)));
       if (IPE == instrprof_error::hash_mismatch) {
         FuncHashMismatches.emplace_back(std::string(Record.FunctionName),
-                                        Record.FunctionHash);
+                                        FuncObjectHash);
         return Error::success();
       }
       if (IPE != instrprof_error::unknown_function)
@@ -863,11 +871,11 @@ Error CoverageMapping::loadFunctionRecord(
   BitVector Bitmap;
   if (ProfileReader) {
     if (Error E = ProfileReader.value().get().getFunctionBitmap(
-            Record.FunctionName, Record.FunctionHash, Bitmap)) {
+            Record.FunctionName, FuncObjectHash, Bitmap)) {
       instrprof_error IPE = std::get<0>(InstrProfError::take(std::move(E)));
       if (IPE == instrprof_error::hash_mismatch) {
         FuncHashMismatches.emplace_back(std::string(Record.FunctionName),
-                                        Record.FunctionHash);
+                                        FuncObjectHash);
         return Error::success();
       }
       if (IPE != instrprof_error::unknown_function)
@@ -942,11 +950,46 @@ Error CoverageMapping::loadFunctionRecord(
     Function.pushMCDCRecord(std::move(*Record));
   }
 
-  // Don't create records for (filenames, function) pairs we've already seen.
   auto FilenamesHash = hash_combine_range(Record.Filenames);
-  if (!RecordProvenance[FilenamesHash].insert(hash_value(OrigFuncName)).second)
-    return Error::success();
+  std::string HashStr = OrigFuncName.str();
+  auto LogicalFuncKey = std::make_pair(FilenamesHash, hash_value(OrigFuncName));
+  auto It = RecordIndices.find(LogicalFuncKey);
+  if (It != RecordIndices.end()) {
+    auto &ExistingFunction = Functions[It->second];
+    // Create a map of existing regions for lookup.
+    // The key uniquely identifies the source region.
+    using RegionKey =
+        std::tuple<unsigned, unsigned, unsigned, unsigned, unsigned>;
+    std::map<RegionKey, CountedRegion *> ExistingRegionsMap;
+    for (auto &ExistingRegion : ExistingFunction.CountedRegions) {
+      RegionKey Key = {ExistingRegion.FileID, ExistingRegion.LineStart,
+                       ExistingRegion.ColumnStart, ExistingRegion.LineEnd,
+                       ExistingRegion.ColumnEnd};
+      ExistingRegionsMap[Key] = &ExistingRegion;
+    }
+    // Merge the new regions into the existing function's regions.
+    for (const auto &NewRegion : Function.CountedRegions) {
+      RegionKey Key = {NewRegion.FileID, NewRegion.LineStart,
+                       NewRegion.ColumnStart, NewRegion.LineEnd,
+                       NewRegion.ColumnEnd};
+      auto MapIt = ExistingRegionsMap.find(Key);
+      if (MapIt != ExistingRegionsMap.end()) {
+        // Region already exists; merge by summing the execution counts.
+        CountedRegion *ExistingRegion = MapIt->second;
+        ExistingRegion->ExecutionCount += NewRegion.ExecutionCount;
+      } else {
+        ExistingFunction.CountedRegions.push_back(NewRegion);
+      }
+    }
 
+    return Error::success();
+  }
+  RecordIndices.insert({LogicalFuncKey, Functions.size()});
+  // Don't create records for (filenames, function) pairs we've already seen.
+  StringRef Hash(HashStr);
+  if (!RecordProvenance[FilenamesHash].insert(hash_value(Hash)).second) {
+    return Error::success();
+  }
   Functions.push_back(std::move(Function));
 
   // Performance optimization: keep track of the indices of the function records
@@ -971,7 +1014,8 @@ Error CoverageMapping::loadFromReaders(
     ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
     std::optional<std::reference_wrapper<IndexedInstrProfReader>>
         &ProfileReader,
-    CoverageMapping &Coverage) {
+    CoverageMapping &Coverage, StringRef ObjectFilename,
+    bool MergeBinaryCoverage) {
   assert(!Coverage.SingleByteCoverage || !ProfileReader ||
          *Coverage.SingleByteCoverage ==
              ProfileReader.value().get().hasSingleByteCoverage());
@@ -982,7 +1026,8 @@ Error CoverageMapping::loadFromReaders(
       if (Error E = RecordOrErr.takeError())
         return E;
       const auto &Record = *RecordOrErr;
-      if (Error E = Coverage.loadFunctionRecord(Record, ProfileReader))
+      if (Error E = Coverage.loadFunctionRecord(
+              Record, ProfileReader, ObjectFilename, MergeBinaryCoverage))
         return E;
     }
   }
@@ -1013,7 +1058,8 @@ Error CoverageMapping::loadFromFile(
     std::optional<std::reference_wrapper<IndexedInstrProfReader>>
         &ProfileReader,
     CoverageMapping &Coverage, bool &DataFound,
-    SmallVectorImpl<object::BuildID> *FoundBinaryIDs) {
+    SmallVectorImpl<object::BuildID> *FoundBinaryIDs, StringRef ObjectFilename,
+    bool MergeBinaryCoverage) {
   auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN(
       Filename, /*IsText=*/false, /*RequiresNullTerminator=*/false);
   if (std::error_code EC = CovMappingBufOrErr.getError())
@@ -1043,16 +1089,19 @@ Error CoverageMapping::loadFromFile(
                        }));
   }
   DataFound |= !Readers.empty();
-  if (Error E = loadFromReaders(Readers, ProfileReader, Coverage))
+  if (Error E = loadFromReaders(Readers, ProfileReader, Coverage,
+                                ObjectFilename, MergeBinaryCoverage))
     return createFileError(Filename, std::move(E));
   return Error::success();
 }
 
-Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
-    ArrayRef<StringRef> ObjectFilenames,
-    std::optional<StringRef> ProfileFilename, vfs::FileSystem &FS,
-    ArrayRef<StringRef> Arches, StringRef CompilationDir,
-    const object::BuildIDFetcher *BIDFetcher, bool CheckBinaryIDs) {
+Expected<std::unique_ptr<CoverageMapping>>
+CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
+                      std::optional<StringRef> ProfileFilename,
+                      vfs::FileSystem &FS, ArrayRef<StringRef> Arches,
+                      StringRef CompilationDir,
+                      const object::BuildIDFetcher *BIDFetcher,
+                      bool CheckBinaryIDs, bool MergeBinaryCoverage) {
   std::unique_ptr<IndexedInstrProfReader> ProfileReader;
   if (ProfileFilename) {
     auto ProfileReaderOrErr =
@@ -1079,9 +1128,11 @@ Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
 
   SmallVector<object::BuildID> FoundBinaryIDs;
   for (const auto &File : llvm::enumerate(ObjectFilenames)) {
-    if (Error E = loadFromFile(File.value(), GetArch(File.index()),
-                               CompilationDir, ProfileReaderRef, *Coverage,
-                               DataFound, &FoundBinaryIDs))
+    if (Error E = loadFromFile(
+            File.value(), GetArch(File.index()), CompilationDir,
+            ProfileReaderRef, *Coverage, DataFound, &FoundBinaryIDs,
+            MergeBinaryCoverage ? ObjectFilenames[File.index()] : "",
+            MergeBinaryCoverage))
       return std::move(E);
   }
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index df807fc02b910..c4661652cb133 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -154,10 +154,11 @@ void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
 void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; }
 
 void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
-                                function_ref<void(Error)> Warn) {
+                                function_ref<void(Error)> Warn,
+                                StringRef ObjectFilename) {
   auto Name = I.Name;
   auto Hash = I.Hash;
-  addRecord(Name, Hash, std::move(I), Weight, Warn);
+  addRecord(Name, Hash, std::move(I), Weight, Warn, ObjectFilename);
 }
 
 void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
@@ -193,9 +194,16 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
 
 void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
                                 InstrProfRecord &&I, uint64_t Weight,
-                                function_ref<void(Error)> Warn) {
+                                function_ref<void(Error)> Warn,
+                                StringRef ObjectFilename) {
   auto &ProfileDataMap = FunctionData[Name];
 
+  // Add the object file name to the hash if --object-aware-hashing is used.
+  if (!ObjectFilename.empty()) {
+    std::string HashStr = std::to_string(Hash) + ":" + ObjectFilename.str();
+    llvm::StringRef HashRef(HashStr);
+    Hash = IndexedInstrProf::ComputeHash(HashRef);
+  }
   auto [Where, NewFunc] = ProfileDataMap.try_emplace(Hash);
   InstrProfRecord &Dest = Where->second;
 
diff --git a/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1-2.c b/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1-2.c
new file mode 100644
index 0000000000000..428cbfd163a72
--- /dev/null
+++ b/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1-2.c
@@ -0,0 +1,5 @@
+int foo() { return 0; }
+
+int bar() { return 0; }
+
+int bun() { return 0; }
diff --git a/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1.c b/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1.c
new file mode 100644
index 0000000000000..863b2baa017f6
--- /dev/null
+++ b/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin1.c
@@ -0,0 +1,8 @@
+extern int foo();
+extern int bar();
+extern int bun();
+
+
+int main() {
+  return foo() + bar() + bun();
+}
diff --git a/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin2.c b/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin2.c
new file mode 100644
index 0000000000000..2fcea0fce9546
--- /dev/null
+++ b/llvm/test/tools/llvm-cov/Inputs/merge-same-func-bin2.c
@@ -0,0 +1,2 @@
+int baz() { return 0; }
+int main() { return 1; }
diff --git a/llvm/test/tools/llvm-cov/merge-same-func-diff-bin.c b/llvm/test/tools/llvm-cov/merge-same-func-diff-bin.c
new file mode 100644
index 0000000000000..f09192daa175c
--- /dev/null
+++ b/llvm/test/tools/llvm-cov/merge-same-func-diff-bin.c
@@ -0,0 +1,38 @@
+// RUN: clang -O0 -fprofile-instr-generate -fcoverage-mapping \
+// RUN:        %S/Inputs/merge-same-func-bin1.c %S/Inputs/merge-same-func-bin1-2.c -o %t.bin1
+// RUN: %t.bin1
+// RUN: cp default.profraw %t.bin1.profraw 
+//
+
+// RUN: clang -O0 -fprofile-instr-generate -fcoverage-mapping \
+// RUN:        %S/Inputs/merge-same-func-bin2.c %S/Inputs/merge-same-func-bin1-2.c -o %t.bin2
+// RUN: %t.bin2 || true
+// RUN: cp default.profraw %t.bin2.profraw  
+//---------------- merge the raw profiles ------------------------------------//
+// RUN: llvm-profdata merge --object-aware-hashing=%t.bin2 %t.bin2.profraw \
+// RUN:                     --object-aware-hashing=%t.bin1 %t.bin1.profraw \
+// RUN:                     -o %t.profdata
+//
+
+// RUN: llvm-cov show -instr-profile=%t.profdata --object=%t.bin2 --object=%t.bin1 --merge-binary-coverage | FileCheck %s
+//
+// CHECK-LABEL: {{.*merge-same-func-bin1-2\.c}}:
+// CHECK:     1|      1|int foo() { return 0; }
+// CHECK:     2|       |
+// CHECK:     3|      1|int bar() { return 0; }
+// CHECK:     4|       |
+// CHECK:     5|      1|int bun() { return 0; }
+//
+// CHECK-LABEL: {{.*merge-same-func-bin1\.c}}:
+// CHECK:     1|       |extern int foo();
+// CHECK:     2|       |extern int bar();
+// CHECK:     3|       |extern int bun();
+// CHECK:     4|       |
+// CHECK:     5|       |
+// CHECK:     6|      1|int main()
+// CHECK:     7|      1|  return foo() + bar() + bun();
+// CHECK:     8|      1|}
+//
+// CHECK-LABEL: {{.*merge-same-func-bin2\.c}}:
+// CHECK:     1|      0|int baz() { return 0; }
+// CHECK:     2|      1|int main() { return 1; }
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index 6c66858c4de8c..f827a6c3620b8 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -462,9 +462,10 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() {
                 ObjectFilename);
   }
   auto FS = vfs::getRealFileSystem();
-  auto CoverageOrErr = CoverageMapping::load(
-      ObjectFilenames, PGOFilename, *FS, CoverageArches,
-      ViewOpts.CompilationDirectory, BIDFetcher.get(), CheckBinaryIDs);
+  auto CoverageOrErr =
+      CoverageMapping::load(ObjectFilenames, PGOFilename, *FS, CoverageArches,
+                            ViewOpts.CompilationDirectory, BIDFetcher.get(),
+                            CheckBinaryIDs, ViewOpts.MergeBinaryCoverage);
   if (Error E = CoverageOrErr.takeError()) {
     error("failed to load coverage: " + toString(std::move(E)));
     return nullptr;
@@ -801,6 +802,12 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
       "check-binary-ids", cl::desc("Fail if an object couldn't be found for a "
                                    "binary ID in the profile"));
 
+  cl::opt<bool> MergeBinaryCoverage(
+      "merge-binary-coverage",
+      cl::desc("Enable merging of coverage profiles from binaries compiled for "
+               "different architectures"),
+      cl::init(false));
+
   auto commandLineParser = [&, this](int argc, const char **argv) -> int {
     cl::ParseCommandLineOptions(argc, argv, "LLVM code coverage tool\n");
     ViewOpts.Debug = DebugDump;
@@ -966,6 +973,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
     ViewOpts.ExportSummaryOnly = SummaryOnly;
     ViewOpts.NumThreads = NumThreads;
     ViewOpts.CompilationDirectory = CompilationDirectory;
+    ViewOpts.MergeBinaryCoverage = MergeBinaryCoverage;
 
     return 0;
   };
diff --git a/llvm/tools/llvm-cov/CoverageViewOptions.h b/llvm/tools/llvm-cov/CoverageViewOptions.h
index 1f6ad570f86f2..22bec8df26409 100644
--- a/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -47,6 +47,7 @@ struct CoverageViewOptions {
   bool SkipFunctions;
   bool SkipBranches;
   bool BinaryCounters;
+  bool MergeBinaryCoverage;
   OutputFormat Format;
   BranchOutputType ShowBranches;
   std::string ShowOutputDirectory;
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 207ae2ddd4cf2..3854f3ba08f77 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -196,6 +196,12 @@ static cl::opt<std::string> RemappingFile("remapping-file",
                                           cl::desc("Symbol remapping file"));
 static cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
                                 cl::aliasopt(RemappingFile));
+static cl::list<std::string> ObjectAwareHashing(
+    "object-aware-hashing",
+    cl::desc("Includes the object file name when hashing function names "
+             "and control flow hashes, allowing functions from different "
+             "binaries to be distinguished"),
+    cl::sub(MergeSubcommand));
 static cl::opt<bool>
     UseMD5("use-md5", cl::init(false), cl::Hidden,
            cl::desc("Choose to use MD5 to represent string in name table (only "
@@ -694,19 +700,27 @@ static void
 loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
           const InstrProfCorrelator *Correlator, const StringRef ProfiledBinary,
           WriterContext *WC, const object::BuildIDFetcher *BIDFetcher = nullptr,
-          const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr) {
+          const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr,
+          StringRef ObjectAwareHashing = "") {
   std::unique_lock<std::mutex> CtxGuard{WC->Lock};
 
   // Copy the filename, because llvm::ThreadPool copied the input "const
   // WeightedFile &" by value, making a reference to the filename within it
   // invalid outside of this packaged task.
   std::string Filename = Input.Filename;
+  std::string ProfileFile = Input.Filename;
+  StringRef ObjectFilename = "";
+
+  StringRef FilenameRef = Filename;
+  if (!ObjectAwareHashing.empty()) {
+    ObjectFilename = ObjectAwareHashing.data();
+  }
 
   using ::llvm::memprof::RawMemProfReader;
-  if (RawMemProfReader::hasFormat(Input.Filename)) {
-    auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
+  if (RawMemProfReader::hasFormat(ProfileFile)) {
+    auto ReaderOrErr = RawMemProfReader::create(ProfileFile, ProfiledBinary);
     if (!ReaderOrErr) {
-      exitWithError(ReaderOrErr.takeError(), Input.Filename);
+      exitWithError(ReaderOrErr.takeError(), ProfileFile);
     }
     std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
     // Check if the profile types can be merged, e.g. clang frontend profiles
@@ -790,7 +804,7 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
   const ProfCorrelatorKind CorrelatorKind = BIDFetcherCorrelatorKind
                                                 ? *BIDFetcherCorrelatorKind
                                                 : ProfCorrelatorKind::NONE;
-  auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator,
+  auto ReaderOrErr = InstrProfReader::create(ProfileFile, *FS, Correlator,
                                              BIDFetcher, CorrelatorKind, Warn);
   if (Error E = ReaderOrErr.takeError()) {
     // Skip the empty profiles by returning silently.
@@ -817,19 +831,22 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
       I.Name = (*Remapper)(I.Name);
     const StringRef FuncName = I.Name;
     bool Reported = false;
-    WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
-      if (Reported) {
-        consumeError(std::move(E));
-        return;
-      }
-      Reported = true;
-      // Only show hint the first time an error occurs.
-      auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
-      std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
-      bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
-      handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
-                             Input.Filename, FuncName, firstTime);
-    });
+    WC->Writer.addRecord(
+        std::move(I), Input.Weight,
+        [&](Error E) {
+          if (Reported) {
+            consumeError(std::move(E));
+            return;
+          }
+          Reported = true;
+          // Only show hint the first time an error occurs.
+          auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
+          std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
+          bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
+          handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
+                                 Input.Filename, FuncName, firstTime);
+        },
+        ObjectFilename);
   }
 
   if (KeepVTableSymbols) {
@@ -1038,18 +1055,20 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
         MaxTraceLength));
 
   if (NumThreads == 1) {
-    for (const auto &Input : Inputs)
-      loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
-                Contexts[0].get(), BIDFetcher.get(), &BIDFetcherCorrelateKind);
+    for (int I = 0; I < int(Inputs.size()); ++I)
+      loadInput(Inputs[I], Remapper, Correlator.get(), ProfiledBinary,
+                Contexts[0].get(), BIDFetcher.get(), &BIDFetcherCorrelateKind,
+                !ObjectAwareHashing.empty() ? ObjectAwareHashing[I] : "");
   } else {
     DefaultThreadPool Pool(hardware_concurrency(NumThreads));
 
     // Load the inputs in parallel (N/NumThreads serial steps).
     unsigned Ctx = 0;
-    for (const auto &Input : Inputs) {
-      Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
-                 Contexts[Ctx].get(), BIDFetcher.get(),
-                 &BIDFetcherCorrelateKind);
+    for (int I = 0; I < int(Inputs.size()); ++I) {
+      Pool.async(loadInput, Inputs[I], Remapper, Correlator.get(),
+                 ProfiledBinary, Contexts[Ctx].get(), BIDFetcher.get(),
+                 &BIDFetcherCorrelateKind,
+                 !ObjectAwareHashing.empty() ? ObjectAwareHashing[I] : "");
       Ctx = (Ctx + 1) % NumThreads;
     }
     Pool.wait();



More information about the llvm-commits mailing list