[clang] [llvm] [sancov] Add -diff and -union options to compute set difference and union of sancov files (PR #171191)

Manuel Carrasco via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 11 06:18:51 PST 2025


https://github.com/mgcarrasco updated https://github.com/llvm/llvm-project/pull/171191

>From ae1650f76560a5dafcfe1d2344ca31e0c707e280 Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Mon, 8 Dec 2025 13:30:06 -0600
Subject: [PATCH 1/4] [sancov] Add -diff option to compute set difference of
 sancov files

Add a new -diff action that computes the difference between two sancov
coverage files (A - B) and writes the result to a new .sancov file.

The option takes exactly two input .sancov files and requires an
--output option to specify the output file. The output file preserves
the binary format (magic number and bitness) from the first input file.

A warning is emitted if the two input files have different bitness
(32-bit vs 64-bit), though the operation proceeds using the bitness
from file A.
---
 .../tools/sancov/diff-different-files.test    |   7 ++
 llvm/test/tools/sancov/diff-same-file.test    |   6 ++
 llvm/tools/sancov/Opts.td                     |   6 ++
 llvm/tools/sancov/sancov.cpp                  | 101 ++++++++++++++++++
 4 files changed, 120 insertions(+)
 create mode 100644 llvm/test/tools/sancov/diff-different-files.test
 create mode 100644 llvm/test/tools/sancov/diff-same-file.test

diff --git a/llvm/test/tools/sancov/diff-different-files.test b/llvm/test/tools/sancov/diff-different-files.test
new file mode 100644
index 0000000000000..db46593099e74
--- /dev/null
+++ b/llvm/test/tools/sancov/diff-different-files.test
@@ -0,0 +1,7 @@
+REQUIRES: x86-registered-target && host-byteorder-little-endian
+RUN: rm -f %t.out.sancov
+RUN: sancov -diff --output=%t.out.sancov %p/Inputs/test-linux_x86_64.1.sancov %p/Inputs/test-linux_x86_64.0.sancov
+RUN: sancov -print %t.out.sancov | FileCheck %s 
+
+CHECK: 0x4e14c2
+CHECK: 0x4e178c
diff --git a/llvm/test/tools/sancov/diff-same-file.test b/llvm/test/tools/sancov/diff-same-file.test
new file mode 100644
index 0000000000000..fd3b5de79d9a6
--- /dev/null
+++ b/llvm/test/tools/sancov/diff-same-file.test
@@ -0,0 +1,6 @@
+REQUIRES: x86-registered-target && host-byteorder-little-endian
+RUN: rm -f %t.out.sancov
+RUN: sancov -diff --output=%t.out.sancov %p/Inputs/test-linux_x86_64.0.sancov %p/Inputs/test-linux_x86_64.0.sancov
+RUN: sancov -print %t.out.sancov | FileCheck %s --allow-empty --check-prefix=EMPTY
+
+EMPTY-NOT: {{.}} 
diff --git a/llvm/tools/sancov/Opts.td b/llvm/tools/sancov/Opts.td
index 2e8af81b2a40d..01de87d596327 100644
--- a/llvm/tools/sancov/Opts.td
+++ b/llvm/tools/sancov/Opts.td
@@ -22,6 +22,8 @@ def : Flag<["-"], "v">, Alias<version>, HelpText<"Alias for --version">, Group<g
 def action_grp : OptionGroup<"Action">, HelpText<"Action (required)">;
 def print : F<"print", "Print coverage addresses">,
   Group<action_grp>;
+def diff : F<"diff", "Compute difference between two sancov files (A - B) and write to the new output sancov file">,
+  Group<action_grp>;
 def printCoveragePcs : F<"print-coverage-pcs", "Print coverage instrumentation points addresses.">,
   Group<action_grp>;
 def coveredFunctions : F<"covered-functions", "Print all covered funcions.">,
@@ -56,3 +58,7 @@ defm stripPathPrefix
 defm ignorelist
     : Eq<"ignorelist", "Ignorelist file (sanitizer ignorelist format)">,
       MetaVarName<"<string>">;
+
+defm output
+    : Eq<"output", "Output file for diff action">,
+      MetaVarName<"<string>">;
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index a0585fad024c7..78fa3ebedd817 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -92,6 +92,7 @@ class SancovOptTable : public opt::GenericOptTable {
 
 enum ActionType {
   CoveredFunctionsAction,
+  DiffAction,
   HtmlReportAction,
   MergeAction,
   NotCoveredFunctionsAction,
@@ -108,6 +109,7 @@ static bool ClSkipDeadFiles;
 static bool ClUseDefaultIgnorelist;
 static std::string ClStripPathPrefix;
 static std::string ClIgnorelist;
+static std::string ClOutputFile;
 
 static const char *const DefaultIgnorelistStr = "fun:__sanitizer_.*\n"
                                                 "src:/usr/include/.*\n"
@@ -139,6 +141,10 @@ struct RawCoverage {
   static ErrorOr<std::unique_ptr<RawCoverage>>
   read(const std::string &FileName);
 
+  // Write binary .sancov file.
+  static void write(const std::string &FileName, const RawCoverage &Coverage,
+                    FileHeader Header);
+
   std::unique_ptr<std::set<uint64_t>> Addrs;
 };
 
@@ -277,6 +283,33 @@ raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
   return OS;
 }
 
+// Write coverage addresses in binary format.
+void RawCoverage::write(const std::string &FileName,
+                        const RawCoverage &Coverage, FileHeader Header) {
+  std::error_code EC;
+  raw_fd_ostream OS(FileName, EC, sys::fs::OF_None);
+  failIfError(EC);
+
+  OS.write(reinterpret_cast<const char *>(&Header), sizeof(Header));
+
+  switch (Header.Bitness) {
+  case Bitness64:
+    for (auto Addr : *Coverage.Addrs) {
+      uint64_t Addr64 = Addr;
+      OS.write(reinterpret_cast<const char *>(&Addr64), sizeof(Addr64));
+    }
+    break;
+  case Bitness32:
+    for (auto Addr : *Coverage.Addrs) {
+      uint32_t Addr32 = static_cast<uint32_t>(Addr);
+      OS.write(reinterpret_cast<const char *>(&Addr32), sizeof(Addr32));
+    }
+    break;
+  default:
+    fail("Unsupported bitness: " + std::to_string(Header.Bitness));
+  }
+}
+
 static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
   OS << "all-edges: " << Stats.AllPoints << "\n";
   OS << "cov-edges: " << Stats.CovPoints << "\n";
@@ -1015,6 +1048,59 @@ static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
   }
 }
 
+static const char *bitnessToString(uint32_t Bitness) {
+  switch (Bitness) {
+  case Bitness64:
+    return "64-bit";
+  case Bitness32:
+    return "32-bit";
+  default:
+    fail("Unsupported bitness: " + std::to_string(Bitness));
+    return nullptr;
+  }
+}
+
+// Compute difference between two coverage files (A - B) and write to output
+// file.
+static void diffRawCoverage(const std::string &FileA, const std::string &FileB,
+                            const std::string &OutputFile) {
+  auto CovA = RawCoverage::read(FileA);
+  failIfError(CovA);
+
+  auto CovB = RawCoverage::read(FileB);
+  failIfError(CovB);
+
+  // Determine bitness from both files
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErrA =
+      MemoryBuffer::getFile(FileA);
+  failIfError(BufOrErrA);
+  const FileHeader *HeaderA =
+      reinterpret_cast<const FileHeader *>(BufOrErrA.get()->getBufferStart());
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErrB =
+      MemoryBuffer::getFile(FileB);
+  failIfError(BufOrErrB);
+  const FileHeader *HeaderB =
+      reinterpret_cast<const FileHeader *>(BufOrErrB.get()->getBufferStart());
+
+  // Warn if bitness differs
+  if (HeaderA->Bitness != HeaderB->Bitness) {
+    errs() << "WARNING: Input files have different bitness (File A: "
+           << bitnessToString(HeaderA->Bitness)
+           << ", File B: " << bitnessToString(HeaderB->Bitness)
+           << "). Using bitness from File A.\n";
+  }
+
+  // Compute A - B
+  auto DiffAddrs = std::make_unique<std::set<uint64_t>>();
+  std::set_difference(CovA.get()->Addrs->begin(), CovA.get()->Addrs->end(),
+                      CovB.get()->Addrs->begin(), CovB.get()->Addrs->end(),
+                      std::inserter(*DiffAddrs, DiffAddrs->end()));
+
+  RawCoverage DiffCov(std::move(DiffAddrs));
+  RawCoverage::write(OutputFile, DiffCov, *HeaderA);
+}
+
 static std::unique_ptr<SymbolizedCoverage>
 merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
   if (Coverages.empty())
@@ -1176,6 +1262,9 @@ static void parseArgs(int Argc, char **Argv) {
     case OPT_print:
       Action = ActionType::PrintAction;
       break;
+    case OPT_diff:
+      Action = ActionType::DiffAction;
+      break;
     case OPT_printCoveragePcs:
       Action = ActionType::PrintCovPointsAction;
       break;
@@ -1209,6 +1298,7 @@ static void parseArgs(int Argc, char **Argv) {
 
   ClStripPathPrefix = Args.getLastArgValue(OPT_stripPathPrefix_EQ);
   ClIgnorelist = Args.getLastArgValue(OPT_ignorelist_EQ);
+  ClOutputFile = Args.getLastArgValue(OPT_output_EQ);
 }
 
 int sancov_main(int Argc, char **Argv, const llvm::ToolContext &) {
@@ -1223,6 +1313,16 @@ int sancov_main(int Argc, char **Argv, const llvm::ToolContext &) {
     readAndPrintRawCoverage(ClInputFiles, outs());
     return 0;
   }
+  if (Action == DiffAction) {
+    // -diff requires exactly 2 input files and an output file.
+    failIf(ClInputFiles.size() != 2,
+           "diff action requires exactly 2 input sancov files");
+    failIf(
+        ClOutputFile.empty(),
+        "diff action requires --output option to specify output sancov file");
+    diffRawCoverage(ClInputFiles[0], ClInputFiles[1], ClOutputFile);
+    return 0;
+  }
   if (Action == PrintCovPointsAction) {
     // -print-coverage-points doesn't need coverage files.
     for (const std::string &ObjFile : ClInputFiles) {
@@ -1257,6 +1357,7 @@ int sancov_main(int Argc, char **Argv, const llvm::ToolContext &) {
     errs() << "-html-report option is removed: "
               "use -symbolize & coverage-report-server.py instead\n";
     return 1;
+  case DiffAction:
   case PrintAction:
   case PrintCovPointsAction:
     llvm_unreachable("unsupported action");

>From 829db2af5acf4b83aba50d76102bdc3019fe0b85 Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Tue, 9 Dec 2025 05:21:40 -0600
Subject: [PATCH 2/4] [sancov] Add -union option to merge multiple sancov files

Add a new -union action that computes the union of multiple sancov
coverage files and writes the result to a new .sancov file.

The option takes one or more input .sancov files and requires an
--output option to specify the output file. The output file preserves
the binary format (magic number and bitness) from the first input file.

A warning is emitted if any input file has different bitness (32-bit
vs 64-bit) than the first file, though the operation proceeds using
the bitness from the first file.
---
 .../tools/sancov/union-different-files.test   | 11 ++++
 llvm/test/tools/sancov/union-same-file.test   | 10 +++
 llvm/tools/sancov/Opts.td                     |  4 +-
 llvm/tools/sancov/sancov.cpp                  | 62 ++++++++++++++++++-
 4 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/sancov/union-different-files.test
 create mode 100644 llvm/test/tools/sancov/union-same-file.test

diff --git a/llvm/test/tools/sancov/union-different-files.test b/llvm/test/tools/sancov/union-different-files.test
new file mode 100644
index 0000000000000..47dafc97ad3c2
--- /dev/null
+++ b/llvm/test/tools/sancov/union-different-files.test
@@ -0,0 +1,11 @@
+REQUIRES: x86-registered-target && host-byteorder-little-endian
+RUN: rm -f %t.out.sancov
+RUN: sancov -union --output=%t.out.sancov %p/Inputs/test-linux_x86_64.1.sancov %p/Inputs/test-linux_x86_64.0.sancov
+RUN: sancov -print %t.out.sancov | FileCheck %s 
+
+CHECK: 0x4e1472
+CHECK: 0x4e14c2
+CHECK: 0x4e1520
+CHECK: 0x4e1553
+CHECK: 0x4e1586
+CHECK: 0x4e178c
diff --git a/llvm/test/tools/sancov/union-same-file.test b/llvm/test/tools/sancov/union-same-file.test
new file mode 100644
index 0000000000000..e7991d24fb7ac
--- /dev/null
+++ b/llvm/test/tools/sancov/union-same-file.test
@@ -0,0 +1,10 @@
+REQUIRES: x86-registered-target && host-byteorder-little-endian
+RUN: rm -f %t.out.sancov
+RUN: sancov -union --output=%t.out.sancov %p/Inputs/test-linux_x86_64.0.sancov %p/Inputs/test-linux_x86_64.0.sancov
+RUN: sancov -print %t.out.sancov | FileCheck %s
+
+CHECK: 0x4e132b
+CHECK: 0x4e1472
+CHECK: 0x4e1520
+CHECK: 0x4e1553
+CHECK: 0x4e1586
diff --git a/llvm/tools/sancov/Opts.td b/llvm/tools/sancov/Opts.td
index 01de87d596327..411d08a033050 100644
--- a/llvm/tools/sancov/Opts.td
+++ b/llvm/tools/sancov/Opts.td
@@ -24,6 +24,8 @@ def print : F<"print", "Print coverage addresses">,
   Group<action_grp>;
 def diff : F<"diff", "Compute difference between two sancov files (A - B) and write to the new output sancov file">,
   Group<action_grp>;
+def union_files : F<"union", "Compute union of multiple sancov files and write to the new output sancov file">,
+  Group<action_grp>;
 def printCoveragePcs : F<"print-coverage-pcs", "Print coverage instrumentation points addresses.">,
   Group<action_grp>;
 def coveredFunctions : F<"covered-functions", "Print all covered funcions.">,
@@ -60,5 +62,5 @@ defm ignorelist
       MetaVarName<"<string>">;
 
 defm output
-    : Eq<"output", "Output file for diff action">,
+    : Eq<"output", "Output file for diff and union actions">,
       MetaVarName<"<string>">;
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index 78fa3ebedd817..bb5507bbbf02d 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -99,7 +99,8 @@ enum ActionType {
   PrintAction,
   PrintCovPointsAction,
   StatsAction,
-  SymbolizeAction
+  SymbolizeAction,
+  UnionAction
 };
 
 static ActionType Action;
@@ -1101,6 +1102,48 @@ static void diffRawCoverage(const std::string &FileA, const std::string &FileB,
   RawCoverage::write(OutputFile, DiffCov, *HeaderA);
 }
 
+// Compute union of multiple coverage files and write to output file.
+static void unionRawCoverage(const std::vector<std::string> &InputFiles,
+                             const std::string &OutputFile) {
+  failIf(InputFiles.empty(), "union action requires at least one input file");
+
+  // Read the first file to get the header and initial coverage
+  auto UnionCov = RawCoverage::read(InputFiles[0]);
+  failIfError(UnionCov);
+
+  // Get header from first file
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+      MemoryBuffer::getFile(InputFiles[0]);
+  failIfError(BufOrErr);
+  const FileHeader *FirstHeader =
+      reinterpret_cast<const FileHeader *>(BufOrErr.get()->getBufferStart());
+  FileHeader Header = *FirstHeader;
+
+  for (size_t I = 1; I < InputFiles.size(); ++I) {
+    auto Cov = RawCoverage::read(InputFiles[I]);
+    failIfError(Cov);
+
+    // Check bitness of current file
+    ErrorOr<std::unique_ptr<MemoryBuffer>> CurBufOrErr =
+        MemoryBuffer::getFile(InputFiles[I]);
+    failIfError(CurBufOrErr);
+    const FileHeader *CurHeader = reinterpret_cast<const FileHeader *>(
+        CurBufOrErr.get()->getBufferStart());
+
+    if (CurHeader->Bitness != Header.Bitness) {
+      errs() << "WARNING: Input file has different bitness (File "
+             << InputFiles[I] << ": " << bitnessToString(CurHeader->Bitness)
+             << ", First file: " << bitnessToString(Header.Bitness)
+             << "). Using bitness from first file.\n";
+    }
+
+    UnionCov.get()->Addrs->insert(Cov.get()->Addrs->begin(),
+                                  Cov.get()->Addrs->end());
+  }
+
+  RawCoverage::write(OutputFile, *UnionCov.get(), Header);
+}
+
 static std::unique_ptr<SymbolizedCoverage>
 merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
   if (Coverages.empty())
@@ -1239,6 +1282,9 @@ static void parseArgs(int Argc, char **Argv) {
         "  Depending on chosen action the tool expects different input files:\n"
         "    -print-coverage-pcs     - coverage-instrumented binary files\n"
         "    -print-coverage         - .sancov files\n"
+        "    -diff                   - two .sancov files & --output option\n"
+        "    -union                  - one or more .sancov files & --output "
+        "option\n"
         "    <other actions>         - .sancov files & corresponding binary "
         "files, .symcov files\n");
     std::exit(0);
@@ -1265,6 +1311,9 @@ static void parseArgs(int Argc, char **Argv) {
     case OPT_diff:
       Action = ActionType::DiffAction;
       break;
+    case OPT_union_files:
+      Action = ActionType::UnionAction;
+      break;
     case OPT_printCoveragePcs:
       Action = ActionType::PrintCovPointsAction;
       break;
@@ -1323,6 +1372,16 @@ int sancov_main(int Argc, char **Argv, const llvm::ToolContext &) {
     diffRawCoverage(ClInputFiles[0], ClInputFiles[1], ClOutputFile);
     return 0;
   }
+  if (Action == UnionAction) {
+    // -union requires at least 1 input file and an output file.
+    failIf(ClInputFiles.empty(),
+           "union action requires at least one input sancov file");
+    failIf(
+        ClOutputFile.empty(),
+        "union action requires --output option to specify output sancov file");
+    unionRawCoverage(ClInputFiles, ClOutputFile);
+    return 0;
+  }
   if (Action == PrintCovPointsAction) {
     // -print-coverage-points doesn't need coverage files.
     for (const std::string &ObjFile : ClInputFiles) {
@@ -1358,6 +1417,7 @@ int sancov_main(int Argc, char **Argv, const llvm::ToolContext &) {
               "use -symbolize & coverage-report-server.py instead\n";
     return 1;
   case DiffAction:
+  case UnionAction:
   case PrintAction:
   case PrintCovPointsAction:
     llvm_unreachable("unsupported action");

>From 9210908ccddc7954f01d241856632ca7e4d934b0 Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Mon, 8 Dec 2025 13:32:59 -0600
Subject: [PATCH 3/4] [sancov] Refresh the sancov CLI usage text in the docs.

---
 clang/docs/SanitizerCoverage.rst | 45 +++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/clang/docs/SanitizerCoverage.rst b/clang/docs/SanitizerCoverage.rst
index 23720e542e4e9..4ab2d09366f4f 100644
--- a/clang/docs/SanitizerCoverage.rst
+++ b/clang/docs/SanitizerCoverage.rst
@@ -563,18 +563,39 @@ Sancov matches these files using module names and binaries file names.
 
 .. code-block:: console
 
-    USAGE: sancov [options] <action> (<binary file>|<.sancov file>)...
-
-    Action (required)
-      -print                    - Print coverage addresses
-      -covered-functions        - Print all covered functions.
-      -not-covered-functions    - Print all not covered functions.
-      -symbolize                - Symbolizes the report.
-
-    Options
-      -blocklist=<string>         - Blocklist file (sanitizer blocklist format).
-      -demangle                   - Print demangled function name.
-      -strip_path_prefix=<string> - Strip this prefix from file paths in reports
+    USAGE: sancov [options] <action> <binary files...> <.sancov files...> <.symcov files...>
+
+    Action (required):
+      -covered-functions     Print all covered funcions.
+      -diff                  Compute difference between two sancov files (A - B) and write to the new output sancov file
+      -html-report           REMOVED. Use -symbolize & coverage-report-server.py.
+      -merge                 Merges reports.
+      -not-covered-functions Print all not covered funcions.
+      -print-coverage-pcs    Print coverage instrumentation points addresses.
+      -print-coverage-stats  Print coverage statistics.
+      -print                 Print coverage addresses
+      -symbolize             Produces a symbolized JSON report from binary report.
+      -union                 Compute union of multiple sancov files and write to the new output sancov file
+
+    Generic Options:
+      -help    Display this help
+      -h       Alias for --help
+      -version Display the version
+      -v       Alias for --version
+
+    OPTIONS:
+      -demangle=0          Alias for --no-demangle
+      -demangle            Demangle function names
+      -ignorelist=<string> Ignorelist file (sanitizer ignorelist format)
+      -no-demangle         Do not demangle function names
+      -no-skip-dead-files  List dead source files in reports
+      -output=<string>     Output file for diff and union actions
+      -skip-dead-files=0   Alias for --no-skip-dead-files
+      -skip-dead-files     Do not list dead source files in reports
+      -strip_path_prefix=<string>
+                          Strip this prefix from files paths in reports
+      -use_default_ignorelist=0
+                          Alias for --no-use_default_ignore_list
 
 
 Coverage Reports

>From 36fff3cdfe6e1ab1b2b199bf55db2d0b00c759ed Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Thu, 11 Dec 2025 08:15:22 -0600
Subject: [PATCH 4/4] [Review] Update RawCoverage so it keeps track of its
 source bitness.

In this way, we can properly warn about possible data loss.
---
 .../tools/sancov/Inputs/dummy-32bits.0.sancov | Bin 0 -> 48 bytes
 .../tools/sancov/diff-different-bitness.test  |   6 ++
 .../tools/sancov/union-different-bitness.test |   6 ++
 llvm/tools/sancov/sancov.cpp                  |  93 ++++++++----------
 4 files changed, 53 insertions(+), 52 deletions(-)
 create mode 100644 llvm/test/tools/sancov/Inputs/dummy-32bits.0.sancov
 create mode 100644 llvm/test/tools/sancov/diff-different-bitness.test
 create mode 100644 llvm/test/tools/sancov/union-different-bitness.test

diff --git a/llvm/test/tools/sancov/Inputs/dummy-32bits.0.sancov b/llvm/test/tools/sancov/Inputs/dummy-32bits.0.sancov
new file mode 100644
index 0000000000000000000000000000000000000000..fee9a125ef08b7ed75e4833f71f2e9b0f72798fc
GIT binary patch
literal 48
fcmXr|4+8rSXbbx>fIyK5gjNuR(7{l;4N3z5bFvCA

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/sancov/diff-different-bitness.test b/llvm/test/tools/sancov/diff-different-bitness.test
new file mode 100644
index 0000000000000..506db06a552a5
--- /dev/null
+++ b/llvm/test/tools/sancov/diff-different-bitness.test
@@ -0,0 +1,6 @@
+REQUIRES: x86-registered-target && host-byteorder-little-endian
+RUN: rm -f %t.out.sancov
+RUN: sancov -diff --output=%t.out.sancov %p/Inputs/dummy-32bits.0.sancov %p/Inputs/test-linux_x86_64.0.sancov 2>&1 | FileCheck %s 
+
+CHECK: Input files have different bitness
+CHECK: 64-bit addresses will be truncated to 32 bits. This may result in data loss.
diff --git a/llvm/test/tools/sancov/union-different-bitness.test b/llvm/test/tools/sancov/union-different-bitness.test
new file mode 100644
index 0000000000000..fa37451a59b71
--- /dev/null
+++ b/llvm/test/tools/sancov/union-different-bitness.test
@@ -0,0 +1,6 @@
+REQUIRES: x86-registered-target && host-byteorder-little-endian
+RUN: rm -f %t.out.sancov
+RUN: sancov -union --output=%t.out.sancov %p/Inputs/dummy-32bits.0.sancov %p/Inputs/test-linux_x86_64.0.sancov 2>&1 | FileCheck %s 
+
+CHECK: Input files have different bitness
+CHECK: 64-bit addresses will be truncated to 32 bits. This may result in data loss.
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index bb5507bbbf02d..f2e71f97dda34 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -135,18 +135,19 @@ static const Regex SymcovFileRegex(".*\\.symcov");
 // Contents of .sancov file: list of coverage point addresses that were
 // executed.
 struct RawCoverage {
-  explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
-      : Addrs(std::move(Addrs)) {}
+  explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs,
+                       FileHeader Header)
+      : Addrs(std::move(Addrs)), Header(Header) {}
 
   // Read binary .sancov file.
   static ErrorOr<std::unique_ptr<RawCoverage>>
   read(const std::string &FileName);
 
   // Write binary .sancov file.
-  static void write(const std::string &FileName, const RawCoverage &Coverage,
-                    FileHeader Header);
+  static void write(const std::string &FileName, const RawCoverage &Coverage);
 
   std::unique_ptr<std::set<uint64_t>> Addrs;
+  FileHeader Header;
 };
 
 // Coverage point has an opaque Id and corresponds to multiple source locations.
@@ -271,7 +272,7 @@ RawCoverage::read(const std::string &FileName) {
   // to compactify the data.
   Addrs->erase(0);
 
-  return std::make_unique<RawCoverage>(std::move(Addrs));
+  return std::make_unique<RawCoverage>(std::move(Addrs), *Header);
 }
 
 // Print coverage addresses.
@@ -286,14 +287,15 @@ raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
 
 // Write coverage addresses in binary format.
 void RawCoverage::write(const std::string &FileName,
-                        const RawCoverage &Coverage, FileHeader Header) {
+                        const RawCoverage &Coverage) {
   std::error_code EC;
   raw_fd_ostream OS(FileName, EC, sys::fs::OF_None);
   failIfError(EC);
 
-  OS.write(reinterpret_cast<const char *>(&Header), sizeof(Header));
+  OS.write(reinterpret_cast<const char *>(&Coverage.Header),
+           sizeof(Coverage.Header));
 
-  switch (Header.Bitness) {
+  switch (Coverage.Header.Bitness) {
   case Bitness64:
     for (auto Addr : *Coverage.Addrs) {
       uint64_t Addr64 = Addr;
@@ -307,7 +309,7 @@ void RawCoverage::write(const std::string &FileName,
     }
     break;
   default:
-    fail("Unsupported bitness: " + std::to_string(Header.Bitness));
+    fail("Unsupported bitness: " + std::to_string(Coverage.Header.Bitness));
   }
 }
 
@@ -1061,6 +1063,24 @@ static const char *bitnessToString(uint32_t Bitness) {
   }
 }
 
+// Warn if two file headers have different bitness.
+static void warnIfDifferentBitness(const FileHeader &Header1,
+                                   const FileHeader &Header2,
+                                   const std::string &File1Desc,
+                                   const std::string &File2Desc) {
+  if (Header1.Bitness != Header2.Bitness) {
+    errs() << "WARNING: Input files have different bitness (" << File1Desc
+           << ": " << bitnessToString(Header1.Bitness) << ", " << File2Desc
+           << ": " << bitnessToString(Header2.Bitness)
+           << "). Using bitness from " << File1Desc << ".\n";
+
+    if (Header1.Bitness == Bitness32 && Header2.Bitness == Bitness64) {
+      errs() << "WARNING: 64-bit addresses will be truncated to 32 bits. "
+             << "This may result in data loss.\n";
+    }
+  }
+}
+
 // Compute difference between two coverage files (A - B) and write to output
 // file.
 static void diffRawCoverage(const std::string &FileA, const std::string &FileB,
@@ -1071,26 +1091,10 @@ static void diffRawCoverage(const std::string &FileA, const std::string &FileB,
   auto CovB = RawCoverage::read(FileB);
   failIfError(CovB);
 
-  // Determine bitness from both files
-  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErrA =
-      MemoryBuffer::getFile(FileA);
-  failIfError(BufOrErrA);
-  const FileHeader *HeaderA =
-      reinterpret_cast<const FileHeader *>(BufOrErrA.get()->getBufferStart());
-
-  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErrB =
-      MemoryBuffer::getFile(FileB);
-  failIfError(BufOrErrB);
-  const FileHeader *HeaderB =
-      reinterpret_cast<const FileHeader *>(BufOrErrB.get()->getBufferStart());
-
-  // Warn if bitness differs
-  if (HeaderA->Bitness != HeaderB->Bitness) {
-    errs() << "WARNING: Input files have different bitness (File A: "
-           << bitnessToString(HeaderA->Bitness)
-           << ", File B: " << bitnessToString(HeaderB->Bitness)
-           << "). Using bitness from File A.\n";
-  }
+  const FileHeader &HeaderA = CovA.get()->Header;
+  const FileHeader &HeaderB = CovB.get()->Header;
+
+  warnIfDifferentBitness(HeaderA, HeaderB, FileA, FileB);
 
   // Compute A - B
   auto DiffAddrs = std::make_unique<std::set<uint64_t>>();
@@ -1098,8 +1102,8 @@ static void diffRawCoverage(const std::string &FileA, const std::string &FileB,
                       CovB.get()->Addrs->begin(), CovB.get()->Addrs->end(),
                       std::inserter(*DiffAddrs, DiffAddrs->end()));
 
-  RawCoverage DiffCov(std::move(DiffAddrs));
-  RawCoverage::write(OutputFile, DiffCov, *HeaderA);
+  RawCoverage DiffCov(std::move(DiffAddrs), HeaderA);
+  RawCoverage::write(OutputFile, DiffCov);
 }
 
 // Compute union of multiple coverage files and write to output file.
@@ -1111,37 +1115,22 @@ static void unionRawCoverage(const std::vector<std::string> &InputFiles,
   auto UnionCov = RawCoverage::read(InputFiles[0]);
   failIfError(UnionCov);
 
-  // Get header from first file
-  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
-      MemoryBuffer::getFile(InputFiles[0]);
-  failIfError(BufOrErr);
-  const FileHeader *FirstHeader =
-      reinterpret_cast<const FileHeader *>(BufOrErr.get()->getBufferStart());
-  FileHeader Header = *FirstHeader;
+  const FileHeader &UnionHeader = UnionCov.get()->Header;
 
   for (size_t I = 1; I < InputFiles.size(); ++I) {
     auto Cov = RawCoverage::read(InputFiles[I]);
     failIfError(Cov);
 
-    // Check bitness of current file
-    ErrorOr<std::unique_ptr<MemoryBuffer>> CurBufOrErr =
-        MemoryBuffer::getFile(InputFiles[I]);
-    failIfError(CurBufOrErr);
-    const FileHeader *CurHeader = reinterpret_cast<const FileHeader *>(
-        CurBufOrErr.get()->getBufferStart());
-
-    if (CurHeader->Bitness != Header.Bitness) {
-      errs() << "WARNING: Input file has different bitness (File "
-             << InputFiles[I] << ": " << bitnessToString(CurHeader->Bitness)
-             << ", First file: " << bitnessToString(Header.Bitness)
-             << "). Using bitness from first file.\n";
-    }
+    const FileHeader &CurHeader = Cov.get()->Header;
+
+    warnIfDifferentBitness(UnionHeader, CurHeader, InputFiles[0],
+                           InputFiles[I]);
 
     UnionCov.get()->Addrs->insert(Cov.get()->Addrs->begin(),
                                   Cov.get()->Addrs->end());
   }
 
-  RawCoverage::write(OutputFile, *UnionCov.get(), Header);
+  RawCoverage::write(OutputFile, *UnionCov.get());
 }
 
 static std::unique_ptr<SymbolizedCoverage>



More information about the llvm-commits mailing list