[compiler-rt] b6b8fa3 - [llvm-cov][gcov] Support multi-files coverage in one basic block (#144504)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 01:24:23 PDT 2025


Author: int-zjt
Date: 2025-06-20T01:24:19-07:00
New Revision: b6b8fa3b15d334c51fcf8763ccda0102a01aeb9c

URL: https://github.com/llvm/llvm-project/commit/b6b8fa3b15d334c51fcf8763ccda0102a01aeb9c
DIFF: https://github.com/llvm/llvm-project/commit/b6b8fa3b15d334c51fcf8763ccda0102a01aeb9c.diff

LOG: [llvm-cov][gcov] Support multi-files coverage in one basic block (#144504)

In the current gcov implementation, all lines within a basic block are
attributed to the source file of the block's containing function. This
is inaccurate when a block contains lines from other files (e.g., via
#include "foo.inc").

Commit
[406e81b](https://github.com/llvm/llvm-project/commit/406e81b79d26dae6838cc69d10a3e22635da09ef)
attempted to address this by filtering lines based on debug info types,
but this approach has two limitations:

* **Over-filtering**: Some valid lines belonging to the function are
incorrectly excluded.
* **Under-counting**: Lines not belonging to the function are filtered
out and omitted from coverage statistics.

**GCC Reference Behavior**
GCC's gcov implementation handles this case correctly.This change aligns
the LLVM behavior with GCC.

**Proposed Solution**
1. **GCNO Generation**:

* **Current**: Each block stores a single GCOVLines record (filename +
lines).

* **New**: Dynamically create new GCOVLines records whenever consecutive
lines in a block originate from different source files. Group subsequent
lines from the same file under one record.

2. **GCNO Parsing**:

* **Current**: Lines are directly attributed to the function's source
file.

* **New**: Introduce a GCOVLocation type to track filename/line mappings
within blocks. Statistics will reflect the actual source file for each
line.

Added: 
    compiler-rt/test/profile/Posix/gcov-file-change-line.cpp

Modified: 
    compiler-rt/test/profile/Posix/gcov-file-change.cpp
    llvm/include/llvm/ProfileData/GCOV.h
    llvm/lib/ProfileData/GCOV.cpp
    llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/test/profile/Posix/gcov-file-change-line.cpp b/compiler-rt/test/profile/Posix/gcov-file-change-line.cpp
new file mode 100644
index 0000000000000..a750befb47e50
--- /dev/null
+++ b/compiler-rt/test/profile/Posix/gcov-file-change-line.cpp
@@ -0,0 +1,15 @@
+// RUN: rm -rf %t && split-file %s %t && cd %t
+// RUN: %clangxx --coverage main.cpp -o t
+// RUN: %run ./t
+// RUN: llvm-cov gcov -t t-main. | FileCheck %s
+
+//--- main.cpp
+#include <stdio.h>
+
+int main(int argc, char *argv[]) { // CHECK:      2: [[#]]:int main
+  puts("");                        // CHECK-NEXT: 2: [[#]]:
+#line 3
+  puts(""); // line 3
+  return 0; // line 4
+}
+// CHECK-NOT:  {{^ +[0-9]+:}}

diff  --git a/compiler-rt/test/profile/Posix/gcov-file-change.cpp b/compiler-rt/test/profile/Posix/gcov-file-change.cpp
index 9d3bc79591f27..0cef1c3512f88 100644
--- a/compiler-rt/test/profile/Posix/gcov-file-change.cpp
+++ b/compiler-rt/test/profile/Posix/gcov-file-change.cpp
@@ -16,8 +16,8 @@ inline auto *const inl_var_main = // CHECK:      1: [[#]]:inline auto
 void foo(int x) {                 // CHECK-NEXT: 1: [[#]]:
   if (x) {                        // CHECK-NEXT: 1: [[#]]:
 #include "a.inc"
-  }
-}
+  } // CHECK:      1: [[#]]:
+} // CHECK-NEXT: 1: [[#]]:
 // CHECK-NOT:  {{^ +[0-9]+:}}
 
 int main(int argc, char *argv[]) { // CHECK:      1: [[#]]:int main
@@ -32,10 +32,8 @@ int main(int argc, char *argv[]) { // CHECK:      1: [[#]]:int main
 //--- a.h
 /// Apple targets doesn't enable -mconstructor-aliases by default and the count may be 4.
 struct A { A() { } };              // CHECK:      {{[24]}}: [[#]]:struct A
-inline auto *const inl_var_a =
-    new A;
-/// TODO a.inc:1 should have line execution.
-// CHECK-NOT:  {{^ +[0-9]+:}}
+inline auto *const inl_var_a =     // CHECK-NEXT: 1: [[#]]:
+    new A;                         // CHECK-NEXT: 1: [[#]]:
 
 //--- a.inc
-puts("");
+puts(""); // CHECK:      1: [[#]]:puts

diff  --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h
index 0dc33d062e4f8..bc407182a094d 100644
--- a/llvm/include/llvm/ProfileData/GCOV.h
+++ b/llvm/include/llvm/ProfileData/GCOV.h
@@ -271,6 +271,14 @@ class GCOVFunction {
   DenseSet<const GCOVBlock *> visited;
 };
 
+/// Represent file of lines same with block_location_info in gcc.
+struct GCOVBlockLocation {
+  GCOVBlockLocation(unsigned idx) : srcIdx(idx) {}
+
+  unsigned srcIdx;
+  SmallVector<uint32_t, 4> lines;
+};
+
 /// GCOVBlock - Collects block information.
 class GCOVBlock {
 public:
@@ -281,8 +289,13 @@ class GCOVBlock {
 
   GCOVBlock(uint32_t N) : number(N) {}
 
-  void addLine(uint32_t N) { lines.push_back(N); }
-  uint32_t getLastLine() const { return lines.back(); }
+  void addLine(uint32_t N) {
+    locations.back().lines.push_back(N);
+    lastLine = N;
+  }
+  void addFile(unsigned fileIdx) { locations.emplace_back(fileIdx); }
+
+  uint32_t getLastLine() const { return lastLine; }
   uint64_t getCount() const { return count; }
 
   void addSrcEdge(GCOVArc *Edge) { pred.push_back(Edge); }
@@ -311,7 +324,8 @@ class GCOVBlock {
   uint64_t count = 0;
   SmallVector<GCOVArc *, 2> pred;
   SmallVector<GCOVArc *, 2> succ;
-  SmallVector<uint32_t, 4> lines;
+  SmallVector<GCOVBlockLocation> locations;
+  uint32_t lastLine = 0;
   bool traversable = false;
   GCOVArc *incoming = nullptr;
 };

diff  --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index ecb12c045b5b1..7d0a243d02388 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -191,7 +191,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) {
           buf.readString(filename);
           if (filename.empty())
             break;
-          // TODO Unhandled
+          Block.addFile(addNormalizedPathToMap(filename));
         }
       }
     }
@@ -456,11 +456,13 @@ void GCOVBlock::print(raw_ostream &OS) const {
     }
     OS << "\n";
   }
-  if (!lines.empty()) {
-    OS << "\tLines : ";
-    for (uint32_t N : lines)
-      OS << (N) << ",";
-    OS << "\n";
+  if (!locations.empty()) {
+    for (const GCOVBlockLocation &loc : locations) {
+      OS << "\tFile: " << loc.srcIdx << ": ";
+      for (uint32_t N : loc.lines)
+        OS << (N) << ",";
+      OS << "\n";
+    }
   }
 }
 
@@ -701,20 +703,25 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) {
   SmallSet<uint32_t, 16> lines;
   SmallSet<uint32_t, 16> linesExec;
   for (const GCOVBlock &b : f.blocksRange()) {
-    if (b.lines.empty())
+    if (b.locations.empty())
       continue;
-    uint32_t maxLineNum = *llvm::max_element(b.lines);
-    if (maxLineNum >= si.lines.size())
-      si.lines.resize(maxLineNum + 1);
-    for (uint32_t lineNum : b.lines) {
-      LineInfo &line = si.lines[lineNum];
-      if (lines.insert(lineNum).second)
-        ++summary.lines;
-      if (b.count && linesExec.insert(lineNum).second)
-        ++summary.linesExec;
-      line.exists = true;
-      line.count += b.count;
-      line.blocks.push_back(&b);
+    for (const GCOVBlockLocation &loc : b.locations) {
+      SourceInfo &locSource = sources[loc.srcIdx];
+      uint32_t maxLineNum = *llvm::max_element(loc.lines);
+      if (maxLineNum >= locSource.lines.size())
+        locSource.lines.resize(maxLineNum + 1);
+      for (uint32_t lineNum : loc.lines) {
+        LineInfo &line = locSource.lines[lineNum];
+        line.exists = true;
+        line.count += b.count;
+        line.blocks.push_back(&b);
+        if (f.srcIdx == loc.srcIdx) {
+          if (lines.insert(lineNum).second)
+            ++summary.lines;
+          if (b.count && linesExec.insert(lineNum).second)
+            ++summary.linesExec;
+        }
+      }
     }
   }
 }

diff  --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 3dfb36f4f1815..4c7b8c69c1bf3 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -210,12 +210,12 @@ static StringRef getFunctionName(const DISubprogram *SP) {
   return SP->getName();
 }
 
-/// Extract a filename for a DISubprogram.
+/// Extract a filename for a DIScope.
 ///
 /// Prefer relative paths in the coverage notes. Clang also may split
 /// up absolute paths into a directory and filename component. When
 /// the relative path doesn't exist, reconstruct the absolute path.
-static SmallString<128> getFilename(const DISubprogram *SP) {
+static SmallString<128> getFilename(const DIScope *SP) {
   SmallString<128> Path;
   StringRef RelPath = SP->getFilename();
   if (sys::fs::exists(RelPath))
@@ -244,7 +244,9 @@ namespace {
   // list of line numbers and a single filename, representing lines that belong
   // to the block.
   class GCOVLines : public GCOVRecord {
-   public:
+  public:
+    const StringRef getFilename() { return Filename; }
+
     void addLine(uint32_t Line) {
       assert(Line != 0 && "Line zero is not a valid real line number.");
       Lines.push_back(Line);
@@ -276,7 +278,9 @@ namespace {
   class GCOVBlock : public GCOVRecord {
    public:
     GCOVLines &getFile(StringRef Filename) {
-      return LinesByFile.try_emplace(Filename, P, Filename).first->second;
+      if (Lines.empty() || Lines.back().getFilename() != Filename)
+        Lines.emplace_back(P, Filename);
+      return Lines.back();
     }
 
     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
@@ -285,22 +289,16 @@ namespace {
 
     void writeOut() {
       uint32_t Len = 3;
-      SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
-      for (auto &I : LinesByFile) {
-        Len += I.second.length();
-        SortedLinesByFile.push_back(&I);
-      }
+
+      for (auto &L : Lines)
+        Len += L.length();
 
       write(GCOV_TAG_LINES);
       write(Len);
       write(Number);
 
-      llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
-                                       StringMapEntry<GCOVLines> *RHS) {
-        return LHS->getKey() < RHS->getKey();
-      });
-      for (auto &I : SortedLinesByFile)
-        I->getValue().writeOut();
+      for (auto &L : Lines)
+        L.writeOut();
       write(0);
       write(0);
     }
@@ -309,7 +307,7 @@ namespace {
       // Only allow copy before edges and lines have been added. After that,
       // there are inter-block pointers (eg: edges) that won't take kindly to
       // blocks being copied or moved around.
-      assert(LinesByFile.empty());
+      assert(Lines.empty());
       assert(OutEdges.empty());
     }
 
@@ -322,7 +320,7 @@ namespace {
     GCOVBlock(GCOVProfiler *P, uint32_t Number)
         : GCOVRecord(P), Number(Number) {}
 
-    StringMap<GCOVLines> LinesByFile;
+    SmallVector<GCOVLines> Lines;
   };
 
   // A function has a unique identifier, a checksum (we leave as zero) and a
@@ -881,11 +879,10 @@ bool GCOVProfiler::emitProfileNotes(
           if (Line == Loc.getLine()) continue;
           Line = Loc.getLine();
           MDNode *Scope = Loc.getScope();
-          // TODO: Handle blocks from another file due to #line, #include, etc.
-          if (isa<DILexicalBlockFile>(Scope) || SP != getDISubprogram(Scope))
+          if (SP != getDISubprogram(Scope))
             continue;
 
-          GCOVLines &Lines = Block.getFile(Filename);
+          GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope()));
           Lines.addLine(Loc.getLine());
         }
         Line = 0;


        


More information about the llvm-commits mailing list