[clang] dbbc4f4 - SourceManager: Encapsulate line number mapping into SrcMgr::LineOffsetMapping

Duncan P. N. Exon Smith via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 23 09:56:00 PDT 2020


Author: Duncan P. N. Exon Smith
Date: 2020-10-23T12:55:51-04:00
New Revision: dbbc4f4e226be44e484f448be2d308d205c81038

URL: https://github.com/llvm/llvm-project/commit/dbbc4f4e226be44e484f448be2d308d205c81038
DIFF: https://github.com/llvm/llvm-project/commit/dbbc4f4e226be44e484f448be2d308d205c81038.diff

LOG: SourceManager: Encapsulate line number mapping into SrcMgr::LineOffsetMapping

Put the guts of `ComputeLineNumbers` into `LineOffsetMapping::get` and
`LineOffsetMapping::LineOffsetMapping`.  As a drive-by, store the number
of lines directly in the bump-ptr-allocated array.

Differential Revision: https://reviews.llvm.org/D89913

Added: 
    clang/unittests/Basic/LineOffsetMappingTest.cpp

Modified: 
    clang/include/clang/Basic/SourceManager.h
    clang/lib/Basic/SourceManager.cpp
    clang/lib/Lex/ScratchBuffer.cpp
    clang/unittests/Basic/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 06bec09cda98..00a790038dbf 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -90,6 +90,35 @@ namespace SrcMgr {
     return CK == C_User_ModuleMap || CK == C_System_ModuleMap;
   }
 
+  /// Mapping of line offsets into a source file. This does not own the storage
+  /// for the line numbers.
+  class LineOffsetMapping {
+  public:
+    explicit operator bool() const { return Storage; }
+    unsigned size() const {
+      assert(Storage);
+      return Storage[0];
+    }
+    ArrayRef<unsigned> getLines() const {
+      assert(Storage);
+      return ArrayRef<unsigned>(Storage + 1, Storage + 1 + size());
+    }
+    const unsigned *begin() const { return getLines().begin(); }
+    const unsigned *end() const { return getLines().end(); }
+    const unsigned &operator[](int I) const { return getLines()[I]; }
+
+    static LineOffsetMapping get(llvm::MemoryBufferRef Buffer,
+                                 llvm::BumpPtrAllocator &Alloc);
+
+    LineOffsetMapping() = default;
+    LineOffsetMapping(ArrayRef<unsigned> LineOffsets,
+                      llvm::BumpPtrAllocator &Alloc);
+
+  private:
+    /// First element is the size, followed by elements at off-by-one indexes.
+    unsigned *Storage = nullptr;
+  };
+
   /// One instance of this struct is kept for every file loaded or used.
   ///
   /// This object owns the MemoryBuffer object.
@@ -115,14 +144,9 @@ namespace SrcMgr {
 
     /// A bump pointer allocated array of offsets for each source line.
     ///
-    /// This is lazily computed.  This is owned by the SourceManager
+    /// This is lazily computed.  The lines are owned by the SourceManager
     /// BumpPointerAllocator object.
-    unsigned *SourceLineCache = nullptr;
-
-    /// The number of lines in this ContentCache.
-    ///
-    /// This is only valid if SourceLineCache is non-null.
-    unsigned NumLines = 0;
+    LineOffsetMapping SourceLineCache;
 
     /// Indicates whether the buffer itself was provided to override
     /// the actual file contents.
@@ -157,10 +181,8 @@ namespace SrcMgr {
       OrigEntry = RHS.OrigEntry;
       ContentsEntry = RHS.ContentsEntry;
 
-      assert(!RHS.Buffer && RHS.SourceLineCache == nullptr &&
+      assert(!RHS.Buffer && !RHS.SourceLineCache &&
              "Passed ContentCache object cannot own a buffer.");
-
-      NumLines = RHS.NumLines;
     }
 
     ContentCache &operator=(const ContentCache& RHS) = delete;

diff  --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index d2015b55c03f..778fd0dca329 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -1200,10 +1200,10 @@ unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
   const char *Buf = MemBuf->getBufferStart();
   // See if we just calculated the line number for this FilePos and can use
   // that to lookup the start of the line instead of searching for it.
-  if (LastLineNoFileIDQuery == FID &&
-      LastLineNoContentCache->SourceLineCache != nullptr &&
-      LastLineNoResult < LastLineNoContentCache->NumLines) {
-    unsigned *SourceLineCache = LastLineNoContentCache->SourceLineCache;
+  if (LastLineNoFileIDQuery == FID && LastLineNoContentCache->SourceLineCache &&
+      LastLineNoResult < LastLineNoContentCache->SourceLineCache.size()) {
+    const unsigned *SourceLineCache =
+        LastLineNoContentCache->SourceLineCache.begin();
     unsigned LineStart = SourceLineCache[LastLineNoResult - 1];
     unsigned LineEnd = SourceLineCache[LastLineNoResult];
     if (FilePos >= LineStart && FilePos < LineEnd) {
@@ -1274,6 +1274,11 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
   if (Invalid)
     return;
 
+  FI->SourceLineCache = LineOffsetMapping::get(*Buffer, Alloc);
+}
+
+LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer,
+                                         llvm::BumpPtrAllocator &Alloc) {
   // Find the file offsets of all of the *physical* source lines.  This does
   // not look at trigraphs, escaped newlines, or anything else tricky.
   SmallVector<unsigned, 256> LineOffsets;
@@ -1281,8 +1286,8 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
   // Line #1 starts at char 0.
   LineOffsets.push_back(0);
 
-  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
-  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
+  const unsigned char *Buf = (const unsigned char *)Buffer.getBufferStart();
+  const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd();
   const std::size_t BufLen = End - Buf;
   unsigned I = 0;
   while (I < BufLen) {
@@ -1297,10 +1302,14 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
     ++I;
   }
 
-  // Copy the offsets into the FileInfo structure.
-  FI->NumLines = LineOffsets.size();
-  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
-  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
+  return LineOffsetMapping(LineOffsets, Alloc);
+}
+
+LineOffsetMapping::LineOffsetMapping(ArrayRef<unsigned> LineOffsets,
+                                     llvm::BumpPtrAllocator &Alloc)
+    : Storage(Alloc.Allocate<unsigned>(LineOffsets.size() + 1)) {
+  Storage[0] = LineOffsets.size();
+  std::copy(LineOffsets.begin(), LineOffsets.end(), Storage + 1);
 }
 
 /// getLineNumber - Given a SourceLocation, return the spelling line number
@@ -1344,9 +1353,9 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos,
 
   // Okay, we know we have a line number table.  Do a binary search to find the
   // line number that this character position lands on.
-  unsigned *SourceLineCache = Content->SourceLineCache;
-  unsigned *SourceLineCacheStart = SourceLineCache;
-  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
+  const unsigned *SourceLineCache = Content->SourceLineCache.begin();
+  const unsigned *SourceLineCacheStart = SourceLineCache;
+  const unsigned *SourceLineCacheEnd = Content->SourceLineCache.end();
 
   unsigned QueriedFilePos = FilePos+1;
 
@@ -1385,13 +1394,13 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos,
         }
       }
     } else {
-      if (LastLineNoResult < Content->NumLines)
+      if (LastLineNoResult < Content->SourceLineCache.size())
         SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
     }
   }
 
-  unsigned *Pos
-    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
+  const unsigned *Pos =
+      std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
   unsigned LineNo = Pos-SourceLineCacheStart;
 
   LastLineNoFileIDQuery = FID;
@@ -1693,7 +1702,7 @@ SourceLocation SourceManager::translateLineCol(FileID FID,
   if (!Buffer)
     return SourceLocation();
 
-  if (Line > Content->NumLines) {
+  if (Line > Content->SourceLineCache.size()) {
     unsigned Size = Buffer->getBufferSize();
     if (Size > 0)
       --Size;
@@ -2105,7 +2114,7 @@ void SourceManager::PrintStats() const {
   unsigned NumLineNumsComputed = 0;
   unsigned NumFileBytesMapped = 0;
   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
-    NumLineNumsComputed += I->second->SourceLineCache != nullptr;
+    NumLineNumsComputed += bool(I->second->SourceLineCache);
     NumFileBytesMapped  += I->second->getSizeBytesMapped();
   }
   unsigned NumMacroArgsComputed = MacroArgsCacheMap.size();

diff  --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp
index 174b4b362ada..56ea9b71fd02 100644
--- a/clang/lib/Lex/ScratchBuffer.cpp
+++ b/clang/lib/Lex/ScratchBuffer.cpp
@@ -41,7 +41,7 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
         &SourceMgr.getSLocEntry(SourceMgr.getFileID(BufferStartLoc))
              .getFile()
              .getContentCache());
-    ContentCache->SourceLineCache = nullptr;
+    ContentCache->SourceLineCache = SrcMgr::LineOffsetMapping();
   }
 
   // Prefix the token with a \n, so that it looks like it is the first thing on

diff  --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt
index 27e49ff6e0b9..b7fa259fd1b5 100644
--- a/clang/unittests/Basic/CMakeLists.txt
+++ b/clang/unittests/Basic/CMakeLists.txt
@@ -6,6 +6,7 @@ add_clang_unittest(BasicTests
   CharInfoTest.cpp
   DiagnosticTest.cpp
   FileManagerTest.cpp
+  LineOffsetMappingTest.cpp
   SourceManagerTest.cpp
   )
 

diff  --git a/clang/unittests/Basic/LineOffsetMappingTest.cpp b/clang/unittests/Basic/LineOffsetMappingTest.cpp
new file mode 100644
index 000000000000..e04081378283
--- /dev/null
+++ b/clang/unittests/Basic/LineOffsetMappingTest.cpp
@@ -0,0 +1,79 @@
+//===- unittests/Basic/LineOffsetMappingTest.cpp - Test LineOffsetMapping -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/SourceManager.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+using namespace clang::SrcMgr;
+using namespace llvm;
+
+namespace {
+
+TEST(LineOffsetMappingTest, empty) {
+  LineOffsetMapping Mapping;
+  EXPECT_FALSE(Mapping);
+
+#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
+  EXPECT_DEATH((void)Mapping.getLines(), "Assertion");
+#endif
+}
+
+TEST(LineOffsetMappingTest, construct) {
+  BumpPtrAllocator Alloc;
+  unsigned Offsets[] = {0, 10, 20};
+  LineOffsetMapping Mapping(Offsets, Alloc);
+  EXPECT_EQ(3u, Mapping.size());
+  EXPECT_EQ(0u, Mapping[0]);
+  EXPECT_EQ(10u, Mapping[1]);
+  EXPECT_EQ(20u, Mapping[2]);
+
+#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
+  EXPECT_DEATH((void)Mapping[3], "Assertion");
+#endif
+}
+
+TEST(LineOffsetMappingTest, constructTwo) {
+  // Confirm allocation size is big enough, convering an off-by-one bug.
+  BumpPtrAllocator Alloc;
+  unsigned Offsets1[] = {0, 10};
+  unsigned Offsets2[] = {0, 20};
+  LineOffsetMapping Mapping1(Offsets1, Alloc);
+  LineOffsetMapping Mapping2(Offsets2, Alloc);
+
+  // Need to check Mapping1 *after* building Mapping2.
+  EXPECT_EQ(2u, Mapping1.size());
+  EXPECT_EQ(0u, Mapping1[0]);
+  EXPECT_EQ(10u, Mapping1[1]);
+  EXPECT_EQ(2u, Mapping2.size());
+  EXPECT_EQ(0u, Mapping2[0]);
+  EXPECT_EQ(20u, Mapping2[1]);
+}
+
+TEST(LineOffsetMappingTest, get) {
+  BumpPtrAllocator Alloc;
+  StringRef Source = "first line\n"
+                     "second line\n";
+  auto Mapping = LineOffsetMapping::get(MemoryBufferRef(Source, ""), Alloc);
+  EXPECT_EQ(3u, Mapping.size());
+  EXPECT_EQ(0u, Mapping[0]);
+  EXPECT_EQ(11u, Mapping[1]);
+  EXPECT_EQ(23u, Mapping[2]);
+}
+
+TEST(LineOffsetMappingTest, getMissingFinalNewline) {
+  BumpPtrAllocator Alloc;
+  StringRef Source = "first line\n"
+                     "second line";
+  auto Mapping = LineOffsetMapping::get(MemoryBufferRef(Source, ""), Alloc);
+  EXPECT_EQ(2u, Mapping.size());
+  EXPECT_EQ(0u, Mapping[0]);
+  EXPECT_EQ(11u, Mapping[1]);
+}
+
+} // end namespace


        


More information about the cfe-commits mailing list