[cfe-commits] r40459 - in /cfe/trunk: Basic/SourceManager.cpp clang.xcodeproj/project.pbxproj include/clang/Basic/SourceManager.h

Chris Lattner sabre at nondot.org
Mon Jul 23 22:57:20 PDT 2007


Author: lattner
Date: Tue Jul 24 00:57:19 2007
New Revision: 40459

URL: http://llvm.org/viewvc/llvm-project?rev=40459&view=rev
Log:
Add a cache to SourceManager to accelerate line # lookup.  This is a
bottleneck for -E computation, because every token that starts a line needs
to determine *which* line it is on (so -E mode can insert the appropriate
vertical whitespace).  This optimization improves the common case where
successive queries stride through the line # table.

This speeds up -E on xalancbmk by 3.2%.

Modified:
    cfe/trunk/Basic/SourceManager.cpp
    cfe/trunk/clang.xcodeproj/project.pbxproj
    cfe/trunk/include/clang/Basic/SourceManager.h

Modified: cfe/trunk/Basic/SourceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Basic/SourceManager.cpp?rev=40459&r1=40458&r2=40459&view=diff

==============================================================================
--- cfe/trunk/Basic/SourceManager.cpp (original)
+++ cfe/trunk/Basic/SourceManager.cpp Tue Jul 24 00:57:19 2007
@@ -13,6 +13,7 @@
 
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/FileManager.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/System/Path.h"
 #include <algorithm>
@@ -236,6 +237,50 @@
   return getFileInfo(FileID)->Buffer->getBufferIdentifier();
 }
 
+static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
+static void ComputeLineNumbers(FileInfo *FI) {
+  const MemoryBuffer *Buffer = FI->Buffer;
+  
+  // Find the file offsets of all of the *physical* source lines.  This does
+  // not look at trigraphs, escaped newlines, or anything else tricky.
+  std::vector<unsigned> LineOffsets;
+  
+  // Line #1 starts at char 0.
+  LineOffsets.push_back(0);
+  
+  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
+  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
+  unsigned Offs = 0;
+  while (1) {
+    // Skip over the contents of the line.
+    // TODO: Vectorize this?  This is very performance sensitive for programs
+    // with lots of diagnostics and in -E mode.
+    const unsigned char *NextBuf = (const unsigned char *)Buf;
+    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
+      ++NextBuf;
+    Offs += NextBuf-Buf;
+    Buf = NextBuf;
+    
+    if (Buf[0] == '\n' || Buf[0] == '\r') {
+      // If this is \n\r or \r\n, skip both characters.
+      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
+        ++Offs, ++Buf;
+      ++Offs, ++Buf;
+      LineOffsets.push_back(Offs);
+    } else {
+      // Otherwise, this is a null.  If end of file, exit.
+      if (Buf == End) break;
+      // Otherwise, skip the null.
+      ++Offs, ++Buf;
+    }
+  }
+  LineOffsets.push_back(Offs);
+  
+  // Copy the offsets into the FileInfo structure.
+  FI->NumLines = LineOffsets.size();
+  FI->SourceLineCache = new unsigned[LineOffsets.size()];
+  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
+}
 
 /// getLineNumber - Given a SourceLocation, return the physical line number
 /// for the position indicated.  This requires building and caching a table of
@@ -244,66 +289,66 @@
 unsigned SourceManager::getLineNumber(SourceLocation Loc) {
   unsigned FileID = Loc.getFileID();
   if (FileID == 0) return 0;
-  FileInfo *FileInfo = getFileInfo(FileID);
+  FileInfo *FileInfo;
+  
+  if (LastLineNoFileIDQuery == FileID)
+    FileInfo = LastLineNoFileInfo;
+  else
+    FileInfo = getFileInfo(FileID);
   
   // If this is the first use of line information for this buffer, compute the
-  /// SourceLineCache for it on demand. 
-  if (FileInfo->SourceLineCache == 0) {
-    const MemoryBuffer *Buffer = FileInfo->Buffer;
-    
-    // Find the file offsets of all of the *physical* source lines.  This does
-    // not look at trigraphs, escaped newlines, or anything else tricky.
-    std::vector<unsigned> LineOffsets;
-    
-    // Line #1 starts at char 0.
-    LineOffsets.push_back(0);
-    
-    const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
-    const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
-    unsigned Offs = 0;
-    while (1) {
-      // Skip over the contents of the line.
-      // TODO: Vectorize this?  This is very performance sensitive for programs
-      // with lots of diagnostics and in -E mode.
-      const unsigned char *NextBuf = (const unsigned char *)Buf;
-      while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
-        ++NextBuf;
-      Offs += NextBuf-Buf;
-      Buf = NextBuf;
-      
-      if (Buf[0] == '\n' || Buf[0] == '\r') {
-        // If this is \n\r or \r\n, skip both characters.
-        if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
-          ++Offs, ++Buf;
-        ++Offs, ++Buf;
-        LineOffsets.push_back(Offs);
-      } else {
-        // Otherwise, this is a null.  If end of file, exit.
-        if (Buf == End) break;
-        // Otherwise, skip the null.
-        ++Offs, ++Buf;
-      }
-    }
-    LineOffsets.push_back(Offs);
-    
-    // Copy the offsets into the FileInfo structure.
-    FileInfo->NumLines = LineOffsets.size();
-    FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
-    std::copy(LineOffsets.begin(), LineOffsets.end(),
-              FileInfo->SourceLineCache);
-  }
+  /// SourceLineCache for it on demand.
+  if (FileInfo->SourceLineCache == 0)
+    ComputeLineNumbers(FileInfo);
 
   // Okay, we know we have a line number table.  Do a binary search to find the
   // line number that this character position lands on.
-  unsigned NumLines = FileInfo->NumLines;
   unsigned *SourceLineCache = FileInfo->SourceLineCache;
-    
+  unsigned *SourceLineCacheStart = SourceLineCache;
+  unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
+  
+  unsigned QueriedFilePos = getFullFilePos(Loc)+1;
+
+  // If the previous query was to the same file, we know both the file pos from
+  // that query and the line number returned.  This allows us to narrow the
+  // search space from the entire file to something near the match.
+  if (LastLineNoFileIDQuery == FileID) {
+    if (QueriedFilePos >= LastLineNoFilePos) {
+      SourceLineCache = SourceLineCache+LastLineNoResult-1;
+      
+      // The query is likely to be nearby the previous one.  Here we check to
+      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
+      // where big comment blocks and vertical whitespace eat up lines but
+      // contribute no tokens.
+      if (SourceLineCache+5 < SourceLineCacheEnd) {
+        if (SourceLineCache[5] > QueriedFilePos)
+          SourceLineCacheEnd = SourceLineCache+5;
+        else if (SourceLineCache+10 < SourceLineCacheEnd) {
+          if (SourceLineCache[10] > QueriedFilePos)
+            SourceLineCacheEnd = SourceLineCache+10;
+          else if (SourceLineCache+20 < SourceLineCacheEnd) {
+            if (SourceLineCache[20] > QueriedFilePos)
+              SourceLineCacheEnd = SourceLineCache+20;
+          }
+        }
+      }
+    } else {
+      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
+    }
+  }
+  
+  unsigned *Pos;
   // TODO: If this is performance sensitive, we could try doing simple radix
   // type approaches to make good (tight?) initial guesses based on the
   // assumption that all lines are the same average size.
-  unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
-                                   getFullFilePos(Loc)+1);
-  return Pos-SourceLineCache;
+  Pos = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
+  unsigned LineNo = Pos-SourceLineCacheStart;
+  
+  LastLineNoFileIDQuery = FileID;
+  LastLineNoFileInfo = FileInfo;
+  LastLineNoFilePos = QueriedFilePos;
+  LastLineNoResult = LineNo;
+  return LineNo;
 }
 
 /// PrintStats - Print statistics to stderr.

Modified: cfe/trunk/clang.xcodeproj/project.pbxproj
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/clang.xcodeproj/project.pbxproj?rev=40459&r1=40458&r2=40459&view=diff

==============================================================================
--- cfe/trunk/clang.xcodeproj/project.pbxproj (original)
+++ cfe/trunk/clang.xcodeproj/project.pbxproj Tue Jul 24 00:57:19 2007
@@ -191,7 +191,7 @@
 		1A869AA70BA21ABA008DA07A /* LiteralSupport.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LiteralSupport.cpp; sourceTree = "<group>"; };
 		84D9A8870C1A57E100AC7ABC /* AttributeList.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = AttributeList.cpp; path = Parse/AttributeList.cpp; sourceTree = "<group>"; };
 		84D9A88B0C1A581300AC7ABC /* AttributeList.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = AttributeList.h; path = clang/Parse/AttributeList.h; sourceTree = "<group>"; };
-		8DD76F6C0486A84900D96B5E /* clang */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = clang; sourceTree = BUILT_PRODUCTS_DIR; };
+		8DD76F6C0486A84900D96B5E /* clang */ = {isa = PBXFileReference; includeInIndex = 0; lastKnownFileType = "compiled.mach-o.executable"; path = clang; sourceTree = BUILT_PRODUCTS_DIR; };
 		DE01DA480B12ADA300AC22CE /* PPCallbacks.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = PPCallbacks.h; sourceTree = "<group>"; };
 		DE06756B0C051CFE00EBBFD8 /* ParseExprCXX.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseExprCXX.cpp; path = Parse/ParseExprCXX.cpp; sourceTree = "<group>"; };
 		DE06B73D0A8307640050E87E /* LangOptions.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LangOptions.h; sourceTree = "<group>"; };

Modified: cfe/trunk/include/clang/Basic/SourceManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/SourceManager.h?rev=40459&r1=40458&r2=40459&view=diff

==============================================================================
--- cfe/trunk/include/clang/Basic/SourceManager.h (original)
+++ cfe/trunk/include/clang/Basic/SourceManager.h Tue Jul 24 00:57:19 2007
@@ -154,13 +154,21 @@
   /// MacroIDs - Information about each MacroID.
   std::vector<SrcMgr::MacroIDInfo> MacroIDs;
   
+  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
+  /// method which is used to speedup getLineNumber calls to nearby locations.
+  unsigned LastLineNoFileIDQuery;
+  SrcMgr::FileInfo *LastLineNoFileInfo;
+  unsigned LastLineNoFilePos;
+  unsigned LastLineNoResult;
 public:
-  SourceManager() {}
+  SourceManager() : LastLineNoFileIDQuery(~0U) {}
   ~SourceManager();
   
   void clearIDTables() {
     FileIDs.clear();
     MacroIDs.clear();
+    LastLineNoFileIDQuery = ~0U;
+    LastLineNoFileInfo = 0;
   }
   
   /// createFileID - Create a new FileID that represents the specified file





More information about the cfe-commits mailing list