[cfe-commits] r43090 - in /cfe/trunk: Driver/TextDiagnosticPrinter.cpp Driver/TextDiagnosticPrinter.h Lex/Lexer.cpp include/clang/Lex/Lexer.h

Wed Oct 17 14:18:47 PDT 2007

Author: lattner
Date: Wed Oct 17 16:18:47 2007
New Revision: 43090

URL: http://llvm.org/viewvc/llvm-project?rev=43090&view=rev
Log:
Move token length calculation out of the diagnostics machinery into 
the lexer, where it can be shared.

Modified:
    cfe/trunk/Driver/TextDiagnosticPrinter.cpp
    cfe/trunk/Driver/TextDiagnosticPrinter.h
    cfe/trunk/Lex/Lexer.cpp
    cfe/trunk/include/clang/Lex/Lexer.h

Modified: cfe/trunk/Driver/TextDiagnosticPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Driver/TextDiagnosticPrinter.cpp?rev=43090&r1=43089&r2=43090&view=diff

==============================================================================

--- cfe/trunk/Driver/TextDiagnosticPrinter.cpp (original)
+++ cfe/trunk/Driver/TextDiagnosticPrinter.cpp Wed Oct 17 16:18:47 2007
@@ -80,7 +80,7 @@
       --EndColNo;  // Zero base the col #.
       
       // Add in the length of the token, so that we cover multi-char tokens.
-      EndColNo += GetTokenLength(R.getEnd());
+      EndColNo += Lexer::MeasureTokenLength(R.getEnd(), SourceMgr);
     } else {
       EndColNo = CaratLine.size();
     }
@@ -97,31 +97,6 @@
     CaratLine[i] = '~';
 }
 
-/// GetTokenLength - Given the source location of a token, determine its length.
-/// This is a fully general function that uses a lexer to relex the token.
-unsigned TextDiagnosticPrinter::GetTokenLength(SourceLocation Loc) {
-  // If this comes from a macro expansion, we really do want the macro name, not
-  // the token this macro expanded to.
-  Loc = SourceMgr.getLogicalLoc(Loc);
-  const char *StrData = SourceMgr.getCharacterData(Loc);
-  const char *BufEnd = SourceMgr.getBufferData(Loc.getFileID()).second;
-  
-  // TODO: this could be special cased for common tokens like identifiers, ')',
-  // etc to make this faster, if it mattered.  This could use 
-  // Lexer::isObviouslySimpleCharacter for example.
-  
-  // Create a langops struct and enable trigraphs.  This is sufficient for
-  // measuring tokens.
-  LangOptions LangOpts;
-  LangOpts.Trigraphs = true;
-  
-  // Create a lexer starting at the beginning of this token.
-  Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
-  Token TheTok;
-  TheLexer.LexRawToken(TheTok);
-  return TheTok.getLength();
-}
-
 void TextDiagnosticPrinter::HandleDiagnostic(Diagnostic::Level Level, 
                                              SourceLocation Pos,
                                              diag::kind ID,

Modified: cfe/trunk/Driver/TextDiagnosticPrinter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Driver/TextDiagnosticPrinter.h?rev=43090&r1=43089&r2=43090&view=diff

==============================================================================
--- cfe/trunk/Driver/TextDiagnosticPrinter.h (original)
+++ cfe/trunk/Driver/TextDiagnosticPrinter.h Wed Oct 17 16:18:47 2007
@@ -31,7 +31,6 @@
   void HighlightRange(const SourceRange &R, unsigned LineNo,
                       std::string &CaratLine,
                       const std::string &SourceLine);
-  unsigned GetTokenLength(SourceLocation Loc);
 
   virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
                                 SourceLocation Pos,

Modified: cfe/trunk/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Lex/Lexer.cpp?rev=43090&r1=43089&r2=43090&view=diff

==============================================================================
--- cfe/trunk/Lex/Lexer.cpp (original)
+++ cfe/trunk/Lex/Lexer.cpp Wed Oct 17 16:18:47 2007
@@ -163,6 +163,39 @@
 }
 
 
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file.  If the token needs cleaning (e.g.
+/// includes a trigraph or an escaped newline) then this count includes bytes
+/// that are part of that.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+                                   const SourceManager &SM) {
+  // If this comes from a macro expansion, we really do want the macro name, not
+  // the token this macro expanded to.
+  Loc = SM.getLogicalLoc(Loc);
+  
+  const char *StrData = SM.getCharacterData(Loc);
+  
+  // TODO: this could be special cased for common tokens like identifiers, ')',
+  // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
+  // all obviously single-char tokens.  This could use 
+  // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
+  // something.
+  
+  
+  const char *BufEnd = SM.getBufferData(Loc.getFileID()).second;
+  
+  // Create a langops struct and enable trigraphs.  This is sufficient for
+  // measuring tokens.
+  LangOptions LangOpts;
+  LangOpts.Trigraphs = true;
+  
+  // Create a lexer starting at the beginning of this token.
+  Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
+  Token TheTok;
+  TheLexer.LexRawToken(TheTok);
+  return TheTok.getLength();
+}
+
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//

Modified: cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=43090&r1=43089&r2=43090&view=diff

==============================================================================
--- cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/trunk/include/clang/Lex/Lexer.h Wed Oct 17 16:18:47 2007
@@ -24,6 +24,7 @@
 
 namespace clang {
 class Diagnostic;
+class SourceManager;
 class Preprocessor;
 
 /// Lexer - This provides a simple interface that turns a text buffer into a
@@ -178,6 +179,13 @@
   /// and " characters.  This does not add surrounding ""'s to the string.
   static void Stringify(llvm::SmallVectorImpl<char> &Str);
   
+  /// MeasureTokenLength - Relex the token at the specified location and return
+  /// its length in bytes in the input file.  If the token needs cleaning (e.g.
+  /// includes a trigraph or an escaped newline) then this count includes bytes
+  /// that are part of that.
+  static unsigned MeasureTokenLength(SourceLocation Loc,
+                                     const SourceManager &SM);
+  
   //===--------------------------------------------------------------------===//
   // Internal implementation interfaces.
 private: