[cfe-commits] r119474 - in /cfe/trunk: docs/InternalsManual.html include/clang/Basic/SourceLocation.h include/clang/Lex/Lexer.h include/clang/Lex/Preprocessor.h lib/Lex/Lexer.cpp lib/Lex/LiteralSupport.cpp lib/Lex/Preprocessor.cpp

Tue Nov 16 23:05:50 PST 2010

Author: lattner
Date: Wed Nov 17 01:05:50 2010
New Revision: 119474

URL: http://llvm.org/viewvc/llvm-project?rev=119474&view=rev
Log:
move AdvanceToTokenCharacter and getLocForEndOfToken from
Preprocessor to Lexer where they make more sense.

Modified:
    cfe/trunk/docs/InternalsManual.html
    cfe/trunk/include/clang/Basic/SourceLocation.h
    cfe/trunk/include/clang/Lex/Lexer.h
    cfe/trunk/include/clang/Lex/Preprocessor.h
    cfe/trunk/lib/Lex/Lexer.cpp
    cfe/trunk/lib/Lex/LiteralSupport.cpp
    cfe/trunk/lib/Lex/Preprocessor.cpp

Modified: cfe/trunk/docs/InternalsManual.html
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/InternalsManual.html?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================

--- cfe/trunk/docs/InternalsManual.html (original)
+++ cfe/trunk/docs/InternalsManual.html Wed Nov 17 01:05:50 2010
@@ -565,7 +565,7 @@
 representation, the 'last' location needs to be adjusted to point to
 (or past) the end of that token with either
 <code>Lexer::MeasureTokenLength()</code> or
-<code>Preprocessor::getLocForEndOfToken()</code>. For the rare cases
+<code>Lexer::getLocForEndOfToken()</code>. For the rare cases
 where character-level source ranges information is needed we use
 the <code>CharSourceRange</code> class.</p>
 

Modified: cfe/trunk/include/clang/Basic/SourceLocation.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/SourceLocation.h?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/SourceLocation.h (original)
+++ cfe/trunk/include/clang/Basic/SourceLocation.h Wed Nov 17 01:05:50 2010
@@ -121,7 +121,6 @@
   /// directly.
   unsigned getRawEncoding() const { return ID; }
 
-
   /// getFromRawEncoding - Turn a raw encoding of a SourceLocation object into
   /// a real SourceLocation.
   static SourceLocation getFromRawEncoding(unsigned Encoding) {

Modified: cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/trunk/include/clang/Lex/Lexer.h Wed Nov 17 01:05:50 2010
@@ -228,6 +228,33 @@
                                             const SourceManager &SM,
                                             const LangOptions &LangOpts);
   
+  /// AdvanceToTokenCharacter - If the current SourceLocation specifies a
+  /// location at the start of a token, return a new location that specifies a
+  /// character within the token.  This handles trigraphs and escaped newlines.
+  static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
+                                                unsigned Character,
+                                                const SourceManager &SM,
+                                                const LangOptions &Features);
+  
+  /// \brief Computes the source location just past the end of the
+  /// token at this source location.
+  ///
+  /// This routine can be used to produce a source location that
+  /// points just past the end of the token referenced by \p Loc, and
+  /// is generally used when a diagnostic needs to point just after a
+  /// token where it expected something different than it received. If
+  /// the returned source location would not be meaningful (e.g., if
+  /// it points into a macro), this routine returns an invalid
+  /// source location.
+  ///
+  /// \param Offset an offset from the end of the token, where the source
+  /// location should refer to. An offset of 0 produces a source location
+  /// pointing just past the end of the token; an offset of 1 produces a
+  /// source location pointing to the last character in the token, etc.
+  static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
+                                            const SourceManager &SM,
+                                            const LangOptions &Features);
+    
   /// \brief Compute the preamble of the given file.
   ///
   /// The preamble of a file contains the initial comments, include directives,

Modified: cfe/trunk/include/clang/Lex/Preprocessor.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Preprocessor.h?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/Preprocessor.h (original)
+++ cfe/trunk/include/clang/Lex/Preprocessor.h Wed Nov 17 01:05:50 2010
@@ -717,7 +717,9 @@
   /// location should refer to. The default offset (0) produces a source
   /// location pointing just past the end of the token; an offset of 1 produces
   /// a source location pointing to the last character in the token, etc.
-  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0);
+  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
+    return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, Features);
+  }
 
   /// DumpToken - Print the token to stderr, used for debugging.
   ///
@@ -729,12 +731,8 @@
   /// token, return a new location that specifies a character within the token.
   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
                                          unsigned Char) const {
-    return AdvanceToTokenCharacter(FullSourceLoc(TokStart, SourceMgr), Char,
-                                   Features);
+    return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, Features);
   }
-  static FullSourceLoc AdvanceToTokenCharacter(FullSourceLoc TokStart,
-                                               unsigned Char,
-                                               const LangOptions &Features);
 
 
   /// IncrementPasteCounter - Increment the counters for the number of token

Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Wed Nov 17 01:05:50 2010
@@ -444,6 +444,83 @@
                                : TheTok.isAtStartOfLine());
 }
 
+
+/// AdvanceToTokenCharacter - Given a location that specifies the start of a
+/// token, return a new location that specifies a character within the token.
+SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
+                                              unsigned CharNo,
+                                              const SourceManager &SM,
+                                              const LangOptions &Features) {
+  // Figure out how many physical characters away the specified instantiation
+  // character is.  This needs to take into consideration escaped newlines and
+  // trigraphs.
+  bool Invalid = false;
+  const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);
+  
+  // If they request the first char of the token, we're trivially done.
+  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
+    return TokStart;
+  
+  unsigned PhysOffset = 0;
+  
+  // The usual case is that tokens don't contain anything interesting.  Skip
+  // over the uninteresting characters.  If a token only consists of simple
+  // chars, this method is extremely fast.
+  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
+    if (CharNo == 0)
+      return TokStart.getFileLocWithOffset(PhysOffset);
+    ++TokPtr, --CharNo, ++PhysOffset;
+  }
+  
+  // If we have a character that may be a trigraph or escaped newline, use a
+  // lexer to parse it correctly.
+  for (; CharNo; --CharNo) {
+    unsigned Size;
+    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+    TokPtr += Size;
+    PhysOffset += Size;
+  }
+  
+  // Final detail: if we end up on an escaped newline, we want to return the
+  // location of the actual byte of the token.  For example foo\<newline>bar
+  // advanced by 3 should return the location of b, not of \\.  One compounding
+  // detail of this is that the escape may be made by a trigraph.
+  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
+    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+  
+  return TokStart.getFileLocWithOffset(PhysOffset);
+}
+
+/// \brief Computes the source location just past the end of the
+/// token at this source location.
+///
+/// This routine can be used to produce a source location that
+/// points just past the end of the token referenced by \p Loc, and
+/// is generally used when a diagnostic needs to point just after a
+/// token where it expected something different than it received. If
+/// the returned source location would not be meaningful (e.g., if
+/// it points into a macro), this routine returns an invalid
+/// source location.
+///
+/// \param Offset an offset from the end of the token, where the source
+/// location should refer to. An offset of 0 produces a source location
+/// pointing just past the end of the token; an offset of 1 produces a
+/// source location pointing to the last character in the token, etc.
+SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
+                                          const SourceManager &SM,
+                                          const LangOptions &Features) {
+  if (Loc.isInvalid() || !Loc.isFileID())
+    return SourceLocation();
+  
+  unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);
+  if (Len > Offset)
+    Len = Len - Offset;
+  else
+    return Loc;
+  
+  return AdvanceToTokenCharacter(Loc, Len, SM, Features);
+}
+
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//

Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Wed Nov 17 01:05:50 2010
@@ -194,9 +194,11 @@
   // If we didn't consume the proper number of digits, there is a problem.
   if (UcnLenSave) {
     if (Diags) {
-      Loc = Preprocessor::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
-                                                  Features);
-      Diags->Report(Loc, diag::err_ucn_escape_incomplete);
+      SourceLocation L =
+        Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
+                                       Loc.getManager(), Features);
+      Diags->Report(FullSourceLoc(L, Loc.getManager()),
+                    diag::err_ucn_escape_incomplete);
     }
     return false;
   }

Modified: cfe/trunk/lib/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Preprocessor.cpp?rev=119474&r1=119473&r2=119474&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Preprocessor.cpp (original)
+++ cfe/trunk/lib/Lex/Preprocessor.cpp Wed Nov 17 01:05:50 2010
@@ -429,68 +429,6 @@
 }
 
 
-/// AdvanceToTokenCharacter - Given a location that specifies the start of a
-/// token, return a new location that specifies a character within the token.
-FullSourceLoc Preprocessor::AdvanceToTokenCharacter(FullSourceLoc TokStart,
-                                                    unsigned CharNo,
-                                                  const LangOptions &Features) {
-  // Figure out how many physical characters away the specified instantiation
-  // character is.  This needs to take into consideration newlines and
-  // trigraphs.
-  bool Invalid = false;
-  const char *TokPtr = TokStart.getCharacterData(&Invalid);
-
-  // If they request the first char of the token, we're trivially done.
-  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
-    return TokStart;
-
-  unsigned PhysOffset = 0;
-
-  // The usual case is that tokens don't contain anything interesting.  Skip
-  // over the uninteresting characters.  If a token only consists of simple
-  // chars, this method is extremely fast.
-  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
-    if (CharNo == 0)
-      return FullSourceLoc(TokStart.getFileLocWithOffset(PhysOffset),
-                           TokStart.getManager());
-    ++TokPtr, --CharNo, ++PhysOffset;
-  }
-
-  // If we have a character that may be a trigraph or escaped newline, use a
-  // lexer to parse it correctly.
-  for (; CharNo; --CharNo) {
-    unsigned Size;
-    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
-    TokPtr += Size;
-    PhysOffset += Size;
-  }
-
-  // Final detail: if we end up on an escaped newline, we want to return the
-  // location of the actual byte of the token.  For example foo\<newline>bar
-  // advanced by 3 should return the location of b, not of \\.  One compounding
-  // detail of this is that the escape may be made by a trigraph.
-  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
-    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
-
-  return FullSourceLoc(TokStart.getFileLocWithOffset(PhysOffset),
-                       TokStart.getManager());
-}
-
-SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc,
-                                                 unsigned Offset) {
-  if (Loc.isInvalid() || !Loc.isFileID())
-    return SourceLocation();
-
-  unsigned Len = Lexer::MeasureTokenLength(Loc, getSourceManager(), Features);
-  if (Len > Offset)
-    Len = Len - Offset;
-  else
-    return Loc;
-
-  return AdvanceToTokenCharacter(Loc, Len);
-}
-
-
 
 //===----------------------------------------------------------------------===//
 // Preprocessor Initialization Methods