[cfe-commits] r64418 - in /cfe/trunk: Driver/PrintPreprocessedOutput.cpp clang.xcodeproj/project.pbxproj include/clang/Lex/TokenConcatenation.h lib/Lex/TokenConcatenation.cpp
Chris Lattner
sabre at nondot.org
Thu Feb 12 16:46:04 PST 2009
Author: lattner
Date: Thu Feb 12 18:46:04 2009
New Revision: 64418
URL: http://llvm.org/viewvc/llvm-project?rev=64418&view=rev
Log:
factor token concatenation avoidance logic out of
PrintPreprocessedOutput into its own file. No functionality change.
Added:
cfe/trunk/include/clang/Lex/TokenConcatenation.h
cfe/trunk/lib/Lex/TokenConcatenation.cpp
Modified:
cfe/trunk/Driver/PrintPreprocessedOutput.cpp
cfe/trunk/clang.xcodeproj/project.pbxproj
Modified: cfe/trunk/Driver/PrintPreprocessedOutput.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Driver/PrintPreprocessedOutput.cpp?rev=64418&r1=64417&r2=64418&view=diff
==============================================================================
--- cfe/trunk/Driver/PrintPreprocessedOutput.cpp (original)
+++ cfe/trunk/Driver/PrintPreprocessedOutput.cpp Thu Feb 12 18:46:04 2009
@@ -12,11 +12,13 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/Basic/SourceManager.h"
#include "clang.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Pragma.h"
+#include "clang/Lex/TokenConcatenation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallString.h"
@@ -48,6 +50,7 @@
namespace {
class PrintPPOutputPPCallbacks : public PPCallbacks {
Preprocessor &PP;
+ TokenConcatenation ConcatInfo;
public:
llvm::raw_ostream &OS;
private:
@@ -58,7 +61,7 @@
bool Initialized;
public:
PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os)
- : PP(pp), OS(os) {
+ : PP(pp), ConcatInfo(PP), OS(os) {
CurLine = 0;
CurFilename += "<uninit>";
EmittedTokensOnThisLine = false;
@@ -78,7 +81,9 @@
bool HandleFirstTokOnLine(Token &Tok);
bool MoveToLine(SourceLocation Loc);
- bool AvoidConcat(const Token &PrevTok, const Token &Tok);
+ bool AvoidConcat(const Token &PrevTok, const Token &Tok) {
+ return ConcatInfo.AvoidConcat(PrevTok, Tok);
+ }
void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
};
} // end anonymous namespace
@@ -291,222 +296,6 @@
} // end anonymous namespace
-enum AvoidConcatInfo {
- /// By default, a token never needs to avoid concatenation. Most tokens (e.g.
- /// ',', ')', etc) don't cause a problem when concatenated.
- aci_never_avoid_concat = 0,
-
- /// aci_custom_firstchar - AvoidConcat contains custom code to handle this
- /// token's requirements, and it needs to know the first character of the
- /// token.
- aci_custom_firstchar = 1,
-
- /// aci_custom - AvoidConcat contains custom code to handle this token's
- /// requirements, but it doesn't need to know the first character of the
- /// token.
- aci_custom = 2,
-
- /// aci_avoid_equal - Many tokens cannot be safely followed by an '='
- /// character. For example, "<<" turns into "<<=" when followed by an =.
- aci_avoid_equal = 4
-};
-
-/// This array contains information for each token on what action to take when
-/// avoiding concatenation of tokens in the AvoidConcat method.
-static char TokenInfo[tok::NUM_TOKENS];
-
-/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
-/// marked by this function.
-static void InitAvoidConcatTokenInfo() {
- // These tokens have custom code in AvoidConcat.
- TokenInfo[tok::identifier ] |= aci_custom;
- TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
- TokenInfo[tok::period ] |= aci_custom_firstchar;
- TokenInfo[tok::amp ] |= aci_custom_firstchar;
- TokenInfo[tok::plus ] |= aci_custom_firstchar;
- TokenInfo[tok::minus ] |= aci_custom_firstchar;
- TokenInfo[tok::slash ] |= aci_custom_firstchar;
- TokenInfo[tok::less ] |= aci_custom_firstchar;
- TokenInfo[tok::greater ] |= aci_custom_firstchar;
- TokenInfo[tok::pipe ] |= aci_custom_firstchar;
- TokenInfo[tok::percent ] |= aci_custom_firstchar;
- TokenInfo[tok::colon ] |= aci_custom_firstchar;
- TokenInfo[tok::hash ] |= aci_custom_firstchar;
- TokenInfo[tok::arrow ] |= aci_custom_firstchar;
-
- // These tokens change behavior if followed by an '='.
- TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
- TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
- TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
- TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
- TokenInfo[tok::less ] |= aci_avoid_equal; // <=
- TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
- TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
- TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
- TokenInfo[tok::star ] |= aci_avoid_equal; // *=
- TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
- TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
- TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
- TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
- TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
-}
-
-/// StartsWithL - Return true if the spelling of this token starts with 'L'.
-static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
- if (!Tok.needsCleaning()) {
- SourceManager &SrcMgr = PP.getSourceManager();
- return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
- == 'L';
- }
-
- if (Tok.getLength() < 256) {
- char Buffer[256];
- const char *TokPtr = Buffer;
- PP.getSpelling(Tok, TokPtr);
- return TokPtr[0] == 'L';
- }
-
- return PP.getSpelling(Tok)[0] == 'L';
-}
-
-/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
-static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
- if (!Tok.needsCleaning()) {
- if (Tok.getLength() != 1)
- return false;
- SourceManager &SrcMgr = PP.getSourceManager();
- return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
- == 'L';
- }
-
- if (Tok.getLength() < 256) {
- char Buffer[256];
- const char *TokPtr = Buffer;
- if (PP.getSpelling(Tok, TokPtr) != 1)
- return false;
- return TokPtr[0] == 'L';
- }
-
- return PP.getSpelling(Tok) == "L";
-}
-
-
-/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
-/// the two individual tokens to be lexed as a single token, return true (which
-/// causes a space to be printed between them). This allows the output of -E
-/// mode to be lexed to the same token stream as lexing the input directly
-/// would.
-///
-/// This code must conservatively return true if it doesn't want to be 100%
-/// accurate. This will cause the output to include extra space characters, but
-/// the resulting output won't have incorrect concatenations going on. Examples
-/// include "..", which we print with a space between, because we don't want to
-/// track enough to tell "x.." from "...".
-bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
- const Token &Tok) {
- char Buffer[256];
-
- tok::TokenKind PrevKind = PrevTok.getKind();
- if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
- PrevKind = tok::identifier;
-
- // Look up information on when we should avoid concatenation with prevtok.
- unsigned ConcatInfo = TokenInfo[PrevKind];
-
- // If prevtok never causes a problem for anything after it, return quickly.
- if (ConcatInfo == 0) return false;
-
- if (ConcatInfo & aci_avoid_equal) {
- // If the next token is '=' or '==', avoid concatenation.
- if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
- return true;
- ConcatInfo &= ~aci_avoid_equal;
- }
-
- if (ConcatInfo == 0) return false;
-
-
-
- // Basic algorithm: we look at the first character of the second token, and
- // determine whether it, if appended to the first token, would form (or would
- // contribute) to a larger token if concatenated.
- char FirstChar = 0;
- if (ConcatInfo & aci_custom) {
- // If the token does not need to know the first character, don't get it.
- } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
- // Avoid spelling identifiers, the most common form of token.
- FirstChar = II->getName()[0];
- } else if (!Tok.needsCleaning()) {
- if (Tok.isLiteral() && Tok.getLiteralData()) {
- FirstChar = *Tok.getLiteralData();
- } else {
- SourceManager &SrcMgr = PP.getSourceManager();
- FirstChar =
- *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
- }
- } else if (Tok.getLength() < 256) {
- const char *TokPtr = Buffer;
- PP.getSpelling(Tok, TokPtr);
- FirstChar = TokPtr[0];
- } else {
- FirstChar = PP.getSpelling(Tok)[0];
- }
-
- switch (PrevKind) {
- default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
- case tok::identifier: // id+id or id+number or id+L"foo".
- if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
- Tok.is(tok::wide_string_literal) /* ||
- Tok.is(tok::wide_char_literal)*/)
- return true;
-
- // If this isn't identifier + string, we're done.
- if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
- return false;
-
- // FIXME: need a wide_char_constant!
-
- // If the string was a wide string L"foo" or wide char L'f', it would concat
- // with the previous identifier into fooL"bar". Avoid this.
- if (StartsWithL(Tok, PP))
- return true;
-
- // Otherwise, this is a narrow character or string. If the *identifier* is
- // a literal 'L', avoid pasting L "foo" -> L"foo".
- return IsIdentifierL(PrevTok, PP);
- case tok::numeric_constant:
- return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
- FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
- case tok::period: // ..., .*, .1234
- return FirstChar == '.' || isdigit(FirstChar) ||
- (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
- case tok::amp: // &&
- return FirstChar == '&';
- case tok::plus: // ++
- return FirstChar == '+';
- case tok::minus: // --, ->, ->*
- return FirstChar == '-' || FirstChar == '>';
- case tok::slash: //, /*, //
- return FirstChar == '*' || FirstChar == '/';
- case tok::less: // <<, <<=, <:, <%
- return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
- case tok::greater: // >>, >>=
- return FirstChar == '>';
- case tok::pipe: // ||
- return FirstChar == '|';
- case tok::percent: // %>, %:
- return (FirstChar == '>' || FirstChar == ':') &&
- PP.getLangOptions().Digraphs;
- case tok::colon: // ::, :>
- return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
- (FirstChar == '>' && PP.getLangOptions().Digraphs);
- case tok::hash: // ##, #@, %:%:
- return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
- case tok::arrow: // ->*
- return FirstChar == '*';
- }
-}
-
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
PrintPPOutputPPCallbacks *Callbacks,
llvm::raw_ostream &OS) {
@@ -614,8 +403,6 @@
// Inform the preprocessor whether we want it to retain comments or not, due
// to -C or -CC.
PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
- InitAvoidConcatTokenInfo();
-
// Open the output buffer.
std::string Err;
@@ -646,8 +433,8 @@
PrintMacroDefinition(*MacrosByID[i].first, *MacrosByID[i].second, PP, OS);
} else {
- PrintPPOutputPPCallbacks *Callbacks;
- Callbacks = new PrintPPOutputPPCallbacks(PP, OS);
+ PrintPPOutputPPCallbacks *Callbacks
+ = new PrintPPOutputPPCallbacks(PP, OS);
PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",
Callbacks));
Modified: cfe/trunk/clang.xcodeproj/project.pbxproj
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/clang.xcodeproj/project.pbxproj?rev=64418&r1=64417&r2=64418&view=diff
==============================================================================
--- cfe/trunk/clang.xcodeproj/project.pbxproj (original)
+++ cfe/trunk/clang.xcodeproj/project.pbxproj Thu Feb 12 18:46:04 2009
@@ -177,6 +177,7 @@
DEAEE98B0A5A2B970045101B /* MultipleIncludeOpt.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEAEE98A0A5A2B970045101B /* MultipleIncludeOpt.h */; };
DEAEED4B0A5AF89A0045101B /* NOTES.txt in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEAEED4A0A5AF89A0045101B /* NOTES.txt */; };
DEB076CF0F3A222200F5A2BE /* DeclTemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEB076CE0F3A222200F5A2BE /* DeclTemplate.cpp */; };
+ DEB077990F44F97800F5A2BE /* TokenConcatenation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEB077980F44F97800F5A2BE /* TokenConcatenation.cpp */; };
DEC63B1A0C7B940200DBF169 /* CFG.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEC63B190C7B940200DBF169 /* CFG.cpp */; };
DEC63B1C0C7B940600DBF169 /* CFG.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEC63B1B0C7B940600DBF169 /* CFG.h */; };
DEC8D9910A9433CD00353FCA /* Decl.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEC8D9900A9433CD00353FCA /* Decl.h */; };
@@ -551,6 +552,8 @@
DEAEED4A0A5AF89A0045101B /* NOTES.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = NOTES.txt; sourceTree = "<group>"; };
DEB076C90F3A221200F5A2BE /* DeclTemplate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DeclTemplate.h; path = clang/AST/DeclTemplate.h; sourceTree = "<group>"; };
DEB076CE0F3A222200F5A2BE /* DeclTemplate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DeclTemplate.cpp; path = lib/AST/DeclTemplate.cpp; sourceTree = "<group>"; };
+ DEB077930F44F96000F5A2BE /* TokenConcatenation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TokenConcatenation.h; sourceTree = "<group>"; };
+ DEB077980F44F97800F5A2BE /* TokenConcatenation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TokenConcatenation.cpp; sourceTree = "<group>"; };
DEB089EE0F12F1D900522C07 /* TypeTraits.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TypeTraits.h; sourceTree = "<group>"; };
DEC63B190C7B940200DBF169 /* CFG.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 2; lastKnownFileType = sourcecode.cpp.cpp; name = CFG.cpp; path = lib/AST/CFG.cpp; sourceTree = "<group>"; tabWidth = 2; };
DEC63B1B0C7B940600DBF169 /* CFG.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 2; lastKnownFileType = sourcecode.c.h; name = CFG.h; path = clang/AST/CFG.h; sourceTree = "<group>"; tabWidth = 2; };
@@ -1119,6 +1122,7 @@
35B820740ECB811A0020BEC0 /* PreprocessorLexer.h */,
DED7D9170A52518C003AD0FB /* ScratchBuffer.h */,
DE6954630C5121BD00A5826B /* Token.h */,
+ DEB077930F44F96000F5A2BE /* TokenConcatenation.h */,
DE85CD840D8380F20070E26E /* TokenLexer.h */,
);
name = Lex;
@@ -1162,6 +1166,7 @@
3537AA0D0ECD08A4008F7CDC /* PreprocessorLexer.cpp */,
35E1946C0ECB83C100F21733 /* PTHLexer.cpp */,
DED7D9E40A5257F6003AD0FB /* ScratchBuffer.cpp */,
+ DEB077980F44F97800F5A2BE /* TokenConcatenation.cpp */,
DE85CD800D8380B10070E26E /* TokenLexer.cpp */,
);
name = Lex;
@@ -1395,6 +1400,7 @@
357EA27D0F2526F300439B60 /* SemaLookup.cpp in Sources */,
DEB076CF0F3A222200F5A2BE /* DeclTemplate.cpp in Sources */,
1A471AB50F437BC500753CE8 /* CGBlocks.cpp in Sources */,
+ DEB077990F44F97800F5A2BE /* TokenConcatenation.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Added: cfe/trunk/include/clang/Lex/TokenConcatenation.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/TokenConcatenation.h?rev=64418&view=auto
==============================================================================
--- cfe/trunk/include/clang/Lex/TokenConcatenation.h (added)
+++ cfe/trunk/include/clang/Lex/TokenConcatenation.h Thu Feb 12 18:46:04 2009
@@ -0,0 +1,73 @@
+//===--- TokenConcatenation.h - Token Concatenation Avoidance ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TokenConcatenation class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_LEX_TOKEN_CONCATENATION_H
+#define CLANG_LEX_TOKEN_CONCATENATION_H
+
+#include "clang/Basic/TokenKinds.h"
+
+namespace clang {
+ class Preprocessor;
+ class Token;
+
+ /// TokenConcatenation class, which answers the question of
+ /// "Is it safe to emit two tokens without a whitespace between them, or
+ /// would that cause implicit concatenation of the tokens?"
+ ///
+ /// For example, emitting two identifiers "foo" and "bar" next to each
+ /// other would cause the lexer to produce one "foobar" token. Emitting "1"
+ /// and ")" next to each other is safe.
+ ///
+ class TokenConcatenation {
+ Preprocessor &PP;
+
+ enum AvoidConcatInfo {
+ /// By default, a token never needs to avoid concatenation. Most tokens
+ /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
+ aci_never_avoid_concat = 0,
+
+ /// aci_custom_firstchar - AvoidConcat contains custom code to handle this
+ /// token's requirements, and it needs to know the first character of the
+ /// token.
+ aci_custom_firstchar = 1,
+
+ /// aci_custom - AvoidConcat contains custom code to handle this token's
+ /// requirements, but it doesn't need to know the first character of the
+ /// token.
+ aci_custom = 2,
+
+ /// aci_avoid_equal - Many tokens cannot be safely followed by an '='
+ /// character. For example, "<<" turns into "<<=" when followed by an =.
+ aci_avoid_equal = 4
+ };
+
+ /// TokenInfo - This array contains information for each token on what
+ /// action to take when avoiding concatenation of tokens in the AvoidConcat
+ /// method.
+ char TokenInfo[tok::NUM_TOKENS];
+ public:
+ TokenConcatenation(Preprocessor &PP);
+
+ bool AvoidConcat(const Token &PrevTok, const Token &Tok) const;
+
+ private:
+ /// StartsWithL - Return true if the spelling of this token starts with 'L'.
+ bool StartsWithL(const Token &Tok) const;
+
+ /// IsIdentifierL - Return true if the spelling of this token is literally
+ /// 'L'.
+ bool IsIdentifierL(const Token &Tok) const;
+ };
+ } // end clang namespace
+
+#endif
Added: cfe/trunk/lib/Lex/TokenConcatenation.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/TokenConcatenation.cpp?rev=64418&view=auto
==============================================================================
--- cfe/trunk/lib/Lex/TokenConcatenation.cpp (added)
+++ cfe/trunk/lib/Lex/TokenConcatenation.cpp Thu Feb 12 18:46:04 2009
@@ -0,0 +1,205 @@
+//===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenConcatenation class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenConcatenation.h"
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+
+/// StartsWithL - Return true if the spelling of this token starts with 'L'.
+bool TokenConcatenation::StartsWithL(const Token &Tok) const {
+ if (!Tok.needsCleaning()) {
+ SourceManager &SM = PP.getSourceManager();
+ return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+ }
+
+ if (Tok.getLength() < 256) {
+ char Buffer[256];
+ const char *TokPtr = Buffer;
+ PP.getSpelling(Tok, TokPtr);
+ return TokPtr[0] == 'L';
+ }
+
+ return PP.getSpelling(Tok)[0] == 'L';
+}
+
+/// IsIdentifierL - Return true if the spelling of this token is literally
+/// 'L'.
+bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
+ if (!Tok.needsCleaning()) {
+ if (Tok.getLength() != 1)
+ return false;
+ SourceManager &SM = PP.getSourceManager();
+ return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+ }
+
+ if (Tok.getLength() < 256) {
+ char Buffer[256];
+ const char *TokPtr = Buffer;
+ if (PP.getSpelling(Tok, TokPtr) != 1)
+ return false;
+ return TokPtr[0] == 'L';
+ }
+
+ return PP.getSpelling(Tok) == "L";
+}
+
+TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
+ memset(TokenInfo, 0, sizeof(TokenInfo));
+
+ // These tokens have custom code in AvoidConcat.
+ TokenInfo[tok::identifier ] |= aci_custom;
+ TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
+ TokenInfo[tok::period ] |= aci_custom_firstchar;
+ TokenInfo[tok::amp ] |= aci_custom_firstchar;
+ TokenInfo[tok::plus ] |= aci_custom_firstchar;
+ TokenInfo[tok::minus ] |= aci_custom_firstchar;
+ TokenInfo[tok::slash ] |= aci_custom_firstchar;
+ TokenInfo[tok::less ] |= aci_custom_firstchar;
+ TokenInfo[tok::greater ] |= aci_custom_firstchar;
+ TokenInfo[tok::pipe ] |= aci_custom_firstchar;
+ TokenInfo[tok::percent ] |= aci_custom_firstchar;
+ TokenInfo[tok::colon ] |= aci_custom_firstchar;
+ TokenInfo[tok::hash ] |= aci_custom_firstchar;
+ TokenInfo[tok::arrow ] |= aci_custom_firstchar;
+
+ // These tokens change behavior if followed by an '='.
+ TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
+ TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
+ TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
+ TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
+ TokenInfo[tok::less ] |= aci_avoid_equal; // <=
+ TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
+ TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
+ TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
+ TokenInfo[tok::star ] |= aci_avoid_equal; // *=
+ TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
+ TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
+ TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
+ TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
+ TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
+}
+
+/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
+/// the two individual tokens to be lexed as a single token, return true
+/// (which causes a space to be printed between them). This allows the output
+/// of -E mode to be lexed to the same token stream as lexing the input
+/// directly would.
+///
+/// This code must conservatively return true if it doesn't want to be 100%
+/// accurate. This will cause the output to include extra space characters,
+/// but the resulting output won't have incorrect concatenations going on.
+/// Examples include "..", which we print with a space between, because we
+/// don't want to track enough to tell "x.." from "...".
+bool TokenConcatenation::AvoidConcat(const Token &PrevTok,
+ const Token &Tok) const {
+ char Buffer[256];
+
+ tok::TokenKind PrevKind = PrevTok.getKind();
+ if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
+ PrevKind = tok::identifier;
+
+ // Look up information on when we should avoid concatenation with prevtok.
+ unsigned ConcatInfo = TokenInfo[PrevKind];
+
+ // If prevtok never causes a problem for anything after it, return quickly.
+ if (ConcatInfo == 0) return false;
+
+ if (ConcatInfo & aci_avoid_equal) {
+ // If the next token is '=' or '==', avoid concatenation.
+ if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
+ return true;
+ ConcatInfo &= ~aci_avoid_equal;
+ }
+
+ if (ConcatInfo == 0) return false;
+
+ // Basic algorithm: we look at the first character of the second token, and
+ // determine whether it, if appended to the first token, would form (or
+ // would contribute) to a larger token if concatenated.
+ char FirstChar = 0;
+ if (ConcatInfo & aci_custom) {
+ // If the token does not need to know the first character, don't get it.
+ } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+ // Avoid spelling identifiers, the most common form of token.
+ FirstChar = II->getName()[0];
+ } else if (!Tok.needsCleaning()) {
+ if (Tok.isLiteral() && Tok.getLiteralData()) {
+ FirstChar = *Tok.getLiteralData();
+ } else {
+ SourceManager &SrcMgr = PP.getSourceManager();
+ FirstChar =
+ *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
+ }
+ } else if (Tok.getLength() < 256) {
+ const char *TokPtr = Buffer;
+ PP.getSpelling(Tok, TokPtr);
+ FirstChar = TokPtr[0];
+ } else {
+ FirstChar = PP.getSpelling(Tok)[0];
+ }
+
+ switch (PrevKind) {
+ default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
+ case tok::identifier: // id+id or id+number or id+L"foo".
+ if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
+ Tok.is(tok::wide_string_literal) /* ||
+ Tok.is(tok::wide_char_literal)*/)
+ return true;
+
+ // If this isn't identifier + string, we're done.
+ if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
+ return false;
+
+ // FIXME: need a wide_char_constant!
+
+ // If the string was a wide string L"foo" or wide char L'f', it would
+ // concat with the previous identifier into fooL"bar". Avoid this.
+ if (StartsWithL(Tok))
+ return true;
+
+ // Otherwise, this is a narrow character or string. If the *identifier*
+ // is a literal 'L', avoid pasting L "foo" -> L"foo".
+ return IsIdentifierL(PrevTok);
+ case tok::numeric_constant:
+ return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
+ FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
+ case tok::period: // ..., .*, .1234
+ return FirstChar == '.' || isdigit(FirstChar) ||
+ (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
+ case tok::amp: // &&
+ return FirstChar == '&';
+ case tok::plus: // ++
+ return FirstChar == '+';
+ case tok::minus: // --, ->, ->*
+ return FirstChar == '-' || FirstChar == '>';
+ case tok::slash: //, /*, //
+ return FirstChar == '*' || FirstChar == '/';
+ case tok::less: // <<, <<=, <:, <%
+ return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
+ case tok::greater: // >>, >>=
+ return FirstChar == '>';
+ case tok::pipe: // ||
+ return FirstChar == '|';
+ case tok::percent: // %>, %:
+ return (FirstChar == '>' || FirstChar == ':') &&
+ PP.getLangOptions().Digraphs;
+ case tok::colon: // ::, :>
+ return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
+ (FirstChar == '>' && PP.getLangOptions().Digraphs);
+ case tok::hash: // ##, #@, %:%:
+ return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
+ case tok::arrow: // ->*
+ return FirstChar == '*';
+ }
+}
More information about the cfe-commits
mailing list