[cfe-commits] r38753 - in /cfe/cfe/trunk: Lex/MacroExpander.cpp Lex/Preprocessor.cpp include/clang/Lex/MacroExpander.h
sabre at cs.uiuc.edu
sabre at cs.uiuc.edu
Wed Jul 11 09:24:20 PDT 2007
Author: sabre
Date: Wed Jul 11 11:24:20 2007
New Revision: 38753
URL: http://llvm.org/viewvc/llvm-project?rev=38753&view=rev
Log:
Speed up a brutal macro-expansion torture test by about 30% (1.5s -> 1.0s)
by flattening the per-argument vector of vectors into a single token vector,
which reduces pressure on malloc. This can still be improved.
Modified:
cfe/cfe/trunk/Lex/MacroExpander.cpp
cfe/cfe/trunk/Lex/Preprocessor.cpp
cfe/cfe/trunk/include/clang/Lex/MacroExpander.h
Modified: cfe/cfe/trunk/Lex/MacroExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/MacroExpander.cpp?rev=38753&r1=38752&r2=38753&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/MacroExpander.cpp (original)
+++ cfe/cfe/trunk/Lex/MacroExpander.cpp Wed Jul 11 11:24:20 2007
@@ -24,45 +24,34 @@
// MacroArgs Implementation
//===----------------------------------------------------------------------===//
-MacroArgs::MacroArgs(const MacroInfo *MI) {
+MacroArgs::MacroArgs(const MacroInfo *MI, std::vector<LexerToken> &UnexpArgs) {
assert(MI->isFunctionLike() &&
"Can't have args for an object-like macro!");
- // Reserve space for arguments to avoid reallocation.
- unsigned NumArgs = MI->getNumArgs();
- if (MI->isC99Varargs() || MI->isGNUVarargs())
- NumArgs += 3; // Varargs can have more than this, just some guess.
-
- UnexpArgTokens.reserve(NumArgs);
-}
-
-/// addArgument - Add an argument for this invocation. This method destroys
-/// the vector passed in to avoid extraneous memory copies. This adds the EOF
-/// token to the end of the argument list as a marker. 'Loc' specifies a
-/// location at the end of the argument, e.g. the ',' token or the ')'.
-void MacroArgs::addArgument(std::vector<LexerToken> &ArgToks,
- SourceLocation Loc) {
- UnexpArgTokens.push_back(std::vector<LexerToken>());
- UnexpArgTokens.back().swap(ArgToks);
-
- // Add a marker EOF token to the end of the argument list, useful for handling
- // empty arguments and macro pre-expansion.
- LexerToken EOFTok;
- EOFTok.StartToken();
- EOFTok.SetKind(tok::eof);
- EOFTok.SetLocation(Loc);
- EOFTok.SetLength(0);
- UnexpArgTokens.back().push_back(EOFTok);
+ UnexpArgTokens.swap(UnexpArgs);
}
+/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
+///
+const LexerToken *MacroArgs::getUnexpArgument(unsigned Arg) const {
+ // Scan to find Arg.
+ const LexerToken *Start = &UnexpArgTokens[0];
+ const LexerToken *Result = Start;
+ for (; Arg; ++Result) {
+ assert(Result < Start+UnexpArgTokens.size() && "Invalid arg #");
+ if (Result->getKind() == tok::eof)
+ --Arg;
+ }
+ return Result;
+}
+
+
/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
/// by pre-expansion, return false. Otherwise, conservatively return true.
-bool MacroArgs::ArgNeedsPreexpansion(unsigned ArgNo) const {
- const std::vector<LexerToken> &ArgTokens = getUnexpArgument(ArgNo);
-
+bool MacroArgs::ArgNeedsPreexpansion(const LexerToken *ArgTok) const {
// If there are no identifiers in the argument list, or if the identifiers are
// known to not be macros, pre-expansion won't modify it.
- for (unsigned i = 0, e = ArgTokens.size()-1; i != e; ++i)
- if (IdentifierInfo *II = ArgTokens[i].getIdentifierInfo()) {
+ for (; ArgTok->getKind() != tok::eof; ++ArgTok)
+ if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
if (II->getMacroInfo() && II->getMacroInfo()->isEnabled())
// Return true even though the macro could be a function-like macro
// without a following '(' token.
@@ -84,11 +73,21 @@
std::vector<LexerToken> &Result = PreExpArgTokens[Arg];
if (!Result.empty()) return Result;
+ // FIXME
+ // FIXME: Don't require copying into a temporary vector!!!
+ // FIXME
+
+ std::vector<LexerToken> UnexpArgToks;
+ const LexerToken *AT = getUnexpArgument(Arg);
+ for (; AT->getKind() != tok::eof; ++AT)
+ UnexpArgToks.push_back(*AT);
+ UnexpArgToks.push_back(*AT); // push the EOF too.
+
// Otherwise, we have to pre-expand this argument, populating Result. To do
// this, we set up a fake MacroExpander to lex from the unexpanded argument
// list. With this installed, we lex expanded tokens until we hit the EOF
// token at the end of the unexp list.
- PP.EnterTokenStream(UnexpArgTokens[Arg]);
+ PP.EnterTokenStream(UnexpArgToks);
// Lex all of the macro-expanded tokens into Result.
do {
@@ -110,19 +109,23 @@
/// tokens into the literal string token that should be produced by the C #
/// preprocessor operator.
///
-static LexerToken StringifyArgument(const std::vector<LexerToken> &Toks,
+static LexerToken StringifyArgument(const LexerToken *ArgToks,
Preprocessor &PP, bool Charify = false) {
LexerToken Tok;
Tok.StartToken();
Tok.SetKind(tok::string_literal);
+ const LexerToken *ArgTokStart = ArgToks;
+
// Stringify all the tokens.
std::string Result = "\"";
// FIXME: Optimize this loop to not use std::strings.
- for (unsigned i = 0, e = Toks.size()-1 /*no eof*/; i != e; ++i) {
- const LexerToken &Tok = Toks[i];
- if (i != 0 && Tok.hasLeadingSpace())
+ bool isFirst = true;
+ for (; ArgToks->getKind() != tok::eof; ++ArgToks) {
+ const LexerToken &Tok = *ArgToks;
+ if (!isFirst && Tok.hasLeadingSpace())
Result += ' ';
+ isFirst = false;
// If this is a string or character constant, escape the token as specified
// by 6.10.3.2p2.
@@ -146,7 +149,7 @@
--FirstNonSlash;
if ((Result.size()-1-FirstNonSlash) & 1) {
// Diagnose errors for things like: #define F(X) #X / F(\)
- PP.Diag(Toks.back(), diag::pp_invalid_string_literal);
+ PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
Result.erase(Result.end()-1); // remove one of the \'s.
}
}
@@ -168,8 +171,7 @@
}
if (isBad) {
- assert(!Toks.empty() && "No eof token at least?");
- PP.Diag(Toks[0], diag::err_invalid_character_to_charify);
+ PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
Result = "' '"; // Use something arbitrary, but legal.
}
}
@@ -190,7 +192,7 @@
sizeof(StringifiedArgs[0])*getNumArguments());
}
if (StringifiedArgs[ArgNo].getKind() != tok::string_literal)
- StringifiedArgs[ArgNo] = StringifyArgument(UnexpArgTokens[ArgNo], PP);
+ StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP);
return StringifiedArgs[ArgNo];
}
@@ -305,34 +307,37 @@
// argument and substitute the expanded tokens into the result. This is
// C99 6.10.3.1p1.
if (!PasteBefore && !PasteAfter) {
- const std::vector<LexerToken> *ArgToks;
+ const LexerToken *ResultArgToks;
+
// Only preexpand the argument if it could possibly need it. This
// avoids some work in common cases.
- if (ActualArgs->ArgNeedsPreexpansion(ArgNo))
- ArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP);
+ const LexerToken *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
+ if (ActualArgs->ArgNeedsPreexpansion(ArgTok))
+ ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
else
- ArgToks = &ActualArgs->getUnexpArgument(ArgNo);
+ ResultArgToks = ArgTok; // Use non-preexpanded tokens.
- unsigned FirstTok = ResultToks.size();
- ResultToks.insert(ResultToks.end(), ArgToks->begin(), ArgToks->end()-1);
+ if (ResultArgToks->getKind() != tok::eof) {
+ unsigned FirstResult = ResultToks.size();
+ for (; ResultArgToks->getKind() != tok::eof; ++ResultArgToks)
+ ResultToks.push_back(*ResultArgToks);
- // If any tokens were substituted from the argument, the whitespace
- // before the first token should match the whitespace of the arg
- // identifier.
- if (FirstTok != ResultToks.size())
- ResultToks[FirstTok].SetFlagValue(LexerToken::LeadingSpace,
- CurTok.hasLeadingSpace());
+ // If any tokens were substituted from the argument, the whitespace
+ // before the first token should match the whitespace of the arg
+ // identifier.
+ ResultToks[FirstResult].SetFlagValue(LexerToken::LeadingSpace,
+ CurTok.hasLeadingSpace());
+ }
continue;
}
// Okay, we have a token that is either the LHS or RHS of a paste (##)
// argument. It gets substituted as its non-pre-expanded tokens.
- const std::vector<LexerToken> &ArgToks =
- ActualArgs->getUnexpArgument(ArgNo);
- assert(ArgToks.back().getKind() == tok::eof && "Bad argument!");
+ const LexerToken *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
- if (ArgToks.size() != 1) { // Not just an EOF token?
- ResultToks.insert(ResultToks.end(), ArgToks.begin(), ArgToks.end()-1);
+ if (ArgToks->getKind() != tok::eof) { // Not an empty argument?
+ for (; ArgToks->getKind() != tok::eof; ++ArgToks)
+ ResultToks.push_back(*ArgToks);
continue;
}
Modified: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=38753&r1=38752&r2=38753&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (original)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:24:20 2007
@@ -694,10 +694,6 @@
/// invocation. This returns null on error.
MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(LexerToken &MacroName,
MacroInfo *MI) {
- // Use an auto_ptr here so that the MacroArgs object is deleted on
- // all error paths.
- std::auto_ptr<MacroArgs> Args(new MacroArgs(MI));
-
// The number of fixed arguments to parse.
unsigned NumFixedArgsLeft = MI->getNumArgs();
bool isVariadic = MI->isVariadic();
@@ -711,13 +707,16 @@
LexerToken Tok;
Tok.SetKind(tok::comma);
--NumFixedArgsLeft; // Start reading the first arg.
-
+
+ // ArgTokens - Build up a list of tokens that make up each argument. Each
+ // argument is separated by an EOF token.
+ std::vector<LexerToken> ArgTokens;
+
+ unsigned NumActuals = 0;
while (Tok.getKind() == tok::comma) {
- // ArgTokens - Build up a list of tokens that make up this argument.
- std::vector<LexerToken> ArgTokens;
// C99 6.10.3p11: Keep track of the number of l_parens we have seen.
unsigned NumParens = 0;
-
+
while (1) {
// Read arguments as unexpanded tokens. This avoids issues, e.g., where
// an argument value in a macro could expand to ',' or '(' or ')'.
@@ -757,14 +756,19 @@
if (ArgTokens.empty() && !Features.C99)
Diag(Tok, diag::ext_empty_fnmacro_arg);
- // Remember the tokens that make up this argument. This destroys ArgTokens.
- Args->addArgument(ArgTokens, Tok.getLocation());
+ // Add a marker EOF token to the end of the token list for this argument.
+ LexerToken EOFTok;
+ EOFTok.StartToken();
+ EOFTok.SetKind(tok::eof);
+ EOFTok.SetLocation(Tok.getLocation());
+ EOFTok.SetLength(0);
+ ArgTokens.push_back(EOFTok);
+ ++NumActuals;
--NumFixedArgsLeft;
};
// Okay, we either found the r_paren. Check to see if we parsed too few
// arguments.
- unsigned NumActuals = Args->getNumArguments();
unsigned MinArgsExpected = MI->getNumArgs();
// C99 expects us to pass at least one vararg arg (but as an extension, we
@@ -783,8 +787,15 @@
// #define A(x)
// A()
// is ok because it is an empty argument. Add it explicitly.
- std::vector<LexerToken> ArgTokens;
- Args->addArgument(ArgTokens, Tok.getLocation());
+
+
+ // Add a marker EOF token to the end of the token list for this argument.
+ SourceLocation EndLoc = Tok.getLocation();
+ Tok.StartToken();
+ Tok.SetKind(tok::eof);
+ Tok.SetLocation(EndLoc);
+ Tok.SetLength(0);
+ ArgTokens.push_back(Tok);
// Empty arguments are standard in C99 and supported as an extension in
// other modes.
@@ -797,7 +808,7 @@
}
}
- return Args.release();
+ return new MacroArgs(MI, ArgTokens);
}
/// ComputeDATE_TIME - Compute the current time, enter it into the specified
Modified: cfe/cfe/trunk/include/clang/Lex/MacroExpander.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/MacroExpander.h?rev=38753&r1=38752&r2=38753&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/MacroExpander.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/MacroExpander.h Wed Jul 11 11:24:20 2007
@@ -26,9 +26,10 @@
/// MacroArgs - An instance of this class captures information about
/// the formal arguments specified to a function-like macro invocation.
class MacroArgs {
- /// UnexpArgTokens - Raw, unexpanded tokens for the arguments. This includes
- /// an 'EOF' marker at the end of each argument.
- std::vector<std::vector<LexerToken> > UnexpArgTokens;
+ /// UnexpArgTokens - Raw, unexpanded tokens for the arguments. This is all of
+ /// the arguments concatenated together, with 'EOF' markers at the end of each
+ /// argument.
+ std::vector<LexerToken> UnexpArgTokens;
/// PreExpArgTokens - Pre-expanded tokens for arguments that need them. Empty
/// if not yet computed. This includes the EOF marker at the end of the
@@ -39,24 +40,18 @@
/// stringified form of an argument has not yet been computed, this is empty.
std::vector<LexerToken> StringifiedArgs;
public:
- MacroArgs(const MacroInfo *MI);
+ /// MacroArgs ctor - This destroys the vector passed in.
+ MacroArgs(const MacroInfo *MI, std::vector<LexerToken> &UnexpArgTokens);
- /// addArgument - Add an argument for this invocation. This method destroys
- /// the vector passed in to avoid extraneous memory copies. This adds the EOF
- /// token to the end of the argument list as a marker. 'Loc' specifies a
- /// location at the end of the argument, e.g. the ',' token or the ')'.
- void addArgument(std::vector<LexerToken> &ArgToks, SourceLocation Loc);
/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
/// by pre-expansion, return false. Otherwise, conservatively return true.
- bool ArgNeedsPreexpansion(unsigned ArgNo) const;
+ bool ArgNeedsPreexpansion(const LexerToken *ArgTok) const;
- /// getUnexpArgument - Return the unexpanded tokens for the specified formal.
+ /// getUnexpArgument - Return a pointer to the first token of the unexpanded
+ /// token list for the specified formal.
///
- const std::vector<LexerToken> &getUnexpArgument(unsigned Arg) const {
- assert(Arg < UnexpArgTokens.size() && "Invalid ArgNo");
- return UnexpArgTokens[Arg];
- }
+ const LexerToken *getUnexpArgument(unsigned Arg) const;
/// getPreExpArgument - Return the pre-expanded form of the specified
/// argument.
More information about the cfe-commits mailing list