[clang] 1c876ff - Revert "Perf/lexer faster slow get char and size (#70543)"
Nico Weber via cfe-commits
cfe-commits at lists.llvm.org
Sun Oct 29 18:12:01 PDT 2023
Author: Nico Weber
Date: 2023-10-29T21:11:39-04:00
New Revision: 1c876ff5155c4feeb2b2885eb3e6abda17c4b7f4
URL: https://github.com/llvm/llvm-project/commit/1c876ff5155c4feeb2b2885eb3e6abda17c4b7f4
DIFF: https://github.com/llvm/llvm-project/commit/1c876ff5155c4feeb2b2885eb3e6abda17c4b7f4.diff
LOG: Revert "Perf/lexer faster slow get char and size (#70543)"
This reverts commit d8f5a18b6e587aeaa8b99707e87b652f49b160cd.
Breaks build, see:
https://github.com/llvm/llvm-project/pull/70543#issuecomment-1784227421
Added:
Modified:
clang/include/clang/Lex/Lexer.h
clang/lib/Lex/DependencyDirectivesScanner.cpp
clang/lib/Lex/Lexer.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 899e665e7454652..ac0ef14c591bdd7 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -575,23 +575,19 @@ class Lexer : public PreprocessorLexer {
/// sequence.
static bool isNewLineEscaped(const char *BufferStart, const char *Str);
- /// Represents a char and the number of bytes parsed to produce it.
- struct SizedChar {
- char Char;
- unsigned Size;
- };
-
/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
/// emit a warning.
- static inline SizedChar getCharAndSizeNoWarn(const char *Ptr,
- const LangOptions &LangOpts) {
+ static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
+ const LangOptions &LangOpts) {
// If this is not a trigraph and not a UCN or escaped newline, return
// quickly.
if (isObviouslySimpleCharacter(Ptr[0])) {
- return {*Ptr, 1u};
+ Size = 1;
+ return *Ptr;
}
- return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
+ Size = 0;
+ return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
}
/// Returns the leading whitespace for line that corresponds to the given
@@ -669,7 +665,8 @@ class Lexer : public PreprocessorLexer {
// quickly.
if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;
- auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);
+ unsigned Size = 0;
+ char C = getCharAndSizeSlow(Ptr, Size, &Tok);
Ptr += Size;
return C;
}
@@ -685,7 +682,9 @@ class Lexer : public PreprocessorLexer {
// Otherwise, re-lex the character with a current token, allowing
// diagnostics to be emitted and flags to be set.
- return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;
+ Size = 0;
+ getCharAndSizeSlow(Ptr, Size, &Tok);
+ return Ptr+Size;
}
/// getCharAndSize - Peek a single 'character' from the specified buffer,
@@ -700,14 +699,14 @@ class Lexer : public PreprocessorLexer {
return *Ptr;
}
- auto CharAndSize = getCharAndSizeSlow(Ptr);
- Size = CharAndSize.Size;
- return CharAndSize.Char;
+ Size = 0;
+ return getCharAndSizeSlow(Ptr, Size);
}
/// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize
/// method.
- SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);
+ char getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+ Token *Tok = nullptr);
/// getEscapedNewLineSize - Return the size of the specified escaped newline,
/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry
@@ -721,8 +720,8 @@ class Lexer : public PreprocessorLexer {
/// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
/// diagnostic.
- static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
- const LangOptions &LangOpts);
+ static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+ const LangOptions &LangOpts);
//===--------------------------------------------------------------------===//
// Other lexer functions.
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 980f865cf24c97e..2bd2c5f8388c0dd 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -565,8 +565,9 @@ Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) {
const char *BufPtr = Input.begin() + Tok.Offset;
const char *AfterIdent = Input.begin() + Tok.getEnd();
while (BufPtr < AfterIdent) {
- auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
- Spelling[SpellingLength++] = Char;
+ unsigned Size;
+ Spelling[SpellingLength++] =
+ Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
BufPtr += Size;
}
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 1c53997527732a9..675ec28e514797e 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -287,9 +287,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
if (tok::isStringLiteral(Tok.getKind())) {
// Munch the encoding-prefix and opening double-quote.
while (BufPtr < BufEnd) {
- auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
- Spelling[Length++] = CharAndSize.Char;
- BufPtr += CharAndSize.Size;
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
if (Spelling[Length - 1] == '"')
break;
@@ -316,9 +316,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
}
while (BufPtr < BufEnd) {
- auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
- Spelling[Length++] = CharAndSize.Char;
- BufPtr += CharAndSize.Size;
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
}
assert(Length < Tok.getLength() &&
@@ -772,9 +772,10 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
// If we have a character that may be a trigraph or escaped newline, use a
// lexer to parse it correctly.
for (; CharNo; --CharNo) {
- auto CharAndSize = Lexer::getCharAndSizeNoWarn(TokPtr, LangOpts);
- TokPtr += CharAndSize.Size;
- PhysOffset += CharAndSize.Size;
+ unsigned Size;
+ Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
+ TokPtr += Size;
+ PhysOffset += Size;
}
// Final detail: if we end up on an escaped newline, we want to return the
@@ -1356,16 +1357,15 @@ SourceLocation Lexer::findLocationAfterToken(
///
/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
/// be updated to match.
-Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
- unsigned Size = 0;
+char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+ Token *Tok) {
// If we have a slash, look for an escaped newline.
if (Ptr[0] == '\\') {
++Size;
++Ptr;
Slash:
// Common case, backslash-char where the char is not whitespace.
- if (!isWhitespace(Ptr[0]))
- return {'\\', Size};
+ if (!isWhitespace(Ptr[0])) return '\\';
// See if we have optional whitespace characters between the slash and
// newline.
@@ -1382,13 +1382,11 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
Ptr += EscapedNewLineSize;
// Use slow version to accumulate a correct size field.
- auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
- CharAndSize.Size += Size;
- return CharAndSize;
+ return getCharAndSizeSlow(Ptr, Size, Tok);
}
// Otherwise, this is not an escaped newline, just return the slash.
- return {'\\', Size};
+ return '\\';
}
// If this is a trigraph, process it.
@@ -1403,12 +1401,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
Ptr += 3;
Size += 3;
if (C == '\\') goto Slash;
- return {C, Size};
+ return C;
}
}
// If this is neither, return a single character.
- return {*Ptr, Size + 1u};
+ ++Size;
+ return *Ptr;
}
/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
@@ -1417,18 +1416,15 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
///
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
-Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
- const LangOptions &LangOpts) {
-
- unsigned Size = 0;
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+ const LangOptions &LangOpts) {
// If we have a slash, look for an escaped newline.
if (Ptr[0] == '\\') {
++Size;
++Ptr;
Slash:
// Common case, backslash-char where the char is not whitespace.
- if (!isWhitespace(Ptr[0]))
- return {'\\', Size};
+ if (!isWhitespace(Ptr[0])) return '\\';
// See if we have optional whitespace characters followed by a newline.
if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
@@ -1437,13 +1433,11 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
Ptr += EscapedNewLineSize;
// Use slow version to accumulate a correct size field.
- auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
- CharAndSize.Size += Size;
- return CharAndSize;
+ return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
}
// Otherwise, this is not an escaped newline, just return the slash.
- return {'\\', Size};
+ return '\\';
}
// If this is a trigraph, process it.
@@ -1454,12 +1448,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
Ptr += 3;
Size += 3;
if (C == '\\') goto Slash;
- return {C, Size};
+ return C;
}
}
// If this is neither, return a single character.
- return {*Ptr, Size + 1u};
+ ++Size;
+ return *Ptr;
}
//===----------------------------------------------------------------------===//
@@ -1969,14 +1964,11 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
/// isHexaLiteral - Return true if Start points to a hex constant.
/// in microsoft mode (where this is supposed to be several different tokens).
bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
- auto CharAndSize1 = Lexer::getCharAndSizeNoWarn(Start, LangOpts);
- char C1 = CharAndSize1.Char;
+ unsigned Size;
+ char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
if (C1 != '0')
return false;
-
- auto CharAndSize2 =
- Lexer::getCharAndSizeNoWarn(Start + CharAndSize1.Size, LangOpts);
- char C2 = CharAndSize2.Char;
+ char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
return (C2 == 'x' || C2 == 'X');
}
@@ -2020,7 +2012,8 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
// If we have a digit separator, continue.
if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
- auto [Next, NextSize] = getCharAndSizeNoWarn(CurPtr + Size, LangOpts);
+ unsigned NextSize;
+ char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, LangOpts);
if (isAsciiIdentifierContinue(Next)) {
if (!isLexingRawMode())
Diag(CurPtr, LangOpts.CPlusPlus
@@ -2092,8 +2085,8 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
unsigned Consumed = Size;
unsigned Chars = 1;
while (true) {
- auto [Next, NextSize] =
- getCharAndSizeNoWarn(CurPtr + Consumed, LangOpts);
+ unsigned NextSize;
+ char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, LangOpts);
if (!isAsciiIdentifierContinue(Next)) {
// End of suffix. Check whether this is on the allowed list.
const StringRef CompleteSuffix(Buffer, Chars);
More information about the cfe-commits mailing list