[clang] [Clang] [Lexer] Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier (PR #171914)
Thibault Monnier via cfe-commits
cfe-commits at lists.llvm.org
Sun Dec 28 08:44:22 PST 2025
https://github.com/Thibault-Monnier updated https://github.com/llvm/llvm-project/pull/171914
>From 4fc9a07698e1a4627a050ba6fa9df3f1f8725451 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Thu, 11 Dec 2025 22:02:35 +0100
Subject: [PATCH 1/6] Detect sse4.2 availability at runtime to use it on modern
processors
---
clang/lib/Lex/Lexer.cpp | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index b282a600c0e56..3b8fa0b9b7f36 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -46,9 +46,7 @@
#include <string>
#include <tuple>
-#ifdef __SSE4_2__
#include <nmmintrin.h>
-#endif
using namespace clang;
@@ -1921,9 +1919,17 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
}
static const char *
-fastParseASCIIIdentifier(const char *CurPtr,
- [[maybe_unused]] const char *BufferEnd) {
-#ifdef __SSE4_2__
+fastParseASCIIIdentifierScalar(const char *CurPtr,
+ [[maybe_unused]] const char *BufferEnd) {
+ unsigned char C = *CurPtr;
+ while (isAsciiIdentifierContinue(C))
+ C = *++CurPtr;
+ return CurPtr;
+}
+
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifierSSE42(const char *CurPtr,
+ [[maybe_unused]] const char *BufferEnd) {
alignas(16) static constexpr char AsciiIdentifierRange[16] = {
'_', '_', 'A', 'Z', 'a', 'z', '0', '9',
};
@@ -1943,12 +1949,23 @@ fastParseASCIIIdentifier(const char *CurPtr,
continue;
return CurPtr;
}
+
+ return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+}
+
+static bool supportsSSE42() {
+ static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2");
+ return SupportsSSE42;
+}
+
+static const char *fastParseASCIIIdentifier(const char *CurPtr,
+ const char *BufferEnd) {
+#ifndef __SSE4_2__
+ if (LLVM_UNLIKELY(!supportsSSE42()))
+ return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
#endif
- unsigned char C = *CurPtr;
- while (isAsciiIdentifierContinue(C))
- C = *++CurPtr;
- return CurPtr;
+ return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
}
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
>From ce3bf515e7a60bd58ff5871352979999f5864b4b Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Thu, 11 Dec 2025 23:15:40 +0100
Subject: [PATCH 2/6] Only on x86
---
clang/lib/Lex/Lexer.cpp | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 3b8fa0b9b7f36..c195237dae1f4 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -46,7 +46,9 @@
#include <string>
#include <tuple>
+#if defined(__i386__) || defined(__x86_64__)
#include <nmmintrin.h>
+#endif
using namespace clang;
@@ -1927,6 +1929,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
return CurPtr;
}
+#if defined(__i386__) || defined(__x86_64__)
+
__attribute__((target("sse4.2"))) static const char *
fastParseASCIIIdentifierSSE42(const char *CurPtr,
[[maybe_unused]] const char *BufferEnd) {
@@ -1958,14 +1962,22 @@ static bool supportsSSE42() {
return SupportsSSE42;
}
+#endif
+
static const char *fastParseASCIIIdentifier(const char *CurPtr,
const char *BufferEnd) {
+#if !defined(__i386__) && !defined(__x86_64__)
+ return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+#else
+
#ifndef __SSE4_2__
if (LLVM_UNLIKELY(!supportsSSE42()))
return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
#endif
return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
+
+#endif
}
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
>From 2109fdd371822ec77f870c5edbbdfccaaa7615be Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Sun, 14 Dec 2025 11:32:30 +0100
Subject: [PATCH 3/6] Not on windows
---
clang/lib/Lex/Lexer.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index c195237dae1f4..86cfb47ca84d5 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/UnicodeCharRanges.h"
+
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -1929,7 +1930,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
return CurPtr;
}
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) && !defined(_WIN32)
__attribute__((target("sse4.2"))) static const char *
fastParseASCIIIdentifierSSE42(const char *CurPtr,
@@ -1966,7 +1967,7 @@ static bool supportsSSE42() {
static const char *fastParseASCIIIdentifier(const char *CurPtr,
const char *BufferEnd) {
-#if !defined(__i386__) && !defined(__x86_64__)
+#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32)
return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
#else
>From d5485438edd460892bf210916827e0d92fc24065 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Sun, 14 Dec 2025 14:37:43 +0100
Subject: [PATCH 4/6] Address comments
---
clang/lib/Lex/Lexer.cpp | 26 ++++++++++----------------
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 86cfb47ca84d5..470579df233d1 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1930,7 +1930,12 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
return CurPtr;
}
-#if defined(__i386__) || defined(__x86_64__) && !defined(_WIN32)
+// Fast path for lexing ASCII identifiers using SSE4.2 instructions.
+// Only enabled on x86/x86_64 when building with a compiler that supports
+// the 'target' attribute, which is used for runtime dispatch. Otherwise, we
+// fall back to the scalar implementation.
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \
+ __has_attribute(target)
__attribute__((target("sse4.2"))) static const char *
fastParseASCIIIdentifierSSE42(const char *CurPtr,
@@ -1958,27 +1963,16 @@ fastParseASCIIIdentifierSSE42(const char *CurPtr,
return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
}
-static bool supportsSSE42() {
- static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2");
- return SupportsSSE42;
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) {
+ return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
}
+__attribute__((target("default")))
#endif
-
static const char *fastParseASCIIIdentifier(const char *CurPtr,
const char *BufferEnd) {
-#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32)
return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
-#else
-
-#ifndef __SSE4_2__
- if (LLVM_UNLIKELY(!supportsSSE42()))
- return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
-#endif
-
- return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
-
-#endif
}
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
>From 82cf41e460d2fa1105e9abbf925837fa9e9c7b45 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Sun, 14 Dec 2025 19:32:29 +0100
Subject: [PATCH 5/6] Not on MSVC
---
clang/lib/Lex/Lexer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 470579df233d1..58cd9348d3027 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1935,7 +1935,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
// the 'target' attribute, which is used for runtime dispatch. Otherwise, we
// fall back to the scalar implementation.
#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \
- __has_attribute(target)
+ __has_attribute(target) && !defined(_MSC_VER)
__attribute__((target("sse4.2"))) static const char *
fastParseASCIIIdentifierSSE42(const char *CurPtr,
>From 3e1428de60068729a17e5e3cca3942aa175ab975 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Sun, 28 Dec 2025 17:42:50 +0100
Subject: [PATCH 6/6] Clean up
---
clang/lib/Lex/Lexer.cpp | 720 +++++++++++++++++++++-------------------
1 file changed, 383 insertions(+), 337 deletions(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 58cd9348d3027..7d3731812cf68 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -106,6 +106,7 @@ bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
case tok::kw__Sat:
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
#include "clang/Basic/TransformTypeTraits.def"
+
case tok::kw___auto_type:
case tok::kw_char16_t:
case tok::kw_char32_t:
@@ -142,8 +143,8 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
// Determine the size of the BOM.
StringRef Buf(BufferStart, BufferEnd - BufferStart);
size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
- .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
- .Default(0);
+ .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+ .Default(0);
// Skip the BOM.
BufferPtr += BOMLength;
@@ -257,14 +258,14 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
const char *StrData = SM.getCharacterData(SpellingLoc);
L->BufferPtr = StrData;
- L->BufferEnd = StrData+TokLen;
+ L->BufferEnd = StrData + TokLen;
assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
// Set the SourceLocation with the remapping information. This ensures that
// GetMappedTokenLoc will remap the tokens as they are lexed.
- L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
- ExpansionLocStart,
- ExpansionLocEnd, TokLen);
+ L->FileLoc =
+ SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
+ ExpansionLocStart, ExpansionLocEnd, TokLen);
// Ensure that the lexer thinks it is inside a directive, so that end \n will
// return an EOD token.
@@ -343,12 +344,14 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
// Raw string literals need special handling; trigraph expansion and line
// splicing do not occur within their d-char-sequence nor within their
// r-char-sequence.
- if (Length >= 2 &&
- Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+ if (Length >= 2 && Spelling[Length - 2] == 'R' &&
+ Spelling[Length - 1] == '"') {
// Search backwards from the end of the token to find the matching closing
// quote.
const char *RawEnd = BufEnd;
- do --RawEnd; while (*RawEnd != '"');
+ do
+ --RawEnd;
+ while (*RawEnd != '"');
size_t RawLength = RawEnd - BufPtr + 1;
// Everything between the quotes is included verbatim in the spelling.
@@ -376,11 +379,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
-StringRef Lexer::getSpelling(SourceLocation loc,
- SmallVectorImpl<char> &buffer,
+StringRef Lexer::getSpelling(SourceLocation loc, SmallVectorImpl<char> &buffer,
const SourceManager &SM,
- const LangOptions &options,
- bool *invalid) {
+ const LangOptions &options, bool *invalid) {
// Break down the source location.
FileIDAndOffset locInfo = SM.getDecomposedLoc(loc);
@@ -388,15 +389,16 @@ StringRef Lexer::getSpelling(SourceLocation loc,
bool invalidTemp = false;
StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
if (invalidTemp) {
- if (invalid) *invalid = true;
+ if (invalid)
+ *invalid = true;
return {};
}
const char *tokenBegin = file.data() + locInfo.second;
// Lex from the start of the given location.
- Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
- file.begin(), tokenBegin, file.end());
+ Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options, file.begin(),
+ tokenBegin, file.end());
Token token;
lexer.LexFromRawLexer(token);
@@ -422,8 +424,8 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
bool CharDataInvalid = false;
- const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
- &CharDataInvalid);
+ const char *TokStart =
+ SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
if (Invalid)
*Invalid = CharDataInvalid;
if (CharDataInvalid)
@@ -489,15 +491,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
}
// Otherwise, hard case, relex the characters into the string.
- return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
+ return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char *>(Buffer));
}
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
/// that are part of that.
-unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
- const SourceManager &SM,
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc, const SourceManager &SM,
const LangOptions &LangOpts) {
Token TheTok;
if (getRawToken(Loc, TheTok, SM, LangOpts))
@@ -508,8 +509,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
/// Relex the token at the specified location.
/// \returns true if there was a failure, false on success.
bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
- const SourceManager &SM,
- const LangOptions &LangOpts,
+ const SourceManager &SM, const LangOptions &LangOpts,
bool IgnoreWhiteSpace) {
// TODO: this could be special cased for common tokens like identifiers, ')',
// etc to make this faster, if it mattered. Just look at StrData[0] to handle
@@ -526,7 +526,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
if (Invalid)
return true;
- const char *StrData = Buffer.data()+LocInfo.second;
+ const char *StrData = Buffer.data() + LocInfo.second;
if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0]))
return true;
@@ -626,10 +626,7 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
namespace {
-enum PreambleDirectiveKind {
- PDK_Skipped,
- PDK_Unknown
-};
+enum PreambleDirectiveKind { PDK_Skipped, PDK_Unknown };
} // namespace
@@ -713,31 +710,31 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
TheLexer.LexFromRawLexer(TheTok);
if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
StringRef Keyword = TheTok.getRawIdentifier();
- PreambleDirectiveKind PDK
- = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
- .Case("include", PDK_Skipped)
- .Case("__include_macros", PDK_Skipped)
- .Case("define", PDK_Skipped)
- .Case("undef", PDK_Skipped)
- .Case("line", PDK_Skipped)
- .Case("error", PDK_Skipped)
- .Case("pragma", PDK_Skipped)
- .Case("import", PDK_Skipped)
- .Case("include_next", PDK_Skipped)
- .Case("warning", PDK_Skipped)
- .Case("ident", PDK_Skipped)
- .Case("sccs", PDK_Skipped)
- .Case("assert", PDK_Skipped)
- .Case("unassert", PDK_Skipped)
- .Case("if", PDK_Skipped)
- .Case("ifdef", PDK_Skipped)
- .Case("ifndef", PDK_Skipped)
- .Case("elif", PDK_Skipped)
- .Case("elifdef", PDK_Skipped)
- .Case("elifndef", PDK_Skipped)
- .Case("else", PDK_Skipped)
- .Case("endif", PDK_Skipped)
- .Default(PDK_Unknown);
+ PreambleDirectiveKind PDK =
+ llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
+ .Case("include", PDK_Skipped)
+ .Case("__include_macros", PDK_Skipped)
+ .Case("define", PDK_Skipped)
+ .Case("undef", PDK_Skipped)
+ .Case("line", PDK_Skipped)
+ .Case("error", PDK_Skipped)
+ .Case("pragma", PDK_Skipped)
+ .Case("import", PDK_Skipped)
+ .Case("include_next", PDK_Skipped)
+ .Case("warning", PDK_Skipped)
+ .Case("ident", PDK_Skipped)
+ .Case("sccs", PDK_Skipped)
+ .Case("assert", PDK_Skipped)
+ .Case("unassert", PDK_Skipped)
+ .Case("if", PDK_Skipped)
+ .Case("ifdef", PDK_Skipped)
+ .Case("ifndef", PDK_Skipped)
+ .Case("elif", PDK_Skipped)
+ .Case("elifdef", PDK_Skipped)
+ .Case("elifndef", PDK_Skipped)
+ .Case("else", PDK_Skipped)
+ .Case("endif", PDK_Skipped)
+ .Default(PDK_Unknown);
switch (PDK) {
case PDK_Skipped:
@@ -826,7 +823,7 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
// advanced by 3 should return the location of b, not of \\. One compounding
// detail of this is that the escape may be made by a trigraph.
if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
- PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+ PhysOffset += Lexer::SkipEscapedNewLines(TokPtr) - TokPtr;
return PhysOffset;
}
@@ -890,8 +887,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
/// Returns true if the given MacroID location points at the last
/// token of the macro expansion.
-bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
- const SourceManager &SM,
+bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM,
const LangOptions &LangOpts,
SourceLocation *MacroEnd) {
assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
@@ -923,7 +919,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
SourceLocation End = Range.getEnd();
assert(Begin.isFileID() && End.isFileID());
if (Range.isTokenRange()) {
- End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
+ End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts);
if (End.isInvalid())
return {};
}
@@ -934,8 +930,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
return {};
unsigned EndOffs;
- if (!SM.isInFileID(End, FID, &EndOffs) ||
- BeginOffs > EndOffs)
+ if (!SM.isInFileID(End, FID, &EndOffs) || BeginOffs > EndOffs)
return {};
return CharSourceRange::getCharRange(Begin, End);
@@ -982,10 +977,10 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
assert(Begin.isMacroID() && End.isMacroID());
SourceLocation MacroBegin, MacroEnd;
if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
- ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
- &MacroEnd)) ||
- (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
- &MacroEnd)))) {
+ ((Range.isTokenRange() &&
+ isAtEndOfMacroExpansion(End, SM, LangOpts, &MacroEnd)) ||
+ (Range.isCharRange() &&
+ isAtStartOfMacroExpansion(End, SM, LangOpts, &MacroEnd)))) {
Range.setBegin(MacroBegin);
Range.setEnd(MacroEnd);
// Use the *original* `End`, not the expanded one in `MacroEnd`.
@@ -995,14 +990,14 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
}
bool Invalid = false;
- const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin),
- &Invalid);
+ const SrcMgr::SLocEntry &BeginEntry =
+ SM.getSLocEntry(SM.getFileID(Begin), &Invalid);
if (Invalid)
return {};
if (BeginEntry.getExpansion().isMacroArgExpansion()) {
- const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End),
- &Invalid);
+ const SrcMgr::SLocEntry &EndEntry =
+ SM.getSLocEntry(SM.getFileID(End), &Invalid);
if (Invalid)
return {};
@@ -1018,27 +1013,28 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
return {};
}
-StringRef Lexer::getSourceText(CharSourceRange Range,
- const SourceManager &SM,
- const LangOptions &LangOpts,
- bool *Invalid) {
+StringRef Lexer::getSourceText(CharSourceRange Range, const SourceManager &SM,
+ const LangOptions &LangOpts, bool *Invalid) {
Range = makeFileCharRange(Range, SM, LangOpts);
if (Range.isInvalid()) {
- if (Invalid) *Invalid = true;
+ if (Invalid)
+ *Invalid = true;
return {};
}
// Break down the source location.
FileIDAndOffset beginInfo = SM.getDecomposedLoc(Range.getBegin());
if (beginInfo.first.isInvalid()) {
- if (Invalid) *Invalid = true;
+ if (Invalid)
+ *Invalid = true;
return {};
}
unsigned EndOffs;
if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
beginInfo.second > EndOffs) {
- if (Invalid) *Invalid = true;
+ if (Invalid)
+ *Invalid = true;
return {};
}
@@ -1046,11 +1042,13 @@ StringRef Lexer::getSourceText(CharSourceRange Range,
bool invalidTemp = false;
StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
if (invalidTemp) {
- if (Invalid) *Invalid = true;
+ if (Invalid)
+ *Invalid = true;
return {};
}
- if (Invalid) *Invalid = false;
+ if (Invalid)
+ *Invalid = false;
return file.substr(beginInfo.second, EndOffs - beginInfo.second);
}
@@ -1184,8 +1182,8 @@ StringRef Lexer::getIndentationForLine(SourceLocation Loc,
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(
Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen);
static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
- SourceLocation FileLoc,
- unsigned CharNo, unsigned TokLen) {
+ SourceLocation FileLoc, unsigned CharNo,
+ unsigned TokLen) {
assert(FileLoc.isMacroID() && "Must be a macro expansion");
// Otherwise, we're lexing "mapped tokens". This is used for things like
@@ -1214,7 +1212,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc,
// In the normal case, we're just lexing from a simple file buffer, return
// the file id from FileLoc with the offset specified.
- unsigned CharNo = Loc-BufferStart;
+ unsigned CharNo = Loc - BufferStart;
if (FileLoc.isFileID())
return FileLoc.getLocWithOffset(CharNo);
@@ -1238,16 +1236,26 @@ DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char GetTrigraphCharForLetter(char Letter) {
switch (Letter) {
- default: return 0;
- case '=': return '#';
- case ')': return ']';
- case '(': return '[';
- case '!': return '|';
- case '\'': return '^';
- case '>': return '}';
- case '/': return '\\';
- case '<': return '{';
- case '-': return '~';
+ default:
+ return 0;
+ case '=':
+ return '#';
+ case ')':
+ return ']';
+ case '(':
+ return '[';
+ case '!':
+ return '|';
+ case '\'':
+ return '^';
+ case '>':
+ return '}';
+ case '/':
+ return '\\';
+ case '<':
+ return '{';
+ case '-':
+ return '~';
}
}
@@ -1262,12 +1270,12 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) {
if (!Trigraphs) {
if (L && !L->isLexingRawMode())
- L->Diag(CP-2, diag::trigraph_ignored);
+ L->Diag(CP - 2, diag::trigraph_ignored);
return 0;
}
if (L && !L->isLexingRawMode())
- L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
+ L->Diag(CP - 2, diag::trigraph_converted) << StringRef(&Res, 1);
return Res;
}
@@ -1279,12 +1287,11 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
while (isWhitespace(Ptr[Size])) {
++Size;
- if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r')
+ if (Ptr[Size - 1] != '\n' && Ptr[Size - 1] != '\r')
continue;
// If this is a \r\n or \n\r, skip the other half.
- if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') &&
- Ptr[Size-1] != Ptr[Size])
+ if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && Ptr[Size - 1] != Ptr[Size])
++Size;
return Size;
@@ -1301,21 +1308,22 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
while (true) {
const char *AfterEscape;
if (*P == '\\') {
- AfterEscape = P+1;
+ AfterEscape = P + 1;
} else if (*P == '?') {
// If not a trigraph for escape, bail out.
if (P[1] != '?' || P[2] != '/')
return P;
// FIXME: Take LangOpts into account; the language might not
// support trigraphs.
- AfterEscape = P+3;
+ AfterEscape = P + 3;
} else {
return P;
}
unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
- if (NewLineSize == 0) return P;
- P = AfterEscape+NewLineSize;
+ if (NewLineSize == 0)
+ return P;
+ P = AfterEscape + NewLineSize;
}
}
@@ -1342,7 +1350,7 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
// Lex from the start of the given location.
Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
- TokenBegin, File.end());
+ TokenBegin, File.end());
lexer.SetCommentRetentionState(IncludeComments);
// Find the token.
Token Tok;
@@ -1427,7 +1435,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
if (Ptr[0] == '\\') {
++Size;
++Ptr;
-Slash:
+ Slash:
// Common case, backslash-char where the char is not whitespace.
if (!isWhitespace(Ptr[0]))
return {'\\', Size};
@@ -1436,7 +1444,8 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
// newline.
if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
// Remember that this token needs to be cleaned.
- if (Tok) Tok->setFlag(Token::NeedsCleaning);
+ if (Tok)
+ Tok->setFlag(Token::NeedsCleaning);
// Warn if there was whitespace between the backslash and newline.
if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode())
@@ -1444,7 +1453,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
// Found backslash<whitespace><newline>. Parse the char after it.
Size += EscapedNewLineSize;
- Ptr += EscapedNewLineSize;
+ Ptr += EscapedNewLineSize;
// Use slow version to accumulate a correct size field.
auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
@@ -1463,11 +1472,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
if (char C = DecodeTrigraphChar(Ptr + 2, Tok ? this : nullptr,
LangOpts.Trigraphs)) {
// Remember that this token needs to be cleaned.
- if (Tok) Tok->setFlag(Token::NeedsCleaning);
+ if (Tok)
+ Tok->setFlag(Token::NeedsCleaning);
Ptr += 3;
Size += 3;
- if (C == '\\') goto Slash;
+ if (C == '\\')
+ goto Slash;
return {C, Size};
}
}
@@ -1490,7 +1501,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
if (Ptr[0] == '\\') {
++Size;
++Ptr;
-Slash:
+ Slash:
// Common case, backslash-char where the char is not whitespace.
if (!isWhitespace(Ptr[0]))
return {'\\', Size};
@@ -1499,7 +1510,7 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
// Found backslash<whitespace><newline>. Parse the char after it.
Size += EscapedNewLineSize;
- Ptr += EscapedNewLineSize;
+ Ptr += EscapedNewLineSize;
// Use slow version to accumulate a correct size field.
auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
@@ -1518,7 +1529,8 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
if (char C = GetTrigraphCharForLetter(Ptr[2])) {
Ptr += 3;
Size += 3;
- if (C == '\\') goto Slash;
+ if (C == '\\')
+ goto Slash;
return {C, Size};
}
}
@@ -1656,10 +1668,7 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
CharSourceRange Range, bool IsFirst) {
// Check C99 compatibility.
if (!Diags.isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
- enum {
- CannotAppearInIdentifier = 0,
- CannotStartIdentifier
- };
+ enum { CannotAppearInIdentifier = 0, CannotStartIdentifier };
static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
C99AllowedIDCharRanges);
@@ -1667,12 +1676,10 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
C99DisallowedInitialIDCharRanges);
if (!C99AllowedIDChars.contains(C)) {
Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
- << Range
- << CannotAppearInIdentifier;
+ << Range << CannotAppearInIdentifier;
} else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
- << Range
- << CannotStartIdentifier;
+ << Range << CannotStartIdentifier;
}
}
}
@@ -1690,57 +1697,56 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
bool operator<(HomoglyphPair R) const { return Character < R.Character; }
};
static constexpr HomoglyphPair SortedHomoglyphs[] = {
- {U'\u00ad', 0}, // SOFT HYPHEN
- {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
- {U'\u037e', ';'}, // GREEK QUESTION MARK
- {U'\u200b', 0}, // ZERO WIDTH SPACE
- {U'\u200c', 0}, // ZERO WIDTH NON-JOINER
- {U'\u200d', 0}, // ZERO WIDTH JOINER
- {U'\u2060', 0}, // WORD JOINER
- {U'\u2061', 0}, // FUNCTION APPLICATION
- {U'\u2062', 0}, // INVISIBLE TIMES
- {U'\u2063', 0}, // INVISIBLE SEPARATOR
- {U'\u2064', 0}, // INVISIBLE PLUS
- {U'\u2212', '-'}, // MINUS SIGN
- {U'\u2215', '/'}, // DIVISION SLASH
- {U'\u2216', '\\'}, // SET MINUS
- {U'\u2217', '*'}, // ASTERISK OPERATOR
- {U'\u2223', '|'}, // DIVIDES
- {U'\u2227', '^'}, // LOGICAL AND
- {U'\u2236', ':'}, // RATIO
- {U'\u223c', '~'}, // TILDE OPERATOR
- {U'\ua789', ':'}, // MODIFIER LETTER COLON
- {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE
- {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
- {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
- {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
- {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN
- {U'\uff06', '&'}, // FULLWIDTH AMPERSAND
- {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS
- {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS
- {U'\uff0a', '*'}, // FULLWIDTH ASTERISK
- {U'\uff0b', '+'}, // FULLWIDTH ASTERISK
- {U'\uff0c', ','}, // FULLWIDTH COMMA
- {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS
- {U'\uff0e', '.'}, // FULLWIDTH FULL STOP
- {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS
- {U'\uff1a', ':'}, // FULLWIDTH COLON
- {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON
- {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN
- {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN
- {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN
- {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK
- {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT
- {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET
- {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
- {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET
- {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT
- {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET
- {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE
- {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET
- {U'\uff5e', '~'}, // FULLWIDTH TILDE
- {0, 0}
- };
+ {U'\u00ad', 0}, // SOFT HYPHEN
+ {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
+ {U'\u037e', ';'}, // GREEK QUESTION MARK
+ {U'\u200b', 0}, // ZERO WIDTH SPACE
+ {U'\u200c', 0}, // ZERO WIDTH NON-JOINER
+ {U'\u200d', 0}, // ZERO WIDTH JOINER
+ {U'\u2060', 0}, // WORD JOINER
+ {U'\u2061', 0}, // FUNCTION APPLICATION
+ {U'\u2062', 0}, // INVISIBLE TIMES
+ {U'\u2063', 0}, // INVISIBLE SEPARATOR
+ {U'\u2064', 0}, // INVISIBLE PLUS
+ {U'\u2212', '-'}, // MINUS SIGN
+ {U'\u2215', '/'}, // DIVISION SLASH
+ {U'\u2216', '\\'}, // SET MINUS
+ {U'\u2217', '*'}, // ASTERISK OPERATOR
+ {U'\u2223', '|'}, // DIVIDES
+ {U'\u2227', '^'}, // LOGICAL AND
+ {U'\u2236', ':'}, // RATIO
+ {U'\u223c', '~'}, // TILDE OPERATOR
+ {U'\ua789', ':'}, // MODIFIER LETTER COLON
+ {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE
+ {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
+ {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
+ {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
+ {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN
+ {U'\uff06', '&'}, // FULLWIDTH AMPERSAND
+ {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS
+ {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS
+ {U'\uff0a', '*'}, // FULLWIDTH ASTERISK
+ {U'\uff0b', '+'}, // FULLWIDTH PLUS SIGN
+ {U'\uff0c', ','}, // FULLWIDTH COMMA
+ {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS
+ {U'\uff0e', '.'}, // FULLWIDTH FULL STOP
+ {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS
+ {U'\uff1a', ':'}, // FULLWIDTH COLON
+ {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON
+ {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN
+ {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN
+ {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN
+ {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK
+ {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT
+ {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET
+ {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
+ {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET
+ {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT
+ {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET
+ {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE
+ {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET
+ {U'\uff5e', '~'}, // FULLWIDTH TILDE
+ {0, 0}};
auto Homoglyph =
std::lower_bound(std::begin(SortedHomoglyphs),
std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'});
@@ -1815,7 +1821,7 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
}
Result.setFlag(Token::HasUCN);
- if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
+ if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
(UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
CurPtr = UCNPtr;
else
@@ -1921,9 +1927,7 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
return true;
}
-static const char *
-fastParseASCIIIdentifierScalar(const char *CurPtr,
- [[maybe_unused]] const char *BufferEnd) {
+static const char *fastParseASCIIIdentifierScalar(const char *CurPtr) {
unsigned char C = *CurPtr;
while (isAsciiIdentifierContinue(C))
C = *++CurPtr;
@@ -1936,10 +1940,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
// fall back to the scalar implementation.
#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \
__has_attribute(target) && !defined(_MSC_VER)
-
__attribute__((target("sse4.2"))) static const char *
-fastParseASCIIIdentifierSSE42(const char *CurPtr,
- [[maybe_unused]] const char *BufferEnd) {
+fastParseASCIIIdentifierSSE42(const char *CurPtr, const char *BufferEnd) {
alignas(16) static constexpr char AsciiIdentifierRange[16] = {
'_', '_', 'A', 'Z', 'a', 'z', '0', '9',
};
@@ -1960,7 +1962,7 @@ fastParseASCIIIdentifierSSE42(const char *CurPtr,
return CurPtr;
}
- return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+ return fastParseASCIIIdentifierScalar(CurPtr);
}
__attribute__((target("sse4.2"))) static const char *
@@ -1972,7 +1974,7 @@ __attribute__((target("default")))
#endif
static const char *fastParseASCIIIdentifier(const char *CurPtr,
const char *BufferEnd) {
- return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+ return fastParseASCIIIdentifierScalar(CurPtr);
}
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
@@ -2159,10 +2161,10 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
if (!LangOpts.CPlusPlus11) {
if (!isLexingRawMode())
- Diag(CurPtr,
- C == '_' ? diag::warn_cxx11_compat_user_defined_literal
- : diag::warn_cxx11_compat_reserved_user_defined_literal)
- << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ Diag(CurPtr, C == '_'
+ ? diag::warn_cxx11_compat_user_defined_literal
+ : diag::warn_cxx11_compat_reserved_user_defined_literal)
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
return CurPtr;
}
@@ -2180,7 +2182,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
// valid suffix for a string literal or a numeric literal (this could be
// the 'operator""if' defining a numeric literal operator).
const unsigned MaxStandardSuffixLength = 3;
- char Buffer[MaxStandardSuffixLength] = { C };
+ char Buffer[MaxStandardSuffixLength] = {C};
unsigned Consumed = Size;
unsigned Chars = 1;
while (true) {
@@ -2238,8 +2240,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
const char *NulCharacter = nullptr;
if (!isLexingRawMode() &&
- (Kind == tok::utf8_string_literal ||
- Kind == tok::utf16_string_literal ||
+ (Kind == tok::utf8_string_literal || Kind == tok::utf16_string_literal ||
Kind == tok::utf32_string_literal))
Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
: diag::warn_c99_compat_unicode_literal);
@@ -2251,16 +2252,16 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
if (C == '\\')
C = getAndAdvanceChar(CurPtr, Result);
- if (C == '\n' || C == '\r' || // Newline.
- (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr - 1 == BufferEnd)) { // End of file.
if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
- FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
return true;
}
if (C == 0) {
- if (isCodeCompletionPoint(CurPtr-1)) {
+ if (isCodeCompletionPoint(CurPtr - 1)) {
if (ParsingFilename)
codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false);
else
@@ -2270,7 +2271,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
return true;
}
- NulCharacter = CurPtr-1;
+ NulCharacter = CurPtr - 1;
}
C = getAndAdvanceChar(CurPtr, Result);
}
@@ -2326,7 +2327,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
} else {
Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
- << StringRef(PrefixEnd, 1);
+ << StringRef(PrefixEnd, 1);
}
}
@@ -2338,7 +2339,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
if (C == '"')
break;
- if (C == 0 && CurPtr-1 == BufferEnd) {
+ if (C == 0 && CurPtr - 1 == BufferEnd) {
--CurPtr;
break;
}
@@ -2361,11 +2362,11 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
CurPtr += PrefixLen + 1; // skip over prefix and '"'
break;
}
- } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+ } else if (C == 0 && CurPtr - 1 == BufferEnd) { // End of file.
if (!isLexingRawMode())
Diag(BufferPtr, diag::err_unterminated_raw_string)
- << StringRef(Prefix, PrefixLen);
- FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ << StringRef(Prefix, PrefixLen);
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
return true;
}
}
@@ -2409,7 +2410,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
return true;
}
- NulCharacter = CurPtr-1;
+ NulCharacter = CurPtr - 1;
}
C = getAndAdvanceChar(CurPtr, Result);
}
@@ -2489,23 +2490,23 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
if (C == '\\')
C = getAndAdvanceChar(CurPtr, Result);
- if (C == '\n' || C == '\r' || // Newline.
- (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr - 1 == BufferEnd)) { // End of file.
if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
- FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
return true;
}
if (C == 0) {
- if (isCodeCompletionPoint(CurPtr-1)) {
+ if (isCodeCompletionPoint(CurPtr - 1)) {
PP->CodeCompleteNaturalLanguage();
- FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
cutOffLexing();
return true;
}
- NulCharacter = CurPtr-1;
+ NulCharacter = CurPtr - 1;
}
C = getAndAdvanceChar(CurPtr, Result);
}
@@ -2659,7 +2660,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
const char *NextLine = CurPtr;
if (C != 0) {
// We found a newline, see if it's escaped.
- const char *EscapePtr = CurPtr-1;
+ const char *EscapePtr = CurPtr - 1;
bool HasSpace = false;
while (isHorizontalWhitespace(*EscapePtr)) { // Skip whitespace.
--EscapePtr;
@@ -2672,7 +2673,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
EscapePtr[-2] == '?' && LangOpts.Trigraphs)
// Trigraph-escaped newline.
- CurPtr = EscapePtr-2;
+ CurPtr = EscapePtr - 2;
else
break; // This is a newline, we're done.
@@ -2693,7 +2694,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
// If we only read only one character, then no special handling is needed.
// We're done and can skip forward to the newline.
- if (C != 0 && CurPtr == OldPtr+1) {
+ if (C != 0 && CurPtr == OldPtr + 1) {
CurPtr = NextLine;
break;
}
@@ -2709,14 +2710,14 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
// line is also a // comment, but has spaces, don't emit a diagnostic.
if (isWhitespace(C)) {
const char *ForwardPtr = CurPtr;
- while (isWhitespace(*ForwardPtr)) // Skip whitespace.
+ while (isWhitespace(*ForwardPtr)) // Skip whitespace.
++ForwardPtr;
if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
break;
}
if (!isLexingRawMode())
- Diag(OldPtr-1, diag::ext_multi_line_line_comment);
+ Diag(OldPtr - 1, diag::ext_multi_line_line_comment);
break;
}
}
@@ -2726,7 +2727,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
break;
}
- if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+ if (C == '\0' && isCodeCompletionPoint(CurPtr - 1)) {
PP->CodeCompleteNaturalLanguage();
cutOffLexing();
return false;
@@ -2787,12 +2788,12 @@ bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) {
return true;
assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
- Spelling[1] = '*'; // Change prefix to "/*".
- Spelling += "*/"; // add suffix.
+ Spelling[1] = '*'; // Change prefix to "/*".
+ Spelling += "*/"; // add suffix.
Result.setKind(tok::comment);
- PP->CreateString(Spelling, Result,
- Result.getLocation(), Result.getLocation());
+ PP->CreateString(Spelling, Result, Result.getLocation(),
+ Result.getLocation());
return true;
}
@@ -2900,7 +2901,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
unsigned CharSize;
unsigned char C = getCharAndSize(CurPtr, CharSize);
CurPtr += CharSize;
- if (C == 0 && CurPtr == BufferEnd+1) {
+ if (C == 0 && CurPtr == BufferEnd + 1) {
if (!isLexingRawMode())
Diag(BufferPtr, diag::err_unterminated_block_comment);
--CurPtr;
@@ -2940,7 +2941,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
goto MultiByteUTF8;
C = *CurPtr++;
}
- if (C == '/') goto FoundSlash;
+ if (C == '/')
+ goto FoundSlash;
#ifdef __SSE2__
__m128i Slashes = _mm_set1_epi8('/');
@@ -2950,8 +2952,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
goto MultiByteUTF8;
}
// look for slashes
- int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
- Slashes));
+ int cmp = _mm_movemask_epi8(
+ _mm_cmpeq_epi8(*(const __m128i *)CurPtr, Slashes));
if (cmp != 0) {
// Adjust the pointer to point directly after the first slash. It's
// not necessary to set C here, it will be overwritten at the end of
@@ -2965,10 +2967,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
__vector unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80};
- __vector unsigned char Slashes = {
- '/', '/', '/', '/', '/', '/', '/', '/',
- '/', '/', '/', '/', '/', '/', '/', '/'
- };
+ __vector unsigned char Slashes = {'/', '/', '/', '/', '/', '/', '/', '/',
+ '/', '/', '/', '/', '/', '/', '/', '/'};
while (CurPtr + 16 < BufferEnd) {
if (LLVM_UNLIKELY(
vec_any_ge(*(const __vector unsigned char *)CurPtr, LongUTF)))
@@ -3027,8 +3027,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
}
if (C == '/') {
- FoundSlash:
- if (CurPtr[-2] == '*') // We found the final */. We're done!
+ FoundSlash:
+ if (CurPtr[-2] == '*') // We found the final */. We're done!
break;
if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
@@ -3044,9 +3044,9 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
// if this is a /*/, which will end the comment. This misses cases with
// embedded escaped newlines, but oh well.
if (!isLexingRawMode())
- Diag(CurPtr-1, diag::warn_nested_block_comment);
+ Diag(CurPtr - 1, diag::warn_nested_block_comment);
}
- } else if (C == 0 && CurPtr == BufferEnd+1) {
+ } else if (C == 0 && CurPtr == BufferEnd + 1) {
if (!isLexingRawMode())
Diag(BufferPtr, diag::err_unterminated_block_comment);
// Note: the user probably forgot a */. We could continue immediately
@@ -3063,7 +3063,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
BufferPtr = CurPtr;
return false;
- } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+ } else if (C == '\0' && isCodeCompletionPoint(CurPtr - 1)) {
PP->CodeCompleteNaturalLanguage();
cutOffLexing();
return false;
@@ -3091,7 +3091,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
// efficiently now. This is safe even in KeepWhitespaceMode because we would
// have already returned above with the comment as a token.
if (isHorizontalWhitespace(*CurPtr)) {
- SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
+ SkipWhitespace(Result, CurPtr + 1, TokAtPhysicalStartOfLine);
return false;
}
@@ -3122,10 +3122,10 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
if (Result)
Result->push_back(Char);
break;
- case 0: // Null.
+ case 0: // Null.
// Found end of file?
- if (CurPtr-1 != BufferEnd) {
- if (isCodeCompletionPoint(CurPtr-1)) {
+ if (CurPtr - 1 != BufferEnd) {
+ if (isCodeCompletionPoint(CurPtr - 1)) {
PP->CodeCompleteNaturalLanguage();
cutOffLexing();
return;
@@ -3142,7 +3142,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
case '\n':
// Okay, we found the end of the line. First, back up past the \0, \r, \n.
assert(CurPtr[-1] == Char && "Trigraphs for newline?");
- BufferPtr = CurPtr-1;
+ BufferPtr = CurPtr - 1;
// Next, lex the character, which should handle the EOD transition.
Lex(Tmp);
@@ -3176,7 +3176,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
// Restore comment saving mode, in case it was disabled for directive.
if (PP)
resetExtendedTokenMode();
- return true; // Have a token.
+ return true; // Have a token.
}
// If we are in raw mode, return this event as an EOF token. Let the caller
@@ -3276,11 +3276,11 @@ static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
// Must occur at start of line.
if (Pos == 0 ||
(RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) {
- RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
+ RestOfBuffer = RestOfBuffer.substr(Pos + TermLen);
Pos = RestOfBuffer.find(Terminator);
continue;
}
- return RestOfBuffer.data()+Pos;
+ return RestOfBuffer.data() + Pos;
}
return nullptr;
}
@@ -3291,8 +3291,7 @@ static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
/// if not.
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.
- if (CurPtr != BufferStart &&
- CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;
// Check to see if we have <<<<<<< or >>>>.
@@ -3335,8 +3334,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
/// the line. This returns true if it is a conflict marker and false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.
- if (CurPtr != BufferStart &&
- CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;
// If we have a situation where we don't care about conflict markers, ignore
@@ -3352,8 +3350,8 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// If we do have it, search for the end of the conflict marker. This could
// fail if it got skipped with a '#if 0' or something. Note that CurPtr might
// be the end of conflict marker.
- if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
- CurrentConflictMarkerState)) {
+ if (const char *End =
+ FindConflictEnd(CurPtr, BufferEnd, CurrentConflictMarkerState)) {
CurPtr = End;
// Skip ahead to the end of line.
@@ -3403,7 +3401,7 @@ bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) {
bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
if (PP && PP->isCodeCompletionEnabled()) {
- SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
+ SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr - BufferStart);
return Loc == PP->getCodeCompletionLoc();
}
@@ -3709,7 +3707,7 @@ bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
isUnicodeWhitespace(C)) {
Diag(BufferPtr, diag::ext_unicode_whitespace)
- << makeCharRange(*this, BufferPtr, CurPtr);
+ << makeCharRange(*this, BufferPtr, CurPtr);
Result.setFlag(Token::LeadingSpace);
return true;
@@ -3749,7 +3747,7 @@ bool Lexer::Lex(Token &Result) {
bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
IsAtPhysicalStartOfLine = false;
bool isRawLex = isLexingRawMode();
- (void) isRawLex;
+ (void)isRawLex;
bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
// (After the LexTokenInternal call, the lexer might be destroyed.)
assert((returnedToken || !isRawLex) && "Raw lex must succeed");
@@ -3788,7 +3786,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
Result.setFlag(Token::LeadingSpace);
}
- unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
+ unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
// Read a character, advancing over it.
char Char = getAndAdvanceChar(CurPtr, Result);
@@ -3798,13 +3796,13 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
NewLinePtr = nullptr;
switch (Char) {
- case 0: // Null.
+ case 0: // Null.
// Found end of file?
- if (CurPtr-1 == BufferEnd)
- return LexEndOfFile(Result, CurPtr-1);
+ if (CurPtr - 1 == BufferEnd)
+ return LexEndOfFile(Result, CurPtr - 1);
// Check if we are performing code completion.
- if (isCodeCompletionPoint(CurPtr-1)) {
+ if (isCodeCompletionPoint(CurPtr - 1)) {
// Return the code-completion token.
Result.startToken();
FormTokenWithChars(Result, CurPtr, tok::code_completion);
@@ -3812,7 +3810,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
}
if (!isLexingRawMode())
- Diag(CurPtr-1, diag::null_in_file);
+ Diag(CurPtr - 1, diag::null_in_file);
Result.setFlag(Token::LeadingSpace);
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
return true; // KeepWhitespaceMode
@@ -3821,12 +3819,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;
- case 26: // DOS & CP/M EOF: "^Z".
+ case 26: // DOS & CP/M EOF: "^Z".
// If we're in Microsoft extensions mode, treat this as end of file.
if (LangOpts.MicrosoftExt) {
if (!isLexingRawMode())
- Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
- return LexEndOfFile(Result, CurPtr-1);
+ Diag(CurPtr - 1, diag::ext_ctrl_z_eof_microsoft);
+ return LexEndOfFile(Result, CurPtr - 1);
}
// If Microsoft extensions are disabled, this is just random garbage.
@@ -3882,11 +3880,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// too (without going through the big switch stmt).
if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
- if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+ if (SkipLineComment(Result, CurPtr + 2, TokAtPhysicalStartOfLine))
return true; // There is a token to return.
goto SkipIgnoredUnits;
} else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
- if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+ if (SkipBlockComment(Result, CurPtr + 2, TokAtPhysicalStartOfLine))
return true; // There is a token to return.
goto SkipIgnoredUnits;
} else if (isHorizontalWhitespace(*CurPtr)) {
@@ -3898,8 +3896,16 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// C99 6.4.4.1: Integer Constants.
// C99 6.4.4.2: Floating Constants.
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);
@@ -3927,24 +3933,26 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// UTF-16 raw string literal
if (Char == 'R' && LangOpts.RawStringLiterals &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
- return LexRawStringLiteral(Result,
- ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
- tok::utf16_string_literal);
+ return LexRawStringLiteral(
+ Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result),
+ tok::utf16_string_literal);
if (Char == '8') {
char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
// UTF-8 string literal
if (Char2 == '"')
- return LexStringLiteral(Result,
- ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
- tok::utf8_string_literal);
+ return LexStringLiteral(
+ Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2,
+ Result),
+ tok::utf8_string_literal);
if (Char2 == '\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
return LexCharConstant(
- Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
+ Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2,
+ Result),
tok::utf8_char_constant);
if (Char2 == 'R' && LangOpts.RawStringLiterals) {
@@ -3952,11 +3960,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
// UTF-8 raw string literal
if (Char3 == '"') {
- return LexRawStringLiteral(Result,
- ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
- SizeTmp3, Result),
- tok::utf8_string_literal);
+ return LexRawStringLiteral(
+ Result,
+ ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ SizeTmp3, Result),
+ tok::utf8_string_literal);
}
}
}
@@ -3985,10 +3994,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// UTF-32 raw string literal
if (Char == 'R' && LangOpts.RawStringLiterals &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
- return LexRawStringLiteral(Result,
- ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
- tok::utf32_string_literal);
+ return LexRawStringLiteral(
+ Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result),
+ tok::utf32_string_literal);
}
// treat U like the start of an identifier.
@@ -4002,15 +4011,14 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '"')
- return LexRawStringLiteral(Result,
- ConsumeChar(CurPtr, SizeTmp, Result),
+ return LexRawStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::string_literal);
}
// treat R like the start of an identifier.
return LexIdentifierContinue(Result, CurPtr);
- case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
+ case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
Char = getCharAndSize(CurPtr, SizeTmp);
@@ -4023,10 +4031,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// Wide raw string literal.
if (LangOpts.RawStringLiterals && Char == 'R' &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
- return LexRawStringLiteral(Result,
- ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
- tok::wide_string_literal);
+ return LexRawStringLiteral(
+ Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result),
+ tok::wide_string_literal);
// Wide character constant.
if (Char == '\'')
@@ -4036,23 +4044,63 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
[[fallthrough]];
// C99 6.4.2: Identifiers.
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
- case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
- case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/
- case 'V': case 'W': case 'X': case 'Y': case 'Z':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
- case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
- case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
- case 'v': case 'w': case 'x': case 'y': case 'z':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K': /*'L'*/
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q': /*'R'*/
+ case 'S':
+ case 'T': /*'U'*/
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't': /*'u'*/
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
case '_':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexIdentifierContinue(Result, CurPtr);
- case '$': // $ in identifiers.
+ case '$': // $ in identifiers.
if (LangOpts.DollarIdents) {
if (!isLexingRawMode())
- Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+ Diag(CurPtr - 1, diag::ext_dollar_in_identifier);
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexIdentifierContinue(Result, CurPtr);
@@ -4108,10 +4156,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
Kind = tok::periodstar;
CurPtr += SizeTmp;
} else if (Char == '.' &&
- getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '.') {
Kind = tok::ellipsis;
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
} else {
Kind = tok::period;
}
@@ -4150,18 +4198,18 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
break;
case '-':
Char = getCharAndSize(CurPtr, SizeTmp);
- if (Char == '-') { // --
+ if (Char == '-') { // --
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::minusminus;
} else if (Char == '>' && LangOpts.CPlusPlus &&
- getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->*
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '*') { // C++ ->*
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
Kind = tok::arrowstar;
- } else if (Char == '>') { // ->
+ } else if (Char == '>') { // ->
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::arrow;
- } else if (Char == '=') { // -=
+ } else if (Char == '=') { // -=
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::minusequal;
} else {
@@ -4182,7 +4230,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
case '/':
// 6.4.9: Comments
Char = getCharAndSize(CurPtr, SizeTmp);
- if (Char == '/') { // Line comment.
+ if (Char == '/') { // Line comment.
// Even if Line comments are disabled (e.g. in C89 mode), we generally
// want to lex this as a comment. There is one problem with this though,
// that in one particular corner case, this can change the behavior of the
@@ -4195,7 +4243,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
if (!TreatAsComment)
if (!(PP && PP->isPreprocessedOutput()))
- TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
+ TreatAsComment = getCharAndSize(CurPtr + SizeTmp, SizeTmp2) != '*';
if (TreatAsComment) {
if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
@@ -4209,7 +4257,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
}
}
- if (Char == '*') { // /**/ comment.
+ if (Char == '*') { // /**/ comment.
if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
TokAtPhysicalStartOfLine))
return true; // There is a token to return.
@@ -4232,21 +4280,21 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
Kind = tok::percentequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (LangOpts.Digraphs && Char == '>') {
- Kind = tok::r_brace; // '%>' -> '}'
+ Kind = tok::r_brace; // '%>' -> '}'
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (LangOpts.Digraphs && Char == ':') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Char = getCharAndSize(CurPtr, SizeTmp);
- if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
- Kind = tok::hashhash; // '%:%:' -> '##'
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
- } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize
+ if (Char == '%' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
+ Kind = tok::hashhash; // '%:%:' -> '##'
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
+ } else if (Char == '@' && LangOpts.MicrosoftExt) { // %:@ -> #@ -> Charize
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
if (!isLexingRawMode())
Diag(BufferPtr, diag::ext_charize_microsoft);
Kind = tok::hashat;
- } else { // '%:' -> '#'
+ } else { // '%:' -> '#'
// We parsed a # character. If this occurs at the start of the line,
// it's actually the start of a preprocessing directive. Callback to
// the preprocessor to handle it.
@@ -4265,35 +4313,35 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
if (ParsingFilename) {
return LexAngledStringLiteral(Result, CurPtr);
} else if (Char == '<') {
- char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
if (After == '=') {
Kind = tok::lesslessequal;
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
- } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
+ } else if (After == '<' && IsStartOfConflictMarker(CurPtr - 1)) {
// If this is actually a '<<<<<<<' version control conflict marker,
// recognize it as such and recover nicely.
goto LexNextToken;
- } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) {
+ } else if (After == '<' && HandleEndOfConflictMarker(CurPtr - 1)) {
// If this is '<<<<' and we're in a Perforce-style conflict marker,
// ignore it.
goto LexNextToken;
} else if (LangOpts.CUDA && After == '<') {
Kind = tok::lesslessless;
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
} else {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessless;
}
} else if (Char == '=') {
- char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
if (After == '>') {
if (LangOpts.CPlusPlus20) {
if (!isLexingRawMode())
Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2,
+ Result);
Kind = tok::spaceship;
break;
}
@@ -4301,13 +4349,13 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// change in semantics if this turns up in C++ <=17 mode.
if (LangOpts.CPlusPlus && !isLexingRawMode()) {
Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
- << FixItHint::CreateInsertion(
- getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " ");
+ << FixItHint::CreateInsertion(
+ getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " ");
}
}
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessequal;
- } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '['
+ } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '['
if (LangOpts.CPlusPlus11 &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
// C++0x [lex.pptoken]p3:
@@ -4327,7 +4375,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::l_square;
- } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{'
+ } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{'
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::l_brace;
} else if (Char == '#' && /*Not a trigraph*/ SizeTmp == 1 &&
@@ -4343,22 +4391,22 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greaterequal;
} else if (Char == '>') {
- char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ char After = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
if (After == '=') {
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
Kind = tok::greatergreaterequal;
- } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) {
+ } else if (After == '>' && IsStartOfConflictMarker(CurPtr - 1)) {
// If this is actually a '>>>>' conflict marker, recognize it as such
// and recover nicely.
goto LexNextToken;
- } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
+ } else if (After == '>' && HandleEndOfConflictMarker(CurPtr - 1)) {
// If this is '>>>>>>>' and we're in a conflict marker, ignore it.
goto LexNextToken;
} else if (LangOpts.CUDA && After == '>') {
Kind = tok::greatergreatergreater;
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
+ CurPtr =
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result);
} else {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greatergreater;
@@ -4385,7 +4433,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (Char == '|') {
// If this is '|||||||' and we're in a conflict marker, ignore it.
- if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
+ if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr - 1))
goto LexNextToken;
Kind = tok::pipepipe;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
@@ -4412,7 +4460,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '=') {
// If this is '====' and we're in a conflict marker, ignore it.
- if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
+ if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr - 1))
goto LexNextToken;
Kind = tok::equalequal;
@@ -4429,7 +4477,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
if (Char == '#') {
Kind = tok::hashhash;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize
+ } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize
Kind = tok::hashat;
if (!isLexingRawMode())
Diag(BufferPtr, diag::ext_charize_microsoft);
@@ -4485,11 +4533,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// We can't just reset CurPtr to BufferPtr because BufferPtr may point to
// an escaped newline.
--CurPtr;
- llvm::ConversionResult Status =
- llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
- (const llvm::UTF8 *)BufferEnd,
- &CodePoint,
- llvm::strictConversion);
+ llvm::ConversionResult Status = llvm::convertUTF8Sequence(
+ (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)BufferEnd, &CodePoint,
+ llvm::strictConversion);
if (Status == llvm::conversionOK) {
if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
@@ -4514,7 +4560,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// just diagnose the invalid UTF-8, then drop the character.
Diag(CurPtr, diag::err_invalid_utf8);
- BufferPtr = CurPtr+1;
+ BufferPtr = CurPtr + 1;
// We're pretending the character didn't exist, so just try again with
// this lexer.
// (We manually eliminate the tail call to avoid recursion.)
More information about the cfe-commits
mailing list