[llvm] e6751bf - [NFC][TableGen] Eliminate use of isalpha/isdigit from TGLexer (#104837)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 09:54:16 PDT 2024
Author: Rahul Joshi
Date: 2024-08-20T09:54:12-07:00
New Revision: e6751bfbf77bd9888b7882a46e2453e4ed149c7b
URL: https://github.com/llvm/llvm-project/commit/e6751bfbf77bd9888b7882a46e2453e4ed149c7b
DIFF: https://github.com/llvm/llvm-project/commit/e6751bfbf77bd9888b7882a46e2453e4ed149c7b.diff
LOG: [NFC][TableGen] Eliminate use of isalpha/isdigit from TGLexer (#104837)
- Replace use of std::isalpha, std::isdigit, std::isxdigit with LLVM's
StringExtras versions, to avoid possibly locale dependent behavior (e.g.
glibc).
- Create helper function for common checks for valid identifier
characters.
Added:
Modified:
llvm/lib/TableGen/TGLexer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index c1fae91e1fd9f2..62a884e01a5306 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -12,6 +12,7 @@
#include "TGLexer.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
@@ -20,7 +21,6 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include <algorithm>
-#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cstdio>
@@ -38,6 +38,17 @@ struct PreprocessorDir {
};
} // end anonymous namespace
+/// Returns true if `C` may appear in an identifier at the given position:
+/// when `First` is true, `C` must be a valid first character ([a-zA-Z_]);
+/// otherwise `C` must be a valid non-first character ([0-9a-zA-Z_]).
+/// Identifiers match the following regular expression:
+/// [a-zA-Z_][0-9a-zA-Z_]*
+static bool isValidIDChar(char C, bool First) {
+ if (C == '_' || isAlpha(C)) // Underscore or letter: valid in any position.
+ return true;
+ return !First && isDigit(C); // Digit: valid only after the first character.
+}
+
constexpr PreprocessorDir PreprocessorDirs[] = {{tgtok::Ifdef, "ifdef"},
{tgtok::Ifndef, "ifndef"},
{tgtok::Else, "else"},
@@ -51,14 +62,14 @@ static const char *lexMacroName(StringRef Str) {
// Macro names start with [a-zA-Z_].
const char *Next = Str.begin();
- if (*Next != '_' && !isalpha(*Next))
+ if (!isValidIDChar(*Next, /*First=*/true))
return Next;
// Eat the first character of the name.
++Next;
// Match the rest of the identifier regex: [0-9a-zA-Z_]*
const char *End = Str.end();
- while (Next != End && (isalpha(*Next) || isdigit(*Next) || *Next == '_'))
+ while (Next != End && isValidIDChar(*Next, /*First=*/false))
++Next;
return Next;
}
@@ -173,7 +184,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
switch (CurChar) {
default:
// Handle letters: [a-zA-Z_]
- if (isalpha(CurChar) || CurChar == '_')
+ if (isValidIDChar(CurChar, /*First=*/true))
return LexIdentifier();
// Unknown character, emit an error.
@@ -250,14 +261,14 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9': {
int NextChar = 0;
- if (isdigit(CurChar)) {
+ if (isDigit(CurChar)) {
// Allow identifiers to start with a number if it is followed by
// an identifier. This can happen with paste operations like
// foo#8i.
int i = 0;
do {
NextChar = peekNextChar(i++);
- } while (isdigit(NextChar));
+ } while (isDigit(NextChar));
if (NextChar == 'x' || NextChar == 'b') {
// If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
@@ -281,7 +292,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
}
}
- if (isalpha(NextChar) || NextChar == '_')
+ if (isValidIDChar(NextChar, /*First=*/true))
return LexIdentifier();
return LexNumber();
@@ -347,13 +358,13 @@ tgtok::TokKind TGLexer::LexString() {
}
tgtok::TokKind TGLexer::LexVarName() {
- if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
+ if (!isValidIDChar(CurPtr[0], /*First=*/true))
return ReturnError(TokStart, "Invalid variable name");
// Otherwise, we're ok, consume the rest of the characters.
const char *VarNameStart = CurPtr++;
- while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ while (isValidIDChar(*CurPtr, /*First=*/false))
++CurPtr;
CurStrVal.assign(VarNameStart, CurPtr);
@@ -365,7 +376,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
const char *IdentStart = TokStart;
// Match the rest of the identifier regex: [0-9a-zA-Z_]*
- while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ while (isValidIDChar(*CurPtr, /*First=*/false))
++CurPtr;
// Check to see if this identifier is a reserved keyword.
@@ -500,7 +511,7 @@ tgtok::TokKind TGLexer::LexNumber() {
Base = 16;
do
++CurPtr;
- while (isxdigit(CurPtr[0]));
+ while (isHexDigit(CurPtr[0]));
} else if (CurPtr[0] == 'b') {
Base = 2;
do
@@ -515,7 +526,7 @@ tgtok::TokKind TGLexer::LexNumber() {
// Check if it's a decimal value.
if (Base == 0) {
// Check for a sign without a digit.
- if (!isdigit(CurPtr[0])) {
+ if (!isDigit(CurPtr[0])) {
if (CurPtr[-1] == '-')
return tgtok::minus;
else if (CurPtr[-1] == '+')
@@ -526,7 +537,7 @@ tgtok::TokKind TGLexer::LexNumber() {
NumStart = TokStart;
IsMinus = CurPtr[-1] == '-';
- while (isdigit(CurPtr[0]))
+ while (isDigit(CurPtr[0]))
++CurPtr;
}
@@ -574,11 +585,11 @@ tgtok::TokKind TGLexer::LexBracket() {
/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
tgtok::TokKind TGLexer::LexExclaim() {
- if (!isalpha(*CurPtr))
+ if (!isAlpha(*CurPtr))
return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
const char *Start = CurPtr++;
- while (isalpha(*CurPtr))
+ while (isAlpha(*CurPtr))
++CurPtr;
// Check to see which operator this is.
More information about the llvm-commits
mailing list