[flang-commits] [flang] [flang] Tokenize all -D macro bodies, and do it better (PR #168116)
Andre Kuhlenschmidt via flang-commits
flang-commits at lists.llvm.org
Tue Nov 18 10:25:03 PST 2025
================
@@ -371,40 +374,66 @@ TokenSequence Preprocessor::TokenizeMacroBody(const std::string &str) {
Provenance provenance{allSources_.AddCompilerInsertion(str).start()};
auto end{str.size()};
for (std::string::size_type at{0}; at < end;) {
- // Alternate between tokens that are identifiers (and therefore subject
- // to argument replacement) and those that are not.
- auto start{str.find_first_of(idChars, at)};
- if (start == str.npos) {
- tokens.Put(str.substr(at), provenance + at);
- break;
- } else if (start > at) {
- tokens.Put(str.substr(at, start - at), provenance + at);
+ char ch{str.at(at)};
+ if (IsWhiteSpace(ch)) {
+ ++at;
+ continue;
}
- at = str.find_first_not_of(idChars, start + 1);
- if (at == str.npos) {
+ auto start{at};
+ if (IsLegalIdentifierStart(ch)) {
+ for (++at; at < end && IsLegalInIdentifier(str.at(at)); ++at) {
+ }
+ } else if (IsDecimalDigit(ch) || ch == '.') {
+ for (++at; at < end; ++at) {
+ ch = str.at(at);
+ if (!IsDecimalDigit(ch) && ch != '.') {
+ break;
+ }
+ }
+ if (at < end) {
+ ch = ToUpperCaseLetter(str.at(at));
+ if (ch == 'E' || ch == 'D' || ch == 'Q') {
+ if (++at < end) {
+ ch = str.at(at);
+ if (ch == '+' || ch == '-') {
+ ++at;
+ }
+ for (; at < end && IsDecimalDigit(str.at(at)); ++at) {
+ }
+ }
+ }
+ }
+ } else if (ch == '\'' || ch == '"') {
+ for (++at; at < end && str.at(at) != ch; ++at) {
+ }
+ if (at < end) {
+ ++at;
+ }
+ } else {
+ ++at; // single-character token
+ }
+ if (at >= end || at == str.npos) {
tokens.Put(str.substr(start), provenance + start);
break;
- } else {
- tokens.Put(str.substr(start, at - start), provenance + start);
}
+ tokens.Put(str.substr(start, at - start), provenance + start);
}
return tokens;
}
void Preprocessor::Define(const std::string &macro, const std::string &value) {
+ auto rhs{TokenizeMacroBody(value)};
----------------
akuhlens wrote:
Should you put an explicit type here, since this isn't one of the forms that explicitly mentions the type? Looking at the rest of the code here, it heavily uses auto without an explicit type, so I could see the argument for consistency with existing code.
https://github.com/llvm/llvm-project/pull/168116
More information about the flang-commits
mailing list