[clang] add define2 pp directive (PR #65851)

via cfe-commits cfe-commits at lists.llvm.org
Sat Sep 9 08:00:04 PDT 2023


https://github.com/kelbon created https://github.com/llvm/llvm-project/pull/65851:

Add 'define2' directive, which works as 'define', but allows recursive macros

Motivation:

 * There is a huge amount of code that uses code generation, boilerplate macros, or terribly misused templates for what is basically 'for each token', or that could otherwise be solved with this new feature:
1. Nlohmann json:
https://github.com/nlohmann/json/blob/836b7beca4b62e2a99465edef44066b7401fd704/include/nlohmann/detail/macro_scope.hpp#L320

2. boost preprocessor:
https://github.com/boostorg/preprocessor/blob/develop/include/boost/preprocessor/seq/detail/limits/split_1024.hpp

3. boost pfr:
(codegen)
https://github.com/boostorg/pfr/blob/develop/include/boost/pfr/detail/core17_generated.hpp

4. data_parallel_vector:
https://github.com/kelbon/AnyAny/blob/4b056be2b6cbcfa1a407f7ee75279af414e390e4/include/anyany/noexport/data_parallel_vector_details.hpp#L62



* It can easily be used for what 'magic enum' does; in many cases it can replace reflection (because many who want reflection actually just want to create a JSON schema without specifying names twice)

* C++20 adds `__VA_OPT__`, which is designed for recursive macros, but there is no such thing as a recursive macro in C++!
  

Examples: 
<details>
  <summary>fold</summary>

```C++

#define2 $fold_right(op, head, ...) ( head __VA_OPT__(op $fold_right(op, __VA_ARGS__)) )
#define2 $fold_left(op, head, ...) ( __VA_OPT__($fold_left(op, __VA_ARGS__) op) head )

static_assert($fold_right(+, 1, 2, 3) == 6);
// error: static assertion failed due to requirement '((((4) + 3) + 2) + 1) == 4'
static_assert($fold_left(+, 1, 2, 3, 4) == 4);

```
</details>

<details>
  <summary>reverse token stream</summary>

```C++

#define2 $reverse(head, ...) __VA_OPT__($reverse(__VA_ARGS__) , ) head

// works as expected
constexpr int A[] = { $reverse($reverse($reverse(1, 2, 3))) };
constexpr int B[] = { 3, 2, 1 };
static_assert(A[0] == B[0] && A[1] == B[1] && A[2] == B[2]);

```
</details>

<details>
  <summary>transform token stream (literally for each)</summary>

```C++

#define2 $transform(macro, head, ...) macro(head) __VA_OPT__($transform(macro, __VA_ARGS__))

#define $to_string(tok) #tok,

constexpr const char* names[] = {
  $transform($to_string, a, b)
#undef $to_string
};
static_assert(names[0][0] == 'a' && names[1][0] == 'b');

```
</details>

<details>
  <summary>calculate count of tokens</summary>


```C++

#define2 TOKCOUNT_IMPL(head, ...) (1 __VA_OPT__(+ TOKCOUNT_IMPL(__VA_ARGS__)))
// works for zero args too
#define $tok_count(...) (0 __VA_OPT__(+ TOKCOUNT_IMPL(__VA_ARGS__)) )

static_assert($tok_count() == 0);
static_assert($tok_count(1, 2, (4, 5, 6)) == 3);

```
</details>

<details>
  <summary>boost pfr without code generation</summary>

```C++

// placeholders for actual calculations
template<typename T>
consteval int aggregate_size() { return 3; }
constexpr int tie(auto&... args) { return sizeof...(args); }

#define2 $try_expand(value, head, ...)                                            \
if constexpr (aggregate_size<decltype(value)>() == $tok_count(+1, __VA_ARGS__)) { \
  auto [head __VA_OPT__(,) __VA_ARGS__] = value;                                  \
  return tie(head __VA_OPT__(,) __VA_ARGS__);                                     \
}                                                                                 \
__VA_OPT__($try_expand(value, __VA_ARGS__))

constexpr auto magic_get(auto aggregate) {
  $try_expand(aggregate, _3, _2, _1);
}

struct abc { int a, b, c; };
static_assert(magic_get(abc{}) == 3);

```

Here `magic_get` expands to (screenshot from clangd built with this patch)

![image](https://github.com/llvm/llvm-project/assets/58717435/d65c2f4f-12c7-48da-b03e-147791692c64)


</details>

<details>
  <summary>infinite recursion macro:</summary>

```C++
#define2 A A
// produces 'error: unknown type name 'A'' (expanded to 'A')
// A
```
![image](https://github.com/llvm/llvm-project/assets/58717435/9cc388a1-ecd8-4577-856f-313c11669999)

</details>


>From 2f807b312baef8c6038c2452b84232acb6d6d2c2 Mon Sep 17 00:00:00 2001
From: Kelbon Nik <kelbonage at gmail.com>
Date: Sat, 9 Sep 2023 17:51:15 +0400
Subject: [PATCH] add define2 pp directive

---
 clang/include/clang/Basic/TokenKinds.def |  1 +
 clang/include/clang/Lex/MacroInfo.h      | 19 +++++++++----------
 clang/include/clang/Lex/Preprocessor.h   |  2 +-
 clang/lib/Basic/IdentifierTable.cpp      |  2 ++
 clang/lib/Format/WhitespaceManager.cpp   |  2 +-
 clang/lib/Lex/MacroInfo.cpp              |  3 ++-
 clang/lib/Lex/PPDirectives.cpp           | 16 +++++++++++-----
 7 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 45ebc200b168986..f059d809823ab42 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -115,6 +115,7 @@ PPKEYWORD(__include_macros)
 
 // C99 6.10.3 - Macro Replacement.
 PPKEYWORD(define)
+PPKEYWORD(define2)
 PPKEYWORD(undef)
 
 // C99 6.10.4 - Line Control.
diff --git a/clang/include/clang/Lex/MacroInfo.h b/clang/include/clang/Lex/MacroInfo.h
index 00c1c3866bbd9ca..4f0c8e987610e50 100644
--- a/clang/include/clang/Lex/MacroInfo.h
+++ b/clang/include/clang/Lex/MacroInfo.h
@@ -102,6 +102,10 @@ class MacroInfo {
   /// like \#define A A.
   bool IsDisabled : 1;
 
+  // True if 'define2' used,
+  // ignores 'IsDisabled' and enables expansion anyway
+  bool AllowRecurse : 1;
+
   /// True if this macro is either defined in the main file and has
   /// been used, or if it is not defined in the main file.
   ///
@@ -278,18 +282,13 @@ class MacroInfo {
   /// Return true if this macro is enabled.
   ///
   /// In other words, that we are not currently in an expansion of this macro.
-  bool isEnabled() const { return !IsDisabled; }
-
-  void EnableMacro() {
-    assert(IsDisabled && "Cannot enable an already-enabled macro!");
-    IsDisabled = false;
-  }
+  bool isEnabled() const { return AllowRecurse || !IsDisabled; }
+  void setAllowRecursive(bool Allow) { AllowRecurse = Allow; }
+  bool isAllowRecurse() const { return AllowRecurse; }
 
-  void DisableMacro() {
-    assert(!IsDisabled && "Cannot disable an already-disabled macro!");
-    IsDisabled = true;
-  }
+  void EnableMacro() { IsDisabled = false; }
 
+  void DisableMacro() { IsDisabled = true; }
   /// Determine whether this macro was used for a header guard.
   bool isUsedForHeaderGuard() const { return UsedForHeaderGuard; }
 
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 9efe439bc5f2192..de121ce82fd1d7b 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2754,7 +2754,7 @@ class Preprocessor {
   void replayPreambleConditionalStack();
 
   // Macro handling.
-  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
+  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard, bool AllowRecurse);
   void HandleUndefDirective();
 
   // Conditional Inclusion.
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index afb30268f2973ce..4de3565c8c6d9a8 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -433,6 +433,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   unsigned Len = getLength();
   if (Len < 2) return tok::pp_not_keyword;
   const char *Name = getNameStart();
+  if (std::string_view(Name, Len) == "define2")
+    return tok::pp_define2;
   switch (HASH(Len, Name[0], Name[2])) {
   default: return tok::pp_not_keyword;
   CASE( 2, 'i', '\0', if);
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index b7bd8d27dc976b1..d8ab76d6761553e 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -743,7 +743,7 @@ void WhitespaceManager::alignConsecutiveMacros() {
     if (!Current || Current->isNot(tok::identifier))
       return false;
 
-    if (!Current->Previous || Current->Previous->isNot(tok::pp_define))
+    if (!Current->Previous || !Current->Previous->isOneOf(tok::pp_define, tok::pp_define2))
       return false;
 
     // For a macro function, 0 spaces are required between the
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
index 39bb0f44eff25ba..9c3619c7c909304 100644
--- a/clang/lib/Lex/MacroInfo.cpp
+++ b/clang/lib/Lex/MacroInfo.cpp
@@ -50,7 +50,7 @@ static_assert(MacroInfoSizeChecker<sizeof(void *)>::AsExpected,
 MacroInfo::MacroInfo(SourceLocation DefLoc)
     : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
       IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
-      HasCommaPasting(false), IsDisabled(false), IsUsed(false),
+      HasCommaPasting(false), IsDisabled(false), AllowRecurse(false), IsUsed(false),
       IsAllowRedefinitionsWithoutWarning(false), IsWarnIfUnused(false),
       UsedForHeaderGuard(false) {}
 
@@ -157,6 +157,7 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const {
   if (IsBuiltinMacro) Out << " builtin";
   if (IsDisabled) Out << " disabled";
   if (IsUsed) Out << " used";
+  if (AllowRecurse) Out << " allow_recurse";
   if (IsAllowRedefinitionsWithoutWarning)
     Out << " allow_redefinitions_without_warning";
   if (IsWarnIfUnused) Out << " warn_if_unused";
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index a4db8e7a84c07d5..4605d331eba6801 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1111,7 +1111,11 @@ void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
     if (II->getPPKeywordID() == tok::pp_define) {
       return HandleDefineDirective(Result,
-                                   /*ImmediatelyAfterHeaderGuard=*/false);
+                                   /*ImmediatelyAfterHeaderGuard=*/false, /*AllowRecurse=*/false);
+    }
+    if (II->getPPKeywordID() == tok::pp_define2) {
+      return HandleDefineDirective(Result,
+                                   /*ImmediatelyAfterHeaderGuard=*/false, /*AllowRecurse=*/true);
     }
     if (SkippingUntilPCHThroughHeader &&
         II->getPPKeywordID() == tok::pp_include) {
@@ -1250,7 +1254,9 @@ void Preprocessor::HandleDirective(Token &Result) {
 
     // C99 6.10.3 - Macro Replacement.
     case tok::pp_define:
-      return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
+      return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef, false);
+    case tok::pp_define2:
+      return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef, true);
     case tok::pp_undef:
       return HandleUndefDirective();
 
@@ -3036,10 +3042,10 @@ static bool isObjCProtectedMacro(const IdentifierInfo *II) {
          II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
 }
 
-/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
+/// HandleDefineDirective - Implements \#define and define2. This consumes the entire macro
 /// line then lets the caller lex the next real token.
 void Preprocessor::HandleDefineDirective(
-    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
+    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard, bool AllowRecurse) {
   ++NumDefined;
 
   Token MacroNameTok;
@@ -3064,7 +3070,7 @@ void Preprocessor::HandleDefineDirective(
       MacroNameTok, ImmediatelyAfterHeaderGuard);
 
   if (!MI) return;
-
+  MI->setAllowRecursive(AllowRecurse);
   if (MacroShadowsKeyword &&
       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);



More information about the cfe-commits mailing list