[clang] [clang][scandeps] Improve handling of rawstrings. (PR #139504)
Tobias Hieta via cfe-commits
cfe-commits at lists.llvm.org
Sun May 11 23:41:35 PDT 2025
https://github.com/tru created https://github.com/llvm/llvm-project/pull/139504
The current parser only looks one character back for an `R` before each string to decide whether it is a raw string. The preprocessor, however, is much more permissive here and accepts constructs like:
R\
"str"
And much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner.
This was co-authored by Sylvain Audi <sylvain.audi at ubisoft.com> (@sylvain-audi)
Fixes #137648
>From b722c2e1702304de2bb962ba24868cf0912f27ee Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias.hieta at ubisoft.com>
Date: Mon, 5 May 2025 11:40:17 +0200
Subject: [PATCH] [clang][scandeps] Improve handling of rawstrings.
The current parser only looks one character back for an 'R' before each
string to decide whether it is a raw string. The preprocessor, however,
is much more permissive here and accepts constructs like:
R\
"str"
And much more. This patch also adds more test coverage for raw strings
in the DependencyDirectivesScanner.
This was co-authored by Sylvain Audi <sylvain.audi at ubisoft.com>
Fixes #137648
---
clang/lib/Lex/DependencyDirectivesScanner.cpp | 43 ++++++++++++---
clang/test/ClangScanDeps/raw-strings.cpp | 55 +++++++++++++++++++
2 files changed, 91 insertions(+), 7 deletions(-)
create mode 100644 clang/test/ClangScanDeps/raw-strings.cpp
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..86e860abdbbdc 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -206,6 +206,24 @@ static void skipOverSpaces(const char *&First, const char *const End) {
++First;
}
+// Move back by one character, skipping escaped newlines (backslash + \n)
+static char previousChar(const char *First, const char *&Current) {
+ assert(Current > First);
+ --Current;
+ while (Current > First + 1 && isVerticalWhitespace(*Current)) {
+ const char PrevChar = *(Current - 1);
+ if (PrevChar == '\\') {
+ Current -= 2; // backslash + (\n or \r)
+ } else if (Current > First + 2 && isVerticalWhitespace(PrevChar) &&
+ PrevChar != *Current && *(Current - 2) == '\\') {
+ Current -= 3; // backslash + (\n\r or \r\n)
+ } else {
+ break;
+ }
+ }
+ return *Current;
+}
+
[[nodiscard]] static bool isRawStringLiteral(const char *First,
const char *Current) {
assert(First <= Current);
@@ -215,25 +233,28 @@ static void skipOverSpaces(const char *&First, const char *const End) {
return false;
// Check for an "R".
- --Current;
- if (*Current != 'R')
+ if (previousChar(First, Current) != 'R')
return false;
- if (First == Current || !isAsciiIdentifierContinue(*--Current))
+ if (First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current)))
return true;
// Check for a prefix of "u", "U", or "L".
if (*Current == 'u' || *Current == 'U' || *Current == 'L')
- return First == Current || !isAsciiIdentifierContinue(*--Current);
+ return First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current));
// Check for a prefix of "u8".
- if (*Current != '8' || First == Current || *Current-- != 'u')
+ if (*Current != '8' || First == Current ||
+ previousChar(First, Current) != 'u')
return false;
- return First == Current || !isAsciiIdentifierContinue(*--Current);
+ return First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current));
}
static void skipRawString(const char *&First, const char *const End) {
assert(First[0] == '"');
- assert(First[-1] == 'R');
+ //assert(First[-1] == 'R');
const char *Last = ++First;
while (Last != End && *Last != '(')
@@ -416,6 +437,14 @@ void Scanner::skipLine(const char *&First, const char *const End) {
continue;
}
+ // Continue on the same line if an EOL is preceded with backslash
+ if (First + 1 < End && *First == '\\') {
+ if (unsigned Len = isEOL(First + 1, End)) {
+ First += 1 + Len;
+ continue;
+ }
+ }
+
// Iterate over comments correctly.
if (*First != '/' || End - First < 2) {
LastTokenPtr = First;
diff --git a/clang/test/ClangScanDeps/raw-strings.cpp b/clang/test/ClangScanDeps/raw-strings.cpp
new file mode 100644
index 0000000000000..5fda4a559c9e3
--- /dev/null
+++ b/clang/test/ClangScanDeps/raw-strings.cpp
@@ -0,0 +1,55 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+//--- cdb.json.in
+[{
+ "directory": "DIR",
+ "command": "clang -c DIR/tu.c -o DIR/tu.o -IDIR/include",
+ "file": "DIR/tu.c"
+}]
+//--- include/header.h
+//--- include/header2.h
+//--- include/header3.h
+//--- include/header4.h
+//--- tu.c
+#if 0
+R"x()x"
+#endif
+
+#include "header.h"
+
+#if 0
+R"y(";
+#endif
+#include "header2.h"
+
+#if 0
+//")y"
+#endif
+
+#if 0
+R"y(";
+R"z()y";
+#endif
+#include "header3.h"
+#if 0
+//")z"
+#endif
+
+#if 0
+R\
+"y(";
+R"z()y";
+#endif
+#include "header4.h"
+#if 0
+//")z"
+#endif
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess | FileCheck %s
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess-dependency-directives | FileCheck %s
+// CHECK: tu.c
+// CHECK-NEXT: header.h
+// CHECK-NEXT: header3.h
+// CHECK-NEXT: header4.h
More information about the cfe-commits
mailing list