r374127 - [clang-scan-deps] Improve string/character literal skipping

Alex Lorenz via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 8 15:42:44 PDT 2019


Author: arphaman
Date: Tue Oct  8 15:42:44 2019
New Revision: 374127

URL: http://llvm.org/viewvc/llvm-project?rev=374127&view=rev
Log:
[clang-scan-deps] Improve string/character literal skipping

The existing string/character literal skipping code in the
dependency directives source minimizer has two issues:

- It doesn't stop scanning when a newline is reached before the terminating character,
unlike the lexer, which considers the token to be done (even if it's invalid) at the end of the line.

- It doesn't support whitespace between '\' and the newline when checking whether the '\' is used as a line continuation character.

This commit fixes both issues.

Differential Revision: https://reviews.llvm.org/D68436

Modified:
    cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
    cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp

Modified: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp?rev=374127&r1=374126&r2=374127&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp (original)
+++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp Tue Oct  8 15:42:44 2019
@@ -185,17 +185,6 @@ static void skipRawString(const char *&F
   }
 }
 
-static void skipString(const char *&First, const char *const End) {
-  assert(*First == '\'' || *First == '"' || *First == '<');
-  const char Terminator = *First == '<' ? '>' : *First;
-  for (++First; First != End && *First != Terminator; ++First)
-    if (*First == '\\')
-      if (++First == End)
-        return;
-  if (First != End)
-    ++First; // Finish off the string.
-}
-
 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
 static unsigned isEOL(const char *First, const char *const End) {
   if (First == End)
@@ -206,6 +195,35 @@ static unsigned isEOL(const char *First,
   return !!isVerticalWhitespace(First[0]);
 }
 
+static void skipString(const char *&First, const char *const End) {
+  assert(*First == '\'' || *First == '"' || *First == '<');
+  const char Terminator = *First == '<' ? '>' : *First;
+  for (++First; First != End && *First != Terminator; ++First) {
+    // String and character literals don't extend past the end of the line.
+    if (isVerticalWhitespace(*First))
+      return;
+    if (*First != '\\')
+      continue;
+    // Skip past backslash to the next character. This ensures that the
+    // character right after it is skipped as well, which matters if it's
+    // the terminator.
+    if (++First == End)
+      return;
+    if (!isWhitespace(*First))
+      continue;
+    // Whitespace after the backslash might indicate a line continuation.
+    const char *FirstAfterBackslashPastSpace = First;
+    skipOverSpaces(FirstAfterBackslashPastSpace, End);
+    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
+      // Advance the character pointer to the next line for the next
+      // iteration.
+      First = FirstAfterBackslashPastSpace + NLSize - 1;
+    }
+  }
+  if (First != End)
+    ++First; // Finish off the string.
+}
+
 // Returns the length of the skipped newline
 static unsigned skipNewline(const char *&First, const char *End) {
   if (First == End)

Modified: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp?rev=374127&r1=374126&r2=374127&view=diff
==============================================================================
--- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp (original)
+++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp Tue Oct  8 15:42:44 2019
@@ -594,6 +594,50 @@ TEST(MinimizeSourceToDependencyDirective
   EXPECT_STREQ("#pragma once\n#include <test.h>\n", Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     SkipLineStringCharLiteralsUntilNewline) {
+  SmallVector<char, 128> Out;
+
+  StringRef Source = R"(#if NEVER_ENABLED
+    #define why(fmt, ...) #error don't try me
+    #endif
+
+    void foo();
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ(
+      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n",
+      Out.data());
+
+  Source = R"(#if NEVER_ENABLED
+      #define why(fmt, ...) "quote dropped
+      #endif
+
+      void foo();
+  )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ(
+      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n",
+      Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     SupportWhitespaceBeforeLineContinuationInStringSkipping) {
+  SmallVector<char, 128> Out;
+
+  StringRef Source = "#define X '\\ \t\nx'\nvoid foo() {}";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#define X '\\ \t\nx'\n", Out.data());
+
+  Source = "#define X \"\\ \r\nx\"\nvoid foo() {}";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#define X \"\\ \r\nx\"\n", Out.data());
+
+  Source = "#define X \"\\ \r\nx\n#include <x>\n";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#define X \"\\ \r\nx\n#include <x>\n", Out.data());
+}
+
 TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) {
   SmallVector<char, 128> Out;
   SmallVector<Token, 4> Tokens;




More information about the cfe-commits mailing list