[clang] [Clang] Warn on backslash-newline-EOF (PR #97585)

Mital Ashok via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 4 03:30:35 PDT 2024


https://github.com/MitalAshok updated https://github.com/llvm/llvm-project/pull/97585

>From 8af656659b79d76c971b01f1f4c14dc7315565b8 Mon Sep 17 00:00:00 2001
From: Mital Ashok <mital at mitalashok.co.uk>
Date: Fri, 21 Jun 2024 18:55:38 +0100
Subject: [PATCH 1/4] [Clang] Warn on backslash-newline-EOF

---
 clang/docs/ReleaseNotes.rst                   |  2 +
 .../include/clang/Basic/DiagnosticLexKinds.td |  1 +
 clang/lib/Lex/Lexer.cpp                       | 39 +++++++++++++++++--
 clang/test/CXX/drs/cwg16xx.cpp                |  9 +++++
 clang/test/CXX/drs/cwg2747.cpp                | 11 ++++++
 clang/test/CXX/drs/cwg27xx.cpp                |  2 +
 .../test/Preprocessor/backslash_newline_eof.c | 12 ++++++
 .../Preprocessor/backslash_without_newline.c  |  8 ++++
 .../Preprocessor/backslash_without_newline.h  |  4 ++
 clang/www/cxx_dr_status.html                  |  4 +-
 10 files changed, 87 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CXX/drs/cwg2747.cpp
 create mode 100644 clang/test/Preprocessor/backslash_newline_eof.c
 create mode 100644 clang/test/Preprocessor/backslash_without_newline.c
 create mode 100644 clang/test/Preprocessor/backslash_without_newline.h

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f40fd1cd145bb0..7c0ac3a504f982 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -647,6 +647,8 @@ Improvements to Clang's diagnostics
 
 - Clang now shows implicit deduction guides when diagnosing overload resolution failure. #GH92393.
 
+- Clang now emits ``-Wnewline-eof`` when the last newline is deleted by a preceding backslash.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 12d7b8c0205ee9..e6b2c1385944c7 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -56,6 +56,7 @@ def ext_no_newline_eof : Extension<"no newline at end of file">,
   InGroup<NewlineEOF>;
 def warn_no_newline_eof : Warning<"no newline at end of file">,
   InGroup<NewlineEOF>, DefaultIgnore;
+def note_backslash_newline_eof : Note<"last newline deleted by splice here">;
 
 def warn_cxx98_compat_no_newline_eof : Warning<
   "C++98 requires newline at end of file">,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index e59c7805b38623..0e540834b473ba 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3165,7 +3165,17 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
 
   // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
   // a pedwarn.
-  if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) {
+  if (CurPtr != BufferStart) {
+    StringRef LastNewline;
+    if (CurPtr[-1] == '\r' || CurPtr[-1] == '\n') {
+      LastNewline = StringRef(CurPtr - 1, 1);
+      if (CurPtr - 1 != BufferStart && CurPtr[-2] != CurPtr[-1] &&
+          (CurPtr[-2] == '\r' || CurPtr[-2] == '\n')) {
+        // \r\n or \n\r is one newline
+        LastNewline = StringRef(CurPtr - 2, 2);
+      }
+    }
+
     DiagnosticsEngine &Diags = PP->getDiagnostics();
     SourceLocation EndLoc = getSourceLocation(BufferEnd);
     unsigned DiagID;
@@ -3183,8 +3193,31 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
       DiagID = diag::ext_no_newline_eof;
     }
 
-    Diag(BufferEnd, DiagID)
-      << FixItHint::CreateInsertion(EndLoc, "\n");
+    if (LastNewline.empty()) {
+      Diag(BufferEnd, DiagID) << FixItHint::CreateInsertion(EndLoc, "\n");
+    } else {
+      // While the file physically ends in a newline, the previous
+      // line might have ended in a splice, so it would be deleted
+      const char *LastSpliceLocation = LastNewline.data();
+      while (LastSpliceLocation != BufferStart &&
+             isHorizontalWhitespace(*--LastSpliceLocation))
+        ;
+
+      bool LastIsSplice = *LastSpliceLocation == '\\';
+      if (*LastSpliceLocation == '/' && LangOpts.Trigraphs)
+        // Check for "??/" trigraph for "\"
+        LastIsSplice =
+            LastSpliceLocation != BufferStart && *--LastSpliceLocation == '?' &&
+            LastSpliceLocation != BufferStart && *--LastSpliceLocation == '?';
+
+      if (LastIsSplice) {
+        PP->Diag(getSourceLocation(LastNewline.data(), LastNewline.size()),
+                 DiagID);
+        Diag(LastSpliceLocation, diag::note_backslash_newline_eof)
+            << FixItHint::CreateRemoval(getSourceLocation(
+                   LastSpliceLocation, *LastSpliceLocation == '\\' ? 1 : 3));
+      }
+    }
   }
 
   BufferPtr = CurPtr;
diff --git a/clang/test/CXX/drs/cwg16xx.cpp b/clang/test/CXX/drs/cwg16xx.cpp
index cf6b45ceabf2cc..dca941fa30624f 100644
--- a/clang/test/CXX/drs/cwg16xx.cpp
+++ b/clang/test/CXX/drs/cwg16xx.cpp
@@ -536,3 +536,12 @@ namespace cwg1696 { // cwg1696: 7
   };
 #endif
 }
+
+// cwg1698: yes
+// This file intentionally does not end in a newline
+// to facilitate the CWG1698 test
+
+// cxx98-error@+3 {{no newline at end of file}}
+// expected-error@+2 {{expected unqualified-id}}
+
+\
\ No newline at end of file
diff --git a/clang/test/CXX/drs/cwg2747.cpp b/clang/test/CXX/drs/cwg2747.cpp
new file mode 100644
index 00000000000000..3f971452c5193f
--- /dev/null
+++ b/clang/test/CXX/drs/cwg2747.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -std=c++11 -pedantic-errors -verify=expected %s -E | FileCheck %s --strict-whitespace --allow-empty
+
+// expected-no-diagnostics
+// cwg2747: yes
+
+// Check that a newline is still added even though there is a
+// physical newline at the end of the file (which is spliced)
+// CHECK: int x;{{$[[:space:]]^}}int y;int z;{{$[[:space:]]^$}}
+int x;
+int y;\
+int z;\
diff --git a/clang/test/CXX/drs/cwg27xx.cpp b/clang/test/CXX/drs/cwg27xx.cpp
index 406c8ea41f3b2f..6e47af6333569d 100644
--- a/clang/test/CXX/drs/cwg27xx.cpp
+++ b/clang/test/CXX/drs/cwg27xx.cpp
@@ -18,6 +18,8 @@ void f(B b) {
 struct D : B {};
 } // namespace cwg2718
 
+// cwg2747 is in cwg2747.cpp
+
 namespace cwg2759 { // cwg2759: 19
 #if __cplusplus >= 201103L
 
diff --git a/clang/test/Preprocessor/backslash_newline_eof.c b/clang/test/Preprocessor/backslash_newline_eof.c
new file mode 100644
index 00000000000000..b41bc92a425144
--- /dev/null
+++ b/clang/test/Preprocessor/backslash_newline_eof.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -x c -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++98 -pedantic -verify %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -Wc++98-compat-pedantic -verify=cxx11-compat %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -verify=cxx11 %s
+
+// cxx11-no-diagnostics
+
+// expected-warning at +4 {{no newline at end of file}}
+// expected-note at +3 {{last newline deleted by splice here}}
+// cxx11-compat-warning at +2 {{C++98 requires newline at end of file}}
+// cxx11-compat-note at +1 {{last newline deleted by splice here}}
+int x; \
diff --git a/clang/test/Preprocessor/backslash_without_newline.c b/clang/test/Preprocessor/backslash_without_newline.c
new file mode 100644
index 00000000000000..84be4e04b54adc
--- /dev/null
+++ b/clang/test/Preprocessor/backslash_without_newline.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -x c -E -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -E -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -E -o - %s | FileCheck %s
+
+#include "./backslash_without_newline.h"
+
+// CHECK: A B \ C
+A BACKSLASH_WITH_NEWLINE B BACKSLASH_WITHOUT_NEWLINE C
diff --git a/clang/test/Preprocessor/backslash_without_newline.h b/clang/test/Preprocessor/backslash_without_newline.h
new file mode 100644
index 00000000000000..db21184332c3b2
--- /dev/null
+++ b/clang/test/Preprocessor/backslash_without_newline.h
@@ -0,0 +1,4 @@
+#define BACKSLASH_WITH_NEWLINE \
+
+// This file intentionally does not end with a newline.
+#define BACKSLASH_WITHOUT_NEWLINE \
\ No newline at end of file
diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html
index 937f67981e2963..2da096b746611e 100755
--- a/clang/www/cxx_dr_status.html
+++ b/clang/www/cxx_dr_status.html
@@ -10003,7 +10003,7 @@ <h2 id="cxxdr">C++ defect report implementation status</h2>
     <td><a href="https://cplusplus.github.io/CWG/issues/1698.html">1698</a></td>
     <td>DRWP</td>
     <td>Files ending in <TT>\</TT></td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr class="open" id="1699">
     <td><a href="https://cplusplus.github.io/CWG/issues/1699.html">1699</a></td>
@@ -16297,7 +16297,7 @@ <h2 id="cxxdr">C++ defect report implementation status</h2>
     <td><a href="https://cplusplus.github.io/CWG/issues/2747.html">2747</a></td>
     <td>DRWP</td>
     <td>Cannot depend on an already-deleted splice</td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="2748">
     <td><a href="https://cplusplus.github.io/CWG/issues/2748.html">2748</a></td>

>From 8b78310bf05e3d7459e19eb9e44d4cc805a71987 Mon Sep 17 00:00:00 2001
From: Mital Ashok <mital at mitalashok.co.uk>
Date: Thu, 4 Jul 2024 10:37:31 +0100
Subject: [PATCH 2/4] Add tests about trigraphs and trailing spaces

---
 .../Lexer/backslash-trigraph_newline_eof.c     | 18 ++++++++++++++++++
 .../backslash-trigraph_space-ext-newline_eof.c | 18 ++++++++++++++++++
 clang/test/Lexer/backslash_newline_eof.c       | 14 ++++++++++++++
 .../Lexer/backslash_space-ext-newline_eof.c    | 12 ++++++++++++
 .../test/Preprocessor/backslash_newline_eof.c  | 12 ------------
 5 files changed, 62 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Lexer/backslash-trigraph_newline_eof.c
 create mode 100644 clang/test/Lexer/backslash-trigraph_space-ext-newline_eof.c
 create mode 100644 clang/test/Lexer/backslash_newline_eof.c
 create mode 100644 clang/test/Lexer/backslash_space-ext-newline_eof.c
 delete mode 100644 clang/test/Preprocessor/backslash_newline_eof.c

diff --git a/clang/test/Lexer/backslash-trigraph_newline_eof.c b/clang/test/Lexer/backslash-trigraph_newline_eof.c
new file mode 100644
index 00000000000000..f066168f3a3f72
--- /dev/null
+++ b/clang/test/Lexer/backslash-trigraph_newline_eof.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -x c -std=c18 -fsyntax-only -pedantic -verify=expected,trigraphs,no-newline %s
+// RUN: %clang_cc1 -x c -std=c23 -fsyntax-only -pedantic -verify=expected,no-trigraphs,c23 %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++98 -pedantic -verify=expected,trigraphs,no-newline %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -Wc++98-compat-pedantic -verify=expected,trigraphs,no-newline-compat %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -verify=expected,trigraphs %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++17 -verify=expected,no-trigraphs,cxx17 %s
+
+// trigraphs-warning@+10 {{trigraph converted to '\' character}}
+// no-newline-warning@+9 {{no newline at end of file}}
+// no-newline-note@+8 {{last newline deleted by splice here}}
+// no-newline-compat-warning@+7 {{C++98 requires newline at end of file}}
+// no-newline-compat-note@+6 {{last newline deleted by splice here}}
+// no-trigraphs-warning@+5 {{trigraph ignored}}
+// c23-error@+4 {{expected identifier or '('}}
+// cxx17-error@+3 {{expected unqualified-id}}
+
+
+int x; ??/
diff --git a/clang/test/Lexer/backslash-trigraph_space-ext-newline_eof.c b/clang/test/Lexer/backslash-trigraph_space-ext-newline_eof.c
new file mode 100644
index 00000000000000..fdf51000bd255e
--- /dev/null
+++ b/clang/test/Lexer/backslash-trigraph_space-ext-newline_eof.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -x c -std=c18 -fsyntax-only -pedantic -verify=expected,trigraphs,no-newline %s
+// RUN: %clang_cc1 -x c -std=c23 -fsyntax-only -pedantic -verify=expected,no-trigraphs,c23 %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++98 -pedantic -verify=expected,trigraphs,no-newline %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -Wc++98-compat-pedantic -verify=expected,trigraphs,no-newline-compat %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -verify=expected,trigraphs %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++17 -verify=expected,no-trigraphs,cxx17 %s
+
+// trigraphs-warning@+10 {{trigraph converted to '\' character}}
+// no-newline-warning@+9 {{no newline at end of file}}
+// no-newline-note@+8 {{last newline deleted by splice here}}
+// no-newline-compat-warning@+7 {{C++98 requires newline at end of file}}
+// no-newline-compat-note@+6 {{last newline deleted by splice here}}
+// no-trigraphs-warning@+5 {{trigraph ignored}}
+// c23-error@+4 {{expected identifier or '('}}
+// cxx17-error@+3 {{expected unqualified-id}}
+// trigraphs-warning@+2 {{backslash and newline separated by space}}
+// The next line intentionally has a trailing tab character.
+int x; ??/	
diff --git a/clang/test/Lexer/backslash_newline_eof.c b/clang/test/Lexer/backslash_newline_eof.c
new file mode 100644
index 00000000000000..0288efaf10bd43
--- /dev/null
+++ b/clang/test/Lexer/backslash_newline_eof.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -x c -fsyntax-only -pedantic -verify=expected,no-newline %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++98 -pedantic -verify=expected,no-newline %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -Wc++98-compat-pedantic -verify=expected,no-newline-compat %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -verify=expected,cxx11 %s
+
+// cxx11-no-diagnostics
+
+// no-newline-warning@+6 {{no newline at end of file}}
+// no-newline-note@+5 {{last newline deleted by splice here}}
+// no-newline-compat-warning@+4 {{C++98 requires newline at end of file}}
+// no-newline-compat-note@+3 {{last newline deleted by splice here}}
+
+
+int x; \
diff --git a/clang/test/Lexer/backslash_space-ext-newline_eof.c b/clang/test/Lexer/backslash_space-ext-newline_eof.c
new file mode 100644
index 00000000000000..d9cb8c38b144f8
--- /dev/null
+++ b/clang/test/Lexer/backslash_space-ext-newline_eof.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -x c -fsyntax-only -pedantic -verify=expected,no-newline %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++98 -pedantic -verify=expected,no-newline %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -Wc++98-compat-pedantic -verify=expected,no-newline-compat %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -verify=expected,cxx11 %s
+
+// no-newline-warning@+6 {{no newline at end of file}}
+// no-newline-note@+5 {{last newline deleted by splice here}}
+// no-newline-compat-warning@+4 {{C++98 requires newline at end of file}}
+// no-newline-compat-note@+3 {{last newline deleted by splice here}}
+// expected-warning@+2 {{backslash and newline separated by space}}
+// The next line intentionally has a trailing tab character.
+int x; \	
diff --git a/clang/test/Preprocessor/backslash_newline_eof.c b/clang/test/Preprocessor/backslash_newline_eof.c
deleted file mode 100644
index b41bc92a425144..00000000000000
--- a/clang/test/Preprocessor/backslash_newline_eof.c
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %clang_cc1 -x c -fsyntax-only -pedantic -verify %s
-// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++98 -pedantic -verify %s
-// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -Wc++98-compat-pedantic -verify=cxx11-compat %s
-// RUN: %clang_cc1 -x c++ -fsyntax-only -std=c++11 -verify=cxx11 %s
-
-// cxx11-no-diagnostics
-
-// expected-warning at +4 {{no newline at end of file}}
-// expected-note at +3 {{last newline deleted by splice here}}
-// cxx11-compat-warning at +2 {{C++98 requires newline at end of file}}
-// cxx11-compat-note at +1 {{last newline deleted by splice here}}
-int x; \

>From 1e8ca82a35e42fe8979bf7f86b76f71ea44d12a9 Mon Sep 17 00:00:00 2001
From: Mital Ashok <mital at mitalashok.co.uk>
Date: Thu, 4 Jul 2024 11:17:03 +0100
Subject: [PATCH 3/4] Hopefully this is more readable

---
 clang/lib/Lex/Lexer.cpp | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 0e540834b473ba..a5b3eaabb3a4dc 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3198,24 +3198,28 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
     } else {
       // While the file physically ends in a newline, the previous
       // line might have ended in a splice, so it would be deleted
-      const char *LastSpliceLocation = LastNewline.data();
-      while (LastSpliceLocation != BufferStart &&
-             isHorizontalWhitespace(*--LastSpliceLocation))
-        ;
-
-      bool LastIsSplice = *LastSpliceLocation == '\\';
-      if (*LastSpliceLocation == '/' && LangOpts.Trigraphs)
-        // Check for "??/" trigraph for "\"
-        LastIsSplice =
-            LastSpliceLocation != BufferStart && *--LastSpliceLocation == '?' &&
-            LastSpliceLocation != BufferStart && *--LastSpliceLocation == '?';
-
-      if (LastIsSplice) {
+      StringRef WithoutLastNewline =
+          StringRef(BufferStart, LastNewline.data() - BufferStart);
+      while (!WithoutLastNewline.empty()) {
+        if (isHorizontalWhitespace(WithoutLastNewline.back())) {
+          WithoutLastNewline = WithoutLastNewline.drop_back();
+        } else {
+          break;
+        }
+      }
+
+      if (WithoutLastNewline.ends_with('\\') ||
+          (LangOpts.Trigraphs && WithoutLastNewline.ends_with("??"
+                                                              "/"))) {
         PP->Diag(getSourceLocation(LastNewline.data(), LastNewline.size()),
                  DiagID);
-        Diag(LastSpliceLocation, diag::note_backslash_newline_eof)
-            << FixItHint::CreateRemoval(getSourceLocation(
-                   LastSpliceLocation, *LastSpliceLocation == '\\' ? 1 : 3));
+        std::size_t SpliceSize = WithoutLastNewline.back() == '\\' ? 1 : 3;
+        SourceLocation LastSpliceLocation =
+            getSourceLocation(WithoutLastNewline.data() +
+                                  (WithoutLastNewline.size() - SpliceSize),
+                              SpliceSize);
+        PP->Diag(LastSpliceLocation, diag::note_backslash_newline_eof)
+            << FixItHint::CreateRemoval(LastSpliceLocation);
       }
     }
   }

>From eca9a7a36ecf7dfa487bc74a9dda2137bb35b46a Mon Sep 17 00:00:00 2001
From: Mital Ashok <mital at mitalashok.co.uk>
Date: Thu, 4 Jul 2024 11:30:21 +0100
Subject: [PATCH 4/4] Found related GitHub issue

---
 clang/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7c0ac3a504f982..8493cc838013d5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -647,7 +647,7 @@ Improvements to Clang's diagnostics
 
 - Clang now shows implicit deduction guides when diagnosing overload resolution failure. #GH92393.
 
-- Clang now emits ``-Wnewline-eof`` when the last newline is deleted by a preceding backslash.
+- Clang now emits ``-Wnewline-eof`` when the last newline is deleted by a preceding backslash. #GH41571.
 
 Improvements to Clang's time-trace
 ----------------------------------



More information about the cfe-commits mailing list