[llvm] 6ca2bdb - [TableGen] Make the NUL character invalid in .td files

Paul C. Anagnostopoulos via llvm-commits llvm-commits at lists.llvm.org
Tue May 11 06:21:01 PDT 2021


Author: Paul C. Anagnostopoulos
Date: 2021-05-11T09:20:42-04:00
New Revision: 6ca2bdb03c0fdb6736ed5c6a30d7bec6b557d1a0

URL: https://github.com/llvm/llvm-project/commit/6ca2bdb03c0fdb6736ed5c6a30d7bec6b557d1a0
DIFF: https://github.com/llvm/llvm-project/commit/6ca2bdb03c0fdb6736ed5c6a30d7bec6b557d1a0.diff

LOG: [TableGen] Make the NUL character invalid in .td files

Differential Revision: https://reviews.llvm.org/D101923

Added: 
    llvm/test/TableGen/nul-char.td

Modified: 
    llvm/lib/TableGen/TGLexer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 91229818077f8..2acac63ce8439 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -108,16 +108,19 @@ int TGLexer::getNextChar() {
   switch (CurChar) {
   default:
     return (unsigned char)CurChar;
+
   case 0: {
-    // A nul character in the stream is either the end of the current buffer or
-    // a random nul in the file.  Disambiguate that here.
-    if (CurPtr-1 != CurBuf.end())
-      return 0;  // Just whitespace.
-
-    // Otherwise, return end of file.
-    --CurPtr;  // Another call to lex will return EOF again.
-    return EOF;
+    // A NUL character in the stream is either the end of the current buffer or
+    // a spurious NUL in the file.  Disambiguate that here.
+    if (CurPtr - 1 == CurBuf.end()) {
+      --CurPtr; // Arrange for another call to return EOF again.
+      return EOF;
+    }
+    PrintError(getLoc(),
+               "NUL character is invalid in source; treated as space");
+    return ' ';
   }
+
   case '\n':
   case '\r':
     // Handle the newline character by ignoring it and incrementing the line
@@ -197,7 +200,6 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
     PrintFatalError("getNextChar() must never return '\r'");
     return tgtok::Error;
 
-  case 0:
   case ' ':
   case '\t':
     // Ignore whitespace.
@@ -415,22 +417,12 @@ bool TGLexer::LexInclude() {
   return false;
 }
 
+/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
+/// Or we may end up at the end of the buffer.
 void TGLexer::SkipBCPLComment() {
   ++CurPtr;  // skip the second slash.
-  while (true) {
-    switch (*CurPtr) {
-    case '\n':
-    case '\r':
-      return;  // Newline is end of comment.
-    case 0:
-      // If this is the end of the buffer, end the comment.
-      if (CurPtr == CurBuf.end())
-        return;
-      break;
-    }
-    // Otherwise, skip the character.
-    ++CurPtr;
-  }
+  auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
+  CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
 }
 
 /// SkipCComment - This skips C-style /**/ comments.  The only difference from C

diff --git a/llvm/test/TableGen/nul-char.td b/llvm/test/TableGen/nul-char.td
new file mode 100644
index 0000000000000..fc56b6a87829a
--- /dev/null
+++ b/llvm/test/TableGen/nul-char.td
@@ -0,0 +1,28 @@
+// RUN: sed -e 's/@/\x00/g' %s > %t
+// RUN: not llvm-tblgen -DERROR1 %t 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+// This test file checks that NUL is treated as an invalid character.
+// Each at sign is replaced with a NUL before running the test.
+
+#ifdef ERROR1
+
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: expected ';' after declaration
+
+def Foo@ {
+  int @ ID = 42;
+}
+
+@
+
+// Comment with a NUL @ there. They are ignored in comments.
+
+def Bar {
+  int Biggie = 12345@789;
+}
+
+#endif
+


        


More information about the llvm-commits mailing list