[PATCH] D101923: [TableGen] Make the NUL character invalid in .td files

Paul C. Anagnostopoulos via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 6 06:51:18 PDT 2021


Paul-C-Anagnostopoulos updated this revision to Diff 343394.
Paul-C-Anagnostopoulos added a comment.

I added a test (llvm/test/TableGen/nul-char.td). Note that the test's .td file contains NUL characters.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101923/new/

https://reviews.llvm.org/D101923

Files:
  llvm/lib/TableGen/TGLexer.cpp
  llvm/test/TableGen/nul-char.td


Index: llvm/lib/TableGen/TGLexer.cpp
===================================================================
--- llvm/lib/TableGen/TGLexer.cpp
+++ llvm/lib/TableGen/TGLexer.cpp
@@ -108,16 +108,18 @@
   switch (CurChar) {
   default:
     return (unsigned char)CurChar;
+
   case 0: {
-    // A nul character in the stream is either the end of the current buffer or
-    // a random nul in the file.  Disambiguate that here.
-    if (CurPtr-1 != CurBuf.end())
-      return 0;  // Just whitespace.
-
-    // Otherwise, return end of file.
-    --CurPtr;  // Another call to lex will return EOF again.
-    return EOF;
+    // A NUL character in the stream is either the end of the current buffer or
+    // a spurious NUL in the file.  Disambiguate that here.
+    if (CurPtr-1 == CurBuf.end()) {
+      --CurPtr; // Arrange for another call to return EOF again.
+      return EOF;
+    }
+    PrintError(getLoc(), "NUL character is invalid in source; treated as space");
+    return ' ';
   }
+
   case '\n':
   case '\r':
     // Handle the newline character by ignoring it and incrementing the line
@@ -197,7 +199,6 @@
     PrintFatalError("getNextChar() must never return '\r'");
     return tgtok::Error;
 
-  case 0:
   case ' ':
   case '\t':
     // Ignore whitespace.
@@ -415,22 +416,12 @@
   return false;
 }
 
+/// SkipBCPLComment - Skip over the comment, leaving CurPtr at the next CR or
+/// LF, or at the end of the buffer if the comment is not newline-terminated.
 void TGLexer::SkipBCPLComment() {
   ++CurPtr;  // skip the second slash.
-  while (true) {
-    switch (*CurPtr) {
-    case '\n':
-    case '\r':
-      return;  // Newline is end of comment.
-    case 0:
-      // If this is the end of the buffer, end the comment.
-      if (CurPtr == CurBuf.end())
-        return;
-      break;
-    }
-    // Otherwise, skip the character.
-    ++CurPtr;
-  }
+  auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
+  CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
 }
 
 /// SkipCComment - This skips C-style /**/ comments.  The only difference from C


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101923.343394.patch
Type: text/x-patch
Size: 2079 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210506/1defaf6a/attachment.bin>


More information about the llvm-commits mailing list