[PATCH] D101923: [TableGen] Make the NUL character invalid in .td files
Paul C. Anagnostopoulos via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 10 09:10:16 PDT 2021
Paul-C-Anagnostopoulos updated this revision to Diff 344082.
Paul-C-Anagnostopoulos added a comment.
I used David's trick to eliminate the NUL characters in the test file. They are now represented by at signs (@) and translated on the fly.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D101923/new/
https://reviews.llvm.org/D101923
Files:
llvm/lib/TableGen/TGLexer.cpp
llvm/test/TableGen/nul-char.td
Index: llvm/test/TableGen/nul-char.td
===================================================================
--- /dev/null
+++ llvm/test/TableGen/nul-char.td
@@ -0,0 +1,28 @@
+// RUN: sed -e 's/@/\x00/g' %s > %t
+// RUN: not llvm-tblgen -DERROR1 %t 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+// This test file checks that NUL is treated as an invalid character.
+// Each at sign is replaced with a NUL.
+
+#ifdef ERROR1
+
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: expected ';' after declaration
+
+def Foo@ {
+ int @ ID = 42;
+}
+
+@
+
+// Comment with a NUL @ there. They are ignored in comments.
+
+def Bar {
+ int Biggie = 12345@789;
+}
+
+#endif
+
Index: llvm/lib/TableGen/TGLexer.cpp
===================================================================
--- llvm/lib/TableGen/TGLexer.cpp
+++ llvm/lib/TableGen/TGLexer.cpp
@@ -108,16 +108,18 @@
switch (CurChar) {
default:
return (unsigned char)CurChar;
+
case 0: {
- // A nul character in the stream is either the end of the current buffer or
- // a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf.end())
- return 0; // Just whitespace.
-
- // Otherwise, return end of file.
- --CurPtr; // Another call to lex will return EOF again.
- return EOF;
+ // A NUL character in the stream is either the end of the current buffer or
+ // a spurious NUL in the file. Disambiguate that here.
+ if (CurPtr-1 == CurBuf.end()) {
+ --CurPtr; // Arrange for another call to return EOF again.
+ return EOF;
+ }
+ PrintError(getLoc(), "NUL character is invalid in source; treated as space");
+ return ' ';
}
+
case '\n':
case '\r':
// Handle the newline character by ignoring it and incrementing the line
@@ -197,7 +199,6 @@
PrintFatalError("getNextChar() must never return '\r'");
return tgtok::Error;
- case 0:
case ' ':
case '\t':
// Ignore whitespace.
@@ -415,22 +416,12 @@
return false;
}
+/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
+/// Or we may end up at the end of the buffer.
void TGLexer::SkipBCPLComment() {
++CurPtr; // skip the second slash.
- while (true) {
- switch (*CurPtr) {
- case '\n':
- case '\r':
- return; // Newline is end of comment.
- case 0:
- // If this is the end of the buffer, end the comment.
- if (CurPtr == CurBuf.end())
- return;
- break;
- }
- // Otherwise, skip the character.
- ++CurPtr;
- }
+ auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
+ CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
}
/// SkipCComment - This skips C-style /**/ comments. The only difference from C
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101923.344082.patch
Type: text/x-patch
Size: 2997 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210510/d1d5d64e/attachment-0001.bin>
More information about the llvm-commits
mailing list