[PATCH] D31765: Skip Unicode character expansion in assembly files

Salman Arif via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 6 08:46:47 PDT 2017


salari01 created this revision.

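When the C preprocessor is run on assembly files (either because the file has a capital `.S` extension or because `-xassembler-with-cpp` was passed), the lexer should not interpret `\u` as the start of a Unicode escape sequence (a universal character name). In assembly, `\u` can be a reference to a macro argument whose name starts with `u`, for example `.word \u` inside a `.macro` that takes an argument named `u`. This patch skips UCN processing when lexing in assembler-with-cpp mode, so such code no longer triggers the "\u used with no following hex digits" warning.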


https://reviews.llvm.org/D31765

Files:
  lib/Lex/Lexer.cpp
  test/Lexer/asm-preproc-no-unicode.s


Index: test/Lexer/asm-preproc-no-unicode.s
===================================================================
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,13 @@
+// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING
+// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA
+
+// WARNING-NOT: warning: \u used with no following hex digits
+// DATA: Contents of section data:
+// DATA-NEXT: 0000 efbeadde
+
+    .warning  // required to avoid FileCheck empty input error
+    .macro foo, u, name
+        .section \name, "a", %progbits
+        .word \u
+    .endm
+    foo 0xdeadbeef, data
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-          return true; // KeepWhitespaceMode
+    if (!LangOpts.AsmPreprocessor) {
+      if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+        if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+            return true; // KeepWhitespaceMode
+
+          // We only saw whitespace, so just try again with this lexer.
+          // (We manually eliminate the tail call to avoid recursion.)
+          goto LexNextToken;
+        }
 
-        // We only saw whitespace, so just try again with this lexer.
-        // (We manually eliminate the tail call to avoid recursion.)
-        goto LexNextToken;
+        return LexUnicode(Result, CodePoint, CurPtr);
       }
-
-      return LexUnicode(Result, CodePoint, CurPtr);
     }
 
     Kind = tok::unknown;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D31765.94378.patch
Type: text/x-patch
Size: 1968 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20170406/35882f02/attachment.bin>


More information about the cfe-commits mailing list