[llvm] [MC,ELF] Emit warning if a string constant contains newline char. (PR #98060)

Dmitriy Chestnykh via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 9 02:37:40 PDT 2024


https://github.com/chestnykh updated https://github.com/llvm/llvm-project/pull/98060

>From af41795cc038ecf9e395f2ae9bb4038d56d906c9 Mon Sep 17 00:00:00 2001
From: Dmitry Chestnykh <dm.chestnykh at gmail.com>
Date: Mon, 8 Jul 2024 21:06:19 +0300
Subject: [PATCH 1/2] [MC,ELF] Emit warning if a string constant contains
 newline char.

GAS emits a warning when a string constant contains a newline,
so match that behaviour.
---
 llvm/lib/MC/MCParser/AsmLexer.cpp                  | 7 ++++++-
 llvm/test/MC/ELF/warn-newline-in-string-constant.s | 6 ++++++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/MC/ELF/warn-newline-in-string-constant.s

diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index e08404ae0ad92..43f4a87c3c2d3 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cctype>
 #include <cstdio>
@@ -646,13 +647,17 @@ AsmToken AsmLexer::LexQuote() {
     return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
   }
 
-  // TODO: does gas allow multiline string constants?
+  // gas doesn't allow multiline string constants
+  // and emits a warning if a string constant contains newline character
   while (CurChar != '"') {
     if (CurChar == '\\') {
       // Allow \", etc.
       CurChar = getNextChar();
     }
 
+    if (CurChar == '\n')
+      outs() << "Warning: unterminated string; newline inserted\n";
+
     if (CurChar == EOF)
       return ReturnError(TokStart, "unterminated string constant");
 
diff --git a/llvm/test/MC/ELF/warn-newline-in-string-constant.s b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
new file mode 100644
index 0000000000000..e126db30ee47a
--- /dev/null
+++ b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t | FileCheck %s
+
+.string "abcdefg
+12345678"
+
+// CHECK: Warning: unterminated string; newline inserted

>From 378ab9ad8e472543555c6414f2869b56eefaa028 Mon Sep 17 00:00:00 2001
From: Dmitry Chestnykh <dm.chestnykh at gmail.com>
Date: Tue, 9 Jul 2024 12:38:03 +0300
Subject: [PATCH 2/2] [MC,ELF] Move warning handling to parser code

Emit a warning about newline characters in strings
for the `.string`, `.ascii` and `.asciz` directives,
as GAS does.
---
 llvm/lib/MC/MCParser/AsmLexer.cpp             |  4 --
 llvm/lib/MC/MCParser/AsmParser.cpp            | 10 ++++
 .../MC/ELF/warn-newline-in-string-constant.s  | 50 ++++++++++++++++++-
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 43f4a87c3c2d3..ef516b8e37ee4 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SaveAndRestore.h"
-#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cctype>
 #include <cstdio>
@@ -655,9 +654,6 @@ AsmToken AsmLexer::LexQuote() {
       CurChar = getNextChar();
     }
 
-    if (CurChar == '\n')
-      outs() << "Warning: unterminated string; newline inserted\n";
-
     if (CurChar == EOF)
       return ReturnError(TokStart, "unterminated string constant");
 
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index f3caa90eedfb1..1e25eabb95049 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3125,6 +3125,16 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
     do {
       if (parseEscapedString(Data))
         return true;
+
+      // Warn about newline characters in parsed string like GAS.
+      size_t NewlinePos = -1;
+      size_t DataSize = Data.size();
+      const char *Start = getTok().getLoc().getPointer() - DataSize - 1;
+
+      while ((NewlinePos = Data.find('\n', NewlinePos + 1)) < DataSize)
+        Warning(SMLoc::getFromPointer(Start + NewlinePos),
+                "unterminated string; newline inserted");
+
       getStreamer().emitBytes(Data);
     } while (!ZeroTerminated && getTok().is(AsmToken::String));
     if (ZeroTerminated)
diff --git a/llvm/test/MC/ELF/warn-newline-in-string-constant.s b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
index e126db30ee47a..6e67e542b7832 100644
--- a/llvm/test/MC/ELF/warn-newline-in-string-constant.s
+++ b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
@@ -1,6 +1,52 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s 2>&1 -o %t | FileCheck %s --check-prefix=CHECK-WARN
 
 .string "abcdefg
 12345678"
 
-// CHECK: Warning: unterminated string; newline inserted
+.ascii "some test ascii
+
+sequence
+with
+newlines
+"
+
+.asciz "another test string
+
+with
+newline characters
+
+
+"
+
+// CHECK-WARN:        warn-newline-in-string-constant.s:3:17: warning: unterminated string; newline inserted
+// CHECK-WARN:  .string "abcdefg
+
+// CHECK-WARN:   warn-newline-in-string-constant.s:6:24: warning: unterminated string; newline inserted
+// CHECK-WARN:  .ascii "some test ascii
+// CHECK-WARN:                         ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:7:1: warning: unterminated string; newline inserted
+// CHECK-WARN:   ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:8:9: warning: unterminated string; newline inserted
+// CHECK-WARN:   sequence
+// CHECK-WARN:           ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:9:5: warning: unterminated string; newline inserted
+// CHECK-WARN:   with
+// CHECK-WARN:        ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:10:9: warning: unterminated string; newline inserted
+// CHECK-WARN:   newlines
+// CHECK-WARN:           ^
+
+// CHECK-WARN:   warn-newline-in-string-constant.s:13:28: warning: unterminated string; newline inserted
+// CHECK-WARN:   .asciz "another test string
+// CHECK-WARN:   warn-newline-in-string-constant.s:14:1: warning: unterminated string; newline inserted
+// CHECK-WARN:   ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:15:5: warning: unterminated string; newline inserted
+// CHECK-WARN:   with
+// CHECK-WARN:        ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:16:19: warning: unterminated string; newline inserted
+// CHECK-WARN:   newline characters
+// CHECK-WARN:                      ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:17:1: warning: unterminated string; newline inserted
+// CHECK-WARN:   ^
+// CHECK-WARN:   warn-newline-in-string-constant.s:18:1: warning: unterminated string; newline inserted
+// CHECK-WARN:   ^



More information about the llvm-commits mailing list