[llvm] [MC,ELF] Emit warning if a string constant contains newline char. (PR #98060)
Dmitriy Chestnykh via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 9 05:31:14 PDT 2024
https://github.com/chestnykh updated https://github.com/llvm/llvm-project/pull/98060
>From af41795cc038ecf9e395f2ae9bb4038d56d906c9 Mon Sep 17 00:00:00 2001
From: Dmitry Chestnykh <dm.chestnykh at gmail.com>
Date: Mon, 8 Jul 2024 21:06:19 +0300
Subject: [PATCH 1/4] [MC,ELF] Emit warning if a string constant contains
newline char.
GAS emits warning about newline in the string constant
so make the same behaviour.
---
llvm/lib/MC/MCParser/AsmLexer.cpp | 7 ++++++-
llvm/test/MC/ELF/warn-newline-in-string-constant.s | 6 ++++++
2 files changed, 12 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/MC/ELF/warn-newline-in-string-constant.s
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index e08404ae0ad92..43f4a87c3c2d3 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cctype>
#include <cstdio>
@@ -646,13 +647,17 @@ AsmToken AsmLexer::LexQuote() {
return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
}
- // TODO: does gas allow multiline string constants?
+ // gas doesn't allow multiline string constants
+ // and emits a warning if a string constant contains newline character
while (CurChar != '"') {
if (CurChar == '\\') {
// Allow \", etc.
CurChar = getNextChar();
}
+ if (CurChar == '\n')
+ outs() << "Warning: unterminated string; newline inserted\n";
+
if (CurChar == EOF)
return ReturnError(TokStart, "unterminated string constant");
diff --git a/llvm/test/MC/ELF/warn-newline-in-string-constant.s b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
new file mode 100644
index 0000000000000..e126db30ee47a
--- /dev/null
+++ b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t | FileCheck %s
+
+.string "abcdefg
+12345678"
+
+// CHECK: Warning: unterminated string; newline inserted
>From 378ab9ad8e472543555c6414f2869b56eefaa028 Mon Sep 17 00:00:00 2001
From: Dmitry Chestnykh <dm.chestnykh at gmail.com>
Date: Tue, 9 Jul 2024 12:38:03 +0300
Subject: [PATCH 2/4] [MC,ELF] Move warning handling to parser code
Emit warning about newline characters in strings
for `.string`, '.ascii' and '.asciz' directives
like GAS.
---
llvm/lib/MC/MCParser/AsmLexer.cpp | 4 --
llvm/lib/MC/MCParser/AsmParser.cpp | 10 ++++
.../MC/ELF/warn-newline-in-string-constant.s | 50 ++++++++++++++++++-
3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 43f4a87c3c2d3..ef516b8e37ee4 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SaveAndRestore.h"
-#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cctype>
#include <cstdio>
@@ -655,9 +654,6 @@ AsmToken AsmLexer::LexQuote() {
CurChar = getNextChar();
}
- if (CurChar == '\n')
- outs() << "Warning: unterminated string; newline inserted\n";
-
if (CurChar == EOF)
return ReturnError(TokStart, "unterminated string constant");
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index f3caa90eedfb1..1e25eabb95049 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3125,6 +3125,16 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
do {
if (parseEscapedString(Data))
return true;
+
+ // Warn about newline characters in parsed string like GAS.
+ size_t NewlinePos = -1;
+ size_t DataSize = Data.size();
+ const char *Start = getTok().getLoc().getPointer() - DataSize - 1;
+
+ while ((NewlinePos = Data.find('\n', NewlinePos + 1)) < DataSize)
+ Warning(SMLoc::getFromPointer(Start + NewlinePos),
+ "unterminated string; newline inserted");
+
getStreamer().emitBytes(Data);
} while (!ZeroTerminated && getTok().is(AsmToken::String));
if (ZeroTerminated)
diff --git a/llvm/test/MC/ELF/warn-newline-in-string-constant.s b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
index e126db30ee47a..6e67e542b7832 100644
--- a/llvm/test/MC/ELF/warn-newline-in-string-constant.s
+++ b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
@@ -1,6 +1,52 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s 2>&1 -o %t | FileCheck %s --check-prefix=CHECK-WARN
.string "abcdefg
12345678"
-// CHECK: Warning: unterminated string; newline inserted
+.ascii "some test ascii
+
+sequence
+with
+newlines
+"
+
+.asciz "another test string
+
+with
+newline characters
+
+
+"
+
+// CHECK-WARN: warn-newline-in-string-constant.s:3:17: warning: unterminated string; newline inserted
+// CHECK-WARN: .string "abcdefg
+
+// CHECK-WARN: warn-newline-in-string-constant.s:6:24: warning: unterminated string; newline inserted
+// CHECK-WARN: .ascii "some test ascii
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:7:1: warning: unterminated string; newline inserted
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:8:9: warning: unterminated string; newline inserted
+// CHECK-WARN: sequence
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:9:5: warning: unterminated string; newline inserted
+// CHECK-WARN: with
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:10:9: warning: unterminated string; newline inserted
+// CHECK-WARN: newlines
+// CHECK-WARN: ^
+
+// CHECK-WATN: warn-newline-in-string-constant.s:13:28: warning: unterminated string; newline inserted
+// CHECK-WARN: .asciz "another test string
+// CHECK-WARN: warn-newline-in-string-constant.s:14:1: warning: unterminated string; newline inserted
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:15:5: warning: unterminated string; newline inserted
+// CHECK-WARN: with
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:16:19: warning: unterminated string; newline inserted
+// CHECK-WARN: newline characters
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:17:1: warning: unterminated string; newline inserted
+// CHECK-WARN: ^
+// CHECK-WARN: warn-newline-in-string-constant.s:18:1: warning: unterminated string; newline inserted
+// CHECK-WARN: ^
>From 491bf19042617a58b633485617a615d574db4b4d Mon Sep 17 00:00:00 2001
From: Dmitry Chestnykh <dm.chestnykh at gmail.com>
Date: Tue, 9 Jul 2024 15:19:36 +0300
Subject: [PATCH 3/4] [MC,AsmParser] Warn about newlines in some cases
Move the point of check into `parseEscapedString`.
Check if the `Warning()` returned true to stop compilation.
---
llvm/include/llvm/MC/MCParser/MCAsmParser.h | 2 +-
llvm/lib/MC/MCParser/AsmLexer.cpp | 2 --
llvm/lib/MC/MCParser/AsmParser.cpp | 21 +++++++--------
llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 2 +-
llvm/lib/MC/MCParser/MasmParser.cpp | 4 +--
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2 +-
.../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +-
.../AsmParser/WebAssemblyAsmParser.cpp | 2 +-
.../MC/ELF/warn-newline-in-string-constant.s | 26 ++++++++++++++-----
9 files changed, 35 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index faa72d5f3144c..de10fd74ed75e 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -286,7 +286,7 @@ class MCAsmParser {
/// Parse the current token as a string which may include escaped
/// characters and return the string contents.
- virtual bool parseEscapedString(std::string &Data) = 0;
+ virtual bool parseEscapedString(std::string &Data, bool WarnNewline) = 0;
/// Parse an angle-bracket delimited string at the current position if one is
/// present, returning the string contents.
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index ef516b8e37ee4..778ca340e1248 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -646,8 +646,6 @@ AsmToken AsmLexer::LexQuote() {
return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
}
- // gas doesn't allow multiline string constants
- // and emits a warning if a string constant contains newline character
while (CurChar != '"') {
if (CurChar == '\\') {
// Allow \", etc.
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 1e25eabb95049..e93f73f118d93 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -675,7 +675,7 @@ class AsmParser : public MCAsmParser {
bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
bool parseDirectiveElse(SMLoc DirectiveLoc); // ".else"
bool parseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
- bool parseEscapedString(std::string &Data) override;
+ bool parseEscapedString(std::string &Data, bool WarnNewline = false) override;
bool parseAngleBracketString(std::string &Data) override;
const MCExpr *applyModifierToExpr(const MCExpr *E,
@@ -3021,7 +3021,7 @@ bool AsmParser::parseDirectiveSet(StringRef IDVal, AssignmentKind Kind) {
return false;
}
-bool AsmParser::parseEscapedString(std::string &Data) {
+bool AsmParser::parseEscapedString(std::string &Data, bool WarnNewline) {
if (check(getTok().isNot(AsmToken::String), "expected string"))
return true;
@@ -3029,6 +3029,12 @@ bool AsmParser::parseEscapedString(std::string &Data) {
StringRef Str = getTok().getStringContents();
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
if (Str[i] != '\\') {
+ if (Str[i] == '\n' && WarnNewline) {
+ SMLoc NewlineLoc =
+ SMLoc::getFromPointer(getTok().getLoc().getPointer() + i + 1);
+ if (Warning(NewlineLoc, "unterminated string; newline inserted"))
+ return true;
+ }
Data += Str[i];
continue;
}
@@ -3123,18 +3129,9 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
// Only support spaces as separators for .ascii directive for now. See the
// discusssion at https://reviews.llvm.org/D91460 for more details.
do {
- if (parseEscapedString(Data))
+ if (parseEscapedString(Data, /* WarnNewline */ true))
return true;
- // Warn about newline characters in parsed string like GAS.
- size_t NewlinePos = -1;
- size_t DataSize = Data.size();
- const char *Start = getTok().getLoc().getPointer() - DataSize - 1;
-
- while ((NewlinePos = Data.find('\n', NewlinePos + 1)) < DataSize)
- Warning(SMLoc::getFromPointer(Start + NewlinePos),
- "unterminated string; newline inserted");
-
getStreamer().emitBytes(Data);
} while (!ZeroTerminated && getTok().is(AsmToken::String));
if (ZeroTerminated)
diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index a97b72997ae39..568e6d078e709 100644
--- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -613,7 +613,7 @@ bool DarwinAsmParser::parseDirectiveLinkerOption(StringRef IDVal, SMLoc) {
return TokError("expected string in '" + Twine(IDVal) + "' directive");
std::string Data;
- if (getParser().parseEscapedString(Data))
+ if (getParser().parseEscapedString(Data, false))
return true;
Args.push_back(Data);
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 653cc64b4c36a..6c660434ce11d 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1029,7 +1029,7 @@ class MasmParser : public MCAsmParser {
bool CaseInsensitive);
bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
- bool parseEscapedString(std::string &Data) override;
+ bool parseEscapedString(std::string &Data, bool WarnNewline = false) override;
bool parseAngleBracketString(std::string &Data) override;
// Macro-like directives
@@ -3509,7 +3509,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
return false;
}
-bool MasmParser::parseEscapedString(std::string &Data) {
+bool MasmParser::parseEscapedString(std::string &Data, bool WarnNewline) {
if (check(getTok().isNot(AsmToken::String), "expected string"))
return true;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b08957d22ee74..20b42e5c67bd3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5337,7 +5337,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
std::string TargetIDDirective;
SMLoc TargetStart = getTok().getLoc();
- if (getParser().parseEscapedString(TargetIDDirective))
+ if (getParser().parseEscapedString(TargetIDDirective, /* WarnNewline */ false))
return true;
SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index e54314cc7d00a..242823bd61dae 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -11878,7 +11878,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
return Error(Parser.getTok().getLoc(), "bad string constant");
if (Tag == ARMBuildAttrs::also_compatible_with) {
- if (Parser.parseEscapedString(EscapedValue))
+ if (Parser.parseEscapedString(EscapedValue, /* WarnNewline */ false))
return Error(Parser.getTok().getLoc(), "bad escaped string constant");
StringValue = EscapedValue;
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index c63740d267819..1c088c097a2f3 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -980,7 +980,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
if (CheckDataSection())
return ParseStatus::Failure;
std::string S;
- if (Parser.parseEscapedString(S))
+ if (Parser.parseEscapedString(S, /* WarnNewline */ false))
return error("Cannot parse string constant: ", Lexer.getTok());
Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
return expect(AsmToken::EndOfStatement, "EOL");
diff --git a/llvm/test/MC/ELF/warn-newline-in-string-constant.s b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
index 6e67e542b7832..d48e1cce2e466 100644
--- a/llvm/test/MC/ELF/warn-newline-in-string-constant.s
+++ b/llvm/test/MC/ELF/warn-newline-in-string-constant.s
@@ -1,13 +1,13 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s 2>&1 -o %t | FileCheck %s --check-prefix=CHECK-WARN
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s 2>&1 -o %t | FileCheck --strict-whitespace %s --check-prefix=CHECK-WARN
-.string "abcdefg
+.string "abcd\xFFefg
12345678"
.ascii "some test ascii
sequence
with
-newlines
+newlines\x0A
"
.asciz "another test string
@@ -18,8 +18,8 @@ newline characters
"
-// CHECK-WARN: warn-newline-in-string-constant.s:3:17: warning: unterminated string; newline inserted
-// CHECK-WARN: .string "abcdefg
+// CHECK-WARN: warn-newline-in-string-constant.s:3:21: warning: unterminated string; newline inserted
+// CHECK-WARN: .string "abcd\xFFefg
// CHECK-WARN: warn-newline-in-string-constant.s:6:24: warning: unterminated string; newline inserted
// CHECK-WARN: .ascii "some test ascii
@@ -32,8 +32,8 @@ newline characters
// CHECK-WARN: warn-newline-in-string-constant.s:9:5: warning: unterminated string; newline inserted
// CHECK-WARN: with
// CHECK-WARN: ^
-// CHECK-WARN: warn-newline-in-string-constant.s:10:9: warning: unterminated string; newline inserted
-// CHECK-WARN: newlines
+// CHECK-WARN: warn-newline-in-string-constant.s:10:13: warning: unterminated string; newline inserted
+// CHECK-WARN: newlines\x0A
// CHECK-WARN: ^
// CHECK-WATN: warn-newline-in-string-constant.s:13:28: warning: unterminated string; newline inserted
@@ -50,3 +50,15 @@ newline characters
// CHECK-WARN: ^
// CHECK-WARN: warn-newline-in-string-constant.s:18:1: warning: unterminated string; newline inserted
// CHECK-WARN: ^
+
+.ascii "test\nstring\xFF\n\n\xFF"
+
+// CHECK-WARN-NOT: warn-newline-in-string-constant.s:55{{.*}}
+
+.asciz "\n\n\ntest_string\x0A"
+
+// CHECK-WARN-NOT: warn-newline-in-string-constant.s:59{{.*}}
+
+.string "1234\n\xFF\n\xFF\n"
+
+// CHECK-WARN-NOT: warn-newline-in-string-constant.s:63{{.*}}
\ No newline at end of file
>From 9732c3a0fcdcdc7f96875412c1e8ee01739c15ae Mon Sep 17 00:00:00 2001
From: Dmitry Chestnykh <dm.chestnykh at gmail.com>
Date: Tue, 9 Jul 2024 15:33:44 +0300
Subject: [PATCH 4/4] [MC,AMDGPU] Correct code format
---
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 20b42e5c67bd3..f989b7a0de402 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5337,7 +5337,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
std::string TargetIDDirective;
SMLoc TargetStart = getTok().getLoc();
- if (getParser().parseEscapedString(TargetIDDirective, /* WarnNewline */ false))
+ if (getParser().parseEscapedString(TargetIDDirective,
+ /* WarnNewline */ false))
return true;
SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
More information about the llvm-commits
mailing list