[flang-commits] [clang-tools-extra] [llvm] [clang] [compiler-rt] [flang] [lldb] [lld] [libcxx] [YAMLParser] Unfold multi-line scalar values (PR #70898)
Igor Kudrin via flang-commits
flang-commits at lists.llvm.org
Wed Nov 1 12:29:35 PDT 2023
https://github.com/igorkudrin updated https://github.com/llvm/llvm-project/pull/70898
>From 113c03bbf773c71d329ab2afd063753365e4ac68 Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin at accesssoftek.com>
Date: Thu, 26 Oct 2023 13:19:08 -0700
Subject: [PATCH] [YAMLParser] Unfold multi-line scalar values
Long scalar values can be split into multiple lines to improve
readability. The rules are described in Section 6.5. "Line Folding",
https://yaml.org/spec/1.2.2/#65-line-folding. In addition, for flow
scalar styles, the Spec states that "All leading and trailing white
space characters on each line are excluded from the content",
https://yaml.org/spec/1.2.2/#73-flow-scalar-styles.
The patch implements these unfolding rules for double-quoted,
single-quoted, and plain scalars.
---
llvm/include/llvm/Support/YAMLParser.h | 9 +-
llvm/lib/Support/YAMLParser.cpp | 364 +++++++++++++-----------
llvm/test/YAMLParser/spec-09-01.test | 11 +-
llvm/test/YAMLParser/spec-09-02.test | 31 +-
llvm/test/YAMLParser/spec-09-03.test | 7 +-
llvm/test/YAMLParser/spec-09-04.test | 3 +-
llvm/test/YAMLParser/spec-09-05.test | 7 +-
llvm/test/YAMLParser/spec-09-06.test | 3 +-
llvm/test/YAMLParser/spec-09-07.test | 11 +-
llvm/test/YAMLParser/spec-09-08.test | 15 +-
llvm/test/YAMLParser/spec-09-09.test | 7 +-
llvm/test/YAMLParser/spec-09-10.test | 3 +-
llvm/test/YAMLParser/spec-09-11.test | 6 +-
llvm/test/YAMLParser/spec-09-13.test | 11 +-
llvm/test/YAMLParser/spec-09-16.test | 17 +-
llvm/test/YAMLParser/spec-09-17.test | 3 +-
llvm/test/YAMLParser/spec1.2-07-05.test | 8 +
llvm/test/YAMLParser/spec1.2-07-06.test | 7 +
llvm/test/YAMLParser/spec1.2-07-09.test | 7 +
llvm/test/YAMLParser/spec1.2-07-12.test | 7 +
llvm/test/YAMLParser/spec1.2-07-14.test | 23 ++
21 files changed, 367 insertions(+), 193 deletions(-)
create mode 100644 llvm/test/YAMLParser/spec1.2-07-05.test
create mode 100644 llvm/test/YAMLParser/spec1.2-07-06.test
create mode 100644 llvm/test/YAMLParser/spec1.2-07-09.test
create mode 100644 llvm/test/YAMLParser/spec1.2-07-12.test
create mode 100644 llvm/test/YAMLParser/spec1.2-07-14.test
diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h
index f4767641647c217..9d95a1e13a0dff4 100644
--- a/llvm/include/llvm/Support/YAMLParser.h
+++ b/llvm/include/llvm/Support/YAMLParser.h
@@ -240,9 +240,14 @@ class ScalarNode final : public Node {
private:
StringRef Value;
- StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
- StringRef::size_type Start,
+ StringRef getDoubleQuotedValue(StringRef UnquotedValue,
SmallVectorImpl<char> &Storage) const;
+
+ static StringRef getSingleQuotedValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage);
+
+ static StringRef getPlainValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage);
};
/// A block scalar node is an opaque datum that can be presented as a
diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index 1422e40f91944ae..96b9aa95a96b3a6 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -2030,187 +2030,219 @@ bool Node::failed() const {
}
StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
- // TODO: Handle newlines properly. We need to remove leading whitespace.
- if (Value[0] == '"') { // Double quoted.
- // Pull off the leading and trailing "s.
- StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
- // Search for characters that would require unescaping the value.
- StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
- if (i != StringRef::npos)
- return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+ if (Value[0] == '"')
+ return getDoubleQuotedValue(Value, Storage);
+ if (Value[0] == '\'')
+ return getSingleQuotedValue(Value, Storage);
+ return getPlainValue(Value, Storage);
+}
+
+static StringRef
+parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage,
+ StringRef LookupChars,
+ std::function<StringRef(StringRef, SmallVectorImpl<char> &)>
+ UnescapeCallback) {
+ size_t I = UnquotedValue.find_first_of(LookupChars);
+ if (I == StringRef::npos)
return UnquotedValue;
- } else if (Value[0] == '\'') { // Single quoted.
- // Pull off the leading and trailing 's.
- StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
- StringRef::size_type i = UnquotedValue.find('\'');
- if (i != StringRef::npos) {
- // We're going to need Storage.
- Storage.clear();
- Storage.reserve(UnquotedValue.size());
- for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
- StringRef Valid(UnquotedValue.begin(), i);
- llvm::append_range(Storage, Valid);
- Storage.push_back('\'');
- UnquotedValue = UnquotedValue.substr(i + 2);
- }
- llvm::append_range(Storage, UnquotedValue);
- return StringRef(Storage.begin(), Storage.size());
- }
- return UnquotedValue;
- }
- // Plain.
- // Trim whitespace ('b-char' and 's-white').
- // NOTE: Alternatively we could change the scanner to not include whitespace
- // here in the first place.
- return Value.rtrim("\x0A\x0D\x20\x09");
-}
-StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
- , StringRef::size_type i
- , SmallVectorImpl<char> &Storage)
- const {
- // Use Storage to build proper value.
Storage.clear();
Storage.reserve(UnquotedValue.size());
- for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
- // Insert all previous chars into Storage.
- StringRef Valid(UnquotedValue.begin(), i);
- llvm::append_range(Storage, Valid);
- // Chop off inserted chars.
- UnquotedValue = UnquotedValue.substr(i);
-
- assert(!UnquotedValue.empty() && "Can't be empty!");
-
- // Parse escape or line break.
- switch (UnquotedValue[0]) {
- case '\r':
- case '\n':
- Storage.push_back('\n');
- if ( UnquotedValue.size() > 1
- && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
- UnquotedValue = UnquotedValue.substr(1);
- UnquotedValue = UnquotedValue.substr(1);
- break;
- default:
- if (UnquotedValue.size() == 1) {
- Token T;
- T.Range = StringRef(UnquotedValue.begin(), 1);
- setError("Unrecognized escape code", T);
- return "";
- }
- UnquotedValue = UnquotedValue.substr(1);
- switch (UnquotedValue[0]) {
- default: {
- Token T;
- T.Range = StringRef(UnquotedValue.begin(), 1);
- setError("Unrecognized escape code", T);
- return "";
- }
- case '\r':
- case '\n':
- // Remove the new line.
- if ( UnquotedValue.size() > 1
- && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
- UnquotedValue = UnquotedValue.substr(1);
- // If this was just a single byte newline, it will get skipped
- // below.
- break;
- case '0':
- Storage.push_back(0x00);
- break;
- case 'a':
- Storage.push_back(0x07);
- break;
- case 'b':
- Storage.push_back(0x08);
- break;
- case 't':
- case 0x09:
- Storage.push_back(0x09);
- break;
- case 'n':
- Storage.push_back(0x0A);
- break;
- case 'v':
- Storage.push_back(0x0B);
- break;
- case 'f':
- Storage.push_back(0x0C);
- break;
- case 'r':
- Storage.push_back(0x0D);
- break;
- case 'e':
- Storage.push_back(0x1B);
- break;
+ char LastNewLineAddedAs = '\0';
+ for (; I != StringRef::npos; I = UnquotedValue.find_first_of(LookupChars)) {
+ if (UnquotedValue[I] != '\x0D' && UnquotedValue[I] != '\x0A') {
+ llvm::append_range(Storage, UnquotedValue.take_front(I));
+ UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage);
+ LastNewLineAddedAs = '\0';
+ continue;
+ }
+ if (size_t LastNonSWhite = UnquotedValue.find_last_not_of("\x20\x09", I);
+ LastNonSWhite != StringRef::npos) {
+ llvm::append_range(Storage, UnquotedValue.take_front(LastNonSWhite + 1));
+ Storage.push_back(' ');
+ LastNewLineAddedAs = ' ';
+ } else {
+ // Note: we can't just check if the last character in Storage is ' ',
+ // '\n', or something else; that would give a wrong result for double
+ // quoted values containing an escaped space character before a new-line
+ // character.
+ switch (LastNewLineAddedAs) {
case ' ':
- Storage.push_back(0x20);
- break;
- case '"':
- Storage.push_back(0x22);
- break;
- case '/':
- Storage.push_back(0x2F);
- break;
- case '\\':
- Storage.push_back(0x5C);
- break;
- case 'N':
- encodeUTF8(0x85, Storage);
+ assert(!Storage.empty() && Storage.back() == ' ');
+ Storage.back() = '\n';
+ LastNewLineAddedAs = '\n';
break;
- case '_':
- encodeUTF8(0xA0, Storage);
- break;
- case 'L':
- encodeUTF8(0x2028, Storage);
+ case '\n':
+ assert(!Storage.empty() && Storage.back() == '\n');
+ Storage.push_back('\n');
break;
- case 'P':
- encodeUTF8(0x2029, Storage);
+ default:
+ Storage.push_back(' ');
+ LastNewLineAddedAs = ' ';
break;
- case 'x': {
- if (UnquotedValue.size() < 3)
- // TODO: Report error.
- break;
- unsigned int UnicodeScalarValue;
- if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
- // TODO: Report error.
- UnicodeScalarValue = 0xFFFD;
- encodeUTF8(UnicodeScalarValue, Storage);
- UnquotedValue = UnquotedValue.substr(2);
- break;
- }
- case 'u': {
- if (UnquotedValue.size() < 5)
- // TODO: Report error.
- break;
- unsigned int UnicodeScalarValue;
- if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
- // TODO: Report error.
- UnicodeScalarValue = 0xFFFD;
- encodeUTF8(UnicodeScalarValue, Storage);
- UnquotedValue = UnquotedValue.substr(4);
- break;
- }
- case 'U': {
- if (UnquotedValue.size() < 9)
- // TODO: Report error.
- break;
- unsigned int UnicodeScalarValue;
- if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
- // TODO: Report error.
- UnicodeScalarValue = 0xFFFD;
- encodeUTF8(UnicodeScalarValue, Storage);
- UnquotedValue = UnquotedValue.substr(8);
- break;
- }
}
- UnquotedValue = UnquotedValue.substr(1);
}
+ // Handle Windows-style EOL
+ if (UnquotedValue.substr(I, 2) == "\x0D\x0A")
+ I++;
+ UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim("\x20\x09");
}
llvm::append_range(Storage, UnquotedValue);
return StringRef(Storage.begin(), Storage.size());
}
+StringRef
+ScalarNode::getDoubleQuotedValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage) const {
+ assert(RawValue.size() >= 2 && RawValue.front() == '"' &&
+ RawValue.back() == '"');
+ StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
+
+ auto UnescapeFunc = [this](StringRef UnquotedValue,
+ SmallVectorImpl<char> &Storage) {
+ assert(UnquotedValue.take_front(1) == "\\");
+ if (UnquotedValue.size() == 1) {
+ Token T;
+ T.Range = UnquotedValue;
+ this->setError("Unrecognized escape code", T);
+ Storage.clear();
+ return StringRef();
+ }
+ UnquotedValue = UnquotedValue.drop_front(1);
+ switch (UnquotedValue[0]) {
+ default: {
+ Token T;
+ T.Range = UnquotedValue.take_front(1);
+ setError("Unrecognized escape code", T);
+ Storage.clear();
+ return StringRef();
+ }
+ case '\x0D':
+ // Remove the Windows-style EOL.
+ if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\x0A')
+ return UnquotedValue.drop_front(2);
+ // If this was just a single byte newline, it will get skipped below.
+ break;
+ case '\x0A':
+ // If this was just a single byte newline, it will get skipped below.
+ break;
+ case '0':
+ Storage.push_back(0x00);
+ break;
+ case 'a':
+ Storage.push_back(0x07);
+ break;
+ case 'b':
+ Storage.push_back(0x08);
+ break;
+ case 't':
+ case 0x09:
+ Storage.push_back(0x09);
+ break;
+ case 'n':
+ Storage.push_back(0x0A);
+ break;
+ case 'v':
+ Storage.push_back(0x0B);
+ break;
+ case 'f':
+ Storage.push_back(0x0C);
+ break;
+ case 'r':
+ Storage.push_back(0x0D);
+ break;
+ case 'e':
+ Storage.push_back(0x1B);
+ break;
+ case ' ':
+ Storage.push_back(0x20);
+ break;
+ case '"':
+ Storage.push_back(0x22);
+ break;
+ case '/':
+ Storage.push_back(0x2F);
+ break;
+ case '\\':
+ Storage.push_back(0x5C);
+ break;
+ case 'N':
+ encodeUTF8(0x85, Storage);
+ break;
+ case '_':
+ encodeUTF8(0xA0, Storage);
+ break;
+ case 'L':
+ encodeUTF8(0x2028, Storage);
+ break;
+ case 'P':
+ encodeUTF8(0x2029, Storage);
+ break;
+ case 'x': {
+ if (UnquotedValue.size() < 3)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ return UnquotedValue.drop_front(3);
+ }
+ case 'u': {
+ if (UnquotedValue.size() < 5)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ return UnquotedValue.drop_front(5);
+ }
+ case 'U': {
+ if (UnquotedValue.size() < 9)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ return UnquotedValue.drop_front(9);
+ }
+ }
+ return UnquotedValue.drop_front(1);
+ };
+
+ return parseScalarValue(UnquotedValue, Storage, "\\\x0A\x0D", UnescapeFunc);
+}
+
+StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage) {
+ assert(RawValue.size() >= 2 && RawValue.front() == '\'' &&
+ RawValue.back() == '\'');
+ StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
+
+ auto UnescapeFunc = [](StringRef UnquotedValue,
+ SmallVectorImpl<char> &Storage) {
+ assert(UnquotedValue.take_front(2) == "''");
+ Storage.push_back('\'');
+ return UnquotedValue.drop_front(2);
+ };
+
+ return parseScalarValue(UnquotedValue, Storage, "'\x0A\x0D", UnescapeFunc);
+}
+
+StringRef ScalarNode::getPlainValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage) {
+ // Trim trailing whitespace ('b-char' and 's-white').
+ // NOTE: Alternatively we could change the scanner to not include whitespace
+ // here in the first place.
+ RawValue = RawValue.rtrim("\x0A\x0D\x20\x09");
+ return parseScalarValue(RawValue, Storage, "\x0A\x0D", nullptr);
+}
+
Node *KeyValueNode::getKey() {
if (Key)
return Key;
diff --git a/llvm/test/YAMLParser/spec-09-01.test b/llvm/test/YAMLParser/spec-09-01.test
index 8999b4961626470..f766ee6015d91f7 100644
--- a/llvm/test/YAMLParser/spec-09-01.test
+++ b/llvm/test/YAMLParser/spec-09-01.test
@@ -1,4 +1,13 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!map {
+# CHECK-NEXT: ? !!str "simple key"
+# CHECK-NEXT: : !!map {
+# CHECK-NEXT: ? !!str "also simple"
+# CHECK-NEXT: : !!str "value",
+# CHECK-NEXT: ? !!str "not a simple key"
+# CHECK-NEXT: : !!str "any value",
+# CHECK-NEXT: },
+# CHECK-NEXT: }
"simple key" : {
"also simple" : value,
diff --git a/llvm/test/YAMLParser/spec-09-02.test b/llvm/test/YAMLParser/spec-09-02.test
index 3f8e49a8bd31079..5724801ba6ec089 100644
--- a/llvm/test/YAMLParser/spec-09-02.test
+++ b/llvm/test/YAMLParser/spec-09-02.test
@@ -1,14 +1,29 @@
# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s
+# CHECK: "as space trimmed\nspecific\L escaped\t none"
- "as space
- trimmed
+## Note: The example was originally taken from Spec 1.1, but the parsing rules
+## have been changed since then.
+## * The line-separator character '\u2028' is no longer considered a line-break
+## character, so the line "...specific\u2028\nescaped..." is now parsed as
+## "...specific\L escaped...".
+## * The paragraph-separator character '\u2029' is also excluded from line-break
+## characters, so the original sequence "escaped\t\\\u2029" is no longer
+## considered valid. This is replaced by "escaped\t\\\n" in the test source,
+## so the output has changed as well.
+## See https://yaml.org/spec/1.2.2/ext/changes/ for details.
+##
+## Note 2: Different parsers handle this corner case example differently.
+## * https://github.com/yaml/libyaml:
+## "as space trimmed\nspecific\L\nescaped\t\nnone"
+## * https://github.com/yaml/yaml-reference-parser (parser-1.2):
+## "as space trimmed\nspecific\L escaped\t none"
+## * https://github.com/yaml/yaml-reference-parser (parser-1.3):
+## "as space trimmed\nspecific
escaped\t none"
- specific
+ "as space
+ trimmed
+ specific
escaped \
+
none"
-
-# FIXME: The string below should actually be
-# "as space trimmed\nspecific\nescaped\tnone", but the parser currently has
-# a bug when parsing multiline quoted strings.
-# CHECK: !!str "as space\n trimmed\n specific\n escaped\t none"
diff --git a/llvm/test/YAMLParser/spec-09-03.test b/llvm/test/YAMLParser/spec-09-03.test
index 3fb0d8b184abb16..5067bf5bd740295 100644
--- a/llvm/test/YAMLParser/spec-09-03.test
+++ b/llvm/test/YAMLParser/spec-09-03.test
@@ -1,4 +1,9 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!seq [
+# CHECK-NEXT: !!str " last",
+# CHECK-NEXT: !!str " last",
+# CHECK-NEXT: !!str " \tfirst last",
+# CHECK-NEXT: ]
- "
last"
diff --git a/llvm/test/YAMLParser/spec-09-04.test b/llvm/test/YAMLParser/spec-09-04.test
index 4178ec6befbd538..73c66e975dee948 100644
--- a/llvm/test/YAMLParser/spec-09-04.test
+++ b/llvm/test/YAMLParser/spec-09-04.test
@@ -1,4 +1,5 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "first inner 1 inner 2 last"
"first
inner 1
diff --git a/llvm/test/YAMLParser/spec-09-05.test b/llvm/test/YAMLParser/spec-09-05.test
index e482d5366235b79..e2b2b2a55db9ee9 100644
--- a/llvm/test/YAMLParser/spec-09-05.test
+++ b/llvm/test/YAMLParser/spec-09-05.test
@@ -1,4 +1,9 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!seq [
+# CHECK-NEXT: !!str "first ",
+# CHECK-NEXT: !!str "first\nlast",
+# CHECK-NEXT: !!str "first inner \tlast",
+# CHECK-NEXT: ]
- "first
"
diff --git a/llvm/test/YAMLParser/spec-09-06.test b/llvm/test/YAMLParser/spec-09-06.test
index edc0cbba9004c4a..084cda1cd52d789 100644
--- a/llvm/test/YAMLParser/spec-09-06.test
+++ b/llvm/test/YAMLParser/spec-09-06.test
@@ -1,3 +1,4 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "here's to \"quotes\""
'here''s to "quotes"'
diff --git a/llvm/test/YAMLParser/spec-09-07.test b/llvm/test/YAMLParser/spec-09-07.test
index 3c010ca5b93bdd3..35171ec32ba8713 100644
--- a/llvm/test/YAMLParser/spec-09-07.test
+++ b/llvm/test/YAMLParser/spec-09-07.test
@@ -1,4 +1,13 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!map {
+# CHECK-NEXT: ? !!str "simple key"
+# CHECK-NEXT: : !!map {
+# CHECK-NEXT: ? !!str "also simple"
+# CHECK-NEXT: : !!str "value",
+# CHECK-NEXT: ? !!str "not a simple key"
+# CHECK-NEXT: : !!str "any value",
+# CHECK-NEXT: },
+# CHECK-NEXT: }
'simple key' : {
'also simple' : value,
diff --git a/llvm/test/YAMLParser/spec-09-08.test b/llvm/test/YAMLParser/spec-09-08.test
index d114e58fcac15da..6cef92912bf5007 100644
--- a/llvm/test/YAMLParser/spec-09-08.test
+++ b/llvm/test/YAMLParser/spec-09-08.test
@@ -1,3 +1,14 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "as space trimmed\nspecific\L none"
- 'as space
trimmed
specific
none'
+## Note: The parsing rules were changed in version 1.2 and the line-separator
+## character is no longer considered a line-break character. The example is
+## taken from Spec 1.1 and is now parsed as "..\L .." instead of "..\L\n.." as
+## in the original edition.
+## See https://yaml.org/spec/1.2.2/ext/changes/ for details.
+
+'as space
+trimmed
+
+specific
+none'
diff --git a/llvm/test/YAMLParser/spec-09-09.test b/llvm/test/YAMLParser/spec-09-09.test
index 2fec1b536ef1357..133387c42cb3758 100644
--- a/llvm/test/YAMLParser/spec-09-09.test
+++ b/llvm/test/YAMLParser/spec-09-09.test
@@ -1,4 +1,9 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!seq [
+# CHECK-NEXT: !!str " last",
+# CHECK-NEXT: !!str " last",
+# CHECK-NEXT: !!str " \tfirst last",
+# CHECK-NEXT: ]
- '
last'
diff --git a/llvm/test/YAMLParser/spec-09-10.test b/llvm/test/YAMLParser/spec-09-10.test
index faabfb06b5ec2af..697efc2f1bc9142 100644
--- a/llvm/test/YAMLParser/spec-09-10.test
+++ b/llvm/test/YAMLParser/spec-09-10.test
@@ -1,4 +1,5 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "first inner last"
'first
inner
diff --git a/llvm/test/YAMLParser/spec-09-11.test b/llvm/test/YAMLParser/spec-09-11.test
index 3f487ad6b04398d..d7f24ea83390b27 100644
--- a/llvm/test/YAMLParser/spec-09-11.test
+++ b/llvm/test/YAMLParser/spec-09-11.test
@@ -1,4 +1,8 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!seq [
+# CHECK-NEXT: !!str "first ",
+# CHECK-NEXT: !!str "first\nlast",
+# CHECK-NEXT: ]
- 'first
'
diff --git a/llvm/test/YAMLParser/spec-09-13.test b/llvm/test/YAMLParser/spec-09-13.test
index d48f2d2c47ee3d5..c93abdccaefedcd 100644
--- a/llvm/test/YAMLParser/spec-09-13.test
+++ b/llvm/test/YAMLParser/spec-09-13.test
@@ -1,4 +1,13 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: !!map {
+# CHECK-NEXT: ? !!str "simple key"
+# CHECK-NEXT: : !!map {
+# CHECK-NEXT: ? !!str "also simple"
+# CHECK-NEXT: : !!str "value",
+# CHECK-NEXT: ? !!str "not a simple key"
+# CHECK-NEXT: : !!str "any value",
+# CHECK-NEXT: },
+# CHECK-NEXT: }
simple key : {
also simple : value,
diff --git a/llvm/test/YAMLParser/spec-09-16.test b/llvm/test/YAMLParser/spec-09-16.test
index e595f47bece9d97..b38f405c8499708 100644
--- a/llvm/test/YAMLParser/spec-09-16.test
+++ b/llvm/test/YAMLParser/spec-09-16.test
@@ -1,5 +1,14 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "as space trimmed\nspecific\L none"
-# Tabs are confusing:
-# as space/trimmed/specific/none
- as space
trimmed
specific
none
+## Note: The parsing rules were changed in version 1.2 and the line-separator
+## character is no longer considered a line-break character. The example is
+## taken from Spec 1.1 and is now parsed as "..\L .." instead of "..\L\n.." as
+## in the original edition.
+## See https://yaml.org/spec/1.2.2/ext/changes/ for details.
+
+ as space
+ trimmed
+
+ specific
+ none
diff --git a/llvm/test/YAMLParser/spec-09-17.test b/llvm/test/YAMLParser/spec-09-17.test
index 1bacf4d68b1f7ab..5020cb11b2724a3 100644
--- a/llvm/test/YAMLParser/spec-09-17.test
+++ b/llvm/test/YAMLParser/spec-09-17.test
@@ -1,4 +1,5 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "first line\nmore line"
first line
diff --git a/llvm/test/YAMLParser/spec1.2-07-05.test b/llvm/test/YAMLParser/spec1.2-07-05.test
new file mode 100644
index 000000000000000..629372059b3ebce
--- /dev/null
+++ b/llvm/test/YAMLParser/spec1.2-07-05.test
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "folded to a space,\nto a line feed, or \t \tnon-content"
+
+"folded
+to a space,
+
+to a line feed, or \
+ \ non-content"
diff --git a/llvm/test/YAMLParser/spec1.2-07-06.test b/llvm/test/YAMLParser/spec1.2-07-06.test
new file mode 100644
index 000000000000000..7bbe3e7d2e5cbaf
--- /dev/null
+++ b/llvm/test/YAMLParser/spec1.2-07-06.test
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: " 1st non-empty\n2nd non-empty 3rd non-empty "
+
+" 1st non-empty
+
+ 2nd non-empty
+ 3rd non-empty "
diff --git a/llvm/test/YAMLParser/spec1.2-07-09.test b/llvm/test/YAMLParser/spec1.2-07-09.test
new file mode 100644
index 000000000000000..c286bcb28452f37
--- /dev/null
+++ b/llvm/test/YAMLParser/spec1.2-07-09.test
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: " 1st non-empty\n2nd non-empty 3rd non-empty "
+
+' 1st non-empty
+
+ 2nd non-empty
+ 3rd non-empty '
diff --git a/llvm/test/YAMLParser/spec1.2-07-12.test b/llvm/test/YAMLParser/spec1.2-07-12.test
new file mode 100644
index 000000000000000..0d4eed4b9a5c66a
--- /dev/null
+++ b/llvm/test/YAMLParser/spec1.2-07-12.test
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s | FileCheck %s
+# CHECK: "1st non-empty\n2nd non-empty 3rd non-empty"
+
+1st non-empty
+
+ 2nd non-empty
+ 3rd non-empty
diff --git a/llvm/test/YAMLParser/spec1.2-07-14.test b/llvm/test/YAMLParser/spec1.2-07-14.test
new file mode 100644
index 000000000000000..908b7562ba8109d
--- /dev/null
+++ b/llvm/test/YAMLParser/spec1.2-07-14.test
@@ -0,0 +1,23 @@
+# RUN: yaml-bench -canonical %s | FileCheck %s
+
+# CHECK: !!seq [
+# CHECK-NEXT: !!str "double quoted",
+# CHECK-NEXT: !!str "single quoted",
+# CHECK-NEXT: !!str "plain text",
+# CHECK-NEXT: !!seq [
+# CHECK-NEXT: !!str "nested",
+# CHECK-NEXT: ],
+# CHECK-NEXT: !!map {
+# CHECK-NEXT: ? !!str "single"
+# CHECK-NEXT: : !!str "pair",
+# CHECK-NEXT: },
+# CHECK-NEXT: ]
+
+[
+"double
+ quoted", 'single
+ quoted',
+plain
+ text, [ nested ],
+single: pair,
+]
More information about the flang-commits
mailing list