[llvm] [YAMLParser] Improve plain scalar spec compliance (PR #68946)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 16 19:29:10 PDT 2023


https://github.com/akirchhoff-modular updated https://github.com/llvm/llvm-project/pull/68946

>From d99c12b13b2e3dadebac37a783ffa1be958d63ef Mon Sep 17 00:00:00 2001
From: Alex Kirchhoff <akirchhoff at modular.com>
Date: Wed, 11 Oct 2023 18:13:44 -0700
Subject: [PATCH 1/2] [YAMLParser] Improve plain scalar spec compliance

The `YAMLParser.h` header file claims support for YAML 1.2 with a few
deviations, but our plain scalar parsing failed to parse some YAML that is
valid according to the spec.  This change brings us closer to compliance
with the YAML spec by letting us parse plain scalars that contain additional
special characters in cases where they are not ambiguous.
---
 llvm/lib/Support/YAMLParser.cpp               | 72 ++++++++++++-------
 .../Generic/first-character-parse-error.mir   |  4 +-
 llvm/test/YAMLParser/plain-characters.test    | 15 ++++
 llvm/unittests/Support/YAMLIOTest.cpp         |  2 +-
 llvm/unittests/Support/YAMLParserTest.cpp     | 47 +++++++++++-
 5 files changed, 109 insertions(+), 31 deletions(-)
 create mode 100644 llvm/test/YAMLParser/plain-characters.test

diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index 6ac2c6aeeb46ad5..d0af79513cadd67 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -392,6 +392,10 @@ class Scanner {
   ///        Pos is whitespace or a new line
   bool isBlankOrBreak(StringRef::iterator Position);
 
+  /// Return true if the minimal well-formed code unit subsequence at
+  ///        Pos is considered a "safe" character for plain scalars.
+  bool isPlainSafeNonBlank(StringRef::iterator Position);
+
   /// Return true if the line is a line break, false otherwise.
   bool isLineEmpty(StringRef Line);
 
@@ -545,6 +549,10 @@ class Scanner {
   /// Can the next token be the start of a simple key?
   bool IsSimpleKeyAllowed;
 
+  /// Can the next token be a value indicator even if it does not have a
+  /// trailing space?
+  bool IsAdjacentValueAllowed;
+
   /// True if an error has occurred.
   bool Failed;
 
@@ -868,6 +876,7 @@ void Scanner::init(MemoryBufferRef Buffer) {
   FlowLevel = 0;
   IsStartOfStream = true;
   IsSimpleKeyAllowed = true;
+  IsAdjacentValueAllowed = false;
   Failed = false;
   std::unique_ptr<MemoryBuffer> InputBufferOwner =
       MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
@@ -1049,6 +1058,15 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
          *Position == '\n';
 }
 
+bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) {
+  // End of input, blanks and line breaks are never plain-safe.
+  if (Position == End || isBlankOrBreak(Position))
+    return false;
+  // Flow indicators are unsafe only while inside a flow collection.
+  StringRef Char(Position, 1);
+  return !FlowLevel || Char.find_first_of(",[]{}") == StringRef::npos;
+}
+
 bool Scanner::isLineEmpty(StringRef Line) {
   for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
     if (!isBlankOrBreak(Position))
@@ -1189,6 +1207,7 @@ bool Scanner::scanStreamEnd() {
   unrollIndent(-1);
   SimpleKeys.clear();
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = false;
 
   Token T;
   T.Kind = Token::TK_StreamEnd;
@@ -1202,6 +1221,7 @@ bool Scanner::scanDirective() {
   unrollIndent(-1);
   SimpleKeys.clear();
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = false;
 
   StringRef::iterator Start = Current;
   consume('%');
@@ -1233,6 +1253,7 @@ bool Scanner::scanDocumentIndicator(bool IsStart) {
   unrollIndent(-1);
   SimpleKeys.clear();
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = false;
 
   Token T;
   T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
@@ -1255,6 +1276,8 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
 
   // And may also be followed by a simple key.
   IsSimpleKeyAllowed = true;
+  // Adjacent values are allowed only after JSON-style keys.
+  IsAdjacentValueAllowed = false;
   ++FlowLevel;
   return true;
 }
@@ -1262,6 +1285,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
 bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = true;
   Token T;
   T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
                       : Token::TK_FlowMappingEnd;
@@ -1276,6 +1300,7 @@ bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
 bool Scanner::scanFlowEntry() {
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = true;
+  IsAdjacentValueAllowed = false;
   Token T;
   T.Kind = Token::TK_FlowEntry;
   T.Range = StringRef(Current, 1);
@@ -1288,6 +1313,7 @@ bool Scanner::scanBlockEntry() {
   rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = true;
+  IsAdjacentValueAllowed = false;
   Token T;
   T.Kind = Token::TK_BlockEntry;
   T.Range = StringRef(Current, 1);
@@ -1302,6 +1328,7 @@ bool Scanner::scanKey() {
 
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = !FlowLevel;
+  IsAdjacentValueAllowed = false;
 
   Token T;
   T.Kind = Token::TK_Key;
@@ -1339,6 +1366,7 @@ bool Scanner::scanValue() {
       rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
     IsSimpleKeyAllowed = !FlowLevel;
   }
+  IsAdjacentValueAllowed = false;
 
   Token T;
   T.Kind = Token::TK_Value;
@@ -1420,6 +1448,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = true;
 
   return true;
 }
@@ -1434,21 +1463,9 @@ bool Scanner::scanPlainScalar() {
     if (*Current == '#')
       break;
 
-    while (Current != End && !isBlankOrBreak(Current)) {
-      if (FlowLevel && *Current == ':' &&
-          (Current + 1 == End ||
-           !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
-        setError("Found unexpected ':' while scanning a plain scalar", Current);
-        return false;
-      }
-
-      // Check for the end of the plain scalar.
-      if (  (*Current == ':' && isBlankOrBreak(Current + 1))
-          || (  FlowLevel
-          && (StringRef(Current, 1).find_first_of(",:?[]{}")
-              != StringRef::npos)))
-        break;
-
+    while (Current != End &&
+           ((*Current != ':' && isPlainSafeNonBlank(Current)) ||
+            (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) {
       StringRef::iterator i = skip_nb_char(Current);
       if (i == Current)
         break;
@@ -1499,6 +1516,7 @@ bool Scanner::scanPlainScalar() {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = false;
 
   return true;
 }
@@ -1534,6 +1552,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = false;
 
   return true;
 }
@@ -1766,6 +1785,7 @@ bool Scanner::scanBlockScalar(bool IsLiteral) {
   // New lines may start a simple key.
   if (!FlowLevel)
     IsSimpleKeyAllowed = true;
+  IsAdjacentValueAllowed = false;
 
   Token T;
   T.Kind = Token::TK_BlockScalar;
@@ -1799,6 +1819,7 @@ bool Scanner::scanTag() {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
+  IsAdjacentValueAllowed = false;
 
   return true;
 }
@@ -1848,13 +1869,15 @@ bool Scanner::fetchMoreTokens() {
   if (*Current == ',')
     return scanFlowEntry();
 
-  if (*Current == '-' && isBlankOrBreak(Current + 1))
+  if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
     return scanBlockEntry();
 
-  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+  if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
     return scanKey();
 
-  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+  if (*Current == ':' && ((FlowLevel && (IsAdjacentValueAllowed ||
+                                         !isPlainSafeNonBlank(Current + 1))) ||
+                          isBlankOrBreak(Current + 1)))
     return scanValue();
 
   if (*Current == '*')
@@ -1880,15 +1903,10 @@ bool Scanner::fetchMoreTokens() {
 
   // Get a plain scalar.
   StringRef FirstChar(Current, 1);
-  if (!(isBlankOrBreak(Current)
-        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
-      || (*Current == '-' && !isBlankOrBreak(Current + 1))
-      || (!FlowLevel && (*Current == '?' || *Current == ':')
-          && isBlankOrBreak(Current + 1))
-      || (!FlowLevel && *Current == ':'
-                      && Current + 2 < End
-                      && *(Current + 1) == ':'
-                      && !isBlankOrBreak(Current + 2)))
+  if ((!isBlankOrBreak(Current) &&
+       FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) ||
+      (FirstChar.find_first_of("?:-") != StringRef::npos &&
+       isPlainSafeNonBlank(Current + 1)))
     return scanPlainScalar();
 
   setError("Unrecognized character while tokenizing.", Current);
diff --git a/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir b/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir
index 00a01058dc8cb89..869392f3e4bb6fb 100644
--- a/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir
+++ b/llvm/test/CodeGen/MIR/Generic/first-character-parse-error.mir
@@ -1,6 +1,6 @@
-:# RUN: not llc -run-pass=none %s -o - 2>&1 | FileCheck %s
+@# RUN: not llc -run-pass=none %s -o - 2>&1 | FileCheck %s
 
-# The : before the run comment is syntactically invalid. This used to
+# The @ before the run comment is syntactically invalid. This used to
 # crash in the SourceMgr diagnostic printer because it was called
 # before the LLVMContext was initialized.
 
diff --git a/llvm/test/YAMLParser/plain-characters.test b/llvm/test/YAMLParser/plain-characters.test
new file mode 100644
index 000000000000000..c60a2468838a2a5
--- /dev/null
+++ b/llvm/test/YAMLParser/plain-characters.test
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+# Example from https://yaml.org/spec/1.2.2/#example-plain-characters
+
+# Outside flow collection:
+- ::vector
+- ": - ()"
+- Up, up, and away!
+- -123
+- https://example.com/foo#bar
+# Inside flow collection:
+- [ ::vector,
+  ": - ()",
+  "Up, up and away!",
+  -123,
+  https://example.com/foo#bar ]
diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp
index 90c09ed7f79ee34..66a59fd429efb36 100644
--- a/llvm/unittests/Support/YAMLIOTest.cpp
+++ b/llvm/unittests/Support/YAMLIOTest.cpp
@@ -3153,7 +3153,7 @@ TEST(YAMLIO, TestFlowSequenceTokenErrors) {
 
 TEST(YAMLIO, TestDirectiveMappingNoValue) {
   Input yin("%YAML\n{5:");
-  EXPECT_FALSE(yin.setCurrentDocument());
+  yin.setCurrentDocument();
   EXPECT_TRUE(yin.error());
 
   Input yin2("%TAG\n'\x98!< :\n");
diff --git a/llvm/unittests/Support/YAMLParserTest.cpp b/llvm/unittests/Support/YAMLParserTest.cpp
index b52a3850c02b7c0..24c6d8b9eb3c93c 100644
--- a/llvm/unittests/Support/YAMLParserTest.cpp
+++ b/llvm/unittests/Support/YAMLParserTest.cpp
@@ -82,7 +82,10 @@ TEST(YAMLParser, FailsIfMissingColon) {
 }
 
 TEST(YAMLParser, FailsOnMissingQuote) {
-  ExpectParseError("Missing open quote", "[{a\":\"b\"}]");
+  // Missing open quote counts as a plain scalar per YAML spec
+  // (Following is equivalent to JSON [{"a\":\"b\"": null}])
+  ExpectParseSuccess("Missing open quote", "[{a\":\"b\"}]");
+  // Closing quote is more strict -- plain scalars cannot start with a quote
   ExpectParseError("Missing closing quote", "[{\"a\":\"b}]");
 }
 
@@ -128,6 +131,48 @@ TEST(YAMLParser, ParsesArrayOfArrays) {
   ExpectParseSuccess("Array of arrays", "[[]]");
 }
 
+TEST(YAMLParser, ParsesPlainScalars) {
+  ExpectParseSuccess("Plain scalar", "hello");
+  ExpectParseSuccess("Plain scalar beginning with a question mark", "?hello");
+  ExpectParseSuccess("Plain scalar beginning with a colon", ":hello");
+  ExpectParseSuccess("Plain scalar beginning with two colons", "::hello");
+  ExpectParseSuccess("Plain scalar beginning with a hyphen", "-hello");
+  ExpectParseSuccess("Multi-line plain scalar", "Hello\nworld");
+  ExpectParseSuccess("Plain scalar with indicator characters",
+                     "He-!l*lo, []world{}");
+  ExpectParseSuccess("Plain scalar with indicator characters used as block key",
+                     "He-!l*lo, []world{}: value");
+  ExpectParseSuccess("Plain scalar in flow sequence", "[ hello ]");
+  ExpectParseSuccess(
+      "Plain scalar beginning with a question mark in flow sequence",
+      "[ ?hello ]");
+  ExpectParseSuccess("Plain scalar beginning with a colon in flow sequence",
+                     "[ :hello ]");
+  ExpectParseSuccess("Plain scalar beginning with two colons in flow sequence",
+                     "[ ::hello ]");
+  ExpectParseSuccess("Plain scalar beginning with a hyphen in flow sequence",
+                     "[ -hello ]");
+  ExpectParseSuccess("Multi-line plain scalar in flow sequence",
+                     "[ Hello\nworld ]");
+  ExpectParseSuccess(
+      "Plain scalar with non-flow indicator characters in flow sequence",
+      "[ He-!l*lo, world ]");
+  ExpectParseSuccess(
+      "Plain scalar with non-flow indicator characters used as flow key",
+      "{ He-!l*lo, world: value } ");
+  ExpectParseError(
+      "Plain scalar with flow indicator characters inside flow sequence",
+      "[ Hello[world ]");
+  ExpectParseError(
+      "Plain scalar with flow indicator characters inside flow key",
+      "{ Hello[world: value }");
+  // Multi-line plain scalars in keys are strictly invalid per the spec.  Many
+  // implementations accept them in flow keys anyway; none that I can find
+  // accept them in block keys.  Both forms are expected to parse here.
+  ExpectParseSuccess("Multi-line plain scalar in block key", "a\nb: c");
+  ExpectParseSuccess("Multi-line plain scalar in flow key", "{\na\nb: c\n}");
+}
+
 TEST(YAMLParser, ParsesBlockLiteralScalars) {
   ExpectParseSuccess("Block literal scalar", "test: |\n  Hello\n  World\n");
   ExpectParseSuccess("Block literal scalar EOF", "test: |\n  Hello\n  World");

>From 4eb24e2c0da2c6eead308aaa6296636dea141e81 Mon Sep 17 00:00:00 2001
From: Alex Kirchhoff <akirchhoff at modular.com>
Date: Mon, 16 Oct 2023 19:26:14 -0700
Subject: [PATCH 2/2] Address review feedback

---
 llvm/lib/Support/YAMLParser.cpp            | 39 +++++++++++-----------
 llvm/test/YAMLParser/plain-characters.test | 17 +++++++++-
 llvm/unittests/Support/YAMLParserTest.cpp  | 14 ++++++++
 3 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index d0af79513cadd67..1422e40f91944ae 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -551,7 +551,7 @@ class Scanner {
 
   /// Can the next token be a value indicator even if it does not have a
   /// trailing space?
-  bool IsAdjacentValueAllowed;
+  bool IsAdjacentValueAllowedInFlow;
 
   /// True if an error has occurred.
   bool Failed;
@@ -876,7 +876,7 @@ void Scanner::init(MemoryBufferRef Buffer) {
   FlowLevel = 0;
   IsStartOfStream = true;
   IsSimpleKeyAllowed = true;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
   Failed = false;
   std::unique_ptr<MemoryBuffer> InputBufferOwner =
       MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
@@ -1207,7 +1207,7 @@ bool Scanner::scanStreamEnd() {
   unrollIndent(-1);
   SimpleKeys.clear();
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   Token T;
   T.Kind = Token::TK_StreamEnd;
@@ -1221,7 +1221,7 @@ bool Scanner::scanDirective() {
   unrollIndent(-1);
   SimpleKeys.clear();
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   StringRef::iterator Start = Current;
   consume('%');
@@ -1253,7 +1253,7 @@ bool Scanner::scanDocumentIndicator(bool IsStart) {
   unrollIndent(-1);
   SimpleKeys.clear();
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   Token T;
   T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
@@ -1276,8 +1276,8 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
 
   // And may also be followed by a simple key.
   IsSimpleKeyAllowed = true;
-  // Adjacent values are allowed only after JSON-style keys.
-  IsAdjacentValueAllowed = false;
+  // Adjacent values are allowed in flows only after JSON-style keys.
+  IsAdjacentValueAllowedInFlow = false;
   ++FlowLevel;
   return true;
 }
@@ -1285,7 +1285,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
 bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = true;
+  IsAdjacentValueAllowedInFlow = true;
   Token T;
   T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
                       : Token::TK_FlowMappingEnd;
@@ -1300,7 +1300,7 @@ bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
 bool Scanner::scanFlowEntry() {
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = true;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
   Token T;
   T.Kind = Token::TK_FlowEntry;
   T.Range = StringRef(Current, 1);
@@ -1313,7 +1313,7 @@ bool Scanner::scanBlockEntry() {
   rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = true;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
   Token T;
   T.Kind = Token::TK_BlockEntry;
   T.Range = StringRef(Current, 1);
@@ -1328,7 +1328,7 @@ bool Scanner::scanKey() {
 
   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
   IsSimpleKeyAllowed = !FlowLevel;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   Token T;
   T.Kind = Token::TK_Key;
@@ -1366,7 +1366,7 @@ bool Scanner::scanValue() {
       rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
     IsSimpleKeyAllowed = !FlowLevel;
   }
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   Token T;
   T.Kind = Token::TK_Value;
@@ -1448,7 +1448,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = true;
+  IsAdjacentValueAllowedInFlow = true;
 
   return true;
 }
@@ -1516,7 +1516,7 @@ bool Scanner::scanPlainScalar() {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   return true;
 }
@@ -1552,7 +1552,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   return true;
 }
@@ -1785,7 +1785,7 @@ bool Scanner::scanBlockScalar(bool IsLiteral) {
   // New lines may start a simple key.
   if (!FlowLevel)
     IsSimpleKeyAllowed = true;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   Token T;
   T.Kind = Token::TK_BlockScalar;
@@ -1819,7 +1819,7 @@ bool Scanner::scanTag() {
   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
 
   IsSimpleKeyAllowed = false;
-  IsAdjacentValueAllowed = false;
+  IsAdjacentValueAllowedInFlow = false;
 
   return true;
 }
@@ -1875,9 +1875,8 @@ bool Scanner::fetchMoreTokens() {
   if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
     return scanKey();
 
-  if (*Current == ':' && ((FlowLevel && (IsAdjacentValueAllowed ||
-                                         !isPlainSafeNonBlank(Current + 1))) ||
-                          isBlankOrBreak(Current + 1)))
+  if (*Current == ':' &&
+      (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
     return scanValue();
 
   if (*Current == '*')
diff --git a/llvm/test/YAMLParser/plain-characters.test b/llvm/test/YAMLParser/plain-characters.test
index c60a2468838a2a5..f22016bcb9bca4a 100644
--- a/llvm/test/YAMLParser/plain-characters.test
+++ b/llvm/test/YAMLParser/plain-characters.test
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
 # Example from https://yaml.org/spec/1.2.2/#example-plain-characters
 
 # Outside flow collection:
@@ -13,3 +13,18 @@
   "Up, up and away!",
   -123,
   https://example.com/foo#bar ]
+
+# CHECK: !!seq [
+# CHECK-NEXT:   !!str "::vector",
+# CHECK-NEXT:   !!str ": - ()",
+# CHECK-NEXT:   !!str "Up, up, and away!",
+# CHECK-NEXT:   !!str "-123",
+# CHECK-NEXT:   !!str "https://example.com/foo#bar",
+# CHECK-NEXT:   !!seq [
+# CHECK-NEXT:     !!str "::vector",
+# CHECK-NEXT:     !!str ": - ()",
+# CHECK-NEXT:     !!str "Up, up and away!",
+# CHECK-NEXT:     !!str "-123",
+# CHECK-NEXT:     !!str "https://example.com/foo#bar",
+# CHECK-NEXT:   ],
+# CHECK-NEXT: ]
diff --git a/llvm/unittests/Support/YAMLParserTest.cpp b/llvm/unittests/Support/YAMLParserTest.cpp
index 24c6d8b9eb3c93c..247e70756861df1 100644
--- a/llvm/unittests/Support/YAMLParserTest.cpp
+++ b/llvm/unittests/Support/YAMLParserTest.cpp
@@ -47,6 +47,10 @@ TEST(YAMLParser, ParsesEmptyArray) {
   ExpectParseSuccess("Empty array", "[]");
 }
 
+TEST(YAMLParser, ParsesComplexMap) {
+  ExpectParseSuccess("Complex block map", "? a\n: b"); // explicit '?' key
+}
+
 TEST(YAMLParser, FailsIfNotClosingArray) {
   ExpectParseError("Not closing array", "[");
   ExpectParseError("Not closing array", "  [  ");
@@ -221,6 +225,10 @@ TEST(YAMLParser, HandlesEndOfFileGracefully) {
   ExpectParseError("In array hitting EOF", "[[] ");
   ExpectParseError("In array hitting EOF", "[[]");
   ExpectParseError("In object hitting EOF", "{\"\"");
+  // This one is valid, equivalent to the JSON {"": null}
+  ExpectParseSuccess("In complex block map hitting EOF", "?");
+  // Equivalent to JSON [null]
+  ExpectParseSuccess("In block sequence hitting EOF", "-");
 }
 
 TEST(YAMLParser, HandlesNullValuesInKeyValueNodesGracefully) {
@@ -228,6 +236,12 @@ TEST(YAMLParser, HandlesNullValuesInKeyValueNodesGracefully) {
   ExpectParseError("KeyValueNode with null value", "test: '");
 }
 
+TEST(YAMLParser, BlockSequenceEOF) {
+  SourceMgr SrcMgr;
+  yaml::Stream Doc("-", SrcMgr); // lone block-entry indicator, then EOF
+  EXPECT_TRUE(isa_and_present<yaml::SequenceNode>(Doc.begin()->getRoot()));
+}
+
 // Checks that the given string can be parsed into an identical string inside
 // of an array.
 static void ExpectCanParseString(StringRef String) {



More information about the llvm-commits mailing list