[llvm] f3487c7 - [YAMLParser] Add multi-line literal folding support

Scott Linder via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 28 13:03:57 PST 2022


Author: Scott Linder
Date: 2022-02-28T21:03:36Z
New Revision: f3487c7be97738f4d4fb8c4b80203dcf00c5e7a1

URL: https://github.com/llvm/llvm-project/commit/f3487c7be97738f4d4fb8c4b80203dcf00c5e7a1
DIFF: https://github.com/llvm/llvm-project/commit/f3487c7be97738f4d4fb8c4b80203dcf00c5e7a1.diff

LOG: [YAMLParser] Add multi-line literal folding support

Last year I was working at Swift to add support for [Localization of Compiler Diagnostic Messages](https://forums.swift.org/t/localization-of-compiler-diagnostic-messages/36412/41). We are currently using YAML as the new diagnostic format. The LLVM::YAMLParser did not support multi-line string literal folding, which is crucial for keeping our diagnostic messages within the 80-column limit. Therefore, I decided to add multi-line string literal folding support to the YAML parser.

Patch By: @HassanElDesouky (Hassan ElDesouky)

Differential Revision: https://reviews.llvm.org/D102590

Added: 
    

Modified: 
    llvm/include/llvm/Support/YAMLParser.h
    llvm/lib/Support/YAMLParser.cpp
    llvm/test/YAMLParser/spec-09-24.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h
index a4b2ab5e49ec1..231cc1d28c9a0 100644
--- a/llvm/include/llvm/Support/YAMLParser.h
+++ b/llvm/include/llvm/Support/YAMLParser.h
@@ -11,7 +11,6 @@
 //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
 //
 //  This currently does not implement the following:
-//    * Multi-line literal folding.
 //    * Tag resolution.
 //    * UTF-16.
 //    * BOMs anywhere other than the first Unicode scalar value in the file.

diff  --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index 200261d3ed5c0..78130223507bf 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -392,6 +392,9 @@ class Scanner {
   ///        Pos is whitespace or a new line
   bool isBlankOrBreak(StringRef::iterator Position);
 
+  /// Return true if Line is empty or contains only blanks and line breaks.
+  bool isLineEmpty(StringRef Line);
+
   /// Consume a single b-break[28] if it's present at the current position.
   ///
   /// Return false if the code unit at the current position isn't a line break.
@@ -470,6 +473,18 @@ class Scanner {
   /// Scan a block scalar starting with | or >.
   bool scanBlockScalar(bool IsLiteral);
 
+  /// Scan a block scalar style indicator and header.
+  ///
+  /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
+  /// YAML does not consider the style indicator to be a part of the header.
+  ///
+  /// Return false if an error occurred.
+  bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
+                                 unsigned &IndentIndicator, bool &IsDone);
+
+  /// Scan a style indicator in a block scalar header.
+  char scanBlockStyleIndicator();
+
   /// Scan a chomping indicator in a block scalar header.
   char scanBlockChompingIndicator();
 
@@ -1034,6 +1049,13 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
          *Position == '\n';
 }
 
+bool Scanner::isLineEmpty(StringRef Line) {
+  for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
+    if (!isBlankOrBreak(Position))
+      return false;
+  return true;
+}
+
 bool Scanner::consumeLineBreakIfPresent() {
   auto Next = skip_b_break(Current);
   if (Next == Current)
@@ -1516,6 +1538,25 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
   return true;
 }
 
+bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
+                                        char &ChompingIndicator,
+                                        unsigned &IndentIndicator,
+                                        bool &IsDone) {
+  StyleIndicator = scanBlockStyleIndicator();
+  if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
+    return false;
+  return true;
+}
+
+char Scanner::scanBlockStyleIndicator() {
+  char Indicator = ' ';
+  if (Current != End && (*Current == '>' || *Current == '|')) {
+    Indicator = *Current;
+    skip(1);
+  }
+  return Indicator;
+}
+
 char Scanner::scanBlockChompingIndicator() {
   char Indicator = ' ';
   if (Current != End && (*Current == '+' || *Current == '-')) {
@@ -1654,19 +1695,19 @@ bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
 }
 
 bool Scanner::scanBlockScalar(bool IsLiteral) {
-  // Eat '|' or '>'
   assert(*Current == '|' || *Current == '>');
-  skip(1);
-
+  char StyleIndicator;
   char ChompingIndicator;
   unsigned BlockIndent;
   bool IsDone = false;
-  if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
+  if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
+                                 IsDone))
     return false;
   if (IsDone)
     return true;
+  bool IsFolded = StyleIndicator == '>';
 
-  auto Start = Current;
+  const auto *Start = Current;
   unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
   unsigned LineBreaks = 0;
   if (BlockIndent == 0) {
@@ -1687,6 +1728,22 @@ bool Scanner::scanBlockScalar(bool IsLiteral) {
     auto LineStart = Current;
     advanceWhile(&Scanner::skip_nb_char);
     if (LineStart != Current) {
+      if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
+        // The folded style "folds" any single line break between content into a
+        // single space, except when that content is "empty" (only contains
+        // whitespace) in which case the line break is left as-is.
+        if (LineBreaks == 1) {
+          Str.append(LineBreaks,
+                     isLineEmpty(StringRef(LineStart, Current - LineStart))
+                         ? '\n'
+                         : ' ');
+        }
+        // If we saw a single line break, we are completely replacing it and so
+        // want `LineBreaks == 0`. Otherwise this decrement accounts for the
+        // fact that the first line break is "trimmed", only being used to
+        // signal a sequence of line breaks which should not be folded.
+        LineBreaks--;
+      }
       Str.append(LineBreaks, '\n');
       Str.append(StringRef(LineStart, Current - LineStart));
       LineBreaks = 0;

diff  --git a/llvm/test/YAMLParser/spec-09-24.test b/llvm/test/YAMLParser/spec-09-24.test
index 9cf514138550d..923aeeb8a0cf2 100644
--- a/llvm/test/YAMLParser/spec-09-24.test
+++ b/llvm/test/YAMLParser/spec-09-24.test
@@ -1,13 +1,103 @@
 # RUN: yaml-bench -canonical %s | FileCheck %s
-# CHECK: ? !!str "strip"
-# CHECK: : !!str ""
-# CHECK: ? !!str "clip"
-# CHECK: : !!str ""
-# CHECK: ? !!str "keep"
-# CHECK: : !!str "\n"
+# CHECK: ? !!str "literal_strip"
+# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple    \n\n\nlines\n\nfoo bar"
+# CHECK: ? !!str "literal_clip"
+# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple    \n\n\nlines\n\nfoo bar\n"
+# CHECK: ? !!str "literal_keep"
+# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple    \n\n\nlines\n\nfoo bar\n\n\n\n"
+# CHECK: ? !!str "folded_strip"
+# CHECK: : !!str "Hello\n\nworld on multiple    \n\nlines\nfoo bar"
+# CHECK: ? !!str "folded_clip"
+# CHECK: : !!str "Hello\n\nworld on multiple    \n\nlines\nfoo bar\n"
+# CHECK: ? !!str "folded_keep"
+# CHECK: : !!str "Hello\n\nworld on multiple    \n\nlines\nfoo bar\n\n\n"
 
-strip: >-
+literal_strip: |-
+    Hello
 
-clip: >
 
-keep: |+
+    world
+    on
+    multiple    
+    
+    
+    lines
+    
+    foo bar
+    
+    
+
+literal_clip: |
+    Hello
+
+
+    world
+    on
+    multiple    
+    
+    
+    lines
+    
+    foo bar
+    
+    
+
+literal_keep: |+
+    Hello
+
+
+    world
+    on
+    multiple    
+    
+    
+    lines
+    
+    foo bar
+    
+    
+
+folded_strip: >-
+    Hello
+
+
+    world
+    on
+    multiple    
+    
+    
+    lines
+    
+    foo bar
+    
+    
+
+folded_clip: >
+    Hello
+
+
+    world
+    on
+    multiple    
+    
+    
+    lines
+    
+    foo bar
+    
+    
+
+folded_keep: >+
+    Hello
+
+
+    world
+    on
+    multiple    
+    
+    
+    lines
+    
+    foo bar
+    
+    


        


More information about the llvm-commits mailing list