[llvm] r321283 - [YAML] Fix UTF-8 handling

Francis Visoiu Mistrih via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 21 09:14:09 PST 2017


Author: thegameg
Date: Thu Dec 21 09:14:09 2017
New Revision: 321283

URL: http://llvm.org/viewvc/llvm-project?rev=321283&view=rev
Log:
[YAML] Fix UTF-8 handling

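Previous YAML quoting patches broke UTF-8 printing in YAML output: when a string was double-quoted, the bytes of multi-byte UTF-8 sequences were treated as non-printable and emitted as "\x" hex escapes instead of the original characters. See https://reviews.llvm.org/D41290#961801.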

Differential Revision: https://reviews.llvm.org/D41490

Modified:
    llvm/trunk/lib/Support/YAMLTraits.cpp
    llvm/trunk/unittests/Support/YAMLIOTest.cpp

Modified: llvm/trunk/lib/Support/YAMLTraits.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/YAMLTraits.cpp?rev=321283&r1=321282&r2=321283&view=diff
==============================================================================
--- llvm/trunk/lib/Support/YAMLTraits.cpp (original)
+++ llvm/trunk/lib/Support/YAMLTraits.cpp Thu Dec 21 09:14:09 2017
@@ -657,7 +657,12 @@ void Output::scalarString(StringRef &S,
       }
       i = j + 1;
     } else if (MustQuote == QuotingType::Double &&
-               !sys::unicode::isPrintable(S[j])) {
+               !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) {
+      // If we're double quoting non-printable characters, we prefer printing
+      // them as "\x" + their hex representation. Note that special casing is
+      // needed for UTF-8, where a byte may be part of a UTF-8 sequence and
+      // appear as non-printable, in which case we want to print the correct
+      // unicode character and not its hex representation.
       output(StringRef(&Base[i], j - i)); // "flush"
       output(StringLiteral("\\x"));
 

Modified: llvm/trunk/unittests/Support/YAMLIOTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/YAMLIOTest.cpp?rev=321283&r1=321282&r2=321283&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/YAMLIOTest.cpp (original)
+++ llvm/trunk/unittests/Support/YAMLIOTest.cpp Thu Dec 21 09:14:09 2017
@@ -2541,3 +2541,31 @@ TEST(YAMLIO, TestEscapedSingleQuoteInsid
   ostr.flush();
   EXPECT_EQ("'abc''fdf'", out);
 }
+
+TEST(YAMLIO, TestEscapedUTF8SingleQuoteInsideDoubleQuote) {
+  std::string Id = "parameter 'параметр' is unused";
+
+  std::string out;
+  llvm::raw_string_ostream ostr(out);
+  Output xout(ostr, nullptr, 0);
+
+  llvm::yaml::EmptyContext Ctx;
+  yamlize(xout, Id, true, Ctx);
+
+  ostr.flush();
+  EXPECT_EQ("\"parameter 'параметр' is unused\"", out);
+}
+
+TEST(YAMLIO, TestEscapedUTF8) {
+  std::string Id = "/*параметр*/";
+
+  std::string out;
+  llvm::raw_string_ostream ostr(out);
+  Output xout(ostr, nullptr, 0);
+
+  llvm::yaml::EmptyContext Ctx;
+  yamlize(xout, Id, true, Ctx);
+
+  ostr.flush();
+  EXPECT_EQ("\"/*параметр*/\"", out);
+}




More information about the llvm-commits mailing list