[clang] e80748f - [clang-diff] Fix assertion error when dealing with wide strings

Johannes Altmanninger via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 7 11:25:40 PDT 2022


Author: Kaining Zhong
Date: 2022-06-07T20:24:34+02:00
New Revision: e80748ff8840a10bd7c7336eb5e98664480ba1ba

URL: https://github.com/llvm/llvm-project/commit/e80748ff8840a10bd7c7336eb5e98664480ba1ba
DIFF: https://github.com/llvm/llvm-project/commit/e80748ff8840a10bd7c7336eb5e98664480ba1ba.diff

LOG: [clang-diff] Fix assertion error when dealing with wide strings

Calling StringLiteral::getString directly on a wide string is not
currently supported, so getStmtValue in ASTDiff fails the assertion
that the StringLiteral has a character width of 1. This patch converts
wide strings to UTF-8 instead, handles the UTF-16 and UTF-32 encodings
the same way, and adds corresponding test cases.

Fixes https://github.com/llvm/llvm-project/issues/55771.

Reviewed By: johannes

Differential Revision: https://reviews.llvm.org/D126651

Added: 
    

Modified: 
    clang/lib/Tooling/ASTDiff/ASTDiff.cpp
    clang/test/Tooling/clang-diff-ast.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp
index 0821863adcc6e..786def58076a3 100644
--- a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include <limits>
 #include <memory>
@@ -463,8 +464,29 @@ std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const {
   }
   if (auto *D = dyn_cast<DeclRefExpr>(S))
     return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast<StringLiteral>(S))
+  if (auto *String = dyn_cast<StringLiteral>(S)) {
+    if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+      std::string UTF8Str;
+      unsigned int NumChars = String->getLength();
+      const char *Bytes = String->getBytes().data();
+      if (String->isWide()) {
+        const auto *Chars = reinterpret_cast<const wchar_t *>(Bytes);
+        if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+          return "";
+      } else if (String->isUTF16()) {
+        const auto *Chars = reinterpret_cast<const UTF16 *>(Bytes);
+        if (!convertUTF16ToUTF8String({Chars, NumChars}, UTF8Str))
+          return "";
+      } else {
+        assert(String->isUTF32() && "Unsupported string encoding.");
+        const auto *Chars = reinterpret_cast<const UTF32 *>(Bytes);
+        if (!convertUTF32ToUTF8String({Chars, NumChars}, UTF8Str))
+          return "";
+      }
+      return UTF8Str;
+    }
     return std::string(String->getString());
+  }
   if (auto *B = dyn_cast<CXXBoolLiteralExpr>(S))
     return B->getValue() ? "true" : "false";
   return "";

diff  --git a/clang/test/Tooling/clang-diff-ast.cpp b/clang/test/Tooling/clang-diff-ast.cpp
index a8efda50a4052..e67128a098ef1 100644
--- a/clang/test/Tooling/clang-diff-ast.cpp
+++ b/clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@ class X : Base {
     if (i == 0)
       // CHECK: StringLiteral: foo(
       return "foo";
+    // CHECK: StringLiteral: wide(
+    (void)L"wide";
+    // CHECK: StringLiteral: utf-16(
+    (void)u"utf-16";
+    // CHECK: StringLiteral: utf-32(
+    (void)U"utf-32";
     // CHECK-NOT: ImplicitCastExpr
     return 0;
   }


        


More information about the cfe-commits mailing list