[llvm] Create a CharSetConverter class with both iconv and icu support. (PR #138893)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 13:34:20 PDT 2025
================
@@ -0,0 +1,351 @@
+//===-- CharSet.cpp - Character set conversion class --------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides utility classes to convert between different character
+/// set encodings.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CharSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertEBCDIC.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <limits>
+#include <system_error>
+
+#if HAVE_ICU
+#include <unicode/ucnv.h>
+#elif HAVE_ICONV
+#include <iconv.h>
+#endif
+
+using namespace llvm;
+
+// Canonicalize a charset name using the alias matching rules described in
+// https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching.
+// Characters that are not alphanumeric are dropped, letters are lowercased,
+// and a '0' is kept only when the previously kept character was a digit.
+// The result is appended to Normalized.
+static void normalizeCharSetName(StringRef CSName,
+                                 SmallVectorImpl<char> &Normalized) {
+  bool LastKeptWasDigit = false;
+  for (char C : CSName) {
+    // Separators and punctuation never contribute to the normalized name.
+    if (!isAlnum(C))
+      continue;
+    const char Lower = toLower(C);
+    // A zero that does not follow a kept digit is a leading zero; skip it
+    // without touching the digit-tracking state.
+    if (Lower == '0' && !LastKeptWasDigit)
+      continue;
+    LastKeptWasDigit = isDigit(Lower);
+    Normalized.push_back(Lower);
+  }
+}
+
+// Maps the charset name to enum constant if possible.
----------------
s-barannikov wrote:
```suggestion
// Maps the encoding name to enum constant if possible.
```
https://github.com/llvm/llvm-project/pull/138893
More information about the llvm-commits mailing list