[llvm] [Support] Document and test ConvertEBCDIC UTF-8 error handling (PR #185176)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 03:26:46 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
Author: Levi Laine (levish0)
<details>
<summary>Changes</summary>
`ConverterEBCDIC::convertToEBCDIC()` already distinguishes between malformed
and truncated UTF-8 input, returning `std::errc::illegal_byte_sequence` and
`std::errc::invalid_argument` respectively.
Document this behavior in the public header and add unit tests for both cases.
---
Full diff: https://github.com/llvm/llvm-project/pull/185176.diff
2 Files Affected:
- (modified) llvm/include/llvm/Support/ConvertEBCDIC.h (+4)
- (modified) llvm/unittests/Support/ConvertEBCDICTest.cpp (+18)
``````````diff
diff --git a/llvm/include/llvm/Support/ConvertEBCDIC.h b/llvm/include/llvm/Support/ConvertEBCDIC.h
index 1ed88b9a905a7..3caa70b153bc6 100644
--- a/llvm/include/llvm/Support/ConvertEBCDIC.h
+++ b/llvm/include/llvm/Support/ConvertEBCDIC.h
@@ -20,6 +20,10 @@
namespace llvm {
namespace ConverterEBCDIC {
+/// Converts UTF-8 text to EBCDIC-1047.
+///
+/// Returns std::errc::illegal_byte_sequence for malformed UTF-8 and
+/// std::errc::invalid_argument for truncated UTF-8 input.
LLVM_ABI std::error_code convertToEBCDIC(StringRef Source,
SmallVectorImpl<char> &Result);
diff --git a/llvm/unittests/Support/ConvertEBCDICTest.cpp b/llvm/unittests/Support/ConvertEBCDICTest.cpp
index 557f29c391f9c..a541bafcaab02 100644
--- a/llvm/unittests/Support/ConvertEBCDICTest.cpp
+++ b/llvm/unittests/Support/ConvertEBCDICTest.cpp
@@ -40,6 +40,8 @@ static const char AccentE[] = "\xaa\x4a\xb1\xc1\x63\x67\x9e\xc5\x74\x71\x72"
// String with Cyrillic character ya.
static const char CyrillicUTF[] = "\xd0\xaf";
+static const char TruncatedUTF[] = "\xc2";
+static const char MalformedUTF[] = {'\xc2', 'A', '\0'};
TEST(ConverterEBCDIC, convertToEBCDIC) {
// Hello string.
@@ -94,4 +96,20 @@ TEST(ConverterEBCDIC, convertFromEBCDIC) {
Dst.clear();
}
+TEST(ConverterEBCDIC, convertToEBCDICRejectsTruncatedUTF8) {
+ SmallString<8> Dst;
+
+ std::error_code EC =
+ ConverterEBCDIC::convertToEBCDIC(StringRef(TruncatedUTF, 1), Dst);
+ EXPECT_EQ(EC, std::errc::invalid_argument);
+}
+
+TEST(ConverterEBCDIC, convertToEBCDICRejectsMalformedUTF8Continuation) {
+ SmallString<8> Dst;
+
+ std::error_code EC =
+ ConverterEBCDIC::convertToEBCDIC(StringRef(MalformedUTF, 2), Dst);
+ EXPECT_EQ(EC, std::errc::illegal_byte_sequence);
+}
+
} // namespace
``````````
</details>
https://github.com/llvm/llvm-project/pull/185176
More information about the llvm-commits mailing list