[llvm] [Support] Document and test ConvertEBCDIC UTF-8 error handling (PR #185176)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 7 03:26:46 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-support

Author: Levi Laine (levish0)

<details>
<summary>Changes</summary>

`ConverterEBCDIC::convertToEBCDIC()` already distinguishes between malformed
and truncated UTF-8 input: it returns `std::errc::illegal_byte_sequence` for
malformed input and `std::errc::invalid_argument` for truncated input.

Document this behavior in the public header and add unit tests for both cases.

---
Full diff: https://github.com/llvm/llvm-project/pull/185176.diff


2 Files Affected:

- (modified) llvm/include/llvm/Support/ConvertEBCDIC.h (+4) 
- (modified) llvm/unittests/Support/ConvertEBCDICTest.cpp (+18) 


``````````diff
diff --git a/llvm/include/llvm/Support/ConvertEBCDIC.h b/llvm/include/llvm/Support/ConvertEBCDIC.h
index 1ed88b9a905a7..3caa70b153bc6 100644
--- a/llvm/include/llvm/Support/ConvertEBCDIC.h
+++ b/llvm/include/llvm/Support/ConvertEBCDIC.h
@@ -20,6 +20,10 @@
 
 namespace llvm {
 namespace ConverterEBCDIC {
+/// Converts UTF-8 text to EBCDIC-1047.
+///
+/// Returns std::errc::illegal_byte_sequence for malformed UTF-8 and
+/// std::errc::invalid_argument for truncated UTF-8 input.
 LLVM_ABI std::error_code convertToEBCDIC(StringRef Source,
                                          SmallVectorImpl<char> &Result);
 
diff --git a/llvm/unittests/Support/ConvertEBCDICTest.cpp b/llvm/unittests/Support/ConvertEBCDICTest.cpp
index 557f29c391f9c..a541bafcaab02 100644
--- a/llvm/unittests/Support/ConvertEBCDICTest.cpp
+++ b/llvm/unittests/Support/ConvertEBCDICTest.cpp
@@ -40,6 +40,8 @@ static const char AccentE[] = "\xaa\x4a\xb1\xc1\x63\x67\x9e\xc5\x74\x71\x72"
 
 // String with Cyrillic character ya.
 static const char CyrillicUTF[] = "\xd0\xaf";
+static const char TruncatedUTF[] = "\xc2";
+static const char MalformedUTF[] = {'\xc2', 'A', '\0'};
 
 TEST(ConverterEBCDIC, convertToEBCDIC) {
   // Hello string.
@@ -94,4 +96,20 @@ TEST(ConverterEBCDIC, convertFromEBCDIC) {
   Dst.clear();
 }
 
+TEST(ConverterEBCDIC, convertToEBCDICRejectsTruncatedUTF8) {
+  SmallString<8> Dst;
+
+  std::error_code EC =
+      ConverterEBCDIC::convertToEBCDIC(StringRef(TruncatedUTF, 1), Dst);
+  EXPECT_EQ(EC, std::errc::invalid_argument);
+}
+
+TEST(ConverterEBCDIC, convertToEBCDICRejectsMalformedUTF8Continuation) {
+  SmallString<8> Dst;
+
+  std::error_code EC =
+      ConverterEBCDIC::convertToEBCDIC(StringRef(MalformedUTF, 2), Dst);
+  EXPECT_EQ(EC, std::errc::illegal_byte_sequence);
+}
+
 } // namespace

``````````

</details>


https://github.com/llvm/llvm-project/pull/185176


More information about the llvm-commits mailing list