[cfe-commits] r143415 - in /cfe/trunk: include/clang/Basic/ConvertUTF.h lib/Basic/ConvertUTF.c

Eli Friedman eli.friedman at gmail.com
Mon Oct 31 19:10:55 PDT 2011


Author: efriedma
Date: Mon Oct 31 21:10:54 2011
New Revision: 143415

URL: http://llvm.org/viewvc/llvm-project?rev=143415&view=rev
Log:
Move ConvertUTF8toUTF32 out of #if 0, in preparation for a patch which needs it.


Modified:
    cfe/trunk/include/clang/Basic/ConvertUTF.h
    cfe/trunk/lib/Basic/ConvertUTF.c

Modified: cfe/trunk/include/clang/Basic/ConvertUTF.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/ConvertUTF.h?rev=143415&r1=143414&r2=143415&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/ConvertUTF.h (original)
+++ cfe/trunk/include/clang/Basic/ConvertUTF.h Mon Oct 31 21:10:54 2011
@@ -98,7 +98,7 @@
     bit mask & shift operations.
 ------------------------------------------------------------------------ */
 
-typedef unsigned long   UTF32;  /* at least 32 bits */
+typedef unsigned int    UTF32;  /* at least 32 bits */
 typedef unsigned short  UTF16;  /* at least 16 bits */
 typedef unsigned char   UTF8;   /* typically 8 bits */
 typedef unsigned char   Boolean; /* 0 or 1 */
@@ -131,15 +131,15 @@
   const UTF8** sourceStart, const UTF8* sourceEnd,
   UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
 
+ConversionResult ConvertUTF8toUTF32 (
+  const UTF8** sourceStart, const UTF8* sourceEnd,
+  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
 #ifdef CLANG_NEEDS_THESE_ONE_DAY
 ConversionResult ConvertUTF16toUTF8 (
   const UTF16** sourceStart, const UTF16* sourceEnd,
   UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
 
-ConversionResult ConvertUTF8toUTF32 (
-  const UTF8** sourceStart, const UTF8* sourceEnd,
-  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
-
 ConversionResult ConvertUTF32toUTF8 (
   const UTF32** sourceStart, const UTF32* sourceEnd,
   UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);

Modified: cfe/trunk/lib/Basic/ConvertUTF.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/ConvertUTF.c?rev=143415&r1=143414&r2=143415&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/ConvertUTF.c (original)
+++ cfe/trunk/lib/Basic/ConvertUTF.c Mon Oct 31 21:10:54 2011
@@ -339,67 +339,6 @@
     return result;
 }
 
-/* --------------------------------------------------------------------- */
-
-ConversionResult ConvertUTF8toUTF32 (
-        const UTF8** sourceStart, const UTF8* sourceEnd, 
-        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF8* source = *sourceStart;
-    UTF32* target = *targetStart;
-    while (source < sourceEnd) {
-        UTF32 ch = 0;
-        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-        if (source + extraBytesToRead >= sourceEnd) {
-            result = sourceExhausted; break;
-        }
-        /* Do this check whether lenient or strict */
-        if (!isLegalUTF8(source, extraBytesToRead+1)) {
-            result = sourceIllegal;
-            break;
-        }
-        /*
-         * The cases all fall through. See "Note A" below.
-         */
-        switch (extraBytesToRead) {
-            case 5: ch += *source++; ch <<= 6;
-            case 4: ch += *source++; ch <<= 6;
-            case 3: ch += *source++; ch <<= 6;
-            case 2: ch += *source++; ch <<= 6;
-            case 1: ch += *source++; ch <<= 6;
-            case 0: ch += *source++;
-        }
-        ch -= offsetsFromUTF8[extraBytesToRead];
-
-        if (target >= targetEnd) {
-            source -= (extraBytesToRead+1); /* Back up the source pointer! */
-            result = targetExhausted; break;
-        }
-        if (ch <= UNI_MAX_LEGAL_UTF32) {
-            /*
-             * UTF-16 surrogate values are illegal in UTF-32, and anything
-             * over Plane 17 (> 0x10FFFF) is illegal.
-             */
-            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-                if (flags == strictConversion) {
-                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
-                    result = sourceIllegal;
-                    break;
-                } else {
-                    *target++ = UNI_REPLACEMENT_CHAR;
-                }
-            } else {
-                *target++ = ch;
-            }
-        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
-            result = sourceIllegal;
-            *target++ = UNI_REPLACEMENT_CHAR;
-        }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-}
 #endif
 
 /* --------------------------------------------------------------------- */
@@ -527,6 +466,68 @@
     return result;
 }
 
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF32 (
+        const UTF8** sourceStart, const UTF8* sourceEnd, 
+        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF32* target = *targetStart;
+    while (source < sourceEnd) {
+        UTF32 ch = 0;
+        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+        if (source + extraBytesToRead >= sourceEnd) {
+            result = sourceExhausted; break;
+        }
+        /* Do this check whether lenient or strict */
+        if (!isLegalUTF8(source, extraBytesToRead+1)) {
+            result = sourceIllegal;
+            break;
+        }
+        /*
+         * The cases all fall through. See "Note A" below.
+         */
+        switch (extraBytesToRead) {
+            case 5: ch += *source++; ch <<= 6;
+            case 4: ch += *source++; ch <<= 6;
+            case 3: ch += *source++; ch <<= 6;
+            case 2: ch += *source++; ch <<= 6;
+            case 1: ch += *source++; ch <<= 6;
+            case 0: ch += *source++;
+        }
+        ch -= offsetsFromUTF8[extraBytesToRead];
+
+        if (target >= targetEnd) {
+            source -= (extraBytesToRead+1); /* Back up the source pointer! */
+            result = targetExhausted; break;
+        }
+        if (ch <= UNI_MAX_LEGAL_UTF32) {
+            /*
+             * UTF-16 surrogate values are illegal in UTF-32, and anything
+             * over Plane 17 (> 0x10FFFF) is illegal.
+             */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                if (flags == strictConversion) {
+                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+                    result = sourceIllegal;
+                    break;
+                } else {
+                    *target++ = UNI_REPLACEMENT_CHAR;
+                }
+            } else {
+                *target++ = ch;
+            }
+        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+            result = sourceIllegal;
+            *target++ = UNI_REPLACEMENT_CHAR;
+        }
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
 /* ---------------------------------------------------------------------
 
     Note A.





More information about the cfe-commits mailing list