[cfe-commits] r113253 - in /cfe/trunk: include/clang/Basic/DiagnosticSemaKinds.td lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaChecking.cpp test/CodeGen/illegal-UTF8.m test/Sema/builtins.c

Fariborz Jahanian fjahanian at apple.com
Tue Sep 7 12:38:13 PDT 2010


Author: fjahanian
Date: Tue Sep  7 14:38:13 2010
New Revision: 113253

URL: http://llvm.org/viewvc/llvm-project?rev=113253&view=rev
Log:
Have Sema check for validity of CFString literal
instead of asserting in IRGen. Fixes radar 8390459.

Modified:
    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/lib/Sema/SemaChecking.cpp
    cfe/trunk/test/CodeGen/illegal-UTF8.m
    cfe/trunk/test/Sema/builtins.c

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Tue Sep  7 14:38:13 2010
@@ -3091,6 +3091,9 @@
   "CFString literal is not a string constant">;
 def warn_cfstring_literal_contains_nul_character : Warning<
   "CFString literal contains NUL character">;
+def warn_cfstring_truncated : Warning<
+  "input conversion stopped due to an input byte that does not "
+  "belong to the input codeset UTF-8">;
 
 // Statements.
 def err_continue_not_in_loop : Error<

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Sep  7 14:38:13 2010
@@ -1498,15 +1498,6 @@
                                                &ToPtr, ToPtr + NumBytes,
                                                strictConversion);
 
-  // Check for conversion failure.
-  if (Result != conversionOK) {
-    // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove
-    // this duplicate code.
-    assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed");
-    StringLength = NumBytes;
-    return Map.GetOrCreateValue(String);
-  }
-
   // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
 

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Tue Sep  7 14:38:13 2010
@@ -32,6 +32,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/ConvertUTF.h"
+
 #include <limits>
 using namespace clang;
 using namespace sema;
@@ -581,9 +583,6 @@
 
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
-/// FIXME: GCC currently emits the following warning:
-/// "warning: input conversion stopped due to an input byte that does not
-///           belong to the input codeset UTF-8"
 /// Note: It might also make sense to do the UTF-16 conversion here (would
 /// simplify the backend).
 bool Sema::CheckObjCString(Expr *Arg) {
@@ -602,7 +601,21 @@
          diag::warn_cfstring_literal_contains_nul_character)
       << Arg->getSourceRange();
   }
-
+  if (Literal->containsNonAsciiOrNull()) {
+    llvm::StringRef String = Literal->getString();
+    unsigned NumBytes = String.size();
+    llvm::SmallVector<UTF16, 128> ToBuf(NumBytes);
+    const UTF8 *FromPtr = (UTF8 *)String.data();
+    UTF16 *ToPtr = &ToBuf[0];
+    
+    ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
+                                                 &ToPtr, ToPtr + NumBytes,
+                                                 strictConversion);
+    // Check for conversion failure.
+    if (Result != conversionOK)
+      Diag(Arg->getLocStart(),
+           diag::warn_cfstring_truncated) << Arg->getSourceRange();
+  }
   return false;
 }
 

Modified: cfe/trunk/test/CodeGen/illegal-UTF8.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/illegal-UTF8.m?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/illegal-UTF8.m (original)
+++ cfe/trunk/test/CodeGen/illegal-UTF8.m Tue Sep  7 14:38:13 2010
@@ -2,7 +2,5 @@
 
 @class NSString;
 
-// FIXME: GCC emits the following warning:
-// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
 
-NSString *S = @"\xff\xff___WAIT___";
+NSString *S = @"\xff\xff___WAIT___"; // expected-warning {{input conversion stopped due to an input byte that does not belong to the input codeset UTF-8}}

Modified: cfe/trunk/test/Sema/builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/builtins.c?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/test/Sema/builtins.c (original)
+++ cfe/trunk/test/Sema/builtins.c Tue Sep  7 14:38:13 2010
@@ -26,7 +26,7 @@
 #define CFSTR __builtin___CFStringMakeConstantString
 void test7() {
   const void *X;
-  X = CFSTR("\242");
+  X = CFSTR("\242"); // expected-warning {{input conversion stopped}}
   X = CFSTR("\0"); // expected-warning {{ CFString literal contains NUL character }}
   X = CFSTR(242); // expected-error {{ CFString literal is not a string constant }} expected-warning {{incompatible integer to pointer conversion}}
   X = CFSTR("foo", "bar"); // expected-error {{too many arguments to function call}}





More information about the cfe-commits mailing list