[cfe-commits] r113253 - in /cfe/trunk: include/clang/Basic/DiagnosticSemaKinds.td lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaChecking.cpp test/CodeGen/illegal-UTF8.m test/Sema/builtins.c
Fariborz Jahanian
fjahanian at apple.com
Tue Sep 7 12:38:13 PDT 2010
Author: fjahanian
Date: Tue Sep 7 14:38:13 2010
New Revision: 113253
URL: http://llvm.org/viewvc/llvm-project?rev=113253&view=rev
Log:
Have Sema check for validity of CFString literal
instead of asserting in IRGen. Fixes radar 8390459.
Modified:
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/CodeGen/illegal-UTF8.m
cfe/trunk/test/Sema/builtins.c
Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Tue Sep 7 14:38:13 2010
@@ -3091,6 +3091,9 @@
"CFString literal is not a string constant">;
def warn_cfstring_literal_contains_nul_character : Warning<
"CFString literal contains NUL character">;
+def warn_cfstring_truncated : Warning<
+ "input conversion stopped due to an input byte that does not "
+ "belong to the input codeset UTF-8">;
// Statements.
def err_continue_not_in_loop : Error<
Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Sep 7 14:38:13 2010
@@ -1498,15 +1498,6 @@
&ToPtr, ToPtr + NumBytes,
strictConversion);
- // Check for conversion failure.
- if (Result != conversionOK) {
- // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove
- // this duplicate code.
- assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed");
- StringLength = NumBytes;
- return Map.GetOrCreateValue(String);
- }
-
// ConvertUTF8toUTF16 returns the length in ToPtr.
StringLength = ToPtr - &ToBuf[0];
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Tue Sep 7 14:38:13 2010
@@ -32,6 +32,8 @@
#include "llvm/Support/raw_ostream.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/ConvertUTF.h"
+
#include <limits>
using namespace clang;
using namespace sema;
@@ -581,9 +583,6 @@
/// CheckObjCString - Checks that the argument to the builtin
/// CFString constructor is correct
-/// FIXME: GCC currently emits the following warning:
-/// "warning: input conversion stopped due to an input byte that does not
-/// belong to the input codeset UTF-8"
/// Note: It might also make sense to do the UTF-16 conversion here (would
/// simplify the backend).
bool Sema::CheckObjCString(Expr *Arg) {
@@ -602,7 +601,21 @@
diag::warn_cfstring_literal_contains_nul_character)
<< Arg->getSourceRange();
}
-
+ if (Literal->containsNonAsciiOrNull()) {
+ llvm::StringRef String = Literal->getString();
+ unsigned NumBytes = String.size();
+ llvm::SmallVector<UTF16, 128> ToBuf(NumBytes);
+ const UTF8 *FromPtr = (UTF8 *)String.data();
+ UTF16 *ToPtr = &ToBuf[0];
+
+ ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
+ &ToPtr, ToPtr + NumBytes,
+ strictConversion);
+ // Check for conversion failure.
+ if (Result != conversionOK)
+ Diag(Arg->getLocStart(),
+ diag::warn_cfstring_truncated) << Arg->getSourceRange();
+ }
return false;
}
Modified: cfe/trunk/test/CodeGen/illegal-UTF8.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/illegal-UTF8.m?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/illegal-UTF8.m (original)
+++ cfe/trunk/test/CodeGen/illegal-UTF8.m Tue Sep 7 14:38:13 2010
@@ -2,7 +2,5 @@
@class NSString;
-// FIXME: GCC emits the following warning:
-// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
-NSString *S = @"\xff\xff___WAIT___";
+NSString *S = @"\xff\xff___WAIT___"; // expected-warning {{input conversion stopped due to an input byte that does not belong to the input codeset UTF-8}}
Modified: cfe/trunk/test/Sema/builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/builtins.c?rev=113253&r1=113252&r2=113253&view=diff
==============================================================================
--- cfe/trunk/test/Sema/builtins.c (original)
+++ cfe/trunk/test/Sema/builtins.c Tue Sep 7 14:38:13 2010
@@ -26,7 +26,7 @@
#define CFSTR __builtin___CFStringMakeConstantString
void test7() {
const void *X;
- X = CFSTR("\242");
+ X = CFSTR("\242"); // expected-warning {{input conversion stopped}}
X = CFSTR("\0"); // expected-warning {{ CFString literal contains NUL character }}
X = CFSTR(242); // expected-error {{ CFString literal is not a string constant }} expected-warning {{incompatible integer to pointer conversion}}
X = CFSTR("foo", "bar"); // expected-error {{too many arguments to function call}}
More information about the cfe-commits
mailing list