[cfe-commits] r64924 - in /cfe/trunk: include/clang/AST/Expr.h lib/Sema/Sema.h lib/Sema/SemaChecking.cpp test/SemaObjC/exprs.m
Chris Lattner
sabre at nondot.org
Wed Feb 18 09:49:49 PST 2009
Author: lattner
Date: Wed Feb 18 11:49:48 2009
New Revision: 64924
URL: http://llvm.org/viewvc/llvm-project?rev=64924&view=rev
Log:
Start improving diagnostics that relate to subcharacters of string literals.
As a first step, handle diagnostics in StringLiterals that are formed by token pasting (concatenation of adjacent string literal tokens).
For example, we now handle:
id str2 = @"foo"
"bar"
@"baz"
" b\0larg"; // expected-warning {{literal contains NUL character}}
Correctly:
test/SemaObjC/exprs.m:17:15: warning: CFString literal contains NUL character
" b\0larg"; // expected-warning {{literal contains NUL character}}
~~~^~~~~~~
There are several other related issues still to be done.
Modified:
cfe/trunk/include/clang/AST/Expr.h
cfe/trunk/lib/Sema/Sema.h
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/SemaObjC/exprs.m
Modified: cfe/trunk/include/clang/AST/Expr.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Expr.h?rev=64924&r1=64923&r2=64924&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/Expr.h (original)
+++ cfe/trunk/include/clang/AST/Expr.h Wed Feb 18 11:49:48 2009
@@ -520,7 +520,6 @@
typedef const SourceLocation *tokloc_iterator;
tokloc_iterator tokloc_begin() const { return TokLocs; }
tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
-
virtual SourceRange getSourceRange() const {
return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]);
Modified: cfe/trunk/lib/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.h?rev=64924&r1=64923&r2=64924&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/Sema.h (original)
+++ cfe/trunk/lib/Sema/Sema.h Wed Feb 18 11:49:48 2009
@@ -1989,6 +1989,8 @@
private:
Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
CallExpr *TheCall);
+ SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
+ unsigned ByteNo) const;
bool CheckObjCString(Expr *Arg);
bool SemaBuiltinVAStart(CallExpr *TheCall);
bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=64924&r1=64923&r2=64924&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Feb 18 11:49:48 2009
@@ -20,6 +20,71 @@
#include "clang/Lex/Preprocessor.h"
using namespace clang;
+/// getLocationOfStringLiteralByte - Return a source location that points to the
+/// specified byte of the specified string literal.
+///
+/// Strings are amazingly complex.  They can be formed from multiple tokens and
+/// can have escape sequences in them in addition to the usual trigraph and
+/// escaped newline business.  This routine handles the multiple-token part of
+/// that complexity; escape sequences are not yet accounted for (see the FIXME
+/// below), so ByteNo is currently measured against the spelling of each token.
+///
+/// SL must not be a wide string.  ByteNo is a zero-based offset into the
+/// concatenated string; an offset that is exactly one past the last byte of
+/// the final token is also accepted so that callers can point at the end of
+/// the string.  If the token containing the byte comes from a macro expansion,
+/// the start of that token is returned instead of a location inside it.
+///
+SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
+                                                    unsigned ByteNo) const {
+  assert(!SL->isWide() && "This doesn't work for wide strings yet");
+
+  // Create a LangOptions struct and enable trigraphs.  This is sufficient for
+  // relexing tokens, and it is the same for every token, so build it once.
+  LangOptions LangOpts;
+  LangOpts.Trigraphs = true;
+
+  // Loop over all of the tokens in this string until we find the one that
+  // contains the byte we're looking for.
+  unsigned TokNo = 0;
+  while (1) {
+    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
+    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
+
+    // Get the spelling of the string so that we can get the data that makes up
+    // the string literal, not the identifier for the macro it is potentially
+    // expanded through.
+    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
+
+    // Re-lex the token to get its length and original spelling.
+    std::pair<FileID, unsigned> LocInfo =
+      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
+    std::pair<const char *,const char *> Buffer =
+      SourceMgr.getBufferData(LocInfo.first);
+    const char *StrData = Buffer.first+LocInfo.second;
+
+    // Create a lexer starting at the beginning of this token.
+    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
+                   Buffer.second);
+    Token TheTok;
+    TheLexer.LexFromRawLexer(TheTok);
+
+    // The length of the string is the token length minus the two quotes.
+    unsigned TokNumBytes = TheTok.getLength()-2;
+
+    // If we found the token we're looking for, return the location.  The
+    // one-past-the-end position is only valid in the *last* token; in any
+    // earlier token that offset is byte 0 of the following token.  TokNo is
+    // always < getNumConcatenated() here, so the last token is index N-1.
+    // FIXME: This should consider character escapes!
+    if (ByteNo < TokNumBytes ||
+        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated()-1)) {
+      // If the original token came from a macro expansion, just return the
+      // start of the token.  We don't want to magically jump to the spelling
+      // for a diagnostic.  We do the above business in case some tokens come
+      // from a macro expansion but others don't.
+      if (!StrTokLoc.isFileID()) return StrTokLoc;
+
+      // We advance +1 to step over the '"'.
+      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
+    }
+
+    // Move to the next string token.
+    ++TokNo;
+    ByteNo -= TokNumBytes;
+  }
+}
+
+
/// CheckFunctionCall - Check a direct function call for various correctness
/// and safety properties not strictly enforced by the C type system.
Action::OwningExprResult
@@ -108,14 +173,14 @@
for (unsigned i = 0; i < Length; ++i) {
if (!isascii(Data[i])) {
- Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+ Diag(getLocationOfStringLiteralByte(Literal, i),
diag::warn_cfstring_literal_contains_non_ascii_character)
<< Arg->getSourceRange();
break;
}
if (!Data[i]) {
- Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+ Diag(getLocationOfStringLiteralByte(Literal, i),
diag::warn_cfstring_literal_contains_nul_character)
<< Arg->getSourceRange();
break;
@@ -565,7 +630,7 @@
if (Str[StrIdx] == '\0') {
// The string returned by getStrData() is not null-terminated,
// so the presence of a null character is likely an error.
- Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
+ Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
diag::warn_printf_format_string_contains_null_char)
<< OrigFormatExpr->getSourceRange();
return;
@@ -587,8 +652,7 @@
++numConversions;
if (!HasVAListArg && numConversions > numDataArgs) {
- SourceLocation Loc = FExpr->getLocStart();
- Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
+ SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
if (Str[StrIdx-1] == '.')
Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
@@ -607,8 +671,7 @@
if (BT->getKind() == BuiltinType::Int)
break;
- SourceLocation Loc =
- PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
+ SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
if (Str[StrIdx-1] == '.')
Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
@@ -655,8 +718,8 @@
case 'n': {
++numConversions;
CurrentState = state_OrdChr;
- SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
- LastConversionIdx+1);
+ SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
+ LastConversionIdx);
Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
break;
@@ -669,8 +732,8 @@
CurrentState = state_OrdChr;
else {
// Issue a warning: invalid format conversion.
- SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
- LastConversionIdx+1);
+ SourceLocation Loc =
+ getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx,
@@ -690,8 +753,8 @@
CurrentState = state_OrdChr;
else {
// Issue a warning: invalid format conversion.
- SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
- LastConversionIdx+1);
+ SourceLocation Loc =
+ getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx, Str+StrIdx)
@@ -713,8 +776,8 @@
if (CurrentState == state_Conversion) {
// Issue a warning: invalid format conversion.
- SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
- LastConversionIdx+1);
+ SourceLocation Loc =
+ getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx,
@@ -727,8 +790,8 @@
// CHECK: Does the number of format conversions exceed the number
// of data arguments?
if (numConversions > numDataArgs) {
- SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
- LastConversionIdx);
+ SourceLocation Loc =
+ getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_insufficient_data_args)
<< OrigFormatExpr->getSourceRange();
Modified: cfe/trunk/test/SemaObjC/exprs.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaObjC/exprs.m?rev=64924&r1=64923&r2=64924&view=diff
==============================================================================
--- cfe/trunk/test/SemaObjC/exprs.m (original)
+++ cfe/trunk/test/SemaObjC/exprs.m Wed Feb 18 11:49:48 2009
@@ -1,7 +1,19 @@
// RUN: clang %s -fsyntax-only -verify
// rdar://6597252
-Class foo(Class X) {
+Class test1(Class X) {
return 1 ? X : X;
}
+
+// rdar://6079877 - a NUL inside a concatenated string must be diagnosed on the line of the token that contains it.
+void test2() {
+ id str = @"foo"
+ "bar\0" // expected-warning {{literal contains NUL character}}
+ @"baz" " blarg";
+ id str2 = @"foo"
+ "bar"
+ @"baz"
+ " b\0larg"; // expected-warning {{literal contains NUL character}}
+
+}
\ No newline at end of file
More information about the cfe-commits
mailing list