[cfe-commits] r94837 - in /cfe/trunk: include/clang/Basic/DiagnosticSemaKinds.td lib/Sema/Sema.h lib/Sema/SemaChecking.cpp test/Sema/format-strings.c test/SemaObjC/format-strings-objc.m

Ted Kremenek kremenek at apple.com
Fri Jan 29 12:55:36 PST 2010


Author: kremenek
Date: Fri Jan 29 14:55:36 2010
New Revision: 94837

URL: http://llvm.org/viewvc/llvm-project?rev=94837&view=rev
Log:
Switch Sema over to using the new implementation of format string
checking.  It passes all existing tests, and the diagnostics have been
refined to provide better range information (we now highlight
individual format specifiers) and more precise wording in the
diagnostics.

Modified:
    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
    cfe/trunk/lib/Sema/Sema.h
    cfe/trunk/lib/Sema/SemaChecking.cpp
    cfe/trunk/test/Sema/format-strings.c
    cfe/trunk/test/SemaObjC/format-strings-objc.m

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=94837&r1=94836&r2=94837&view=diff

==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Jan 29 14:55:36 2010
@@ -2442,11 +2442,11 @@
 def warn_printf_insufficient_data_args : Warning<
   "more '%%' conversions than data arguments">, InGroup<Format>;
 def warn_printf_too_many_data_args : Warning<
-  "more data arguments than '%%' conversions">, InGroup<FormatExtraArgs>;
+  "more data arguments than format specifiers">, InGroup<FormatExtraArgs>;
 def warn_printf_invalid_conversion : Warning<
-  "invalid conversion '%0'">, InGroup<Format>;
+  "invalid conversion specifier '%0'">, InGroup<Format>;
 def warn_printf_incomplete_specifier : Warning<
-  "incomplete format specifier '%0'">, InGroup<Format>;
+  "incomplete format specifier">, InGroup<Format>;
 def warn_printf_missing_format_string : Warning<
   "format string missing">, InGroup<Format>;
 def warn_null_arg : Warning<

Modified: cfe/trunk/lib/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.h?rev=94837&r1=94836&r2=94837&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/Sema.h (original)
+++ cfe/trunk/lib/Sema/Sema.h Fri Jan 29 14:55:36 2010
@@ -4052,13 +4052,6 @@
   bool SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
                               bool HasVAListArg, unsigned format_idx,
                               unsigned firstDataArg);
-  // FIXME: This function is placeholder for transitioning the printf
-  //  format string checking to a new codepath.  It will eventually
-  //  replace CheckPrintfString().
-  void AlternateCheckPrintfString(const StringLiteral *FExpr,
-                                  const Expr *OrigFormatExpr,
-                                  const CallExpr *TheCall, bool HasVAListArg,
-                                  unsigned format_idx, unsigned firstDataArg);  
   void CheckPrintfString(const StringLiteral *FExpr, const Expr *OrigFormatExpr,
                          const CallExpr *TheCall, bool HasVAListArg,
                          unsigned format_idx, unsigned firstDataArg);

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=94837&r1=94836&r2=94837&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Jan 29 14:55:36 2010
@@ -1033,254 +1033,6 @@
            << OrigFormatExpr->getSourceRange();
 }
 
-void Sema::CheckPrintfString(const StringLiteral *FExpr,
-                             const Expr *OrigFormatExpr,
-                             const CallExpr *TheCall, bool HasVAListArg,
-                             unsigned format_idx, unsigned firstDataArg) {
-    
-  static bool UseAlternatePrintfChecking = false;
-  if (UseAlternatePrintfChecking) {
-    AlternateCheckPrintfString(FExpr, OrigFormatExpr, TheCall,
-                               HasVAListArg, format_idx, firstDataArg);
-    return;
-  }    
-  
-
-  const ObjCStringLiteral *ObjCFExpr =
-    dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
-
-  // CHECK: is the format string a wide literal?
-  if (FExpr->isWide()) {
-    Diag(FExpr->getLocStart(),
-         diag::warn_printf_format_string_is_wide_literal)
-      << OrigFormatExpr->getSourceRange();
-    return;
-  }
-
-  // Str - The format string.  NOTE: this is NOT null-terminated!
-  const char *Str = FExpr->getStrData();
-
-  // CHECK: empty format string?
-  unsigned StrLen = FExpr->getByteLength();
-
-  if (StrLen == 0) {
-    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
-      << OrigFormatExpr->getSourceRange();
-    return;
-  }
-  
-  // We process the format string using a binary state machine.  The
-  // current state is stored in CurrentState.
-  enum {
-    state_OrdChr,
-    state_Conversion
-  } CurrentState = state_OrdChr;
-
-  // numConversions - The number of conversions seen so far.  This is
-  //  incremented as we traverse the format string.
-  unsigned numConversions = 0;
-
-  // numDataArgs - The number of data arguments after the format
-  //  string.  This can only be determined for non vprintf-like
-  //  functions.  For those functions, this value is 1 (the sole
-  //  va_arg argument).
-  unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
-
-  // Inspect the format string.
-  unsigned StrIdx = 0;
-
-  // LastConversionIdx - Index within the format string where we last saw
-  //  a '%' character that starts a new format conversion.
-  unsigned LastConversionIdx = 0;
-
-  for (; StrIdx < StrLen; ++StrIdx) {
-
-    // Is the number of detected conversion conversions greater than
-    // the number of matching data arguments?  If so, stop.
-    if (!HasVAListArg && numConversions > numDataArgs) break;
-
-    // Handle "\0"
-    if (Str[StrIdx] == '\0') {
-      // The string returned by getStrData() is not null-terminated,
-      // so the presence of a null character is likely an error.
-      Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
-           diag::warn_printf_format_string_contains_null_char)
-        <<  OrigFormatExpr->getSourceRange();
-      return;
-    }
-
-    // Ordinary characters (not processing a format conversion).
-    if (CurrentState == state_OrdChr) {
-      if (Str[StrIdx] == '%') {
-        CurrentState = state_Conversion;
-        LastConversionIdx = StrIdx;
-      }
-      continue;
-    }
-
-    // Seen '%'.  Now processing a format conversion.
-    switch (Str[StrIdx]) {
-    // Handle dynamic precision or width specifier.
-    case '*': {
-      ++numConversions;
-
-      if (!HasVAListArg) {
-        if (numConversions > numDataArgs) {
-          SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
-
-          if (Str[StrIdx-1] == '.')
-            Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
-              << OrigFormatExpr->getSourceRange();
-          else
-            Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
-              << OrigFormatExpr->getSourceRange();
-
-          // Don't do any more checking.  We'll just emit spurious errors.
-          return;
-        }
-
-        // Perform type checking on width/precision specifier.
-        const Expr *E = TheCall->getArg(format_idx+numConversions);
-        if (const BuiltinType *BT = E->getType()->getAs<BuiltinType>())
-          if (BT->getKind() == BuiltinType::Int)
-            break;
-
-        SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
-
-        if (Str[StrIdx-1] == '.')
-          Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
-          << E->getType() << E->getSourceRange();
-        else
-          Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
-          << E->getType() << E->getSourceRange();
-
-        break;
-      }
-    }
-
-    // Characters which can terminate a format conversion
-    // (e.g. "%d").  Characters that specify length modifiers or
-    // other flags are handled by the default case below.
-    //
-    // FIXME: additional checks will go into the following cases.
-    case 'i':
-    case 'd':
-    case 'o':
-    case 'u':
-    case 'x':
-    case 'X':
-    case 'e':
-    case 'E':
-    case 'f':
-    case 'F':
-    case 'g':
-    case 'G':
-    case 'a':
-    case 'A':
-    case 'c':
-    case 's':
-    case 'p':
-      ++numConversions;
-      CurrentState = state_OrdChr;
-      break;
-
-    case 'm':
-      // FIXME: Warn in situations where this isn't supported!
-      CurrentState = state_OrdChr;
-      break;
-
-    // CHECK: Are we using "%n"?  Issue a warning.
-    case 'n': {
-      ++numConversions;
-      CurrentState = state_OrdChr;
-      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
-                                                          LastConversionIdx);
-
-      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
-      break;
-    }
-
-    // Handle "%@"
-    case '@':
-      // %@ is allowed in ObjC format strings only.
-      if (ObjCFExpr != NULL)
-        CurrentState = state_OrdChr;
-      else {
-        // Issue a warning: invalid format conversion.
-        SourceLocation Loc =
-          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
-
-        Diag(Loc, diag::warn_printf_invalid_conversion)
-          <<  std::string(Str+LastConversionIdx,
-                          Str+std::min(LastConversionIdx+2, StrLen))
-          << OrigFormatExpr->getSourceRange();
-      }
-      ++numConversions;
-      break;
-
-    // Handle "%%"
-    case '%':
-      // Sanity check: Was the first "%" character the previous one?
-      // If not, we will assume that we have a malformed format
-      // conversion, and that the current "%" character is the start
-      // of a new conversion.
-      if (StrIdx - LastConversionIdx == 1)
-        CurrentState = state_OrdChr;
-      else {
-        // Issue a warning: invalid format conversion.
-        SourceLocation Loc =
-          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
-
-        Diag(Loc, diag::warn_printf_invalid_conversion)
-          << std::string(Str+LastConversionIdx, Str+StrIdx)
-          << OrigFormatExpr->getSourceRange();
-
-        // This conversion is broken.  Advance to the next format
-        // conversion.
-        LastConversionIdx = StrIdx;
-        ++numConversions;
-      }
-      break;
-
-    default:
-      // This case catches all other characters: flags, widths, etc.
-      // We should eventually process those as well.
-      break;
-    }
-  }
-
-  if (CurrentState == state_Conversion) {
-    // Issue a warning: invalid format conversion.
-    SourceLocation Loc =
-      getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
-
-    Diag(Loc, diag::warn_printf_invalid_conversion)
-      << std::string(Str+LastConversionIdx,
-                     Str+std::min(LastConversionIdx+2, StrLen))
-      << OrigFormatExpr->getSourceRange();
-    return;
-  }
-
-  if (!HasVAListArg) {
-    // CHECK: Does the number of format conversions exceed the number
-    //        of data arguments?
-    if (numConversions > numDataArgs) {
-      SourceLocation Loc =
-        getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
-
-      Diag(Loc, diag::warn_printf_insufficient_data_args)
-        << OrigFormatExpr->getSourceRange();
-    }
-    // CHECK: Does the number of data arguments exceed the number of
-    //        format conversions in the format string?
-    else if (numConversions < numDataArgs)
-      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
-           diag::warn_printf_too_many_data_args)
-        << OrigFormatExpr->getSourceRange();
-  }
-}
-
-
 namespace {
 class CheckPrintfHandler : public FormatStringHandler {
   Sema &S;
@@ -1320,20 +1072,29 @@
                              const char *startSpecifier,
                              unsigned specifierLen);
 private:
-  SourceRange getFormatRange();
+  SourceRange getFormatStringRange();
+  SourceRange getFormatSpecifierRange(const char *startSpecifier,
+									  unsigned specifierLen);
   SourceLocation getLocationOfByte(const char *x);
   
   bool HandleAmount(const analyze_printf::OptionalAmount &Amt,
-                    unsigned MissingArgDiag, unsigned BadTypeDiag);
+                    unsigned MissingArgDiag, unsigned BadTypeDiag,
+					const char *startSpecifier, unsigned specifierLen);
   
   const Expr *getDataArg(unsigned i) const;
 };
 }
 
-SourceRange CheckPrintfHandler::getFormatRange() {
+SourceRange CheckPrintfHandler::getFormatStringRange() {
   return OrigFormatExpr->getSourceRange();
 }
 
+SourceRange CheckPrintfHandler::
+getFormatSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
+  return SourceRange(getLocationOfByte(startSpecifier),
+					 getLocationOfByte(startSpecifier+specifierLen-1));
+}
+
 SourceLocation CheckPrintfHandler::getLocationOfByte(const char *x) {
   return S.getLocationOfStringLiteralByte(FExpr, x - Beg);  
 }
@@ -1343,8 +1104,7 @@
                                 unsigned specifierLen) {  
   SourceLocation Loc = getLocationOfByte(startSpecifier);
   S.Diag(Loc, diag::warn_printf_incomplete_specifier)
-    << llvm::StringRef(startSpecifier, specifierLen)
-    << getFormatRange();
+    << getFormatSpecifierRange(startSpecifier, specifierLen);
 }
 
 void CheckPrintfHandler::
@@ -1358,14 +1118,14 @@
   SourceLocation Loc = getLocationOfByte(CS.getStart());
   S.Diag(Loc, diag::warn_printf_invalid_conversion)
       << llvm::StringRef(CS.getStart(), CS.getLength())
-      << getFormatRange();  
+      << getFormatSpecifierRange(startSpecifier, specifierLen);  
 }
 
 void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
   // The presence of a null character is likely an error.
   S.Diag(getLocationOfByte(nullCharacter),
          diag::warn_printf_format_string_contains_null_char)
-    << getFormatRange();
+    << getFormatStringRange();
 }
 
 const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
@@ -1375,14 +1135,16 @@
 bool
 CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
                                  unsigned MissingArgDiag,
-                                 unsigned BadTypeDiag) {
+                                 unsigned BadTypeDiag,
+								 const char *startSpecifier,
+								 unsigned specifierLen) {
 
   if (Amt.hasDataArgument()) {
     ++NumConversions;
     if (!HasVAListArg) {
       if (NumConversions > NumDataArgs) {
         S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag)
-          << getFormatRange();      
+          << getFormatSpecifierRange(startSpecifier, specifierLen);      
         // Don't do any more checking.  We will just emit
         // spurious errors.
         return false;
@@ -1394,7 +1156,9 @@
       const BuiltinType *BT = T->getAs<BuiltinType>();
       if (!BT || BT->getKind() != BuiltinType::Int) {
         S.Diag(getLocationOfByte(Amt.getStart()), BadTypeDiag)
-          << T << getFormatRange() << Arg->getSourceRange();
+          << T
+		  << getFormatSpecifierRange(startSpecifier, specifierLen)
+		  << Arg->getSourceRange();
         // Don't do any more checking.  We will just emit
         // spurious errors.
         return false;
@@ -1416,13 +1180,15 @@
   // have matching data arguments.
   if (!HandleAmount(FS.getFieldWidth(),
                     diag::warn_printf_asterisk_width_missing_arg,
-                    diag::warn_printf_asterisk_width_wrong_type)) {
+                    diag::warn_printf_asterisk_width_wrong_type,
+					startSpecifier, specifierLen)) {
     return false;
   }
     
   if (!HandleAmount(FS.getPrecision(),
                     diag::warn_printf_asterisk_precision_missing_arg,
-                    diag::warn_printf_asterisk_precision_wrong_type)) {
+                    diag::warn_printf_asterisk_precision_wrong_type,
+					startSpecifier, specifierLen)) {
     return false;
   }
 
@@ -1434,6 +1200,12 @@
     // Continue checking the other format specifiers.
     return true;
   }
+
+  if (!CS.consumesDataArgument()) {
+    // FIXME: Technically specifying a precision or field width here
+    // makes no sense.  Worth issuing a warning at some point.
+	return true;
+  }
   
   ++NumConversions;  
   
@@ -1441,7 +1213,7 @@
   // a possible security issue.
   if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) {
     S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
-      << getFormatRange();           
+      << getFormatSpecifierRange(startSpecifier, specifierLen);           
     // Continue checking the other format specifiers.
     return true;
   }
@@ -1454,7 +1226,7 @@
   if (NumConversions > NumDataArgs) {
     S.Diag(getLocationOfByte(CS.getStart()),
            diag::warn_printf_insufficient_data_args)
-      << getFormatRange();    
+      << getFormatSpecifierRange(startSpecifier, specifierLen);    
     // Don't do any more checking.
     return false;
   }
@@ -1468,14 +1240,13 @@
   if (!HasVAListArg && NumConversions < NumDataArgs)
     S.Diag(getDataArg(NumConversions+1)->getLocStart(),
            diag::warn_printf_too_many_data_args)
-      << getFormatRange();
+      << getFormatStringRange();
 }
 
-void
-Sema::AlternateCheckPrintfString(const StringLiteral *FExpr,
-                                 const Expr *OrigFormatExpr,
-                                 const CallExpr *TheCall, bool HasVAListArg,
-                                 unsigned format_idx, unsigned firstDataArg) {
+void Sema::CheckPrintfString(const StringLiteral *FExpr,
+							 const Expr *OrigFormatExpr,
+							 const CallExpr *TheCall, bool HasVAListArg,
+							 unsigned format_idx, unsigned firstDataArg) {
   
   // CHECK: is the format string a wide literal?
   if (FExpr->isWide()) {

Modified: cfe/trunk/test/Sema/format-strings.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/format-strings.c?rev=94837&r1=94836&r2=94837&view=diff

==============================================================================
--- cfe/trunk/test/Sema/format-strings.c (original)
+++ cfe/trunk/test/Sema/format-strings.c Fri Jan 29 14:55:36 2010
@@ -51,7 +51,7 @@
   printf(i == 1 ? "yes" : "no"); // no-warning
   printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning
   printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}}
-  printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than '%' conversions}}
+  printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}}
 }
 
 void check_writeback_specifier()
@@ -65,10 +65,10 @@
 
 void check_invalid_specifier(FILE* fp, char *buf)
 {
-  printf("%s%lb%d","unix",10,20); // expected-warning {{lid conversion '%lb'}}
-  fprintf(fp,"%%%l"); // expected-warning {{lid conversion '%l'}}
+  printf("%s%lb%d","unix",10,20); // expected-warning {{invalid conversion specifier 'b'}}
+  fprintf(fp,"%%%l"); // expected-warning {{incomplete format specifier}}
   sprintf(buf,"%%%%%ld%d%d", 1, 2, 3); // no-warning
-  snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning {{sion '%;'}}
+  snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning {{invalid conversion specifier ';'}}
 }
 
 void check_null_char_string(char* b)
@@ -139,3 +139,21 @@
   vprintf ("%*.*d", v8);  // no-warning
 }
 
+void test10(int x, float f, int i) {
+  printf("%@", 12); // expected-warning{{invalid conversion specifier '@'}}
+  printf("\0"); // expected-warning{{format string contains '\0' within the string body}}
+  printf("xs\0"); // expected-warning{{format string contains '\0' within the string body}}
+  printf("%*d\n"); // expected-warning{{'*' specified field width is missing a matching 'int' argument}}
+  printf("%*.*d\n", x); // expected-warning{{'.*' specified field precision is missing a matching 'int' argument}}
+  printf("%*d\n", f, x); // expected-warning{{field width should have type 'int', but argument has type 'double'}}
+  printf("%*.*d\n", x, f, x); // expected-warning{{field precision should have type 'int', but argument has type 'double'}}
+  printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}}
+  printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}}
+  printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}}
+  printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}}
+  printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}}
+  printf("%"); // expected-warning{{incomplete format specifier}}
+  printf("%.d", x); // no-warning
+  printf("%.", x);  // expected-warning{{incomplete format specifier}}
+} 
+

Modified: cfe/trunk/test/SemaObjC/format-strings-objc.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaObjC/format-strings-objc.m?rev=94837&r1=94836&r2=94837&view=diff

==============================================================================
--- cfe/trunk/test/SemaObjC/format-strings-objc.m (original)
+++ cfe/trunk/test/SemaObjC/format-strings-objc.m Fri Jan 29 14:55:36 2010
@@ -35,7 +35,7 @@
 
 void check_nslog(unsigned k) {
   NSLog(@"%d%%", k); // no-warning
-  NSLog(@"%s%lb%d", "unix", 10,20); // expected-warning {{lid conversion '%lb'}}
+  NSLog(@"%s%lb%d", "unix", 10,20); // expected-warning {{invalid conversion specifier 'b'}}
 }
 
 // Check type validation





More information about the cfe-commits mailing list