[cfe-commits] r97248 - in /cfe/trunk: include/clang/Analysis/Analyses/PrintfFormatString.h include/clang/Basic/DiagnosticSemaKinds.td lib/Analysis/PrintfFormatString.cpp lib/Sema/SemaChecking.cpp test/Sema/format-strings.c

Ted Kremenek kremenek at apple.com
Fri Feb 26 11:18:41 PST 2010


Author: kremenek
Date: Fri Feb 26 13:18:41 2010
New Revision: 97248

URL: http://llvm.org/viewvc/llvm-project?rev=97248&view=rev
Log:
For printf format string checking, move the tracking of the data argument index out of
Sema and into analyze_printf::ParseFormatString().  Also use a bitvector to determine
what arguments have been covered (instead of just checking to see if the last argument consumed is the max argument).  This is prep. for support positional arguments (an IEEE extension).

Modified:
    cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h
    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
    cfe/trunk/lib/Analysis/PrintfFormatString.cpp
    cfe/trunk/lib/Sema/SemaChecking.cpp
    cfe/trunk/test/Sema/format-strings.c

Modified: cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h?rev=97248&r1=97247&r2=97248&view=diff
==============================================================================
--- cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h (original)
+++ cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h Fri Feb 26 13:18:41 2010
@@ -152,18 +152,21 @@
 public:
   enum HowSpecified { NotSpecified, Constant, Arg };
 
-  OptionalAmount(HowSpecified h, const char *st)
-    : start(st), hs(h), amt(0) {}
+  OptionalAmount(HowSpecified h, unsigned i, const char *st)
+    : start(st), hs(h), amt(i) {}
 
   OptionalAmount()
     : start(0), hs(NotSpecified), amt(0) {}
 
-  OptionalAmount(unsigned i, const char *st)
-    : start(st), hs(Constant), amt(i) {}
-
   HowSpecified getHowSpecified() const { return hs; }
+
   bool hasDataArgument() const { return hs == Arg; }
 
+  unsigned getArgIndex() const {
+    assert(hasDataArgument());
+    return amt;
+  }
+
   unsigned getConstantAmount() const {
     assert(hs == Constant);
     return amt;
@@ -188,14 +191,14 @@
   unsigned HasSpacePrefix : 1;
   unsigned HasAlternativeForm : 1;
   unsigned HasLeadingZeroes : 1;
-  unsigned flags : 5;
+  unsigned argIndex;
   ConversionSpecifier CS;
   OptionalAmount FieldWidth;
   OptionalAmount Precision;
 public:
   FormatSpecifier() : LM(None),
     IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0),
-    HasAlternativeForm(0), HasLeadingZeroes(0) {}
+    HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {}
 
   static FormatSpecifier Parse(const char *beg, const char *end);
 
@@ -212,6 +215,16 @@
   void setHasAlternativeForm() { HasAlternativeForm = 1; }
   void setHasLeadingZeros() { HasLeadingZeroes = 1; }
 
+  void setArgIndex(unsigned i) {
+    assert(CS.consumesDataArgument());
+    argIndex = i;
+  }
+
+  unsigned getArgIndex() const {
+    assert(CS.consumesDataArgument());
+    return argIndex;
+  }
+
   // Methods for querying the format specifier.
 
   const ConversionSpecifier &getConversionSpecifier() const {
@@ -262,10 +275,10 @@
 
   virtual void HandleNullChar(const char *nullCharacter) {}
 
-  virtual void
+  virtual bool
     HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
                                      const char *startSpecifier,
-                                     unsigned specifierLen) {}
+                                     unsigned specifierLen) { return true; }
 
   virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
                                      const char *startSpecifier,

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=97248&r1=97247&r2=97248&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Feb 26 13:18:41 2010
@@ -2521,8 +2521,8 @@
   InGroup<FormatSecurity>;
 def warn_printf_insufficient_data_args : Warning<
   "more '%%' conversions than data arguments">, InGroup<Format>;
-def warn_printf_too_many_data_args : Warning<
-  "more data arguments than format specifiers">, InGroup<FormatExtraArgs>;
+def warn_printf_data_arg_not_used : Warning<
+  "data argument not used by format string">, InGroup<FormatExtraArgs>;
 def warn_printf_invalid_conversion : Warning<
   "invalid conversion specifier '%0'">, InGroup<Format>;
 def warn_printf_incomplete_specifier : Warning<

Modified: cfe/trunk/lib/Analysis/PrintfFormatString.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/PrintfFormatString.cpp?rev=97248&r1=97247&r2=97248&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/PrintfFormatString.cpp (original)
+++ cfe/trunk/lib/Analysis/PrintfFormatString.cpp Fri Feb 26 13:18:41 2010
@@ -62,7 +62,8 @@
 // Methods for parsing format strings.
 //===----------------------------------------------------------------------===//
 
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
+static OptionalAmount ParseAmount(const char *&Beg, const char *E,
+                                  unsigned &argIndex) {
   const char *I = Beg;
   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
 
@@ -78,11 +79,11 @@
     }
 
     if (foundDigits)
-      return OptionalAmount(accumulator, Beg);
+      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
 
     if (c == '*') {
       ++I;
-      return OptionalAmount(OptionalAmount::Arg, Beg);
+      return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
     }
 
     break;
@@ -93,7 +94,8 @@
 
 static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
                                                   const char *&Beg,
-                                                  const char *E) {
+                                                  const char *E,
+                                                  unsigned &argIndex) {
 
   using namespace clang::analyze_printf;
 
@@ -149,7 +151,7 @@
   }
 
   // Look for the field width (if any).
-  FS.setFieldWidth(ParseAmount(I, E));
+  FS.setFieldWidth(ParseAmount(I, E, argIndex));
 
   if (I == E) {
     // No more characters left?
@@ -165,7 +167,7 @@
       return true;
     }
 
-    FS.setPrecision(ParseAmount(I, E));
+    FS.setPrecision(ParseAmount(I, E, argIndex));
 
     if (I == E) {
       // No more characters left?
@@ -241,20 +243,26 @@
     // Glibc specific.
     case 'm': k = ConversionSpecifier::PrintErrno; break;
   }
-  FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
+  ConversionSpecifier CS(conversionPosition, k);
+  FS.setConversionSpecifier(CS);
+  if (CS.consumesDataArgument())
+    FS.setArgIndex(argIndex++);
 
   if (k == ConversionSpecifier::InvalidSpecifier) {
-    H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
-    return false; // Keep processing format specifiers.
+    // Assume the conversion takes one argument.
+    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
   }
   return FormatSpecifierResult(Start, FS);
 }
 
 bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
                        const char *I, const char *E) {
+
+  unsigned argIndex = 0;
+
   // Keep looking for a format specifier until we have exhausted the string.
   while (I != E) {
-    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
+    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
     // Did a fail-stop error of any kind occur when parsing the specifier?
     // If so, don't do any more processing.
     if (FSR.shouldStop())
@@ -430,7 +438,7 @@
       return Ctx.LongDoubleTy;
     return Ctx.DoubleTy;
   }
-  
+
   switch (CS.getKind()) {
     case ConversionSpecifier::CStrArg:
       return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy                                            : ArgTypeResult::CStrTy);
@@ -438,11 +446,11 @@
       // FIXME: This appears to be Mac OS X specific.
       return ArgTypeResult::WCStrTy;
     case ConversionSpecifier::CArg:
-      return Ctx.WCharTy;    
+      return Ctx.WCharTy;
     default:
       break;
   }
-  
+
   // FIXME: Handle other cases.
   return ArgTypeResult();
 }

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=97248&r1=97247&r2=97248&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Feb 26 13:18:41 2010
@@ -955,6 +955,7 @@
 ///  FormatGuard: Automatic Protection From printf Format String
 ///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
 ///
+/// TODO:
 /// Functionality implemented:
 ///
 ///  We can statically check the following properties for string
@@ -965,7 +966,7 @@
 ///      data arguments?
 ///
 ///  (2) Does each format conversion correctly match the type of the
-///      corresponding data argument?  (TODO)
+///      corresponding data argument?
 ///
 /// Moreover, for all printf functions we can:
 ///
@@ -984,7 +985,6 @@
 ///
 /// All of these checks can be done by parsing the format string.
 ///
-/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
 void
 Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
                            unsigned format_idx, unsigned firstDataArg) {
@@ -1044,13 +1044,13 @@
   Sema &S;
   const StringLiteral *FExpr;
   const Expr *OrigFormatExpr;
-  unsigned NumConversions;
   const unsigned NumDataArgs;
   const bool IsObjCLiteral;
   const char *Beg; // Start of format string.
   const bool HasVAListArg;
   const CallExpr *TheCall;
   unsigned FormatIdx;
+  llvm::BitVector CoveredArgs;
 public:
   CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
                      const Expr *origFormatExpr,
@@ -1058,17 +1058,20 @@
                      const char *beg, bool hasVAListArg,
                      const CallExpr *theCall, unsigned formatIdx)
     : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
-      NumConversions(0), NumDataArgs(numDataArgs),
+      NumDataArgs(numDataArgs),
       IsObjCLiteral(isObjCLiteral), Beg(beg),
       HasVAListArg(hasVAListArg),
-      TheCall(theCall), FormatIdx(formatIdx) {}
+      TheCall(theCall), FormatIdx(formatIdx) {
+        CoveredArgs.resize(numDataArgs);
+        CoveredArgs.reset();
+      }
 
   void DoneProcessing();
 
   void HandleIncompleteFormatSpecifier(const char *startSpecifier,
                                        unsigned specifierLen);
 
-  void
+  bool
   HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
                                    const char *startSpecifier,
                                    unsigned specifierLen);
@@ -1117,18 +1120,35 @@
     << getFormatSpecifierRange(startSpecifier, specifierLen);
 }
 
-void CheckPrintfHandler::
+bool CheckPrintfHandler::
 HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
                                  const char *startSpecifier,
                                  unsigned specifierLen) {
 
-  ++NumConversions;
+  unsigned argIndex = FS.getArgIndex();
+  bool keepGoing = true;
+  if (argIndex < NumDataArgs) {
+    // Consider the argument coverered, even though the specifier doesn't
+    // make sense.
+    CoveredArgs.set(argIndex);
+  }
+  else {
+    // If argIndex exceeds the number of data arguments we
+    // don't issue a warning because that is just a cascade of warnings (and
+    // they may have intended '%%' anyway). We don't want to continue processing
+    // the format string after this point, however, as we will like just get
+    // gibberish when trying to match arguments.
+    keepGoing = false;
+  }
+
   const analyze_printf::ConversionSpecifier &CS =
     FS.getConversionSpecifier();
   SourceLocation Loc = getLocationOfByte(CS.getStart());
   S.Diag(Loc, diag::warn_printf_invalid_conversion)
       << llvm::StringRef(CS.getStart(), CS.getLength())
       << getFormatSpecifierRange(startSpecifier, specifierLen);
+
+  return keepGoing;
 }
 
 void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
@@ -1139,7 +1159,7 @@
 }
 
 const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
-  return TheCall->getArg(FormatIdx + i);
+  return TheCall->getArg(FormatIdx + i + 1);
 }
 
 
@@ -1162,9 +1182,9 @@
                                  unsigned specifierLen) {
 
   if (Amt.hasDataArgument()) {
-    ++NumConversions;
     if (!HasVAListArg) {
-      if (NumConversions > NumDataArgs) {
+      unsigned argIndex = Amt.getArgIndex();
+      if (argIndex >= NumDataArgs) {
         S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag)
           << getFormatSpecifierRange(startSpecifier, specifierLen);
         // Don't do any more checking.  We will just emit
@@ -1176,7 +1196,8 @@
       // Although not in conformance with C99, we also allow the argument to be
       // an 'unsigned int' as that is a reasonably safe case.  GCC also
       // doesn't emit a warning for that case.
-      const Expr *Arg = getDataArg(NumConversions);
+      CoveredArgs.set(argIndex);
+      const Expr *Arg = getDataArg(argIndex);
       QualType T = Arg->getType();
 
       const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
@@ -1221,22 +1242,21 @@
     return false;
   }
 
-  // Check for using an Objective-C specific conversion specifier
-  // in a non-ObjC literal.
-  if (!IsObjCLiteral && CS.isObjCArg()) {
-    HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
-
-    // Continue checking the other format specifiers.
-    return true;
-  }
-
   if (!CS.consumesDataArgument()) {
     // FIXME: Technically specifying a precision or field width here
     // makes no sense.  Worth issuing a warning at some point.
     return true;
   }
 
-  ++NumConversions;
+  // Consume the argument.
+  unsigned argIndex = FS.getArgIndex();
+  CoveredArgs.set(argIndex);
+
+  // Check for using an Objective-C specific conversion specifier
+  // in a non-ObjC literal.
+  if (!IsObjCLiteral && CS.isObjCArg()) {
+    return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
+  }
 
   // Are we using '%n'?  Issue a warning about this being
   // a possible security issue.
@@ -1270,7 +1290,7 @@
   if (HasVAListArg)
     return true;
 
-  if (NumConversions > NumDataArgs) {
+  if (argIndex >= NumDataArgs) {
     S.Diag(getLocationOfByte(CS.getStart()),
            diag::warn_printf_insufficient_data_args)
       << getFormatSpecifierRange(startSpecifier, specifierLen);
@@ -1280,7 +1300,7 @@
 
   // Now type check the data expression that matches the
   // format specifier.
-  const Expr *Ex = getDataArg(NumConversions);
+  const Expr *Ex = getDataArg(argIndex);
   const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
   if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
     // Check if we didn't match because of an implicit cast from a 'char'
@@ -1304,10 +1324,17 @@
 void CheckPrintfHandler::DoneProcessing() {
   // Does the number of data arguments exceed the number of
   // format conversions in the format string?
-  if (!HasVAListArg && NumConversions < NumDataArgs)
-    S.Diag(getDataArg(NumConversions+1)->getLocStart(),
-           diag::warn_printf_too_many_data_args)
-      << getFormatStringRange();
+  if (!HasVAListArg) {
+    // Find any arguments that weren't covered.
+    CoveredArgs.flip();
+    signed notCoveredArg = CoveredArgs.find_first();
+    if (notCoveredArg >= 0) {
+      assert((unsigned)notCoveredArg < NumDataArgs);
+      S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
+             diag::warn_printf_data_arg_not_used)
+        << getFormatStringRange();
+    }
+  }
 }
 
 void Sema::CheckPrintfString(const StringLiteral *FExpr,

Modified: cfe/trunk/test/Sema/format-strings.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/format-strings.c?rev=97248&r1=97247&r2=97248&view=diff
==============================================================================
--- cfe/trunk/test/Sema/format-strings.c (original)
+++ cfe/trunk/test/Sema/format-strings.c Fri Feb 26 13:18:41 2010
@@ -55,7 +55,7 @@
   printf(i == 1 ? "yes" : "no"); // no-warning
   printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning
   printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}}
-  printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}}
+  printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}}
 }
 
 void check_writeback_specifier()
@@ -155,7 +155,7 @@
   printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}}
   printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}}
   printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}}
-  printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}}
+  printf("%d\n", x, x); // expected-warning{{data argument not used by format string}}
   printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}}
   printf("%"); // expected-warning{{incomplete format specifier}}
   printf("%.d", x); // no-warning





More information about the cfe-commits mailing list