[cfe-commits] r108500 - in /cfe/trunk: include/clang/Analysis/Analyses/FormatString.h include/clang/Analysis/Analyses/PrintfFormatString.h include/clang/Basic/DiagnosticSemaKinds.td lib/Analysis/CMakeLists.txt lib/Analysis/FormatString.cpp lib/Analysis/PrintfFormatString.cpp lib/Sema/Sema.h lib/Sema/SemaChecking.cpp

Ted Kremenek kremenek at apple.com
Thu Jul 15 19:11:22 PDT 2010


Author: kremenek
Date: Thu Jul 15 21:11:22 2010
New Revision: 108500

URL: http://llvm.org/viewvc/llvm-project?rev=108500&view=rev
Log:
Add most of the boilerplate support for scanf format string checking.  This includes
handling the parsing of scanf format strings and hooking the checking into Sema.
Most of this checking logic piggybacks on what was already there for checking printf format
strings, but the checking logic has been refactored to support both.

What is left to be done is to support argument type checking in format strings and of course
fix the usual tail of bugs that will follow.

Added:
    cfe/trunk/include/clang/Analysis/Analyses/FormatString.h
    cfe/trunk/lib/Analysis/FormatString.cpp
Removed:
    cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h
Modified:
    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
    cfe/trunk/lib/Analysis/CMakeLists.txt
    cfe/trunk/lib/Analysis/PrintfFormatString.cpp
    cfe/trunk/lib/Sema/Sema.h
    cfe/trunk/lib/Sema/SemaChecking.cpp

Added: cfe/trunk/include/clang/Analysis/Analyses/FormatString.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/Analyses/FormatString.h?rev=108500&view=auto
==============================================================================
--- cfe/trunk/include/clang/Analysis/Analyses/FormatString.h (added)
+++ cfe/trunk/include/clang/Analysis/Analyses/FormatString.h Thu Jul 15 21:11:22 2010
@@ -0,0 +1,620 @@
+//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines APIs for analyzing the format strings of printf, fscanf,
+// and friends.
+//
+// The structure of format strings for fprintf is described in C99 7.19.6.1.
+//
+// The structure of format strings for fscanf is described in C99 7.19.6.2.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_H
+#define LLVM_CLANG_FORMAT_H
+
+#include "clang/AST/CanonicalType.h"
+
+namespace clang {
+
+//===----------------------------------------------------------------------===//
+/// Common components of both fprintf and fscanf format strings.
+namespace analyze_format_string {
+
+/// An optional format flag (e.g. '-' or '+'): records whether it was set,
+/// the position recorded for it, and the text used to spell it.
+class OptionalFlag {
+public:
+  OptionalFlag(const char *Representation)  // position starts out null
+      : representation(Representation), position(0), flag(false) {}
+  bool isSet() const { return flag; }
+  void set() { flag = true; }
+  void clear() { flag = false; }
+  void setPosition(const char *position) {  // position must be non-null
+    assert(position);
+    this->position = position;
+  }
+  const char *getPosition() const {  // only valid after setPosition()
+    assert(position);
+    return position;
+  }
+  const char *toString() const { return representation; }
+
+  // Overloaded operators for bool-like qualities.
+  operator bool() const { return flag; }
+  OptionalFlag& operator=(const bool &rhs) {  // position is left untouched
+    flag = rhs;
+    return *this;
+  }
+private:
+  const char *representation;  // text of the flag, as passed to the ctor
+  const char *position;        // null until setPosition() is called
+  bool flag;                   // true while the flag is set
+};
+
+/// The length modifier of a printf/scanf conversion ('h', 'll', 'L', ...).
+class LengthModifier {
+public:
+  enum Kind {
+    None,
+    AsChar,       // 'hh'
+    AsShort,      // 'h'
+    AsLong,       // 'l'
+    AsLongLong,   // 'll', 'q' (BSD, deprecated)
+    AsIntMax,     // 'j'
+    AsSizeT,      // 'z'
+    AsPtrDiff,    // 't'
+    AsLongDouble, // 'L'
+    AsWideChar = AsLong // for '%ls', only makes sense for printf
+  };
+
+  // A default-constructed modifier is 'None' and has a null start pointer.
+  LengthModifier() : Position(0), kind(None) {}
+  // 'pos' is stored as the modifier's start pointer; 'k' is its kind.
+  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}
+
+  // Start pointer given at construction (null when default-constructed).
+  const char *getStart() const { return Position; }
+
+  // Character count associated with the kind: 0 for None, 2 for the
+  // two-letter kinds (AsChar, AsLongLong), 1 for everything else.
+  // NOTE(review): 'q' is listed under AsLongLong above, so it is also
+  // reported as 2 here -- confirm callers account for that.
+  unsigned getLength() const {
+    if (kind == None)
+      return 0;
+    if (kind == AsChar || kind == AsLongLong)
+      return 2;
+    return 1;
+  }
+
+  Kind getKind() const { return kind; }
+  void setKind(Kind k) { kind = k; }
+
+  // Defined out of line (not visible in this header).
+  const char *toString() const;
+
+private:
+  const char *Position;
+  Kind kind;
+};
+
+class ArgTypeResult {  // Describes the argument type a conversion expects.
+public:
+  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
+    CStrTy, WCStrTy };
+private:
+  const Kind K;  // const member, so an instance's kind never changes
+  QualType T;    // only given a value by the two SpecificTy constructors
+  ArgTypeResult(bool) : K(InvalidTy) {}  // private; used by Invalid()
+public:
+  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
+  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
+  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
+
+  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
+
+  bool isValid() const { return K != InvalidTy; }
+
+  const QualType *getSpecificType() const {  // null unless K == SpecificTy
+    return K == SpecificTy ? &T : 0;
+  }
+
+  bool matchesType(ASTContext &C, QualType argTy) const;
+
+  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
+
+  QualType getRepresentativeType(ASTContext &C) const;
+};
+
+class OptionalAmount {  // An optional amount, e.g. a field width or precision.
+public:
+  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
+
+  OptionalAmount(HowSpecified howSpecified,
+                 unsigned amount,
+                 const char *amountStart,
+                 unsigned amountLength,
+                 bool usesPositionalArg)
+  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
+  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
+
+  OptionalAmount(bool valid = true)  // NotSpecified if valid, else Invalid
+  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
+  UsesPositionalArg(0), UsesDotPrefix(0) {}
+
+  bool isInvalid() const {
+    return hs == Invalid;
+  }
+
+  HowSpecified getHowSpecified() const { return hs; }
+  void setHowSpecified(HowSpecified h) { hs = h; }
+
+  bool hasDataArgument() const { return hs == Arg; }
+
+  unsigned getArgIndex() const {  // asserts hs == Arg
+    assert(hasDataArgument());
+    return amt;
+  }
+
+  unsigned getConstantAmount() const {  // asserts hs == Constant
+    assert(hs == Constant);
+    return amt;
+  }
+
+  const char *getStart() const {
+    // Include the '.' if one was given (the bool converts to 0 or 1).
+    return start - UsesDotPrefix;
+  }
+
+  unsigned getConstantLength() const {  // likewise counts the '.' if given
+    assert(hs == Constant);
+    return length + UsesDotPrefix;
+  }
+
+  ArgTypeResult getArgType(ASTContext &Ctx) const;
+
+  void toString(llvm::raw_ostream &os) const;
+
+  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
+  unsigned getPositionalArgIndex() const {  // getArgIndex() + 1
+    assert(hasDataArgument());
+    return amt + 1;
+  }
+
+  bool usesDotPrefix() const { return UsesDotPrefix; }
+  void setUsesDotPrefix() { UsesDotPrefix = true; }
+
+private:
+  const char *start;   // start pointer passed to the constructor
+  unsigned length;     // length passed to the constructor
+  HowSpecified hs;
+  unsigned amt;        // read as a constant or an argument index, per hs
+  bool UsesPositionalArg : 1;
+  bool UsesDotPrefix;  // set by setUsesDotPrefix(); never cleared here
+};
+
+
+class FormatSpecifier {  // State shared by printf and scanf specifiers.
+protected:
+  LengthModifier LM;
+  OptionalAmount FieldWidth;
+  /// Positional arguments, an IEEE extension:
+  ///  IEEE Std 1003.1, 2004 Edition
+  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
+  bool UsesPositionalArg;
+  unsigned argIndex;  // getPositionalArgIndex() reports this value + 1
+public:
+  FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {}
+
+  void setLengthModifier(LengthModifier lm) {
+    LM = lm;
+  }
+
+  void setUsesPositionalArg() { UsesPositionalArg = true; }
+
+  void setArgIndex(unsigned i) {
+    // assert(CS.consumesDataArgument());  -- CS lives in the subclasses
+    argIndex = i;
+  }
+
+  unsigned getArgIndex() const {
+    // assert(CS.consumesDataArgument());
+    return argIndex;
+  }
+
+  unsigned getPositionalArgIndex() const {
+    // assert(CS.consumesDataArgument());
+    return argIndex + 1;
+  }
+
+  const LengthModifier &getLengthModifier() const {
+    return LM;
+  }
+
+  const OptionalAmount &getFieldWidth() const {
+    return FieldWidth;
+  }
+
+  void setFieldWidth(const OptionalAmount &Amt) {
+    FieldWidth = Amt;
+  }
+
+  bool usesPositionalArg() const { return UsesPositionalArg; }
+};
+
+} // end analyze_format_string namespace
+
+//===----------------------------------------------------------------------===//
+/// Pieces specific to fprintf format strings.
+
+namespace analyze_printf {
+
+class ConversionSpecifier {  // The conversion character of a printf spec.
+public:
+  enum Kind {
+    InvalidSpecifier = 0,
+    // C99 conversion specifiers.
+    dArg,          // 'd'
+    IntAsCharArg,  // 'c'
+    iArg,          // 'i'
+    oArg,          // 'o'
+    uArg,          // 'u'
+    xArg,          // 'x'
+    XArg,          // 'X'
+    fArg,          // 'f'
+    FArg,          // 'F'
+    eArg,          // 'e'
+    EArg,          // 'E'
+    gArg,          // 'g'
+    GArg,          // 'G'
+    aArg,          // 'a'
+    AArg,          // 'A'
+    CStrArg,       // 's'
+    VoidPtrArg,    // 'p'
+    OutIntPtrArg,  // 'n'
+    PercentArg,    // '%'
+    // MacOS X unicode extensions.
+    CArg,          // 'C'
+    UnicodeStrArg, // 'S'
+    // Objective-C specific specifiers.
+    ObjCObjArg,    // '@'
+    // GlibC specific specifiers.
+    PrintErrno,    // 'm'
+    // Specifier ranges.
+    IntArgBeg = dArg,
+    IntArgEnd = iArg,
+    UIntArgBeg = oArg,
+    UIntArgEnd = XArg,
+    DoubleArgBeg = fArg,
+    DoubleArgEnd = AArg,
+    C99Beg = IntArgBeg,
+    C99End = DoubleArgEnd,
+    ObjCBeg = ObjCObjArg,
+    ObjCEnd = ObjCObjArg
+  };
+
+  // Default: an invalid specifier with a null start pointer.
+  ConversionSpecifier() : Position(0), kind(InvalidSpecifier) {}
+
+  // Stores 'pos' as the start pointer and 'k' as the kind.
+  ConversionSpecifier(const char *pos, Kind k) : Position(pos), kind(k) {}
+
+  // Start pointer supplied at construction.
+  const char *getStart() const { return Position; }
+
+  // The getLength() characters beginning at getStart().
+  llvm::StringRef getCharacters() const {
+    return llvm::StringRef(Position, getLength());
+  }
+
+  // False only for '%' and 'm' (PercentArg, PrintErrno); every other
+  // kind, including InvalidSpecifier, reports true.
+  bool consumesDataArgument() const {
+    return kind != PercentArg && kind != PrintErrno;
+  }
+
+  // Range predicates over Kind; they rely on the enumerator order above.
+  // Note that IntAsCharArg ('c') lies inside [IntArgBeg, IntArgEnd].
+  bool isObjCArg() const { return ObjCBeg <= kind && kind <= ObjCEnd; }
+  bool isIntArg() const { return IntArgBeg <= kind && kind <= IntArgEnd; }
+  bool isUIntArg() const { return UIntArgBeg <= kind && kind <= UIntArgEnd; }
+  bool isDoubleArg() const {
+    return DoubleArgBeg <= kind && kind <= DoubleArgEnd;
+  }
+
+  Kind getKind() const { return kind; }
+  void setKind(Kind k) { kind = k; }
+
+  // Conversion specifiers are currently always a single character.
+  unsigned getLength() const { return 1; }
+
+  const char *toString() const;
+
+private:
+  const char *Position;
+  Kind kind;
+};
+
+using analyze_format_string::ArgTypeResult;
+using analyze_format_string::LengthModifier;
+using analyze_format_string::OptionalAmount;
+using analyze_format_string::OptionalFlag;
+
+/// A parsed printf conversion specification: the shared FormatSpecifier
+/// state plus printf's flags, precision, and conversion specifier.
+class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
+  OptionalFlag IsLeftJustified; // '-'
+  OptionalFlag HasPlusPrefix; // '+'
+  OptionalFlag HasSpacePrefix; // ' '
+  OptionalFlag HasAlternativeForm; // '#'
+  OptionalFlag HasLeadingZeroes; // '0'
+  ConversionSpecifier CS;
+  OptionalAmount Precision;
+public:
+  PrintfSpecifier() :
+  IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
+  HasAlternativeForm("#"), HasLeadingZeroes("0") {}
+
+  static PrintfSpecifier Parse(const char *beg, const char *end);
+
+  // Methods for incrementally constructing the PrintfSpecifier.
+  void setConversionSpecifier(const ConversionSpecifier &cs) { CS = cs; }
+  void setIsLeftJustified(const char *position) {
+    IsLeftJustified = true;
+    IsLeftJustified.setPosition(position);
+  }
+  void setHasPlusPrefix(const char *position) {
+    HasPlusPrefix = true;
+    HasPlusPrefix.setPosition(position);
+  }
+  void setHasSpacePrefix(const char *position) {
+    HasSpacePrefix = true;
+    HasSpacePrefix.setPosition(position);
+  }
+  void setHasAlternativeForm(const char *position) {
+    HasAlternativeForm = true;
+    HasAlternativeForm.setPosition(position);
+  }
+  void setHasLeadingZeros(const char *position) {
+    HasLeadingZeroes = true;
+    HasLeadingZeroes.setPosition(position);
+  }
+  // setUsesPositionalArg() is inherited unchanged from FormatSpecifier.
+
+  // Methods for querying the format specifier.
+
+  const ConversionSpecifier &getConversionSpecifier() const {
+    return CS;
+  }
+
+  void setPrecision(const OptionalAmount &Amt) {
+    Precision = Amt;
+    Precision.setUsesDotPrefix();  // every precision is marked '.'-prefixed
+  }
+
+  const OptionalAmount &getPrecision() const {
+    return Precision;
+  }
+
+  /// \brief Returns the type that a data argument paired with this
+  /// format specifier should have, as an ArgTypeResult.  The result is
+  /// returned by value, so "no single matching type" is presumably
+  /// signalled through the ArgTypeResult's kind rather than a null
+  /// value -- confirm against the out-of-line definition.
+  ArgTypeResult getArgType(ASTContext &Ctx) const;
+
+  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
+  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
+  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
+  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
+  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
+  // usesPositionalArg() is inherited unchanged from FormatSpecifier.
+
+  /// Changes the specifier and length according to a QualType, retaining any
+  /// flags or options. Returns true on success, or false when a conversion
+  /// was not successful.
+  bool fixType(QualType QT);
+
+  void toString(llvm::raw_ostream &os) const;
+
+  // Validation methods - to check if any element results in undefined behavior
+  bool hasValidPlusPrefix() const;
+  bool hasValidAlternativeForm() const;
+  bool hasValidLeadingZeros() const;
+  bool hasValidSpacePrefix() const;
+  bool hasValidLeftJustified() const;
+
+  bool hasValidLengthModifier() const;
+  bool hasValidPrecision() const;
+  bool hasValidFieldWidth() const;
+};
+}  // end analyze_printf namespace
+
+//===----------------------------------------------------------------------===//
+/// Pieces specific to fscanf format strings.
+
+namespace analyze_scanf {
+
+class ConversionSpecifier {  // The conversion character of a scanf spec.
+public:
+  enum Kind {
+    InvalidSpecifier = 0,
+    // C99 conversion specifiers.
+    dArg,              // 'd'
+    iArg,              // 'i'
+    oArg,              // 'o'
+    uArg,              // 'u'
+    xArg,              // 'x'
+    XArg,              // 'X'
+    fArg,              // 'f'
+    FArg,              // 'F'
+    eArg,              // 'e'
+    EArg,              // 'E'
+    gArg,              // 'g'
+    GArg,              // 'G'
+    aArg,              // 'a'
+    AArg,              // 'A'
+    sArg,              // 's', match sequence of non-white-space characters
+    VoidPtrArg,        // 'p'
+    cArg,              // 'c', differs from printf, writes array of characters
+    ConsumedSoFarArg,  // 'n', differs from printf, writes back args consumed
+    PercentArg,        // '%'
+    ScanListArg,       // '[' followed by scan list
+    // IEEE Std 1003.1 extensions.
+    CArg,              // 'C', same as writing 'lc'
+    SArg,              // 'S', same as writing 'ls'
+    // Specifier ranges.
+    IntArgBeg = dArg,
+    IntArgEnd = iArg,
+    UIntArgBeg = oArg,
+    UIntArgEnd = XArg,
+    DoubleArgBeg = fArg,
+    DoubleArgEnd = AArg
+  };
+
+  ConversionSpecifier()
+    : Position(0), EndScanList(0), kind(InvalidSpecifier) {}
+
+  ConversionSpecifier(const char *pos, Kind k)
+    : Position(pos), EndScanList(0), kind(k) {}
+
+  const char *getStart() const { return Position; }
+
+  // Records where a scan list ends; getLength() then measures up to it.
+  void setEndScanList(const char *pos) { EndScanList = pos; }
+
+  llvm::StringRef getCharacters() const {
+    return llvm::StringRef(Position, getLength());
+  }
+
+  // Every kind except PercentArg ('%') reports true.
+  bool consumesDataArgument() const { return !(kind == PercentArg); }
+
+  bool isIntArg() const { return IntArgBeg <= kind && kind <= IntArgEnd; }
+  bool isUIntArg() const { return UIntArgBeg <= kind && kind <= UIntArgEnd; }
+  bool isDoubleArg() const {
+    return DoubleArgBeg <= kind && kind <= DoubleArgEnd;
+  }
+  Kind getKind() const { return kind; }
+  void setKind(Kind k) { kind = k; }
+
+  // One character unless a scan-list end pointer has been recorded.
+  unsigned getLength() const {
+    return EndScanList == 0 ? 1 : EndScanList - Position;
+  }
+
+  const char *toString() const;
+private:
+  const char *Position;
+  const char *EndScanList;
+  Kind kind;
+};
+
+using analyze_format_string::LengthModifier;
+using analyze_format_string::OptionalAmount;
+using analyze_format_string::OptionalFlag;
+
+/// A scanf specifier: FormatSpecifier state, a '*' flag, and a conversion.
+class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
+  OptionalFlag SuppressAssignment; // '*'
+  ConversionSpecifier CS;
+public:
+  ScanfSpecifier() : SuppressAssignment("*") {}
+
+  void setSuppressAssignment(const char *position) {  // set flag + position
+    SuppressAssignment = true;
+    SuppressAssignment.setPosition(position);
+  }
+
+  const OptionalFlag &getSuppressAssignment() const {
+    return SuppressAssignment;
+  }
+
+  void setConversionSpecifier(const ConversionSpecifier &cs) {
+    CS = cs;
+  }
+
+  const ConversionSpecifier &getConversionSpecifier() const {
+    return CS;
+  }
+  // False when the conversion takes no argument or assignment is suppressed.
+  bool consumesDataArgument() const {
+    return CS.consumesDataArgument() && !SuppressAssignment;
+  }
+
+  static ScanfSpecifier Parse(const char *beg, const char *end);
+};
+
+} // end analyze_scanf namespace
+
+//===----------------------------------------------------------------------===//
+// Parsing and processing of format strings (both fprintf and fscanf).
+
+namespace analyze_format_string {
+// Which amount an invalid position was given for; see HandleInvalidPosition.
+enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
+
+/// Overridable hooks taken by ParsePrintfString and ParseScanfString.  The
+/// default implementations do nothing; the bool-returning ones return true.
+class FormatStringHandler {
+public:
+  FormatStringHandler() {}
+  virtual ~FormatStringHandler();
+
+  virtual void HandleNullChar(const char *nullCharacter) {}
+
+  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
+                                     PositionContext p) {}
+
+  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
+
+  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
+                                         unsigned specifierLen) {}
+
+  // Printf-specific handlers.
+  virtual bool HandleInvalidPrintfConversionSpecifier(
+                                      const analyze_printf::PrintfSpecifier &FS,
+                                      const char *startSpecifier,
+                                      unsigned specifierLen) {
+    return true;  // NOTE(review): what false means is up to the parser
+  }
+
+  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                                     const char *startSpecifier,
+                                     unsigned specifierLen) {
+    return true;
+  }
+
+  // Scanf-specific handlers.
+  virtual bool HandleInvalidScanfConversionSpecifier(
+                                        const analyze_scanf::ScanfSpecifier &FS,
+                                        const char *startSpecifier,
+                                        unsigned specifierLen) {
+    return true;
+  }
+
+  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+                                    const char *startSpecifier,
+                                    unsigned specifierLen) {
+    return true;
+  }
+
+  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
+};
+// Format-string parsers that take a FormatStringHandler; defined out of line.
+bool ParsePrintfString(FormatStringHandler &H,
+                       const char *beg, const char *end);
+
+bool ParseScanfString(FormatStringHandler &H,
+                       const char *beg, const char *end);
+
+} // end analyze_format_string namespace
+} // end clang namespace
+#endif

Removed: cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h?rev=108499&view=auto
==============================================================================
--- cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h (original)
+++ cfe/trunk/include/clang/Analysis/Analyses/PrintfFormatString.h (removed)
@@ -1,445 +0,0 @@
-//==- PrintfFormatStrings.h - Analysis of printf format strings --*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Handling of format string in printf and friends.  The structure of format
-// strings for fprintf() are described in C99 7.19.6.1.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_FPRINTF_FORMAT_H
-#define LLVM_CLANG_FPRINTF_FORMAT_H
-
-#include "clang/AST/CanonicalType.h"
-
-namespace clang {
-
-class ASTContext;
-
-namespace analyze_printf {
-
-class ArgTypeResult {
-public:
-  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
-              CStrTy, WCStrTy };
-private:
-  const Kind K;
-  QualType T;
-  ArgTypeResult(bool) : K(InvalidTy) {}
-public:
-  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
-  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
-  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
-
-  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
-
-  bool isValid() const { return K != InvalidTy; }
-
-  const QualType *getSpecificType() const {
-    return K == SpecificTy ? &T : 0;
-  }
-
-  bool matchesType(ASTContext &C, QualType argTy) const;
-
-  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
-
-  QualType getRepresentativeType(ASTContext &C) const;
-};
-
-class ConversionSpecifier {
-public:
-  enum Kind {
-   InvalidSpecifier = 0,
-    // C99 conversion specifiers.
-   dArg, // 'd'
-   IntAsCharArg,  // 'c'
-   iArg, // 'i',
-   oArg, // 'o',
-   uArg, // 'u',
-   xArg, // 'x',
-   XArg, // 'X',
-   fArg, // 'f',
-   FArg, // 'F',
-   eArg, // 'e',
-   EArg, // 'E',
-   gArg, // 'g',
-   GArg, // 'G',
-   aArg, // 'a',
-   AArg, // 'A',
-   CStrArg,       // 's'
-   VoidPtrArg,    // 'p'
-   OutIntPtrArg,  // 'n'
-   PercentArg,    // '%'
-   // MacOS X unicode extensions.
-   CArg, // 'C'
-   UnicodeStrArg, // 'S'
-   // Objective-C specific specifiers.
-   ObjCObjArg,    // '@'
-   // GlibC specific specifiers.
-   PrintErrno,    // 'm'
-   // Specifier ranges.
-   IntArgBeg = dArg,
-   IntArgEnd = iArg,
-   UIntArgBeg = oArg,
-   UIntArgEnd = XArg,
-   DoubleArgBeg = fArg,
-   DoubleArgEnd = AArg,
-   C99Beg = IntArgBeg,
-   C99End = DoubleArgEnd,
-   ObjCBeg = ObjCObjArg,
-   ObjCEnd = ObjCObjArg
-  };
-
-  ConversionSpecifier()
-    : Position(0), kind(InvalidSpecifier) {}
-
-  ConversionSpecifier(const char *pos, Kind k)
-    : Position(pos), kind(k) {}
-
-  const char *getStart() const {
-    return Position;
-  }
-
-  llvm::StringRef getCharacters() const {
-    return llvm::StringRef(getStart(), getLength());
-  }
-
-  bool consumesDataArgument() const {
-    switch (kind) {
-  	  case PercentArg:
-	  case PrintErrno:
-		return false;
-	  default:
-		return true;
-	}
-  }
-
-  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
-  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
-  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
-  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
-  Kind getKind() const { return kind; }
-  void setKind(Kind k) { kind = k; }
-  unsigned getLength() const {
-    // Conversion specifiers currently only are represented by
-    // single characters, but we be flexible.
-    return 1;
-  }
-  const char *toString() const;
-
-private:
-  const char *Position;
-  Kind kind;
-};
-
-class LengthModifier {
-public:
-  enum Kind {
-   None,
-   AsChar,       // 'hh'
-   AsShort,      // 'h'
-   AsLong,       // 'l'
-   AsLongLong,   // 'll', 'q' (BSD, deprecated)
-   AsIntMax,     // 'j'
-   AsSizeT,      // 'z'
-   AsPtrDiff,    // 't'
-   AsLongDouble, // 'L'
-   AsWideChar = AsLong // for '%ls'
-  };
-
-  LengthModifier()
-    : Position(0), kind(None) {}
-  LengthModifier(const char *pos, Kind k)
-    : Position(pos), kind(k) {}
-
-  const char *getStart() const {
-    return Position;
-  }
-
-  unsigned getLength() const {
-    switch (kind) {
-    default:
-      return 1;
-    case AsLongLong:
-    case AsChar:
-      return 2;
-    case None:
-      return 0;
-    }
-  }
-
-  Kind getKind() const { return kind; }
-  void setKind(Kind k) { kind = k; }
-
-  const char *toString() const;
-
-private:
-  const char *Position;
-  Kind kind;
-};
-
-class OptionalAmount {
-public:
-  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
-
-  OptionalAmount(HowSpecified howSpecified,
-                 unsigned amount,
-                 const char *amountStart,
-                 unsigned amountLength,
-                 bool usesPositionalArg)
-    : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
-      UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
-
-  OptionalAmount(bool valid = true)
-    : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
-      UsesPositionalArg(0), UsesDotPrefix(0) {}
-
-  bool isInvalid() const {
-    return hs == Invalid;
-  }
-
-  HowSpecified getHowSpecified() const { return hs; }
-  void setHowSpecified(HowSpecified h) { hs = h; }
-
-  bool hasDataArgument() const { return hs == Arg; }
-
-  unsigned getArgIndex() const {
-    assert(hasDataArgument());
-    return amt;
-  }
-
-  unsigned getConstantAmount() const {
-    assert(hs == Constant);
-    return amt;
-  }
-
-  const char *getStart() const {
-    // We include the . character if it is given.
-    return start - UsesDotPrefix;
-  }
-
-  unsigned getConstantLength() const {
-    assert(hs == Constant);
-    return length + UsesDotPrefix;
-  }
-
-  ArgTypeResult getArgType(ASTContext &Ctx) const;
-
-  void toString(llvm::raw_ostream &os) const;
-
-  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
-  unsigned getPositionalArgIndex() const {
-    assert(hasDataArgument());
-    return amt + 1;
-  }
-
-  bool usesDotPrefix() const { return UsesDotPrefix; }
-  void setUsesDotPrefix() { UsesDotPrefix = true; }
-
-private:
-  const char *start;
-  unsigned length;
-  HowSpecified hs;
-  unsigned amt;
-  bool UsesPositionalArg : 1;
-  bool UsesDotPrefix;
-};
-
-// Class representing optional flags with location and representation
-// information.
-class OptionalFlag {
-public:
-  OptionalFlag(const char *Representation)
-      : representation(Representation), flag(false) {}
-  bool isSet() { return flag; }
-  void set() { flag = true; }
-  void clear() { flag = false; }
-  void setPosition(const char *position) {
-    assert(position);
-    this->position = position;
-  }
-  const char *getPosition() const {
-    assert(position);
-    return position;
-  }
-  const char *toString() const { return representation; }
-
-  // Overloaded operators for bool like qualities
-  operator bool() const { return flag; }
-  OptionalFlag& operator=(const bool &rhs) {
-    flag = rhs;
-    return *this;  // Return a reference to myself.
-  }
-private:
-  const char *representation;
-  const char *position;
-  bool flag;
-};
-
-class FormatSpecifier {
-  LengthModifier LM;
-  OptionalFlag IsLeftJustified; // '-'
-  OptionalFlag HasPlusPrefix; // '+'
-  OptionalFlag HasSpacePrefix; // ' '
-  OptionalFlag HasAlternativeForm; // '#'
-  OptionalFlag HasLeadingZeroes; // '0'
-  /// Positional arguments, an IEEE extension:
-  ///  IEEE Std 1003.1, 2004 Edition
-  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
-  bool UsesPositionalArg;
-  unsigned argIndex;
-  ConversionSpecifier CS;
-  OptionalAmount FieldWidth;
-  OptionalAmount Precision;
-public:
-  FormatSpecifier() :
-    IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
-    HasAlternativeForm("#"), HasLeadingZeroes("0"), UsesPositionalArg(false),
-    argIndex(0) {}
-
-  static FormatSpecifier Parse(const char *beg, const char *end);
-
-  // Methods for incrementally constructing the FormatSpecifier.
-  void setConversionSpecifier(const ConversionSpecifier &cs) {
-    CS = cs;
-  }
-  void setLengthModifier(LengthModifier lm) {
-    LM = lm;
-  }
-  void setIsLeftJustified(const char *position) {
-    IsLeftJustified = true;
-    IsLeftJustified.setPosition(position);
-  }
-  void setHasPlusPrefix(const char *position) {
-    HasPlusPrefix = true;
-    HasPlusPrefix.setPosition(position);
-  }
-  void setHasSpacePrefix(const char *position) {
-    HasSpacePrefix = true;
-    HasSpacePrefix.setPosition(position);
-  }
-  void setHasAlternativeForm(const char *position) {
-    HasAlternativeForm = true;
-    HasAlternativeForm.setPosition(position);
-  }
-  void setHasLeadingZeros(const char *position) {
-    HasLeadingZeroes = true;
-    HasLeadingZeroes.setPosition(position);
-  }
-  void setUsesPositionalArg() { UsesPositionalArg = true; }
-
-  void setArgIndex(unsigned i) {
-    assert(CS.consumesDataArgument());
-    argIndex = i;
-  }
-
-  unsigned getArgIndex() const {
-    assert(CS.consumesDataArgument());
-    return argIndex;
-  }
-
-  unsigned getPositionalArgIndex() const {
-    assert(CS.consumesDataArgument());
-    return argIndex + 1;
-  }
-
-  // Methods for querying the format specifier.
-
-  const ConversionSpecifier &getConversionSpecifier() const {
-    return CS;
-  }
-
-  const LengthModifier &getLengthModifier() const {
-    return LM;
-  }
-
-  const OptionalAmount &getFieldWidth() const {
-    return FieldWidth;
-  }
-
-  void setFieldWidth(const OptionalAmount &Amt) {
-    FieldWidth = Amt;
-  }
-
-  void setPrecision(const OptionalAmount &Amt) {
-    Precision = Amt;
-    Precision.setUsesDotPrefix();
-  }
-
-  const OptionalAmount &getPrecision() const {
-    return Precision;
-  }
-
-  /// \brief Returns the builtin type that a data argument
-  /// paired with this format specifier should have.  This method
-  /// will return null if the format specifier does not have
-  /// a matching data argument or the matching argument matches
-  /// more than one type.
-  ArgTypeResult getArgType(ASTContext &Ctx) const;
-
-  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
-  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
-  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
-  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
-  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
-  bool usesPositionalArg() const { return UsesPositionalArg; }
-
-  /// Changes the specifier and length according to a QualType, retaining any
-  /// flags or options. Returns true on success, or false when a conversion
-  /// was not successful.
-  bool fixType(QualType QT);
-
-  void toString(llvm::raw_ostream &os) const;
-
-  // Validation methods - to check if any element results in undefined behavior
-  bool hasValidPlusPrefix() const;
-  bool hasValidAlternativeForm() const;
-  bool hasValidLeadingZeros() const;
-  bool hasValidSpacePrefix() const;
-  bool hasValidLeftJustified() const;
-
-  bool hasValidLengthModifier() const;
-  bool hasValidPrecision() const;
-  bool hasValidFieldWidth() const;
-};
-
-enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
-
-class FormatStringHandler {
-public:
-  FormatStringHandler() {}
-  virtual ~FormatStringHandler();
-
-  virtual void HandleIncompleteFormatSpecifier(const char *startSpecifier,
-                                               unsigned specifierLen) {}
-
-  virtual void HandleNullChar(const char *nullCharacter) {}
-
-  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
-                                     PositionContext p) {}
-
-  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
-
-  virtual bool
-    HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
-                                     const char *startSpecifier,
-                                     unsigned specifierLen) { return true; }
-
-  virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
-                                     const char *startSpecifier,
-                                     unsigned specifierLen) {
-    return true;
-  }
-};
-
-bool ParseFormatString(FormatStringHandler &H,
-                       const char *beg, const char *end);
-
-} // end printf namespace
-} // end clang namespace
-#endif

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=108500&r1=108499&r2=108500&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Thu Jul 15 21:11:22 2010
@@ -2183,10 +2183,10 @@
   "initializer of a builtin type can only take one argument">;
 def err_value_init_for_array_type : Error<
   "array types cannot be value-initialized">;
-def warn_printf_nonliteral_noargs : Warning<
+def warn_format_nonliteral_noargs : Warning<
   "format string is not a string literal (potentially insecure)">,
   InGroup<FormatSecurity>;
-def warn_printf_nonliteral : Warning<
+def warn_format_nonliteral : Warning<
   "format string is not a string literal">,
   InGroup<FormatNonLiteral>, DefaultIgnore;
 
@@ -2936,29 +2936,32 @@
   "invalid conversion specifier '%0'">, InGroup<Format>;
 def warn_printf_incomplete_specifier : Warning<
   "incomplete format specifier">, InGroup<Format>;
-def warn_printf_missing_format_string : Warning<
+def warn_missing_format_string : Warning<
   "format string missing">, InGroup<Format>;
+def warn_scanf_nonzero_width : Warning<
+  "conversion specifies 0 input characters for field width">,
+  InGroup<Format>;
 def warn_printf_conversion_argument_type_mismatch : Warning<
   "conversion specifies type %0 but the argument has type %1">,
   InGroup<Format>;
 def warn_printf_positional_arg_exceeds_data_args : Warning <
   "data argument position '%0' exceeds the number of data arguments (%1)">,
   InGroup<Format>;
-def warn_printf_zero_positional_specifier : Warning<
+def warn_format_zero_positional_specifier : Warning<
   "position arguments in format strings start counting at 1 (not 0)">,
   InGroup<Format>;
-def warn_printf_invalid_positional_specifier : Warning<
+def warn_format_invalid_positional_specifier : Warning<
   "invalid position specified for %select{field width|field precision}0">,
   InGroup<Format>;
-def warn_printf_mix_positional_nonpositional_args : Warning<
+def warn_format_mix_positional_nonpositional_args : Warning<
   "cannot mix positional and non-positional arguments in format string">,
   InGroup<Format>;
 def warn_null_arg : Warning<
   "null passed to a callee which requires a non-null argument">,
   InGroup<NonNull>;
-def warn_printf_empty_format_string : Warning<
+def warn_empty_format_string : Warning<
   "format string is empty">, InGroup<FormatZeroLength>;
-def warn_printf_format_string_is_wide_literal : Warning<
+def warn_format_string_is_wide_literal : Warning<
   "format string should not be a wide string">, InGroup<Format>;
 def warn_printf_format_string_contains_null_char : Warning<
   "format string contains '\\0' within the string body">, InGroup<Format>;

Modified: cfe/trunk/lib/Analysis/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CMakeLists.txt?rev=108500&r1=108499&r2=108500&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/CMakeLists.txt (original)
+++ cfe/trunk/lib/Analysis/CMakeLists.txt Thu Jul 15 21:11:22 2010
@@ -3,9 +3,11 @@
 add_clang_library(clangAnalysis
   AnalysisContext.cpp
   CFG.cpp
+  FormatString.cpp
   LiveVariables.cpp
   PrintfFormatString.cpp
   ReachableCode.cpp
+  ScanfFormatString.cpp
   UninitializedValues.cpp
   )
 

Added: cfe/trunk/lib/Analysis/FormatString.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/FormatString.cpp?rev=108500&view=auto
==============================================================================
--- cfe/trunk/lib/Analysis/FormatString.cpp (added)
+++ cfe/trunk/lib/Analysis/FormatString.cpp Thu Jul 15 21:11:22 2010
@@ -0,0 +1,380 @@
+// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared details for processing format strings of printf and scanf
+// (and friends).
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatStringParsing.h"
+
+using clang::analyze_format_string::ArgTypeResult;
+using clang::analyze_format_string::FormatStringHandler;
+using clang::analyze_format_string::FormatSpecifier;
+using clang::analyze_format_string::LengthModifier;
+using clang::analyze_format_string::OptionalAmount;
+using clang::analyze_format_string::PositionContext;
+using namespace clang;
+
+// Key function to FormatStringHandler: empty destructor defined out of line.
+FormatStringHandler::~FormatStringHandler() {}
+
+//===----------------------------------------------------------------------===//
+// Functions for parsing format strings components in both printf and
+// scanf format strings.
+//===----------------------------------------------------------------------===//
+
+OptionalAmount  // Parses a run of decimal digits in [Beg, E) as a constant.
+clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
+  const char *I = Beg;  // scan cursor
+  UpdateOnReturn <const char*> UpdateBeg(Beg, I);  // presumably copies I into Beg on scope exit -- confirm
+
+  unsigned accumulator = 0;  // value of the digits read so far
+  bool hasDigits = false;
+
+  for ( ; I != E; ++I) {
+    char c = *I;
+    if (c >= '0' && c <= '9') {
+      hasDigits = true;
+      accumulator = (accumulator * 10) + (c - '0');  // NOTE(review): no overflow check
+      continue;
+    }
+
+    if (hasDigits)  // first non-digit after at least one digit
+      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
+          false);
+
+    break;  // no digits at the cursor
+  }
+
+  return OptionalAmount();  // also reached when the digits run all the way to E
+}
+
+OptionalAmount  // Parses '*' as an Arg amount bound to argIndex, else a constant.
+clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
+                                                     const char *E,
+                                                     unsigned &argIndex) {
+  if (*Beg == '*') {  // Beg is read here without being compared against E
+    ++Beg;  // consume the '*'
+    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);  // records argIndex, then advances it
+  }
+
+  return ParseAmount(Beg, E);
+}
+
+OptionalAmount  // Parses '*N$' as a positional Arg amount, else a constant.
+clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
+                                                  const char *Start,
+                                                  const char *&Beg,
+                                                  const char *E,
+                                                  PositionContext p) {
+  if (*Beg == '*') {  // Beg is read here without being compared against E
+    const char *I = Beg + 1;  // cursor just past the '*'; Beg itself is not moved yet
+    const OptionalAmount &Amt = ParseAmount(I, E);
+
+    if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
+      H.HandleInvalidPosition(Beg, I - Beg, p);
+      return OptionalAmount(false);  // failure value; ParseFieldWidth tests it with isInvalid()
+    }
+
+    if (I == E) {
+      // NOTE(review): ParseAmount returns a default amount when the digits run to E, so the check above may fire first -- confirm.
+      H.HandleIncompleteSpecifier(Start, E - Start);
+      return OptionalAmount(false);
+    }
+
+    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
+
+    if (*I == '$') {
+      // Handle positional arguments
+
+      // Special case: '*0$', since this is an easy mistake.
+      if (Amt.getConstantAmount() == 0) {
+        H.HandleZeroPosition(Beg, I - Beg + 1);  // + 1 so the reported range includes the '$'
+        return OptionalAmount(false);
+      }
+
+      const char *Tmp = Beg;  // still points at the '*'
+      Beg = ++I;  // consume through the '$'
+
+      return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,  // 1-based position -> 0-based index
+                            Tmp, 0, true);
+    }
+
+    H.HandleInvalidPosition(Beg, I - Beg, p);  // '*' plus digits but no '$'
+    return OptionalAmount(false);
+  }
+
+  return ParseAmount(Beg, E);
+}
+
+
+bool  // Parses the field width into CS; returns true only on a parse error.
+clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
+                                              FormatSpecifier &CS,
+                                              const char *Start,
+                                              const char *&Beg, const char *E,
+                                              unsigned *argIndex) {
+  // FIXME: Support negative field widths.
+  if (argIndex) {  // non-null: parse in non-positional form ('*' uses *argIndex)
+    CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
+  }
+  else {  // null: parse in positional form ('*N$')
+    const OptionalAmount Amt =
+      ParsePositionAmount(H, Start, Beg, E,
+                          analyze_format_string::FieldWidthPos);
+
+    if (Amt.isInvalid())
+      return true;  // ParsePositionAmount reports to H before returning its failure value
+    CS.setFieldWidth(Amt);
+  }
+  return false;
+}
+
+bool  // Parses an optional 'N$' argument position; returns true only on error.
+clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
+                                               FormatSpecifier &FS,
+                                               const char *Start,
+                                               const char *&Beg,
+                                               const char *E) {
+  const char *I = Beg;  // local cursor; Beg is only written once 'N$' is accepted
+
+  const OptionalAmount &Amt = ParseAmount(I, E);
+
+  if (I == E) {
+    // No more characters left?
+    H.HandleIncompleteSpecifier(Start, E - Start);
+    return true;
+  }
+
+  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {  // I steps past the tested character
+    // Special case: '%0$', since this is an easy mistake.
+    if (Amt.getConstantAmount() == 0) {
+      H.HandleZeroPosition(Start, I - Start);
+      return true;
+    }
+
+    FS.setArgIndex(Amt.getConstantAmount() - 1);  // 1-based position -> 0-based index
+    FS.setUsesPositionalArg();
+    // Update the caller's pointer if we decided to consume
+    // these characters.
+    Beg = I;
+    return false;
+  }
+
+  return false;  // no 'N$' at this point: not an error, and Beg is unchanged
+}
+
+bool  // Parses an optional length modifier at I; returns true if one was found.
+clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
+                                                  const char *&I,
+                                                  const char *E) {
+  LengthModifier::Kind lmKind = LengthModifier::None;
+  const char *lmPosition = I;  // where the modifier starts, kept for the LengthModifier
+  switch (*I) {  // I is read here without being compared against E
+    default:
+      return false;  // no modifier: I and FS are left untouched
+    case 'h':
+      ++I;
+      lmKind = (I != E && *I == 'h') ?  // "hh" vs "h"; the second ++I runs only for "hh"
+      ++I, LengthModifier::AsChar : LengthModifier::AsShort;
+      break;
+    case 'l':
+      ++I;
+      lmKind = (I != E && *I == 'l') ?  // "ll" vs "l"; the second ++I runs only for "ll"
+      ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
+      break;
+    case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
+    case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
+    case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
+    case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
+    case 'q': lmKind = LengthModifier::AsLongLong;   ++I; break;  // 'q' is recorded with the same kind as "ll"
+  }
+  LengthModifier lm(lmPosition, lmKind);
+  FS.setLengthModifier(lm);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on ArgTypeResult.
+//===----------------------------------------------------------------------===//
+
+bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {  // Is argTy acceptable for this result kind?
+  switch (K) {
+    case InvalidTy:
+      assert(false && "ArgTypeResult must be valid");
+      return true;  // reached only when the assert is compiled out
+
+    case UnknownTy:
+      return true;  // unknown expectation: every argument type is accepted
+
+    case SpecificTy: {
+      argTy = C.getCanonicalType(argTy).getUnqualifiedType();  // compare canonical, unqualified types
+      if (T == argTy)
+        return true;
+      if (const BuiltinType *BT = argTy->getAs<BuiltinType>())  // otherwise accept the opposite-signedness integer type
+        switch (BT->getKind()) {
+          default:
+            break;
+          case BuiltinType::Char_S:
+          case BuiltinType::SChar:
+            return T == C.UnsignedCharTy;
+          case BuiltinType::Char_U:
+          case BuiltinType::UChar:
+            return T == C.SignedCharTy;
+          case BuiltinType::Short:
+            return T == C.UnsignedShortTy;
+          case BuiltinType::UShort:
+            return T == C.ShortTy;
+          case BuiltinType::Int:
+            return T == C.UnsignedIntTy;
+          case BuiltinType::UInt:
+            return T == C.IntTy;
+          case BuiltinType::Long:
+            return T == C.UnsignedLongTy;
+          case BuiltinType::ULong:
+            return T == C.LongTy;
+          case BuiltinType::LongLong:
+            return T == C.UnsignedLongLongTy;
+          case BuiltinType::ULongLong:
+            return T == C.LongLongTy;
+        }
+      return false;
+    }
+
+    case CStrTy: {
+      const PointerType *PT = argTy->getAs<PointerType>();
+      if (!PT)
+        return false;
+      QualType pointeeTy = PT->getPointeeType();
+      if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())  // pointee must be void or a char variant
+        switch (BT->getKind()) {
+          case BuiltinType::Void:
+          case BuiltinType::Char_U:
+          case BuiltinType::UChar:
+          case BuiltinType::Char_S:
+          case BuiltinType::SChar:
+            return true;
+          default:
+            break;
+        }
+
+      return false;
+    }
+
+    case WCStrTy: {
+      const PointerType *PT = argTy->getAs<PointerType>();
+      if (!PT)
+        return false;
+      QualType pointeeTy =
+        C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
+      return pointeeTy == C.getWCharType();  // pointee must be exactly the context's wchar type
+    }
+
+    case CPointerTy:  // a PointerType or an ObjCObjectPointerType is accepted
+      return argTy->getAs<PointerType>() != NULL ||
+      	     argTy->getAs<ObjCObjectPointerType>() != NULL;
+
+    case ObjCPointerTy:  // only an ObjCObjectPointerType is accepted
+      return argTy->getAs<ObjCObjectPointerType>() != NULL;
+  }
+
+  // FIXME: Should be unreachable, but Clang is currently emitting
+  // a warning.
+  return false;
+}
+
+QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {  // One concrete type per result kind.
+  switch (K) {
+    case InvalidTy:
+      assert(false && "No representative type for Invalid ArgTypeResult");
+      // Fall-through.
+    case UnknownTy:
+      return QualType();  // default-constructed QualType: no representative type
+    case SpecificTy:
+      return T;
+    case CStrTy:
+      return C.getPointerType(C.CharTy);
+    case WCStrTy:
+      return C.getPointerType(C.getWCharType());
+    case ObjCPointerTy:
+      return C.ObjCBuiltinIdTy;
+    case CPointerTy:
+      return C.VoidPtrTy;
+  }
+
+  // FIXME: Should be unreachable, but Clang is currently emitting
+  // a warning.
+  return QualType();
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on OptionalAmount.
+//===----------------------------------------------------------------------===//
+
+ArgTypeResult  // Argument type for an amount; built from Ctx.IntTy.
+analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
+  return Ctx.IntTy;  // returned unconditionally; how the amount was specified is not consulted
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on LengthModifier.
+//===----------------------------------------------------------------------===//
+
+const char *  // Returns the spelling of this length modifier ("" for None).
+analyze_format_string::LengthModifier::toString() const {
+  switch (kind) {
+  case AsChar:
+    return "hh";
+  case AsShort:
+    return "h";
+  case AsLong: // or AsWideChar
+    return "l";
+  case AsLongLong:  // always spelled "ll" here
+    return "ll";
+  case AsIntMax:
+    return "j";
+  case AsSizeT:
+    return "z";
+  case AsPtrDiff:
+    return "t";
+  case AsLongDouble:
+    return "L";
+  case None:
+    return "";
+  }
+  return NULL;  // reached only if kind matches none of the cases above
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on OptionalAmount.
+//===----------------------------------------------------------------------===//
+
+void  // Writes this amount to os in format-string syntax.
+analyze_format_string::OptionalAmount::toString(llvm::raw_ostream &os) const {
+  switch (hs) {
+  case Invalid:
+  case NotSpecified:
+    return;  // nothing is written for these two states
+  case Arg:
+    if (UsesDotPrefix)
+        os << ".";
+    if (usesPositionalArg())
+      os << "*" << getPositionalArgIndex() << "$";  // positional form: "*<index>$"
+    else
+      os << "*";
+    break;
+  case Constant:
+    if (UsesDotPrefix)
+        os << ".";
+    os << amt;  // the constant itself
+    break;
+  }
+}
+

Modified: cfe/trunk/lib/Analysis/PrintfFormatString.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/PrintfFormatString.cpp?rev=108500&r1=108499&r2=108500&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/PrintfFormatString.cpp (original)
+++ cfe/trunk/lib/Analysis/PrintfFormatString.cpp Thu Jul 15 21:11:22 2010
@@ -1,4 +1,4 @@
-//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
+//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,141 +12,28 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Analysis/Analyses/PrintfFormatString.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/Type.h"
-#include "llvm/Support/raw_ostream.h"
-
-using clang::analyze_printf::ArgTypeResult;
-using clang::analyze_printf::FormatSpecifier;
-using clang::analyze_printf::FormatStringHandler;
-using clang::analyze_printf::OptionalAmount;
-using clang::analyze_printf::PositionContext;
+#include "clang/Analysis/Analyses/FormatString.h"
+#include "FormatStringParsing.h"
+
+using clang::analyze_format_string::ArgTypeResult;
+using clang::analyze_format_string::FormatStringHandler;
+using clang::analyze_format_string::LengthModifier;
+using clang::analyze_format_string::OptionalAmount;
 using clang::analyze_printf::ConversionSpecifier;
-using clang::analyze_printf::LengthModifier;
+using clang::analyze_printf::PrintfSpecifier;
 
 using namespace clang;
 
-namespace {
-class FormatSpecifierResult {
-  FormatSpecifier FS;
-  const char *Start;
-  bool Stop;
-public:
-  FormatSpecifierResult(bool stop = false)
-    : Start(0), Stop(stop) {}
-  FormatSpecifierResult(const char *start,
-                        const FormatSpecifier &fs)
-    : FS(fs), Start(start), Stop(false) {}
-
-  const char *getStart() const { return Start; }
-  bool shouldStop() const { return Stop; }
-  bool hasValue() const { return Start != 0; }
-  const FormatSpecifier &getValue() const {
-    assert(hasValue());
-    return FS;
-  }
-  const FormatSpecifier &getValue() { return FS; }
-};
-} // end anonymous namespace
-
-template <typename T>
-class UpdateOnReturn {
-  T &ValueToUpdate;
-  const T &ValueToCopy;
-public:
-  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
-    : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}
-
-  ~UpdateOnReturn() {
-    ValueToUpdate = ValueToCopy;
-  }
-};
+typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
+        PrintfSpecifierResult;
 
 //===----------------------------------------------------------------------===//
 // Methods for parsing format strings.
 //===----------------------------------------------------------------------===//
 
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
-  const char *I = Beg;
-  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
-
-  unsigned accumulator = 0;
-  bool hasDigits = false;
-
-  for ( ; I != E; ++I) {
-    char c = *I;
-    if (c >= '0' && c <= '9') {
-      hasDigits = true;
-      accumulator = (accumulator * 10) + (c - '0');
-      continue;
-    }
-
-    if (hasDigits)
-      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
-          false);
-
-    break;
-  }
-
-  return OptionalAmount();
-}
-
-static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
-                                             unsigned &argIndex) {
-  if (*Beg == '*') {
-    ++Beg;
-    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
-  }
-
-  return ParseAmount(Beg, E);
-}
-
-static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
-                                          const char *Start,
-                                          const char *&Beg, const char *E,
-                                          PositionContext p) {
-  if (*Beg == '*') {
-    const char *I = Beg + 1;
-    const OptionalAmount &Amt = ParseAmount(I, E);
-
-    if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
-      H.HandleInvalidPosition(Beg, I - Beg, p);
-      return OptionalAmount(false);
-    }
-
-    if (I== E) {
-      // No more characters left?
-      H.HandleIncompleteFormatSpecifier(Start, E - Start);
-      return OptionalAmount(false);
-    }
-
-    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
-
-    if (*I == '$') {
-      // Handle positional arguments
-
-      // Special case: '*0$', since this is an easy mistake.
-      if (Amt.getConstantAmount() == 0) {
-        H.HandleZeroPosition(Beg, I - Beg + 1);
-        return OptionalAmount(false);
-      }
-
-      const char *Tmp = Beg;
-      Beg = ++I;
+using analyze_format_string::ParseNonPositionAmount;
 
-      return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
-                            Tmp, 0, true);
-    }
-
-    H.HandleInvalidPosition(Beg, I - Beg, p);
-    return OptionalAmount(false);
-  }
-
-  return ParseAmount(Beg, E);
-}
-
-static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
+static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
                            const char *Start, const char *&Beg, const char *E,
                            unsigned *argIndex) {
   if (argIndex) {
@@ -154,7 +41,7 @@
   }
   else {
     const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
-                                                  analyze_printf::PrecisionPos);
+                                           analyze_format_string::PrecisionPos);
     if (Amt.isInvalid())
       return true;
     FS.setPrecision(Amt);
@@ -162,57 +49,7 @@
   return false;
 }
 
-static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
-                            const char *Start, const char *&Beg, const char *E,
-                            unsigned *argIndex) {
-  // FIXME: Support negative field widths.
-  if (argIndex) {
-    FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
-  }
-  else {
-    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
-                                                 analyze_printf::FieldWidthPos);
-    if (Amt.isInvalid())
-      return true;
-    FS.setFieldWidth(Amt);
-  }
-  return false;
-}
-
-static bool ParseArgPosition(FormatStringHandler &H,
-                             FormatSpecifier &FS, const char *Start,
-                             const char *&Beg, const char *E) {
-
-  using namespace clang::analyze_printf;
-  const char *I = Beg;
-
-  const OptionalAmount &Amt = ParseAmount(I, E);
-
-  if (I == E) {
-    // No more characters left?
-    H.HandleIncompleteFormatSpecifier(Start, E - Start);
-    return true;
-  }
-
-  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
-    // Special case: '%0$', since this is an easy mistake.
-    if (Amt.getConstantAmount() == 0) {
-      H.HandleZeroPosition(Start, I - Start);
-      return true;
-    }
-
-    FS.setArgIndex(Amt.getConstantAmount() - 1);
-    FS.setUsesPositionalArg();
-    // Update the caller's pointer if we decided to consume
-    // these characters.
-    Beg = I;
-    return false;
-  }
-
-  return false;
-}
-
-static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
+static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
                                                   const char *&Beg,
                                                   const char *E,
                                                   unsigned &argIndex) {
@@ -243,17 +80,17 @@
 
   if (I == E) {
     // No more characters left?
-    H.HandleIncompleteFormatSpecifier(Start, E - Start);
+    H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
   }
 
-  FormatSpecifier FS;
+  PrintfSpecifier FS;
   if (ParseArgPosition(H, FS, Start, I, E))
     return true;
 
   if (I == E) {
     // No more characters left?
-    H.HandleIncompleteFormatSpecifier(Start, E - Start);
+    H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
   }
 
@@ -274,7 +111,7 @@
 
   if (I == E) {
     // No more characters left?
-    H.HandleIncompleteFormatSpecifier(Start, E - Start);
+    H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
   }
 
@@ -285,7 +122,7 @@
 
   if (I == E) {
     // No more characters left?
-    H.HandleIncompleteFormatSpecifier(Start, E - Start);
+    H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
   }
 
@@ -293,7 +130,7 @@
   if (*I == '.') {
     ++I;
     if (I == E) {
-      H.HandleIncompleteFormatSpecifier(Start, E - Start);
+      H.HandleIncompleteSpecifier(Start, E - Start);
       return true;
     }
 
@@ -303,39 +140,15 @@
 
     if (I == E) {
       // No more characters left?
-      H.HandleIncompleteFormatSpecifier(Start, E - Start);
+      H.HandleIncompleteSpecifier(Start, E - Start);
       return true;
     }
   }
 
   // Look for the length modifier.
-  LengthModifier::Kind lmKind = LengthModifier::None;
-  const char *lmPosition = I;
-  switch (*I) {
-    default:
-      break;
-    case 'h':
-      ++I;
-      lmKind = (I != E && *I == 'h') ?
-          ++I, LengthModifier::AsChar : LengthModifier::AsShort;
-      break;
-    case 'l':
-      ++I;
-      lmKind = (I != E && *I == 'l') ?
-          ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
-      break;
-    case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
-    case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
-    case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
-    case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
-    case 'q': lmKind = LengthModifier::AsLongLong;   ++I; break;
-  }
-  LengthModifier lm(lmPosition, lmKind);
-  FS.setLengthModifier(lm);
-
-  if (I == E) {
+  if (ParseLengthModifier(FS, I, E) && I == E) {
     // No more characters left?
-    H.HandleIncompleteFormatSpecifier(Start, E - Start);
+    H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
   }
 
@@ -386,19 +199,20 @@
 
   if (k == ConversionSpecifier::InvalidSpecifier) {
     // Assume the conversion takes one argument.
-    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
+    return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
   }
-  return FormatSpecifierResult(Start, FS);
+  return PrintfSpecifierResult(Start, FS);
 }
 
-bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
-                       const char *I, const char *E) {
+bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
+                                                     const char *I,
+                                                     const char *E) {
 
   unsigned argIndex = 0;
 
   // Keep looking for a format specifier until we have exhausted the string.
   while (I != E) {
-    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
+    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
     // Did a fail-stop error of any kind occur when parsing the specifier?
     // If so, don't do any more processing.
     if (FSR.shouldStop())
@@ -408,7 +222,7 @@
     if (!FSR.hasValue())
       continue;
     // We have a format specifier.  Pass it to the callback.
-    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
+    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
                                  I - FSR.getStart()))
       return true;
   }
@@ -416,129 +230,6 @@
   return false;
 }
 
-FormatStringHandler::~FormatStringHandler() {}
-
-//===----------------------------------------------------------------------===//
-// Methods on ArgTypeResult.
-//===----------------------------------------------------------------------===//
-
-bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
-  switch (K) {
-    case InvalidTy:
-      assert(false && "ArgTypeResult must be valid");
-      return true;
-
-    case UnknownTy:
-      return true;
-
-    case SpecificTy: {
-      argTy = C.getCanonicalType(argTy).getUnqualifiedType();
-      if (T == argTy)
-        return true;
-      if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
-        switch (BT->getKind()) {
-          default:
-            break;
-          case BuiltinType::Char_S:
-          case BuiltinType::SChar:
-            return T == C.UnsignedCharTy;
-          case BuiltinType::Char_U:
-          case BuiltinType::UChar:
-            return T == C.SignedCharTy;
-          case BuiltinType::Short:
-            return T == C.UnsignedShortTy;
-          case BuiltinType::UShort:
-            return T == C.ShortTy;
-          case BuiltinType::Int:
-            return T == C.UnsignedIntTy;
-          case BuiltinType::UInt:
-            return T == C.IntTy;
-          case BuiltinType::Long:
-            return T == C.UnsignedLongTy;
-          case BuiltinType::ULong:
-            return T == C.LongTy;
-          case BuiltinType::LongLong:
-            return T == C.UnsignedLongLongTy;
-          case BuiltinType::ULongLong:
-            return T == C.LongLongTy;
-        }
-      return false;
-    }
-
-    case CStrTy: {
-      const PointerType *PT = argTy->getAs<PointerType>();
-      if (!PT)
-        return false;
-      QualType pointeeTy = PT->getPointeeType();
-      if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
-        switch (BT->getKind()) {
-          case BuiltinType::Void:
-          case BuiltinType::Char_U:
-          case BuiltinType::UChar:
-          case BuiltinType::Char_S:
-          case BuiltinType::SChar:
-            return true;
-          default:
-            break;
-        }
-
-      return false;
-    }
-
-    case WCStrTy: {
-      const PointerType *PT = argTy->getAs<PointerType>();
-      if (!PT)
-        return false;
-      QualType pointeeTy =
-        C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
-      return pointeeTy == C.getWCharType();
-    }
-
-    case CPointerTy:
-      return argTy->getAs<PointerType>() != NULL ||
-      	     argTy->getAs<ObjCObjectPointerType>() != NULL;
-
-    case ObjCPointerTy:
-      return argTy->getAs<ObjCObjectPointerType>() != NULL;
-  }
-
-  // FIXME: Should be unreachable, but Clang is currently emitting
-  // a warning.
-  return false;
-}
-
-QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
-  switch (K) {
-    case InvalidTy:
-      assert(false && "No representative type for Invalid ArgTypeResult");
-      // Fall-through.
-    case UnknownTy:
-      return QualType();
-    case SpecificTy:
-      return T;
-    case CStrTy:
-      return C.getPointerType(C.CharTy);
-    case WCStrTy:
-      return C.getPointerType(C.getWCharType());
-    case ObjCPointerTy:
-      return C.ObjCBuiltinIdTy;
-    case CPointerTy:
-      return C.VoidPtrTy;
-  }
-
-  // FIXME: Should be unreachable, but Clang is currently emitting
-  // a warning.
-  return QualType();
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
-  return Ctx.IntTy;
-}
-
 //===----------------------------------------------------------------------===//
 // Methods on ConversionSpecifier.
 //===----------------------------------------------------------------------===//
@@ -579,63 +270,10 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Methods on LengthModifier.
-//===----------------------------------------------------------------------===//
-
-const char *LengthModifier::toString() const {
-  switch (kind) {
-  case AsChar:
-    return "hh";
-  case AsShort:
-    return "h";
-  case AsLong: // or AsWideChar
-    return "l";
-  case AsLongLong:
-    return "ll";
-  case AsIntMax:
-    return "j";
-  case AsSizeT:
-    return "z";
-  case AsPtrDiff:
-    return "t";
-  case AsLongDouble:
-    return "L";
-  case None:
-    return "";
-  }
-  return NULL;
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-void OptionalAmount::toString(llvm::raw_ostream &os) const {
-  switch (hs) {
-  case Invalid:
-  case NotSpecified:
-    return;
-  case Arg:
-    if (UsesDotPrefix)
-        os << ".";
-    if (usesPositionalArg())
-      os << "*" << getPositionalArgIndex() << "$";
-    else
-      os << "*";
-    break;
-  case Constant:
-    if (UsesDotPrefix)
-        os << ".";
-    os << amt;
-    break;
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on FormatSpecifier.
+// Methods on PrintfSpecifier.
 //===----------------------------------------------------------------------===//
 
-ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
+ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
   if (!CS.consumesDataArgument())
     return ArgTypeResult::Invalid();
 
@@ -702,7 +340,7 @@
   return ArgTypeResult();
 }
 
-bool FormatSpecifier::fixType(QualType QT) {
+bool PrintfSpecifier::fixType(QualType QT) {
   // Handle strings first (char *, wchar_t *)
   if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
     CS.setKind(ConversionSpecifier::CStrArg);
@@ -783,9 +421,9 @@
   return true;
 }
 
-void FormatSpecifier::toString(llvm::raw_ostream &os) const {
+void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
   // Whilst some features have no defined order, we are using the order
-  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
+  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) §7.19.6.1)
   os << "%";
 
   // Positional args
@@ -810,7 +448,7 @@
   os << CS.toString();
 }
 
-bool FormatSpecifier::hasValidPlusPrefix() const {
+bool PrintfSpecifier::hasValidPlusPrefix() const {
   if (!HasPlusPrefix)
     return true;
 
@@ -833,7 +471,7 @@
   }
 }
 
-bool FormatSpecifier::hasValidAlternativeForm() const {
+bool PrintfSpecifier::hasValidAlternativeForm() const {
   if (!HasAlternativeForm)
     return true;
 
@@ -856,7 +494,7 @@
   }
 }
 
-bool FormatSpecifier::hasValidLeadingZeros() const {
+bool PrintfSpecifier::hasValidLeadingZeros() const {
   if (!HasLeadingZeroes)
     return true;
 
@@ -883,7 +521,7 @@
   }
 }
 
-bool FormatSpecifier::hasValidSpacePrefix() const {
+bool PrintfSpecifier::hasValidSpacePrefix() const {
   if (!HasSpacePrefix)
     return true;
 
@@ -906,7 +544,7 @@
   }
 }
 
-bool FormatSpecifier::hasValidLeftJustified() const {
+bool PrintfSpecifier::hasValidLeftJustified() const {
   if (!IsLeftJustified)
     return true;
 
@@ -920,7 +558,7 @@
   }
 }
 
-bool FormatSpecifier::hasValidLengthModifier() const {
+bool PrintfSpecifier::hasValidLengthModifier() const {
   switch (LM.getKind()) {
   case LengthModifier::None:
     return true;
@@ -988,7 +626,7 @@
   return false;
 }
 
-bool FormatSpecifier::hasValidPrecision() const {
+bool PrintfSpecifier::hasValidPrecision() const {
   if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
     return true;
 
@@ -1015,7 +653,7 @@
     return false;
   }
 }
-bool FormatSpecifier::hasValidFieldWidth() const {
+bool PrintfSpecifier::hasValidFieldWidth() const {
   if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
       return true;
 

Modified: cfe/trunk/lib/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.h?rev=108500&r1=108499&r2=108500&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/Sema.h (original)
+++ cfe/trunk/lib/Sema/Sema.h Thu Jul 15 21:11:22 2010
@@ -4618,16 +4618,23 @@
   OwningExprResult SemaBuiltinAtomicOverloaded(OwningExprResult TheCallResult);
   bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum,
                               llvm::APSInt &Result);
+
   bool SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
                               bool HasVAListArg, unsigned format_idx,
-                              unsigned firstDataArg);
-  void CheckPrintfString(const StringLiteral *FExpr, const Expr *OrigFormatExpr,
+                              unsigned firstDataArg, bool isPrintf);
+
+  void CheckFormatString(const StringLiteral *FExpr, const Expr *OrigFormatExpr,
                          const CallExpr *TheCall, bool HasVAListArg,
-                         unsigned format_idx, unsigned firstDataArg);
+                         unsigned format_idx, unsigned firstDataArg,
+                         bool isPrintf);
+
   void CheckNonNullArguments(const NonNullAttr *NonNull,
                              const CallExpr *TheCall);
-  void CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
-                            unsigned format_idx, unsigned firstDataArg);
+
+  void CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg,
+                                 unsigned format_idx, unsigned firstDataArg,
+                                 bool isPrintf);
+
   void CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
                             SourceLocation ReturnLoc);
   void CheckFloatComparison(SourceLocation loc, Expr* lex, Expr* rex);

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=108500&r1=108499&r2=108500&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Jul 15 21:11:22 2010
@@ -13,7 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Sema.h"
-#include "clang/Analysis/Analyses/PrintfFormatString.h"
+#include "clang/Analysis/Analyses/FormatString.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/DeclObjC.h"
@@ -334,10 +334,13 @@
 
   // Printf checking.
   if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
-    if (CheckablePrintfAttr(Format, TheCall)) {
+    const bool b = Format->getType() == "scanf";
+    if (b || CheckablePrintfAttr(Format, TheCall)) {
       bool HasVAListArg = Format->getFirstArg() == 0;
-      CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
-                           HasVAListArg ? 0 : Format->getFirstArg() - 1);
+      CheckPrintfScanfArguments(TheCall, HasVAListArg,
+                                Format->getFormatIdx() - 1,
+                                HasVAListArg ? 0 : Format->getFirstArg() - 1,
+                                !b);
     }
   }
 
@@ -362,12 +365,13 @@
   if (!Ty->isBlockPointerType())
     return false;
 
-  if (!CheckablePrintfAttr(Format, TheCall))
+  const bool b = Format->getType() == "scanf";
+  if (!b && !CheckablePrintfAttr(Format, TheCall))
     return false;
 
   bool HasVAListArg = Format->getFirstArg() == 0;
-  CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
-                       HasVAListArg ? 0 : Format->getFirstArg() - 1);
+  CheckPrintfScanfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
+                            HasVAListArg ? 0 : Format->getFirstArg() - 1, !b);
 
   return false;
 }
@@ -941,29 +945,31 @@
 // Handle i > 1 ? "x" : "y", recursivelly
 bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
                                   bool HasVAListArg,
-                                  unsigned format_idx, unsigned firstDataArg) {
+                                  unsigned format_idx, unsigned firstDataArg,
+                                  bool isPrintf) {
+
   if (E->isTypeDependent() || E->isValueDependent())
     return false;
 
   switch (E->getStmtClass()) {
   case Stmt::ConditionalOperatorClass: {
     const ConditionalOperator *C = cast<ConditionalOperator>(E);
-    return SemaCheckStringLiteral(C->getTrueExpr(), TheCall,
-                                  HasVAListArg, format_idx, firstDataArg)
-        && SemaCheckStringLiteral(C->getRHS(), TheCall,
-                                  HasVAListArg, format_idx, firstDataArg);
+    return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, HasVAListArg,
+                                  format_idx, firstDataArg, isPrintf)
+        && SemaCheckStringLiteral(C->getRHS(), TheCall, HasVAListArg,
+                                  format_idx, firstDataArg, isPrintf);
   }
 
   case Stmt::ImplicitCastExprClass: {
     const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E);
     return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
-                                  format_idx, firstDataArg);
+                                  format_idx, firstDataArg, isPrintf);
   }
 
   case Stmt::ParenExprClass: {
     const ParenExpr *Expr = cast<ParenExpr>(E);
     return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
-                                  format_idx, firstDataArg);
+                                  format_idx, firstDataArg, isPrintf);
   }
 
   case Stmt::DeclRefExprClass: {
@@ -985,7 +991,8 @@
       if (isConstant) {
         if (const Expr *Init = VD->getAnyInitializer())
           return SemaCheckStringLiteral(Init, TheCall,
-                                        HasVAListArg, format_idx, firstDataArg);
+                                        HasVAListArg, format_idx, firstDataArg,
+                                        isPrintf);
       }
 
       // For vprintf* functions (i.e., HasVAListArg==true), we add a
@@ -1025,7 +1032,7 @@
             const Expr *Arg = CE->getArg(ArgIndex - 1);
 
             return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg,
-                                          format_idx, firstDataArg);
+                                          format_idx, firstDataArg, isPrintf);
           }
         }
       }
@@ -1043,8 +1050,8 @@
       StrE = cast<StringLiteral>(E);
 
     if (StrE) {
-      CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
-                        firstDataArg);
+      CheckFormatString(StrE, E, TheCall, HasVAListArg, format_idx,
+                        firstDataArg, isPrintf);
       return true;
     }
 
@@ -1069,55 +1076,13 @@
   }
 }
 
-/// CheckPrintfArguments - Check calls to printf (and similar functions) for
-/// correct use of format strings.
-///
-///  HasVAListArg - A predicate indicating whether the printf-like
-///    function is passed an explicit va_arg argument (e.g., vprintf)
-///
-///  format_idx - The index into Args for the format string.
-///
-/// Improper format strings to functions in the printf family can be
-/// the source of bizarre bugs and very serious security holes.  A
-/// good source of information is available in the following paper
-/// (which includes additional references):
-///
-///  FormatGuard: Automatic Protection From printf Format String
-///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
-///
-/// TODO:
-/// Functionality implemented:
-///
-///  We can statically check the following properties for string
-///  literal format strings for non v.*printf functions (where the
-///  arguments are passed directly):
-//
-///  (1) Are the number of format conversions equal to the number of
-///      data arguments?
-///
-///  (2) Does each format conversion correctly match the type of the
-///      corresponding data argument?
-///
-/// Moreover, for all printf functions we can:
-///
-///  (3) Check for a missing format string (when not caught by type checking).
-///
-///  (4) Check for no-operation flags; e.g. using "#" with format
-///      conversion 'c'  (TODO)
-///
-///  (5) Check the use of '%n', a major source of security holes.
-///
-///  (6) Check for malformed format conversions that don't specify anything.
-///
-///  (7) Check for empty format strings.  e.g: printf("");
-///
-///  (8) Check that the format string is a wide literal.
-///
-/// All of these checks can be done by parsing the format string.
-///
+/// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar
+/// functions) for correct use of format strings.
 void
-Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
-                           unsigned format_idx, unsigned firstDataArg) {
+Sema::CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg,
+                                unsigned format_idx, unsigned firstDataArg,
+                                bool isPrintf) {
+
   const Expr *Fn = TheCall->getCallee();
 
   // The way the format attribute works in GCC, the implicit this argument
@@ -1132,9 +1097,9 @@
       --firstDataArg;
   }
 
-  // CHECK: printf-like function is called with no format string.
+  // CHECK: printf/scanf-like function is called with no format string.
   if (format_idx >= TheCall->getNumArgs()) {
-    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
+    Diag(TheCall->getRParenLoc(), diag::warn_missing_format_string)
       << Fn->getSourceRange();
     return;
   }
@@ -1154,23 +1119,24 @@
   // ObjC string uses the same format specifiers as C string, so we can use
   // the same format string checking logic for both ObjC and C strings.
   if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
-                             firstDataArg))
+                             firstDataArg, isPrintf))
     return;  // Literal format string found, check done!
 
   // If there are no arguments specified, warn with -Wformat-security, otherwise
   // warn only with -Wformat-nonliteral.
   if (TheCall->getNumArgs() == format_idx+1)
     Diag(TheCall->getArg(format_idx)->getLocStart(),
-         diag::warn_printf_nonliteral_noargs)
+         diag::warn_format_nonliteral_noargs)
       << OrigFormatExpr->getSourceRange();
   else
     Diag(TheCall->getArg(format_idx)->getLocStart(),
-         diag::warn_printf_nonliteral)
+         diag::warn_format_nonliteral)
            << OrigFormatExpr->getSourceRange();
 }
 
 namespace {
-class CheckPrintfHandler : public analyze_printf::FormatStringHandler {
+class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
+protected:
   Sema &S;
   const StringLiteral *FExpr;
   const Expr *OrigFormatExpr;
@@ -1185,7 +1151,7 @@
   bool usesPositionalArgs;
   bool atFirstArg;
 public:
-  CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
+  CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
                      const Expr *origFormatExpr, unsigned firstDataArg,
                      unsigned numDataArgs, bool isObjCLiteral,
                      const char *beg, bool hasVAListArg,
@@ -1203,55 +1169,33 @@
 
   void DoneProcessing();
 
-  void HandleIncompleteFormatSpecifier(const char *startSpecifier,
-                                       unsigned specifierLen);
-
-  bool
-  HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
-                                   const char *startSpecifier,
-                                   unsigned specifierLen);
-
+  void HandleIncompleteSpecifier(const char *startSpecifier,
+                                 unsigned specifierLen);
+    
   virtual void HandleInvalidPosition(const char *startSpecifier,
                                      unsigned specifierLen,
-                                     analyze_printf::PositionContext p);
+                                     analyze_format_string::PositionContext p);
 
   virtual void HandleZeroPosition(const char *startPos, unsigned posLen);
 
   void HandleNullChar(const char *nullCharacter);
 
-  bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
-                             const char *startSpecifier,
-                             unsigned specifierLen);
-private:
+protected:
   SourceRange getFormatStringRange();
-  CharSourceRange getFormatSpecifierRange(const char *startSpecifier,
-                                          unsigned specifierLen);
+  CharSourceRange getSpecifierRange(const char *startSpecifier,
+                                    unsigned specifierLen);
   SourceLocation getLocationOfByte(const char *x);
 
-  bool HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned k,
-                    const char *startSpecifier, unsigned specifierLen);
-  void HandleInvalidAmount(const analyze_printf::FormatSpecifier &FS,
-                           const analyze_printf::OptionalAmount &Amt,
-                           unsigned type,
-                           const char *startSpecifier, unsigned specifierLen);
-  void HandleFlag(const analyze_printf::FormatSpecifier &FS,
-                  const analyze_printf::OptionalFlag &flag,
-                  const char *startSpecifier, unsigned specifierLen);
-  void HandleIgnoredFlag(const analyze_printf::FormatSpecifier &FS,
-                         const analyze_printf::OptionalFlag &ignoredFlag,
-                         const analyze_printf::OptionalFlag &flag,
-                         const char *startSpecifier, unsigned specifierLen);
-
   const Expr *getDataArg(unsigned i) const;
 };
 }
 
-SourceRange CheckPrintfHandler::getFormatStringRange() {
+SourceRange CheckFormatHandler::getFormatStringRange() {
   return OrigFormatExpr->getSourceRange();
 }
 
-CharSourceRange CheckPrintfHandler::
-getFormatSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
+CharSourceRange CheckFormatHandler::
+getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
   SourceLocation Start = getLocationOfByte(startSpecifier);
   SourceLocation End   = getLocationOfByte(startSpecifier + specifierLen - 1);
 
@@ -1261,43 +1205,109 @@
   return CharSourceRange::getCharRange(Start, End);
 }
 
-SourceLocation CheckPrintfHandler::getLocationOfByte(const char *x) {
+SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
   return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
 }
 
-void CheckPrintfHandler::
-HandleIncompleteFormatSpecifier(const char *startSpecifier,
-                                unsigned specifierLen) {
+void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
+                                                   unsigned specifierLen){
   SourceLocation Loc = getLocationOfByte(startSpecifier);
   S.Diag(Loc, diag::warn_printf_incomplete_specifier)
-    << getFormatSpecifierRange(startSpecifier, specifierLen);
+    << getSpecifierRange(startSpecifier, specifierLen);
 }
 
 void
-CheckPrintfHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
-                                          analyze_printf::PositionContext p) {
+CheckFormatHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
+                                     analyze_format_string::PositionContext p) {
   SourceLocation Loc = getLocationOfByte(startPos);
-  S.Diag(Loc, diag::warn_printf_invalid_positional_specifier)
-    << (unsigned) p << getFormatSpecifierRange(startPos, posLen);
+  S.Diag(Loc, diag::warn_format_invalid_positional_specifier)
+    << (unsigned) p << getSpecifierRange(startPos, posLen);
 }
 
-void CheckPrintfHandler::HandleZeroPosition(const char *startPos,
+void CheckFormatHandler::HandleZeroPosition(const char *startPos,
                                             unsigned posLen) {
   SourceLocation Loc = getLocationOfByte(startPos);
-  S.Diag(Loc, diag::warn_printf_zero_positional_specifier)
-    << getFormatSpecifierRange(startPos, posLen);
+  S.Diag(Loc, diag::warn_format_zero_positional_specifier)
+    << getSpecifierRange(startPos, posLen);
 }
 
-bool CheckPrintfHandler::
-HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
-                                 const char *startSpecifier,
-                                 unsigned specifierLen) {
+void CheckFormatHandler::HandleNullChar(const char *nullCharacter) {
+  // The presence of a null character is likely an error.
+  S.Diag(getLocationOfByte(nullCharacter),
+         diag::warn_printf_format_string_contains_null_char)
+    << getFormatStringRange();
+}
 
+const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
+  return TheCall->getArg(FirstDataArg + i);
+}
+
+void CheckFormatHandler::DoneProcessing() {
+    // Does the number of data arguments exceed the number of
+    // format conversions in the format string?
+  if (!HasVAListArg) {
+      // Find any arguments that weren't covered.
+    CoveredArgs.flip();
+    signed notCoveredArg = CoveredArgs.find_first();
+    if (notCoveredArg >= 0) {
+      assert((unsigned)notCoveredArg < NumDataArgs);
+      S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
+             diag::warn_printf_data_arg_not_used)
+      << getFormatStringRange();
+    }
+  }
+}
+
+//===--- CHECK: Printf format string checking ------------------------------===//
+
+namespace {
+class CheckPrintfHandler : public CheckFormatHandler {
+public:
+  CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
+                     const Expr *origFormatExpr, unsigned firstDataArg,
+                     unsigned numDataArgs, bool isObjCLiteral,
+                     const char *beg, bool hasVAListArg,
+                     const CallExpr *theCall, unsigned formatIdx)
+  : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
+                       numDataArgs, isObjCLiteral, beg, hasVAListArg,
+                       theCall, formatIdx) {}
+  
+  
+  bool HandleInvalidPrintfConversionSpecifier(
+                                      const analyze_printf::PrintfSpecifier &FS,
+                                      const char *startSpecifier,
+                                      unsigned specifierLen);
+  
+  bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                             const char *startSpecifier,
+                             unsigned specifierLen);
+  
+  bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k,
+                    const char *startSpecifier, unsigned specifierLen);
+  void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS,
+                           const analyze_printf::OptionalAmount &Amt,
+                           unsigned type,
+                           const char *startSpecifier, unsigned specifierLen);
+  void HandleFlag(const analyze_printf::PrintfSpecifier &FS,
+                  const analyze_printf::OptionalFlag &flag,
+                  const char *startSpecifier, unsigned specifierLen);
+  void HandleIgnoredFlag(const analyze_printf::PrintfSpecifier &FS,
+                         const analyze_printf::OptionalFlag &ignoredFlag,
+                         const analyze_printf::OptionalFlag &flag,
+                         const char *startSpecifier, unsigned specifierLen);
+};  
+}
+
+bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
+                                      const analyze_printf::PrintfSpecifier &FS,
+                                      const char *startSpecifier,
+                                      unsigned specifierLen) {
+  
   unsigned argIndex = FS.getArgIndex();
   bool keepGoing = true;
   if (argIndex < NumDataArgs) {
-    // Consider the argument coverered, even though the specifier doesn't
-    // make sense.
+      // Consider the argument covered, even though the specifier doesn't
+      // make sense.
     CoveredArgs.set(argIndex);
   }
   else {
@@ -1308,32 +1318,21 @@
     // gibberish when trying to match arguments.
     keepGoing = false;
   }
-
+  
   const analyze_printf::ConversionSpecifier &CS =
-    FS.getConversionSpecifier();
+  FS.getConversionSpecifier();
   SourceLocation Loc = getLocationOfByte(CS.getStart());
   S.Diag(Loc, diag::warn_printf_invalid_conversion)
-      << llvm::StringRef(CS.getStart(), CS.getLength())
-      << getFormatSpecifierRange(startSpecifier, specifierLen);
-
+  << llvm::StringRef(CS.getStart(), CS.getLength())
+  << getSpecifierRange(startSpecifier, specifierLen);
+  
   return keepGoing;
 }
 
-void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
-  // The presence of a null character is likely an error.
-  S.Diag(getLocationOfByte(nullCharacter),
-         diag::warn_printf_format_string_contains_null_char)
-    << getFormatStringRange();
-}
-
-const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
-  return TheCall->getArg(FirstDataArg + i);
-}
-
-bool
-CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
-                                 unsigned k, const char *startSpecifier,
-                                 unsigned specifierLen) {
+bool CheckPrintfHandler::HandleAmount(
+                               const analyze_format_string::OptionalAmount &Amt,
+                               unsigned k, const char *startSpecifier,
+                               unsigned specifierLen) {
 
   if (Amt.hasDataArgument()) {
     if (!HasVAListArg) {
@@ -1341,7 +1340,7 @@
       if (argIndex >= NumDataArgs) {
         S.Diag(getLocationOfByte(Amt.getStart()),
                diag::warn_printf_asterisk_missing_arg)
-          << k << getFormatSpecifierRange(startSpecifier, specifierLen);
+          << k << getSpecifierRange(startSpecifier, specifierLen);
         // Don't do any more checking.  We will just emit
         // spurious errors.
         return false;
@@ -1363,7 +1362,7 @@
                diag::warn_printf_asterisk_wrong_type)
           << k
           << ATR.getRepresentativeType(S.Context) << T
-          << getFormatSpecifierRange(startSpecifier, specifierLen)
+          << getSpecifierRange(startSpecifier, specifierLen)
           << Arg->getSourceRange();
         // Don't do any more checking.  We will just emit
         // spurious errors.
@@ -1375,7 +1374,7 @@
 }
 
 void CheckPrintfHandler::HandleInvalidAmount(
-                                      const analyze_printf::FormatSpecifier &FS,
+                                      const analyze_printf::PrintfSpecifier &FS,
                                       const analyze_printf::OptionalAmount &Amt,
                                       unsigned type,
                                       const char *startSpecifier,
@@ -1387,8 +1386,8 @@
         diag::warn_printf_nonsensical_optional_amount)
       << type
       << CS.toString()
-      << getFormatSpecifierRange(startSpecifier, specifierLen)
-      << FixItHint::CreateRemoval(getFormatSpecifierRange(Amt.getStart(),
+      << getSpecifierRange(startSpecifier, specifierLen)
+      << FixItHint::CreateRemoval(getSpecifierRange(Amt.getStart(),
           Amt.getConstantLength()));
     break;
 
@@ -1397,12 +1396,12 @@
         diag::warn_printf_nonsensical_optional_amount)
       << type
       << CS.toString()
-      << getFormatSpecifierRange(startSpecifier, specifierLen);
+      << getSpecifierRange(startSpecifier, specifierLen);
     break;
   }
 }
 
-void CheckPrintfHandler::HandleFlag(const analyze_printf::FormatSpecifier &FS,
+void CheckPrintfHandler::HandleFlag(const analyze_printf::PrintfSpecifier &FS,
                                     const analyze_printf::OptionalFlag &flag,
                                     const char *startSpecifier,
                                     unsigned specifierLen) {
@@ -1411,12 +1410,12 @@
   S.Diag(getLocationOfByte(flag.getPosition()),
       diag::warn_printf_nonsensical_flag)
     << flag.toString() << CS.toString()
-    << getFormatSpecifierRange(startSpecifier, specifierLen)
-    << FixItHint::CreateRemoval(getFormatSpecifierRange(flag.getPosition(), 1));
+    << getSpecifierRange(startSpecifier, specifierLen)
+    << FixItHint::CreateRemoval(getSpecifierRange(flag.getPosition(), 1));
 }
 
 void CheckPrintfHandler::HandleIgnoredFlag(
-                                const analyze_printf::FormatSpecifier &FS,
+                                const analyze_printf::PrintfSpecifier &FS,
                                 const analyze_printf::OptionalFlag &ignoredFlag,
                                 const analyze_printf::OptionalFlag &flag,
                                 const char *startSpecifier,
@@ -1425,13 +1424,13 @@
   S.Diag(getLocationOfByte(ignoredFlag.getPosition()),
       diag::warn_printf_ignored_flag)
     << ignoredFlag.toString() << flag.toString()
-    << getFormatSpecifierRange(startSpecifier, specifierLen)
-    << FixItHint::CreateRemoval(getFormatSpecifierRange(
+    << getSpecifierRange(startSpecifier, specifierLen)
+    << FixItHint::CreateRemoval(getSpecifierRange(
         ignoredFlag.getPosition(), 1));
 }
 
 bool
-CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
+CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
                                             &FS,
                                           const char *startSpecifier,
                                           unsigned specifierLen) {
@@ -1446,8 +1445,8 @@
   else if (usesPositionalArgs != FS.usesPositionalArg()) {
     // Cannot mix-and-match positional and non-positional arguments.
     S.Diag(getLocationOfByte(CS.getStart()),
-           diag::warn_printf_mix_positional_nonpositional_args)
-      << getFormatSpecifierRange(startSpecifier, specifierLen);
+           diag::warn_format_mix_positional_nonpositional_args)
+      << getSpecifierRange(startSpecifier, specifierLen);
     return false;
   }
 
@@ -1481,7 +1480,8 @@
   // Check for using an Objective-C specific conversion specifier
   // in a non-ObjC literal.
   if (!IsObjCLiteral && CS.isObjCArg()) {
-    return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
+    return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
+                                                  specifierLen);
   }
 
   // Check for invalid use of field width
@@ -1522,15 +1522,15 @@
     S.Diag(getLocationOfByte(LM.getStart()),
         diag::warn_printf_nonsensical_length)
       << LM.toString() << CS.toString()
-      << getFormatSpecifierRange(startSpecifier, specifierLen)
-      << FixItHint::CreateRemoval(getFormatSpecifierRange(LM.getStart(),
+      << getSpecifierRange(startSpecifier, specifierLen)
+      << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(),
           LM.getLength()));
 
   // Are we using '%n'?
   if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) {
     // Issue a warning about this being a possible security issue.
     S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
-      << getFormatSpecifierRange(startSpecifier, specifierLen);
+      << getSpecifierRange(startSpecifier, specifierLen);
     // Continue checking the other format specifiers.
     return true;
   }
@@ -1544,12 +1544,12 @@
       S.Diag(getLocationOfByte(CS.getStart()),
              diag::warn_printf_positional_arg_exceeds_data_args)
         << (argIndex+1) << NumDataArgs
-        << getFormatSpecifierRange(startSpecifier, specifierLen);
+        << getSpecifierRange(startSpecifier, specifierLen);
     }
     else {
       S.Diag(getLocationOfByte(CS.getStart()),
              diag::warn_printf_insufficient_data_args)
-        << getFormatSpecifierRange(startSpecifier, specifierLen);
+        << getSpecifierRange(startSpecifier, specifierLen);
     }
 
     // Don't do any more checking.
@@ -1570,7 +1570,7 @@
           return true;
 
     // We may be able to offer a FixItHint if it is a supported type.
-    FormatSpecifier fixedFS = FS;
+    PrintfSpecifier fixedFS = FS;
     bool success = fixedFS.fixType(Ex->getType());
 
     if (success) {
@@ -1582,17 +1582,17 @@
       S.Diag(getLocationOfByte(CS.getStart()),
           diag::warn_printf_conversion_argument_type_mismatch)
         << ATR.getRepresentativeType(S.Context) << Ex->getType()
-        << getFormatSpecifierRange(startSpecifier, specifierLen)
+        << getSpecifierRange(startSpecifier, specifierLen)
         << Ex->getSourceRange()
         << FixItHint::CreateReplacement(
-            getFormatSpecifierRange(startSpecifier, specifierLen),
+            getSpecifierRange(startSpecifier, specifierLen),
             os.str());
     }
     else {
       S.Diag(getLocationOfByte(CS.getStart()),
              diag::warn_printf_conversion_argument_type_mismatch)
         << ATR.getRepresentativeType(S.Context) << Ex->getType()
-        << getFormatSpecifierRange(startSpecifier, specifierLen)
+        << getSpecifierRange(startSpecifier, specifierLen)
         << Ex->getSourceRange();
     }
   }
@@ -1600,54 +1600,150 @@
   return true;
 }
 
-void CheckPrintfHandler::DoneProcessing() {
-  // Does the number of data arguments exceed the number of
-  // format conversions in the format string?
-  if (!HasVAListArg) {
-    // Find any arguments that weren't covered.
-    CoveredArgs.flip();
-    signed notCoveredArg = CoveredArgs.find_first();
-    if (notCoveredArg >= 0) {
-      assert((unsigned)notCoveredArg < NumDataArgs);
-      S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
-             diag::warn_printf_data_arg_not_used)
-        << getFormatStringRange();
+//===--- CHECK: Scanf format string checking ------------------------------===//
+
+namespace {  
+class CheckScanfHandler : public CheckFormatHandler {
+public:
+  CheckScanfHandler(Sema &s, const StringLiteral *fexpr,
+                    const Expr *origFormatExpr, unsigned firstDataArg,
+                    unsigned numDataArgs, bool isObjCLiteral,
+                    const char *beg, bool hasVAListArg,
+                    const CallExpr *theCall, unsigned formatIdx)
+  : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
+                       numDataArgs, isObjCLiteral, beg, hasVAListArg,
+                       theCall, formatIdx) {}
+  
+  bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+                            const char *startSpecifier,
+                            unsigned specifierLen);
+};
+}
+
+bool CheckScanfHandler::HandleScanfSpecifier(
+                                       const analyze_scanf::ScanfSpecifier &FS,
+                                       const char *startSpecifier,
+                                       unsigned specifierLen) {
+  
+  using namespace analyze_scanf;
+  using namespace analyze_format_string;  
+
+  const ConversionSpecifier &CS = FS.getConversionSpecifier();
+
+  // FIXME: Handle case where '%' and '*' don't consume an argument.
+  // This needs to be done for the printf case as well.
+  if (atFirstArg) {
+    atFirstArg = false;
+    usesPositionalArgs = FS.usesPositionalArg();
+  }
+  else if (usesPositionalArgs != FS.usesPositionalArg()) {
+    // Cannot mix-and-match positional and non-positional arguments.
+    S.Diag(getLocationOfByte(CS.getStart()),
+           diag::warn_format_mix_positional_nonpositional_args)
+      << getSpecifierRange(startSpecifier, specifierLen);
+    return false;
+  }
+  
+  // Check if the field width is non-zero.
+  const OptionalAmount &Amt = FS.getFieldWidth();
+  if (Amt.getHowSpecified() == OptionalAmount::Constant) {
+    if (Amt.getConstantAmount() == 0) {
+      const CharSourceRange &R = getSpecifierRange(Amt.getStart(),
+                                                   Amt.getConstantLength());
+      S.Diag(getLocationOfByte(Amt.getStart()),
+             diag::warn_scanf_nonzero_width)
+        << R << FixItHint::CreateRemoval(R);
+    }
+  }
+  
+  if (!FS.consumesDataArgument()) {
+    // FIXME: Technically specifying a precision or field width here
+    // makes no sense.  Worth issuing a warning at some point.
+    return true;
+  }
+  
+  // Consume the argument.
+  unsigned argIndex = FS.getArgIndex();
+  if (argIndex < NumDataArgs) {
+      // The check to see if the argIndex is valid will come later.
+      // We set the bit here because we may exit early from this
+      // function if we encounter some other error.
+    CoveredArgs.set(argIndex);
+  }
+  
+  // FIXME: Check that the length modifier is valid with the given
+  // conversion specifier.
+  
+  // The remaining checks depend on the data arguments.
+  if (HasVAListArg)
+    return true;
+  
+  if (argIndex >= NumDataArgs) {
+    if (FS.usesPositionalArg())  {
+      S.Diag(getLocationOfByte(CS.getStart()),
+             diag::warn_printf_positional_arg_exceeds_data_args)
+      << (argIndex+1) << NumDataArgs
+      << getSpecifierRange(startSpecifier, specifierLen);
     }
+    else {
+      S.Diag(getLocationOfByte(CS.getStart()),
+             diag::warn_printf_insufficient_data_args)
+      << getSpecifierRange(startSpecifier, specifierLen);
+    }
+    
+    // Don't do any more checking.
+    return false;
   }
+  
+  // FIXME: Check that the argument type matches the format specifier.
+  
+  return true;
 }
 
-void Sema::CheckPrintfString(const StringLiteral *FExpr,
+void Sema::CheckFormatString(const StringLiteral *FExpr,
                              const Expr *OrigFormatExpr,
                              const CallExpr *TheCall, bool HasVAListArg,
-                             unsigned format_idx, unsigned firstDataArg) {
-
+                             unsigned format_idx, unsigned firstDataArg,
+                             bool isPrintf) {
+  
   // CHECK: is the format string a wide literal?
   if (FExpr->isWide()) {
     Diag(FExpr->getLocStart(),
-         diag::warn_printf_format_string_is_wide_literal)
+         diag::warn_format_string_is_wide_literal)
     << OrigFormatExpr->getSourceRange();
     return;
   }
-
+  
   // Str - The format string.  NOTE: this is NOT null-terminated!
   const char *Str = FExpr->getStrData();
-
+  
   // CHECK: empty format string?
   unsigned StrLen = FExpr->getByteLength();
-
+  
   if (StrLen == 0) {
-    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
+    Diag(FExpr->getLocStart(), diag::warn_empty_format_string)
     << OrigFormatExpr->getSourceRange();
     return;
   }
-
-  CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
-                       TheCall->getNumArgs() - firstDataArg,
-                       isa<ObjCStringLiteral>(OrigFormatExpr), Str,
-                       HasVAListArg, TheCall, format_idx);
-
-  if (!analyze_printf::ParseFormatString(H, Str, Str + StrLen))
-    H.DoneProcessing();
+  
+  if (isPrintf) {
+    CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
+                         TheCall->getNumArgs() - firstDataArg,
+                         isa<ObjCStringLiteral>(OrigFormatExpr), Str,
+                         HasVAListArg, TheCall, format_idx);
+  
+    if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen))
+      H.DoneProcessing();
+  }
+  else {
+    CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
+                        TheCall->getNumArgs() - firstDataArg,
+                        isa<ObjCStringLiteral>(OrigFormatExpr), Str,
+                        HasVAListArg, TheCall, format_idx);
+    
+    if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen))
+      H.DoneProcessing();
+  }
 }
 
 //===--- CHECK: Return Address of Stack Variable --------------------------===//





More information about the cfe-commits mailing list