[clang] [clang] Implement __attribute__((format_matches)) (PR #116708)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 18 14:53:11 PST 2024
https://github.com/apple-fcloutier created https://github.com/llvm/llvm-project/pull/116708
This implements ``__attribute__((format_matches))``, as described in the RFC: https://discourse.llvm.org/t/rfc-format-attribute-attribute-format-like/83076
The ``format`` attribute only allows the compiler to check that a format string matches its arguments. If the format string is passed independently of its arguments, there is no way to have the compiler check it. ``format_matches(flavor, fmtidx, example)`` allows the compiler to check format strings against the ``example`` format string instead of against format arguments. See the changes to AttrDocs.td in this diff for more information.
Implementation-wise, this change subclasses CheckPrintfHandler and CheckScanfHandler to allow them to collect specifiers into arrays, and implements comparing that two specifiers are equivalent. `checkFormatStringExpr` gets a new `ReferenceFormatString` argument that is piped down when calling a function with the `format_matches` attribute (and is `nullptr` otherwise); this is the string that the actual format string is compared against.
Although this change does not enable -Wformat-nonliteral by default, IMO, all the pieces are now in place such that it could be.
>From 11aa136f4e9e2d3d3dc133dec37af62714ed607b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 12 Nov 2024 10:39:18 -0800
Subject: [PATCH] [clang] Implement __attribute__((format_matches))
This implements ``__attribute__((format_matches))``, as described in the RFC:
https://discourse.llvm.org/t/rfc-format-attribute-attribute-format-like/83076
The ``format`` attribute only allows the compiler to check that a format
string matches its arguments. If the format string is passed independently
of its arguments, there is no way to have the compiler check it.
``format_matches(flavor, fmtidx, example)`` allows the compiler to check
format strings against the ``example`` format string instead of against
format arguments. See the changes to AttrDocs.td in this diff for more
information.
Implementation-wise, this change subclasses CheckPrintfHandler and
CheckScanfHandler to allow them to collect specifiers into arrays,
and implements comparing that two specifiers are equivalent.
Radar-Id: 84936554
---
clang/docs/ReleaseNotes.rst | 44 +
clang/include/clang/AST/FormatString.h | 3 +-
clang/include/clang/Basic/Attr.td | 10 +
clang/include/clang/Basic/AttrDocs.td | 97 +++
.../clang/Basic/DiagnosticSemaKinds.td | 21 +
clang/include/clang/Sema/Sema.h | 37 +-
clang/lib/AST/AttrImpl.cpp | 5 +
clang/lib/AST/FormatString.cpp | 91 +++
clang/lib/Sema/SemaChecking.cpp | 754 ++++++++++++++----
clang/lib/Sema/SemaDeclAttr.cpp | 120 ++-
clang/lib/Sema/SemaObjC.cpp | 3 +-
clang/test/Sema/format-string-matches.c | 122 +++
12 files changed, 1121 insertions(+), 186 deletions(-)
create mode 100644 clang/test/Sema/format-string-matches.c
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0efe62f1804cd0a..66e4758fe89e243 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -357,6 +357,50 @@ Non-comprehensive list of changes in this release
- ``__builtin_reduce_add`` function can now be used in constant expressions.
+- There is a new ``format_matches`` attribute to complement the existing
+ ``format`` attribute. ``format_matches`` allows the compiler to verify that
+ a format string argument is equivalent to a reference format string: it is
+ useful when a function accepts a format string without its accompanying
+ arguments to format. For instance:
+
+ .. code-block:: c
+
+ static int status_code;
+ static const char *status_string;
+
+ void print_status(const char *fmt) {
+ fprintf(stderr, fmt, status_code, status_string);
+ // ^ warning: format string is not a string literal [-Wformat-nonliteral]
+ }
+
+ void stuff(void) {
+ print_status("%s (%#08x)\n");
+ // order of %s and %x is swapped but there is no diagnostic
+ }
+
+ Before the introducion of ``format_matches``, this code cannot be verified
+ at compile-time. ``format_matches`` plugs that hole:
+
+ .. code-block:: c
+
+ __attribute__((format_matches(printf, 1, "%x %s")))
+ void print_status(const char *fmt) {
+ fprintf(stderr, fmt, status_code, status_string);
+ // ^ `fmt` verified as if it was "%x %s" here; no longer triggers
+ // -Wformat-nonliteral, would warn if arguments did not match "%x %s"
+ }
+
+ void stuff(void) {
+ print_status("%s (%#08x)\n");
+ // warning: format specifier 's' is incompatible with 'x'
+ // warning: format specifier 'x' is incompatible with 's'
+ }
+
+ Like with ``format``, the first argument is the format string flavor and the
+ second argument is the index of the format string parameter.
+ ``format_matches`` accepts an example valid format string as its third
+ argument. For more information, see the Clang attributes documentation.
+
New Compiler Flags
------------------
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index a074dd23e2ad4c7..3560766433fe220 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -292,7 +292,7 @@ class ArgType {
};
private:
- const Kind K;
+ Kind K;
QualType T;
const char *Name = nullptr;
bool Ptr = false;
@@ -338,6 +338,7 @@ class ArgType {
}
MatchKind matchesType(ASTContext &C, QualType argTy) const;
+ MatchKind matchesArgType(ASTContext &C, const ArgType &other) const;
QualType getRepresentativeType(ASTContext &C) const;
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 6035a563d5fce77..7723e631aa2529c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1807,6 +1807,16 @@ def Format : InheritableAttr {
let Documentation = [FormatDocs];
}
+def FormatMatches : InheritableAttr {
+ let Spellings = [GCC<"format_matches">];
+ let Args = [IdentifierArgument<"Type">, IntArgument<"FormatIdx">, ExprArgument<"ExpectedFormat">];
+ let AdditionalMembers = [{
+ StringLiteral *getFormatString() const;
+ }];
+ let Subjects = SubjectList<[ObjCMethod, Block, HasFunctionProto]>;
+ let Documentation = [FormatMatchesDocs];
+}
+
def FormatArg : InheritableAttr {
let Spellings = [GCC<"format_arg">];
let Args = [ParamIdxArgument<"FormatIdx">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 2fdceca163ee63b..2ca1bf1a454271a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3738,6 +3738,103 @@ behavior of the program is undefined.
}];
}
+def FormatMatchesDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+
+The ``format`` attribute is the basis for the enforcement of diagnostics in the
+``-Wformat`` family, but it only handles the case where the format string is
+passed along with the arguments it is going to format. It cannot handle the case
+where the format string and the format arguments are passed separately from each
+other. For instance:
+
+.. code-block:: c
+
+ static const char *first_name;
+ static double todays_temperature;
+ static int wind_speed;
+
+ void say_hi(const char *fmt) {
+ printf(fmt, first_name, todays_temperature); // warning: format string is not a string literal
+ printf(fmt, first_name, wind_speed); // warning: format string is not a string literal
+ }
+
+ int main() {
+ say_hi("hello %s, it is %g degrees outside");
+ say_hi("hello %s, it is %d degrees outside!"); // no diagnostic
+ }
+
+In this example, ``fmt`` is expected to format a ``const char *`` and a
+``double``, but these values are not passed to ``say_hi``. Without the
+``format`` attribute (which cannot apply in this case), the -Wformat-nonliteral
+diagnostic triggers in the body of ``say_hi``, and incorrect ``say_hi`` call
+sites do not trigger a diagnostic.
+
+To complement the ``format`` attribute, Clang also defines the
+``format_matches`` attribute. Its syntax is similar to the ``format``
+attribute's, but instead of taking the index of the first formatted value
+argument, it takes a C string literal with the expected specifiers:
+
+.. code-block:: c
+
+ static const char *first_name;
+ static double todays_temperature;
+ static int wind_speed;
+
+ __attribute__((__format_matches__(printf, 1, "%s %g")))
+ void say_hi(const char *fmt) {
+ printf(fmt, first_name, todays_temperature); // no dignostic
+ printf(fmt, first_name, wind_speed); // warning: format specifies type 'int' but the argument has type 'double'
+ }
+
+ int main() {
+ say_hi("hello %s, it is %g degrees outside");
+ say_hi("it is %g degrees outside, have a good day %s!");
+ // warning: format specifies 'double' where 'const char *' is required
+ // warning: format specifies 'const char *' where 'double' is required
+ }
+
+The third argument to ``format_matches`` is expected to evaluate to a **C string
+literal** even when the format string would normally be a different type for the
+given flavor, like a ``CFStringRef`` or a ``NSString *``.
+
+In the implementation of a function with the ``format_matches`` attribute,
+format verification works as if the format string was identical to the one
+specified in the attribute.
+
+At the call sites of functions with the ``format_matches`` attribute, format
+verification instead compares the two format strings to evaluate their
+equivalence. Each format flavor defines equivalence between format specifiers.
+Generally speaking, two specifiers are equivalent if they format the same type.
+For instance, in the ``printf`` flavor, ``%2i`` and ``%-0.5d`` are compatible.
+When ``-Wformat-signedness`` is disabled, ``%d`` and ``%u`` are compatible. For
+a negative example, ``%ld`` is incompatible with ``%d``.
+
+Do note the following un-obvious cases:
+
+* Passing ``NULL`` as the format string is accepted without diagnostics.
+* When the format string is not NULL, it cannot _miss_ specifiers, even in
+ trailing positions. For instance, ``%d`` is not accepted when the required
+ format is ``%d %d %d``.
+* Specifiers for integers as small or smaller than ``int`` (such as ``%hhd``)
+ are all mutually compatible because standard argument promotion ensures that
+ integers are at least the size of ``int`` when passed as variadic arguments.
+ With ``-Wformat-signedness``, mixing specifier for types with a different
+ signedness still results in a diagnostic.
+* Format strings expecting a variable modifier (such as ``%*s``) are
+ incompatible with format strings that would itemize the variable modifiers
+ (such as ``%i %s``), even if the two specify ABI-compatible argument lists.
+* All pointer specifiers, modifiers aside, are mutually incompatible. For
+ instance, ``%s`` is not compatible with ``%p``, and ``%p`` is not compatible
+ with ``%n``, and ``%hhn`` is incompatible with ``%s``, even if the pointers
+ are ABI-compatible or identical on the selected platform. However, ``%0.5s``
+ is compatible with ``%s``, since the difference only exists in modifier flags.
+ This is not overridable with ``-Wformat-pedantic`` or its inverse, which
+ control similar behavior in ``-Wformat``.
+
+ }];
+}
+
def FlagEnumDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3caf471d3037f9e..0e7ffc95f8b53ef 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7687,6 +7687,7 @@ def warn_format_nonliteral_noargs : Warning<
def warn_format_nonliteral : Warning<
"format string is not a string literal">,
InGroup<FormatNonLiteral>, DefaultIgnore;
+def err_format_nonliteral : Error<"format string is not a string literal">;
def err_unexpected_interface : Error<
"unexpected interface name %0: expected expression">;
@@ -9998,6 +9999,8 @@ def note_previous_declaration_as : Note<
def warn_printf_insufficient_data_args : Warning<
"more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>;
+def warn_format_cmp_insufficient_specifiers : Warning<
+ "fewer specifiers in format string than expected">, InGroup<FormatInsufficientArgs>;
def warn_printf_data_arg_not_used : Warning<
"data argument not used by format string">, InGroup<FormatExtraArgs>;
def warn_format_invalid_conversion : Warning<
@@ -10115,6 +10118,24 @@ def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
def note_format_security_fixit: Note<
"treat the string as an argument to avoid this">;
+def warn_format_cmp_role_mismatch : Warning<
+ "format argument is %select{a value|an indirect field width|an indirect "
+ "precision|an auxiliary value}0, but it should be %select{a value|an indirect "
+ "field width|an indirect precision|an auxiliary value}1">, InGroup<Format>;
+def warn_format_cmp_modifierfor_mismatch : Warning<
+ "format argument modifies specifier at position %0, but it should modify "
+ "specifier at position %1">, InGroup<Format>;
+def warn_format_cmp_sensitivity_mismatch : Warning<
+ "argument sensitivity is %select{unspecified|private|public|sensitive}0, but "
+ "it should be %select{unspecified|private|public|sensitive}1">, InGroup<Format>;
+def warn_format_cmp_specifier_mismatch : Warning<
+ "format specifier '%0' is incompatible with '%1'">, InGroup<Format>;
+def warn_format_cmp_specifier_sign_mismatch : Warning<
+ "signedness of format specifier '%0' is incompatible with '%1'">, InGroup<Format>;
+def warn_format_cmp_specifier_mismatch_pedantic : Extension<
+ warn_format_cmp_specifier_sign_mismatch.Summary>, InGroup<FormatPedantic>;
+def note_format_cmp_with : Note<
+ "comparing with this %select{specifier|format string}0">;
def warn_null_arg : Warning<
"null passed to a callee that requires a non-null argument">,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d6f3508a5243f36..22df0bc2923db4e 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2150,6 +2150,7 @@ class Sema final : public SemaBase {
FAPK_Fixed, // values to format are fixed (no C-style variadic arguments)
FAPK_Variadic, // values to format are passed as variadic arguments
FAPK_VAList, // values to format are passed in a va_list
+ FAPK_Elsewhere, // values to format are not passed to this function
};
// Used to grab the relevant information from a FormatAttr and a
@@ -2160,12 +2161,15 @@ class Sema final : public SemaBase {
FormatArgumentPassingKind ArgPassingKind;
};
- /// Given a FunctionDecl's FormatAttr, attempts to populate the
- /// FomatStringInfo parameter with the FormatAttr's correct format_idx and
- /// firstDataArg. Returns true when the format fits the function and the
- /// FormatStringInfo has been populated.
- static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
- bool IsVariadic, FormatStringInfo *FSI);
+ /// Given a function and its FormatAttr or FormatMatchesAttr info, attempts to
+ /// populate the FomatStringInfo parameter with the attribute's correct
+ /// format_idx and firstDataArg. Returns true when the format fits the
+ /// function and the FormatStringInfo has been populated.
+ static bool getFormatStringInfo(const Decl *Function, unsigned FormatIdx,
+ unsigned FirstArg, FormatStringInfo *FSI);
+ static bool getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
+ bool IsCXXMember, bool IsVariadic,
+ FormatStringInfo *FSI);
// Used by C++ template instantiation.
ExprResult BuiltinShuffleVector(CallExpr *TheCall);
@@ -2188,7 +2192,9 @@ class Sema final : public SemaBase {
FST_Syslog,
FST_Unknown
};
+ static FormatStringType GetFormatStringType(StringRef FormatFlavor);
static FormatStringType GetFormatStringType(const FormatAttr *Format);
+ static FormatStringType GetFormatStringType(const FormatMatchesAttr *Format);
bool FormatStringHasSArg(const StringLiteral *FExpr);
@@ -2535,11 +2541,17 @@ class Sema final : public SemaBase {
VariadicCallType CallType, SourceLocation Loc,
SourceRange Range,
llvm::SmallBitVector &CheckedVarArgs);
+ bool CheckFormatString(const FormatMatchesAttr *Format,
+ ArrayRef<const Expr *> Args, bool IsCXXMember,
+ VariadicCallType CallType, SourceLocation Loc,
+ SourceRange Range,
+ llvm::SmallBitVector &CheckedVarArgs);
bool CheckFormatArguments(ArrayRef<const Expr *> Args,
- FormatArgumentPassingKind FAPK, unsigned format_idx,
- unsigned firstDataArg, FormatStringType Type,
- VariadicCallType CallType, SourceLocation Loc,
- SourceRange range,
+ FormatArgumentPassingKind FAPK,
+ const StringLiteral *ReferenceFormatString,
+ unsigned format_idx, unsigned firstDataArg,
+ FormatStringType Type, VariadicCallType CallType,
+ SourceLocation Loc, SourceRange range,
llvm::SmallBitVector &CheckedVarArgs);
void CheckInfNaNFunction(const CallExpr *Call, const FunctionDecl *FDecl);
@@ -4527,6 +4539,11 @@ class Sema final : public SemaBase {
FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
IdentifierInfo *Format, int FormatIdx,
int FirstArg);
+ FormatMatchesAttr *mergeFormatMatchesAttr(Decl *D,
+ const AttributeCommonInfo &CI,
+ IdentifierInfo *Format,
+ int FormatIdx,
+ StringLiteral *FormatStr);
/// AddAlignedAttr - Adds an aligned attribute to a particular declaration.
void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp
index f198a9acf8481fd..5a169f815637f6f 100644
--- a/clang/lib/AST/AttrImpl.cpp
+++ b/clang/lib/AST/AttrImpl.cpp
@@ -270,4 +270,9 @@ unsigned AlignedAttr::getAlignment(ASTContext &Ctx) const {
return Ctx.getTargetDefaultAlignForAttributeAligned();
}
+StringLiteral *FormatMatchesAttr::getFormatString() const {
+ // This cannot go in headers because StringLiteral and Expr may be incomplete.
+ return cast<StringLiteral>(getExpectedFormat());
+}
+
#include "clang/AST/AttrImpl.inc"
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index e892c1592df9869..5d3b56fc4e71377 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -595,6 +595,97 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
llvm_unreachable("Invalid ArgType Kind!");
}
+static analyze_format_string::ArgType::MatchKind
+integerTypeMatch(ASTContext &C, QualType A, QualType B, bool CheckSign) {
+ using MK = analyze_format_string::ArgType::MatchKind;
+
+ uint64_t IntSize = C.getTypeSize(C.IntTy);
+ uint64_t ASize = C.getTypeSize(A);
+ uint64_t BSize = C.getTypeSize(B);
+ if (std::max(ASize, IntSize) != std::max(BSize, IntSize))
+ return MK::NoMatch;
+ if (CheckSign && A->isSignedIntegerType() != B->isSignedIntegerType())
+ return MK::NoMatchSignedness;
+ if (ASize != BSize)
+ return MK::MatchPromotion;
+ return MK::Match;
+}
+
+analyze_format_string::ArgType::MatchKind
+ArgType::matchesArgType(ASTContext &C, const ArgType &Other) const {
+ using AK = analyze_format_string::ArgType::Kind;
+
+ // Per matchesType.
+ if (K == AK::InvalidTy || Other.K == AK::InvalidTy)
+ return NoMatch;
+ if (K == AK::UnknownTy || Other.K == AK::UnknownTy)
+ return Match;
+
+ // Handle whether either (or both, or neither) sides has Ptr set,
+ // in addition to whether either (or both, or neither) sides is a SpecificTy
+ // that is a pointer.
+ ArgType Left = *this;
+ bool LeftWasPointer = false;
+ ArgType Right = Other;
+ bool RightWasPointer = false;
+ if (Left.Ptr) {
+ Left.Ptr = false;
+ LeftWasPointer = true;
+ } else if (Left.K == AK::SpecificTy && Left.T->isPointerType()) {
+ Left.T = Left.T->getPointeeType();
+ LeftWasPointer = true;
+ }
+ if (Right.Ptr) {
+ Right.Ptr = false;
+ RightWasPointer = true;
+ } else if (Right.K == AK::SpecificTy && Right.T->isPointerType()) {
+ Right.T = Right.T->getPointeeType();
+ RightWasPointer = true;
+ }
+
+ if (LeftWasPointer != RightWasPointer)
+ return NoMatch;
+
+ // Ensure that if at least one side is a SpecificTy, then Left is a
+ // SpecificTy.
+ if (Right.K == AK::SpecificTy)
+ std::swap(Left, Right);
+
+ if (Left.K == AK::SpecificTy) {
+ if (Right.K == AK::SpecificTy) {
+ auto Canon1 = C.getCanonicalType(Left.T);
+ auto Canon2 = C.getCanonicalType(Right.T);
+ if (Canon1 == Canon2)
+ return Match;
+
+ auto *BT1 = QualType(Canon1)->getAs<BuiltinType>();
+ auto *BT2 = QualType(Canon2)->getAs<BuiltinType>();
+ if (BT1 == nullptr || BT2 == nullptr)
+ return NoMatch;
+ if (BT1 == BT2)
+ return Match;
+
+ if (!LeftWasPointer && BT1->isInteger() && BT2->isInteger())
+ return integerTypeMatch(C, Canon1, Canon2, true);
+ return NoMatch;
+ } else if (Right.K == AK::AnyCharTy) {
+ if (!LeftWasPointer && Left.T->isIntegerType())
+ return integerTypeMatch(C, Left.T, C.CharTy, false);
+ return NoMatch;
+ } else if (Right.K == AK::WIntTy) {
+ if (!LeftWasPointer && Left.T->isIntegerType())
+ return integerTypeMatch(C, Left.T, C.WIntTy, true);
+ return NoMatch;
+ }
+ // It's hypothetically possible to create an AK::SpecificTy ArgType
+ // that matches another kind of ArgType, but in practice Clang doesn't
+ // do that, so ignore that case.
+ return NoMatch;
+ }
+
+ return Left.K == Right.K ? Match : NoMatch;
+}
+
ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const {
// Check for valid vector element types.
if (T.isNull())
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2d4a7cd287b70d7..27fd147fda7708b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3016,17 +3016,33 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) {
<< ArgNum << Arg->getSourceRange();
}
-bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
- bool IsVariadic, FormatStringInfo *FSI) {
- if (Format->getFirstArg() == 0)
+bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
+ unsigned FirstArg, FormatStringInfo *FSI) {
+ bool IsCXXMember = false;
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
+ IsCXXMember = MD->isInstance();
+ bool IsVariadic = false;
+ if (const FunctionType *FnTy = D->getFunctionType())
+ IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
+ else if (const auto *BD = dyn_cast<BlockDecl>(D))
+ IsVariadic = BD->isVariadic();
+ else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
+ IsVariadic = OMD->isVariadic();
+
+ return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI);
+}
+
+bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
+ bool IsCXXMember, bool IsVariadic,
+ FormatStringInfo *FSI) {
+ if (FirstArg == 0)
FSI->ArgPassingKind = FAPK_VAList;
else if (IsVariadic)
FSI->ArgPassingKind = FAPK_Variadic;
else
FSI->ArgPassingKind = FAPK_Fixed;
- FSI->FormatIdx = Format->getFormatIdx() - 1;
- FSI->FirstDataArg =
- FSI->ArgPassingKind == FAPK_VAList ? 0 : Format->getFirstArg() - 1;
+ FSI->FormatIdx = FormatIdx - 1;
+ FSI->FirstDataArg = FSI->ArgPassingKind == FAPK_VAList ? 0 : FirstArg - 1;
// The way the format attribute works in GCC, the implicit this argument
// of member functions is counted. However, it doesn't appear in our own
@@ -3233,10 +3249,15 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
// Printf and scanf checking.
llvm::SmallBitVector CheckedVarArgs;
if (FDecl) {
- for (const auto *I : FDecl->specific_attrs<FormatAttr>()) {
+ for (const auto *I : FDecl->specific_attrs<FormatMatchesAttr>()) {
// Only create vector if there are format attributes.
CheckedVarArgs.resize(Args.size());
+ CheckFormatString(I, Args, IsMemberFunction, CallType, Loc, Range,
+ CheckedVarArgs);
+ }
+ for (const auto *I : FDecl->specific_attrs<FormatAttr>()) {
+ CheckedVarArgs.resize(Args.size());
CheckFormatArguments(I, Args, IsMemberFunction, CallType, Loc, Range,
CheckedVarArgs);
}
@@ -5370,7 +5391,7 @@ bool Sema::BuiltinOSLogFormat(CallExpr *TheCall) {
llvm::SmallBitVector CheckedVarArgs(NumArgs, false);
ArrayRef<const Expr *> Args(TheCall->getArgs(), TheCall->getNumArgs());
bool Success = CheckFormatArguments(
- Args, FAPK_Variadic, FormatIdx, FirstDataArg, FST_OSLog,
+ Args, FAPK_Variadic, nullptr, FormatIdx, FirstDataArg, FST_OSLog,
VariadicFunction, TheCall->getBeginLoc(), SourceRange(),
CheckedVarArgs);
if (!Success)
@@ -5727,13 +5748,13 @@ class FormatStringLiteral {
const StringLiteral *FExpr;
int64_t Offset;
- public:
+public:
FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
: FExpr(fexpr), Offset(Offset) {}
- StringRef getString() const {
- return FExpr->getString().drop_front(Offset);
- }
+ const StringLiteral *getFormatString() const { return FExpr; }
+
+ StringRef getString() const { return FExpr->getString().drop_front(Offset); }
unsigned getByteLength() const {
return FExpr->getByteLength() - getCharByteWidth() * Offset;
@@ -5771,7 +5792,8 @@ class FormatStringLiteral {
} // namespace
static void CheckFormatString(
- Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr,
+ Sema &S, const FormatStringLiteral *FExpr,
+ const StringLiteral *ReferenceFormatString, const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type,
bool inFunctionCall, Sema::VariadicCallType CallType,
@@ -5785,14 +5807,13 @@ static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
// If this function returns false on the arguments to a function expecting a
// format string, we will usually need to emit a warning.
// True string literals are then checked by CheckFormatString.
-static StringLiteralCheckType
-checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
- Sema::FormatArgumentPassingKind APK, unsigned format_idx,
- unsigned firstDataArg, Sema::FormatStringType Type,
- Sema::VariadicCallType CallType, bool InFunctionCall,
- llvm::SmallBitVector &CheckedVarArgs,
- UncoveredArgHandler &UncoveredArg, llvm::APSInt Offset,
- bool IgnoreStringsWithoutSpecifiers = false) {
+static StringLiteralCheckType checkFormatStringExpr(
+ Sema &S, const StringLiteral *ReferenceFormatString, const Expr *E,
+ ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
+ unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type,
+ Sema::VariadicCallType CallType, bool InFunctionCall,
+ llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
+ llvm::APSInt Offset, bool IgnoreStringsWithoutSpecifiers = false) {
if (S.isConstantEvaluatedContext())
return SLCT_NotALiteral;
tryAgain:
@@ -5814,10 +5835,10 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
case Stmt::InitListExprClass:
// Handle expressions like {"foobar"}.
if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
- return checkFormatStringExpr(S, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false,
- CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ return checkFormatStringExpr(
+ S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
+ Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
}
return SLCT_NotALiteral;
case Stmt::BinaryConditionalOperatorClass:
@@ -5849,19 +5870,19 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
if (!CheckLeft)
Left = SLCT_UncheckedLiteral;
else {
- Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, APK, format_idx,
- firstDataArg, Type, CallType, InFunctionCall,
- CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ Left = checkFormatStringExpr(
+ S, ReferenceFormatString, C->getTrueExpr(), Args, APK, format_idx,
+ firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
if (Left == SLCT_NotALiteral || !CheckRight) {
return Left;
}
}
StringLiteralCheckType Right = checkFormatStringExpr(
- S, C->getFalseExpr(), Args, APK, format_idx, firstDataArg, Type,
- CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, C->getFalseExpr(), Args, APK, format_idx,
+ firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
return (CheckLeft && Left < Right) ? Left : Right;
}
@@ -5911,7 +5932,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
Init = InitList->getInit(0)->IgnoreParenImpCasts();
}
return checkFormatStringExpr(
- S, Init, Args, APK, format_idx, firstDataArg, Type, CallType,
+ S, ReferenceFormatString, Init, Args, APK, format_idx,
+ firstDataArg, Type, CallType,
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
}
}
@@ -5932,7 +5954,17 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
// ...
// }
//
- // Another interaction that we need to support is calling a variadic
+ // Another interaction that we need to support is using a format string
+ // specified by the format_matches attribute:
+ //
+ // __attribute__((format_matches(printf, 1, "%s %d")))
+ // void logmessage(char const *fmt, const char *a, int b) {
+ // printf(fmt, a, b); /* do not emit a warning about "fmt" */
+ // printf(fmt, 123.4); /* emit warnings that "%s %d" is incompatible */
+ // ...
+ // }
+ //
+ // Yet another interaction that we need to support is calling a variadic
// format function from a format function that has fixed arguments. For
// instance:
//
@@ -5955,39 +5987,41 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
//
if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) {
if (const auto *D = dyn_cast<Decl>(PV->getDeclContext())) {
- for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) {
- bool IsCXXMember = false;
- if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
- IsCXXMember = MD->isInstance();
-
- bool IsVariadic = false;
- if (const FunctionType *FnTy = D->getFunctionType())
- IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
- else if (const auto *BD = dyn_cast<BlockDecl>(D))
- IsVariadic = BD->isVariadic();
- else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
- IsVariadic = OMD->isVariadic();
+ for (const auto *PVFormatMatches :
+ D->specific_attrs<FormatMatchesAttr>()) {
+ Sema::FormatStringInfo CallerFSI;
+ if (!Sema::getFormatStringInfo(D, PVFormatMatches->getFormatIdx(),
+ 0, &CallerFSI))
+ continue;
+ if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx)
+ return checkFormatStringExpr(
+ S, nullptr, PVFormatMatches->getFormatString(), Args, APK,
+ format_idx, firstDataArg, Type, CallType,
+ /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
+ Offset, IgnoreStringsWithoutSpecifiers);
+ }
+ for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) {
Sema::FormatStringInfo CallerFSI;
- if (Sema::getFormatStringInfo(PVFormat, IsCXXMember, IsVariadic,
- &CallerFSI)) {
- // We also check if the formats are compatible.
- // We can't pass a 'scanf' string to a 'printf' function.
- if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx &&
- Type == S.GetFormatStringType(PVFormat)) {
- // Lastly, check that argument passing kinds transition in a
- // way that makes sense:
- // from a caller with FAPK_VAList, allow FAPK_VAList
- // from a caller with FAPK_Fixed, allow FAPK_Fixed
- // from a caller with FAPK_Fixed, allow FAPK_Variadic
- // from a caller with FAPK_Variadic, allow FAPK_VAList
- switch (combineFAPK(CallerFSI.ArgPassingKind, APK)) {
- case combineFAPK(Sema::FAPK_VAList, Sema::FAPK_VAList):
- case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Fixed):
- case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Variadic):
- case combineFAPK(Sema::FAPK_Variadic, Sema::FAPK_VAList):
- return SLCT_UncheckedLiteral;
- }
+ if (!Sema::getFormatStringInfo(D, PVFormat->getFormatIdx(),
+ PVFormat->getFirstArg(), &CallerFSI))
+ continue;
+ // We also check if the formats are compatible.
+ // We can't pass a 'scanf' string to a 'printf' function.
+ if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx &&
+ Type == S.GetFormatStringType(PVFormat)) {
+ // Lastly, check that argument passing kinds transition in a
+ // way that makes sense:
+ // from a caller with FAPK_VAList, allow FAPK_VAList
+ // from a caller with FAPK_Fixed, allow FAPK_Fixed
+ // from a caller with FAPK_Fixed, allow FAPK_Variadic
+ // from a caller with FAPK_Variadic, allow FAPK_VAList
+ switch (combineFAPK(CallerFSI.ArgPassingKind, APK)) {
+ case combineFAPK(Sema::FAPK_VAList, Sema::FAPK_VAList):
+ case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Fixed):
+ case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Variadic):
+ case combineFAPK(Sema::FAPK_Variadic, Sema::FAPK_VAList):
+ return SLCT_UncheckedLiteral;
}
}
}
@@ -6007,9 +6041,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
for (const auto *FA : ND->specific_attrs<FormatArgAttr>()) {
const Expr *Arg = CE->getArg(FA->getFormatIdx().getASTIndex());
StringLiteralCheckType Result = checkFormatStringExpr(
- S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
+ Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
+ Offset, IgnoreStringsWithoutSpecifiers);
if (IsFirst) {
CommonResult = Result;
IsFirst = false;
@@ -6024,17 +6058,17 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
const Expr *Arg = CE->getArg(0);
return checkFormatStringExpr(
- S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, Arg, Args, APK, format_idx,
+ firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
}
}
}
if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
- return checkFormatStringExpr(S, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false,
- CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ return checkFormatStringExpr(
+ S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
+ Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
return SLCT_NotALiteral;
}
case Stmt::ObjCMessageExprClass: {
@@ -6058,9 +6092,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
return checkFormatStringExpr(
- S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
+ Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
+ Offset, IgnoreStringsWithoutSpecifiers);
}
}
@@ -6082,8 +6116,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return SLCT_NotALiteral;
}
FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue());
- CheckFormatString(S, &FStr, E, Args, APK, format_idx, firstDataArg, Type,
- InFunctionCall, CallType, CheckedVarArgs, UncoveredArg,
+ CheckFormatString(S, &FStr, ReferenceFormatString, E, Args, APK,
+ format_idx, firstDataArg, Type, InFunctionCall,
+ CallType, CheckedVarArgs, UncoveredArg,
IgnoreStringsWithoutSpecifiers);
return SLCT_CheckedLiteral;
}
@@ -6160,8 +6195,8 @@ static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
return nullptr;
}
-Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
- return llvm::StringSwitch<FormatStringType>(Format->getType()->getName())
+Sema::FormatStringType Sema::GetFormatStringType(StringRef Flavor) {
+ return llvm::StringSwitch<FormatStringType>(Flavor)
.Case("scanf", FST_Scanf)
.Cases("printf", "printf0", "syslog", FST_Printf)
.Cases("NSString", "CFString", FST_NSString)
@@ -6174,22 +6209,49 @@ Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
.Default(FST_Unknown);
}
+Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
+ return GetFormatStringType(Format->getType()->getName());
+}
+
+Sema::FormatStringType
+Sema::GetFormatStringType(const FormatMatchesAttr *Format) {
+ return GetFormatStringType(Format->getType()->getName());
+}
+
bool Sema::CheckFormatArguments(const FormatAttr *Format,
ArrayRef<const Expr *> Args, bool IsCXXMember,
VariadicCallType CallType, SourceLocation Loc,
SourceRange Range,
llvm::SmallBitVector &CheckedVarArgs) {
FormatStringInfo FSI;
- if (getFormatStringInfo(Format, IsCXXMember, CallType != VariadicDoesNotApply,
- &FSI))
- return CheckFormatArguments(Args, FSI.ArgPassingKind, FSI.FormatIdx,
+ if (getFormatStringInfo(Format->getFormatIdx(), Format->getFirstArg(),
+ IsCXXMember, CallType != VariadicDoesNotApply, &FSI))
+ return CheckFormatArguments(
+ Args, FSI.ArgPassingKind, nullptr, FSI.FormatIdx, FSI.FirstDataArg,
+ GetFormatStringType(Format), CallType, Loc, Range, CheckedVarArgs);
+ return false;
+}
+
+bool Sema::CheckFormatString(const FormatMatchesAttr *Format,
+ ArrayRef<const Expr *> Args, bool IsCXXMember,
+ VariadicCallType CallType, SourceLocation Loc,
+ SourceRange Range,
+ llvm::SmallBitVector &CheckedVarArgs) {
+ FormatStringInfo FSI;
+ if (getFormatStringInfo(Format->getFormatIdx(), 0, IsCXXMember, false,
+ &FSI)) {
+ FSI.ArgPassingKind = Sema::FAPK_Elsewhere;
+ return CheckFormatArguments(Args, FSI.ArgPassingKind,
+ Format->getFormatString(), FSI.FormatIdx,
FSI.FirstDataArg, GetFormatStringType(Format),
CallType, Loc, Range, CheckedVarArgs);
+ }
return false;
}
bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
Sema::FormatArgumentPassingKind APK,
+ const StringLiteral *ReferenceFormatString,
unsigned format_idx, unsigned firstDataArg,
FormatStringType Type,
VariadicCallType CallType, SourceLocation Loc,
@@ -6217,8 +6279,8 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// the same format string checking logic for both ObjC and C strings.
UncoveredArgHandler UncoveredArg;
StringLiteralCheckType CT = checkFormatStringExpr(
- *this, OrigFormatExpr, Args, APK, format_idx, firstDataArg, Type,
- CallType,
+ *this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx,
+ firstDataArg, Type, CallType,
/*IsFunctionCall*/ true, CheckedVarArgs, UncoveredArg,
/*no string offset*/ llvm::APSInt(64, false) = 0);
@@ -6314,6 +6376,11 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
CoveredArgs.reset();
}
+ bool HasFormatArguments() const {
+ return ArgPassingKind == Sema::FAPK_Fixed ||
+ ArgPassingKind == Sema::FAPK_Variadic;
+ }
+
void DoneProcessing();
void HandleIncompleteSpecifier(const char *startSpecifier,
@@ -6543,7 +6610,7 @@ const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
void CheckFormatHandler::DoneProcessing() {
// Does the number of data arguments exceed the number of
// format conversions in the format string?
- if (ArgPassingKind != Sema::FAPK_VAList) {
+ if (HasFormatArguments()) {
// Find any arguments that weren't covered.
CoveredArgs.flip();
signed notCoveredArg = CoveredArgs.find_first();
@@ -6652,7 +6719,7 @@ CheckFormatHandler::CheckNumArgs(
const analyze_format_string::ConversionSpecifier &CS,
const char *startSpecifier, unsigned specifierLen, unsigned argIndex) {
- if (argIndex >= NumDataArgs) {
+ if (HasFormatArguments() && argIndex >= NumDataArgs) {
PartialDiagnostic PDiag = FS.usesPositionalArg()
? (S.PDiag(diag::warn_printf_positional_arg_exceeds_data_args)
<< (argIndex+1) << NumDataArgs)
@@ -6793,18 +6860,102 @@ class CheckPrintfHandler : public CheckFormatHandler {
void HandleInvalidObjCModifierFlag(const char *startFlag,
unsigned flagLen) override;
- void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
- const char *flagsEnd,
- const char *conversionPosition)
- override;
+ void
+ HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
+ const char *flagsEnd,
+ const char *conversionPosition) override;
+};
+
+/// Keeps around the information needed to verify that two specifiers are
+/// compatible.
+class EquatableFormatArgument {
+public:
+ enum SpecifierSensitivity { SS_None, SS_Private, SS_Public, SS_Sensitive };
+ enum FormatArgumentRole {
+ FAR_Data,
+ FAR_FieldWidth,
+ FAR_Precision,
+ FAR_Auxiliary, // FreeBSD kernel %b and %D
+ };
+
+private:
+ analyze_format_string::ArgType ArgType;
+ analyze_format_string::LengthModifier::Kind LengthMod;
+ StringRef SpecifierLetter;
+ CharSourceRange Range;
+ SourceLocation ElementLoc;
+ FormatArgumentRole Role : 2;
+ SpecifierSensitivity Sensitivity : 2; // only set for FAR_Data
+ unsigned Position : 14;
+ unsigned ModifierFor : 14; // not set for FAR_Data
+
+public:
+ EquatableFormatArgument(CharSourceRange Range, SourceLocation ElementLoc,
+ analyze_format_string::LengthModifier::Kind LengthMod,
+ StringRef SpecifierLetter,
+ analyze_format_string::ArgType ArgType,
+ FormatArgumentRole Role,
+ SpecifierSensitivity Sensitivity, unsigned Position,
+ unsigned ModifierFor)
+ : ArgType(ArgType), LengthMod(LengthMod),
+ SpecifierLetter(SpecifierLetter), Range(Range), ElementLoc(ElementLoc),
+ Role(Role), Sensitivity(Sensitivity), Position(Position),
+ ModifierFor(ModifierFor) {}
+
+ unsigned getPosition() const { return Position; }
+ SourceLocation getSourceLocation() const { return ElementLoc; }
+ CharSourceRange getSourceRange() const { return Range; }
+ analyze_format_string::LengthModifier getLengthModifier() const {
+ return analyze_format_string::LengthModifier(nullptr, LengthMod);
+ }
+ void setModifierFor(unsigned V) { ModifierFor = V; }
+
+ std::string buildFormatSpecifier() const {
+ std::string result;
+ llvm::raw_string_ostream(result)
+ << getLengthModifier().toString() << SpecifierLetter;
+ return result;
+ }
+
+ void VerifyCompatible(Sema &S, const EquatableFormatArgument &Other) const;
+};
+
+/// Turns format strings into lists of EquatableSpecifier objects.
+class DecomposePrintfHandler : public CheckPrintfHandler {
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs;
+ bool HadError;
+
+ DecomposePrintfHandler(
+ Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr,
+ const Sema::FormatStringType type, unsigned firstDataArg,
+ unsigned numDataArgs, bool isObjC, const char *beg,
+ Sema::FormatArgumentPassingKind APK, ArrayRef<const Expr *> Args,
+ unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType CallType,
+ llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs)
+ : CheckPrintfHandler(s, fexpr, origFormatExpr, type, firstDataArg,
+ numDataArgs, isObjC, beg, APK, Args, formatIdx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg),
+ Specs(Specs), HadError(false) {}
+
+public:
+ static bool
+ GetSpecifiers(Sema &S, const FormatStringLiteral *FSL,
+ Sema::FormatStringType type, bool IsObjC, bool InFunctionCall,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Args);
+
+ virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen,
+ const TargetInfo &Target) override;
};
} // namespace
bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
- const analyze_printf::PrintfSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
+ const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen) {
const analyze_printf::PrintfConversionSpecifier &CS =
FS.getConversionSpecifier();
@@ -6822,7 +6973,7 @@ bool CheckPrintfHandler::HandleAmount(
const analyze_format_string::OptionalAmount &Amt, unsigned k,
const char *startSpecifier, unsigned specifierLen) {
if (Amt.hasDataArgument()) {
- if (ArgPassingKind != Sema::FAPK_VAList) {
+ if (HasFormatArguments()) {
unsigned argIndex = Amt.getArgIndex();
if (argIndex >= NumDataArgs) {
EmitFormatDiagnostic(S.PDiag(diag::warn_printf_asterisk_missing_arg)
@@ -6947,8 +7098,185 @@ void CheckPrintfHandler::HandleObjCFlagsWithNonObjCConversion(
auto diag = diag::warn_printf_ObjCflags_without_ObjCConversion;
EmitFormatDiagnostic(S.PDiag(diag) << StringRef(conversionPosition, 1),
getLocationOfByte(conversionPosition),
- /*IsStringLocation*/true,
- Range, FixItHint::CreateRemoval(Range));
+ /*IsStringLocation*/ true, Range,
+ FixItHint::CreateRemoval(Range));
+}
+
+void EquatableFormatArgument::VerifyCompatible(
+ Sema &S, const EquatableFormatArgument &Other) const {
+ using MK = analyze_format_string::ArgType::MatchKind;
+ if (Role != Other.Role) {
+ // diagnose and stop
+ S.Diag(ElementLoc, diag::warn_format_cmp_role_mismatch)
+ << Role << Other.Role << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ return;
+ }
+
+ if (Role != FAR_Data) {
+ if (ModifierFor != Other.ModifierFor) {
+ // diagnose and stop
+ S.Diag(ElementLoc, diag::warn_format_cmp_modifierfor_mismatch)
+ << (ModifierFor + 1) << (Other.ModifierFor + 1) << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ }
+ return;
+ }
+
+ if (Sensitivity != Other.Sensitivity) {
+ // diagnose and continue
+ S.Diag(ElementLoc, diag::warn_format_cmp_sensitivity_mismatch)
+ << Sensitivity << Other.Sensitivity << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ }
+
+ switch (ArgType.matchesArgType(S.Context, Other.ArgType)) {
+ case MK::Match:
+ case MK::MatchPromotion:
+ break;
+
+ case MK::NoMatch:
+ case MK::NoMatchTypeConfusion:
+ case MK::NoMatchPromotionTypeConfusion:
+ S.Diag(ElementLoc, diag::warn_format_cmp_specifier_mismatch)
+ << buildFormatSpecifier() << Other.buildFormatSpecifier() << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ break;
+
+ case MK::NoMatchPedantic:
+ S.Diag(ElementLoc, diag::warn_format_cmp_specifier_mismatch_pedantic)
+ << buildFormatSpecifier() << Other.buildFormatSpecifier() << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ break;
+
+ case MK::NoMatchSignedness:
+ if (!S.getDiagnostics().isIgnored(
+ diag::warn_format_conversion_argument_type_mismatch_signedness,
+ ElementLoc)) {
+ S.Diag(ElementLoc, diag::warn_format_cmp_specifier_sign_mismatch)
+ << buildFormatSpecifier() << Other.buildFormatSpecifier() << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ }
+ break;
+ }
+}
+
+bool DecomposePrintfHandler::GetSpecifiers(
+ Sema &S, const FormatStringLiteral *FSL, Sema::FormatStringType Type,
+ bool IsObjC, bool InFunctionCall,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Args) {
+ StringRef Data = FSL->getString();
+ const char *Str = Data.data();
+ llvm::SmallBitVector BV;
+ UncoveredArgHandler UA;
+ DecomposePrintfHandler H(S, FSL, FSL->getFormatString(), Type, 0, 0, IsObjC,
+ Str, Sema::FAPK_Elsewhere, {FSL->getFormatString()},
+ 0, InFunctionCall, Sema::VariadicDoesNotApply, BV,
+ UA, Args);
+
+ if (!analyze_format_string::ParsePrintfString(
+ H, Str, Str + Data.size(), S.getLangOpts(), S.Context.getTargetInfo(),
+ Type == Sema::FST_FreeBSDKPrintf))
+ H.DoneProcessing();
+ if (H.HadError)
+ return false;
+
+ std::sort(
+ Args.begin(), Args.end(),
+ [](const EquatableFormatArgument &A, const EquatableFormatArgument &B) {
+ return A.getPosition() < B.getPosition();
+ });
+ return true;
+}
+
+bool DecomposePrintfHandler::HandlePrintfSpecifier(
+ const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen, const TargetInfo &Target) {
+ if (!CheckPrintfHandler::HandlePrintfSpecifier(FS, startSpecifier,
+ specifierLen, Target)) {
+ HadError = true;
+ return false;
+ }
+
+ // Do not add any specifiers to the list for %%. This is possibly incorrect
+ // if using a precision/width with a data argument, but that combination is
+ // meaningless and we wouldn't know which format to attach the
+ // precision/width to.
+ const auto &CS = FS.getConversionSpecifier();
+ if (CS.getKind() == analyze_format_string::ConversionSpecifier::PercentArg)
+ return true;
+
+ // have to patch these to have the right ModifierFor if they are used
+ const unsigned Unset = ~0;
+ unsigned FieldWidthIndex = Unset;
+ unsigned PrecisionIndex = Unset;
+
+ // field width?
+ const auto &FieldWidth = FS.getFieldWidth();
+ if (!FieldWidth.isInvalid() && FieldWidth.hasDataArgument()) {
+ FieldWidthIndex = Specs.size();
+ Specs.emplace_back(getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(FieldWidth.getStart()),
+ analyze_format_string::LengthModifier::None, "*",
+ FieldWidth.getArgType(S.Context),
+ EquatableFormatArgument::FAR_FieldWidth,
+ EquatableFormatArgument::SS_None,
+ FieldWidth.usesPositionalArg()
+ ? FieldWidth.getPositionalArgIndex() - 1
+ : FieldWidthIndex,
+ 0);
+ }
+ // precision?
+ const auto &Precision = FS.getPrecision();
+ if (!Precision.isInvalid() && Precision.hasDataArgument()) {
+ PrecisionIndex = Specs.size();
+ Specs.emplace_back(
+ getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(Precision.getStart()),
+ analyze_format_string::LengthModifier::None, ".*",
+ Precision.getArgType(S.Context), EquatableFormatArgument::FAR_Precision,
+ EquatableFormatArgument::SS_None,
+ Precision.usesPositionalArg() ? Precision.getPositionalArgIndex() - 1
+ : PrecisionIndex,
+ 0);
+ }
+
+ // this specifier
+ unsigned SpecIndex =
+ FS.usesPositionalArg() ? FS.getPositionalArgIndex() - 1 : Specs.size();
+ if (FieldWidthIndex != Unset)
+ Specs[FieldWidthIndex].setModifierFor(SpecIndex);
+ if (PrecisionIndex != Unset)
+ Specs[PrecisionIndex].setModifierFor(SpecIndex);
+
+ EquatableFormatArgument::SpecifierSensitivity Sensitivity;
+ if (FS.isPrivate())
+ Sensitivity = EquatableFormatArgument::SS_Private;
+ else if (FS.isPublic())
+ Sensitivity = EquatableFormatArgument::SS_Public;
+ else if (FS.isSensitive())
+ Sensitivity = EquatableFormatArgument::SS_Sensitive;
+ else
+ Sensitivity = EquatableFormatArgument::SS_None;
+
+ Specs.emplace_back(
+ getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(CS.getStart()), FS.getLengthModifier().getKind(),
+ CS.getCharacters(), FS.getArgType(S.Context, isObjCContext()),
+ EquatableFormatArgument::FAR_Data, Sensitivity, SpecIndex, 0);
+
+ // auxiliary argument?
+ if (CS.getKind() == analyze_format_string::ConversionSpecifier::FreeBSDbArg ||
+ CS.getKind() == analyze_format_string::ConversionSpecifier::FreeBSDDArg) {
+ Specs.emplace_back(getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(CS.getStart()),
+ analyze_format_string::LengthModifier::None,
+ CS.getCharacters(),
+ analyze_format_string::ArgType::CStrTy,
+ EquatableFormatArgument::FAR_Auxiliary, Sensitivity,
+ SpecIndex + 1, SpecIndex);
+ }
+ return true;
}
// Determines if the specified is a C++ class or struct containing
@@ -7077,34 +7405,36 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex + 1))
return false;
- // Claim the second argument.
- CoveredArgs.set(argIndex + 1);
-
- // Type check the first argument (int for %b, pointer for %D)
- const Expr *Ex = getDataArg(argIndex);
- const analyze_printf::ArgType &AT =
- (CS.getKind() == ConversionSpecifier::FreeBSDbArg) ?
- ArgType(S.Context.IntTy) : ArgType::CPointerTy;
- if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType()))
- EmitFormatDiagnostic(
- S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
- << AT.getRepresentativeTypeName(S.Context) << Ex->getType()
- << false << Ex->getSourceRange(),
- Ex->getBeginLoc(), /*IsStringLocation*/ false,
- getSpecifierRange(startSpecifier, specifierLen));
-
- // Type check the second argument (char * for both %b and %D)
- Ex = getDataArg(argIndex + 1);
- const analyze_printf::ArgType &AT2 = ArgType::CStrTy;
- if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType()))
- EmitFormatDiagnostic(
- S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
- << AT2.getRepresentativeTypeName(S.Context) << Ex->getType()
- << false << Ex->getSourceRange(),
- Ex->getBeginLoc(), /*IsStringLocation*/ false,
- getSpecifierRange(startSpecifier, specifierLen));
-
- return true;
+ if (HasFormatArguments()) {
+ // Claim the second argument.
+ CoveredArgs.set(argIndex + 1);
+
+ // Type check the first argument (int for %b, pointer for %D)
+ const Expr *Ex = getDataArg(argIndex);
+ const analyze_printf::ArgType &AT =
+ (CS.getKind() == ConversionSpecifier::FreeBSDbArg)
+ ? ArgType(S.Context.IntTy)
+ : ArgType::CPointerTy;
+ if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType()))
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
+ << AT.getRepresentativeTypeName(S.Context) << Ex->getType()
+ << false << Ex->getSourceRange(),
+ Ex->getBeginLoc(), /*IsStringLocation*/ false,
+ getSpecifierRange(startSpecifier, specifierLen));
+
+ // Type check the second argument (char * for both %b and %D)
+ Ex = getDataArg(argIndex + 1);
+ const analyze_printf::ArgType &AT2 = ArgType::CStrTy;
+ if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType()))
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
+ << AT2.getRepresentativeTypeName(S.Context) << Ex->getType()
+ << false << Ex->getSourceRange(),
+ Ex->getBeginLoc(), /*IsStringLocation*/ false,
+ getSpecifierRange(startSpecifier, specifierLen));
+ }
+ return true;
}
// Check for using an Objective-C specific conversion specifier
@@ -7224,7 +7554,7 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
// The remaining checks depend on the data arguments.
- if (ArgPassingKind == Sema::FAPK_VAList)
+ if (!HasFormatArguments())
return true;
if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
@@ -7793,6 +8123,36 @@ class CheckScanfHandler : public CheckFormatHandler {
void HandleIncompleteScanList(const char *start, const char *end) override;
};
+class DecomposeScanfHandler : public CheckScanfHandler {
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs;
+ bool HadError;
+
+ DecomposeScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
+ const Expr *origFormatExpr, Sema::FormatStringType type,
+ unsigned firstDataArg, unsigned numDataArgs,
+ const char *beg, Sema::FormatArgumentPassingKind APK,
+ ArrayRef<const Expr *> Args, unsigned formatIdx,
+ bool inFunctionCall, Sema::VariadicCallType CallType,
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs)
+ : CheckScanfHandler(s, fexpr, origFormatExpr, type, firstDataArg,
+ numDataArgs, beg, APK, Args, formatIdx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg),
+ Specs(Specs), HadError(false) {}
+
+public:
+ static bool
+ GetSpecifiers(Sema &S, const FormatStringLiteral *FSL,
+ Sema::FormatStringType type, bool InFunctionCall,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Args);
+
+ bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) override;
+};
+
} // namespace
void CheckScanfHandler::HandleIncompleteScanList(const char *start,
@@ -7881,7 +8241,7 @@ bool CheckScanfHandler::HandleScanfSpecifier(
HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
// The remaining checks depend on the data arguments.
- if (ArgPassingKind == Sema::FAPK_VAList)
+ if (!HasFormatArguments())
return true;
if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
@@ -7939,8 +8299,84 @@ bool CheckScanfHandler::HandleScanfSpecifier(
return true;
}
+bool DecomposeScanfHandler::GetSpecifiers(
+ Sema &S, const FormatStringLiteral *FSL, Sema::FormatStringType Type,
+ bool InFunctionCall, llvm::SmallVectorImpl<EquatableFormatArgument> &Args) {
+ StringRef Data = FSL->getString();
+ const char *Str = Data.data();
+ llvm::SmallBitVector BV;
+ UncoveredArgHandler UA;
+ DecomposeScanfHandler H(S, FSL, FSL->getFormatString(), Type, 0, 0, Str,
+ Sema::FAPK_Elsewhere, {FSL->getFormatString()}, 0,
+ InFunctionCall, Sema::VariadicDoesNotApply, BV, UA,
+ Args);
+
+ if (!analyze_format_string::ParseScanfString(H, Str, Str + Data.size(),
+ S.getLangOpts(),
+ S.Context.getTargetInfo()))
+ H.DoneProcessing();
+ if (H.HadError)
+ return false;
+
+ std::sort(
+ Args.begin(), Args.end(),
+ [](const EquatableFormatArgument &A, const EquatableFormatArgument &B) {
+ return A.getPosition() < B.getPosition();
+ });
+ return true;
+}
+
+bool DecomposeScanfHandler::HandleScanfSpecifier(
+ const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen) {
+ if (!CheckScanfHandler::HandleScanfSpecifier(FS, startSpecifier,
+ specifierLen)) {
+ HadError = true;
+ return false;
+ }
+
+ const auto &CS = FS.getConversionSpecifier();
+ Specs.emplace_back(
+ getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(CS.getStart()), FS.getLengthModifier().getKind(),
+ CS.getCharacters(), FS.getArgType(S.Context),
+ EquatableFormatArgument::FAR_Data, EquatableFormatArgument::SS_None,
+ FS.usesPositionalArg() ? FS.getPositionalArgIndex() - 1 : Specs.size(),
+ 0);
+ return true;
+}
+
+static void CompareFormatSpecifiers(Sema &S, const StringLiteral *Ref,
+ ArrayRef<EquatableFormatArgument> RefArgs,
+ const StringLiteral *Fmt,
+ ArrayRef<EquatableFormatArgument> FmtArgs) {
+ unsigned CommonArgs;
+ if (RefArgs.size() > FmtArgs.size()) {
+ CommonArgs = FmtArgs.size();
+ // "data argument not used by format string"
+ const auto &Arg = RefArgs[FmtArgs.size()];
+ S.Diag(Arg.getSourceLocation(), diag::warn_printf_data_arg_not_used)
+ << Arg.getSourceRange();
+ S.Diag(Fmt->getBeginLoc(), diag::note_format_cmp_with) << 1;
+ } else if (FmtArgs.size() > RefArgs.size()) {
+ CommonArgs = RefArgs.size();
+ // "more % conversions than data arguments"
+ const auto &Arg = FmtArgs[RefArgs.size()];
+ S.Diag(Fmt->getBeginLoc(), diag::warn_format_cmp_insufficient_specifiers)
+ << Arg.getSourceRange();
+ S.Diag(Ref->getBeginLoc(), diag::note_format_cmp_with) << 1;
+ } else {
+ CommonArgs = FmtArgs.size();
+ }
+
+ for (size_t I = 0; I < CommonArgs; ++I) {
+ FmtArgs[I].VerifyCompatible(S, RefArgs[I]);
+ }
+}
+
static void CheckFormatString(
- Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr,
+ Sema &S, const FormatStringLiteral *FExpr,
+ const StringLiteral *ReferenceFormatString, const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type,
bool inFunctionCall, Sema::VariadicCallType CallType,
@@ -7994,24 +8430,48 @@ static void CheckFormatString(
if (Type == Sema::FST_Printf || Type == Sema::FST_NSString ||
Type == Sema::FST_FreeBSDKPrintf || Type == Sema::FST_OSLog ||
Type == Sema::FST_OSTrace || Type == Sema::FST_Syslog) {
- CheckPrintfHandler H(
- S, FExpr, OrigFormatExpr, Type, firstDataArg, numDataArgs,
- (Type == Sema::FST_NSString || Type == Sema::FST_OSTrace), Str, APK,
- Args, format_idx, inFunctionCall, CallType, CheckedVarArgs,
- UncoveredArg);
-
- if (!analyze_format_string::ParsePrintfString(
- H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo(),
- Type == Sema::FST_FreeBSDKPrintf))
- H.DoneProcessing();
+ bool IsObjC = Type == Sema::FST_NSString || Type == Sema::FST_OSTrace;
+ if (ReferenceFormatString == nullptr) {
+ CheckPrintfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
+ numDataArgs, IsObjC, Str, APK, Args, format_idx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
+ if (!analyze_format_string::ParsePrintfString(
+ H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo(),
+ Type == Sema::FST_FreeBSDKPrintf))
+ H.DoneProcessing();
+ } else {
+ llvm::SmallVector<EquatableFormatArgument, 9> RefArgs, FmtArgs;
+ FormatStringLiteral RefLit = ReferenceFormatString;
+ if (DecomposePrintfHandler::GetSpecifiers(S, &RefLit, Type, IsObjC,
+ inFunctionCall, RefArgs) &&
+ DecomposePrintfHandler::GetSpecifiers(S, FExpr, Type, IsObjC,
+ inFunctionCall, FmtArgs)) {
+ CompareFormatSpecifiers(S, ReferenceFormatString, RefArgs,
+ FExpr->getFormatString(), FmtArgs);
+ }
+ }
} else if (Type == Sema::FST_Scanf) {
- CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
- numDataArgs, Str, APK, Args, format_idx, inFunctionCall,
- CallType, CheckedVarArgs, UncoveredArg);
-
- if (!analyze_format_string::ParseScanfString(
- H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo()))
- H.DoneProcessing();
+ if (ReferenceFormatString == nullptr) {
+ CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
+ numDataArgs, Str, APK, Args, format_idx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
+
+ if (!analyze_format_string::ParseScanfString(
+ H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo()))
+ H.DoneProcessing();
+ } else {
+ llvm::SmallVector<EquatableFormatArgument, 9> RefArgs, FmtArgs;
+ FormatStringLiteral RefLit = ReferenceFormatString;
+ if (DecomposeScanfHandler::GetSpecifiers(S, &RefLit, Type, inFunctionCall,
+ RefArgs) &&
+ DecomposeScanfHandler::GetSpecifiers(S, FExpr, Type, inFunctionCall,
+ FmtArgs)) {
+ CompareFormatSpecifiers(S, ReferenceFormatString, RefArgs,
+ FExpr->getFormatString(), FmtArgs);
+ }
+ }
} // TODO: handle other formats
}
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 0f5baa1e1eb3655..8fa021f55490725 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3610,61 +3610,92 @@ FormatAttr *Sema::mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
return ::new (Context) FormatAttr(Context, CI, Format, FormatIdx, FirstArg);
}
+FormatMatchesAttr *Sema::mergeFormatMatchesAttr(Decl *D,
+ const AttributeCommonInfo &CI,
+ IdentifierInfo *Format,
+ int FormatIdx,
+ StringLiteral *FormatStr) {
+ // Check whether we already have an equivalent FormatMatches attribute.
+ for (auto *F : D->specific_attrs<FormatMatchesAttr>()) {
+ if (F->getType() == Format && F->getFormatIdx() == FormatIdx &&
+ true /* TODO: compare format strings */) {
+ // If we don't have a valid location for this attribute, adopt the
+ // location.
+ if (F->getLocation().isInvalid())
+ F->setRange(CI.getRange());
+ return nullptr;
+ }
+ }
+
+ return ::new (Context)
+ FormatMatchesAttr(Context, CI, Format, FormatIdx, FormatStr);
+}
+
+struct FormatAttrCommon {
+ FormatAttrKind Kind;
+ IdentifierInfo *Identifier;
+ unsigned NumArgs;
+ unsigned FormatStringIdx;
+};
+
/// Handle __attribute__((format(type,idx,firstarg))) attributes based on
/// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
-static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
+ FormatAttrCommon *Info) {
+ // Checks the first two arguments of the attribute; this is shared between
+ // Format and FormatMatches attributes.
+
if (!AL.isArgIdent(0)) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
<< AL << 1 << AANT_ArgumentIdentifier;
- return;
+ return false;
}
// In C++ the implicit 'this' function parameter also counts, and they are
// counted from one.
bool HasImplicitThisParam = isInstanceMethod(D);
- unsigned NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
+ Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
- IdentifierInfo *II = AL.getArgAsIdent(0)->Ident;
- StringRef Format = II->getName();
+ Info->Identifier = AL.getArgAsIdent(0)->Ident;
+ StringRef Format = Info->Identifier->getName();
if (normalizeName(Format)) {
// If we've modified the string name, we need a new identifier for it.
- II = &S.Context.Idents.get(Format);
+ Info->Identifier = &S.Context.Idents.get(Format);
}
// Check for supported formats.
- FormatAttrKind Kind = getFormatAttrKind(Format);
+ Info->Kind = getFormatAttrKind(Format);
- if (Kind == IgnoredFormat)
- return;
+ if (Info->Kind == IgnoredFormat)
+ return false;
- if (Kind == InvalidFormat) {
+ if (Info->Kind == InvalidFormat) {
S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
- << AL << II->getName();
- return;
+ << AL << Info->Identifier->getName();
+ return false;
}
// checks for the 2nd argument
Expr *IdxExpr = AL.getArgAsExpr(1);
- uint32_t Idx;
- if (!S.checkUInt32Argument(AL, IdxExpr, Idx, 2))
- return;
+ if (!S.checkUInt32Argument(AL, IdxExpr, Info->FormatStringIdx, 2))
+ return false;
- if (Idx < 1 || Idx > NumArgs) {
+ if (Info->FormatStringIdx < 1 || Info->FormatStringIdx > Info->NumArgs) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
<< AL << 2 << IdxExpr->getSourceRange();
- return;
+ return false;
}
// FIXME: Do we need to bounds check?
- unsigned ArgIdx = Idx - 1;
+ unsigned ArgIdx = Info->FormatStringIdx - 1;
if (HasImplicitThisParam) {
if (ArgIdx == 0) {
S.Diag(AL.getLoc(),
diag::err_format_attribute_implicit_this_format_string)
- << IdxExpr->getSourceRange();
- return;
+ << IdxExpr->getSourceRange();
+ return false;
}
ArgIdx--;
}
@@ -3676,10 +3707,19 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
(!Ty->isPointerType() ||
!Ty->castAs<PointerType>()->getPointeeType()->isCharType())) {
S.Diag(AL.getLoc(), diag::err_format_attribute_not)
- << IdxExpr->getSourceRange() << getFunctionOrMethodParamRange(D, ArgIdx);
- return;
+ << IdxExpr->getSourceRange()
+ << getFunctionOrMethodParamRange(D, ArgIdx);
+ return false;
}
+ return true;
+}
+
+static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ FormatAttrCommon Info;
+ if (!handleFormatAttrCommon(S, D, AL, &Info))
+ return;
+
// check the 3rd argument
Expr *FirstArgExpr = AL.getArgAsExpr(2);
uint32_t FirstArg;
@@ -3688,7 +3728,7 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// FirstArg == 0 is is always valid.
if (FirstArg != 0) {
- if (Kind == StrftimeFormat) {
+ if (Info.Kind == StrftimeFormat) {
// If the kind is strftime, FirstArg must be 0 because strftime does not
// use any variadic arguments.
S.Diag(AL.getLoc(), diag::err_format_strftime_third_parameter)
@@ -3699,17 +3739,17 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// Else, if the function is variadic, then FirstArg must be 0 or the
// "position" of the ... parameter. It's unusual to use 0 with variadic
// functions, so the fixit proposes the latter.
- if (FirstArg != NumArgs + 1) {
+ if (FirstArg != Info.NumArgs + 1) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
<< AL << 3 << FirstArgExpr->getSourceRange()
<< FixItHint::CreateReplacement(FirstArgExpr->getSourceRange(),
- std::to_string(NumArgs + 1));
+ std::to_string(Info.NumArgs + 1));
return;
}
} else {
// Inescapable GCC compatibility diagnostic.
S.Diag(D->getLocation(), diag::warn_gcc_requires_variadic_function) << AL;
- if (FirstArg <= Idx) {
+ if (FirstArg <= Info.FormatStringIdx) {
// Else, the function is not variadic, and FirstArg must be 0 or any
// parameter after the format parameter. We don't offer a fixit because
// there are too many possible good values.
@@ -3720,11 +3760,34 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
- FormatAttr *NewAttr = S.mergeFormatAttr(D, AL, II, Idx, FirstArg);
+ FormatAttr *NewAttr =
+ S.mergeFormatAttr(D, AL, Info.Identifier, Info.FormatStringIdx, FirstArg);
if (NewAttr)
D->addAttr(NewAttr);
}
+static void handleFormatMatchesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ FormatAttrCommon Info;
+ if (!handleFormatAttrCommon(S, D, AL, &Info))
+ return;
+
+ Expr *FormatStrExpr = AL.getArgAsExpr(2)->IgnoreParenImpCasts();
+ if (auto *SL = dyn_cast<StringLiteral>(FormatStrExpr)) {
+ // Aside from whether we started with a string literal, there is generally
+ // no useful checking that we can do here. For instance, "invalid" format
+ // specifiers (like %!) are simply ignored by printf and don't have a
+ // diagnostic.
+ if (auto *NewAttr = S.mergeFormatMatchesAttr(D, AL, Info.Identifier,
+ Info.FormatStringIdx, SL))
+ D->addAttr(NewAttr);
+ return;
+ }
+
+ S.Diag(AL.getLoc(), diag::err_format_nonliteral)
+ << FormatStrExpr->getSourceRange();
+ return;
+}
+
/// Handle __attribute__((callback(CalleeIdx, PayloadIdx0, ...))) attributes.
static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// The index that identifies the callback callee is mandatory.
@@ -6748,6 +6811,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_Format:
handleFormatAttr(S, D, AL);
break;
+ case ParsedAttr::AT_FormatMatches:
+ handleFormatMatchesAttr(S, D, AL);
+ break;
case ParsedAttr::AT_FormatArg:
handleFormatArgAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaObjC.cpp b/clang/lib/Sema/SemaObjC.cpp
index 57eda18c2d8e151..073d9791d037b27 100644
--- a/clang/lib/Sema/SemaObjC.cpp
+++ b/clang/lib/Sema/SemaObjC.cpp
@@ -2259,7 +2259,8 @@ void SemaObjC::handleExternallyRetainedAttr(Decl *D, const ParsedAttr &AL) {
bool SemaObjC::GetFormatNSStringIdx(const FormatAttr *Format, unsigned &Idx) {
Sema::FormatStringInfo FSI;
if ((SemaRef.GetFormatStringType(Format) == Sema::FST_NSString) &&
- SemaRef.getFormatStringInfo(Format, false, true, &FSI)) {
+ SemaRef.getFormatStringInfo(Format->getFormatIdx(), Format->getFirstArg(),
+ false, true, &FSI)) {
Idx = FSI.FormatIdx;
return true;
}
diff --git a/clang/test/Sema/format-string-matches.c b/clang/test/Sema/format-string-matches.c
new file mode 100644
index 000000000000000..fcbdfe38749baf7
--- /dev/null
+++ b/clang/test/Sema/format-string-matches.c
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -verify -fblocks -fsyntax-only -Wformat-nonliteral -Wformat-signedness -isystem %S/Inputs %s
+// RUN: %clang_cc1 -verify -fblocks -fsyntax-only -Wformat-nonliteral -Wformat-signedness -isystem %S/Inputs -fno-signed-char %s
+
+#include <stdarg.h>
+
+__attribute__((format(printf, 1, 2)))
+int printf(const char *fmt, ...);
+
+__attribute__((format(printf, 1, 0)))
+int vprintf(const char *fmt, va_list);
+
+__attribute__((format(scanf, 1, 2)))
+int scanf(const char *fmt, ...);
+
+// MARK: -
+// Calling printf with a format from format_matches(printf) diagnoses with
+// that format string
+__attribute__((format_matches(printf, 1, "%s %1.5s")))
+void format_str_str0(const char *fmt) {
+ printf(fmt, "hello", "world");
+}
+
+__attribute__((format_matches(printf, 1, "%s" "%1.5s")))
+void format_str_str1(const char *fmt) {
+ printf(fmt, "hello", "world");
+}
+
+__attribute__((format_matches(printf, 1, ("%s" "%1.5s") + 5))) // expected-error{{format string is not a string literal}}
+void format_str_str2(const char *fmt);
+
+__attribute__((format_matches(printf, 1, "%s %g"))) // expected-note{{format string is defined here}}
+void format_str_double_warn(const char *fmt) {
+ printf(fmt, "hello", "world"); // expected-warning{{format specifies type 'double' but the argument has type 'char *'}}
+}
+
+__attribute__((format_matches(printf, 1, "%s %g")))
+void vformat(const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap); // XXX: ideally this would be a diagnostic
+ va_end(ap);
+}
+
+// MARK: -
+// Calling scanf
+__attribute__((format_matches(scanf, 1, "%hhi %g"))) // expected-note 3{{comparing with this specifier}}
+void scan(const char *fmt) {
+ char i;
+ float g;
+ scanf(fmt, &i, &g);
+}
+
+__attribute__((format_matches(scanf, 1, "%hhi %g"))) // expected-note{{format string is defined here}}
+void scan2(const char *fmt) {
+ char i;
+ double g;
+ scanf(fmt, &i, &g); // expected-warning{{format specifies type 'float *' but the argument has type 'double *'}}
+}
+
+void call_scan(void) {
+ scan("%hhd %e");
+ scan("%hd %Le"); // \
+ expected-warning{{format specifier 'hd' is incompatible with 'hhi'}} \
+ expected-warning{{format specifier 'Le' is incompatible with 'g'}}
+ scan("%s %p"); // expected-warning{{format specifier 'p' is incompatible with 'g'}}
+}
+
+// MARK: -
+// Calling a function with format_matches diagnoses for incompatible formats.
+
+void cvt_percent(const char *c) __attribute__((format_matches(printf, 1, "%%"))); // expected-note{{comparing with this format string}}
+void cvt_at(const char *c) __attribute__((format_matches(NSString, 1, "%@"))); // \
+ expected-warning{{data argument not used by format string}} \
+ expected-note{{comparing with this specifier}} \
+ expected-note{{comparing with this format string}}
+void cvt_c(const char *c) __attribute__((format_matches(printf, 1, "%c"))); // expected-note{{comparing with this specifier}}
+void cvt_u(const char *c) __attribute__((format_matches(printf, 1, "%u"))); // expected-note{{comparing with this specifier}}
+void cvt_i(const char *c) __attribute__((format_matches(printf, 1, "%i"))); // expected-note 2{{comparing with this specifier}}
+void cvt_p(const char *c) __attribute__((format_matches(printf, 1, "%p")));
+void cvt_s(const char *c) __attribute__((format_matches(printf, 1, "%s"))); // expected-note{{comparing with this specifier}}
+void cvt_n(const char *c) __attribute__((format_matches(printf, 1, "%n"))); // expected-note{{comparing with this specifier}}
+
+void test_compatibility(void) {
+ cvt_c("%i");
+ cvt_i("%c");
+ cvt_c("%u"); // expected-warning{{signedness of format specifier 'u' is incompatible with 'c'}}
+ cvt_u("%c"); // expected-warning{{signedness of format specifier 'c' is incompatible with 'u'}}
+
+ cvt_i("%li"); // expected-warning{{format specifier 'li' is incompatible with 'i'}}
+ cvt_i("%p"); // expected-warning{{format specifier 'p' is incompatible with 'i'}}
+ cvt_n("%s"); // expected-warning{{format specifier 's' is incompatible with 'n'}}
+ cvt_s("%hhn"); // expected-warning{{format specifier 'hhn' is incompatible with 's'}}
+
+ cvt_p("%@"); // expected-warning{{invalid conversion specifier '@'}}
+ cvt_at("%p"); // expected-warning{{format specifier 'p' is incompatible with '@'}}
+
+ cvt_percent("hello");
+ cvt_percent("%c"); // expected-warning{{fewer specifiers in format string than expected}}
+}
+
+void test_too_few_args(void) {
+ cvt_at("a"); // expected-note{{comparing with this format string}}
+ cvt_at("%@ %@"); // expected-warning{{fewer specifiers in format string than expected}}
+}
+
+void cvt_several(const char *c) __attribute__((format_matches(printf, 1, "%f %i %s"))); // expected-note{{comparing with this specifier}}
+
+void test_moving_args_around(void) {
+ cvt_several("%1g %-d %1.5s");
+
+ cvt_several("%3$s %1$g %2$i");
+
+ cvt_several("%f %*s"); // expected-warning{{format argument is an indirect field width, but it should be a value}}
+}
+
+void cvt_freebsd_D(const char *c) __attribute__((format_matches(freebsd_kprintf, 1, "%D"))); // expected-note{{comparing with this specifier}}
+
+void test_freebsd_specifiers(void) {
+ cvt_freebsd_D("%D");
+ cvt_freebsd_D("%b");
+ cvt_freebsd_D("%s %i"); // expected-warning{{format argument is a value, but it should be an auxiliary value}}
+}
More information about the cfe-commits
mailing list