[clang] [clang] Implement __attribute__((format_matches)) (PR #116708)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 7 09:57:45 PST 2025
=?utf-8?q?Félix?= Cloutier <fcloutier at apple.com>,
=?utf-8?q?Félix?= Cloutier <fcloutier at apple.com>,
=?utf-8?q?Félix?= Cloutier <fcloutier at apple.com>,
=?utf-8?q?Félix?= Cloutier <fcloutier at apple.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/116708 at github.com>
https://github.com/apple-fcloutier updated https://github.com/llvm/llvm-project/pull/116708
>From a9780a6fdbd99735ca864701d34fbd0f063c5b8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 12 Nov 2024 10:39:18 -0800
Subject: [PATCH 1/5] [clang] Implement __attribute__((format_matches))
This implements ``__attribute__((format_matches))``, as described in the RFC:
https://discourse.llvm.org/t/rfc-format-attribute-attribute-format-like/83076
The ``format`` attribute only allows the compiler to check that a format
string matches its arguments. If the format string is passed independently
of its arguments, there is no way to have the compiler check it.
``format_matches(flavor, fmtidx, example)`` allows the compiler to check
format strings against the ``example`` format string instead of against
format arguments. See the changes to AttrDocs.td in this diff for more
information.
Implementation-wise, this change subclasses CheckPrintfHandler and
CheckScanfHandler to allow them to collect specifiers into arrays,
and implements comparing that two specifiers are equivalent.
Radar-Id: 84936554
---
clang/docs/ReleaseNotes.rst | 44 +
clang/include/clang/AST/FormatString.h | 3 +-
clang/include/clang/Basic/Attr.td | 10 +
clang/include/clang/Basic/AttrDocs.td | 97 +++
.../clang/Basic/DiagnosticSemaKinds.td | 21 +
clang/include/clang/Sema/Sema.h | 37 +-
clang/lib/AST/AttrImpl.cpp | 5 +
clang/lib/AST/FormatString.cpp | 91 +++
clang/lib/Sema/SemaChecking.cpp | 761 ++++++++++++++----
clang/lib/Sema/SemaDeclAttr.cpp | 120 ++-
clang/lib/Sema/SemaObjC.cpp | 3 +-
clang/test/Sema/format-string-matches.c | 122 +++
12 files changed, 1128 insertions(+), 186 deletions(-)
create mode 100644 clang/test/Sema/format-string-matches.c
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index aca07e2ba9cf2d..a0061b6aad3c05 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -430,6 +430,50 @@ Non-comprehensive list of changes in this release
- Matrix types (a Clang extension) can now be used in pseudo-destructor expressions,
which allows them to be stored in STL containers.
+- There is a new ``format_matches`` attribute to complement the existing
+ ``format`` attribute. ``format_matches`` allows the compiler to verify that
+ a format string argument is equivalent to a reference format string: it is
+ useful when a function accepts a format string without its accompanying
+ arguments to format. For instance:
+
+ .. code-block:: c
+
+ static int status_code;
+ static const char *status_string;
+
+ void print_status(const char *fmt) {
+ fprintf(stderr, fmt, status_code, status_string);
+ // ^ warning: format string is not a string literal [-Wformat-nonliteral]
+ }
+
+ void stuff(void) {
+ print_status("%s (%#08x)\n");
+ // order of %s and %x is swapped but there is no diagnostic
+ }
+
+ Before the introducion of ``format_matches``, this code cannot be verified
+ at compile-time. ``format_matches`` plugs that hole:
+
+ .. code-block:: c
+
+ __attribute__((format_matches(printf, 1, "%x %s")))
+ void print_status(const char *fmt) {
+ fprintf(stderr, fmt, status_code, status_string);
+ // ^ `fmt` verified as if it was "%x %s" here; no longer triggers
+ // -Wformat-nonliteral, would warn if arguments did not match "%x %s"
+ }
+
+ void stuff(void) {
+ print_status("%s (%#08x)\n");
+ // warning: format specifier 's' is incompatible with 'x'
+ // warning: format specifier 'x' is incompatible with 's'
+ }
+
+ Like with ``format``, the first argument is the format string flavor and the
+ second argument is the index of the format string parameter.
+ ``format_matches`` accepts an example valid format string as its third
+ argument. For more information, see the Clang attributes documentation.
+
New Compiler Flags
------------------
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index a074dd23e2ad4c..3560766433fe22 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -292,7 +292,7 @@ class ArgType {
};
private:
- const Kind K;
+ Kind K;
QualType T;
const char *Name = nullptr;
bool Ptr = false;
@@ -338,6 +338,7 @@ class ArgType {
}
MatchKind matchesType(ASTContext &C, QualType argTy) const;
+ MatchKind matchesArgType(ASTContext &C, const ArgType &other) const;
QualType getRepresentativeType(ASTContext &C) const;
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 52ad72eb608c31..a5d8e4911c753a 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1813,6 +1813,16 @@ def Format : InheritableAttr {
let Documentation = [FormatDocs];
}
+def FormatMatches : InheritableAttr {
+ let Spellings = [GCC<"format_matches">];
+ let Args = [IdentifierArgument<"Type">, IntArgument<"FormatIdx">, ExprArgument<"ExpectedFormat">];
+ let AdditionalMembers = [{
+ StringLiteral *getFormatString() const;
+ }];
+ let Subjects = SubjectList<[ObjCMethod, Block, HasFunctionProto]>;
+ let Documentation = [FormatMatchesDocs];
+}
+
def FormatArg : InheritableAttr {
let Spellings = [GCC<"format_arg">];
let Args = [ParamIdxArgument<"FormatIdx">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index fdad4c9a3ea191..a1f5dbde7ef3b8 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3743,6 +3743,103 @@ behavior of the program is undefined.
}];
}
+def FormatMatchesDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+
+The ``format`` attribute is the basis for the enforcement of diagnostics in the
+``-Wformat`` family, but it only handles the case where the format string is
+passed along with the arguments it is going to format. It cannot handle the case
+where the format string and the format arguments are passed separately from each
+other. For instance:
+
+.. code-block:: c
+
+ static const char *first_name;
+ static double todays_temperature;
+ static int wind_speed;
+
+ void say_hi(const char *fmt) {
+ printf(fmt, first_name, todays_temperature); // warning: format string is not a string literal
+ printf(fmt, first_name, wind_speed); // warning: format string is not a string literal
+ }
+
+ int main() {
+ say_hi("hello %s, it is %g degrees outside");
+ say_hi("hello %s, it is %d degrees outside!"); // no diagnostic
+ }
+
+In this example, ``fmt`` is expected to format a ``const char *`` and a
+``double``, but these values are not passed to ``say_hi``. Without the
+``format`` attribute (which cannot apply in this case), the -Wformat-nonliteral
+diagnostic triggers in the body of ``say_hi``, and incorrect ``say_hi`` call
+sites do not trigger a diagnostic.
+
+To complement the ``format`` attribute, Clang also defines the
+``format_matches`` attribute. Its syntax is similar to the ``format``
+attribute's, but instead of taking the index of the first formatted value
+argument, it takes a C string literal with the expected specifiers:
+
+.. code-block:: c
+
+ static const char *first_name;
+ static double todays_temperature;
+ static int wind_speed;
+
+ __attribute__((__format_matches__(printf, 1, "%s %g")))
+ void say_hi(const char *fmt) {
+ printf(fmt, first_name, todays_temperature); // no dignostic
+ printf(fmt, first_name, wind_speed); // warning: format specifies type 'int' but the argument has type 'double'
+ }
+
+ int main() {
+ say_hi("hello %s, it is %g degrees outside");
+ say_hi("it is %g degrees outside, have a good day %s!");
+ // warning: format specifies 'double' where 'const char *' is required
+ // warning: format specifies 'const char *' where 'double' is required
+ }
+
+The third argument to ``format_matches`` is expected to evaluate to a **C string
+literal** even when the format string would normally be a different type for the
+given flavor, like a ``CFStringRef`` or a ``NSString *``.
+
+In the implementation of a function with the ``format_matches`` attribute,
+format verification works as if the format string was identical to the one
+specified in the attribute.
+
+At the call sites of functions with the ``format_matches`` attribute, format
+verification instead compares the two format strings to evaluate their
+equivalence. Each format flavor defines equivalence between format specifiers.
+Generally speaking, two specifiers are equivalent if they format the same type.
+For instance, in the ``printf`` flavor, ``%2i`` and ``%-0.5d`` are compatible.
+When ``-Wformat-signedness`` is disabled, ``%d`` and ``%u`` are compatible. For
+a negative example, ``%ld`` is incompatible with ``%d``.
+
+Do note the following un-obvious cases:
+
+* Passing ``NULL`` as the format string is accepted without diagnostics.
+* When the format string is not NULL, it cannot _miss_ specifiers, even in
+ trailing positions. For instance, ``%d`` is not accepted when the required
+ format is ``%d %d %d``.
+* Specifiers for integers as small or smaller than ``int`` (such as ``%hhd``)
+ are all mutually compatible because standard argument promotion ensures that
+ integers are at least the size of ``int`` when passed as variadic arguments.
+ With ``-Wformat-signedness``, mixing specifier for types with a different
+ signedness still results in a diagnostic.
+* Format strings expecting a variable modifier (such as ``%*s``) are
+ incompatible with format strings that would itemize the variable modifiers
+ (such as ``%i %s``), even if the two specify ABI-compatible argument lists.
+* All pointer specifiers, modifiers aside, are mutually incompatible. For
+ instance, ``%s`` is not compatible with ``%p``, and ``%p`` is not compatible
+ with ``%n``, and ``%hhn`` is incompatible with ``%s``, even if the pointers
+ are ABI-compatible or identical on the selected platform. However, ``%0.5s``
+ is compatible with ``%s``, since the difference only exists in modifier flags.
+ This is not overridable with ``-Wformat-pedantic`` or its inverse, which
+ control similar behavior in ``-Wformat``.
+
+ }];
+}
+
def FlagEnumDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 330ae045616aba..afd99a93c85520 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7709,6 +7709,7 @@ def warn_format_nonliteral_noargs : Warning<
def warn_format_nonliteral : Warning<
"format string is not a string literal">,
InGroup<FormatNonLiteral>, DefaultIgnore;
+def err_format_nonliteral : Error<"format string is not a string literal">;
def err_unexpected_interface : Error<
"unexpected interface name %0: expected expression">;
@@ -10017,6 +10018,8 @@ def note_previous_declaration_as : Note<
def warn_printf_insufficient_data_args : Warning<
"more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>;
+def warn_format_cmp_insufficient_specifiers : Warning<
+ "fewer specifiers in format string than expected">, InGroup<FormatInsufficientArgs>;
def warn_printf_data_arg_not_used : Warning<
"data argument not used by format string">, InGroup<FormatExtraArgs>;
def warn_format_invalid_conversion : Warning<
@@ -10134,6 +10137,24 @@ def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
def note_format_security_fixit: Note<
"treat the string as an argument to avoid this">;
+def warn_format_cmp_role_mismatch : Warning<
+ "format argument is %select{a value|an indirect field width|an indirect "
+ "precision|an auxiliary value}0, but it should be %select{a value|an indirect "
+ "field width|an indirect precision|an auxiliary value}1">, InGroup<Format>;
+def warn_format_cmp_modifierfor_mismatch : Warning<
+ "format argument modifies specifier at position %0, but it should modify "
+ "specifier at position %1">, InGroup<Format>;
+def warn_format_cmp_sensitivity_mismatch : Warning<
+ "argument sensitivity is %select{unspecified|private|public|sensitive}0, but "
+ "it should be %select{unspecified|private|public|sensitive}1">, InGroup<Format>;
+def warn_format_cmp_specifier_mismatch : Warning<
+ "format specifier '%0' is incompatible with '%1'">, InGroup<Format>;
+def warn_format_cmp_specifier_sign_mismatch : Warning<
+ "signedness of format specifier '%0' is incompatible with '%1'">, InGroup<Format>;
+def warn_format_cmp_specifier_mismatch_pedantic : Extension<
+ warn_format_cmp_specifier_sign_mismatch.Summary>, InGroup<FormatPedantic>;
+def note_format_cmp_with : Note<
+ "comparing with this %select{specifier|format string}0">;
def warn_null_arg : Warning<
"null passed to a callee that requires a non-null argument">,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5ee7ea48cc983c..8c696c95b39a54 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2153,6 +2153,7 @@ class Sema final : public SemaBase {
FAPK_Fixed, // values to format are fixed (no C-style variadic arguments)
FAPK_Variadic, // values to format are passed as variadic arguments
FAPK_VAList, // values to format are passed in a va_list
+ FAPK_Elsewhere, // values to format are not passed to this function
};
// Used to grab the relevant information from a FormatAttr and a
@@ -2163,12 +2164,15 @@ class Sema final : public SemaBase {
FormatArgumentPassingKind ArgPassingKind;
};
- /// Given a FunctionDecl's FormatAttr, attempts to populate the
- /// FomatStringInfo parameter with the FormatAttr's correct format_idx and
- /// firstDataArg. Returns true when the format fits the function and the
- /// FormatStringInfo has been populated.
- static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
- bool IsVariadic, FormatStringInfo *FSI);
+ /// Given a function and its FormatAttr or FormatMatchesAttr info, attempts to
+ /// populate the FomatStringInfo parameter with the attribute's correct
+ /// format_idx and firstDataArg. Returns true when the format fits the
+ /// function and the FormatStringInfo has been populated.
+ static bool getFormatStringInfo(const Decl *Function, unsigned FormatIdx,
+ unsigned FirstArg, FormatStringInfo *FSI);
+ static bool getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
+ bool IsCXXMember, bool IsVariadic,
+ FormatStringInfo *FSI);
// Used by C++ template instantiation.
ExprResult BuiltinShuffleVector(CallExpr *TheCall);
@@ -2191,7 +2195,9 @@ class Sema final : public SemaBase {
FST_Syslog,
FST_Unknown
};
+ static FormatStringType GetFormatStringType(StringRef FormatFlavor);
static FormatStringType GetFormatStringType(const FormatAttr *Format);
+ static FormatStringType GetFormatStringType(const FormatMatchesAttr *Format);
bool FormatStringHasSArg(const StringLiteral *FExpr);
@@ -2552,11 +2558,17 @@ class Sema final : public SemaBase {
VariadicCallType CallType, SourceLocation Loc,
SourceRange Range,
llvm::SmallBitVector &CheckedVarArgs);
+ bool CheckFormatString(const FormatMatchesAttr *Format,
+ ArrayRef<const Expr *> Args, bool IsCXXMember,
+ VariadicCallType CallType, SourceLocation Loc,
+ SourceRange Range,
+ llvm::SmallBitVector &CheckedVarArgs);
bool CheckFormatArguments(ArrayRef<const Expr *> Args,
- FormatArgumentPassingKind FAPK, unsigned format_idx,
- unsigned firstDataArg, FormatStringType Type,
- VariadicCallType CallType, SourceLocation Loc,
- SourceRange range,
+ FormatArgumentPassingKind FAPK,
+ const StringLiteral *ReferenceFormatString,
+ unsigned format_idx, unsigned firstDataArg,
+ FormatStringType Type, VariadicCallType CallType,
+ SourceLocation Loc, SourceRange range,
llvm::SmallBitVector &CheckedVarArgs);
void CheckInfNaNFunction(const CallExpr *Call, const FunctionDecl *FDecl);
@@ -4544,6 +4556,11 @@ class Sema final : public SemaBase {
FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
IdentifierInfo *Format, int FormatIdx,
int FirstArg);
+ FormatMatchesAttr *mergeFormatMatchesAttr(Decl *D,
+ const AttributeCommonInfo &CI,
+ IdentifierInfo *Format,
+ int FormatIdx,
+ StringLiteral *FormatStr);
/// AddAlignedAttr - Adds an aligned attribute to a particular declaration.
void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp
index f198a9acf8481f..5a169f815637f6 100644
--- a/clang/lib/AST/AttrImpl.cpp
+++ b/clang/lib/AST/AttrImpl.cpp
@@ -270,4 +270,9 @@ unsigned AlignedAttr::getAlignment(ASTContext &Ctx) const {
return Ctx.getTargetDefaultAlignForAttributeAligned();
}
+StringLiteral *FormatMatchesAttr::getFormatString() const {
+ // This cannot go in headers because StringLiteral and Expr may be incomplete.
+ return cast<StringLiteral>(getExpectedFormat());
+}
+
#include "clang/AST/AttrImpl.inc"
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index e892c1592df986..5d3b56fc4e7137 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -595,6 +595,97 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
llvm_unreachable("Invalid ArgType Kind!");
}
+static analyze_format_string::ArgType::MatchKind
+integerTypeMatch(ASTContext &C, QualType A, QualType B, bool CheckSign) {
+ using MK = analyze_format_string::ArgType::MatchKind;
+
+ uint64_t IntSize = C.getTypeSize(C.IntTy);
+ uint64_t ASize = C.getTypeSize(A);
+ uint64_t BSize = C.getTypeSize(B);
+ if (std::max(ASize, IntSize) != std::max(BSize, IntSize))
+ return MK::NoMatch;
+ if (CheckSign && A->isSignedIntegerType() != B->isSignedIntegerType())
+ return MK::NoMatchSignedness;
+ if (ASize != BSize)
+ return MK::MatchPromotion;
+ return MK::Match;
+}
+
+analyze_format_string::ArgType::MatchKind
+ArgType::matchesArgType(ASTContext &C, const ArgType &Other) const {
+ using AK = analyze_format_string::ArgType::Kind;
+
+ // Per matchesType.
+ if (K == AK::InvalidTy || Other.K == AK::InvalidTy)
+ return NoMatch;
+ if (K == AK::UnknownTy || Other.K == AK::UnknownTy)
+ return Match;
+
+ // Handle whether either (or both, or neither) sides has Ptr set,
+ // in addition to whether either (or both, or neither) sides is a SpecificTy
+ // that is a pointer.
+ ArgType Left = *this;
+ bool LeftWasPointer = false;
+ ArgType Right = Other;
+ bool RightWasPointer = false;
+ if (Left.Ptr) {
+ Left.Ptr = false;
+ LeftWasPointer = true;
+ } else if (Left.K == AK::SpecificTy && Left.T->isPointerType()) {
+ Left.T = Left.T->getPointeeType();
+ LeftWasPointer = true;
+ }
+ if (Right.Ptr) {
+ Right.Ptr = false;
+ RightWasPointer = true;
+ } else if (Right.K == AK::SpecificTy && Right.T->isPointerType()) {
+ Right.T = Right.T->getPointeeType();
+ RightWasPointer = true;
+ }
+
+ if (LeftWasPointer != RightWasPointer)
+ return NoMatch;
+
+ // Ensure that if at least one side is a SpecificTy, then Left is a
+ // SpecificTy.
+ if (Right.K == AK::SpecificTy)
+ std::swap(Left, Right);
+
+ if (Left.K == AK::SpecificTy) {
+ if (Right.K == AK::SpecificTy) {
+ auto Canon1 = C.getCanonicalType(Left.T);
+ auto Canon2 = C.getCanonicalType(Right.T);
+ if (Canon1 == Canon2)
+ return Match;
+
+ auto *BT1 = QualType(Canon1)->getAs<BuiltinType>();
+ auto *BT2 = QualType(Canon2)->getAs<BuiltinType>();
+ if (BT1 == nullptr || BT2 == nullptr)
+ return NoMatch;
+ if (BT1 == BT2)
+ return Match;
+
+ if (!LeftWasPointer && BT1->isInteger() && BT2->isInteger())
+ return integerTypeMatch(C, Canon1, Canon2, true);
+ return NoMatch;
+ } else if (Right.K == AK::AnyCharTy) {
+ if (!LeftWasPointer && Left.T->isIntegerType())
+ return integerTypeMatch(C, Left.T, C.CharTy, false);
+ return NoMatch;
+ } else if (Right.K == AK::WIntTy) {
+ if (!LeftWasPointer && Left.T->isIntegerType())
+ return integerTypeMatch(C, Left.T, C.WIntTy, true);
+ return NoMatch;
+ }
+ // It's hypothetically possible to create an AK::SpecificTy ArgType
+ // that matches another kind of ArgType, but in practice Clang doesn't
+ // do that, so ignore that case.
+ return NoMatch;
+ }
+
+ return Left.K == Right.K ? Match : NoMatch;
+}
+
ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const {
// Check for valid vector element types.
if (T.isNull())
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ce846ae88c38b4..a5e668395c9797 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3017,17 +3017,33 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) {
<< ArgNum << Arg->getSourceRange();
}
-bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
- bool IsVariadic, FormatStringInfo *FSI) {
- if (Format->getFirstArg() == 0)
+bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
+ unsigned FirstArg, FormatStringInfo *FSI) {
+ bool IsCXXMember = false;
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
+ IsCXXMember = MD->isInstance();
+ bool IsVariadic = false;
+ if (const FunctionType *FnTy = D->getFunctionType())
+ IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
+ else if (const auto *BD = dyn_cast<BlockDecl>(D))
+ IsVariadic = BD->isVariadic();
+ else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
+ IsVariadic = OMD->isVariadic();
+
+ return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI);
+}
+
+bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
+ bool IsCXXMember, bool IsVariadic,
+ FormatStringInfo *FSI) {
+ if (FirstArg == 0)
FSI->ArgPassingKind = FAPK_VAList;
else if (IsVariadic)
FSI->ArgPassingKind = FAPK_Variadic;
else
FSI->ArgPassingKind = FAPK_Fixed;
- FSI->FormatIdx = Format->getFormatIdx() - 1;
- FSI->FirstDataArg =
- FSI->ArgPassingKind == FAPK_VAList ? 0 : Format->getFirstArg() - 1;
+ FSI->FormatIdx = FormatIdx - 1;
+ FSI->FirstDataArg = FSI->ArgPassingKind == FAPK_VAList ? 0 : FirstArg - 1;
// The way the format attribute works in GCC, the implicit this argument
// of member functions is counted. However, it doesn't appear in our own
@@ -3281,10 +3297,15 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
// Printf and scanf checking.
llvm::SmallBitVector CheckedVarArgs;
if (FDecl) {
- for (const auto *I : FDecl->specific_attrs<FormatAttr>()) {
+ for (const auto *I : FDecl->specific_attrs<FormatMatchesAttr>()) {
// Only create vector if there are format attributes.
CheckedVarArgs.resize(Args.size());
+ CheckFormatString(I, Args, IsMemberFunction, CallType, Loc, Range,
+ CheckedVarArgs);
+ }
+ for (const auto *I : FDecl->specific_attrs<FormatAttr>()) {
+ CheckedVarArgs.resize(Args.size());
CheckFormatArguments(I, Args, IsMemberFunction, CallType, Loc, Range,
CheckedVarArgs);
}
@@ -5421,7 +5442,7 @@ bool Sema::BuiltinOSLogFormat(CallExpr *TheCall) {
llvm::SmallBitVector CheckedVarArgs(NumArgs, false);
ArrayRef<const Expr *> Args(TheCall->getArgs(), TheCall->getNumArgs());
bool Success = CheckFormatArguments(
- Args, FAPK_Variadic, FormatIdx, FirstDataArg, FST_OSLog,
+ Args, FAPK_Variadic, nullptr, FormatIdx, FirstDataArg, FST_OSLog,
VariadicFunction, TheCall->getBeginLoc(), SourceRange(),
CheckedVarArgs);
if (!Success)
@@ -5817,13 +5838,13 @@ class FormatStringLiteral {
const StringLiteral *FExpr;
int64_t Offset;
- public:
+public:
FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
: FExpr(fexpr), Offset(Offset) {}
- StringRef getString() const {
- return FExpr->getString().drop_front(Offset);
- }
+ const StringLiteral *getFormatString() const { return FExpr; }
+
+ StringRef getString() const { return FExpr->getString().drop_front(Offset); }
unsigned getByteLength() const {
return FExpr->getByteLength() - getCharByteWidth() * Offset;
@@ -5861,7 +5882,8 @@ class FormatStringLiteral {
} // namespace
static void CheckFormatString(
- Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr,
+ Sema &S, const FormatStringLiteral *FExpr,
+ const StringLiteral *ReferenceFormatString, const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type,
bool inFunctionCall, Sema::VariadicCallType CallType,
@@ -5875,14 +5897,13 @@ static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
// If this function returns false on the arguments to a function expecting a
// format string, we will usually need to emit a warning.
// True string literals are then checked by CheckFormatString.
-static StringLiteralCheckType
-checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
- Sema::FormatArgumentPassingKind APK, unsigned format_idx,
- unsigned firstDataArg, Sema::FormatStringType Type,
- Sema::VariadicCallType CallType, bool InFunctionCall,
- llvm::SmallBitVector &CheckedVarArgs,
- UncoveredArgHandler &UncoveredArg, llvm::APSInt Offset,
- bool IgnoreStringsWithoutSpecifiers = false) {
+static StringLiteralCheckType checkFormatStringExpr(
+ Sema &S, const StringLiteral *ReferenceFormatString, const Expr *E,
+ ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
+ unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type,
+ Sema::VariadicCallType CallType, bool InFunctionCall,
+ llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
+ llvm::APSInt Offset, bool IgnoreStringsWithoutSpecifiers = false) {
if (S.isConstantEvaluatedContext())
return SLCT_NotALiteral;
tryAgain:
@@ -5904,10 +5925,10 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
case Stmt::InitListExprClass:
// Handle expressions like {"foobar"}.
if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
- return checkFormatStringExpr(S, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false,
- CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ return checkFormatStringExpr(
+ S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
+ Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
}
return SLCT_NotALiteral;
case Stmt::BinaryConditionalOperatorClass:
@@ -5939,19 +5960,19 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
if (!CheckLeft)
Left = SLCT_UncheckedLiteral;
else {
- Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, APK, format_idx,
- firstDataArg, Type, CallType, InFunctionCall,
- CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ Left = checkFormatStringExpr(
+ S, ReferenceFormatString, C->getTrueExpr(), Args, APK, format_idx,
+ firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
if (Left == SLCT_NotALiteral || !CheckRight) {
return Left;
}
}
StringLiteralCheckType Right = checkFormatStringExpr(
- S, C->getFalseExpr(), Args, APK, format_idx, firstDataArg, Type,
- CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, C->getFalseExpr(), Args, APK, format_idx,
+ firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
return (CheckLeft && Left < Right) ? Left : Right;
}
@@ -6001,7 +6022,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
Init = InitList->getInit(0)->IgnoreParenImpCasts();
}
return checkFormatStringExpr(
- S, Init, Args, APK, format_idx, firstDataArg, Type, CallType,
+ S, ReferenceFormatString, Init, Args, APK, format_idx,
+ firstDataArg, Type, CallType,
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
}
}
@@ -6022,7 +6044,17 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
// ...
// }
//
- // Another interaction that we need to support is calling a variadic
+ // Another interaction that we need to support is using a format string
+ // specified by the format_matches attribute:
+ //
+ // __attribute__((format_matches(printf, 1, "%s %d")))
+ // void logmessage(char const *fmt, const char *a, int b) {
+ // printf(fmt, a, b); /* do not emit a warning about "fmt" */
+ // printf(fmt, 123.4); /* emit warnings that "%s %d" is incompatible */
+ // ...
+ // }
+ //
+ // Yet another interaction that we need to support is calling a variadic
// format function from a format function that has fixed arguments. For
// instance:
//
@@ -6045,39 +6077,41 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
//
if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) {
if (const auto *D = dyn_cast<Decl>(PV->getDeclContext())) {
- for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) {
- bool IsCXXMember = false;
- if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
- IsCXXMember = MD->isInstance();
-
- bool IsVariadic = false;
- if (const FunctionType *FnTy = D->getFunctionType())
- IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
- else if (const auto *BD = dyn_cast<BlockDecl>(D))
- IsVariadic = BD->isVariadic();
- else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
- IsVariadic = OMD->isVariadic();
+ for (const auto *PVFormatMatches :
+ D->specific_attrs<FormatMatchesAttr>()) {
+ Sema::FormatStringInfo CallerFSI;
+ if (!Sema::getFormatStringInfo(D, PVFormatMatches->getFormatIdx(),
+ 0, &CallerFSI))
+ continue;
+ if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx)
+ return checkFormatStringExpr(
+ S, nullptr, PVFormatMatches->getFormatString(), Args, APK,
+ format_idx, firstDataArg, Type, CallType,
+ /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
+ Offset, IgnoreStringsWithoutSpecifiers);
+ }
+ for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) {
Sema::FormatStringInfo CallerFSI;
- if (Sema::getFormatStringInfo(PVFormat, IsCXXMember, IsVariadic,
- &CallerFSI)) {
- // We also check if the formats are compatible.
- // We can't pass a 'scanf' string to a 'printf' function.
- if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx &&
- Type == S.GetFormatStringType(PVFormat)) {
- // Lastly, check that argument passing kinds transition in a
- // way that makes sense:
- // from a caller with FAPK_VAList, allow FAPK_VAList
- // from a caller with FAPK_Fixed, allow FAPK_Fixed
- // from a caller with FAPK_Fixed, allow FAPK_Variadic
- // from a caller with FAPK_Variadic, allow FAPK_VAList
- switch (combineFAPK(CallerFSI.ArgPassingKind, APK)) {
- case combineFAPK(Sema::FAPK_VAList, Sema::FAPK_VAList):
- case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Fixed):
- case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Variadic):
- case combineFAPK(Sema::FAPK_Variadic, Sema::FAPK_VAList):
- return SLCT_UncheckedLiteral;
- }
+ if (!Sema::getFormatStringInfo(D, PVFormat->getFormatIdx(),
+ PVFormat->getFirstArg(), &CallerFSI))
+ continue;
+ // We also check if the formats are compatible.
+ // We can't pass a 'scanf' string to a 'printf' function.
+ if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx &&
+ Type == S.GetFormatStringType(PVFormat)) {
+ // Lastly, check that argument passing kinds transition in a
+ // way that makes sense:
+ // from a caller with FAPK_VAList, allow FAPK_VAList
+ // from a caller with FAPK_Fixed, allow FAPK_Fixed
+ // from a caller with FAPK_Fixed, allow FAPK_Variadic
+ // from a caller with FAPK_Variadic, allow FAPK_VAList
+ switch (combineFAPK(CallerFSI.ArgPassingKind, APK)) {
+ case combineFAPK(Sema::FAPK_VAList, Sema::FAPK_VAList):
+ case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Fixed):
+ case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Variadic):
+ case combineFAPK(Sema::FAPK_Variadic, Sema::FAPK_VAList):
+ return SLCT_UncheckedLiteral;
}
}
}
@@ -6097,9 +6131,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
for (const auto *FA : ND->specific_attrs<FormatArgAttr>()) {
const Expr *Arg = CE->getArg(FA->getFormatIdx().getASTIndex());
StringLiteralCheckType Result = checkFormatStringExpr(
- S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
+ Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
+ Offset, IgnoreStringsWithoutSpecifiers);
if (IsFirst) {
CommonResult = Result;
IsFirst = false;
@@ -6114,17 +6148,17 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
const Expr *Arg = CE->getArg(0);
return checkFormatStringExpr(
- S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, Arg, Args, APK, format_idx,
+ firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
}
}
}
if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
- return checkFormatStringExpr(S, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false,
- CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ return checkFormatStringExpr(
+ S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
+ Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
+ UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
return SLCT_NotALiteral;
}
case Stmt::ObjCMessageExprClass: {
@@ -6148,9 +6182,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
return checkFormatStringExpr(
- S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs, UncoveredArg, Offset,
- IgnoreStringsWithoutSpecifiers);
+ S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
+ Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
+ Offset, IgnoreStringsWithoutSpecifiers);
}
}
@@ -6172,8 +6206,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return SLCT_NotALiteral;
}
FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue());
- CheckFormatString(S, &FStr, E, Args, APK, format_idx, firstDataArg, Type,
- InFunctionCall, CallType, CheckedVarArgs, UncoveredArg,
+ CheckFormatString(S, &FStr, ReferenceFormatString, E, Args, APK,
+ format_idx, firstDataArg, Type, InFunctionCall,
+ CallType, CheckedVarArgs, UncoveredArg,
IgnoreStringsWithoutSpecifiers);
return SLCT_CheckedLiteral;
}
@@ -6250,8 +6285,8 @@ static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
return nullptr;
}
-Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
- return llvm::StringSwitch<FormatStringType>(Format->getType()->getName())
+Sema::FormatStringType Sema::GetFormatStringType(StringRef Flavor) {
+ return llvm::StringSwitch<FormatStringType>(Flavor)
.Case("scanf", FST_Scanf)
.Cases("printf", "printf0", "syslog", FST_Printf)
.Cases("NSString", "CFString", FST_NSString)
@@ -6264,22 +6299,49 @@ Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
.Default(FST_Unknown);
}
+Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
+ return GetFormatStringType(Format->getType()->getName());
+}
+
+Sema::FormatStringType
+Sema::GetFormatStringType(const FormatMatchesAttr *Format) {
+ return GetFormatStringType(Format->getType()->getName());
+}
+
bool Sema::CheckFormatArguments(const FormatAttr *Format,
ArrayRef<const Expr *> Args, bool IsCXXMember,
VariadicCallType CallType, SourceLocation Loc,
SourceRange Range,
llvm::SmallBitVector &CheckedVarArgs) {
FormatStringInfo FSI;
- if (getFormatStringInfo(Format, IsCXXMember, CallType != VariadicDoesNotApply,
- &FSI))
- return CheckFormatArguments(Args, FSI.ArgPassingKind, FSI.FormatIdx,
+ if (getFormatStringInfo(Format->getFormatIdx(), Format->getFirstArg(),
+ IsCXXMember, CallType != VariadicDoesNotApply, &FSI))
+ return CheckFormatArguments(
+ Args, FSI.ArgPassingKind, nullptr, FSI.FormatIdx, FSI.FirstDataArg,
+ GetFormatStringType(Format), CallType, Loc, Range, CheckedVarArgs);
+ return false;
+}
+
+bool Sema::CheckFormatString(const FormatMatchesAttr *Format,
+ ArrayRef<const Expr *> Args, bool IsCXXMember,
+ VariadicCallType CallType, SourceLocation Loc,
+ SourceRange Range,
+ llvm::SmallBitVector &CheckedVarArgs) {
+ FormatStringInfo FSI;
+ if (getFormatStringInfo(Format->getFormatIdx(), 0, IsCXXMember, false,
+ &FSI)) {
+ FSI.ArgPassingKind = Sema::FAPK_Elsewhere;
+ return CheckFormatArguments(Args, FSI.ArgPassingKind,
+ Format->getFormatString(), FSI.FormatIdx,
FSI.FirstDataArg, GetFormatStringType(Format),
CallType, Loc, Range, CheckedVarArgs);
+ }
return false;
}
bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
Sema::FormatArgumentPassingKind APK,
+ const StringLiteral *ReferenceFormatString,
unsigned format_idx, unsigned firstDataArg,
FormatStringType Type,
VariadicCallType CallType, SourceLocation Loc,
@@ -6307,8 +6369,8 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// the same format string checking logic for both ObjC and C strings.
UncoveredArgHandler UncoveredArg;
StringLiteralCheckType CT = checkFormatStringExpr(
- *this, OrigFormatExpr, Args, APK, format_idx, firstDataArg, Type,
- CallType,
+ *this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx,
+ firstDataArg, Type, CallType,
/*IsFunctionCall*/ true, CheckedVarArgs, UncoveredArg,
/*no string offset*/ llvm::APSInt(64, false) = 0);
@@ -6404,6 +6466,11 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
CoveredArgs.reset();
}
+ bool HasFormatArguments() const {
+ return ArgPassingKind == Sema::FAPK_Fixed ||
+ ArgPassingKind == Sema::FAPK_Variadic;
+ }
+
void DoneProcessing();
void HandleIncompleteSpecifier(const char *startSpecifier,
@@ -6639,7 +6706,7 @@ const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
void CheckFormatHandler::DoneProcessing() {
// Does the number of data arguments exceed the number of
// format conversions in the format string?
- if (ArgPassingKind != Sema::FAPK_VAList) {
+ if (HasFormatArguments()) {
// Find any arguments that weren't covered.
CoveredArgs.flip();
signed notCoveredArg = CoveredArgs.find_first();
@@ -6748,7 +6815,7 @@ CheckFormatHandler::CheckNumArgs(
const analyze_format_string::ConversionSpecifier &CS,
const char *startSpecifier, unsigned specifierLen, unsigned argIndex) {
- if (argIndex >= NumDataArgs) {
+ if (HasFormatArguments() && argIndex >= NumDataArgs) {
PartialDiagnostic PDiag = FS.usesPositionalArg()
? (S.PDiag(diag::warn_printf_positional_arg_exceeds_data_args)
<< (argIndex+1) << NumDataArgs)
@@ -6889,18 +6956,108 @@ class CheckPrintfHandler : public CheckFormatHandler {
void HandleInvalidObjCModifierFlag(const char *startFlag,
unsigned flagLen) override;
- void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
- const char *flagsEnd,
- const char *conversionPosition)
- override;
+ void
+ HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
+ const char *flagsEnd,
+ const char *conversionPosition) override;
+};
+
+/// Keeps around the information needed to verify that two specifiers are
+/// compatible.
+class EquatableFormatArgument {
+public:
+ enum SpecifierSensitivity : unsigned {
+ SS_None,
+ SS_Private,
+ SS_Public,
+ SS_Sensitive
+ };
+
+ enum FormatArgumentRole : unsigned {
+ FAR_Data,
+ FAR_FieldWidth,
+ FAR_Precision,
+ FAR_Auxiliary, // FreeBSD kernel %b and %D
+ };
+
+private:
+ analyze_format_string::ArgType ArgType;
+ analyze_format_string::LengthModifier::Kind LengthMod;
+ StringRef SpecifierLetter;
+ CharSourceRange Range;
+ SourceLocation ElementLoc;
+ FormatArgumentRole Role : 2;
+ SpecifierSensitivity Sensitivity : 2; // only set for FAR_Data
+ unsigned Position : 14;
+ unsigned ModifierFor : 14; // not set for FAR_Data
+
+public:
+ EquatableFormatArgument(CharSourceRange Range, SourceLocation ElementLoc,
+ analyze_format_string::LengthModifier::Kind LengthMod,
+ StringRef SpecifierLetter,
+ analyze_format_string::ArgType ArgType,
+ FormatArgumentRole Role,
+ SpecifierSensitivity Sensitivity, unsigned Position,
+ unsigned ModifierFor)
+ : ArgType(ArgType), LengthMod(LengthMod),
+ SpecifierLetter(SpecifierLetter), Range(Range), ElementLoc(ElementLoc),
+ Role(Role), Sensitivity(Sensitivity), Position(Position),
+ ModifierFor(ModifierFor) {}
+
+ unsigned getPosition() const { return Position; }
+ SourceLocation getSourceLocation() const { return ElementLoc; }
+ CharSourceRange getSourceRange() const { return Range; }
+ analyze_format_string::LengthModifier getLengthModifier() const {
+ return analyze_format_string::LengthModifier(nullptr, LengthMod);
+ }
+ void setModifierFor(unsigned V) { ModifierFor = V; }
+
+ std::string buildFormatSpecifier() const {
+ std::string result;
+ llvm::raw_string_ostream(result)
+ << getLengthModifier().toString() << SpecifierLetter;
+ return result;
+ }
+
+ void VerifyCompatible(Sema &S, const EquatableFormatArgument &Other) const;
+};
+
+/// Turns format strings into lists of EquatableSpecifier objects.
+class DecomposePrintfHandler : public CheckPrintfHandler {
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs;
+ bool HadError;
+
+ DecomposePrintfHandler(
+ Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr,
+ const Sema::FormatStringType type, unsigned firstDataArg,
+ unsigned numDataArgs, bool isObjC, const char *beg,
+ Sema::FormatArgumentPassingKind APK, ArrayRef<const Expr *> Args,
+ unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType CallType,
+ llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs)
+ : CheckPrintfHandler(s, fexpr, origFormatExpr, type, firstDataArg,
+ numDataArgs, isObjC, beg, APK, Args, formatIdx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg),
+ Specs(Specs), HadError(false) {}
+
+public:
+ static bool
+ GetSpecifiers(Sema &S, const FormatStringLiteral *FSL,
+ Sema::FormatStringType type, bool IsObjC, bool InFunctionCall,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Args);
+
+ virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen,
+ const TargetInfo &Target) override;
};
} // namespace
bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
- const analyze_printf::PrintfSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
+ const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen) {
const analyze_printf::PrintfConversionSpecifier &CS =
FS.getConversionSpecifier();
@@ -6918,7 +7075,7 @@ bool CheckPrintfHandler::HandleAmount(
const analyze_format_string::OptionalAmount &Amt, unsigned k,
const char *startSpecifier, unsigned specifierLen) {
if (Amt.hasDataArgument()) {
- if (ArgPassingKind != Sema::FAPK_VAList) {
+ if (HasFormatArguments()) {
unsigned argIndex = Amt.getArgIndex();
if (argIndex >= NumDataArgs) {
EmitFormatDiagnostic(S.PDiag(diag::warn_printf_asterisk_missing_arg)
@@ -7043,8 +7200,185 @@ void CheckPrintfHandler::HandleObjCFlagsWithNonObjCConversion(
auto diag = diag::warn_printf_ObjCflags_without_ObjCConversion;
EmitFormatDiagnostic(S.PDiag(diag) << StringRef(conversionPosition, 1),
getLocationOfByte(conversionPosition),
- /*IsStringLocation*/true,
- Range, FixItHint::CreateRemoval(Range));
+ /*IsStringLocation*/ true, Range,
+ FixItHint::CreateRemoval(Range));
+}
+
+void EquatableFormatArgument::VerifyCompatible(
+ Sema &S, const EquatableFormatArgument &Other) const {
+ using MK = analyze_format_string::ArgType::MatchKind;
+ if (Role != Other.Role) {
+ // diagnose and stop
+ S.Diag(ElementLoc, diag::warn_format_cmp_role_mismatch)
+ << Role << Other.Role << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ return;
+ }
+
+ if (Role != FAR_Data) {
+ if (ModifierFor != Other.ModifierFor) {
+ // diagnose and stop
+ S.Diag(ElementLoc, diag::warn_format_cmp_modifierfor_mismatch)
+ << (ModifierFor + 1) << (Other.ModifierFor + 1) << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ }
+ return;
+ }
+
+ if (Sensitivity != Other.Sensitivity) {
+ // diagnose and continue
+ S.Diag(ElementLoc, diag::warn_format_cmp_sensitivity_mismatch)
+ << Sensitivity << Other.Sensitivity << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ }
+
+ switch (ArgType.matchesArgType(S.Context, Other.ArgType)) {
+ case MK::Match:
+ case MK::MatchPromotion:
+ break;
+
+ case MK::NoMatch:
+ case MK::NoMatchTypeConfusion:
+ case MK::NoMatchPromotionTypeConfusion:
+ S.Diag(ElementLoc, diag::warn_format_cmp_specifier_mismatch)
+ << buildFormatSpecifier() << Other.buildFormatSpecifier() << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ break;
+
+ case MK::NoMatchPedantic:
+ S.Diag(ElementLoc, diag::warn_format_cmp_specifier_mismatch_pedantic)
+ << buildFormatSpecifier() << Other.buildFormatSpecifier() << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ break;
+
+ case MK::NoMatchSignedness:
+ if (!S.getDiagnostics().isIgnored(
+ diag::warn_format_conversion_argument_type_mismatch_signedness,
+ ElementLoc)) {
+ S.Diag(ElementLoc, diag::warn_format_cmp_specifier_sign_mismatch)
+ << buildFormatSpecifier() << Other.buildFormatSpecifier() << Range;
+ S.Diag(Other.ElementLoc, diag::note_format_cmp_with) << 0 << Other.Range;
+ }
+ break;
+ }
+}
+
+bool DecomposePrintfHandler::GetSpecifiers(
+ Sema &S, const FormatStringLiteral *FSL, Sema::FormatStringType Type,
+ bool IsObjC, bool InFunctionCall,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Args) {
+ StringRef Data = FSL->getString();
+ const char *Str = Data.data();
+ llvm::SmallBitVector BV;
+ UncoveredArgHandler UA;
+ DecomposePrintfHandler H(S, FSL, FSL->getFormatString(), Type, 0, 0, IsObjC,
+ Str, Sema::FAPK_Elsewhere, {FSL->getFormatString()},
+ 0, InFunctionCall, Sema::VariadicDoesNotApply, BV,
+ UA, Args);
+
+ if (!analyze_format_string::ParsePrintfString(
+ H, Str, Str + Data.size(), S.getLangOpts(), S.Context.getTargetInfo(),
+ Type == Sema::FST_FreeBSDKPrintf))
+ H.DoneProcessing();
+ if (H.HadError)
+ return false;
+
+ std::sort(
+ Args.begin(), Args.end(),
+ [](const EquatableFormatArgument &A, const EquatableFormatArgument &B) {
+ return A.getPosition() < B.getPosition();
+ });
+ return true;
+}
+
+bool DecomposePrintfHandler::HandlePrintfSpecifier(
+ const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen, const TargetInfo &Target) {
+ if (!CheckPrintfHandler::HandlePrintfSpecifier(FS, startSpecifier,
+ specifierLen, Target)) {
+ HadError = true;
+ return false;
+ }
+
+ // Do not add any specifiers to the list for %%. This is possibly incorrect
+ // if using a precision/width with a data argument, but that combination is
+ // meaningless and we wouldn't know which format to attach the
+ // precision/width to.
+ const auto &CS = FS.getConversionSpecifier();
+ if (CS.getKind() == analyze_format_string::ConversionSpecifier::PercentArg)
+ return true;
+
+ // have to patch these to have the right ModifierFor if they are used
+ const unsigned Unset = ~0;
+ unsigned FieldWidthIndex = Unset;
+ unsigned PrecisionIndex = Unset;
+
+ // field width?
+ const auto &FieldWidth = FS.getFieldWidth();
+ if (!FieldWidth.isInvalid() && FieldWidth.hasDataArgument()) {
+ FieldWidthIndex = Specs.size();
+ Specs.emplace_back(getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(FieldWidth.getStart()),
+ analyze_format_string::LengthModifier::None, "*",
+ FieldWidth.getArgType(S.Context),
+ EquatableFormatArgument::FAR_FieldWidth,
+ EquatableFormatArgument::SS_None,
+ FieldWidth.usesPositionalArg()
+ ? FieldWidth.getPositionalArgIndex() - 1
+ : FieldWidthIndex,
+ 0);
+ }
+ // precision?
+ const auto &Precision = FS.getPrecision();
+ if (!Precision.isInvalid() && Precision.hasDataArgument()) {
+ PrecisionIndex = Specs.size();
+ Specs.emplace_back(
+ getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(Precision.getStart()),
+ analyze_format_string::LengthModifier::None, ".*",
+ Precision.getArgType(S.Context), EquatableFormatArgument::FAR_Precision,
+ EquatableFormatArgument::SS_None,
+ Precision.usesPositionalArg() ? Precision.getPositionalArgIndex() - 1
+ : PrecisionIndex,
+ 0);
+ }
+
+ // this specifier
+ unsigned SpecIndex =
+ FS.usesPositionalArg() ? FS.getPositionalArgIndex() - 1 : Specs.size();
+ if (FieldWidthIndex != Unset)
+ Specs[FieldWidthIndex].setModifierFor(SpecIndex);
+ if (PrecisionIndex != Unset)
+ Specs[PrecisionIndex].setModifierFor(SpecIndex);
+
+ EquatableFormatArgument::SpecifierSensitivity Sensitivity;
+ if (FS.isPrivate())
+ Sensitivity = EquatableFormatArgument::SS_Private;
+ else if (FS.isPublic())
+ Sensitivity = EquatableFormatArgument::SS_Public;
+ else if (FS.isSensitive())
+ Sensitivity = EquatableFormatArgument::SS_Sensitive;
+ else
+ Sensitivity = EquatableFormatArgument::SS_None;
+
+ Specs.emplace_back(
+ getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(CS.getStart()), FS.getLengthModifier().getKind(),
+ CS.getCharacters(), FS.getArgType(S.Context, isObjCContext()),
+ EquatableFormatArgument::FAR_Data, Sensitivity, SpecIndex, 0);
+
+ // auxiliary argument?
+ if (CS.getKind() == analyze_format_string::ConversionSpecifier::FreeBSDbArg ||
+ CS.getKind() == analyze_format_string::ConversionSpecifier::FreeBSDDArg) {
+ Specs.emplace_back(getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(CS.getStart()),
+ analyze_format_string::LengthModifier::None,
+ CS.getCharacters(),
+ analyze_format_string::ArgType::CStrTy,
+ EquatableFormatArgument::FAR_Auxiliary, Sensitivity,
+ SpecIndex + 1, SpecIndex);
+ }
+ return true;
}
// Determines if the specified is a C++ class or struct containing
@@ -7173,34 +7507,36 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex + 1))
return false;
- // Claim the second argument.
- CoveredArgs.set(argIndex + 1);
-
- // Type check the first argument (int for %b, pointer for %D)
- const Expr *Ex = getDataArg(argIndex);
- const analyze_printf::ArgType &AT =
- (CS.getKind() == ConversionSpecifier::FreeBSDbArg) ?
- ArgType(S.Context.IntTy) : ArgType::CPointerTy;
- if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType()))
- EmitFormatDiagnostic(
- S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
- << AT.getRepresentativeTypeName(S.Context) << Ex->getType()
- << false << Ex->getSourceRange(),
- Ex->getBeginLoc(), /*IsStringLocation*/ false,
- getSpecifierRange(startSpecifier, specifierLen));
-
- // Type check the second argument (char * for both %b and %D)
- Ex = getDataArg(argIndex + 1);
- const analyze_printf::ArgType &AT2 = ArgType::CStrTy;
- if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType()))
- EmitFormatDiagnostic(
- S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
- << AT2.getRepresentativeTypeName(S.Context) << Ex->getType()
- << false << Ex->getSourceRange(),
- Ex->getBeginLoc(), /*IsStringLocation*/ false,
- getSpecifierRange(startSpecifier, specifierLen));
-
- return true;
+ if (HasFormatArguments()) {
+ // Claim the second argument.
+ CoveredArgs.set(argIndex + 1);
+
+ // Type check the first argument (int for %b, pointer for %D)
+ const Expr *Ex = getDataArg(argIndex);
+ const analyze_printf::ArgType &AT =
+ (CS.getKind() == ConversionSpecifier::FreeBSDbArg)
+ ? ArgType(S.Context.IntTy)
+ : ArgType::CPointerTy;
+ if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType()))
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
+ << AT.getRepresentativeTypeName(S.Context) << Ex->getType()
+ << false << Ex->getSourceRange(),
+ Ex->getBeginLoc(), /*IsStringLocation*/ false,
+ getSpecifierRange(startSpecifier, specifierLen));
+
+ // Type check the second argument (char * for both %b and %D)
+ Ex = getDataArg(argIndex + 1);
+ const analyze_printf::ArgType &AT2 = ArgType::CStrTy;
+ if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType()))
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
+ << AT2.getRepresentativeTypeName(S.Context) << Ex->getType()
+ << false << Ex->getSourceRange(),
+ Ex->getBeginLoc(), /*IsStringLocation*/ false,
+ getSpecifierRange(startSpecifier, specifierLen));
+ }
+ return true;
}
// Check for using an Objective-C specific conversion specifier
@@ -7320,7 +7656,7 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
// The remaining checks depend on the data arguments.
- if (ArgPassingKind == Sema::FAPK_VAList)
+ if (!HasFormatArguments())
return true;
if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
@@ -7889,6 +8225,36 @@ class CheckScanfHandler : public CheckFormatHandler {
void HandleIncompleteScanList(const char *start, const char *end) override;
};
+class DecomposeScanfHandler : public CheckScanfHandler {
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs;
+ bool HadError;
+
+ DecomposeScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
+ const Expr *origFormatExpr, Sema::FormatStringType type,
+ unsigned firstDataArg, unsigned numDataArgs,
+ const char *beg, Sema::FormatArgumentPassingKind APK,
+ ArrayRef<const Expr *> Args, unsigned formatIdx,
+ bool inFunctionCall, Sema::VariadicCallType CallType,
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Specs)
+ : CheckScanfHandler(s, fexpr, origFormatExpr, type, firstDataArg,
+ numDataArgs, beg, APK, Args, formatIdx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg),
+ Specs(Specs), HadError(false) {}
+
+public:
+ static bool
+ GetSpecifiers(Sema &S, const FormatStringLiteral *FSL,
+ Sema::FormatStringType type, bool InFunctionCall,
+ llvm::SmallVectorImpl<EquatableFormatArgument> &Args);
+
+ bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) override;
+};
+
} // namespace
void CheckScanfHandler::HandleIncompleteScanList(const char *start,
@@ -7977,7 +8343,7 @@ bool CheckScanfHandler::HandleScanfSpecifier(
HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
// The remaining checks depend on the data arguments.
- if (ArgPassingKind == Sema::FAPK_VAList)
+ if (!HasFormatArguments())
return true;
if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
@@ -8035,8 +8401,84 @@ bool CheckScanfHandler::HandleScanfSpecifier(
return true;
}
+bool DecomposeScanfHandler::GetSpecifiers(
+ Sema &S, const FormatStringLiteral *FSL, Sema::FormatStringType Type,
+ bool InFunctionCall, llvm::SmallVectorImpl<EquatableFormatArgument> &Args) {
+ StringRef Data = FSL->getString();
+ const char *Str = Data.data();
+ llvm::SmallBitVector BV;
+ UncoveredArgHandler UA;
+ DecomposeScanfHandler H(S, FSL, FSL->getFormatString(), Type, 0, 0, Str,
+ Sema::FAPK_Elsewhere, {FSL->getFormatString()}, 0,
+ InFunctionCall, Sema::VariadicDoesNotApply, BV, UA,
+ Args);
+
+ if (!analyze_format_string::ParseScanfString(H, Str, Str + Data.size(),
+ S.getLangOpts(),
+ S.Context.getTargetInfo()))
+ H.DoneProcessing();
+ if (H.HadError)
+ return false;
+
+ std::sort(
+ Args.begin(), Args.end(),
+ [](const EquatableFormatArgument &A, const EquatableFormatArgument &B) {
+ return A.getPosition() < B.getPosition();
+ });
+ return true;
+}
+
+bool DecomposeScanfHandler::HandleScanfSpecifier(
+ const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen) {
+ if (!CheckScanfHandler::HandleScanfSpecifier(FS, startSpecifier,
+ specifierLen)) {
+ HadError = true;
+ return false;
+ }
+
+ const auto &CS = FS.getConversionSpecifier();
+ Specs.emplace_back(
+ getSpecifierRange(startSpecifier, specifierLen),
+ getLocationOfByte(CS.getStart()), FS.getLengthModifier().getKind(),
+ CS.getCharacters(), FS.getArgType(S.Context),
+ EquatableFormatArgument::FAR_Data, EquatableFormatArgument::SS_None,
+ FS.usesPositionalArg() ? FS.getPositionalArgIndex() - 1 : Specs.size(),
+ 0);
+ return true;
+}
+
+static void CompareFormatSpecifiers(Sema &S, const StringLiteral *Ref,
+ ArrayRef<EquatableFormatArgument> RefArgs,
+ const StringLiteral *Fmt,
+ ArrayRef<EquatableFormatArgument> FmtArgs) {
+ unsigned CommonArgs;
+ if (RefArgs.size() > FmtArgs.size()) {
+ CommonArgs = FmtArgs.size();
+ // "data argument not used by format string"
+ const auto &Arg = RefArgs[FmtArgs.size()];
+ S.Diag(Arg.getSourceLocation(), diag::warn_printf_data_arg_not_used)
+ << Arg.getSourceRange();
+ S.Diag(Fmt->getBeginLoc(), diag::note_format_cmp_with) << 1;
+ } else if (FmtArgs.size() > RefArgs.size()) {
+ CommonArgs = RefArgs.size();
+ // "more % conversions than data arguments"
+ const auto &Arg = FmtArgs[RefArgs.size()];
+ S.Diag(Fmt->getBeginLoc(), diag::warn_format_cmp_insufficient_specifiers)
+ << Arg.getSourceRange();
+ S.Diag(Ref->getBeginLoc(), diag::note_format_cmp_with) << 1;
+ } else {
+ CommonArgs = FmtArgs.size();
+ }
+
+ for (size_t I = 0; I < CommonArgs; ++I) {
+ FmtArgs[I].VerifyCompatible(S, RefArgs[I]);
+ }
+}
+
static void CheckFormatString(
- Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr,
+ Sema &S, const FormatStringLiteral *FExpr,
+ const StringLiteral *ReferenceFormatString, const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type,
bool inFunctionCall, Sema::VariadicCallType CallType,
@@ -8091,24 +8533,49 @@ static void CheckFormatString(
Type == Sema::FST_Kprintf || Type == Sema::FST_FreeBSDKPrintf ||
Type == Sema::FST_OSLog || Type == Sema::FST_OSTrace ||
Type == Sema::FST_Syslog) {
- CheckPrintfHandler H(
- S, FExpr, OrigFormatExpr, Type, firstDataArg, numDataArgs,
- (Type == Sema::FST_NSString || Type == Sema::FST_OSTrace), Str, APK,
- Args, format_idx, inFunctionCall, CallType, CheckedVarArgs,
- UncoveredArg);
-
- if (!analyze_format_string::ParsePrintfString(
- H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo(),
- Type == Sema::FST_Kprintf || Type == Sema::FST_FreeBSDKPrintf))
- H.DoneProcessing();
- } else if (Type == Sema::FST_Scanf) {
- CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
- numDataArgs, Str, APK, Args, format_idx, inFunctionCall,
- CallType, CheckedVarArgs, UncoveredArg);
+ bool IsObjC = Type == Sema::FST_NSString || Type == Sema::FST_OSTrace;
+ if (ReferenceFormatString == nullptr) {
+ CheckPrintfHandler H(
+ S, FExpr, OrigFormatExpr, Type, firstDataArg, numDataArgs, IsObjC, Str,
+ APK, Args, format_idx, inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
- if (!analyze_format_string::ParseScanfString(
- H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo()))
- H.DoneProcessing();
+ if (!analyze_format_string::ParsePrintfString(
+ H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo(),
+ Type == Sema::FST_Kprintf || Type == Sema::FST_FreeBSDKPrintf))
+ H.DoneProcessing();
+ } else {
+ llvm::SmallVector<EquatableFormatArgument, 9> RefArgs, FmtArgs;
+ FormatStringLiteral RefLit = ReferenceFormatString;
+ if (DecomposePrintfHandler::GetSpecifiers(S, &RefLit, Type, IsObjC,
+ inFunctionCall, RefArgs) &&
+ DecomposePrintfHandler::GetSpecifiers(S, FExpr, Type, IsObjC,
+ inFunctionCall, FmtArgs)) {
+ CompareFormatSpecifiers(S, ReferenceFormatString, RefArgs,
+ FExpr->getFormatString(), FmtArgs);
+ }
+ }
+ } else if (Type == Sema::FST_Scanf) {
+ if (ReferenceFormatString == nullptr) {
+ CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
+ numDataArgs, Str, APK, Args, format_idx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
+
+ if (!analyze_format_string::ParseScanfString(
+ H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo()))
+ H.DoneProcessing();
+ } else {
+ llvm::SmallVector<EquatableFormatArgument, 9> RefArgs, FmtArgs;
+ FormatStringLiteral RefLit = ReferenceFormatString;
+ if (DecomposeScanfHandler::GetSpecifiers(S, &RefLit, Type, inFunctionCall,
+ RefArgs) &&
+ DecomposeScanfHandler::GetSpecifiers(S, FExpr, Type, inFunctionCall,
+ FmtArgs)) {
+ CompareFormatSpecifiers(S, ReferenceFormatString, RefArgs,
+ FExpr->getFormatString(), FmtArgs);
+ }
+ }
} // TODO: handle other formats
}
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index bb4d33560b93b8..e4ab0e0923fe56 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3648,61 +3648,92 @@ FormatAttr *Sema::mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
return ::new (Context) FormatAttr(Context, CI, Format, FormatIdx, FirstArg);
}
+FormatMatchesAttr *Sema::mergeFormatMatchesAttr(Decl *D,
+ const AttributeCommonInfo &CI,
+ IdentifierInfo *Format,
+ int FormatIdx,
+ StringLiteral *FormatStr) {
+ // Check whether we already have an equivalent FormatMatches attribute.
+ for (auto *F : D->specific_attrs<FormatMatchesAttr>()) {
+ if (F->getType() == Format && F->getFormatIdx() == FormatIdx &&
+ true /* TODO: compare format strings */) {
+ // If we don't have a valid location for this attribute, adopt the
+ // location.
+ if (F->getLocation().isInvalid())
+ F->setRange(CI.getRange());
+ return nullptr;
+ }
+ }
+
+ return ::new (Context)
+ FormatMatchesAttr(Context, CI, Format, FormatIdx, FormatStr);
+}
+
+struct FormatAttrCommon {
+ FormatAttrKind Kind;
+ IdentifierInfo *Identifier;
+ unsigned NumArgs;
+ unsigned FormatStringIdx;
+};
+
/// Handle __attribute__((format(type,idx,firstarg))) attributes based on
/// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
-static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
+ FormatAttrCommon *Info) {
+ // Checks the first two arguments of the attribute; this is shared between
+ // Format and FormatMatches attributes.
+
if (!AL.isArgIdent(0)) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
<< AL << 1 << AANT_ArgumentIdentifier;
- return;
+ return false;
}
// In C++ the implicit 'this' function parameter also counts, and they are
// counted from one.
bool HasImplicitThisParam = isInstanceMethod(D);
- unsigned NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
+ Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
- IdentifierInfo *II = AL.getArgAsIdent(0)->Ident;
- StringRef Format = II->getName();
+ Info->Identifier = AL.getArgAsIdent(0)->Ident;
+ StringRef Format = Info->Identifier->getName();
if (normalizeName(Format)) {
// If we've modified the string name, we need a new identifier for it.
- II = &S.Context.Idents.get(Format);
+ Info->Identifier = &S.Context.Idents.get(Format);
}
// Check for supported formats.
- FormatAttrKind Kind = getFormatAttrKind(Format);
+ Info->Kind = getFormatAttrKind(Format);
- if (Kind == IgnoredFormat)
- return;
+ if (Info->Kind == IgnoredFormat)
+ return false;
- if (Kind == InvalidFormat) {
+ if (Info->Kind == InvalidFormat) {
S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
- << AL << II->getName();
- return;
+ << AL << Info->Identifier->getName();
+ return false;
}
// checks for the 2nd argument
Expr *IdxExpr = AL.getArgAsExpr(1);
- uint32_t Idx;
- if (!S.checkUInt32Argument(AL, IdxExpr, Idx, 2))
- return;
+ if (!S.checkUInt32Argument(AL, IdxExpr, Info->FormatStringIdx, 2))
+ return false;
- if (Idx < 1 || Idx > NumArgs) {
+ if (Info->FormatStringIdx < 1 || Info->FormatStringIdx > Info->NumArgs) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
<< AL << 2 << IdxExpr->getSourceRange();
- return;
+ return false;
}
// FIXME: Do we need to bounds check?
- unsigned ArgIdx = Idx - 1;
+ unsigned ArgIdx = Info->FormatStringIdx - 1;
if (HasImplicitThisParam) {
if (ArgIdx == 0) {
S.Diag(AL.getLoc(),
diag::err_format_attribute_implicit_this_format_string)
- << IdxExpr->getSourceRange();
- return;
+ << IdxExpr->getSourceRange();
+ return false;
}
ArgIdx--;
}
@@ -3714,10 +3745,19 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
(!Ty->isPointerType() ||
!Ty->castAs<PointerType>()->getPointeeType()->isCharType())) {
S.Diag(AL.getLoc(), diag::err_format_attribute_not)
- << IdxExpr->getSourceRange() << getFunctionOrMethodParamRange(D, ArgIdx);
- return;
+ << IdxExpr->getSourceRange()
+ << getFunctionOrMethodParamRange(D, ArgIdx);
+ return false;
}
+ return true;
+}
+
+static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ FormatAttrCommon Info;
+ if (!handleFormatAttrCommon(S, D, AL, &Info))
+ return;
+
// check the 3rd argument
Expr *FirstArgExpr = AL.getArgAsExpr(2);
uint32_t FirstArg;
@@ -3726,7 +3766,7 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// FirstArg == 0 is is always valid.
if (FirstArg != 0) {
- if (Kind == StrftimeFormat) {
+ if (Info.Kind == StrftimeFormat) {
// If the kind is strftime, FirstArg must be 0 because strftime does not
// use any variadic arguments.
S.Diag(AL.getLoc(), diag::err_format_strftime_third_parameter)
@@ -3737,17 +3777,17 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// Else, if the function is variadic, then FirstArg must be 0 or the
// "position" of the ... parameter. It's unusual to use 0 with variadic
// functions, so the fixit proposes the latter.
- if (FirstArg != NumArgs + 1) {
+ if (FirstArg != Info.NumArgs + 1) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
<< AL << 3 << FirstArgExpr->getSourceRange()
<< FixItHint::CreateReplacement(FirstArgExpr->getSourceRange(),
- std::to_string(NumArgs + 1));
+ std::to_string(Info.NumArgs + 1));
return;
}
} else {
// Inescapable GCC compatibility diagnostic.
S.Diag(D->getLocation(), diag::warn_gcc_requires_variadic_function) << AL;
- if (FirstArg <= Idx) {
+ if (FirstArg <= Info.FormatStringIdx) {
// Else, the function is not variadic, and FirstArg must be 0 or any
// parameter after the format parameter. We don't offer a fixit because
// there are too many possible good values.
@@ -3758,11 +3798,34 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
- FormatAttr *NewAttr = S.mergeFormatAttr(D, AL, II, Idx, FirstArg);
+ FormatAttr *NewAttr =
+ S.mergeFormatAttr(D, AL, Info.Identifier, Info.FormatStringIdx, FirstArg);
if (NewAttr)
D->addAttr(NewAttr);
}
+static void handleFormatMatchesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ FormatAttrCommon Info;
+ if (!handleFormatAttrCommon(S, D, AL, &Info))
+ return;
+
+ Expr *FormatStrExpr = AL.getArgAsExpr(2)->IgnoreParenImpCasts();
+ if (auto *SL = dyn_cast<StringLiteral>(FormatStrExpr)) {
+ // Aside from whether we started with a string literal, there is generally
+ // no useful checking that we can do here. For instance, "invalid" format
+ // specifiers (like %!) are simply ignored by printf and don't have a
+ // diagnostic.
+ if (auto *NewAttr = S.mergeFormatMatchesAttr(D, AL, Info.Identifier,
+ Info.FormatStringIdx, SL))
+ D->addAttr(NewAttr);
+ return;
+ }
+
+ S.Diag(AL.getLoc(), diag::err_format_nonliteral)
+ << FormatStrExpr->getSourceRange();
+ return;
+}
+
/// Handle __attribute__((callback(CalleeIdx, PayloadIdx0, ...))) attributes.
static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// The index that identifies the callback callee is mandatory.
@@ -6797,6 +6860,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_Format:
handleFormatAttr(S, D, AL);
break;
+ case ParsedAttr::AT_FormatMatches:
+ handleFormatMatchesAttr(S, D, AL);
+ break;
case ParsedAttr::AT_FormatArg:
handleFormatArgAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaObjC.cpp b/clang/lib/Sema/SemaObjC.cpp
index 57eda18c2d8e15..073d9791d037b2 100644
--- a/clang/lib/Sema/SemaObjC.cpp
+++ b/clang/lib/Sema/SemaObjC.cpp
@@ -2259,7 +2259,8 @@ void SemaObjC::handleExternallyRetainedAttr(Decl *D, const ParsedAttr &AL) {
bool SemaObjC::GetFormatNSStringIdx(const FormatAttr *Format, unsigned &Idx) {
Sema::FormatStringInfo FSI;
if ((SemaRef.GetFormatStringType(Format) == Sema::FST_NSString) &&
- SemaRef.getFormatStringInfo(Format, false, true, &FSI)) {
+ SemaRef.getFormatStringInfo(Format->getFormatIdx(), Format->getFirstArg(),
+ false, true, &FSI)) {
Idx = FSI.FormatIdx;
return true;
}
diff --git a/clang/test/Sema/format-string-matches.c b/clang/test/Sema/format-string-matches.c
new file mode 100644
index 00000000000000..23ad5b41f49d34
--- /dev/null
+++ b/clang/test/Sema/format-string-matches.c
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -verify -fblocks -fsyntax-only -Wformat-nonliteral -Wformat-signedness -isystem %S/Inputs %s
+// RUN: %clang_cc1 -verify -fblocks -fsyntax-only -Wformat-nonliteral -Wformat-signedness -isystem %S/Inputs -fno-signed-char %s
+
+#include <stdarg.h>
+
+__attribute__((format(printf, 1, 2)))
+int printf(const char *fmt, ...);
+
+__attribute__((format(printf, 1, 0)))
+int vprintf(const char *fmt, va_list);
+
+__attribute__((format(scanf, 1, 2)))
+int scanf(const char *fmt, ...);
+
+// MARK: -
+// Calling printf with a format from format_matches(printf) diagnoses with
+// that format string
+__attribute__((format_matches(printf, 1, "%s %1.5s")))
+void format_str_str0(const char *fmt) {
+ printf(fmt, "hello", "world");
+}
+
+__attribute__((format_matches(printf, 1, "%s" "%1.5s")))
+void format_str_str1(const char *fmt) {
+ printf(fmt, "hello", "world");
+}
+
+__attribute__((format_matches(printf, 1, ("%s" "%1.5s") + 5))) // expected-error{{format string is not a string literal}}
+void format_str_str2(const char *fmt);
+
+__attribute__((format_matches(printf, 1, "%s %g"))) // expected-note{{format string is defined here}}
+void format_str_double_warn(const char *fmt) {
+ printf(fmt, "hello", "world"); // expected-warning{{format specifies type 'double' but the argument has type 'char *'}}
+}
+
+__attribute__((format_matches(printf, 1, "%s %g")))
+void vformat(const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap); // XXX: ideally this would be a diagnostic
+ va_end(ap);
+}
+
+// MARK: -
+// Calling scanf
+__attribute__((format_matches(scanf, 1, "%hhi %g"))) // expected-note 3{{comparing with this specifier}}
+void scan(const char *fmt) {
+ char i;
+ float g;
+ scanf(fmt, &i, &g);
+}
+
+__attribute__((format_matches(scanf, 1, "%hhi %g"))) // expected-note{{format string is defined here}}
+void scan2(const char *fmt) {
+ char i;
+ double g;
+ scanf(fmt, &i, &g); // expected-warning{{format specifies type 'float *' but the argument has type 'double *'}}
+}
+
+void call_scan(void) {
+ scan("%hhd %e");
+ scan("%hd %Le"); // \
+ expected-warning{{format specifier 'hd' is incompatible with 'hhi'}} \
+ expected-warning{{format specifier 'Le' is incompatible with 'g'}}
+ scan("%s %p"); // expected-warning{{format specifier 'p' is incompatible with 'g'}}
+}
+
+// MARK: -
+// Calling a function with format_matches diagnoses for incompatible formats.
+
+void cvt_percent(const char *c) __attribute__((format_matches(printf, 1, "%%"))); // expected-note{{comparing with this format string}}
+void cvt_at(const char *c) __attribute__((format_matches(NSString, 1, "%@"))); // \
+ expected-warning{{data argument not used by format string}} \
+ expected-note{{comparing with this specifier}} \
+ expected-note{{comparing with this format string}}
+void cvt_c(const char *c) __attribute__((format_matches(printf, 1, "%c"))); // expected-note{{comparing with this specifier}}
+void cvt_u(const char *c) __attribute__((format_matches(printf, 1, "%u"))); // expected-note{{comparing with this specifier}}
+void cvt_i(const char *c) __attribute__((format_matches(printf, 1, "%i"))); // expected-note 2{{comparing with this specifier}}
+void cvt_p(const char *c) __attribute__((format_matches(printf, 1, "%p")));
+void cvt_s(const char *c) __attribute__((format_matches(printf, 1, "%s"))); // expected-note{{comparing with this specifier}}
+void cvt_n(const char *c) __attribute__((format_matches(printf, 1, "%n"))); // expected-note{{comparing with this specifier}}
+
+void test_compatibility(void) {
+ cvt_c("%i");
+ cvt_i("%c");
+ cvt_c("%u"); // expected-warning{{signedness of format specifier 'u' is incompatible with 'c'}}
+ cvt_u("%c"); // expected-warning{{signedness of format specifier 'c' is incompatible with 'u'}}
+
+ cvt_i("%lli"); // expected-warning{{format specifier 'lli' is incompatible with 'i'}}
+ cvt_i("%p"); // expected-warning{{format specifier 'p' is incompatible with 'i'}}
+ cvt_n("%s"); // expected-warning{{format specifier 's' is incompatible with 'n'}}
+ cvt_s("%hhn"); // expected-warning{{format specifier 'hhn' is incompatible with 's'}}
+
+ cvt_p("%@"); // expected-warning{{invalid conversion specifier '@'}}
+ cvt_at("%p"); // expected-warning{{format specifier 'p' is incompatible with '@'}}
+
+ cvt_percent("hello");
+ cvt_percent("%c"); // expected-warning{{fewer specifiers in format string than expected}}
+}
+
+void test_too_few_args(void) {
+ cvt_at("a"); // expected-note{{comparing with this format string}}
+ cvt_at("%@ %@"); // expected-warning{{fewer specifiers in format string than expected}}
+}
+
+void cvt_several(const char *c) __attribute__((format_matches(printf, 1, "%f %i %s"))); // expected-note{{comparing with this specifier}}
+
+void test_moving_args_around(void) {
+ cvt_several("%1g %-d %1.5s");
+
+ cvt_several("%3$s %1$g %2$i");
+
+ cvt_several("%f %*s"); // expected-warning{{format argument is an indirect field width, but it should be a value}}
+}
+
+void cvt_freebsd_D(const char *c) __attribute__((format_matches(freebsd_kprintf, 1, "%D"))); // expected-note{{comparing with this specifier}}
+
+void test_freebsd_specifiers(void) {
+ cvt_freebsd_D("%D");
+ cvt_freebsd_D("%b");
+ cvt_freebsd_D("%s %i"); // expected-warning{{format argument is a value, but it should be an auxiliary value}}
+}
>From f96aa6308d7f4cadc69c8d59ff653458e683c4d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Thu, 2 Jan 2025 13:44:28 -0800
Subject: [PATCH 2/5] Address RFC, review feedback
---
clang/include/clang/Basic/AttrDocs.td | 51 +++++++++---
clang/lib/AST/AttrImpl.cpp | 1 -
clang/lib/Sema/SemaChecking.cpp | 106 ++----------------------
clang/test/Sema/format-string-matches.c | 36 ++------
4 files changed, 57 insertions(+), 137 deletions(-)
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index a1f5dbde7ef3b8..7ad9278bfa4a57 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3760,20 +3760,23 @@ other. For instance:
static int wind_speed;
void say_hi(const char *fmt) {
- printf(fmt, first_name, todays_temperature); // warning: format string is not a string literal
- printf(fmt, first_name, wind_speed); // warning: format string is not a string literal
+ printf(fmt, first_name, todays_temperature);
+ // ^ warning: format string is not a string literal
+ printf(fmt, first_name, wind_speed);
+ // ^ warning: format string is not a string literal
}
int main() {
say_hi("hello %s, it is %g degrees outside");
- say_hi("hello %s, it is %d degrees outside!"); // no diagnostic
+ say_hi("hello %s, it is %d degrees outside!");
+ // ^ no diagnostic, but %d cannot format doubles
}
In this example, ``fmt`` is expected to format a ``const char *`` and a
``double``, but these values are not passed to ``say_hi``. Without the
``format`` attribute (which cannot apply in this case), the -Wformat-nonliteral
-diagnostic triggers in the body of ``say_hi``, and incorrect ``say_hi`` call
-sites do not trigger a diagnostic.
+diagnostic unnecessarily triggers in the body of ``say_hi``, and incorrect
+``say_hi`` call sites do not trigger a diagnostic.
To complement the ``format`` attribute, Clang also defines the
``format_matches`` attribute. Its syntax is similar to the ``format``
@@ -3803,10 +3806,31 @@ The third argument to ``format_matches`` is expected to evaluate to a **C string
literal** even when the format string would normally be a different type for the
given flavor, like a ``CFStringRef`` or a ``NSString *``.
+The only requirement on the format string literal is that it has specifiers
+that are compatible with the arguments that will be used. It can contain
+arbitrary non-format characters. For instance, for the purposes of compile-time
+validation, ``"%s scored %g%% on her test"`` and ``"%s%g"`` are interchangeable
+as the format string argument. As a means of self-documentation, users may
+prefer the former when it provides a useful example of an expected format
+string.
+
In the implementation of a function with the ``format_matches`` attribute,
format verification works as if the format string was identical to the one
specified in the attribute.
+.. code-block:: c
+
+ __attribute__((__format_matches__(printf, 1, "%s %g")))
+ void say_hi(const char *fmt) {
+ printf(fmt, "person", 546);
+ // ^ warning: format specifies type 'double' but the
+ // argument has type 'int'
+ // note: format string is defined here:
+ // __attribute__((__format_matches__(printf, 1, "%s %g")))
+ // ^~
+ }
+
+
At the call sites of functions with the ``format_matches`` attribute, format
verification instead compares the two format strings to evaluate their
equivalence. Each format flavor defines equivalence between format specifiers.
@@ -3817,15 +3841,14 @@ a negative example, ``%ld`` is incompatible with ``%d``.
Do note the following un-obvious cases:
-* Passing ``NULL`` as the format string is accepted without diagnostics.
+* Passing ``NULL`` as the format string does not trigger format diagnostics.
* When the format string is not NULL, it cannot _miss_ specifiers, even in
trailing positions. For instance, ``%d`` is not accepted when the required
format is ``%d %d %d``.
-* Specifiers for integers as small or smaller than ``int`` (such as ``%hhd``)
- are all mutually compatible because standard argument promotion ensures that
- integers are at least the size of ``int`` when passed as variadic arguments.
- With ``-Wformat-signedness``, mixing specifier for types with a different
- signedness still results in a diagnostic.
+* While checks for the ``format`` attribute tolerate sone size mismatches
+ that standard argument promotion renders immaterial (such as formatting an
+ ``int`` with ``%hhd``, which specifies a ``char``-sized integer), checks for
+ ``format_matches`` require specified argument sizes to match exactly.
* Format strings expecting a variable modifier (such as ``%*s``) are
incompatible with format strings that would itemize the variable modifiers
(such as ``%i %s``), even if the two specify ABI-compatible argument lists.
@@ -3837,6 +3860,12 @@ Do note the following un-obvious cases:
This is not overridable with ``-Wformat-pedantic`` or its inverse, which
control similar behavior in ``-Wformat``.
+At this time, clang implements ``format_matches`` only for format types in the
+``printf`` family. This includes variants such as Apple's NSString format and
+the FreeBSD ``kprintf``, but excludes ``scanf``. Using a known but unsupported
+format silently fails in order to be compatible with other implementations that
+would support these formats.
+
}];
}
diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp
index 5a169f815637f6..fefb8f55a9ee26 100644
--- a/clang/lib/AST/AttrImpl.cpp
+++ b/clang/lib/AST/AttrImpl.cpp
@@ -271,7 +271,6 @@ unsigned AlignedAttr::getAlignment(ASTContext &Ctx) const {
}
StringLiteral *FormatMatchesAttr::getFormatString() const {
- // This cannot go in headers because StringLiteral and Expr may be incomplete.
return cast<StringLiteral>(getExpectedFormat());
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5e668395c9797..bbf02c43ca9ad9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7234,9 +7234,11 @@ void EquatableFormatArgument::VerifyCompatible(
switch (ArgType.matchesArgType(S.Context, Other.ArgType)) {
case MK::Match:
- case MK::MatchPromotion:
break;
+ case MK::MatchPromotion:
+ // Per consensus reached at https://discourse.llvm.org/t/-/83076/12,
+ // MatchPromotion is treated as a failure by format_matches.
case MK::NoMatch:
case MK::NoMatchTypeConfusion:
case MK::NoMatchPromotionTypeConfusion:
@@ -8225,36 +8227,6 @@ class CheckScanfHandler : public CheckFormatHandler {
void HandleIncompleteScanList(const char *start, const char *end) override;
};
-class DecomposeScanfHandler : public CheckScanfHandler {
- llvm::SmallVectorImpl<EquatableFormatArgument> &Specs;
- bool HadError;
-
- DecomposeScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
- const Expr *origFormatExpr, Sema::FormatStringType type,
- unsigned firstDataArg, unsigned numDataArgs,
- const char *beg, Sema::FormatArgumentPassingKind APK,
- ArrayRef<const Expr *> Args, unsigned formatIdx,
- bool inFunctionCall, Sema::VariadicCallType CallType,
- llvm::SmallBitVector &CheckedVarArgs,
- UncoveredArgHandler &UncoveredArg,
- llvm::SmallVectorImpl<EquatableFormatArgument> &Specs)
- : CheckScanfHandler(s, fexpr, origFormatExpr, type, firstDataArg,
- numDataArgs, beg, APK, Args, formatIdx,
- inFunctionCall, CallType, CheckedVarArgs,
- UncoveredArg),
- Specs(Specs), HadError(false) {}
-
-public:
- static bool
- GetSpecifiers(Sema &S, const FormatStringLiteral *FSL,
- Sema::FormatStringType type, bool InFunctionCall,
- llvm::SmallVectorImpl<EquatableFormatArgument> &Args);
-
- bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) override;
-};
-
} // namespace
void CheckScanfHandler::HandleIncompleteScanList(const char *start,
@@ -8401,53 +8373,6 @@ bool CheckScanfHandler::HandleScanfSpecifier(
return true;
}
-bool DecomposeScanfHandler::GetSpecifiers(
- Sema &S, const FormatStringLiteral *FSL, Sema::FormatStringType Type,
- bool InFunctionCall, llvm::SmallVectorImpl<EquatableFormatArgument> &Args) {
- StringRef Data = FSL->getString();
- const char *Str = Data.data();
- llvm::SmallBitVector BV;
- UncoveredArgHandler UA;
- DecomposeScanfHandler H(S, FSL, FSL->getFormatString(), Type, 0, 0, Str,
- Sema::FAPK_Elsewhere, {FSL->getFormatString()}, 0,
- InFunctionCall, Sema::VariadicDoesNotApply, BV, UA,
- Args);
-
- if (!analyze_format_string::ParseScanfString(H, Str, Str + Data.size(),
- S.getLangOpts(),
- S.Context.getTargetInfo()))
- H.DoneProcessing();
- if (H.HadError)
- return false;
-
- std::sort(
- Args.begin(), Args.end(),
- [](const EquatableFormatArgument &A, const EquatableFormatArgument &B) {
- return A.getPosition() < B.getPosition();
- });
- return true;
-}
-
-bool DecomposeScanfHandler::HandleScanfSpecifier(
- const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
- unsigned specifierLen) {
- if (!CheckScanfHandler::HandleScanfSpecifier(FS, startSpecifier,
- specifierLen)) {
- HadError = true;
- return false;
- }
-
- const auto &CS = FS.getConversionSpecifier();
- Specs.emplace_back(
- getSpecifierRange(startSpecifier, specifierLen),
- getLocationOfByte(CS.getStart()), FS.getLengthModifier().getKind(),
- CS.getCharacters(), FS.getArgType(S.Context),
- EquatableFormatArgument::FAR_Data, EquatableFormatArgument::SS_None,
- FS.usesPositionalArg() ? FS.getPositionalArgIndex() - 1 : Specs.size(),
- 0);
- return true;
-}
-
static void CompareFormatSpecifiers(Sema &S, const StringLiteral *Ref,
ArrayRef<EquatableFormatArgument> RefArgs,
const StringLiteral *Fmt,
@@ -8556,26 +8481,13 @@ static void CheckFormatString(
}
}
} else if (Type == Sema::FST_Scanf) {
- if (ReferenceFormatString == nullptr) {
- CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
- numDataArgs, Str, APK, Args, format_idx,
- inFunctionCall, CallType, CheckedVarArgs,
- UncoveredArg);
+ CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
+ numDataArgs, Str, APK, Args, format_idx, inFunctionCall,
+ CallType, CheckedVarArgs, UncoveredArg);
- if (!analyze_format_string::ParseScanfString(
- H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo()))
- H.DoneProcessing();
- } else {
- llvm::SmallVector<EquatableFormatArgument, 9> RefArgs, FmtArgs;
- FormatStringLiteral RefLit = ReferenceFormatString;
- if (DecomposeScanfHandler::GetSpecifiers(S, &RefLit, Type, inFunctionCall,
- RefArgs) &&
- DecomposeScanfHandler::GetSpecifiers(S, FExpr, Type, inFunctionCall,
- FmtArgs)) {
- CompareFormatSpecifiers(S, ReferenceFormatString, RefArgs,
- FExpr->getFormatString(), FmtArgs);
- }
- }
+ if (!analyze_format_string::ParseScanfString(
+ H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo()))
+ H.DoneProcessing();
} // TODO: handle other formats
}
diff --git a/clang/test/Sema/format-string-matches.c b/clang/test/Sema/format-string-matches.c
index 23ad5b41f49d34..10ddbd5d092db6 100644
--- a/clang/test/Sema/format-string-matches.c
+++ b/clang/test/Sema/format-string-matches.c
@@ -9,9 +9,6 @@ int printf(const char *fmt, ...);
__attribute__((format(printf, 1, 0)))
int vprintf(const char *fmt, va_list);
-__attribute__((format(scanf, 1, 2)))
-int scanf(const char *fmt, ...);
-
// MARK: -
// Calling printf with a format from format_matches(printf) diagnoses with
// that format string
@@ -41,30 +38,6 @@ void vformat(const char *fmt, ...) {
va_end(ap);
}
-// MARK: -
-// Calling scanf
-__attribute__((format_matches(scanf, 1, "%hhi %g"))) // expected-note 3{{comparing with this specifier}}
-void scan(const char *fmt) {
- char i;
- float g;
- scanf(fmt, &i, &g);
-}
-
-__attribute__((format_matches(scanf, 1, "%hhi %g"))) // expected-note{{format string is defined here}}
-void scan2(const char *fmt) {
- char i;
- double g;
- scanf(fmt, &i, &g); // expected-warning{{format specifies type 'float *' but the argument has type 'double *'}}
-}
-
-void call_scan(void) {
- scan("%hhd %e");
- scan("%hd %Le"); // \
- expected-warning{{format specifier 'hd' is incompatible with 'hhi'}} \
- expected-warning{{format specifier 'Le' is incompatible with 'g'}}
- scan("%s %p"); // expected-warning{{format specifier 'p' is incompatible with 'g'}}
-}
-
// MARK: -
// Calling a function with format_matches diagnoses for incompatible formats.
@@ -75,7 +48,8 @@ void cvt_at(const char *c) __attribute__((format_matches(NSString, 1, "%@"))); /
expected-note{{comparing with this format string}}
void cvt_c(const char *c) __attribute__((format_matches(printf, 1, "%c"))); // expected-note{{comparing with this specifier}}
void cvt_u(const char *c) __attribute__((format_matches(printf, 1, "%u"))); // expected-note{{comparing with this specifier}}
-void cvt_i(const char *c) __attribute__((format_matches(printf, 1, "%i"))); // expected-note 2{{comparing with this specifier}}
+void cvt_hhi(const char *c) __attribute__((format_matches(printf, 1, "%hhi"))); // expected-note 3{{comparing with this specifier}}
+void cvt_i(const char *c) __attribute__((format_matches(printf, 1, "%i"))); // expected-note 4{{comparing with this specifier}}
void cvt_p(const char *c) __attribute__((format_matches(printf, 1, "%p")));
void cvt_s(const char *c) __attribute__((format_matches(printf, 1, "%s"))); // expected-note{{comparing with this specifier}}
void cvt_n(const char *c) __attribute__((format_matches(printf, 1, "%n"))); // expected-note{{comparing with this specifier}}
@@ -86,8 +60,14 @@ void test_compatibility(void) {
cvt_c("%u"); // expected-warning{{signedness of format specifier 'u' is incompatible with 'c'}}
cvt_u("%c"); // expected-warning{{signedness of format specifier 'c' is incompatible with 'u'}}
+ cvt_i("%hi"); // expected-warning{{format specifier 'hi' is incompatible with 'i'}}
+ cvt_i("%hhi"); // expected-warning{{format specifier 'hhi' is incompatible with 'i'}}
cvt_i("%lli"); // expected-warning{{format specifier 'lli' is incompatible with 'i'}}
cvt_i("%p"); // expected-warning{{format specifier 'p' is incompatible with 'i'}}
+ cvt_hhi("%hhi");
+ cvt_hhi("%hi"); // expected-warning{{format specifier 'hi' is incompatible with 'hhi'}}
+ cvt_hhi("%i"); // expected-warning{{format specifier 'i' is incompatible with 'hhi'}}
+ cvt_hhi("%li"); // expected-warning{{format specifier 'li' is incompatible with 'hhi'}}
cvt_n("%s"); // expected-warning{{format specifier 's' is incompatible with 'n'}}
cvt_s("%hhn"); // expected-warning{{format specifier 'hhn' is incompatible with 's'}}
>From fbe14c9ba85bee9107790e1f7467cb50a07beee8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Thu, 2 Jan 2025 14:39:47 -0800
Subject: [PATCH 3/5] Fix post-rebase format issues
---
clang/lib/Sema/SemaChecking.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bbf02c43ca9ad9..15ec03f8e6bdb6 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -8460,10 +8460,10 @@ static void CheckFormatString(
Type == Sema::FST_Syslog) {
bool IsObjC = Type == Sema::FST_NSString || Type == Sema::FST_OSTrace;
if (ReferenceFormatString == nullptr) {
- CheckPrintfHandler H(
- S, FExpr, OrigFormatExpr, Type, firstDataArg, numDataArgs, IsObjC, Str,
- APK, Args, format_idx, inFunctionCall, CallType, CheckedVarArgs,
- UncoveredArg);
+ CheckPrintfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
+ numDataArgs, IsObjC, Str, APK, Args, format_idx,
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
if (!analyze_format_string::ParsePrintfString(
H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo(),
>From 29fa8eba8c3d7ea11208bfcc81cc4e7c2f655bc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 7 Jan 2025 08:48:50 -0800
Subject: [PATCH 4/5] Improve diagnostics when compared format strings have
different numbers of specifiers
---
.../clang/Basic/DiagnosticSemaKinds.td | 4 +--
clang/lib/Sema/SemaChecking.cpp | 34 +++++++++++--------
clang/test/Sema/format-string-matches.c | 17 ++++++----
3 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index afd99a93c85520..ba397db60e230d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10018,8 +10018,8 @@ def note_previous_declaration_as : Note<
def warn_printf_insufficient_data_args : Warning<
"more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>;
-def warn_format_cmp_insufficient_specifiers : Warning<
- "fewer specifiers in format string than expected">, InGroup<FormatInsufficientArgs>;
+def warn_format_cmp_specifier_arity : Warning<
+ "%select{fewer|more}0 specifiers in format string than expected">, InGroup<FormatInsufficientArgs>;
def warn_printf_data_arg_not_used : Warning<
"data argument not used by format string">, InGroup<FormatExtraArgs>;
def warn_format_invalid_conversion : Warning<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 15ec03f8e6bdb6..1ea541fce9b311 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -8376,22 +8376,27 @@ bool CheckScanfHandler::HandleScanfSpecifier(
static void CompareFormatSpecifiers(Sema &S, const StringLiteral *Ref,
ArrayRef<EquatableFormatArgument> RefArgs,
const StringLiteral *Fmt,
- ArrayRef<EquatableFormatArgument> FmtArgs) {
+ ArrayRef<EquatableFormatArgument> FmtArgs,
+ const Expr *FmtExpr, bool InFunctionCall) {
unsigned CommonArgs;
- if (RefArgs.size() > FmtArgs.size()) {
- CommonArgs = FmtArgs.size();
- // "data argument not used by format string"
- const auto &Arg = RefArgs[FmtArgs.size()];
- S.Diag(Arg.getSourceLocation(), diag::warn_printf_data_arg_not_used)
- << Arg.getSourceRange();
- S.Diag(Fmt->getBeginLoc(), diag::note_format_cmp_with) << 1;
- } else if (FmtArgs.size() > RefArgs.size()) {
+ if (FmtArgs.size() > RefArgs.size()) {
CommonArgs = RefArgs.size();
- // "more % conversions than data arguments"
- const auto &Arg = FmtArgs[RefArgs.size()];
- S.Diag(Fmt->getBeginLoc(), diag::warn_format_cmp_insufficient_specifiers)
- << Arg.getSourceRange();
+ // "data argument not used by format string"
+ CheckFormatHandler::EmitFormatDiagnostic(
+ S, InFunctionCall, FmtExpr,
+ S.PDiag(diag::warn_format_cmp_specifier_arity) << 1,
+ FmtExpr->getBeginLoc(), false,
+ FmtArgs[RefArgs.size()].getSourceRange());
S.Diag(Ref->getBeginLoc(), diag::note_format_cmp_with) << 1;
+ } else if (RefArgs.size() > FmtArgs.size()) {
+ CommonArgs = FmtArgs.size();
+ // "fewer specifiers in format string than expected"
+ CheckFormatHandler::EmitFormatDiagnostic(
+ S, InFunctionCall, FmtExpr,
+ S.PDiag(diag::warn_format_cmp_specifier_arity) << 0,
+ FmtExpr->getBeginLoc(), false, Fmt->getSourceRange());
+ S.Diag(Ref->getBeginLoc(), diag::note_format_cmp_with)
+ << 1 << RefArgs[FmtArgs.size()].getSourceRange();
} else {
CommonArgs = FmtArgs.size();
}
@@ -8477,7 +8482,8 @@ static void CheckFormatString(
DecomposePrintfHandler::GetSpecifiers(S, FExpr, Type, IsObjC,
inFunctionCall, FmtArgs)) {
CompareFormatSpecifiers(S, ReferenceFormatString, RefArgs,
- FExpr->getFormatString(), FmtArgs);
+ FExpr->getFormatString(), FmtArgs,
+ Args[format_idx], inFunctionCall);
}
}
} else if (Type == Sema::FST_Scanf) {
diff --git a/clang/test/Sema/format-string-matches.c b/clang/test/Sema/format-string-matches.c
index 10ddbd5d092db6..f39cb2f08acc56 100644
--- a/clang/test/Sema/format-string-matches.c
+++ b/clang/test/Sema/format-string-matches.c
@@ -41,11 +41,10 @@ void vformat(const char *fmt, ...) {
// MARK: -
// Calling a function with format_matches diagnoses for incompatible formats.
-void cvt_percent(const char *c) __attribute__((format_matches(printf, 1, "%%"))); // expected-note{{comparing with this format string}}
+void cvt_percent(const char *c) __attribute__((format_matches(printf, 1, "%%"))); // expected-note 2{{comparing with this format string}}
void cvt_at(const char *c) __attribute__((format_matches(NSString, 1, "%@"))); // \
- expected-warning{{data argument not used by format string}} \
expected-note{{comparing with this specifier}} \
- expected-note{{comparing with this format string}}
+ expected-note 3{{comparing with this format string}}
void cvt_c(const char *c) __attribute__((format_matches(printf, 1, "%c"))); // expected-note{{comparing with this specifier}}
void cvt_u(const char *c) __attribute__((format_matches(printf, 1, "%u"))); // expected-note{{comparing with this specifier}}
void cvt_hhi(const char *c) __attribute__((format_matches(printf, 1, "%hhi"))); // expected-note 3{{comparing with this specifier}}
@@ -75,12 +74,18 @@ void test_compatibility(void) {
cvt_at("%p"); // expected-warning{{format specifier 'p' is incompatible with '@'}}
cvt_percent("hello");
- cvt_percent("%c"); // expected-warning{{fewer specifiers in format string than expected}}
+ cvt_percent("%c"); // expected-warning{{more specifiers in format string than expected}}
+
+ const char *const too_many = "%c"; // expected-note{{format string is defined here}}
+ cvt_percent(too_many); // expected-warning{{more specifiers in format string than expected}}
}
void test_too_few_args(void) {
- cvt_at("a"); // expected-note{{comparing with this format string}}
- cvt_at("%@ %@"); // expected-warning{{fewer specifiers in format string than expected}}
+ cvt_at("a"); // expected-warning{{fewer specifiers in format string than expected}}
+ cvt_at("%@ %@"); // expected-warning{{more specifiers in format string than expected}}
+
+ const char *const too_few = "a"; // expected-note{{format string is defined here}}
+ cvt_at(too_few); // expected-warning{{fewer specifiers in format string than expected}}
}
void cvt_several(const char *c) __attribute__((format_matches(printf, 1, "%f %i %s"))); // expected-note{{comparing with this specifier}}
>From dbfef7f53802c2d5b9e77c2eef3202f9fdf967da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 7 Jan 2025 08:58:10 -0800
Subject: [PATCH 5/5] Validate that format strings match when both caller and
callee have format_matches
---
clang/lib/Sema/SemaChecking.cpp | 4 ++--
clang/test/Sema/format-string-matches.c | 16 ++++++++++++++++
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1ea541fce9b311..f10f59fa4c2f7b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -6085,8 +6085,8 @@ static StringLiteralCheckType checkFormatStringExpr(
continue;
if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx)
return checkFormatStringExpr(
- S, nullptr, PVFormatMatches->getFormatString(), Args, APK,
- format_idx, firstDataArg, Type, CallType,
+ S, ReferenceFormatString, PVFormatMatches->getFormatString(),
+ Args, APK, format_idx, firstDataArg, Type, CallType,
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
Offset, IgnoreStringsWithoutSpecifiers);
}
diff --git a/clang/test/Sema/format-string-matches.c b/clang/test/Sema/format-string-matches.c
index f39cb2f08acc56..0192bcbdc32d08 100644
--- a/clang/test/Sema/format-string-matches.c
+++ b/clang/test/Sema/format-string-matches.c
@@ -105,3 +105,19 @@ void test_freebsd_specifiers(void) {
cvt_freebsd_D("%b");
cvt_freebsd_D("%s %i"); // expected-warning{{format argument is a value, but it should be an auxiliary value}}
}
+
+__attribute__((format_matches(printf, 1, "%s"))) // expected-note{{comparing with this specifier}}
+__attribute__((format_matches(os_log, 2, "%i"))) // expected-note{{comparing with this specifier}}
+void test_recv_multiple_format_strings(const char *fmt1, const char *fmt2);
+
+__attribute__((format_matches(printf, 1, "%s")))
+__attribute__((format_matches(os_log, 2, "%i")))
+void test_multiple_format_strings(const char *fmt1, const char *fmt2) {
+ test_recv_multiple_format_strings("%s", "%i");
+ test_recv_multiple_format_strings("%s", "%s"); // expected-warning{{format specifier 's' is incompatible with 'i'}}
+ test_recv_multiple_format_strings("%i", "%i"); // expected-warning{{format specifier 'i' is incompatible with 's'}}
+
+ test_recv_multiple_format_strings(fmt1, fmt2);
+ test_recv_multiple_format_strings("%.5s", fmt2);
+ test_recv_multiple_format_strings(fmt1, "%04d");
+}
More information about the cfe-commits
mailing list