[clang] [-Wunsafe-buffer-usage] Add check for custom printf/scanf functions (PR #173096)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 19 12:23:19 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-analysis
@llvm/pr-subscribers-clang
Author: Ziqing Luo (ziqingluo-90)
<details>
<summary>Changes</summary>
This commit adds support for functions annotated with `__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These functions will be treated the same way as printf/scanf functions in the standard C library by `-Wunsafe-buffer-usage`
rdar://143233737
---
Full diff: https://github.com/llvm/llvm-project/pull/173096.diff
3 Files Affected:
- (modified) clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def (+1)
- (modified) clang/lib/Analysis/UnsafeBufferUsage.cpp (+100-15)
- (modified) clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp (+28-4)
``````````diff
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index fae5f8b8aa8e3..f9bba5d54e9c7 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -40,6 +40,7 @@ WARNING_GADGET(UnsafeBufferUsageCtorAttr)
WARNING_GADGET(DataInvocation)
WARNING_GADGET(UniquePtrArrayAccess)
WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
+WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall)
WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 7ef20726d0ab9..7c21ec86af544 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -825,9 +825,11 @@ struct LibcFunNamePrefixSuffixParser {
//
// `UnsafeArg` is the output argument that will be set only if this function
// returns true.
-static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
- const unsigned FmtArgIdx, ASTContext &Ctx,
- bool isKprintf = false) {
+// `FmtArgIdx` is insignificant if its value is negative, meaning that format
+// arguments start at `FmtIdx` + 1.
+static bool hasUnsafeFormatOrSArg(ASTContext &Ctx, const CallExpr *Call,
+ const Expr *&UnsafeArg, const unsigned FmtIdx,
+ int FmtArgIdx = -1, bool isKprintf = false) {
class StringFormatStringHandler
: public analyze_format_string::FormatStringHandler {
const CallExpr *Call;
@@ -850,8 +852,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
unsigned PArgIdx = -1;
if (Precision.hasDataArgument())
- PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
- if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+ PArgIdx = Precision.getArgIndex() + FmtArgIdx;
+ if (0 <= PArgIdx && PArgIdx < Call->getNumArgs()) {
const Expr *PArg = Call->getArg(PArgIdx);
// Strip the cast if `PArg` is a cast-to-int expression:
@@ -886,9 +888,9 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
analyze_printf::PrintfConversionSpecifier::sArg)
return true; // continue parsing
- unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+ unsigned ArgIdx = FS.getArgIndex() + FmtArgIdx;
- if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+ if (!(0 <= ArgIdx && ArgIdx < Call->getNumArgs()))
// If the `ArgIdx` is invalid, give up.
return true; // continue parsing
@@ -921,12 +923,15 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
bool isUnsafeArgSet() { return UnsafeArgSet; }
};
- const Expr *Fmt = Call->getArg(FmtArgIdx);
+ const Expr *Fmt = Call->getArg(FmtIdx);
+ unsigned FmtArgStartingIdx =
+ FmtArgIdx < 0 ? FmtIdx + 1 : static_cast<unsigned>(FmtArgIdx);
if (auto *SL = dyn_cast<clang::StringLiteral>(Fmt->IgnoreParenImpCasts())) {
if (SL->getCharByteWidth() == 1) {
StringRef FmtStr = SL->getString();
- StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+ StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+ Ctx);
return analyze_format_string::ParsePrintfString(
Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -935,7 +940,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
}
if (auto FmtStr = SL->tryEvaluateString(Ctx)) {
- StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+ StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+ Ctx);
return analyze_format_string::ParsePrintfString(
Handler, FmtStr->data(), FmtStr->data() + FmtStr->size(),
Ctx.getLangOpts(), Ctx.getTargetInfo(), isKprintf) &&
@@ -946,7 +952,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
// In this case, this call is considered unsafe if at least one argument
// (including the format argument) is unsafe pointer.
return llvm::any_of(
- llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+ llvm::make_range(Call->arg_begin() + FmtArgStartingIdx, Call->arg_end()),
[&UnsafeArg, &Ctx](const Expr *Arg) -> bool {
if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) {
UnsafeArg = Arg;
@@ -1161,7 +1167,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
// It is a fprintf:
const Expr *UnsafeArg;
- if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) {
+ if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 1)) {
Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
return true;
}
@@ -1175,7 +1181,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
if (auto *II = FD->getIdentifier())
isKprintf = II->getName() == "kprintf";
- if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) {
+ if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 0, -1, isKprintf)) {
Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
return true;
}
@@ -1190,7 +1196,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
// second is an integer, it is a snprintf:
const Expr *UnsafeArg;
- if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) {
+ if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 2)) {
Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
return true;
}
@@ -2068,6 +2074,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
constexpr static const char *const UnsafeVaListTag =
"UnsafeLibcFunctionCall_va_list";
+public:
enum UnsafeKind {
OTHERS = 0, // no specific information, the callee function is unsafe
SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
@@ -2080,7 +2087,6 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
// considered unsafe as it is not compile-time check
} WarnedFunKind = OTHERS;
-public:
UnsafeLibcFunctionCallGadget(const MatchResult &Result)
: WarningGadget(Kind::UnsafeLibcFunctionCall),
Call(Result.getNodeAs<CallExpr>(Tag)) {
@@ -2171,6 +2177,85 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
};
+class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget {
+ const CallExpr *const Call;
+ const Expr *UnsafeArg = nullptr;
+ constexpr static const char *const Tag = "UnsafeFormatAttributedFunctionCall";
+ constexpr static const char *const UnsafeStringTag =
+ "UnsafeFormatAttributedFunctionCall_string";
+
+public:
+ UnsafeFormatAttributedFunctionCallGadget(const MatchResult &Result)
+ : WarningGadget(Kind::UnsafeLibcFunctionCall),
+ Call(Result.getNodeAs<CallExpr>(Tag)),
+ UnsafeArg(Result.getNodeAs<Expr>(UnsafeStringTag)) {}
+
+ static bool matches(const Stmt *S, ASTContext &Ctx,
+ const UnsafeBufferUsageHandler *Handler,
+ MatchResult &Result) {
+ if (ignoreUnsafeLibcCall(Ctx, *S, Handler))
+ return false;
+ auto *CE = dyn_cast<CallExpr>(S);
+ if (!CE || !CE->getDirectCallee())
+ return false;
+ const auto *FD = dyn_cast<FunctionDecl>(CE->getDirectCallee());
+ if (!FD)
+ return false;
+
+ const FormatAttr *Attr = nullptr;
+ bool IsPrintf = false;
+ bool AnyAttr = llvm::any_of(
+ FD->specific_attrs<FormatAttr>(),
+ [&Attr, &IsPrintf](const FormatAttr *FA) -> bool {
+ if (const auto *II = FA->getType()) {
+ if (II->getName() == "printf" || II->getName() == "scanf") {
+ Attr = FA;
+ IsPrintf = II->getName() == "printf";
+ return true;
+ }
+ }
+ return false;
+ });
+ const Expr *UnsafeArg;
+
+ if (AnyAttr && !IsPrintf &&
+ (CE->getNumArgs() >= static_cast<unsigned>(Attr->getFirstArg()))) {
+ // for scanf-like functions, any format argument is considered unsafe:
+ Result.addNode(Tag, DynTypedNode::create(*CE));
+ return true;
+ }
+ if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg(
+ Ctx, CE, UnsafeArg,
+ // FormatAttribute indexes are 1-based:
+ Attr->getFormatIdx() - 1, Attr->getFirstArg() - 1)) {
+ Result.addNode(Tag, DynTypedNode::create(*CE));
+ Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg));
+ return true;
+ }
+ return false;
+ }
+
+ const Stmt *getBaseStmt() const { return Call; }
+
+ SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+ void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+ bool IsRelatedToDecl,
+ ASTContext &Ctx) const override {
+ if (UnsafeArg)
+ Handler.handleUnsafeLibcCall(
+ Call, UnsafeLibcFunctionCallGadget::UnsafeKind::STRING, Ctx,
+ UnsafeArg);
+ else
+ Handler.handleUnsafeLibcCall(
+ Call, UnsafeLibcFunctionCallGadget::UnsafeKind::OTHERS, Ctx);
+ }
+
+ DeclUseList getClaimedVarUseSites() const override { return {}; }
+
+ SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
+};
+
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
// Context (see `findStmtsInUnspecifiedLvalueContext`).
// Note here `[]` is the built-in subscript operator.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 4f1af79609223..8df65ebc2eaf0 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
// RUN: -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
// RUN: -verify %s -x objective-c++
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\
// RUN: -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\
// RUN: -verify %s -DTEST_STD_NS
typedef struct {} FILE;
@@ -255,3 +255,27 @@ void dontCrashForInvalidFormatString() {
snprintf((char*)0, 0, "%");
snprintf((char*)0, 0, "\0");
}
+
+
+// Also warn about unsafe printf/scanf-like functions:
+void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 2)));
+void myprintf_2(const char *F, int irrelevant, const char *Str) __attribute__((__format__ (__printf__, 1, 3)));
+void myscanf(const char *F, ...) __attribute__((__format__ (__scanf__, 1, 2)));
+
+void test_myprintf(char * Str, std::string StdStr) {
+ myprintf("hello", Str);
+ myprintf("hello %s", StdStr.c_str());
+ myprintf("hello %s", Str); // expected-warning{{function 'myprintf' is unsafe}} \
+ expected-note{{string argument is not guaranteed to be null-terminated}}
+
+ myprintf_2("hello", 0, Str);
+ myprintf_2("hello %s", 0, StdStr.c_str());
+ myprintf_2("hello %s", 0, Str); // expected-warning{{function 'myprintf_2' is unsafe}} \
+ expected-note{{string argument is not guaranteed to be null-terminated}}
+ myscanf("hello %s");
+ myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}}
+
+ int X;
+
+ myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}}
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/173096
More information about the cfe-commits
mailing list