[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
Ziqing Luo via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 28 17:20:19 PDT 2024
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583
>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
functions
Warning about calls to libc functions involving buffer access. Warned
functions are hardcoded by names.
(rdar://117182250)
---
.../Analysis/Analyses/UnsafeBufferUsage.h | 12 +
.../Analyses/UnsafeBufferUsageGadgets.def | 1 +
.../clang/Basic/DiagnosticSemaKinds.td | 7 +
clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +++++++++++++++++-
clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 +
...-usage-libc-functions-inline-namespace.cpp | 60 +++
...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ++++
...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +-
8 files changed, 586 insertions(+), 4 deletions(-)
create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
#define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
#include "clang/AST/Stmt.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
virtual void handleUnsafeOperation(const Stmt *Operation,
bool IsRelatedToDecl, ASTContext &Ctx) = 0;
+ /// Invoked when a call to an unsafe libc function is found.
+ /// \param PrintfInfo
+ /// is 0 if the callee function is not a member of the printf family;
+ /// is 1 if the callee is `sprintf`;
+ /// is 2 if arguments of the call have `__size_by` relation but are not in a
+ /// safe pattern;
+ /// is 3 if string arguments do not guarantee null-termination
+ /// is 4 if the callee takes va_list
+ virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ ASTContext &Ctx) = 0;
+
/// Invoked when an unsafe operation with a std container is found.
virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
WARNING_GADGET(UnsafeBufferUsageAttr)
WARNING_GADGET(UnsafeBufferUsageCtorAttr)
WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
"%select{unsafe pointer operation|unsafe pointer arithmetic|"
"unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">,
InGroup<UnsafeBufferUsage>, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+ "function %0 introduces unsafe buffer access">,
+ InGroup<UnsafeBufferUsage>, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+ "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match"
+ "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination"
+ "| do not use va_list that cannot be checked at compile-time for bounds safety}0">;
def note_unsafe_buffer_operation : Note<
"used%select{| in pointer arithmetic| in buffer access}0 here">;
def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
+#include <functional>
#include <memory>
#include <optional>
#include <queue>
@@ -443,6 +447,337 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets. They all take a `CoreName`, which is the substring of a
+// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+// 1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+// And, the notation `CoreName[str/wcs]` means a new name obtained from replace
+// string "wcs" with "str" in `CoreName`.
+//
+// Also note, the set of predefined function names does not include `printf`
+// functions, they are checked exclusively with other matchers below.
+// Maintaining the invariant that all matchers under
+// `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+ static const std::set<StringRef> PredefinedNames{
+ // numeric conversion:
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "strtol",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strtof",
+ "strtod",
+ "strtold",
+ "strtoimax",
+ "strtoumax",
+ // "strfromf", "strfromd", "strfroml", // C23?
+ // string manipulation:
+ "strcpy",
+ "strncpy",
+ "strlcpy",
+ "strcat",
+ "strncat",
+ "strlcat",
+ "strxfrm",
+ "strdup",
+ "strndup",
+ // string examination:
+ "strlen",
+ "strnlen",
+ "strcmp",
+ "strncmp",
+ "stricmp",
+ "strcasecmp",
+ "strcoll",
+ "strchr",
+ "strrchr",
+ "strspn",
+ "strcspn",
+ "strpbrk",
+ "strstr",
+ "strtok",
+ // "mem-" functions
+ "memchr",
+ "wmemchr",
+ "memcmp",
+ "wmemcmp",
+ "memcpy",
+ "memccpy",
+ "mempcpy",
+ "wmemcpy",
+ "memmove",
+ "wmemmove",
+ "memset",
+ "wmemset",
+ // IO:
+ "fread",
+ "fwrite",
+ "fgets",
+ "fgetws",
+ "gets",
+ "fputs",
+ "fputws",
+ "puts",
+ // others
+ "strerror_s",
+ "strerror_r",
+ "bcopy",
+ "bzero",
+ "bsearch",
+ "qsort",
+ };
+ // This is safe: strlen("hello"). We don't want to be noisy on this case.
+ auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+ return Name == "strlen" && Node.getNumArgs() == 1 &&
+ isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts());
+ };
+
+ // Match predefined names:
+ if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+ return !isSafeStrlen(CoreName);
+
+ std::string NameWCS = CoreName.str();
+ size_t WcsPos = NameWCS.find("wcs");
+
+ while (WcsPos != std::string::npos) {
+ NameWCS[WcsPos++] = 's';
+ NameWCS[WcsPos++] = 't';
+ NameWCS[WcsPos++] = 'r';
+ WcsPos = NameWCS.find("wcs", WcsPos);
+ }
+ if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+ return !isSafeStrlen(NameWCS);
+ // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
+ // all should end with "scanf"):
+ return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`. We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+ StringRef Name = CoreName;
+
+ if (!Name.ends_with("printf"))
+ return false; // neither printf nor scanf
+ return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+ StringRef Name = CoreName;
+
+ if (!Name.ends_with("printf") || Name.starts_with("v"))
+ return false;
+
+ StringRef Prefix = Name.drop_back(6);
+
+ if (Prefix.ends_with("w"))
+ Prefix = Prefix.drop_back(1);
+ return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+ if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+ const CXXMethodDecl *MD = MCE->getMethodDecl();
+ const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+ if (MD && RD && RD->isInStdNamespace())
+ if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+ return true;
+ }
+ return false;
+}
+
+// Matches a call to one of the `-printf" functions (excluding the ones with
+// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
+// fail to guarantee their null-termination. In other words, these calls are
+// safe if they use null-termination guaranteed string pointers.
+AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
+ StringRef Name = CoreName;
+
+ if (!Name.ends_with("printf") || Name.starts_with("v"))
+ return false;
+
+ StringRef Prefix = Name.drop_back(6);
+
+ if (Prefix.ends_with("w"))
+ Prefix = Prefix.drop_back(1);
+
+ auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
+ return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ };
+
+ if (Prefix.empty() ||
+ Prefix == "k") // printf: all pointer args should be null-terminated
+ return any_of(Node.arguments(), AnyUnsafeStrPtr);
+ if (Prefix == "f" && Node.getNumArgs() > 1)
+ return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()),
+ AnyUnsafeStrPtr);
+ if (Prefix == "sn" && Node.getNumArgs() > 2) {
+ return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()),
+ AnyUnsafeStrPtr);
+ }
+ return false; // A call to a "-printf" falls into another category.
+}
+
+// Matches a call to one of the `snprintf` functions (excluding the ones with
+// va_list) such that the first two arguments fail to conform to safe patterns.
+//
+// For the first two arguments: `ptr` and `size`, they are safe if in the
+// following patterns:
+// ptr := DRE.data();
+// size:= DRE.size()/DRE.size_bytes()
+// And DRE is a hardened container or view.
+AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
+ StringRef Name = CoreName;
+
+ if (!Name.ends_with("printf") || Name.starts_with("v"))
+ return false; // not snprint or use va_list
+
+ StringRef Prefix = Name.drop_back(6);
+
+ if (Prefix.ends_with("w"))
+ Prefix = Prefix.drop_back(1);
+
+ if (Prefix != "sn")
+ return false; // not snprint
+
+ static StringRef SizedObjs[] = {"span", "array", "vector",
+ "basic_string_view", "basic_string"};
+ const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts();
+ const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts();
+
+ if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr))
+ if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+ auto *DREOfPtr = dyn_cast<DeclRefExpr>(
+ MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
+ auto *DREOfSize = dyn_cast<DeclRefExpr>(
+ MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
+
+ if (!DREOfPtr || !DREOfSize)
+ return true; // not in safe pattern
+ if (DREOfPtr->getDecl() != DREOfSize->getDecl())
+ return true; // not in safe pattern
+ if (MCEPtr->getMethodDecl()->getName() != "data")
+ return true; // not in safe pattern
+
+ if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
+ // Note here the pointer must be a pointer-to-char type unless there
+ // is explicit casting. If there is explicit casting, this branch
+ // is unreachable. Thus, at this branch "size" and "size_bytes" are
+ // equivalent as the pointer is a char pointer:
+ MCESize->getMethodDecl()->getName() == "size")
+ for (StringRef SizedObj : SizedObjs)
+ if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+ MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
+ SizedObj)
+ return false; // It is in fact safe
+ }
+ return true; // ptr and size are not in safe pattern
+}
+} // namespace libc_fun_disjoint_inner_matchers
+
+// Match call to a function whose name may have prefixes like "__builtin_" or
+// "__asan_" and suffixes like "_s" or "_chk". This matcher takes an argument,
+// which should be applied to the core name---the subtring after stripping off
+// prefix and suffix of the function name.
+// The application results in an inner matcher that matches the call node with
+// respect to the core name of the callee.
+AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
+ std::function<internal::Matcher<CallExpr>(StringRef)>,
+ InnerMatcher) {
+ // Given a function name, returns its core name `CoreName` according to the
+ // following grammar.
+ //
+ // LibcName := CoreName | CoreName + "_s"
+ // MatchingName := "__builtin_" + LibcName |
+ // "__builtin___" + LibcName + "_chk" |
+ // "__asan_" + LibcName
+ //
+ struct NameParser {
+ StringRef matchName(StringRef FunName, bool isBuiltin) {
+ // Try to match __builtin_:
+ if (isBuiltin && FunName.starts_with("__builtin_"))
+ // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+ // no match:
+ return matchLibcNameOrBuiltinChk(
+ FunName.drop_front(10 /* truncate "__builtin_" */));
+ // Try to match __asan_:
+ if (FunName.starts_with("__asan_"))
+ return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+ return matchLibcName(FunName);
+ }
+
+ // Parameter `Name` is the substring after stripping off the prefix
+ // "__builtin_".
+ StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+ if (Name.starts_with("__") && Name.ends_with("_chk"))
+ return matchLibcName(
+ Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+ return matchLibcName(Name);
+ }
+
+ StringRef matchLibcName(StringRef Name) {
+ if (Name.ends_with("_s"))
+ return Name.drop_back(2 /* truncate "_s" */);
+ return Name;
+ }
+ } TheLittleParser;
+
+ const FunctionDecl *FD = Node.getDirectCallee();
+ const IdentifierInfo *II;
+
+ if (!FD)
+ return false;
+ II = FD->getIdentifier();
+ // If this is a special C++ name without IdentifierInfo, it can't be a
+ // C library function.
+ if (!II)
+ return false;
+
+ // Look through 'extern "C"' and anything similar invented in the future.
+ // In general, C library functions will be in the TU directly.
+ if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
+ // If that's not the case, we also consider "C functions" re-declared in
+ // `std` namespace.
+ if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace())
+ return false;
+ }
+
+ // If this function is not externally visible, it is not a C library function.
+ // Note that we make an exception for inline functions, which may be
+ // declared in header files without external linkage.
+ if (!FD->isInlined() && !FD->isExternallyVisible())
+ return false;
+
+ StringRef CoreName =
+ TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
+
+ return InnerMatcher(CoreName).matches(Node, Finder, Builder);
+}
} // namespace clang::ast_matchers
namespace {
@@ -1025,6 +1360,77 @@ class DataInvocationGadget : public WarningGadget {
DeclUseList getClaimedVarUseSites() const override { return {}; }
};
+class UnsafeLibcFunctionCallGadget : public WarningGadget {
+ const CallExpr *const Call;
+ constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
+ // Extra tags for additional information:
+ constexpr static const char *const UnsafeSprintfTag =
+ "UnsafeLibcFunctionCall_sprintf";
+ constexpr static const char *const UnsafeSizedByTag =
+ "UnsafeLibcFunctionCall_sized_by";
+ constexpr static const char *const UnsafeStringTag =
+ "UnsafeLibcFunctionCall_string";
+ constexpr static const char *const UnsafeVaListTag =
+ "UnsafeLibcFunctionCall_va_list";
+
+ enum UnsafeKind {
+ OTHERS = 0, // no specific information, the callee function is unsafe
+ SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
+ SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but
+ // they do not conform to safe patterns
+ STRING = 3, // an argument is a pointer-to-char-as-string but does not
+ // guarantee null-termination
+ VA_LIST = 4, // one of the `-printf`s function that take va_list, which is
+ // considered unsafe as it is not compile-time check
+ } WarnedFunKind = OTHERS;
+
+public:
+ UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
+ : WarningGadget(Kind::UnsafeLibcFunctionCall),
+ Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
+ if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf"))
+ WarnedFunKind = SPRINTF;
+ else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string"))
+ WarnedFunKind = STRING;
+ else if (Result.Nodes.getNodeAs<CallExpr>(
+ "UnsafeLibcFunctionCall_sized_by"))
+ WarnedFunKind = SIZED_BY;
+ else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list"))
+ WarnedFunKind = VA_LIST;
+ }
+
+ static Matcher matcher() {
+ auto anyOfLibcInnerMatcher = [](StringRef S) {
+ return anyOf(
+ libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S),
+ callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S))
+ .bind(UnsafeStringTag),
+ callExpr(
+ libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S))
+ .bind(UnsafeSizedByTag),
+ callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S))
+ .bind(UnsafeSprintfTag),
+ callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S))
+ .bind(UnsafeVaListTag));
+ };
+
+ return stmt(
+ callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag));
+ }
+
+ const Stmt *getBaseStmt() const { return Call; }
+
+ SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+ void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+ bool IsRelatedToDecl,
+ ASTContext &Ctx) const override {
+ Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx);
+ }
+
+ DeclUseList getClaimedVarUseSites() const override { return {}; }
+};
+
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
// Context (see `isInUnspecifiedLvalueContext`).
// Note here `[]` is the built-in subscript operator.
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 0f604c61fa3af9..53ade2df5c311b 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2292,6 +2292,18 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
}
}
+ void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ ASTContext &Ctx) override {
+ // We have checked that there is a direct callee with an identifier name:
+ StringRef Name = Call->getDirectCallee()->getName();
+
+ S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
+ << Name << Call->getSourceRange();
+ if (PrintfInfo > 0)
+ S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call)
+ << PrintfInfo << Call->getSourceRange();
+ }
+
void handleUnsafeOperationInContainer(const Stmt *Operation,
bool IsRelatedToDecl,
ASTContext &Ctx) override {
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
new file mode 100644
index 00000000000000..eebbc381a262ff
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: -verify %s
+
+namespace std {
+ inline namespace __1 {
+ template< class InputIt, class OutputIt >
+ OutputIt copy( InputIt first, InputIt last,
+ OutputIt d_first );
+
+ struct iterator{};
+ template<typename T>
+ struct span {
+ T * ptr;
+ T * data();
+ unsigned size_bytes();
+ unsigned size();
+ iterator begin() const noexcept;
+ iterator end() const noexcept;
+ };
+
+ template<typename T>
+ struct basic_string {
+ T* p;
+ T *c_str();
+ T *data();
+ unsigned size_bytes();
+ };
+
+ typedef basic_string<char> string;
+ typedef basic_string<wchar_t> wstring;
+
+ // C function under std:
+ void memcpy();
+ void strcpy();
+ int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+ }
+}
+
+void f(char * p, char * q, std::span<char> s) {
+ std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
+ std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
+ std::__1::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
+ std::__1::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
+
+ /* Test printfs */
+ std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+ std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+}
+
+void v(std::string s1) {
+ std::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+ std::__1::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+ std::copy(begin, end, p); // no warn
+ std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
new file mode 100644
index 00000000000000..ea2c5ec82211b2
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: -verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __asan_memcpy();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+ template< class InputIt, class OutputIt >
+ OutputIt copy( InputIt first, InputIt last,
+ OutputIt d_first );
+
+ struct iterator{};
+ template<typename T>
+ struct span {
+ T * ptr;
+ T * data();
+ unsigned size_bytes();
+ unsigned size();
+ iterator begin() const noexcept;
+ iterator end() const noexcept;
+ };
+
+ template<typename T>
+ struct basic_string {
+ T* p;
+ T *c_str();
+ T *data();
+ unsigned size_bytes();
+ };
+
+ typedef basic_string<char> string;
+ typedef basic_string<wchar_t> wstring;
+
+ // C function under std:
+ void memcpy();
+ void strcpy();
+}
+
+void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
+ memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
+ std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
+ __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}}
+ __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}}
+ __asan_memcpy(); // expected-warning{{function __asan_memcpy introduces unsafe buffer access}}
+ strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
+ std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
+ strcpy_s(); // expected-warning{{function strcpy_s introduces unsafe buffer access}}
+ wcscpy_s(); // expected-warning{{function wcscpy_s introduces unsafe buffer access}}
+
+
+ /* Test printfs */
+ fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ printf("%s%d", p, *p); // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+ snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
+ sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}}
+ sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}}
+ fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ printf("%s%d", "hello", *p); // no warn
+ snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+ strlen("hello");// no warn
+}
+
+void v(std::string s1) {
+ snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+ std::copy(begin, end, p); // no warn
+ std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 844311c3a51a58..668efe0e178b53 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -1,8 +1,6 @@
// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s
-// expected-no-diagnostics
-
typedef unsigned __darwin_size_t;
typedef __darwin_size_t size_t;
#define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); }
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}}
>From e9a5b7a5dd064728c143a8b8a26c8951b427392b Mon Sep 17 00:00:00 2001
From: ziqingluo-90 <ziqing_luo at apple.com>
Date: Mon, 12 Aug 2024 17:46:48 -0700
Subject: [PATCH 2/5] Reduce false positives in warnings about printfs
- Predefined name macros such as `__PRETTY_FUNCTION__` are considered safe.
- We need to distinguish between `%s` and `%p` as both accept pointers
but the latter is not a buffer operation. This leaves us no choice
other than parsing the format string. Fortunately, the building
blocks of format parsing have already existed and are quite handy.
---
clang/include/clang/AST/FormatString.h | 12 ++++
clang/lib/AST/PrintfFormatString.cpp | 28 ++++++++++
clang/lib/Analysis/UnsafeBufferUsage.cpp | 56 +++++++++++++++----
...arn-unsafe-buffer-usage-libc-functions.cpp | 11 +++-
4 files changed, 93 insertions(+), 14 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index a074dd23e2ad4c..cdba2a7abe49d9 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H,
bool ParseFormatStringHasSArg(const char *beg, const char *end,
const LangOptions &LO, const TargetInfo &Target);
+/// Parse C format string and return index (relative to `ArgIndex`) of the first
+/// found `s` specifier. Return 0 if not found.
+/// \param I The start of the C format string; Updated to the first unparsed
+/// position upon return.
+/// \param E The end of the C format string;
+/// \param ArgIndex The argument index of the last found `s` specifier; Or the
+/// argument index of the formatter in initial case.
+unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E,
+ unsigned ArgIndex,
+ const LangOptions &LO,
+ const TargetInfo &Target);
+
bool ParseScanfString(FormatStringHandler &H,
const char *beg, const char *end, const LangOptions &LO,
const TargetInfo &Target);
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 3c6cd2d0f43417..9992afd402d370 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -483,6 +483,34 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
return false;
}
+unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex(
+ const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO,
+ const TargetInfo &Target) {
+ unsigned argIndex = ArgIndex;
+
+ // Keep looking for a %s format specifier until we have exhausted the string.
+ FormatStringHandler H;
+ while (I != E) {
+ const PrintfSpecifierResult &FSR =
+ ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
+ // Did a fail-stop error of any kind occur when parsing the specifier?
+ // If so, don't do any more processing.
+ if (FSR.shouldStop())
+ return false;
+ // Did we exhaust the string or encounter an error that
+ // we can recover from?
+ if (!FSR.hasValue())
+ continue;
+ const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
+ // Return true if this a %s format specifier.
+ if (FS.getConversionSpecifier().getKind() ==
+ ConversionSpecifier::Kind::sArg) {
+ return FS.getPositionalArgIndex();
+ }
+ }
+ return false;
+}
+
bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
const char *Begin, const char *End, const LangOptions &LO,
const TargetInfo &Target) {
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 751fb75f6ed602..745de657fcd274 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -12,6 +12,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/AST/FormatString.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
@@ -611,6 +612,38 @@ static bool isNullTermPointer(const Expr *Ptr) {
return false;
}
+// Return true iff at least one of following cases holds:
+// 1. Format string is a literal and there is an unsafe pointer argument
+// corresponding to an `s` specifier;
+// 2. Format string is not a literal and there is least an unsafe pointer
+// argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
+ const CallExpr *Call, ASTContext &Ctx) {
+ if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+ StringRef FmtStr = SL->getString();
+ auto I = FmtStr.begin();
+ auto E = FmtStr.end();
+ unsigned ArgIdx = FmtArgIdx;
+
+ do {
+ ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
+ I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
+ if (ArgIdx && Call->getNumArgs() > ArgIdx &&
+ !isNullTermPointer(Call->getArg(ArgIdx)))
+ return true;
+ } while (ArgIdx);
+ return false;
+ }
+ // If format is not a string literal, we cannot analyze the format string.
+ // In this case, this call is considered unsafe if at least one argument
+ // (including the format argument) is unsafe pointer.
+ return llvm::any_of(
+ llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+ [](const Expr *Arg) {
+ return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ });
+}
+
// Matches a call to one of the `-printf" functions (excluding the ones with
// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
// fail to guarantee their null-termination. In other words, these calls are
@@ -626,19 +659,18 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
if (Prefix.ends_with("w"))
Prefix = Prefix.drop_back(1);
- auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
- return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
- };
-
if (Prefix.empty() ||
Prefix == "k") // printf: all pointer args should be null-terminated
- return any_of(Node.arguments(), AnyUnsafeStrPtr);
- if (Prefix == "f" && Node.getNumArgs() > 1)
- return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()),
- AnyUnsafeStrPtr);
- if (Prefix == "sn" && Node.getNumArgs() > 2) {
- return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()),
- AnyUnsafeStrPtr);
+ return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node,
+ Finder->getASTContext());
+ if (Prefix == "f")
+ return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node,
+ Finder->getASTContext());
+ if (Prefix == "sn") {
+ // The first two arguments need to be in safe patterns, which is checked
+ // by `isSafeSizedby`:
+ return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node,
+ Finder->getASTContext());
}
return false; // A call to a "-printf" falls into another category.
}
@@ -775,7 +807,7 @@ AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
StringRef CoreName =
TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
-
+
return InnerMatcher(CoreName).matches(Node, Finder, Builder);
}
} // namespace clang::ast_matchers
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index ea2c5ec82211b2..c3d7f8fd05435b 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -70,13 +70,20 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}}
sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}}
fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
printf("%s%d", "hello", *p); // no warn
snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+ snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
strlen("hello");// no warn
}
-void v(std::string s1) {
- snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+void v(std::string s1, int *p) {
+ snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
+ snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
+ printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
+ printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
+ fprintf((FILE*)0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
+ fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
}
>From fa21260c1ec31eec115125a777a66874df9b38c4 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 <ziqing_luo at apple.com>
Date: Wed, 14 Aug 2024 17:02:19 -0700
Subject: [PATCH 3/5] Address comments
---
clang/include/clang/AST/FormatString.h | 12 -
clang/lib/AST/PrintfFormatString.cpp | 28 -
clang/lib/Analysis/UnsafeBufferUsage.cpp | 624 ++++++++++--------
clang/lib/Sema/AnalysisBasedWarnings.cpp | 6 +-
...-usage-libc-functions-inline-namespace.cpp | 12 +-
...arn-unsafe-buffer-usage-libc-functions.cpp | 44 +-
...n-unsafe-buffer-usage-test-unreachable.cpp | 2 +-
7 files changed, 369 insertions(+), 359 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index cdba2a7abe49d9..a074dd23e2ad4c 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -783,18 +783,6 @@ bool ParsePrintfString(FormatStringHandler &H,
bool ParseFormatStringHasSArg(const char *beg, const char *end,
const LangOptions &LO, const TargetInfo &Target);
-/// Parse C format string and return index (relative to `ArgIndex`) of the first
-/// found `s` specifier. Return 0 if not found.
-/// \param I The start of the C format string; Updated to the first unparsed
-/// position upon return.
-/// \param E The end of the C format string;
-/// \param ArgIndex The argument index of the last found `s` specifier; Or the
-/// argument index of the formatter in initial case.
-unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E,
- unsigned ArgIndex,
- const LangOptions &LO,
- const TargetInfo &Target);
-
bool ParseScanfString(FormatStringHandler &H,
const char *beg, const char *end, const LangOptions &LO,
const TargetInfo &Target);
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 9992afd402d370..3c6cd2d0f43417 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -483,34 +483,6 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
return false;
}
-unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex(
- const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO,
- const TargetInfo &Target) {
- unsigned argIndex = ArgIndex;
-
- // Keep looking for a %s format specifier until we have exhausted the string.
- FormatStringHandler H;
- while (I != E) {
- const PrintfSpecifierResult &FSR =
- ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
- // Did a fail-stop error of any kind occur when parsing the specifier?
- // If so, don't do any more processing.
- if (FSR.shouldStop())
- return false;
- // Did we exhaust the string or encounter an error that
- // we can recover from?
- if (!FSR.hasValue())
- continue;
- const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
- // Return true if this a %s format specifier.
- if (FS.getConversionSpecifier().getKind() ==
- ConversionSpecifier::Kind::sArg) {
- return FS.getPositionalArgIndex();
- }
- }
- return false;
-}
-
bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
const char *Begin, const char *End, const LangOptions &LO,
const TargetInfo &Target) {
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 745de657fcd274..a1526125edf098 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -16,6 +16,7 @@
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Basic/SourceLocation.h"
@@ -23,11 +24,11 @@
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
-#include <functional>
#include <memory>
#include <optional>
#include <queue>
@@ -448,110 +449,223 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
-namespace libc_fun_disjoint_inner_matchers {
-// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
-// disjoint node sets. They all take a `CoreName`, which is the substring of a
-// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used
-// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
-// libc function calls.
+AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) {
+ return Node.getNumArgs() == Num;
+}
+
+namespace libc_func_matchers {
+// Under `libc_func_matchers`, define a set of matchers that match unsafe
+// functions in libc and unsafe calls to them.
+
+// A tiny parser to strip off common prefix and suffix of libc function names
+// in real code.
+//
+// Given a function name, `matchName` returns `CoreName` according to the
+// following grammar:
+//
+// LibcName := CoreName | CoreName + "_s"
+// MatchingName := "__builtin_" + LibcName |
+// "__builtin___" + LibcName + "_chk" |
+// "__asan_" + LibcName
+//
+struct LibcFunNamePrefixSuffixParser {
+ StringRef matchName(StringRef FunName, bool isBuiltin) {
+ // Try to match __builtin_:
+ if (isBuiltin && FunName.starts_with("__builtin_"))
+ // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+ // no match:
+ return matchLibcNameOrBuiltinChk(
+ FunName.drop_front(10 /* truncate "__builtin_" */));
+ // Try to match __asan_:
+ if (FunName.starts_with("__asan_"))
+ return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+ return matchLibcName(FunName);
+ }
+
+ // Parameter `Name` is the substring after stripping off the prefix
+ // "__builtin_".
+ StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+ if (Name.starts_with("__") && Name.ends_with("_chk"))
+ return matchLibcName(
+ Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+ return matchLibcName(Name);
+ }
+
+ StringRef matchLibcName(StringRef Name) {
+ if (Name.ends_with("_s"))
+ return Name.drop_back(2 /* truncate "_s" */);
+ return Name;
+ }
+};
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+ if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+ const CXXMethodDecl *MD = MCE->getMethodDecl();
+ const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
-// Matches a function call node such that
+ if (MD && RD && RD->isInStdNamespace())
+ if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+ return true;
+ }
+ return false;
+}
+
+// Return true iff at least one of following cases holds:
+// 1. Format string is a literal and there is an unsafe pointer argument
+// corresponding to an `s` specifier;
+// 2. Format string is not a literal and there is least an unsafe pointer
+// argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
+ ASTContext &Ctx, bool isKprintf = false) {
+ class StringFormatStringHandler
+ : public analyze_format_string::FormatStringHandler {
+ const CallExpr *Call;
+ unsigned FmtArgIdx;
+
+ public:
+ StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx)
+ : Call(Call), FmtArgIdx(FmtArgIdx) {}
+
+ bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen,
+ const TargetInfo &Target) override {
+ if (FS.getConversionSpecifier().getKind() ==
+ analyze_printf::PrintfConversionSpecifier::sArg) {
+ unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+ if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
+ if (!isNullTermPointer(Call->getArg(ArgIdx)))
+ return false; // stop parsing
+ }
+ return true; // continue parsing
+ }
+ };
+
+ const Expr *Fmt = Call->getArg(FmtArgIdx);
+
+ if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+ StringRef FmtStr = SL->getString();
+ StringFormatStringHandler Handler(Call, FmtArgIdx);
+
+ return analyze_format_string::ParsePrintfString(
+ Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
+ Ctx.getTargetInfo(), isKprintf);
+ }
+ // If format is not a string literal, we cannot analyze the format string.
+ // In this case, this call is considered unsafe if at least one argument
+ // (including the format argument) is unsafe pointer.
+ return llvm::any_of(
+ llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+ [](const Expr *Arg) {
+ return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ });
+}
+
+// Matches a FunctionDecl node such that
// 1. It's name, after stripping off predefined prefix and suffix, is
// `CoreName`; and
// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
// is a set of libc function names.
//
-// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
-// And, the notation `CoreName[str/wcs]` means a new name obtained from replace
+// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
+// The notation `CoreName[str/wcs]` means a new name obtained from replace
// string "wcs" with "str" in `CoreName`.
-//
-// Also note, the set of predefined function names does not include `printf`
-// functions, they are checked exclusively with other matchers below.
-// Maintaining the invariant that all matchers under
-// `libc_fun_disjoint_inner_matchers` are disjoint.
-AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
- static const std::set<StringRef> PredefinedNames{
- // numeric conversion:
- "atof",
- "atoi",
- "atol",
- "atoll",
- "strtol",
- "strtoll",
- "strtoul",
- "strtoull",
- "strtof",
- "strtod",
- "strtold",
- "strtoimax",
- "strtoumax",
- // "strfromf", "strfromd", "strfroml", // C23?
- // string manipulation:
- "strcpy",
- "strncpy",
- "strlcpy",
- "strcat",
- "strncat",
- "strlcat",
- "strxfrm",
- "strdup",
- "strndup",
- // string examination:
- "strlen",
- "strnlen",
- "strcmp",
- "strncmp",
- "stricmp",
- "strcasecmp",
- "strcoll",
- "strchr",
- "strrchr",
- "strspn",
- "strcspn",
- "strpbrk",
- "strstr",
- "strtok",
- // "mem-" functions
- "memchr",
- "wmemchr",
- "memcmp",
- "wmemcmp",
- "memcpy",
- "memccpy",
- "mempcpy",
- "wmemcpy",
- "memmove",
- "wmemmove",
- "memset",
- "wmemset",
- // IO:
- "fread",
- "fwrite",
- "fgets",
- "fgetws",
- "gets",
- "fputs",
- "fputws",
- "puts",
- // others
- "strerror_s",
- "strerror_r",
- "bcopy",
- "bzero",
- "bsearch",
- "qsort",
- };
- // This is safe: strlen("hello"). We don't want to be noisy on this case.
- auto isSafeStrlen = [&Node](StringRef Name) -> bool {
- return Name == "strlen" && Node.getNumArgs() == 1 &&
- isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts());
- };
+AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) {
+ static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr;
+ if (!PredefinedNames)
+ PredefinedNames =
+ std::make_unique<std::set<StringRef>, std::set<StringRef>>({
+ // numeric conversion:
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "strtol",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strtof",
+ "strtod",
+ "strtold",
+ "strtoimax",
+ "strtoumax",
+ // "strfromf", "strfromd", "strfroml", // C23?
+ // string manipulation:
+ "strcpy",
+ "strncpy",
+ "strlcpy",
+ "strcat",
+ "strncat",
+ "strlcat",
+ "strxfrm",
+ "strdup",
+ "strndup",
+ // string examination:
+ "strlen",
+ "strnlen",
+ "strcmp",
+ "strncmp",
+ "stricmp",
+ "strcasecmp",
+ "strcoll",
+ "strchr",
+ "strrchr",
+ "strspn",
+ "strcspn",
+ "strpbrk",
+ "strstr",
+ "strtok",
+ // "mem-" functions
+ "memchr",
+ "wmemchr",
+ "memcmp",
+ "wmemcmp",
+ "memcpy",
+ "memccpy",
+ "mempcpy",
+ "wmemcpy",
+ "memmove",
+ "wmemmove",
+ "memset",
+ "wmemset",
+ // IO:
+ "fread",
+ "fwrite",
+ "fgets",
+ "fgetws",
+ "gets",
+ "fputs",
+ "fputws",
+ "puts",
+ // others
+ "strerror_s",
+ "strerror_r",
+ "bcopy",
+ "bzero",
+ "bsearch",
+ "qsort",
+ });
+
+ auto *II = Node.getIdentifier();
+
+ if (!II)
+ return false;
+
+ StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+ II->getName(), Node.getBuiltinID());
// Match predefined names:
- if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
- return !isSafeStrlen(CoreName);
+ if (PredefinedNames->find(Name) != PredefinedNames->end())
+ return true;
- std::string NameWCS = CoreName.str();
+ std::string NameWCS = Name.str();
size_t WcsPos = NameWCS.find("wcs");
while (WcsPos != std::string::npos) {
@@ -560,31 +674,44 @@ AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
NameWCS[WcsPos++] = 'r';
WcsPos = NameWCS.find("wcs", WcsPos);
}
- if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
- return !isSafeStrlen(NameWCS);
+ if (PredefinedNames->find(NameWCS) != PredefinedNames->end())
+ return true;
// All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
// all should end with "scanf"):
- return CoreName.ends_with("scanf");
+ return Name.ends_with("scanf");
}
-// Match a call to one of the `-printf` functions taking `va_list`. We cannot
+// Match a call to one of the `v*printf` functions taking `va_list`. We cannot
// check safety for these functions so they should be changed to their
// non-va_list versions.
-AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
- StringRef Name = CoreName;
+AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) {
+ auto *II = Node.getIdentifier();
+
+ if (!II)
+ return false;
+
+ StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+ II->getName(), Node.getBuiltinID());
if (!Name.ends_with("printf"))
return false; // neither printf nor scanf
return Name.starts_with("v");
}
-// Matches a call to one of the `-sprintf` functions (excluding the ones with
-// va_list) as they are always unsafe and should be changed to corresponding
-// `-snprintf`s.
-AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
- StringRef Name = CoreName;
+// Matches a call to one of the `sprintf` functions as they are always unsafe
+// and should be changed to `snprintf`.
+AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) {
+ auto *II = Node.getIdentifier();
- if (!Name.ends_with("printf") || Name.starts_with("v"))
+ if (!II)
+ return false;
+
+ StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+ II->getName(), Node.getBuiltinID());
+
+ if (!Name.ends_with("printf") ||
+ // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
+ Name.starts_with("v"))
return false;
StringRef Prefix = Name.drop_back(6);
@@ -594,62 +721,17 @@ AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
return Prefix == "s";
}
-// A pointer type expression is known to be null-terminated, if it has the
-// form: E.c_str(), for any expression E of `std::string` type.
-static bool isNullTermPointer(const Expr *Ptr) {
- if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
- return true;
- if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
- return true;
- if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
- const CXXMethodDecl *MD = MCE->getMethodDecl();
- const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
-
- if (MD && RD && RD->isInStdNamespace())
- if (MD->getName() == "c_str" && RD->getName() == "basic_string")
- return true;
- }
- return false;
-}
+// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
+// character versions. Calls to these functions can be safe if their arguments
+// are carefully made safe.
+AST_MATCHER(FunctionDecl, isNormalPrintfFunc) {
+ auto *II = Node.getIdentifier();
-// Return true iff at least one of following cases holds:
-// 1. Format string is a literal and there is an unsafe pointer argument
-// corresponding to an `s` specifier;
-// 2. Format string is not a literal and there is least an unsafe pointer
-// argument (including the formatter argument).
-static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
- const CallExpr *Call, ASTContext &Ctx) {
- if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
- StringRef FmtStr = SL->getString();
- auto I = FmtStr.begin();
- auto E = FmtStr.end();
- unsigned ArgIdx = FmtArgIdx;
-
- do {
- ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
- I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
- if (ArgIdx && Call->getNumArgs() > ArgIdx &&
- !isNullTermPointer(Call->getArg(ArgIdx)))
- return true;
- } while (ArgIdx);
+ if (!II)
return false;
- }
- // If format is not a string literal, we cannot analyze the format string.
- // In this case, this call is considered unsafe if at least one argument
- // (including the format argument) is unsafe pointer.
- return llvm::any_of(
- llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
- [](const Expr *Arg) {
- return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
- });
-}
-// Matches a call to one of the `-printf" functions (excluding the ones with
-// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
-// fail to guarantee their null-termination. In other words, these calls are
-// safe if they use null-termination guaranteed string pointers.
-AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
- StringRef Name = CoreName;
+ StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+ II->getName(), Node.getBuiltinID());
if (!Name.ends_with("printf") || Name.starts_with("v"))
return false;
@@ -659,50 +741,76 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
if (Prefix.ends_with("w"))
Prefix = Prefix.drop_back(1);
- if (Prefix.empty() ||
- Prefix == "k") // printf: all pointer args should be null-terminated
- return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node,
- Finder->getASTContext());
- if (Prefix == "f")
- return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node,
- Finder->getASTContext());
- if (Prefix == "sn") {
- // The first two arguments need to be in safe patterns, which is checked
- // by `isSafeSizedby`:
- return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node,
- Finder->getASTContext());
+ return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn";
+}
+
+// This matcher requires that it is known that the callee `isNormalPrintf`.
+// Then if the format string is a string literal, this matcher matches when at
+// least one string argument is unsafe. If the format is not a string literal,
+// this matcher matches when at least one pointer type argument is unsafe.
+AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
+ // Determine what printf it is:
+ const Expr *FirstArg = Node.getArg(0);
+ ASTContext &Ctx = Finder->getASTContext();
+
+ if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) {
+ // It is a printf/kprintf. And, the format is a string literal:
+ bool isKprintf = false;
+ if (auto *Callee = Node.getDirectCallee())
+ if (auto *II = Node.getDirectCallee()->getIdentifier())
+ isKprintf = II->getName() == "kprintf";
+ return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf);
+ }
+
+ QualType PtrTy = FirstArg->getType();
+
+ assert(PtrTy->isPointerType());
+
+ QualType PteTy = (cast<PointerType>(PtrTy))->getPointeeType();
+
+ if (!Ctx.getFILEType().isNull() /* If `FILE *` is not ever in the ASTContext,
+ there can't be any file pointer then */
+ && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
+ // It is a fprintf:
+ return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false);
}
- return false; // A call to a "-printf" falls into another category.
+
+ const Expr *SecondArg = Node.getArg(1);
+
+ if (SecondArg->getType()->isIntegerType()) {
+ // It is a snprintf:
+ return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false);
+ }
+ // It is printf but the format string is passed by pointer. The only thing we
+ // can do is to require all pointers to be null-terminated:
+ return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool {
+ return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ });
}
-// Matches a call to one of the `snprintf` functions (excluding the ones with
-// va_list) such that the first two arguments fail to conform to safe patterns.
+// This matcher requires that it is known that the callee `isNormalPrintf`.
+// Then it matches if the first two arguments of the call is a pointer and an
+// integer and they are not in a safe pattern.
//
// For the first two arguments: `ptr` and `size`, they are safe if in the
// following patterns:
// ptr := DRE.data();
// size:= DRE.size()/DRE.size_bytes()
// And DRE is a hardened container or view.
-AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
- StringRef Name = CoreName;
-
- if (!Name.ends_with("printf") || Name.starts_with("v"))
- return false; // not snprint or use va_list
+AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
+ if (Node.getNumArgs() < 3)
+ return false; // not an snprintf call
- StringRef Prefix = Name.drop_back(6);
+ const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
- if (Prefix.ends_with("w"))
- Prefix = Prefix.drop_back(1);
-
- if (Prefix != "sn")
- return false; // not snprint
+ if (!Buf->getType()->isPointerType() || !Size->getType()->isIntegerType())
+ return false; // not an snprintf call
static StringRef SizedObjs[] = {"span", "array", "vector",
"basic_string_view", "basic_string"};
- const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts();
- const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts();
-
- if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr))
+ Buf = Buf->IgnoreParenImpCasts();
+ Size = Size->IgnoreParenImpCasts();
+ if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf))
if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
auto *DREOfPtr = dyn_cast<DeclRefExpr>(
MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
@@ -730,86 +838,7 @@ AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
}
return true; // ptr and size are not in safe pattern
}
-} // namespace libc_fun_disjoint_inner_matchers
-
-// Match call to a function whose name may have prefixes like "__builtin_" or
-// "__asan_" and suffixes like "_s" or "_chk". This matcher takes an argument,
-// which should be applied to the core name---the subtring after stripping off
-// prefix and suffix of the function name.
-// The application results in an inner matcher that matches the call node with
-// respect to the core name of the callee.
-AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
- std::function<internal::Matcher<CallExpr>(StringRef)>,
- InnerMatcher) {
- // Given a function name, returns its core name `CoreName` according to the
- // following grammar.
- //
- // LibcName := CoreName | CoreName + "_s"
- // MatchingName := "__builtin_" + LibcName |
- // "__builtin___" + LibcName + "_chk" |
- // "__asan_" + LibcName
- //
- struct NameParser {
- StringRef matchName(StringRef FunName, bool isBuiltin) {
- // Try to match __builtin_:
- if (isBuiltin && FunName.starts_with("__builtin_"))
- // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
- // no match:
- return matchLibcNameOrBuiltinChk(
- FunName.drop_front(10 /* truncate "__builtin_" */));
- // Try to match __asan_:
- if (FunName.starts_with("__asan_"))
- return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
- return matchLibcName(FunName);
- }
-
- // Parameter `Name` is the substring after stripping off the prefix
- // "__builtin_".
- StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
- if (Name.starts_with("__") && Name.ends_with("_chk"))
- return matchLibcName(
- Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
- return matchLibcName(Name);
- }
-
- StringRef matchLibcName(StringRef Name) {
- if (Name.ends_with("_s"))
- return Name.drop_back(2 /* truncate "_s" */);
- return Name;
- }
- } TheLittleParser;
-
- const FunctionDecl *FD = Node.getDirectCallee();
- const IdentifierInfo *II;
-
- if (!FD)
- return false;
- II = FD->getIdentifier();
- // If this is a special C++ name without IdentifierInfo, it can't be a
- // C library function.
- if (!II)
- return false;
-
- // Look through 'extern "C"' and anything similar invented in the future.
- // In general, C library functions will be in the TU directly.
- if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
- // If that's not the case, we also consider "C functions" re-declared in
- // `std` namespace.
- if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace())
- return false;
- }
-
- // If this function is not externally visible, it is not a C library function.
- // Note that we make an exception for inline functions, which may be
- // declared in header files without external linkage.
- if (!FD->isInlined() && !FD->isExternallyVisible())
- return false;
-
- StringRef CoreName =
- TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
-
- return InnerMatcher(CoreName).matches(Node, Finder, Builder);
-}
+} // namespace libc_func_matchers
} // namespace clang::ast_matchers
namespace {
@@ -1420,34 +1449,49 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
: WarningGadget(Kind::UnsafeLibcFunctionCall),
Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
- if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf"))
+ if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSprintfTag))
WarnedFunKind = SPRINTF;
- else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string"))
+ else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeStringTag))
WarnedFunKind = STRING;
- else if (Result.Nodes.getNodeAs<CallExpr>(
- "UnsafeLibcFunctionCall_sized_by"))
+ else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag))
WarnedFunKind = SIZED_BY;
- else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list"))
+ else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeVaListTag))
WarnedFunKind = VA_LIST;
}
static Matcher matcher() {
- auto anyOfLibcInnerMatcher = [](StringRef S) {
- return anyOf(
- libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S),
- callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S))
- .bind(UnsafeStringTag),
- callExpr(
- libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S))
- .bind(UnsafeSizedByTag),
- callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S))
- .bind(UnsafeSprintfTag),
- callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S))
- .bind(UnsafeVaListTag));
- };
-
return stmt(
- callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag));
+ stmt(
+ anyOf(
+ // Match a call to a predefined unsafe libc function (unless the
+ // call has a sole string literal argument):
+ callExpr(callee(functionDecl(
+ libc_func_matchers::isPredefinedUnsafeLibcFunc())),
+ unless(allOf(hasArgument(0, expr(stringLiteral())),
+ hasNumArgs(1)))),
+ // Match a call to one of the `v*printf` functions taking
+ // va-list, which cannot be checked at compile-time:
+ callExpr(callee(functionDecl(
+ libc_func_matchers::isUnsafeVaListPrintfFunc())))
+ .bind(UnsafeVaListTag),
+ // Match a call to a `sprintf` function, which is never safe:
+ callExpr(callee(functionDecl(
+ libc_func_matchers::isUnsafeSprintfFunc())))
+ .bind(UnsafeSprintfTag),
+ // Match a call to an `snprintf` function. And first two
+ // arguments of the call (that describe a buffer) are not in
+ // safe patterns:
+ callExpr(callee(functionDecl(
+ libc_func_matchers::isNormalPrintfFunc())),
+ libc_func_matchers::hasUnsafeSnprintfBuffer())
+ .bind(UnsafeSizedByTag),
+ // Match a call to a `printf` function, which can be safe if all
+ // arguments are null-terminated:
+ callExpr(callee(functionDecl(
+ libc_func_matchers::isNormalPrintfFunc())),
+ libc_func_matchers::hasUnsafePrintfStringArg())
+ .bind(UnsafeStringTag)))
+ .bind(Tag));
}
const Stmt *getBaseStmt() const { return Call; }
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 53ade2df5c311b..b0f3b4ed61acc4 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2294,11 +2294,9 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
ASTContext &Ctx) override {
- // We have checked that there is a direct callee with an identifier name:
- StringRef Name = Call->getDirectCallee()->getName();
-
S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
- << Name << Call->getSourceRange();
+ << Call->getDirectCallee() // We've checked there is a direct callee
+ << Call->getSourceRange();
if (PrintfInfo > 0)
S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call)
<< PrintfInfo << Call->getSourceRange();
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
index eebbc381a262ff..61a316456ace6a 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
@@ -37,14 +37,14 @@ namespace std {
}
void f(char * p, char * q, std::span<char> s) {
- std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
- std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
- std::__1::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
- std::__1::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
+ std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+ std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
+ std::__1::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+ std::__1::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
/* Test printfs */
- std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
- std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index c3d7f8fd05435b..0eeedff611b588 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -11,7 +11,10 @@ unsigned strlen( const char* str );
int fprintf( FILE* stream, const char* format, ... );
int printf( const char* format, ... );
int sprintf( char* buffer, const char* format, ... );
+int swprintf( char* buffer, const char* format, ... );
int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... );
int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
int sscanf_s(const char * buffer, const char * format, ...);
int sscanf(const char * buffer, const char * format, ... );
@@ -49,31 +52,36 @@ namespace std {
}
void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
- memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
- std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}}
- __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}}
- __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}}
- __asan_memcpy(); // expected-warning{{function __asan_memcpy introduces unsafe buffer access}}
- strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
- std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}}
- strcpy_s(); // expected-warning{{function strcpy_s introduces unsafe buffer access}}
- wcscpy_s(); // expected-warning{{function wcscpy_s introduces unsafe buffer access}}
+ memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+ std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+ __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' introduces unsafe buffer access}}
+ __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' introduces unsafe buffer access}}
+ __asan_memcpy(); // expected-warning{{function '__asan_memcpy' introduces unsafe buffer access}}
+ strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
+ std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
+ strcpy_s(); // expected-warning{{function 'strcpy_s' introduces unsafe buffer access}}
+ wcscpy_s(); // expected-warning{{function 'wcscpy_s' introduces unsafe buffer access}}
/* Test printfs */
- fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
- printf("%s%d", p, *p); // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
- sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
- snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
- snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
- vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
- sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}}
- sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}}
- fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+ swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+ snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+ vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
+ sscanf(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf' introduces unsafe buffer access}}
+ sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' introduces unsafe buffer access}}
+ fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
printf("%s%d", "hello", *p); // no warn
snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+ snwprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+ snwprintf_s(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
strlen("hello");// no warn
}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 668efe0e178b53..22a43490551c20 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -3,4 +3,4 @@
typedef unsigned __darwin_size_t;
typedef __darwin_size_t size_t;
#define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}}
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function '__builtin_bzero' introduces unsafe buffer access}}
>From 276dee85324cc2df2a6c6461330609c57b833dc5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 28 Aug 2024 17:00:49 -0700
Subject: [PATCH 4/5] Address more comments, mainly for improving diag
messsages
---
.../Analysis/Analyses/UnsafeBufferUsage.h | 5 +-
.../clang/Basic/DiagnosticSemaKinds.td | 4 +-
clang/lib/Analysis/UnsafeBufferUsage.cpp | 142 +++++++++++-------
clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 +-
...arn-unsafe-buffer-usage-libc-functions.cpp | 15 +-
5 files changed, 112 insertions(+), 66 deletions(-)
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 3e0fae6db7562d..aa2c01ad10d45d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -115,8 +115,11 @@ class UnsafeBufferUsageHandler {
/// safe pattern;
/// is 3 if string arguments do not guarantee null-termination
/// is 4 if the callee takes va_list
+ /// \param UnsafeArg one of the actual arguments that is unsafe, non-null
+ /// only when `2 <= PrintfInfo <= 3`
virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
- ASTContext &Ctx) = 0;
+ ASTContext &Ctx,
+ const Expr *UnsafeArg = nullptr) = 0;
/// Invoked when an unsafe operation with a std container is found.
virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7e1e3686ce6554..3d8dcfaf3a7132 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12388,8 +12388,8 @@ def warn_unsafe_buffer_libc_call : Warning<
InGroup<UnsafeBufferUsage>, DefaultIgnore;
def note_unsafe_buffer_printf_call : Note<
"%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match"
- "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination"
- "| do not use va_list that cannot be checked at compile-time for bounds safety}0">;
+ "| use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination"
+ "| do not use va_list, which cannot be checked at compile-time for bounds safety}0">;
def note_unsafe_buffer_operation : Note<
"used%select{| in pointer arithmetic| in buffer access}0 here">;
def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a1526125edf098..7bcc3c05dee98c 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -521,16 +521,22 @@ static bool isNullTermPointer(const Expr *Ptr) {
// corresponding to an `s` specifier;
// 2. Format string is not a literal and there is least an unsafe pointer
// argument (including the formatter argument).
-static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
- ASTContext &Ctx, bool isKprintf = false) {
+//
+// `UnsafeArg` is the output argument that will be set only if this function
+// returns true.
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
+ const unsigned FmtArgIdx, ASTContext &Ctx,
+ bool isKprintf = false) {
class StringFormatStringHandler
: public analyze_format_string::FormatStringHandler {
const CallExpr *Call;
unsigned FmtArgIdx;
+ const Expr *&UnsafeArg;
public:
- StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx)
- : Call(Call), FmtArgIdx(FmtArgIdx) {}
+ StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
+ const Expr *&UnsafeArg)
+ : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
const char *startSpecifier,
@@ -541,8 +547,11 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
- if (!isNullTermPointer(Call->getArg(ArgIdx)))
- return false; // stop parsing
+ if (!isNullTermPointer(Call->getArg(ArgIdx))) {
+ UnsafeArg = Call->getArg(ArgIdx); // output
+ // returning false stops parsing immediately
+ return false;
+ }
}
return true; // continue parsing
}
@@ -552,7 +561,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
StringRef FmtStr = SL->getString();
- StringFormatStringHandler Handler(Call, FmtArgIdx);
+ StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
return analyze_format_string::ParsePrintfString(
Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -563,8 +572,12 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
// (including the format argument) is unsafe pointer.
return llvm::any_of(
llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
- [](const Expr *Arg) {
- return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ [&UnsafeArg](const Expr *Arg) -> bool {
+ if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
+ UnsafeArg = Arg;
+ return true;
+ }
+ return false;
});
}
@@ -748,7 +761,9 @@ AST_MATCHER(FunctionDecl, isNormalPrintfFunc) {
// Then if the format string is a string literal, this matcher matches when at
// least one string argument is unsafe. If the format is not a string literal,
// this matcher matches when at least one pointer type argument is unsafe.
-AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
+AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
+ clang::ast_matchers::internal::Matcher<Expr>,
+ UnsafeStringArgMatcher) {
// Determine what printf it is:
const Expr *FirstArg = Node.getArg(0);
ASTContext &Ctx = Finder->getASTContext();
@@ -756,10 +771,14 @@ AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) {
// It is a printf/kprintf. And, the format is a string literal:
bool isKprintf = false;
+ const Expr *UnsafeArg;
+
if (auto *Callee = Node.getDirectCallee())
if (auto *II = Node.getDirectCallee()->getIdentifier())
isKprintf = II->getName() == "kprintf";
- return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf);
+ if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf))
+ return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+ return false;
}
QualType PtrTy = FirstArg->getType();
@@ -772,20 +791,31 @@ AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
there can't be any file pointer then */
&& PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
// It is a fprintf:
- return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false);
+ const Expr *UnsafeArg;
+
+ if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false))
+ return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+ return false;
}
const Expr *SecondArg = Node.getArg(1);
if (SecondArg->getType()->isIntegerType()) {
// It is a snprintf:
- return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false);
+ const Expr *UnsafeArg;
+
+ if (unsigned UnsafeArgIdx =
+ hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false))
+ return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+ return false;
}
// It is printf but the format string is passed by pointer. The only thing we
// can do is to require all pointers to be null-terminated:
- return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool {
- return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
- });
+ for (auto Arg : Node.arguments())
+ if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg))
+ if (UnsafeStringArgMatcher.matches(*Arg, Finder, Builder))
+ return true;
+ return false;
}
// This matcher requires that it is known that the callee `isNormalPrintf`.
@@ -1423,6 +1453,7 @@ class DataInvocationGadget : public WarningGadget {
class UnsafeLibcFunctionCallGadget : public WarningGadget {
const CallExpr *const Call;
+ const Expr *UnsafeArg = nullptr;
constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
// Extra tags for additional information:
constexpr static const char *const UnsafeSprintfTag =
@@ -1437,8 +1468,8 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
enum UnsafeKind {
OTHERS = 0, // no specific information, the callee function is unsafe
SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
- SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but
- // they do not conform to safe patterns
+ SIZED_BY = 2, // the first two arguments of `snprintf` function have
+ // "__sized_by" relation but they do not conform to safe patterns
STRING = 3, // an argument is a pointer-to-char-as-string but does not
// guarantee null-termination
VA_LIST = 4, // one of the `-printf`s function that take va_list, which is
@@ -1449,49 +1480,52 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
: WarningGadget(Kind::UnsafeLibcFunctionCall),
Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
- if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSprintfTag))
+ if (Result.Nodes.getNodeAs<Decl>(UnsafeSprintfTag))
WarnedFunKind = SPRINTF;
- else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeStringTag))
+ else if (auto *E = Result.Nodes.getNodeAs<Expr>(UnsafeStringTag)) {
WarnedFunKind = STRING;
- else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag))
+ UnsafeArg = E;
+ } else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) {
WarnedFunKind = SIZED_BY;
- else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeVaListTag))
+ UnsafeArg = Call->getArg(0);
+ } else if (Result.Nodes.getNodeAs<Decl>(UnsafeVaListTag))
WarnedFunKind = VA_LIST;
}
static Matcher matcher() {
- return stmt(
- stmt(
- anyOf(
- // Match a call to a predefined unsafe libc function (unless the
- // call has a sole string literal argument):
- callExpr(callee(functionDecl(
- libc_func_matchers::isPredefinedUnsafeLibcFunc())),
- unless(allOf(hasArgument(0, expr(stringLiteral())),
- hasNumArgs(1)))),
- // Match a call to one of the `v*printf` functions taking
- // va-list, which cannot be checked at compile-time:
- callExpr(callee(functionDecl(
- libc_func_matchers::isUnsafeVaListPrintfFunc())))
+ return stmt(anyOf(
+ callExpr(
+ callee(functionDecl(anyOf(
+ // Match a predefined unsafe libc
+ // function:
+ functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()),
+ // Match a call to one of the `v*printf` functions
+ // taking va-list, which cannot be checked at
+ // compile-time:
+ functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc())
.bind(UnsafeVaListTag),
- // Match a call to a `sprintf` function, which is never safe:
- callExpr(callee(functionDecl(
- libc_func_matchers::isUnsafeSprintfFunc())))
- .bind(UnsafeSprintfTag),
- // Match a call to an `snprintf` function. And first two
- // arguments of the call (that describe a buffer) are not in
- // safe patterns:
- callExpr(callee(functionDecl(
- libc_func_matchers::isNormalPrintfFunc())),
- libc_func_matchers::hasUnsafeSnprintfBuffer())
- .bind(UnsafeSizedByTag),
- // Match a call to a `printf` function, which can be safe if all
- // arguments are null-terminated:
- callExpr(callee(functionDecl(
- libc_func_matchers::isNormalPrintfFunc())),
- libc_func_matchers::hasUnsafePrintfStringArg())
- .bind(UnsafeStringTag)))
- .bind(Tag));
+ // Match a call to a `sprintf` function, which is never
+ // safe:
+ functionDecl(libc_func_matchers::isUnsafeSprintfFunc())
+ .bind(UnsafeSprintfTag)))),
+ // (unless the call has a sole string literal argument):
+ unless(
+ allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))),
+
+ // The following two cases require checking against actual
+ // arguments of the call:
+
+ // Match a call to an `snprintf` function. And first two
+ // arguments of the call (that describe a buffer) are not in
+ // safe patterns:
+ callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
+ libc_func_matchers::hasUnsafeSnprintfBuffer())
+ .bind(UnsafeSizedByTag),
+ // Match a call to a `printf` function, which can be safe if
+ // all arguments are null-terminated:
+ callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
+ libc_func_matchers::hasUnsafePrintfStringArg(
+ expr().bind(UnsafeStringTag)))));
}
const Stmt *getBaseStmt() const { return Call; }
@@ -1501,7 +1535,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
bool IsRelatedToDecl,
ASTContext &Ctx) const override {
- Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx);
+ Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx, UnsafeArg);
}
DeclUseList getClaimedVarUseSites() const override { return {}; }
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index b0f3b4ed61acc4..598f095f541110 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2293,13 +2293,17 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
}
void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
- ASTContext &Ctx) override {
+ ASTContext &Ctx,
+ const Expr *UnsafeArg = nullptr) override {
S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
<< Call->getDirectCallee() // We've checked there is a direct callee
<< Call->getSourceRange();
- if (PrintfInfo > 0)
- S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call)
- << PrintfInfo << Call->getSourceRange();
+ if (PrintfInfo > 0) {
+ SourceRange R =
+ UnsafeArg ? UnsafeArg->getSourceRange() : Call->getSourceRange();
+ S.Diag(R.getBegin(), diag::note_unsafe_buffer_printf_call)
+ << PrintfInfo << R;
+ }
}
void handleUnsafeOperationInContainer(const Stmt *Operation,
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 0eeedff611b588..c116289ee96a69 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -64,18 +64,23 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
/* Test printfs */
- fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
- printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}}
+ printf("%s%d", // expected-warning{{function 'printf' introduces unsafe buffer access}}
+ p, // expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}} note attached to the unsafe argument
+ *p);
sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
- snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
- vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
+ snwprintf_s( // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}}
+ s.data(), // expected-note{{buffer pointer and size may not match}} // note attached to the buffer
+ s2.size(),
+ "%s%d", "hello", *p);
+ vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list, which cannot be checked at compile-time for bounds safety}}
sscanf(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf' introduces unsafe buffer access}}
sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' introduces unsafe buffer access}}
- fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+ fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}}
fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
printf("%s%d", "hello", *p); // no warn
snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
>From f85a487c4cbb57678081fd95bfe5e2bf691f7226 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 28 Aug 2024 17:19:40 -0700
Subject: [PATCH 5/5] clean up include list
---
clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 ------
1 file changed, 6 deletions(-)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 7bcc3c05dee98c..8e62ae4bbda6f1 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,25 +9,19 @@
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
-#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
-#include "clang/AST/ExprCXX.h"
#include "clang/AST/FormatString.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
-#include "clang/AST/Type.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
#include <memory>
#include <optional>
More information about the cfe-commits
mailing list