[clang] Warning Libc functions (PR #101583)
Ziqing Luo via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 1 17:52:32 PDT 2024
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583
>From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
functions
Warning about calls to libc functions involving buffer access. Warned
functions are hardcoded by names.
---
.../Analyses/UnsafeBufferUsageGadgets.def | 1 +
clang/lib/Analysis/UnsafeBufferUsage.cpp | 279 +++++++++++++++++-
...arn-unsafe-buffer-usage-libc-functions.cpp | 80 +++++
...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +-
4 files changed, 360 insertions(+), 4 deletions(-)
create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
WARNING_GADGET(UnsafeBufferUsageAttr)
WARNING_GADGET(UnsafeBufferUsageCtorAttr)
WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..926a78b6f0096 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,25 @@
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
+#include <cstddef>
#include <memory>
#include <optional>
#include <queue>
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+ static const std::set<StringRef> PredefinedNames{
+ // numeric conversion:
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "strtol",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strtof",
+ "strtod",
+ "strtold",
+ "strtoimax",
+ "strtoumax",
+ // "strfromf", "strfromd", "strfroml", // C23?
+ // string manipulation:
+ "strcpy",
+ "strncpy",
+ "strlcpy",
+ "strcat",
+ "strncat",
+ "strlcat",
+ "strxfrm",
+ "strdup",
+ "strndup",
+ // string examination:
+ "strlen",
+ "strnlen",
+ "strcmp",
+ "strncmp",
+ "stricmp",
+ "strcasecmp",
+ "strcoll",
+ "strchr",
+ "strrchr",
+ "strspn",
+ "strcspn",
+ "strpbrk",
+ "strstr",
+ "strtok",
+ // "mem-" functions
+ "memchr",
+ "wmemchr",
+ "memcmp",
+ "wmemcmp",
+ "memcpy",
+ "memccpy",
+ "mempcpy",
+ "wmemcpy",
+ "memmove",
+ "wmemmove",
+ "memset",
+ "wmemset",
+ // IO:
+ "fread",
+ "fwrite",
+ "fgets",
+ "fgetws",
+ "gets",
+ "fputs",
+ "fputws",
+ "puts",
+ // others
+ "strerror_s",
+ "strerror_r",
+ "bcopy",
+ "bzero",
+ "bsearch",
+ "qsort",
+ };
+
+ // A tiny name parser for unsafe libc function names with additional
+ // checks for `printf`s:
+ struct FuncNameMatch {
+ const CallExpr *const Call;
+ FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+ // For a name `S` in `PredefinedNames` or a member of the printf/scanf
+ // family, define matching function names with `S` by the grammar below:
+ //
+ // CoreName := S | S["wcs"/"str"]
+ // LibcName := CoreName | CoreName + "_s"
+ // MatchingName := "__builtin_" + LibcName |
+ // "__builtin___" + LibcName + "_chk" |
+ // "__asan_" + LibcName
+ //
+ // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+ bool matchName(StringRef FunName) {
+ // Try to match __builtin_:
+ if (FunName.starts_with("__builtin_"))
+ // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+ // no match:
+ return matchLibcNameOrBuiltinChk(
+ FunName.drop_front(10 /* truncate "__builtin_" */));
+ // Try to match __asan_:
+ if (FunName.starts_with("__asan_"))
+ return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+ return matchLibcName(FunName);
+ }
+
+ // Parameter `Name` is the substring after stripping off the prefix
+ // "__builtin_".
+ bool matchLibcNameOrBuiltinChk(StringRef Name) {
+ if (Name.starts_with("__") && Name.ends_with("_chk"))
+ return matchLibcName(
+ Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+ return matchLibcName(Name);
+ }
+
+ bool matchLibcName(StringRef Name) {
+ if (Name.ends_with("_s"))
+ return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
+ return matchCoreName(Name);
+ }
+
+ bool matchCoreName(StringRef Name) {
+ if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
+ return !isSafeStrlen(Name); // match unless it's a safe strlen call
+
+ std::string NameWCS = Name.str();
+ size_t WcsPos = NameWCS.find("wcs");
+
+ while (WcsPos != std::string::npos) {
+ NameWCS[WcsPos++] = 's';
+ NameWCS[WcsPos++] = 't';
+ NameWCS[WcsPos++] = 'r';
+ WcsPos = NameWCS.find("wcs", WcsPos);
+ }
+ if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+ return !isSafeStrlen(NameWCS);
+ return matchPrintfOrScanfFamily(Name);
+ }
+
+ bool matchPrintfOrScanfFamily(StringRef Name) {
+ if (Name.ends_with("scanf"))
+ return true; // simply warn about scanf functions
+ if (!Name.ends_with("printf"))
+ return false; // neither printf nor scanf
+ if (Name.starts_with("v"))
+ // cannot check args for va_list, so `vprintf`s are treated as regular
+ // unsafe libc calls:
+ return true;
+
+ // Truncate "printf", focus on prefixes. There are different possible
+ // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the
+ // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix:
+ StringRef Prefix = Name.drop_back(6);
+
+ if (Prefix.ends_with("w"))
+ Prefix = Prefix.drop_back(1);
+ return isUnsafePrintf(Prefix);
+ }
+
+ // A pointer type expression is known to be null-terminated, if it has the
+ // form: E.c_str(), for any expression E of `std::string` type.
+ static bool isNullTermPointer(const Expr *Ptr) {
+ if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+ const CXXMethodDecl *MD = MCE->getMethodDecl();
+ const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+ if (MD && RD && RD->isInStdNamespace())
+ if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+ return true;
+ }
+ return false;
+ }
+
+ // Check safe patterns for printfs w.r.t their prefixes:
+ bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
+ auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
+ return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ };
+
+ if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated
+ return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr);
+
+ if (Prefix == "f" && Call->getNumArgs() > 1) {
+ // fprintf, same as printf but skip the first arg
+ auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end());
+ return llvm::any_of(Range, AnyUnsafeStrPtr);
+ }
+ if (Prefix == "sn" && Call->getNumArgs() > 2) {
+ // The first two arguments need to be in safe patterns, which is checked
+ // by `isSafeSizedby`:
+ auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end());
+ return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) ||
+ llvm::any_of(Range, AnyUnsafeStrPtr);
+ }
+ // Note `sprintf`s should be changed to `snprintf`s, so they are treated
+ // as regular unsafe libc calls:
+ return true;
+ }
+
+ // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern:
+ // SizedByPtr := DRE.data()
+ // Size := DRE.size_bytes(), for a same DRE of sized-container/view
+ // type.
+ bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) {
+ static StringRef SizedObjs[] = {"span", "array", "vector",
+ "basic_string_view", "basic_string"};
+ if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr))
+ if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+ if (auto *DREPtr =
+ dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument()))
+ if (auto *DRESize =
+ dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument()))
+ if (DREPtr->getDecl() ==
+ DRESize->getDecl()) // coming out of the same variable
+ if (MCEPtr->getMethodDecl()->getName() == "data")
+ if (MCESize->getMethodDecl()->getName() == "size_bytes")
+ for (StringRef SizedObj : SizedObjs)
+ if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+ MCEPtr->getRecordDecl()
+ ->getCanonicalDecl()
+ ->getName() == SizedObj)
+ return true;
+ }
+ return false;
+ }
+
+ // This is safe: strlen("hello"). We don't want to be noisy on this case.
+ bool isSafeStrlen(StringRef Name) {
+ return Name == "strlen" && Call->getNumArgs() == 1 &&
+ isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
+ }
+ } FuncNameMatch{&Node};
+
+ const FunctionDecl *FD = Node.getDirectCallee();
+ const IdentifierInfo *II;
+
+ if (!FD)
+ return false;
+ II = FD->getIdentifier();
+ // If this is a special C++ name without IdentifierInfo, it can't be a
+ // C library function.
+ if (!II)
+ return false;
+
+ // Look through 'extern "C"' and anything similar invented in the future.
+ // If this function is not in TU directly, it is not a C library function.
+ if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
+ return false;
+
+ // If this function is not externally visible, it is not a C library function.
+ // Note that we make an exception for inline functions, which may be
+ // declared in header files without external linkage.
+ if (!FD->isInlined() && !FD->isExternallyVisible())
+ return false;
+ return FuncNameMatch.matchName(II->getName());
+}
} // namespace clang::ast_matchers
namespace {
@@ -1025,6 +1284,24 @@ class DataInvocationGadget : public WarningGadget {
DeclUseList getClaimedVarUseSites() const override { return {}; }
};
+class UnsafeLibcFunctionCallGadget : public WarningGadget {
+ const CallExpr *const Call;
+ constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
+
+public:
+ UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
+ : WarningGadget(Kind::UnsafeLibcFunctionCall),
+ Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {}
+
+ static Matcher matcher() {
+ return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag));
+ }
+
+ const Stmt *getBaseStmt() const override { return Call; }
+
+ DeclUseList getClaimedVarUseSites() const override { return {}; }
+};
+
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
// Context (see `isInUnspecifiedLvalueContext`).
// Note here `[]` is the built-in subscript operator.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
new file mode 100644
index 0000000000000..326ecd6f284e5
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: -verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __builtin___memcpy_chk();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+ template< class InputIt, class OutputIt >
+ OutputIt copy( InputIt first, InputIt last,
+ OutputIt d_first );
+
+ struct iterator{};
+ template<typename T>
+ struct span {
+ T * ptr;
+ T * data();
+ unsigned size_bytes();
+ unsigned size();
+ iterator begin() const noexcept;
+ iterator end() const noexcept;
+ };
+
+ template<typename T>
+ struct basic_string {
+ T* p;
+ T *c_str();
+ T *data();
+ unsigned size_bytes();
+ };
+
+ typedef basic_string<char> string;
+ typedef basic_string<wchar_t> wstring;
+}
+
+void f(char * p, char * q, std::span<char> s) {
+ memcpy(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ __builtin_memcpy(p, q, 64); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ __builtin___memcpy_chk(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ strcpy(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ strcpy_s(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ wcscpy_s(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+
+
+ /* Test printfs */
+
+ fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ printf("%s%d", p, *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ sprintf(q, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ snprintf(s.data(), s.size(), "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ sscanf(p, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+ fprintf((FILE*)p, "%s%d", "hello", *p); // no warn
+ printf("%s%d", "hello", *p); // no warn
+ snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+ strlen("hello");// no warn
+}
+
+void v(std::string s1, std::wstring s2) {
+ snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+ std::copy(begin, end, p); // no warn
+ std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 844311c3a51a5..6a4541c403a48 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -1,8 +1,6 @@
// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s
-// expected-no-diagnostics
-
typedef unsigned __darwin_size_t;
typedef __darwin_size_t size_t;
#define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); }
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function introduces unsafe buffer manipulation}}
>From 2dff88db07d59a2b6e52230abf66ac3d4d723196 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 1 Aug 2024 16:38:50 -0700
Subject: [PATCH 2/3] [-Wunsafe-buffer-usage] Warn string_view constructions
that not guarantee null-termination
One could use `string_view` as the safe view over strings in read-only
scenarios as it is hardened, preserving bounds info and guaranteeing
null-termination with the new warning.
If one finds the warning being too restrictive, turn it off with
`-Wno-unsafe-buffer-usage-in-string-view`. The rest of the warnings
will not assume null-termination guarantee of `string_view` as well
then.
---
.../Analysis/Analyses/UnsafeBufferUsage.h | 8 +-
.../Analyses/UnsafeBufferUsageGadgets.def | 11 ++-
clang/include/clang/Basic/DiagnosticGroups.td | 3 +-
.../clang/Basic/DiagnosticSemaKinds.td | 3 +
clang/lib/Analysis/UnsafeBufferUsage.cpp | 86 ++++++++++++++++---
clang/lib/Sema/AnalysisBasedWarnings.cpp | 17 ++++
...arn-unsafe-buffer-usage-in-string_view.cpp | 85 ++++++++++++++++++
...arn-unsafe-buffer-usage-libc-functions.cpp | 22 ++++-
8 files changed, 216 insertions(+), 19 deletions(-)
create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e11..f7c5b887756d9 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -147,10 +147,16 @@ class UnsafeBufferUsageHandler {
virtual bool isSafeBufferOptOut(const SourceLocation &Loc) const = 0;
/// \return true iff unsafe uses in containers should NOT be reported at
- /// `Loc`; false otherwise.
+ /// `Loc`; false otherwise. Controlled by `-Wunsafe-buffer-usage-in-container`
virtual bool
ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0;
+ /// \return true iff unsafe uses in string_views should NOT be reported at
+ /// `Loc`; false otherwise. Controlled by
+ /// `-Wunsafe-buffer-usage-in-string-view`
+ virtual bool
+ ignoreUnsafeBufferInStringView(const SourceLocation &Loc) const = 0;
+
virtual std::string
getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc,
StringRef WSSuffix = "") const = 0;
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index ac01b285ae833..7f3fd24b5cdc7 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -18,6 +18,12 @@
#define WARNING_GADGET(name) GADGET(name)
#endif
+/// Make `WARNING_LIBC_GADGET` self a subset of WARNING_GADGET so that it can be
+/// tuned specially according to whether we warn `StringViewUnsafeConstructor`.
+#ifndef WARNING_LIBC_GADGET
+#define WARNING_LIBC_GADGET(name) WARNING_GADGET(name)
+#endif
+
/// A `WARNING_GADGET` subset, where the code pattern of each gadget
/// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`).
#ifndef WARNING_CONTAINER_GADGET
@@ -38,8 +44,9 @@ WARNING_GADGET(PointerArithmetic)
WARNING_GADGET(UnsafeBufferUsageAttr)
WARNING_GADGET(UnsafeBufferUsageCtorAttr)
WARNING_GADGET(DataInvocation)
-WARNING_GADGET(UnsafeLibcFunctionCall)
+WARNING_LIBC_GADGET(UnsafeLibcFunctionCall)
WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
+WARNING_CONTAINER_GADGET(StringViewUnsafeConstructor) // `std::string_view` constructed from std::string guarantees null-termination
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
FIXABLE_GADGET(DerefSimplePtrArithFixable)
FIXABLE_GADGET(PointerDereference)
@@ -53,5 +60,7 @@ FIXABLE_GADGET(PointerInit)
#undef FIXABLE_GADGET
#undef WARNING_GADGET
+#undef WARNING_LIBC_GADGET
#undef WARNING_CONTAINER_GADGET
+#undef WARNING_STRINGVIEW_GADGET
#undef GADGET
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 19c3f1e043349..68413fab136fe 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1551,7 +1551,8 @@ def HLSLAvailability : DiagGroup<"hlsl-availability">;
def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">;
// Warnings and fixes to support the "safe buffers" programming model.
-def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container">;
+def UnsafeBufferUsageInStringView : DiagGroup<"unsafe-buffer-usage-in-string-view">;
+def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container", [UnsafeBufferUsageInStringView]>;
def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer]>;
// Warnings and notes related to the function effects system underlying
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9..713a3b10b413b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12388,6 +12388,9 @@ def note_safe_buffer_usage_suggestions_disabled : Note<
def warn_unsafe_buffer_usage_in_container : Warning<
"the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information">,
InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore;
+def warn_unsafe_buffer_usage_in_string_view : Warning<
+ "construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead">,
+ InGroup<UnsafeBufferUsageInStringView>, DefaultIgnore;
#ifndef NDEBUG
// Not a user-facing diagnostic. Useful for debugging false negatives in
// -fsafe-buffer-usage-suggestions (i.e. lack of -Wunsafe-buffer-usage fixits).
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 926a78b6f0096..bd29296ee2dde 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -247,11 +247,16 @@ AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
}
-AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
+AST_MATCHER_P(Stmt, ignoreSpanTwoParamConstructor,
const UnsafeBufferUsageHandler *, Handler) {
return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc());
}
+AST_MATCHER_P(Stmt, ignoreStringViewUnsafeConstructor,
+ const UnsafeBufferUsageHandler *, Handler) {
+ return Handler->ignoreUnsafeBufferInStringView(Node.getBeginLoc());
+}
+
AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
}
@@ -448,7 +453,8 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
-AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+AST_MATCHER_P(CallExpr, isUnsafeLibcFunctionCall, internal::Matcher<Expr>,
+ hasUnsafeStringView) {
static const std::set<StringRef> PredefinedNames{
// numeric conversion:
"atof",
@@ -525,7 +531,9 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
// checks for `printf`s:
struct FuncNameMatch {
const CallExpr *const Call;
- FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+ const bool hasUnsafeStringView;
+ FuncNameMatch(const CallExpr *Call, bool hasUnsafeStringView)
+ : Call(Call), hasUnsafeStringView(hasUnsafeStringView) {}
// For a name `S` in `PredefinedNames` or a member of the printf/scanf
// family, define matching function names with `S` by the grammar below:
@@ -605,27 +613,35 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
// A pointer type expression is known to be null-terminated, if it has the
// form: E.c_str(), for any expression E of `std::string` type.
- static bool isNullTermPointer(const Expr *Ptr) {
+ static bool isNullTermPointer(const Expr *Ptr,
+ bool UnsafeStringView = true) {
if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
return true;
if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
const CXXMethodDecl *MD = MCE->getMethodDecl();
const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
- if (MD && RD && RD->isInStdNamespace())
+ if (MD && RD && RD->isInStdNamespace()) {
if (MD->getName() == "c_str" && RD->getName() == "basic_string")
return true;
+ if (!UnsafeStringView && MD->getName() == "data" &&
+ RD->getName() == "basic_string_view")
+ return true;
+ }
}
return false;
}
// Check safe patterns for printfs w.r.t their prefixes:
bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
- auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
- return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ const bool hasUnsafeStringView = this->hasUnsafeStringView;
+ auto AnyUnsafeStrPtr = [&hasUnsafeStringView](const Expr *Arg) -> bool {
+ return Arg->getType()->isPointerType() &&
+ !isNullTermPointer(Arg, hasUnsafeStringView);
};
- if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated
+ if (Prefix.empty() ||
+ Prefix == "k") // printf: all pointer args should be null-terminated
return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr);
if (Prefix == "f" && Call->getNumArgs() > 1) {
@@ -677,7 +693,7 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
return Name == "strlen" && Call->getNumArgs() == 1 &&
isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
}
- } FuncNameMatch{&Node};
+ } FuncNameMatch{&Node, hasUnsafeStringView.matches(Node, Finder, Builder)};
const FunctionDecl *FD = Node.getDirectCallee();
const IdentifierInfo *II;
@@ -1037,6 +1053,47 @@ class SpanTwoParamConstructorGadget : public WarningGadget {
}
};
+class StringViewUnsafeConstructorGadget : public WarningGadget {
+ static constexpr const char *const Tag = "StringViewUnsafeConstructor";
+ const CXXConstructExpr *Ctor;
+
+public:
+ StringViewUnsafeConstructorGadget(const MatchFinder::MatchResult &Result)
+ : WarningGadget(Kind::StringViewUnsafeConstructor),
+ Ctor(Result.Nodes.getNodeAs<CXXConstructExpr>(Tag)) {}
+
+ static bool classof(const Gadget *G) {
+ return G->getKind() == Kind::StringViewUnsafeConstructor;
+ }
+
+ // Matches any `basic_string_view` constructor call that is not a copy
+ // constructor or on a string literal:
+ static Matcher matcher() {
+ auto isStringViewDecl = hasDeclaration(cxxConstructorDecl(
+ hasDeclContext(isInStdNamespace()), hasName("basic_string_view")));
+
+ return stmt(
+ cxxConstructExpr(
+ isStringViewDecl,
+ unless(
+ hasArgument(0, expr(ignoringParenImpCasts(stringLiteral())))),
+ unless(hasDeclaration(cxxConstructorDecl(isCopyConstructor()))))
+ .bind(Tag));
+ }
+
+ const Stmt *getBaseStmt() const override { return Ctor; }
+
+ DeclUseList getClaimedVarUseSites() const override {
+ // If the constructor call is of the form `std::basic_string_view{ptrVar,
+ // ...}`, `ptrVar` is considered an unsafe variable.
+ if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) {
+ if (DRE->getType()->isPointerType() && isa<VarDecl>(DRE->getDecl()))
+ return {DRE};
+ }
+ return {};
+ }
+};
+
/// A pointer initialization expression of the form:
/// \code
/// int *p = q;
@@ -1293,8 +1350,10 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
: WarningGadget(Kind::UnsafeLibcFunctionCall),
Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {}
- static Matcher matcher() {
- return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag));
+ static Matcher
+ matcher(ast_matchers::internal::Matcher<Expr> HasUnsafeStringView) {
+ return stmt(
+ callExpr(isUnsafeLibcFunctionCall(HasUnsafeStringView)).bind(Tag));
}
const Stmt *getBaseStmt() const override { return Call; }
@@ -1724,10 +1783,13 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
#define WARNING_GADGET(x) \
allOf(x ## Gadget::matcher().bind(#x), \
notInSafeBufferOptOut(&Handler)),
+#define WARNING_LIBC_GADGET(x) \
+ allOf(x ## Gadget::matcher(ignoreStringViewUnsafeConstructor(&Handler)).bind(#x), \
+ notInSafeBufferOptOut(&Handler)),
#define WARNING_CONTAINER_GADGET(x) \
allOf(x ## Gadget::matcher().bind(#x), \
notInSafeBufferOptOut(&Handler), \
- unless(ignoreUnsafeBufferInContainer(&Handler))),
+ unless(ignore ## x(&Handler))),
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
// Avoid a hanging comma.
unless(stmt())
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 0f604c61fa3af..e5f38be388a5c 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2256,6 +2256,17 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
Range = UO->getSubExpr()->getSourceRange();
MsgParam = 1;
}
+ } else if (const auto *CtorExpr = dyn_cast<CXXConstructExpr>(Operation)) {
+ if (CtorExpr->getConstructor()->getCanonicalDecl()->getNameAsString() ==
+ "span")
+ S.Diag(CtorExpr->getLocation(),
+ diag::warn_unsafe_buffer_usage_in_container)
+ << CtorExpr->getSourceRange();
+ else // it is warning about "string_view":
+ S.Diag(CtorExpr->getLocation(),
+ diag::warn_unsafe_buffer_usage_in_string_view)
+ << CtorExpr->getSourceRange();
+ return;
} else {
if (isa<CallExpr>(Operation) || isa<CXXConstructExpr>(Operation)) {
// note_unsafe_buffer_operation doesn't have this mode yet.
@@ -2370,6 +2381,12 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, Loc);
}
+ bool
+ ignoreUnsafeBufferInStringView(const SourceLocation &Loc) const override {
+ return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_string_view,
+ Loc);
+ }
+
// Returns the text representation of clang::unsafe_buffer_usage attribute.
// `WSSuffix` holds customized "white-space"s, e.g., newline or whilespace
// characters.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp
new file mode 100644
index 0000000000000..536751446f585
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s -Wno-unsafe-buffer-usage-in-string-view -D_IGNORE_STRING_VIEW
+
+namespace std {
+ struct iterator{};
+
+ template<typename T>
+ struct basic_string_view {
+ T* p;
+ T *data();
+ unsigned size_bytes();
+ iterator begin();
+ iterator end();
+
+ constexpr basic_string_view( const T* s ){};
+ constexpr basic_string_view( const T* s, unsigned count );
+ constexpr basic_string_view( const basic_string_view& other ) noexcept = default;
+ template< class It, class End >
+ constexpr basic_string_view( It first, End last ) {};
+
+ };
+
+ typedef basic_string_view<char> string_view;
+ typedef basic_string_view<wchar_t> wstring_view;
+
+ template<typename T>
+ struct basic_string {
+ T *c_str();
+ operator std::basic_string_view<T>() {
+ }
+ };
+
+ typedef basic_string<char> string;
+ typedef basic_string<wchar_t> wstring;
+
+ template <class T> struct span {
+ constexpr span(T *, unsigned){}
+ };
+}
+
+void foo(std::string_view);
+void f(char * p, std::string S, std::string_view V) {
+ std::string_view SVs[] {S, "S"};
+ foo(S);
+ foo("S");
+ foo({S});
+ foo({"S"});
+ foo(V);
+#ifndef _IGNORE_STRING_VIEW
+ foo(p); // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}}
+#else
+ foo(p); // no warn
+#endif
+#ifndef _IGNORE_STRING_VIEW
+ foo({p, 10}); // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}}
+#else
+ foo({p, 10}); // no warn
+#endif
+
+ std::string_view SV{S};
+ std::string_view SV2{"S"};
+ std::string_view SV3 = S;
+ std::string_view SV4 = "S";
+ std::string_view SV5 = std::string_view(S);
+#ifndef _IGNORE_STRING_VIEW
+ std::string_view SV10 = p; // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}}
+#else
+ std::string_view SV10 = p; // no warn
+#endif
+#ifndef _IGNORE_STRING_VIEW
+ std::string_view SV11{p}; // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}}
+#else
+ std::string_view SV11{p}; // no warn
+#endif
+#ifndef _IGNORE_STRING_VIEW
+ std::string_view SV12{V.begin(), V.end()}; // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}}
+#else
+ std::string_view SV12{V.begin(), V.end()}; // no warn
+#endif
+}
+
+void g(int * p) {
+ // This warning is not affected:
+ std::span<int> S{p, 10}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 326ecd6f284e5..feaeb23d73127 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
-// RUN: -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s -Wno-unsafe-buffer-usage-in-string-view -D_IGNORE_STRING_VIEW
typedef struct {} FILE;
void memcpy();
@@ -42,6 +42,15 @@ namespace std {
typedef basic_string<char> string;
typedef basic_string<wchar_t> wstring;
+
+ template<typename T>
+ struct basic_string_view {
+ T* p;
+ T *data();
+ unsigned size_bytes();
+ };
+
+ typedef basic_string_view<char> string_view;
}
void f(char * p, char * q, std::span<char> s) {
@@ -69,8 +78,13 @@ void f(char * p, char * q, std::span<char> s) {
strlen("hello");// no warn
}
-void v(std::string s1, std::wstring s2) {
- snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+void v(std::string s, std::string_view sv) {
+ snprintf(s.data(), s.size_bytes(), "%s%d", s.c_str(), 0); // no warn
+#ifndef _IGNORE_STRING_VIEW
+ snprintf(sv.data(), sv.size_bytes(), "%s%d", sv.data(), 0); // no warn
+#else
+ snprintf(sv.data(), sv.size_bytes(), "%s%d", sv.data(), 0); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+#endif
}
>From d8cd544fe164ffd3208d4d6e5ba654d2420acb7c Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 1 Aug 2024 17:49:42 -0700
Subject: [PATCH 3/3] [Ignore for Code Review] Clean up headers and filling the
gap between our code in trunk and my local branch
---
clang/lib/Analysis/UnsafeBufferUsage.cpp | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index bd29296ee2dde..a6ae1c0420530 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -14,7 +14,6 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
-#include "clang/AST/StmtVisitor.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Basic/SourceLocation.h"
@@ -22,12 +21,10 @@
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
-#include <cstddef>
#include <memory>
#include <optional>
#include <queue>
@@ -1081,7 +1078,15 @@ class StringViewUnsafeConstructorGadget : public WarningGadget {
.bind(Tag));
}
- const Stmt *getBaseStmt() const override { return Ctor; }
+ const Stmt *getBaseStmt() const { return Ctor; }
+
+ SourceLocation getSourceLoc() const override {return Ctor->getLocation();}
+
+ void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+ bool IsRelatedToDecl,
+ ASTContext &Ctx) const override {
+ Handler.handleUnsafeOperation(Ctor, false, Ctx);
+ }
DeclUseList getClaimedVarUseSites() const override {
// If the constructor call is of the form `std::basic_string_view{ptrVar,
@@ -1356,7 +1361,15 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
callExpr(isUnsafeLibcFunctionCall(HasUnsafeStringView)).bind(Tag));
}
- const Stmt *getBaseStmt() const override { return Call; }
+ const Stmt *getBaseStmt() const { return Call; }
+
+ SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+ void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+ bool IsRelatedToDecl,
+ ASTContext &Ctx) const override {
+ Handler.handleUnsafeOperation(Call, false, Ctx);
+ }
DeclUseList getClaimedVarUseSites() const override { return {}; }
};
More information about the cfe-commits
mailing list