[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
Ziqing Luo via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 15 15:21:16 PDT 2024
================
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets. They all take a `CoreName`, which is the substring of a
+// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+// 1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+// And, the notation `CoreName[str/wcs]` means a new name obtained from replace
+// string "wcs" with "str" in `CoreName`.
+//
+// Also note, the set of predefined function names does not include `printf`
+// functions, they are checked exclusively with other matchers below.
+// Maintaining the invariant that all matchers under
+// `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+ static const std::set<StringRef> PredefinedNames{
+ // numeric conversion:
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "strtol",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strtof",
+ "strtod",
+ "strtold",
+ "strtoimax",
+ "strtoumax",
+ // "strfromf", "strfromd", "strfroml", // C23?
+ // string manipulation:
+ "strcpy",
+ "strncpy",
+ "strlcpy",
+ "strcat",
+ "strncat",
+ "strlcat",
+ "strxfrm",
+ "strdup",
+ "strndup",
+ // string examination:
+ "strlen",
+ "strnlen",
+ "strcmp",
+ "strncmp",
+ "stricmp",
+ "strcasecmp",
+ "strcoll",
+ "strchr",
+ "strrchr",
+ "strspn",
+ "strcspn",
+ "strpbrk",
+ "strstr",
+ "strtok",
+ // "mem-" functions
+ "memchr",
+ "wmemchr",
+ "memcmp",
+ "wmemcmp",
+ "memcpy",
+ "memccpy",
+ "mempcpy",
+ "wmemcpy",
+ "memmove",
+ "wmemmove",
+ "memset",
+ "wmemset",
+ // IO:
+ "fread",
+ "fwrite",
+ "fgets",
+ "fgetws",
+ "gets",
+ "fputs",
+ "fputws",
+ "puts",
+ // others
+ "strerror_s",
+ "strerror_r",
+ "bcopy",
+ "bzero",
+ "bsearch",
+ "qsort",
+ };
+ // This is safe: strlen("hello"). We don't want to be noisy on this case.
+ auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+ return Name == "strlen" && Node.getNumArgs() == 1 &&
+ isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts());
+ };
+
+ // Match predefined names:
+ if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+ return !isSafeStrlen(CoreName);
+
+ std::string NameWCS = CoreName.str();
+ size_t WcsPos = NameWCS.find("wcs");
+
+ while (WcsPos != std::string::npos) {
+ NameWCS[WcsPos++] = 's';
+ NameWCS[WcsPos++] = 't';
+ NameWCS[WcsPos++] = 'r';
+ WcsPos = NameWCS.find("wcs", WcsPos);
+ }
+ if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+ return !isSafeStrlen(NameWCS);
+ // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
+ // all should end with "scanf"):
+ return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`. We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+ StringRef Name = CoreName;
+
+ if (!Name.ends_with("printf"))
+ return false; // neither printf nor scanf
+ return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+ StringRef Name = CoreName;
+
+ if (!Name.ends_with("printf") || Name.starts_with("v"))
+ return false;
+
+ StringRef Prefix = Name.drop_back(6);
+
+ if (Prefix.ends_with("w"))
+ Prefix = Prefix.drop_back(1);
+ return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+ if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+ const CXXMethodDecl *MD = MCE->getMethodDecl();
+ const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+ if (MD && RD && RD->isInStdNamespace())
+ if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+ return true;
+ }
+ return false;
+}
+
+// Return true iff at least one of following cases holds:
+// 1. Format string is a literal and there is an unsafe pointer argument
+// corresponding to an `s` specifier;
+// 2. Format string is not a literal and there is least an unsafe pointer
+// argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
+ const CallExpr *Call, ASTContext &Ctx) {
+ if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+ StringRef FmtStr = SL->getString();
+ auto I = FmtStr.begin();
+ auto E = FmtStr.end();
+ unsigned ArgIdx = FmtArgIdx;
+
+ do {
+ ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
+ I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
+ if (ArgIdx && Call->getNumArgs() > ArgIdx &&
+ !isNullTermPointer(Call->getArg(ArgIdx)))
+ return true;
+ } while (ArgIdx);
+ return false;
+ }
+ // If format is not a string literal, we cannot analyze the format string.
+ // In this case, this call is considered unsafe if at least one argument
+ // (including the format argument) is unsafe pointer.
----------------
ziqingluo-90 wrote:
> I suspect that it's not particularly important whether the target-buffer argument of `sprintf`/`snprintf` is null-terminated. They don't consider a null terminator when they print, they treat it as a simple buffer.
Yes, this is why the checking range is `llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),`. It starts from the format string argument to the rest of them. If it's `snprintf`, it starts from the 3rd argument, skipping the first two.
https://github.com/llvm/llvm-project/pull/101583
More information about the cfe-commits
mailing list