[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
Henrik G. Olsson via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 19 16:48:35 PDT 2024
================
@@ -443,6 +443,426 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
+// Matches a call expression whose argument count is exactly `Num`.
+AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) {
+  const unsigned ArgCount = Node.getNumArgs();
+  return ArgCount == Num;
+}
+
+namespace libc_func_matchers {
+// Under `libc_func_matchers`, define a set of matchers that match unsafe
+// functions in libc and unsafe calls to them.
+
+// Strips the common decorations that real code puts around libc function
+// names, so that only the core name is left for matching.
+//
+// For a given function name, `matchName` yields `CoreName` as described by
+// this grammar:
+//
+//  LibcName     := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName |
+//                  "__builtin___" + LibcName + "_chk" |
+//                  "__asan_" + LibcName
+//
+// A name carrying none of the listed prefixes is treated as a plain
+// `LibcName`.
+struct LibcFunNamePrefixSuffixParser {
+  // Entry point. The "__builtin_" prefix is only honored when `isBuiltin` is
+  // true; otherwise an "__asan_" prefix, if present, is removed. What remains
+  // has an optional "_s" suffix removed.
+  StringRef matchName(StringRef FunName, bool isBuiltin) {
+    const StringRef BuiltinPrefix = "__builtin_";
+    const StringRef AsanPrefix = "__asan_";
+
+    // "__builtin_LibcName" or "__builtin___LibcName_chk":
+    if (isBuiltin && FunName.starts_with(BuiltinPrefix))
+      return matchLibcNameOrBuiltinChk(
+          FunName.drop_front(BuiltinPrefix.size()));
+
+    // "__asan_LibcName" or a bare "LibcName":
+    StringRef Rest = FunName;
+
+    if (Rest.starts_with(AsanPrefix))
+      Rest = Rest.drop_front(AsanPrefix.size());
+    return matchLibcName(Rest);
+  }
+
+  // `Name` is what follows the already removed "__builtin_" prefix. When it
+  // is wrapped as "__" ... "_chk", both wrappers are removed before the "_s"
+  // suffix is handled.
+  StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+    const bool IsChkVariant = Name.starts_with("__") && Name.ends_with("_chk");
+
+    if (!IsChkVariant)
+      return matchLibcName(Name);
+
+    StringRef Inner = Name.drop_front(2 /* "__" */);
+
+    Inner = Inner.drop_back(4 /* "_chk" */);
+    return matchLibcName(Inner);
+  }
+
+  // Removes one trailing "_s", if there is one.
+  StringRef matchLibcName(StringRef Name) {
+    return Name.ends_with("_s") ? Name.drop_back(2 /* "_s" */) : Name;
+  }
+};
+
+// Returns true if the pointer-typed expression `Ptr` is known to point to a
+// null-terminated string. After looking through parentheses and implicit
+// casts, that is the case when `Ptr` is one of:
+//   - a string literal;
+//   - a predefined expression (e.g. `__func__`);
+//   - a call `E.c_str()` where the class of `E` is `std::basic_string`.
+// Anything else is conservatively reported as not null-terminated.
+static bool isNullTermPointer(const Expr *Ptr) {
+  // Strip parentheses and implicit casts once instead of once per check.
+  const Expr *Stripped = Ptr->IgnoreParenImpCasts();
+
+  if (isa<StringLiteral>(Stripped) || isa<PredefinedExpr>(Stripped))
+    return true;
+
+  const auto *MCE = dyn_cast<CXXMemberCallExpr>(Stripped);
+
+  if (!MCE)
+    return false;
+
+  const CXXMethodDecl *MD = MCE->getMethodDecl();
+  const CXXRecordDecl *RD = MCE->getRecordDecl();
+
+  // Both lookups may yield null, so check before dereferencing either one.
+  if (!MD || !RD)
+    return false;
+  // Methods without a simple identifier (e.g. conversion functions) cannot be
+  // `c_str`, and `getName()` must not be called on them.
+  if (!MD->getIdentifier() || MD->getName() != "c_str")
+    return false;
+
+  RD = RD->getCanonicalDecl();
+  return RD->isInStdNamespace() && RD->getName() == "basic_string";
+}
+
+// Returns true iff at least one of the following cases holds:
+//  1. The format string is a string literal with 1-byte code units and there
+//     is an unsafe pointer argument corresponding to an `s` specifier;
+//  2. The format string cannot be analyzed (it is not a string literal, or it
+//     is a literal with wider code units such as L"...") and there is at
+//     least one unsafe pointer argument (including the format argument).
+//
+// A pointer argument is "unsafe" when `isNullTermPointer` returns false for
+// it.
+//
+// Parameters:
+//   Call      - the call being inspected.
+//   UnsafeArg - output argument; assigned if and only if this function
+//               returns true.
+//   FmtArgIdx - index of the format-string argument within `Call`.
+//   Ctx       - supplies the language options and target info used to parse
+//               the format string.
+//   isKprintf - forwarded to the format-string parser.
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
+                                  const unsigned FmtArgIdx, ASTContext &Ctx,
+                                  bool isKprintf = false) {
+  class StringFormatStringHandler
+      : public analyze_format_string::FormatStringHandler {
+    const CallExpr *Call;
+    unsigned FmtArgIdx;
+    const Expr *&UnsafeArg;
+
+  public:
+    StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
+                              const Expr *&UnsafeArg)
+        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
+
+    bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                               const char *startSpecifier,
+                               unsigned specifierLen,
+                               const TargetInfo &Target) override {
+      if (FS.getConversionSpecifier().getKind() !=
+          analyze_printf::PrintfConversionSpecifier::sArg)
+        return true; // not an `s` specifier: continue parsing
+
+      const unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+      if (0 < ArgIdx && ArgIdx < Call->getNumArgs() &&
+          !isNullTermPointer(Call->getArg(ArgIdx))) {
+        UnsafeArg = Call->getArg(ArgIdx); // output
+        // returning false stops parsing immediately
+        return false;
+      }
+      return true; // continue parsing
+    }
+  };
+
+  const Expr *Fmt = Call->getArg(FmtArgIdx);
+  const auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts());
+
+  // `StringLiteral::getString()` is only valid for literals with 1-byte code
+  // units. Wide/UTF-16/UTF-32 literals take the conservative path below.
+  if (SL && SL->getCharByteWidth() == 1) {
+    StringRef FmtStr = SL->getString();
+    // The parser reports "stopped" both when the handler asks it to stop and
+    // when the format string is malformed. Only the former means an unsafe
+    // argument was found, so track the finding separately and publish it to
+    // `UnsafeArg` only when it exists.
+    const Expr *FoundArg = nullptr;
+    StringFormatStringHandler Handler(Call, FmtArgIdx, FoundArg);
+
+    (void)analyze_format_string::ParsePrintfString(
+        Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
+        Ctx.getTargetInfo(), isKprintf);
+    if (!FoundArg)
+      return false;
+    UnsafeArg = FoundArg;
+    return true;
+  }
+  // The format string cannot be analyzed. In this case, the call is
+  // considered unsafe if at least one argument (including the format
+  // argument) is an unsafe pointer.
+  return llvm::any_of(
+      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      [&UnsafeArg](const Expr *Arg) -> bool {
+        if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
+          UnsafeArg = Arg;
+          return true;
+        }
+        return false;
+      });
+}
+
+// Matches a FunctionDecl node such that
+//  1. Its name, after stripping off predefined prefix and suffix, is
+//     `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+//     is a set of libc function names.
+//
+// Additionally matches a function whose unmodified name is one of the
+// `PredefinedNames` (this is what makes the "strerror_s" entry reachable,
+// because the "_s" suffix is otherwise stripped), and any function whose
+// `CoreName` ends with "scanf".
+//
+// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
+// The notation `CoreName[str/wcs]` means a new name obtained by replacing
+// every occurrence of "wcs" with "str" in `CoreName`.
+AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) {
+  // A function-local static is initialized exactly once, on first use.
+  static const std::set<StringRef> PredefinedNames{
+      // numeric conversion:
+      "atof",
+      "atoi",
+      "atol",
+      "atoll",
+      "strtol",
+      "strtoll",
+      "strtoul",
+      "strtoull",
+      "strtof",
+      "strtod",
+      "strtold",
+      "strtoimax",
+      "strtoumax",
+      // "strfromf", "strfromd", "strfroml", // C23?
+      // string manipulation:
+      "strcpy",
+      "strncpy",
+      "strlcpy",
+      "strcat",
+      "strncat",
+      "strlcat",
+      "strxfrm",
+      "strdup",
+      "strndup",
+      // string examination:
+      "strlen",
+      "strnlen",
+      "strcmp",
+      "strncmp",
+      "stricmp",
+      "strcasecmp",
+      "strcoll",
+      "strchr",
+      "strrchr",
+      "strspn",
+      "strcspn",
+      "strpbrk",
+      "strstr",
+      "strtok",
+      // "mem-" functions
+      "memchr",
+      "wmemchr",
+      "memcmp",
+      "wmemcmp",
+      "memcpy",
+      "memccpy",
+      "mempcpy",
+      "wmemcpy",
+      "memmove",
+      "wmemmove",
+      "memset",
+      "wmemset",
+      // IO:
+      "fread",
+      "fwrite",
+      "fgets",
+      "fgetws",
+      "gets",
+      "fputs",
+      "fputws",
+      "puts",
+      // others
+      "strerror_s",
+      "strerror_r",
+      "bcopy",
+      "bzero",
+      "bsearch",
+      "qsort",
+  };
+
+  const auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  const StringRef RawName = II->getName();
+
+  // Match the unmodified name first: an entry that is itself spelled with a
+  // "_s" suffix ("strerror_s") can never equal a stripped core name.
+  if (PredefinedNames.count(RawName))
+    return true;
+
+  const StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      RawName, Node.getBuiltinID());
+
+  // Match predefined names:
+  if (PredefinedNames.count(Name))
+    return true;
+
+  // Match the wide-character counterpart: rewrite every "wcs" to "str" and
+  // look the result up again.
+  std::string NameWCS = Name.str();
+
+  for (size_t WcsPos = NameWCS.find("wcs"); WcsPos != std::string::npos;
+       WcsPos = NameWCS.find("wcs", WcsPos + 3))
+    NameWCS.replace(WcsPos, 3, "str");
+  if (PredefinedNames.count(NameWCS))
+    return true;
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.).
+  // They all should end with "scanf":
+  return Name.ends_with("scanf");
+}
+
+// Matches the declaration of one of the `v*printf` functions, i.e. the
+// variants taking a `va_list`. Safety of their arguments cannot be checked,
+// so calls to them should be changed to the non-va_list versions.
+AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) {
+  const auto *Ident = Node.getIdentifier();
+
+  if (!Ident)
+    return false;
+
+  const StringRef CoreName = LibcFunNamePrefixSuffixParser().matchName(
+      Ident->getName(), Node.getBuiltinID());
+
+  // A va_list printf has a core name of the shape "v...printf".
+  return CoreName.ends_with("printf") && CoreName.starts_with("v");
+}
+
+// Matches the declaration of one of the `sprintf` functions (including the
+// wide-character spelling). They are always unsafe and should be changed to
+// `snprintf`.
+AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) {
+  const auto *Ident = Node.getIdentifier();
+
+  if (!Ident)
+    return false;
+
+  const StringRef CoreName = LibcFunNamePrefixSuffixParser().matchName(
+      Ident->getName(), Node.getBuiltinID());
+
+  // The va_list variants are left to `isUnsafeVaListPrintfFunc`:
+  if (CoreName.starts_with("v"))
+    return false;
+  if (!CoreName.ends_with("printf"))
+    return false;
+
+  // What precedes "printf" must be "s", optionally followed by a single "w".
+  const StringRef Stem = CoreName.drop_back(6 /* "printf" */);
+
+  return Stem == "s" || Stem == "sw";
+}
+
+// Matches the declarations of `printf`, `kprintf`, `fprintf`, `snprintf` and
+// their wide-character spellings. Calls to these functions can be safe when
+// their arguments are carefully made safe.
+AST_MATCHER(FunctionDecl, isNormalPrintfFunc) {
+  const auto *Ident = Node.getIdentifier();
+
+  if (!Ident)
+    return false;
+
+  const StringRef CoreName = LibcFunNamePrefixSuffixParser().matchName(
+      Ident->getName(), Node.getBuiltinID());
+
+  // va_list variants and non-printf functions are out of scope here.
+  if (CoreName.starts_with("v") || !CoreName.ends_with("printf"))
+    return false;
+
+  StringRef Stem = CoreName.drop_back(6 /* "printf" */);
+
+  // Ignore the single "w" that marks a wide-character variant.
+  if (Stem.ends_with("w"))
+    Stem = Stem.drop_back(1);
+
+  for (StringRef Accepted : {"", "k", "f", "sn"})
+    if (Stem == Accepted)
+      return true;
+  return false;
+}
+
+// This matcher requires that it is known that the callee `isNormalPrintf`.
+// Then if the format string is a string literal, this matcher matches when at
+// least one string argument is unsafe. If the format is not a string literal,
+// this matcher matches when at least one pointer type argument is unsafe.
+AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
+ clang::ast_matchers::internal::Matcher<Expr>,
+ UnsafeStringArgMatcher) {
+ // Determine what printf it is:
+ const Expr *FirstArg = Node.getArg(0);
+ ASTContext &Ctx = Finder->getASTContext();
+
+ if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) {
+ // It is a printf/kprintf. And, the format is a string literal:
+ bool isKprintf = false;
+ const Expr *UnsafeArg;
+
+ if (auto *Callee = Node.getDirectCallee())
+ if (auto *II = Node.getDirectCallee()->getIdentifier())
+ isKprintf = II->getName() == "kprintf";
+ if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf))
+ return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+ return false;
+ }
+
+ QualType PtrTy = FirstArg->getType();
+
+ assert(PtrTy->isPointerType());
+
+ QualType PteTy = (cast<PointerType>(PtrTy))->getPointeeType();
----------------
hnrklssn wrote:
`cast<PointerType>(PtrTy)` is only safe if `isa<PointerType>(PtrTy)` is true. `PtrTy->isPointerType()` is also true for various sugar types that wrap `PointerType`. To unwrap them, use `PtrTy->getAs<PointerType>()`.
https://github.com/llvm/llvm-project/pull/101583
More information about the cfe-commits
mailing list