[clang] Warning Libc functions (PR #101583)
Ziqing Luo via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 5 10:46:27 PDT 2024
================
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
return false;
}
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+ static const std::set<StringRef> PredefinedNames{
+ // numeric conversion:
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "strtol",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strtof",
+ "strtod",
+ "strtold",
+ "strtoimax",
+ "strtoumax",
+ // "strfromf", "strfromd", "strfroml", // C23?
+ // string manipulation:
+ "strcpy",
+ "strncpy",
+ "strlcpy",
+ "strcat",
+ "strncat",
+ "strlcat",
+ "strxfrm",
+ "strdup",
+ "strndup",
+ // string examination:
+ "strlen",
+ "strnlen",
+ "strcmp",
+ "strncmp",
+ "stricmp",
+ "strcasecmp",
+ "strcoll",
+ "strchr",
+ "strrchr",
+ "strspn",
+ "strcspn",
+ "strpbrk",
+ "strstr",
+ "strtok",
+ // "mem-" functions
+ "memchr",
+ "wmemchr",
+ "memcmp",
+ "wmemcmp",
+ "memcpy",
+ "memccpy",
+ "mempcpy",
+ "wmemcpy",
+ "memmove",
+ "wmemmove",
+ "memset",
+ "wmemset",
+ // IO:
+ "fread",
+ "fwrite",
+ "fgets",
+ "fgetws",
+ "gets",
+ "fputs",
+ "fputws",
+ "puts",
+ // others
+ "strerror_s",
+ "strerror_r",
+ "bcopy",
+ "bzero",
+ "bsearch",
+ "qsort",
+ };
+
+ // A tiny name parser for unsafe libc function names with additional
+ // checks for `printf`s:
+ struct FuncNameMatch {
+ const CallExpr *const Call;
+ FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+ // For a name `S` in `PredefinedNames` or a member of the printf/scanf
+ // family, define matching function names with `S` by the grammar below:
+ //
+ // CoreName := S | S["wcs"/"str"]
+ // LibcName := CoreName | CoreName + "_s"
+ // MatchingName := "__builtin_" + LibcName |
+ // "__builtin___" + LibcName + "_chk" |
+ // "__asan_" + LibcName
+ //
+ // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+ bool matchName(StringRef FunName) {
+ // Try to match __builtin_:
+ if (FunName.starts_with("__builtin_"))
+ // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+ // no match:
+ return matchLibcNameOrBuiltinChk(
+ FunName.drop_front(10 /* truncate "__builtin_" */));
+ // Try to match __asan_:
+ if (FunName.starts_with("__asan_"))
+ return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+ return matchLibcName(FunName);
+ }
+
+ // Parameter `Name` is the substring after stripping off the prefix
+ // "__builtin_".
+ bool matchLibcNameOrBuiltinChk(StringRef Name) {
+ if (Name.starts_with("__") && Name.ends_with("_chk"))
+ return matchLibcName(
+ Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+ return matchLibcName(Name);
+ }
+
+ bool matchLibcName(StringRef Name) {
+ if (Name.ends_with("_s"))
+ return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
+ return matchCoreName(Name);
+ }
+
+ bool matchCoreName(StringRef Name) {
+ if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
+ return !isSafeStrlen(Name); // match unless it's a safe strlen call
+
+ std::string NameWCS = Name.str();
+ size_t WcsPos = NameWCS.find("wcs");
+
+ while (WcsPos != std::string::npos) {
+ NameWCS[WcsPos++] = 's';
+ NameWCS[WcsPos++] = 't';
+ NameWCS[WcsPos++] = 'r';
+ WcsPos = NameWCS.find("wcs", WcsPos);
+ }
+ if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+ return !isSafeStrlen(NameWCS);
+ return matchPrintfOrScanfFamily(Name);
+ }
+
+ bool matchPrintfOrScanfFamily(StringRef Name) {
+ if (Name.ends_with("scanf"))
+ return true; // simply warn about scanf functions
+ if (!Name.ends_with("printf"))
+ return false; // neither printf nor scanf
+ if (Name.starts_with("v"))
+ // cannot check args for va_list, so `vprintf`s are treated as regular
+ // unsafe libc calls:
+ return true;
+
+ // Truncate "printf", focus on prefixes. There are different possible
+ // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the
+ // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix:
+ StringRef Prefix = Name.drop_back(6);
+
+ if (Prefix.ends_with("w"))
+ Prefix = Prefix.drop_back(1);
+ return isUnsafePrintf(Prefix);
+ }
+
+ // A pointer type expression is known to be null-terminated, if it has the
+ // form: E.c_str(), for any expression E of `std::string` type.
+ static bool isNullTermPointer(const Expr *Ptr) {
+ if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+ return true;
+ if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+ const CXXMethodDecl *MD = MCE->getMethodDecl();
+ const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+ if (MD && RD && RD->isInStdNamespace())
+ if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+ return true;
+ }
+ return false;
+ }
+
+ // Check safe patterns for printfs w.r.t their prefixes:
+ bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
+ auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
+ return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+ };
+
+ if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated
+ return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr);
+
+ if (Prefix == "f" && Call->getNumArgs() > 1) {
+ // fprintf, same as printf but skip the first arg
+ auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end());
+ return llvm::any_of(Range, AnyUnsafeStrPtr);
+ }
+ if (Prefix == "sn" && Call->getNumArgs() > 2) {
+ // The first two arguments need to be in safe patterns, which is checked
+ // by `isSafeSizedby`:
+ auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end());
+ return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) ||
+ llvm::any_of(Range, AnyUnsafeStrPtr);
+ }
+ // Note `sprintf`s should be changed to `snprintf`s, so they are treated
+ // as regular unsafe libc calls:
+ return true;
+ }
+
+ // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern:
+ // SizedByPtr := DRE.data()
+ // Size := DRE.size_bytes(), for a same DRE of sized-container/view
+ // type.
+ bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) {
+ static StringRef SizedObjs[] = {"span", "array", "vector",
+ "basic_string_view", "basic_string"};
+ if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr))
+ if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+ if (auto *DREPtr =
+ dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument()))
+ if (auto *DRESize =
+ dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument()))
+ if (DREPtr->getDecl() ==
+ DRESize->getDecl()) // coming out of the same variable
+ if (MCEPtr->getMethodDecl()->getName() == "data")
+ if (MCESize->getMethodDecl()->getName() == "size_bytes")
+ for (StringRef SizedObj : SizedObjs)
+ if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+ MCEPtr->getRecordDecl()
+ ->getCanonicalDecl()
+ ->getName() == SizedObj)
+ return true;
+ }
+ return false;
+ }
+
+ // This is safe: strlen("hello"). We don't want to be noisy on this case.
+ bool isSafeStrlen(StringRef Name) {
+ return Name == "strlen" && Call->getNumArgs() == 1 &&
+ isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
+ }
+ } FuncNameMatch{&Node};
+
+ const FunctionDecl *FD = Node.getDirectCallee();
+ const IdentifierInfo *II;
+
+ if (!FD)
+ return false;
+ II = FD->getIdentifier();
+ // If this is a special C++ name without IdentifierInfo, it can't be a
+ // C library function.
+ if (!II)
+ return false;
+
+ // Look through 'extern "C"' and anything similar invented in the future.
+ // If this function is not in TU directly, it is not a C library function.
+ if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
+ return false;
+
+ // If this function is not externally visible, it is not a C library function.
+ // Note that we make an exception for inline functions, which may be
+ // declared in header files without external linkage.
+ if (!FD->isInlined() && !FD->isExternallyVisible())
----------------
ziqingluo-90 wrote:
I just blindly bought what the comment says. Let me know if you think this check is actually problematic 🤔.
https://github.com/llvm/llvm-project/pull/101583
More information about the cfe-commits
mailing list