[clang] Warning Libc functions (PR #101583)

Artem Dergachev via cfe-commits cfe-commits at lists.llvm.org
Tue Aug 13 15:16:38 PDT 2024


================
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+//     `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+//     is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set<StringRef> PredefinedNames{
+      // numeric conversion:
+      "atof",
+      "atoi",
+      "atol",
+      "atoll",
+      "strtol",
+      "strtoll",
+      "strtoul",
+      "strtoull",
+      "strtof",
+      "strtod",
+      "strtold",
+      "strtoimax",
+      "strtoumax",
+      // "strfromf",  "strfromd", "strfroml", // C23?
+      // string manipulation:
+      "strcpy",
+      "strncpy",
+      "strlcpy",
+      "strcat",
+      "strncat",
+      "strlcat",
+      "strxfrm",
+      "strdup",
+      "strndup",
+      // string examination:
+      "strlen",
+      "strnlen",
+      "strcmp",
+      "strncmp",
+      "stricmp",
+      "strcasecmp",
+      "strcoll",
+      "strchr",
+      "strrchr",
+      "strspn",
+      "strcspn",
+      "strpbrk",
+      "strstr",
+      "strtok",
+      // "mem-" functions
+      "memchr",
+      "wmemchr",
+      "memcmp",
+      "wmemcmp",
+      "memcpy",
+      "memccpy",
+      "mempcpy",
+      "wmemcpy",
+      "memmove",
+      "wmemmove",
+      "memset",
+      "wmemset",
+      // IO:
+      "fread",
+      "fwrite",
+      "fgets",
+      "fgetws",
+      "gets",
+      "fputs",
+      "fputws",
+      "puts",
+      // others
+      "strerror_s",
+      "strerror_r",
+      "bcopy",
+      "bzero",
+      "bsearch",
+      "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+    return Name == "strlen" && Node.getNumArgs() == 1 &&
+           isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+    return !isSafeStrlen(CoreName);
+
+  std::string NameWCS = CoreName.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+    NameWCS[WcsPos++] = 's';
+    NameWCS[WcsPos++] = 't';
+    NameWCS[WcsPos++] = 'r';
+    WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+    return !isSafeStrlen(NameWCS);
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
+  // all should end with "scanf"):
+  return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf"))
+    return false; // neither printf nor scanf
+  return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+    const CXXMethodDecl *MD = MCE->getMethodDecl();
+    const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+    if (MD && RD && RD->isInStdNamespace())
+      if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+        return true;
+  }
+  return false;
+}
+
+// Return true iff at least one of following cases holds:
+//  1. Format string is a literal and there is an unsafe pointer argument
+//     corresponding to an `s` specifier;
+//  2. Format string is not a literal and there is least an unsafe pointer
+//     argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
+                                  const CallExpr *Call, ASTContext &Ctx) {
+  if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+    StringRef FmtStr = SL->getString();
+    auto I = FmtStr.begin();
+    auto E = FmtStr.end();
+    unsigned ArgIdx = FmtArgIdx;
+
+    do {
+      ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
+          I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
+      if (ArgIdx && Call->getNumArgs() > ArgIdx &&
+          !isNullTermPointer(Call->getArg(ArgIdx)))
+        return true;
+    } while (ArgIdx);
+    return false;
+  }
+  // If format is not a string literal, we cannot analyze the format string.
+  // In this case, this call is considered unsafe if at least one argument
+  // (including the format argument) is unsafe pointer.
+  return llvm::any_of(
+      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      [](const Expr *Arg) {
+        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+      });
+}
+
+// Matches a call to one of the `-printf" functions (excluding the ones with
+// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
+// fail to guarantee their null-termination.  In other words, these calls are
+// safe if they use null-termination guaranteed string pointers.
+AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+
+  if (Prefix.empty() ||
+      Prefix == "k") // printf: all pointer args should be null-terminated
+    return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node,
+                                 Finder->getASTContext());
+  if (Prefix == "f")
+    return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node,
+                                 Finder->getASTContext());
+  if (Prefix == "sn") {
+    // The first two arguments need to be in safe patterns, which is checked
+    // by `isSafeSizedby`:
+    return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node,
+                                 Finder->getASTContext());
+  }
+  return false; // A call to a "-printf" falls into another category.
+}
+
+// Matches a call to one of the `snprintf` functions (excluding the ones with
+// va_list) such that the first two arguments fail to conform to safe patterns.
+//
+// For the first two arguments: `ptr` and `size`, they are safe if in the
+// following patterns:
+//    ptr := DRE.data();
+//    size:= DRE.size()/DRE.size_bytes()
+// And DRE is a hardened container or view.
+AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false; // not snprint or use va_list
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+
+  if (Prefix != "sn")
+    return false; // not snprint
+
+  static StringRef SizedObjs[] = {"span", "array", "vector",
+                                  "basic_string_view", "basic_string"};
+  const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts();
+  const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts();
+
+  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr))
+    if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+      auto *DREOfPtr = dyn_cast<DeclRefExpr>(
+          MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
+      auto *DREOfSize = dyn_cast<DeclRefExpr>(
+          MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
+
+      if (!DREOfPtr || !DREOfSize)
+        return true; // not in safe pattern
+      if (DREOfPtr->getDecl() != DREOfSize->getDecl())
+        return true; // not in safe pattern
+      if (MCEPtr->getMethodDecl()->getName() != "data")
+        return true; // not in safe pattern
+
+      if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
+          // Note here the pointer must be a pointer-to-char type unless there
+          // is explicit casting.  If there is explicit casting, this branch
+          // is unreachable. Thus, at this branch "size" and "size_bytes" are
+          // equivalent as the pointer is a char pointer:
+          MCESize->getMethodDecl()->getName() == "size")
+        for (StringRef SizedObj : SizedObjs)
+          if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+              MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
+                  SizedObj)
+            return false; // It is in fact safe
+    }
+  return true; // ptr and size are not in safe pattern
+}
+} // namespace libc_fun_disjoint_inner_matchers
+
+// Match call to a function whose name may have prefixes like "__builtin_" or
+// "__asan_" and suffixes like "_s" or "_chk".  This matcher takes an argument,
+// which should be applied to the core name---the subtring after stripping off
+// prefix and suffix of the function name.
+// The application results in an inner matcher that matches the call node with
+// respect to the core name of the callee.
+AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
+              std::function<internal::Matcher<CallExpr>(StringRef)>,
----------------
haoNoQ wrote:

Looks like you're dynamically generating the inner matcher in the middle of the match? I probably need to process this more, and it's likely that there could be a more "compile-time" way to do this, let me think 🤔

https://github.com/llvm/llvm-project/pull/101583


More information about the cfe-commits mailing list