[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

Ziqing Luo via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 28 17:20:19 PDT 2024


https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h     |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 408 +++++++++++++++++-
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 ++++
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
                                      bool IsRelatedToDecl, ASTContext &Ctx) = 0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+                                    ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
                                                 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">,
   InGroup<UnsafeBufferUsage>, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup<UnsafeBufferUsage>, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match"
+          "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination"
+          "| do not use va_list that cannot be checked at compile-time for bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
+#include <functional>
 #include <memory>
 #include <optional>
 #include <queue>
@@ -443,6 +447,337 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+//     `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+//     is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set<StringRef> PredefinedNames{
+      // numeric conversion:
+      "atof",
+      "atoi",
+      "atol",
+      "atoll",
+      "strtol",
+      "strtoll",
+      "strtoul",
+      "strtoull",
+      "strtof",
+      "strtod",
+      "strtold",
+      "strtoimax",
+      "strtoumax",
+      // "strfromf",  "strfromd", "strfroml", // C23?
+      // string manipulation:
+      "strcpy",
+      "strncpy",
+      "strlcpy",
+      "strcat",
+      "strncat",
+      "strlcat",
+      "strxfrm",
+      "strdup",
+      "strndup",
+      // string examination:
+      "strlen",
+      "strnlen",
+      "strcmp",
+      "strncmp",
+      "stricmp",
+      "strcasecmp",
+      "strcoll",
+      "strchr",
+      "strrchr",
+      "strspn",
+      "strcspn",
+      "strpbrk",
+      "strstr",
+      "strtok",
+      // "mem-" functions
+      "memchr",
+      "wmemchr",
+      "memcmp",
+      "wmemcmp",
+      "memcpy",
+      "memccpy",
+      "mempcpy",
+      "wmemcpy",
+      "memmove",
+      "wmemmove",
+      "memset",
+      "wmemset",
+      // IO:
+      "fread",
+      "fwrite",
+      "fgets",
+      "fgetws",
+      "gets",
+      "fputs",
+      "fputws",
+      "puts",
+      // others
+      "strerror_s",
+      "strerror_r",
+      "bcopy",
+      "bzero",
+      "bsearch",
+      "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+    return Name == "strlen" && Node.getNumArgs() == 1 &&
+           isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+    return !isSafeStrlen(CoreName);
+
+  std::string NameWCS = CoreName.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+    NameWCS[WcsPos++] = 's';
+    NameWCS[WcsPos++] = 't';
+    NameWCS[WcsPos++] = 'r';
+    WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+    return !isSafeStrlen(NameWCS);
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
+  // all should end with "scanf"):
+  return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf"))
+    return false; // neither printf nor scanf
+  return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+    const CXXMethodDecl *MD = MCE->getMethodDecl();
+    const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+    if (MD && RD && RD->isInStdNamespace())
+      if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+        return true;
+  }
+  return false;
+}
+
+// Matches a call to one of the `-printf" functions (excluding the ones with
+// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
+// fail to guarantee their null-termination.  In other words, these calls are
+// safe if they use null-termination guaranteed string pointers.
+AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+
+  auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
+    return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+  };
+
+  if (Prefix.empty() ||
+      Prefix == "k") // printf: all pointer args should be null-terminated
+    return any_of(Node.arguments(), AnyUnsafeStrPtr);
+  if (Prefix == "f" && Node.getNumArgs() > 1)
+    return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()),
+                  AnyUnsafeStrPtr);
+  if (Prefix == "sn" && Node.getNumArgs() > 2) {
+    return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()),
+                  AnyUnsafeStrPtr);
+  }
+  return false; // A call to a "-printf" falls into another category.
+}
+
+// Matches a call to one of the `snprintf` functions (excluding the ones with
+// va_list) such that the first two arguments fail to conform to safe patterns.
+//
+// For the first two arguments: `ptr` and `size`, they are safe if in the
+// following patterns:
+//    ptr := DRE.data();
+//    size:= DRE.size()/DRE.size_bytes()
+// And DRE is a hardened container or view.
+AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false; // not snprint or use va_list
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+
+  if (Prefix != "sn")
+    return false; // not snprint
+
+  static StringRef SizedObjs[] = {"span", "array", "vector",
+                                  "basic_string_view", "basic_string"};
+  const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts();
+  const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts();
+
+  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr))
+    if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+      auto *DREOfPtr = dyn_cast<DeclRefExpr>(
+          MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
+      auto *DREOfSize = dyn_cast<DeclRefExpr>(
+          MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
+
+      if (!DREOfPtr || !DREOfSize)
+        return true; // not in safe pattern
+      if (DREOfPtr->getDecl() != DREOfSize->getDecl())
+        return true; // not in safe pattern
+      if (MCEPtr->getMethodDecl()->getName() != "data")
+        return true; // not in safe pattern
+
+      if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
+          // Note here the pointer must be a pointer-to-char type unless there
+          // is explicit casting.  If there is explicit casting, this branch
+          // is unreachable. Thus, at this branch "size" and "size_bytes" are
+          // equivalent as the pointer is a char pointer:
+          MCESize->getMethodDecl()->getName() == "size")
+        for (StringRef SizedObj : SizedObjs)
+          if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+              MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
+                  SizedObj)
+            return false; // It is in fact safe
+    }
+  return true; // ptr and size are not in safe pattern
+}
+} // namespace libc_fun_disjoint_inner_matchers
+
+// Match call to a function whose name may have prefixes like "__builtin_" or
+// "__asan_" and suffixes like "_s" or "_chk".  This matcher takes an argument,
+// which should be applied to the core name---the subtring after stripping off
+// prefix and suffix of the function name.
+// The application results in an inner matcher that matches the call node with
+// respect to the core name of the callee.
+AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
+              std::function<internal::Matcher<CallExpr>(StringRef)>,
+              InnerMatcher) {
+  //  Given a function name, returns its core name `CoreName` according to the
+  //  following grammar.
+  //
+  //  LibcName     := CoreName | CoreName + "_s"
+  //  MatchingName := "__builtin_" + LibcName              |
+  //                  "__builtin___" + LibcName + "_chk"   |
+  //                  "__asan_" + LibcName
+  //
+  struct NameParser {
+    StringRef matchName(StringRef FunName, bool isBuiltin) {
+      // Try to match __builtin_:
+      if (isBuiltin && FunName.starts_with("__builtin_"))
+        // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+        // no match:
+        return matchLibcNameOrBuiltinChk(
+            FunName.drop_front(10 /* truncate "__builtin_" */));
+      // Try to match __asan_:
+      if (FunName.starts_with("__asan_"))
+        return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+      return matchLibcName(FunName);
+    }
+
+    // Parameter `Name` is the substring after stripping off the prefix
+    // "__builtin_".
+    StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+      if (Name.starts_with("__") && Name.ends_with("_chk"))
+        return matchLibcName(
+            Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+      return matchLibcName(Name);
+    }
+
+    StringRef matchLibcName(StringRef Name) {
+      if (Name.ends_with("_s"))
+        return Name.drop_back(2 /* truncate "_s" */);
+      return Name;
+    }
+  } TheLittleParser;
+
+  const FunctionDecl *FD = Node.getDirectCallee();
+  const IdentifierInfo *II;
+
+  if (!FD)
+    return false;
+  II = FD->getIdentifier();
+  // If this is a special C++ name without IdentifierInfo, it can't be a
+  // C library function.
+  if (!II)
+    return false;
+
+  // Look through 'extern "C"' and anything similar invented in the future.
+  // In general, C library functions will be in the TU directly.
+  if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
+    // If that's not the case, we also consider "C functions" re-declared in
+    // `std` namespace.
+    if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace())
+      return false;
+  }
+
+  // If this function is not externally visible, it is not a C library function.
+  // Note that we make an exception for inline functions, which may be
+  // declared in header files without external linkage.
+  if (!FD->isInlined() && !FD->isExternallyVisible())
+    return false;
+
+  StringRef CoreName =
+      TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
+      
+  return InnerMatcher(CoreName).matches(Node, Finder, Builder);
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -1025,6 +1360,77 @@ class DataInvocationGadget : public WarningGadget {
   DeclUseList getClaimedVarUseSites() const override { return {}; }
 };
 
+class UnsafeLibcFunctionCallGadget : public WarningGadget {
+  const CallExpr *const Call;
+  constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
+  // Extra tags for additional information:
+  constexpr static const char *const UnsafeSprintfTag =
+      "UnsafeLibcFunctionCall_sprintf";
+  constexpr static const char *const UnsafeSizedByTag =
+      "UnsafeLibcFunctionCall_sized_by";
+  constexpr static const char *const UnsafeStringTag =
+      "UnsafeLibcFunctionCall_string";
+  constexpr static const char *const UnsafeVaListTag =
+      "UnsafeLibcFunctionCall_va_list";
+
+  enum UnsafeKind {
+    OTHERS = 0,   // no specific information, the callee function is unsafe
+    SPRINTF = 1,  // never call `-sprintf`s, call `-snprintf`s instead.
+    SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but
+                  // they do not conform to safe patterns
+    STRING = 3,   // an argument is a pointer-to-char-as-string but does not
+                  // guarantee null-termination
+    VA_LIST = 4,  // one of the `-printf`s function that take va_list, which is
+                  // considered unsafe as it is not compile-time check
+  } WarnedFunKind = OTHERS;
+
+public:
+  UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
+      : WarningGadget(Kind::UnsafeLibcFunctionCall),
+        Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
+    if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf"))
+      WarnedFunKind = SPRINTF;
+    else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string"))
+      WarnedFunKind = STRING;
+    else if (Result.Nodes.getNodeAs<CallExpr>(
+                 "UnsafeLibcFunctionCall_sized_by"))
+      WarnedFunKind = SIZED_BY;
+    else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list"))
+      WarnedFunKind = VA_LIST;
+  }
+
+  static Matcher matcher() {
+    auto anyOfLibcInnerMatcher = [](StringRef S) {
+      return anyOf(
+          libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S),
+          callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S))
+              .bind(UnsafeStringTag),
+          callExpr(
+              libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S))
+              .bind(UnsafeSizedByTag),
+          callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S))
+              .bind(UnsafeSprintfTag),
+          callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S))
+              .bind(UnsafeVaListTag));
+    };
+
+    return stmt(
+        callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag));
+  }
+
+  const Stmt *getBaseStmt() const { return Call; }
+
+  SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+  void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+                             bool IsRelatedToDecl,
+                             ASTContext &Ctx) const override {
+    Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx);
+  }
+
+  DeclUseList getClaimedVarUseSites() const override { return {}; }
+};
+
 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
 // Context (see `isInUnspecifiedLvalueContext`).
 // Note here `[]` is the built-in subscript operator.
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 0f604c61fa3af9..53ade2df5c311b 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2292,6 +2292,18 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
     }
   }
 
+  void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+                            ASTContext &Ctx) override {
+    // We have checked that there is a direct callee with an identifier name:
+    StringRef Name = Call->getDirectCallee()->getName();
+
+    S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
+        << Name << Call->getSourceRange();
+    if (PrintfInfo > 0)
+      S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call)
+          << PrintfInfo << Call->getSourceRange();
+  }
+
   void handleUnsafeOperationInContainer(const Stmt *Operation,
                                         bool IsRelatedToDecl,
                                         ASTContext &Ctx) override {
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
new file mode 100644
index 00000000000000..eebbc381a262ff
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:            -verify %s
+
+namespace std {
+  inline namespace __1 {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+		 OutputIt d_first );
+
+  struct iterator{};
+  template<typename T>
+  struct span {
+    T * ptr;
+    T * data();
+    unsigned size_bytes();
+    unsigned size();
+    iterator begin() const noexcept;
+    iterator end() const noexcept;
+  };
+
+  template<typename T>
+  struct basic_string {
+    T* p;
+    T *c_str();
+    T *data();
+    unsigned size_bytes();
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+
+  // C function under std:
+  void memcpy();
+  void strcpy();
+  int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+  }
+}
+
+void f(char * p, char * q, std::span<char> s) {
+  std::memcpy();              // expected-warning{{function memcpy introduces unsafe buffer access}}
+  std::strcpy();              // expected-warning{{function strcpy introduces unsafe buffer access}}
+  std::__1::memcpy();              // expected-warning{{function memcpy introduces unsafe buffer access}}
+  std::__1::strcpy();              // expected-warning{{function strcpy introduces unsafe buffer access}}
+
+  /* Test printfs */
+  std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+  std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+}
+
+void v(std::string s1) {
+  std::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+  std::__1::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+  std::copy(begin, end, p); // no warn
+  std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
new file mode 100644
index 00000000000000..ea2c5ec82211b2
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:            -verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __asan_memcpy();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+		 OutputIt d_first );
+
+  struct iterator{};
+  template<typename T>
+  struct span {
+    T * ptr;
+    T * data();
+    unsigned size_bytes();
+    unsigned size();
+    iterator begin() const noexcept;
+    iterator end() const noexcept;
+  };
+
+  template<typename T>
+  struct basic_string {
+    T* p;
+    T *c_str();
+    T *data();
+    unsigned size_bytes();
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+
+  // C function under std:
+  void memcpy();
+  void strcpy();
+}
+
+void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
+  memcpy();                   // expected-warning{{function memcpy introduces unsafe buffer access}}
+  std::memcpy();              // expected-warning{{function memcpy introduces unsafe buffer access}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}}
+  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}}
+  __asan_memcpy();                      // expected-warning{{function __asan_memcpy introduces unsafe buffer access}}
+  strcpy();                   // expected-warning{{function strcpy introduces unsafe buffer access}}
+  std::strcpy();              // expected-warning{{function strcpy introduces unsafe buffer access}}
+  strcpy_s();                 // expected-warning{{function strcpy_s introduces unsafe buffer access}}
+  wcscpy_s();                 // expected-warning{{function wcscpy_s introduces unsafe buffer access}}
+
+
+  /* Test printfs */
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", p, *p);  // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
+  sscanf(p, "%s%d", "hello", *p);    // expected-warning{{function sscanf introduces unsafe buffer access}}
+  sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function sscanf_s introduces unsafe buffer access}}
+  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", "hello", *p); // no warn
+  snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+  strlen("hello");// no warn
+}
+
+void v(std::string s1) {
+  snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+  std::copy(begin, end, p); // no warn
+  std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 844311c3a51a58..668efe0e178b53 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -1,8 +1,6 @@
 // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s
 
-// expected-no-diagnostics
-
 typedef unsigned __darwin_size_t;
 typedef __darwin_size_t size_t;
  #define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); }
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}}

>From e9a5b7a5dd064728c143a8b8a26c8951b427392b Mon Sep 17 00:00:00 2001
From: ziqingluo-90 <ziqing_luo at apple.com>
Date: Mon, 12 Aug 2024 17:46:48 -0700
Subject: [PATCH 2/5] Reduce false positives in warnings about printfs

- Predefined name macros such as `__PRETTY_FUNCTION__` are considered safe.
- We need to distinguish between `%s` and `%p` as both accept pointers
  but the latter is not a buffer operation.  This leaves us no choice
  other than parsing the format string.  Fortunately, the building
  blocks of format parsing have already existed and are quite handy.
---
 clang/include/clang/AST/FormatString.h        | 12 ++++
 clang/lib/AST/PrintfFormatString.cpp          | 28 ++++++++++
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 56 +++++++++++++++----
 ...arn-unsafe-buffer-usage-libc-functions.cpp | 11 +++-
 4 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index a074dd23e2ad4c..cdba2a7abe49d9 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H,
 bool ParseFormatStringHasSArg(const char *beg, const char *end,
                               const LangOptions &LO, const TargetInfo &Target);
 
+/// Parse C format string and return index (relative to `ArgIndex`) of the first
+/// found `s` specifier.  Return 0 if not found.
+/// \param I The start of the C format string; Updated to the first unparsed
+/// position upon return.
+/// \param E The end of the C format string;
+/// \param ArgIndex The argument index of the last found `s` specifier; Or the
+/// argument index of the formatter in initial case.
+unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E,
+                                         unsigned ArgIndex,
+                                         const LangOptions &LO,
+                                         const TargetInfo &Target);
+
 bool ParseScanfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO,
                       const TargetInfo &Target);
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 3c6cd2d0f43417..9992afd402d370 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -483,6 +483,34 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
   return false;
 }
 
+unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex(
+    const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO,
+    const TargetInfo &Target) {
+  unsigned argIndex = ArgIndex;
+
+  // Keep looking for a %s format specifier until we have exhausted the string.
+  FormatStringHandler H;
+  while (I != E) {
+    const PrintfSpecifierResult &FSR =
+        ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
+    // Did a fail-stop error of any kind occur when parsing the specifier?
+    // If so, don't do any more processing.
+    if (FSR.shouldStop())
+      return false;
+    // Did we exhaust the string or encounter an error that
+    // we can recover from?
+    if (!FSR.hasValue())
+      continue;
+    const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
+    // Return true if this a %s format specifier.
+    if (FS.getConversionSpecifier().getKind() ==
+        ConversionSpecifier::Kind::sArg) {
+      return FS.getPositionalArgIndex();
+    }
+  }
+  return false;
+}
+
 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
     const char *Begin, const char *End, const LangOptions &LO,
     const TargetInfo &Target) {
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 751fb75f6ed602..745de657fcd274 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -12,6 +12,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/FormatString.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
@@ -611,6 +612,38 @@ static bool isNullTermPointer(const Expr *Ptr) {
   return false;
 }
 
+// Return true iff at least one of following cases holds:
+//  1. Format string is a literal and there is an unsafe pointer argument
+//     corresponding to an `s` specifier;
+//  2. Format string is not a literal and there is least an unsafe pointer
+//     argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
+                                  const CallExpr *Call, ASTContext &Ctx) {
+  if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+    StringRef FmtStr = SL->getString();
+    auto I = FmtStr.begin();
+    auto E = FmtStr.end();
+    unsigned ArgIdx = FmtArgIdx;
+
+    do {
+      ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
+          I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
+      if (ArgIdx && Call->getNumArgs() > ArgIdx &&
+          !isNullTermPointer(Call->getArg(ArgIdx)))
+        return true;
+    } while (ArgIdx);
+    return false;
+  }
+  // If format is not a string literal, we cannot analyze the format string.
+  // In this case, this call is considered unsafe if at least one argument
+  // (including the format argument) is unsafe pointer.
+  return llvm::any_of(
+      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      [](const Expr *Arg) {
+        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+      });
+}
+
 // Matches a call to one of the `-printf" functions (excluding the ones with
 // va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
 // fail to guarantee their null-termination.  In other words, these calls are
@@ -626,19 +659,18 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
   if (Prefix.ends_with("w"))
     Prefix = Prefix.drop_back(1);
 
-  auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
-    return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
-  };
-
   if (Prefix.empty() ||
       Prefix == "k") // printf: all pointer args should be null-terminated
-    return any_of(Node.arguments(), AnyUnsafeStrPtr);
-  if (Prefix == "f" && Node.getNumArgs() > 1)
-    return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()),
-                  AnyUnsafeStrPtr);
-  if (Prefix == "sn" && Node.getNumArgs() > 2) {
-    return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()),
-                  AnyUnsafeStrPtr);
+    return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node,
+                                 Finder->getASTContext());
+  if (Prefix == "f")
+    return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node,
+                                 Finder->getASTContext());
+  if (Prefix == "sn") {
+    // The first two arguments need to be in safe patterns, which is checked
+    // by `isSafeSizedby`:
+    return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node,
+                                 Finder->getASTContext());
   }
   return false; // A call to a "-printf" falls into another category.
 }
@@ -775,7 +807,7 @@ AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
 
   StringRef CoreName =
       TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
-      
+
   return InnerMatcher(CoreName).matches(Node, Finder, Builder);
 }
 } // namespace clang::ast_matchers
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index ea2c5ec82211b2..c3d7f8fd05435b 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -70,13 +70,20 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
   sscanf(p, "%s%d", "hello", *p);    // expected-warning{{function sscanf introduces unsafe buffer access}}
   sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function sscanf_s introduces unsafe buffer access}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
   printf("%s%d", "hello", *p); // no warn
   snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+  snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
   strlen("hello");// no warn
 }
 
-void v(std::string s1) {
-  snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+void v(std::string s1, int *p) {
+  snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
+  snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());      // no warn
+  printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str());              // no warn
+  printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());                   // no warn
+  fprintf((FILE*)0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str());   // no warn
+  fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());        // no warn
 }
 
 

>From fa21260c1ec31eec115125a777a66874df9b38c4 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 <ziqing_luo at apple.com>
Date: Wed, 14 Aug 2024 17:02:19 -0700
Subject: [PATCH 3/5] Address comments

---
 clang/include/clang/AST/FormatString.h        |  12 -
 clang/lib/AST/PrintfFormatString.cpp          |  28 -
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 624 ++++++++++--------
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |   6 +-
 ...-usage-libc-functions-inline-namespace.cpp |  12 +-
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  44 +-
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   2 +-
 7 files changed, 369 insertions(+), 359 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index cdba2a7abe49d9..a074dd23e2ad4c 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -783,18 +783,6 @@ bool ParsePrintfString(FormatStringHandler &H,
 bool ParseFormatStringHasSArg(const char *beg, const char *end,
                               const LangOptions &LO, const TargetInfo &Target);
 
-/// Parse C format string and return index (relative to `ArgIndex`) of the first
-/// found `s` specifier.  Return 0 if not found.
-/// \param I The start of the C format string; Updated to the first unparsed
-/// position upon return.
-/// \param E The end of the C format string;
-/// \param ArgIndex The argument index of the last found `s` specifier; Or the
-/// argument index of the formatter in initial case.
-unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E,
-                                         unsigned ArgIndex,
-                                         const LangOptions &LO,
-                                         const TargetInfo &Target);
-
 bool ParseScanfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO,
                       const TargetInfo &Target);
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 9992afd402d370..3c6cd2d0f43417 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -483,34 +483,6 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
   return false;
 }
 
-unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex(
-    const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO,
-    const TargetInfo &Target) {
-  unsigned argIndex = ArgIndex;
-
-  // Keep looking for a %s format specifier until we have exhausted the string.
-  FormatStringHandler H;
-  while (I != E) {
-    const PrintfSpecifierResult &FSR =
-        ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
-    // Did a fail-stop error of any kind occur when parsing the specifier?
-    // If so, don't do any more processing.
-    if (FSR.shouldStop())
-      return false;
-    // Did we exhaust the string or encounter an error that
-    // we can recover from?
-    if (!FSR.hasValue())
-      continue;
-    const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
-    // Return true if this a %s format specifier.
-    if (FS.getConversionSpecifier().getKind() ==
-        ConversionSpecifier::Kind::sArg) {
-      return FS.getPositionalArgIndex();
-    }
-  }
-  return false;
-}
-
 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
     const char *Begin, const char *End, const LangOptions &LO,
     const TargetInfo &Target) {
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 745de657fcd274..a1526125edf098 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Basic/SourceLocation.h"
@@ -23,11 +24,11 @@
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
-#include <functional>
 #include <memory>
 #include <optional>
 #include <queue>
@@ -448,110 +449,223 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
-namespace libc_fun_disjoint_inner_matchers {
-// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
-// disjoint node sets.   They all take a `CoreName`, which is the substring of a
-// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be used
-// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
-// libc function calls.
+AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) {
+  return Node.getNumArgs() == Num;
+}
+
+namespace libc_func_matchers {
+// Under `libc_func_matchers`, define a set of matchers that match unsafe
+// functions in libc and unsafe calls to them.
+
+//  A tiny parser to strip off common prefix and suffix of libc function names
+//  in real code.
+//
+//  Given a function name, `matchName` returns `CoreName` according to the
+//  following grammar:
+//
+//  LibcName     := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName              |
+//                  "__builtin___" + LibcName + "_chk"   |
+//                  "__asan_" + LibcName
+//
+struct LibcFunNamePrefixSuffixParser {
+  StringRef matchName(StringRef FunName, bool isBuiltin) {
+    // Try to match __builtin_:
+    if (isBuiltin && FunName.starts_with("__builtin_"))
+      // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+      // no match:
+      return matchLibcNameOrBuiltinChk(
+          FunName.drop_front(10 /* truncate "__builtin_" */));
+    // Try to match __asan_:
+    if (FunName.starts_with("__asan_"))
+      return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+    return matchLibcName(FunName);
+  }
+
+  // Parameter `Name` is the substring after stripping off the prefix
+  // "__builtin_".
+  StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+    if (Name.starts_with("__") && Name.ends_with("_chk"))
+      return matchLibcName(
+          Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+    return matchLibcName(Name);
+  }
+
+  StringRef matchLibcName(StringRef Name) {
+    if (Name.ends_with("_s"))
+      return Name.drop_back(2 /* truncate "_s" */);
+    return Name;
+  }
+};
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+    const CXXMethodDecl *MD = MCE->getMethodDecl();
+    const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
 
-// Matches a function call node such that
+    if (MD && RD && RD->isInStdNamespace())
+      if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+        return true;
+  }
+  return false;
+}
+
+// Return true iff at least one of following cases holds:
+//  1. Format string is a literal and there is an unsafe pointer argument
+//     corresponding to an `s` specifier;
+//  2. Format string is not a literal and there is least an unsafe pointer
+//     argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
+                                  ASTContext &Ctx, bool isKprintf = false) {
+  class StringFormatStringHandler
+      : public analyze_format_string::FormatStringHandler {
+    const CallExpr *Call;
+    unsigned FmtArgIdx;
+
+  public:
+    StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx)
+        : Call(Call), FmtArgIdx(FmtArgIdx) {}
+
+    bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                               const char *startSpecifier,
+                               unsigned specifierLen,
+                               const TargetInfo &Target) override {
+      if (FS.getConversionSpecifier().getKind() ==
+          analyze_printf::PrintfConversionSpecifier::sArg) {
+        unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+        if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
+          if (!isNullTermPointer(Call->getArg(ArgIdx)))
+            return false; // stop parsing
+      }
+      return true; // continue parsing
+    }
+  };
+
+  const Expr *Fmt = Call->getArg(FmtArgIdx);
+
+  if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+    StringRef FmtStr = SL->getString();
+    StringFormatStringHandler Handler(Call, FmtArgIdx);
+
+    return analyze_format_string::ParsePrintfString(
+        Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
+        Ctx.getTargetInfo(), isKprintf);
+  }
+  // If format is not a string literal, we cannot analyze the format string.
+  // In this case, this call is considered unsafe if at least one argument
+  // (including the format argument) is unsafe pointer.
+  return llvm::any_of(
+      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      [](const Expr *Arg) {
+        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+      });
+}
+
+// Matches a FunctionDecl node such that
 //  1. It's name, after stripping off predefined prefix and suffix, is
 //     `CoreName`; and
 //  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
 //     is a set of libc function names.
 //
-//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
-//  And, the notation `CoreName[str/wcs]` means a new name obtained from replace
+//  Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
+//  The notation `CoreName[str/wcs]` means a new name obtained from replace
 //  string "wcs" with "str" in `CoreName`.
-//
-//  Also note, the set of predefined function names does not include `printf`
-//  functions, they are checked exclusively with other matchers below.
-//  Maintaining the invariant that all matchers under
-//  `libc_fun_disjoint_inner_matchers` are disjoint.
-AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
-  static const std::set<StringRef> PredefinedNames{
-      // numeric conversion:
-      "atof",
-      "atoi",
-      "atol",
-      "atoll",
-      "strtol",
-      "strtoll",
-      "strtoul",
-      "strtoull",
-      "strtof",
-      "strtod",
-      "strtold",
-      "strtoimax",
-      "strtoumax",
-      // "strfromf",  "strfromd", "strfroml", // C23?
-      // string manipulation:
-      "strcpy",
-      "strncpy",
-      "strlcpy",
-      "strcat",
-      "strncat",
-      "strlcat",
-      "strxfrm",
-      "strdup",
-      "strndup",
-      // string examination:
-      "strlen",
-      "strnlen",
-      "strcmp",
-      "strncmp",
-      "stricmp",
-      "strcasecmp",
-      "strcoll",
-      "strchr",
-      "strrchr",
-      "strspn",
-      "strcspn",
-      "strpbrk",
-      "strstr",
-      "strtok",
-      // "mem-" functions
-      "memchr",
-      "wmemchr",
-      "memcmp",
-      "wmemcmp",
-      "memcpy",
-      "memccpy",
-      "mempcpy",
-      "wmemcpy",
-      "memmove",
-      "wmemmove",
-      "memset",
-      "wmemset",
-      // IO:
-      "fread",
-      "fwrite",
-      "fgets",
-      "fgetws",
-      "gets",
-      "fputs",
-      "fputws",
-      "puts",
-      // others
-      "strerror_s",
-      "strerror_r",
-      "bcopy",
-      "bzero",
-      "bsearch",
-      "qsort",
-  };
-  // This is safe: strlen("hello").  We don't want to be noisy on this case.
-  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
-    return Name == "strlen" && Node.getNumArgs() == 1 &&
-           isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts());
-  };
+AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) {
+  static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr;
+  if (!PredefinedNames)
+    PredefinedNames =
+        std::make_unique<std::set<StringRef>, std::set<StringRef>>({
+            // numeric conversion:
+            "atof",
+            "atoi",
+            "atol",
+            "atoll",
+            "strtol",
+            "strtoll",
+            "strtoul",
+            "strtoull",
+            "strtof",
+            "strtod",
+            "strtold",
+            "strtoimax",
+            "strtoumax",
+            // "strfromf",  "strfromd", "strfroml", // C23?
+            // string manipulation:
+            "strcpy",
+            "strncpy",
+            "strlcpy",
+            "strcat",
+            "strncat",
+            "strlcat",
+            "strxfrm",
+            "strdup",
+            "strndup",
+            // string examination:
+            "strlen",
+            "strnlen",
+            "strcmp",
+            "strncmp",
+            "stricmp",
+            "strcasecmp",
+            "strcoll",
+            "strchr",
+            "strrchr",
+            "strspn",
+            "strcspn",
+            "strpbrk",
+            "strstr",
+            "strtok",
+            // "mem-" functions
+            "memchr",
+            "wmemchr",
+            "memcmp",
+            "wmemcmp",
+            "memcpy",
+            "memccpy",
+            "mempcpy",
+            "wmemcpy",
+            "memmove",
+            "wmemmove",
+            "memset",
+            "wmemset",
+            // IO:
+            "fread",
+            "fwrite",
+            "fgets",
+            "fgetws",
+            "gets",
+            "fputs",
+            "fputws",
+            "puts",
+            // others
+            "strerror_s",
+            "strerror_r",
+            "bcopy",
+            "bzero",
+            "bsearch",
+            "qsort",
+        });
+
+  auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
 
   // Match predefined names:
-  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
-    return !isSafeStrlen(CoreName);
+  if (PredefinedNames->find(Name) != PredefinedNames->end())
+    return true;
 
-  std::string NameWCS = CoreName.str();
+  std::string NameWCS = Name.str();
   size_t WcsPos = NameWCS.find("wcs");
 
   while (WcsPos != std::string::npos) {
@@ -560,31 +674,44 @@ AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
     NameWCS[WcsPos++] = 'r';
     WcsPos = NameWCS.find("wcs", WcsPos);
   }
-  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
-    return !isSafeStrlen(NameWCS);
+  if (PredefinedNames->find(NameWCS) != PredefinedNames->end())
+    return true;
   // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
   // all should end with "scanf"):
-  return CoreName.ends_with("scanf");
+  return Name.ends_with("scanf");
 }
 
-// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// Match a call to one of the `v*printf` functions taking `va_list`.  We cannot
 // check safety for these functions so they should be changed to their
 // non-va_list versions.
-AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
-  StringRef Name = CoreName;
+AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) {
+  auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
 
   if (!Name.ends_with("printf"))
     return false; // neither printf nor scanf
   return Name.starts_with("v");
 }
 
-// Matches a call to one of the `-sprintf` functions (excluding the ones with
-// va_list) as they are always unsafe and should be changed to corresponding
-// `-snprintf`s.
-AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
-  StringRef Name = CoreName;
+// Matches a call to one of the `sprintf` functions as they are always unsafe
+// and should be changed to `snprintf`.
+AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) {
+  auto *II = Node.getIdentifier();
 
-  if (!Name.ends_with("printf") || Name.starts_with("v"))
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
+
+  if (!Name.ends_with("printf") ||
+      // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
+      Name.starts_with("v"))
     return false;
 
   StringRef Prefix = Name.drop_back(6);
@@ -594,62 +721,17 @@ AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
   return Prefix == "s";
 }
 
-// A pointer type expression is known to be null-terminated, if it has the
-// form: E.c_str(), for any expression E of `std::string` type.
-static bool isNullTermPointer(const Expr *Ptr) {
-  if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
-    return true;
-  if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
-    return true;
-  if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
-    const CXXMethodDecl *MD = MCE->getMethodDecl();
-    const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
-
-    if (MD && RD && RD->isInStdNamespace())
-      if (MD->getName() == "c_str" && RD->getName() == "basic_string")
-        return true;
-  }
-  return false;
-}
+// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
+// character versions.  Calls to these functions can be safe if their arguments
+// are carefully made safe.
+AST_MATCHER(FunctionDecl, isNormalPrintfFunc) {
+  auto *II = Node.getIdentifier();
 
-// Return true iff at least one of following cases holds:
-//  1. Format string is a literal and there is an unsafe pointer argument
-//     corresponding to an `s` specifier;
-//  2. Format string is not a literal and there is least an unsafe pointer
-//     argument (including the formatter argument).
-static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
-                                  const CallExpr *Call, ASTContext &Ctx) {
-  if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
-    StringRef FmtStr = SL->getString();
-    auto I = FmtStr.begin();
-    auto E = FmtStr.end();
-    unsigned ArgIdx = FmtArgIdx;
-
-    do {
-      ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
-          I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
-      if (ArgIdx && Call->getNumArgs() > ArgIdx &&
-          !isNullTermPointer(Call->getArg(ArgIdx)))
-        return true;
-    } while (ArgIdx);
+  if (!II)
     return false;
-  }
-  // If format is not a string literal, we cannot analyze the format string.
-  // In this case, this call is considered unsafe if at least one argument
-  // (including the format argument) is unsafe pointer.
-  return llvm::any_of(
-      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
-      [](const Expr *Arg) {
-        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
-      });
-}
 
-// Matches a call to one of the `-printf" functions (excluding the ones with
-// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
-// fail to guarantee their null-termination.  In other words, these calls are
-// safe if they use null-termination guaranteed string pointers.
-AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
-  StringRef Name = CoreName;
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
 
   if (!Name.ends_with("printf") || Name.starts_with("v"))
     return false;
@@ -659,50 +741,76 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
   if (Prefix.ends_with("w"))
     Prefix = Prefix.drop_back(1);
 
-  if (Prefix.empty() ||
-      Prefix == "k") // printf: all pointer args should be null-terminated
-    return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node,
-                                 Finder->getASTContext());
-  if (Prefix == "f")
-    return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node,
-                                 Finder->getASTContext());
-  if (Prefix == "sn") {
-    // The first two arguments need to be in safe patterns, which is checked
-    // by `isSafeSizedby`:
-    return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node,
-                                 Finder->getASTContext());
+  return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn";
+}
+
+// This matcher requires that it is known that the callee `isNormalPrintf`.
+// Then if the format string is a string literal, this matcher matches when at
+// least one string argument is unsafe. If the format is not a string literal,
+// this matcher matches when at least one pointer type argument is unsafe.
+AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
+  // Determine what printf it is:
+  const Expr *FirstArg = Node.getArg(0);
+  ASTContext &Ctx = Finder->getASTContext();
+
+  if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) {
+    // It is a printf/kprintf. And, the format is a string literal:
+    bool isKprintf = false;
+    if (auto *Callee = Node.getDirectCallee())
+      if (auto *II = Node.getDirectCallee()->getIdentifier())
+        isKprintf = II->getName() == "kprintf";
+    return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf);
+  }
+
+  QualType PtrTy = FirstArg->getType();
+
+  assert(PtrTy->isPointerType());
+
+  QualType PteTy = (cast<PointerType>(PtrTy))->getPointeeType();
+
+  if (!Ctx.getFILEType().isNull() /* If `FILE *` is not ever in the ASTContext,
+                                     there can't be any file pointer then */
+      && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
+    // It is a fprintf:
+    return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false);
   }
-  return false; // A call to a "-printf" falls into another category.
+
+  const Expr *SecondArg = Node.getArg(1);
+
+  if (SecondArg->getType()->isIntegerType()) {
+    // It is a snprintf:
+    return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false);
+  }
+  // It is printf but the format string is passed by pointer. The only thing we
+  // can do is to require all pointers to be null-terminated:
+  return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool {
+    return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+  });
 }
 
-// Matches a call to one of the `snprintf` functions (excluding the ones with
-// va_list) such that the first two arguments fail to conform to safe patterns.
+// This matcher requires that it is known that the callee `isNormalPrintf`.
+// Then it matches if the first two arguments of the call is a pointer and an
+// integer and they are not in a safe pattern.
 //
 // For the first two arguments: `ptr` and `size`, they are safe if in the
 // following patterns:
 //    ptr := DRE.data();
 //    size:= DRE.size()/DRE.size_bytes()
 // And DRE is a hardened container or view.
-AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
-  StringRef Name = CoreName;
-
-  if (!Name.ends_with("printf") || Name.starts_with("v"))
-    return false; // not snprint or use va_list
+AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
+  if (Node.getNumArgs() < 3)
+    return false; // not an snprintf call
 
-  StringRef Prefix = Name.drop_back(6);
+  const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
-  if (Prefix.ends_with("w"))
-    Prefix = Prefix.drop_back(1);
-
-  if (Prefix != "sn")
-    return false; // not snprint
+  if (!Buf->getType()->isPointerType() || !Size->getType()->isIntegerType())
+    return false; // not an snprintf call
 
   static StringRef SizedObjs[] = {"span", "array", "vector",
                                   "basic_string_view", "basic_string"};
-  const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts();
-  const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts();
-
-  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr))
+  Buf = Buf->IgnoreParenImpCasts();
+  Size = Size->IgnoreParenImpCasts();
+  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf))
     if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
       auto *DREOfPtr = dyn_cast<DeclRefExpr>(
           MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
@@ -730,86 +838,7 @@ AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) {
     }
   return true; // ptr and size are not in safe pattern
 }
-} // namespace libc_fun_disjoint_inner_matchers
-
-// Match call to a function whose name may have prefixes like "__builtin_" or
-// "__asan_" and suffixes like "_s" or "_chk".  This matcher takes an argument,
-// which should be applied to the core name---the subtring after stripping off
-// prefix and suffix of the function name.
-// The application results in an inner matcher that matches the call node with
-// respect to the core name of the callee.
-AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
-              std::function<internal::Matcher<CallExpr>(StringRef)>,
-              InnerMatcher) {
-  //  Given a function name, returns its core name `CoreName` according to the
-  //  following grammar.
-  //
-  //  LibcName     := CoreName | CoreName + "_s"
-  //  MatchingName := "__builtin_" + LibcName              |
-  //                  "__builtin___" + LibcName + "_chk"   |
-  //                  "__asan_" + LibcName
-  //
-  struct NameParser {
-    StringRef matchName(StringRef FunName, bool isBuiltin) {
-      // Try to match __builtin_:
-      if (isBuiltin && FunName.starts_with("__builtin_"))
-        // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
-        // no match:
-        return matchLibcNameOrBuiltinChk(
-            FunName.drop_front(10 /* truncate "__builtin_" */));
-      // Try to match __asan_:
-      if (FunName.starts_with("__asan_"))
-        return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
-      return matchLibcName(FunName);
-    }
-
-    // Parameter `Name` is the substring after stripping off the prefix
-    // "__builtin_".
-    StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
-      if (Name.starts_with("__") && Name.ends_with("_chk"))
-        return matchLibcName(
-            Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
-      return matchLibcName(Name);
-    }
-
-    StringRef matchLibcName(StringRef Name) {
-      if (Name.ends_with("_s"))
-        return Name.drop_back(2 /* truncate "_s" */);
-      return Name;
-    }
-  } TheLittleParser;
-
-  const FunctionDecl *FD = Node.getDirectCallee();
-  const IdentifierInfo *II;
-
-  if (!FD)
-    return false;
-  II = FD->getIdentifier();
-  // If this is a special C++ name without IdentifierInfo, it can't be a
-  // C library function.
-  if (!II)
-    return false;
-
-  // Look through 'extern "C"' and anything similar invented in the future.
-  // In general, C library functions will be in the TU directly.
-  if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
-    // If that's not the case, we also consider "C functions" re-declared in
-    // `std` namespace.
-    if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace())
-      return false;
-  }
-
-  // If this function is not externally visible, it is not a C library function.
-  // Note that we make an exception for inline functions, which may be
-  // declared in header files without external linkage.
-  if (!FD->isInlined() && !FD->isExternallyVisible())
-    return false;
-
-  StringRef CoreName =
-      TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
-
-  return InnerMatcher(CoreName).matches(Node, Finder, Builder);
-}
+} // namespace libc_func_matchers
 } // namespace clang::ast_matchers
 
 namespace {
@@ -1420,34 +1449,49 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
   UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
       : WarningGadget(Kind::UnsafeLibcFunctionCall),
         Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
-    if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf"))
+    if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSprintfTag))
       WarnedFunKind = SPRINTF;
-    else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string"))
+    else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeStringTag))
       WarnedFunKind = STRING;
-    else if (Result.Nodes.getNodeAs<CallExpr>(
-                 "UnsafeLibcFunctionCall_sized_by"))
+    else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag))
       WarnedFunKind = SIZED_BY;
-    else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list"))
+    else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeVaListTag))
       WarnedFunKind = VA_LIST;
   }
 
   static Matcher matcher() {
-    auto anyOfLibcInnerMatcher = [](StringRef S) {
-      return anyOf(
-          libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S),
-          callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S))
-              .bind(UnsafeStringTag),
-          callExpr(
-              libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S))
-              .bind(UnsafeSizedByTag),
-          callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S))
-              .bind(UnsafeSprintfTag),
-          callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S))
-              .bind(UnsafeVaListTag));
-    };
-
     return stmt(
-        callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag));
+        stmt(
+            anyOf(
+                // Match a call to a predefined unsafe libc function (unless the
+                // call has a sole string literal argument):
+                callExpr(callee(functionDecl(
+                             libc_func_matchers::isPredefinedUnsafeLibcFunc())),
+                         unless(allOf(hasArgument(0, expr(stringLiteral())),
+                                      hasNumArgs(1)))),
+                // Match a call to one of the `v*printf` functions taking
+                // va-list, which cannot be checked at compile-time:
+                callExpr(callee(functionDecl(
+                             libc_func_matchers::isUnsafeVaListPrintfFunc())))
+                    .bind(UnsafeVaListTag),
+                // Match a call to a `sprintf` function, which is never safe:
+                callExpr(callee(functionDecl(
+                             libc_func_matchers::isUnsafeSprintfFunc())))
+                    .bind(UnsafeSprintfTag),
+                // Match a call to an `snprintf` function. And first two
+                // arguments of the call (that describe a buffer) are not in
+                // safe patterns:
+                callExpr(callee(functionDecl(
+                             libc_func_matchers::isNormalPrintfFunc())),
+                         libc_func_matchers::hasUnsafeSnprintfBuffer())
+                    .bind(UnsafeSizedByTag),
+                // Match a call to a `printf` function, which can be safe if all
+                // arguments are null-terminated:
+                callExpr(callee(functionDecl(
+                             libc_func_matchers::isNormalPrintfFunc())),
+                         libc_func_matchers::hasUnsafePrintfStringArg())
+                    .bind(UnsafeStringTag)))
+            .bind(Tag));
   }
 
   const Stmt *getBaseStmt() const { return Call; }
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 53ade2df5c311b..b0f3b4ed61acc4 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2294,11 +2294,9 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
 
   void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
                             ASTContext &Ctx) override {
-    // We have checked that there is a direct callee with an identifier name:
-    StringRef Name = Call->getDirectCallee()->getName();
-
     S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
-        << Name << Call->getSourceRange();
+        << Call->getDirectCallee() // We've checked there is a direct callee
+        << Call->getSourceRange();
     if (PrintfInfo > 0)
       S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call)
           << PrintfInfo << Call->getSourceRange();
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
index eebbc381a262ff..61a316456ace6a 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
@@ -37,14 +37,14 @@ namespace std {
 }
 
 void f(char * p, char * q, std::span<char> s) {
-  std::memcpy();              // expected-warning{{function memcpy introduces unsafe buffer access}}
-  std::strcpy();              // expected-warning{{function strcpy introduces unsafe buffer access}}
-  std::__1::memcpy();              // expected-warning{{function memcpy introduces unsafe buffer access}}
-  std::__1::strcpy();              // expected-warning{{function strcpy introduces unsafe buffer access}}
+  std::memcpy();              // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+  std::strcpy();              // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
+  std::__1::memcpy();              // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+  std::__1::strcpy();              // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
 
   /* Test printfs */
-  std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
-  std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
   std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
   std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
 }
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index c3d7f8fd05435b..0eeedff611b588 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -11,7 +11,10 @@ unsigned strlen( const char* str );
 int fprintf( FILE* stream, const char* format, ... );
 int printf( const char* format, ... );
 int sprintf( char* buffer, const char* format, ... );
+int swprintf( char* buffer, const char* format, ... );
 int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... );
 int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
 int sscanf_s(const char * buffer, const char * format, ...);
 int sscanf(const char * buffer, const char * format, ... );
@@ -49,31 +52,36 @@ namespace std {
 }
 
 void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
-  memcpy();                   // expected-warning{{function memcpy introduces unsafe buffer access}}
-  std::memcpy();              // expected-warning{{function memcpy introduces unsafe buffer access}}
-  __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}}
-  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}}
-  __asan_memcpy();                      // expected-warning{{function __asan_memcpy introduces unsafe buffer access}}
-  strcpy();                   // expected-warning{{function strcpy introduces unsafe buffer access}}
-  std::strcpy();              // expected-warning{{function strcpy introduces unsafe buffer access}}
-  strcpy_s();                 // expected-warning{{function strcpy_s introduces unsafe buffer access}}
-  wcscpy_s();                 // expected-warning{{function wcscpy_s introduces unsafe buffer access}}
+  memcpy();                   // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+  std::memcpy();              // expected-warning{{function 'memcpy' introduces unsafe buffer access}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' introduces unsafe buffer access}}
+  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function '__builtin___memcpy_chk' introduces unsafe buffer access}}
+  __asan_memcpy();                      // expected-warning{{function '__asan_memcpy' introduces unsafe buffer access}}
+  strcpy();                   // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
+  std::strcpy();              // expected-warning{{function 'strcpy' introduces unsafe buffer access}}
+  strcpy_s();                 // expected-warning{{function 'strcpy_s' introduces unsafe buffer access}}
+  wcscpy_s();                 // expected-warning{{function 'wcscpy_s' introduces unsafe buffer access}}
 
 
   /* Test printfs */
-  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
-  printf("%s%d", p, *p);  // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
-  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
-  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
-  snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
-  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
-  sscanf(p, "%s%d", "hello", *p);    // expected-warning{{function sscanf introduces unsafe buffer access}}
-  sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function sscanf_s introduces unsafe buffer access}}
-  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", p, *p);  // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+  swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
+  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
+  sscanf(p, "%s%d", "hello", *p);    // expected-warning{{function 'sscanf' introduces unsafe buffer access}}
+  sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function 'sscanf_s' introduces unsafe buffer access}}
+  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
   printf("%s%d", "hello", *p); // no warn
   snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
   snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+  snwprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+  snwprintf_s(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
   strlen("hello");// no warn
 }
 
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 668efe0e178b53..22a43490551c20 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -3,4 +3,4 @@
 typedef unsigned __darwin_size_t;
 typedef __darwin_size_t size_t;
  #define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}}
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function '__builtin_bzero' introduces unsafe buffer access}}

>From 276dee85324cc2df2a6c6461330609c57b833dc5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 28 Aug 2024 17:00:49 -0700
Subject: [PATCH 4/5] Address more comments, mainly for improving diag
 messsages

---
 .../Analysis/Analyses/UnsafeBufferUsage.h     |   5 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   4 +-
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 142 +++++++++++-------
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  12 +-
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  15 +-
 5 files changed, 112 insertions(+), 66 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 3e0fae6db7562d..aa2c01ad10d45d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -115,8 +115,11 @@ class UnsafeBufferUsageHandler {
   ///  safe pattern;
   ///  is 3 if string arguments do not guarantee null-termination
   ///  is 4 if the callee takes va_list
+  /// \param UnsafeArg one of the actual arguments that is unsafe, non-null
+  /// only when `2 <= PrintfInfo <= 3`
   virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
-                                    ASTContext &Ctx) = 0;
+                                    ASTContext &Ctx,
+                                    const Expr *UnsafeArg = nullptr) = 0;
 
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7e1e3686ce6554..3d8dcfaf3a7132 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12388,8 +12388,8 @@ def warn_unsafe_buffer_libc_call : Warning<
   InGroup<UnsafeBufferUsage>, DefaultIgnore;
 def note_unsafe_buffer_printf_call : Note<
   "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match"
-          "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination"
-          "| do not use va_list that cannot be checked at compile-time for bounds safety}0">;
+          "| use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination"
+          "| do not use va_list, which cannot be checked at compile-time for bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a1526125edf098..7bcc3c05dee98c 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -521,16 +521,22 @@ static bool isNullTermPointer(const Expr *Ptr) {
 //     corresponding to an `s` specifier;
 //  2. Format string is not a literal and there is least an unsafe pointer
 //     argument (including the formatter argument).
-static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
-                                  ASTContext &Ctx, bool isKprintf = false) {
+//
+// `UnsafeArg` is the output argument that will be set only if this function
+// returns true.
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
+                                  const unsigned FmtArgIdx, ASTContext &Ctx,
+                                  bool isKprintf = false) {
   class StringFormatStringHandler
       : public analyze_format_string::FormatStringHandler {
     const CallExpr *Call;
     unsigned FmtArgIdx;
+    const Expr *&UnsafeArg;
 
   public:
-    StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx)
-        : Call(Call), FmtArgIdx(FmtArgIdx) {}
+    StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
+                              const Expr *&UnsafeArg)
+        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
 
     bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                                const char *startSpecifier,
@@ -541,8 +547,11 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
         unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
 
         if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
-          if (!isNullTermPointer(Call->getArg(ArgIdx)))
-            return false; // stop parsing
+          if (!isNullTermPointer(Call->getArg(ArgIdx))) {
+            UnsafeArg = Call->getArg(ArgIdx); // output
+            // returning false stops parsing immediately
+            return false;
+          }
       }
       return true; // continue parsing
     }
@@ -552,7 +561,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
 
   if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
     StringRef FmtStr = SL->getString();
-    StringFormatStringHandler Handler(Call, FmtArgIdx);
+    StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
 
     return analyze_format_string::ParsePrintfString(
         Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -563,8 +572,12 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
   // (including the format argument) is unsafe pointer.
   return llvm::any_of(
       llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
-      [](const Expr *Arg) {
-        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+      [&UnsafeArg](const Expr *Arg) -> bool {
+        if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
+          UnsafeArg = Arg;
+          return true;
+        }
+        return false;
       });
 }
 
@@ -748,7 +761,9 @@ AST_MATCHER(FunctionDecl, isNormalPrintfFunc) {
 // Then if the format string is a string literal, this matcher matches when at
 // least one string argument is unsafe. If the format is not a string literal,
 // this matcher matches when at least one pointer type argument is unsafe.
-AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
+AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
+              clang::ast_matchers::internal::Matcher<Expr>,
+              UnsafeStringArgMatcher) {
   // Determine what printf it is:
   const Expr *FirstArg = Node.getArg(0);
   ASTContext &Ctx = Finder->getASTContext();
@@ -756,10 +771,14 @@ AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
   if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) {
     // It is a printf/kprintf. And, the format is a string literal:
     bool isKprintf = false;
+    const Expr *UnsafeArg;
+
     if (auto *Callee = Node.getDirectCallee())
       if (auto *II = Node.getDirectCallee()->getIdentifier())
         isKprintf = II->getName() == "kprintf";
-    return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf);
+    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf))
+      return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+    return false;
   }
 
   QualType PtrTy = FirstArg->getType();
@@ -772,20 +791,31 @@ AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) {
                                      there can't be any file pointer then */
       && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
     // It is a fprintf:
-    return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false);
+    const Expr *UnsafeArg;
+
+    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false))
+      return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+    return false;
   }
 
   const Expr *SecondArg = Node.getArg(1);
 
   if (SecondArg->getType()->isIntegerType()) {
     // It is a snprintf:
-    return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false);
+    const Expr *UnsafeArg;
+
+    if (unsigned UnsafeArgIdx =
+            hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false))
+      return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+    return false;
   }
   // It is printf but the format string is passed by pointer. The only thing we
   // can do is to require all pointers to be null-terminated:
-  return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool {
-    return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
-  });
+  for (auto Arg : Node.arguments())
+    if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg))
+      if (UnsafeStringArgMatcher.matches(*Arg, Finder, Builder))
+        return true;
+  return false;
 }
 
 // This matcher requires that it is known that the callee `isNormalPrintf`.
@@ -1423,6 +1453,7 @@ class DataInvocationGadget : public WarningGadget {
 
 class UnsafeLibcFunctionCallGadget : public WarningGadget {
   const CallExpr *const Call;
+  const Expr *UnsafeArg = nullptr;
   constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
   // Extra tags for additional information:
   constexpr static const char *const UnsafeSprintfTag =
@@ -1437,8 +1468,8 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
   enum UnsafeKind {
     OTHERS = 0,   // no specific information, the callee function is unsafe
     SPRINTF = 1,  // never call `-sprintf`s, call `-snprintf`s instead.
-    SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but
-                  // they do not conform to safe patterns
+    SIZED_BY = 2, // the first two arguments of `snprintf` function have
+                  // "__sized_by" relation but they do not conform to safe patterns
     STRING = 3,   // an argument is a pointer-to-char-as-string but does not
                   // guarantee null-termination
     VA_LIST = 4,  // one of the `-printf`s function that take va_list, which is
@@ -1449,49 +1480,52 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
   UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
       : WarningGadget(Kind::UnsafeLibcFunctionCall),
         Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
-    if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSprintfTag))
+    if (Result.Nodes.getNodeAs<Decl>(UnsafeSprintfTag))
       WarnedFunKind = SPRINTF;
-    else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeStringTag))
+    else if (auto *E = Result.Nodes.getNodeAs<Expr>(UnsafeStringTag)) {
       WarnedFunKind = STRING;
-    else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag))
+      UnsafeArg = E;
+    } else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) {
       WarnedFunKind = SIZED_BY;
-    else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeVaListTag))
+      UnsafeArg = Call->getArg(0);
+    } else if (Result.Nodes.getNodeAs<Decl>(UnsafeVaListTag))
       WarnedFunKind = VA_LIST;
   }
 
   static Matcher matcher() {
-    return stmt(
-        stmt(
-            anyOf(
-                // Match a call to a predefined unsafe libc function (unless the
-                // call has a sole string literal argument):
-                callExpr(callee(functionDecl(
-                             libc_func_matchers::isPredefinedUnsafeLibcFunc())),
-                         unless(allOf(hasArgument(0, expr(stringLiteral())),
-                                      hasNumArgs(1)))),
-                // Match a call to one of the `v*printf` functions taking
-                // va-list, which cannot be checked at compile-time:
-                callExpr(callee(functionDecl(
-                             libc_func_matchers::isUnsafeVaListPrintfFunc())))
+    return stmt(anyOf(
+        callExpr(
+            callee(functionDecl(anyOf(
+                // Match a predefined unsafe libc
+                // function:
+                functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()),
+                // Match a call to one of the `v*printf` functions
+                // taking va-list, which cannot be checked at
+                // compile-time:
+                functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc())
                     .bind(UnsafeVaListTag),
-                // Match a call to a `sprintf` function, which is never safe:
-                callExpr(callee(functionDecl(
-                             libc_func_matchers::isUnsafeSprintfFunc())))
-                    .bind(UnsafeSprintfTag),
-                // Match a call to an `snprintf` function. And first two
-                // arguments of the call (that describe a buffer) are not in
-                // safe patterns:
-                callExpr(callee(functionDecl(
-                             libc_func_matchers::isNormalPrintfFunc())),
-                         libc_func_matchers::hasUnsafeSnprintfBuffer())
-                    .bind(UnsafeSizedByTag),
-                // Match a call to a `printf` function, which can be safe if all
-                // arguments are null-terminated:
-                callExpr(callee(functionDecl(
-                             libc_func_matchers::isNormalPrintfFunc())),
-                         libc_func_matchers::hasUnsafePrintfStringArg())
-                    .bind(UnsafeStringTag)))
-            .bind(Tag));
+                // Match a call to a `sprintf` function, which is never
+                // safe:
+                functionDecl(libc_func_matchers::isUnsafeSprintfFunc())
+                    .bind(UnsafeSprintfTag)))),
+            //  (unless the call has a sole string literal argument):
+            unless(
+                allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))),
+
+        // The following two cases require checking against actual
+        // arguments of the call:
+
+        // Match a call to an `snprintf` function. And first two
+        // arguments of the call (that describe a buffer) are not in
+        // safe patterns:
+        callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
+                 libc_func_matchers::hasUnsafeSnprintfBuffer())
+            .bind(UnsafeSizedByTag),
+        // Match a call to a `printf` function, which can be safe if
+        // all arguments are null-terminated:
+        callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
+                 libc_func_matchers::hasUnsafePrintfStringArg(
+                     expr().bind(UnsafeStringTag)))));
   }
 
   const Stmt *getBaseStmt() const { return Call; }
@@ -1501,7 +1535,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
   void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
                              bool IsRelatedToDecl,
                              ASTContext &Ctx) const override {
-    Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx);
+    Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx, UnsafeArg);
   }
 
   DeclUseList getClaimedVarUseSites() const override { return {}; }
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index b0f3b4ed61acc4..598f095f541110 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2293,13 +2293,17 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
   }
 
   void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
-                            ASTContext &Ctx) override {
+                            ASTContext &Ctx,
+                            const Expr *UnsafeArg = nullptr) override {
     S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
         << Call->getDirectCallee() // We've checked there is a direct callee
         << Call->getSourceRange();
-    if (PrintfInfo > 0)
-      S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call)
-          << PrintfInfo << Call->getSourceRange();
+    if (PrintfInfo > 0) {
+      SourceRange R =
+          UnsafeArg ? UnsafeArg->getSourceRange() : Call->getSourceRange();
+      S.Diag(R.getBegin(), diag::note_unsafe_buffer_printf_call)
+          << PrintfInfo << R;
+    }
   }
 
   void handleUnsafeOperationInContainer(const Stmt *Operation,
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 0eeedff611b588..c116289ee96a69 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -64,18 +64,23 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
 
 
   /* Test printfs */
-  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
-  printf("%s%d", p, *p);  // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", // expected-warning{{function 'printf' introduces unsafe buffer access}}
+	 p,    // expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}} note attached to the unsafe argument
+	 *p);
   sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
   swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}}
   snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
   snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
   snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
-  snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}}
-  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}}
+  snwprintf_s(                      // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}}
+	      s.data(),             // expected-note{{buffer pointer and size may not match}} // note attached to the buffer
+	      s2.size(),
+	      "%s%d", "hello", *p);
+  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list, which cannot be checked at compile-time for bounds safety}}
   sscanf(p, "%s%d", "hello", *p);    // expected-warning{{function 'sscanf' introduces unsafe buffer access}}
   sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function 'sscanf_s' introduces unsafe buffer access}}
-  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
+  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
   printf("%s%d", "hello", *p); // no warn
   snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn

>From f85a487c4cbb57678081fd95bfe5e2bf691f7226 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 28 Aug 2024 17:19:40 -0700
Subject: [PATCH 5/5] clean up include list

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 7bcc3c05dee98c..8e62ae4bbda6f1 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,25 +9,19 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
-#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
-#include "clang/AST/ExprCXX.h"
 #include "clang/AST/FormatString.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
-#include "clang/AST/Type.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
 #include <memory>
 #include <optional>



More information about the cfe-commits mailing list