[clang] [-Wunsafe-buffer-usage] Support safe patterns of "%.*s" in printf functions (PR #145862)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 26 02:53:50 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-analysis

Author: Ziqing Luo (ziqingluo-90)

<details>
<summary>Changes</summary>

The character buffer passed to a "%.*s" specifier may be safely bound if
the precision is properly specified, even if the buffer does not guarantee null-termination.
For example,
```
void f(std::span<char> span) {
  printf("%.*s", (int)span.size(), span.data());  // "span.data()" does not guarantee null-termination but is safely bound by "span.size()", so this call is safe
}
```
rdar://154072130

---
Full diff: https://github.com/llvm/llvm-project/pull/145862.diff


2 Files Affected:

- (modified) clang/lib/Analysis/UnsafeBufferUsage.cpp (+176-116) 
- (modified) clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp (+39) 


``````````diff
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 6048169b56640..aa2d0e3172b6d 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -25,6 +25,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallSet.h"
@@ -453,22 +454,108 @@ static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) {
   }
 }
 
+// Providing that `Ptr` is a pointer and `Size` is an unsigned-integral
+// expression, returns true iff they follow one of the following safe
+// patterns:
+//  1. Ptr is `DRE.data()` and Size is `DRE.size()`, where DRE is a hardened
+//     container or view;
+//
+//  2. Ptr is `a` and Size is `n`, where `a` is of an array-of-T with constant
+//     size `n`;
+//
+//  3. Ptr is `&var` and Size is `1`; or
+//     Ptr is `std::addressof(...)` and Size is `1`;
+//
+//  4. Size is `0`;
+static bool isPtrBufferSafe(const Expr *Ptr, const Expr *Size,
+                            ASTContext &Ctx) {
+  // Pattern 1:
+  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts()))
+    if (auto *MCESize =
+            dyn_cast<CXXMemberCallExpr>(Size->IgnoreParenImpCasts())) {
+      auto *DREOfPtr = dyn_cast<DeclRefExpr>(
+          MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
+      auto *DREOfSize = dyn_cast<DeclRefExpr>(
+          MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
+
+      if (!DREOfPtr || !DREOfSize)
+        return false; // not in safe pattern
+      if (DREOfPtr->getDecl() != DREOfSize->getDecl())
+        return false;
+      if (MCEPtr->getMethodDecl()->getName() != "data")
+        return false;
+      // `MCEPtr->getRecordDecl()` must be non-null as `DREOfPtr` is non-null:
+      if (!MCEPtr->getRecordDecl()->isInStdNamespace())
+        return false;
+
+      auto *ObjII = MCEPtr->getRecordDecl()->getIdentifier();
+
+      if (!ObjII)
+        return false;
+
+      bool AcceptSizeBytes = Ptr->getType()->getPointeeType()->isCharType();
+
+      if (!((AcceptSizeBytes &&
+             MCESize->getMethodDecl()->getName() == "size_bytes") ||
+            // Note here the pointer must be a pointer-to-char type unless there
+            // is explicit casting.  If there is explicit casting, this branch
+            // is unreachable. Thus, at this branch "size" and "size_bytes" are
+            // equivalent as the pointer is a char pointer:
+            MCESize->getMethodDecl()->getName() == "size"))
+        return false;
+
+      return llvm::is_contained({SIZED_CONTAINER_OR_VIEW_LIST},
+                                ObjII->getName());
+    }
+
+  Expr::EvalResult ER;
+
+  // Pattern 2-4:
+  if (Size->EvaluateAsInt(ER, Ctx)) {
+    // Pattern 2:
+    if (auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
+      if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) {
+        llvm::APSInt SizeInt = ER.Val.getInt();
+
+        return llvm::APSInt::compareValues(
+                   SizeInt, llvm::APSInt(CAT->getSize(), true)) == 0;
+      }
+      return false;
+    }
+
+    // Pattern 3:
+    if (ER.Val.getInt().isOne()) {
+      if (auto *UO = dyn_cast<UnaryOperator>(Ptr->IgnoreParenImpCasts()))
+        return UO && UO->getOpcode() == UnaryOperator::Opcode::UO_AddrOf;
+      if (auto *CE = dyn_cast<CallExpr>(Ptr->IgnoreParenImpCasts())) {
+        auto *FnDecl = CE->getDirectCallee();
+
+        return FnDecl && FnDecl->getNameAsString() == "addressof" &&
+               FnDecl->isInStdNamespace();
+      }
+      return false;
+    }
+    // Pattern 4:
+    if (ER.Val.getInt().isZero())
+      return true;
+  }
+  return false;
+}
+
 // Given a two-param std::span construct call, matches iff the call has the
 // following forms:
 //   1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
 //   2. `std::span<T>{new T, 1}`
-//   3. `std::span<T>{&var, 1}` or `std::span<T>{std::addressof(...), 1}`
-//   4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
-//   `n`
-//   5. `std::span<T>{any, 0}`
-//   6. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where
+//   3. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where
 //       `f` is a function with attribute `alloc_size(N, M)`;
 //       `args` represents the list of arguments;
 //       `N, M` are parameter indexes to the allocating element number and size.
 //        Sometimes, there is only one parameter index representing the total
 //        size.
-//   7. `std::span<T>{x.begin(), x.end()}` where `x` is an object in the
+//   4. `std::span<T>{x.begin(), x.end()}` where `x` is an object in the
 //      SIZED_CONTAINER_OR_VIEW_LIST.
+//   5. `isPtrBufferSafe` returns true of the two arguments of the span
+//      constructor
 static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
                                         ASTContext &Ctx) {
   assert(Node.getNumArgs() == 2 &&
@@ -495,7 +582,7 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
     // Check form 5:
     return true;
 
-  // Check forms 1-3:
+  // Check forms 1-2:
   switch (Arg0->getStmtClass()) {
   case Stmt::CXXNewExprClass:
     if (auto Size = cast<CXXNewExpr>(Arg0)->getArraySize()) {
@@ -509,35 +596,11 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
       return Arg1CV && Arg1CV->isOne();
     }
     break;
-  case Stmt::UnaryOperatorClass:
-    if (cast<UnaryOperator>(Arg0)->getOpcode() ==
-        UnaryOperator::Opcode::UO_AddrOf)
-      // Check form 3:
-      return Arg1CV && Arg1CV->isOne();
-    break;
-  case Stmt::CallExprClass:
-    // Check form 3:
-    if (const auto *CE = dyn_cast<CallExpr>(Arg0)) {
-      const auto FnDecl = CE->getDirectCallee();
-      if (FnDecl && FnDecl->getNameAsString() == "addressof" &&
-          FnDecl->isInStdNamespace()) {
-        return Arg1CV && Arg1CV->isOne();
-      }
-    }
-    break;
   default:
     break;
   }
 
-  QualType Arg0Ty = Arg0->IgnoreImplicit()->getType();
-
-  if (auto *ConstArrTy = Ctx.getAsConstantArrayType(Arg0Ty)) {
-    const llvm::APSInt ConstArrSize = llvm::APSInt(ConstArrTy->getSize());
-
-    // Check form 4:
-    return Arg1CV && llvm::APSInt::compareValues(ConstArrSize, *Arg1CV) == 0;
-  }
-  // Check form 6:
+  // Check form 3:
   if (auto CCast = dyn_cast<CStyleCastExpr>(Arg0)) {
     if (!CCast->getType()->isPointerType())
       return false;
@@ -566,7 +629,7 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
         }
     }
   }
-  // Check form 7:
+  // Check form 4:
   auto IsMethodCallToSizedObject = [](const Stmt *Node, StringRef MethodName) {
     if (const auto *MC = dyn_cast<CXXMemberCallExpr>(Node)) {
       const auto *MD = MC->getMethodDecl();
@@ -592,7 +655,9 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
         cast<CXXMemberCallExpr>(Arg1)
             ->getImplicitObjectArgument()
             ->IgnoreParenImpCasts());
-  return false;
+
+  // Check 5:
+  return isPtrBufferSafe(Arg0, Arg1, Ctx);
 }
 
 static bool isSafeArraySubscript(const ArraySubscriptExpr &Node,
@@ -743,28 +808,81 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     const CallExpr *Call;
     unsigned FmtArgIdx;
     const Expr *&UnsafeArg;
+    ASTContext &Ctx;
+
+    // Returns an `Expr` representing the precision if specified, null
+    // otherwise:
+    const Expr *
+    getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision,
+                       const CallExpr *Call) {
+      unsigned PArgIdx = -1;
+
+      if (Precision.hasDataArgument())
+        PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
+      if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+        const Expr *PArg = Call->getArg(PArgIdx);
+
+        // Strip the cast if `PArg` is a cast-to-int expression:
+        if (auto *CE = dyn_cast<CastExpr>(PArg);
+            CE && CE->getType()->isSignedIntegerType())
+          PArg = CE->getSubExpr();
+        return PArg;
+      }
+      if (Precision.getHowSpecified() ==
+          analyze_printf::OptionalAmount::HowSpecified::Constant) {
+        auto SizeTy = Ctx.getSizeType();
+        llvm::APSInt PArgVal = llvm::APSInt(
+            llvm::APInt(Ctx.getTypeSize(SizeTy), Precision.getConstantAmount()),
+            true);
+
+        return IntegerLiteral::Create(Ctx, PArgVal, Ctx.getSizeType(), {});
+      }
+      return nullptr;
+    }
 
   public:
     StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
-                              const Expr *&UnsafeArg)
-        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
+                              const Expr *&UnsafeArg, ASTContext &Ctx)
+        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg), Ctx(Ctx) {}
 
     bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                                const char *startSpecifier,
                                unsigned specifierLen,
                                const TargetInfo &Target) override {
-      if (FS.getConversionSpecifier().getKind() ==
-          analyze_printf::PrintfConversionSpecifier::sArg) {
-        unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
-
-        if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
-          if (!isNullTermPointer(Call->getArg(ArgIdx))) {
-            UnsafeArg = Call->getArg(ArgIdx); // output
-            // returning false stops parsing immediately
-            return false;
-          }
-      }
-      return true; // continue parsing
+      if (FS.getConversionSpecifier().getKind() !=
+          analyze_printf::PrintfConversionSpecifier::sArg)
+        return true; // continue parsing
+
+      unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+      if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+        // If the `ArgIdx` is invalid, give up.
+        return true; // continue parsing
+
+      const Expr *Arg = Call->getArg(ArgIdx);
+
+      if (isNullTermPointer(Arg))
+        // If Arg is a null-terminated pointer, it is safe anyway.
+        return true; // continue parsing
+
+      // Otherwise, check if the specifier has a precision and if the character
+      // pointer is safely bound by the precision:
+      auto LengthModifier = FS.getLengthModifier();
+      QualType ArgType = Arg->getType();
+      bool IsArgTypeValid = // Is ArgType a character pointer type?
+          ArgType->isPointerType() &&
+          (LengthModifier.getKind() == LengthModifier.AsWideChar
+               ? ArgType->getPointeeType()->isWideCharType()
+               : ArgType->getPointeeType()->isCharType());
+
+      ArgType.dump();
+      if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call);
+          Precision && IsArgTypeValid)
+        if (isPtrBufferSafe(Arg, Precision, Ctx))
+          return true;
+      // Handle unsafe case:
+      UnsafeArg = Call->getArg(ArgIdx); // output
+      return false; // returning false stops parsing immediately
     }
   };
 
@@ -780,7 +898,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     else
       goto CHECK_UNSAFE_PTR;
 
-    StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
+    StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
 
     return analyze_format_string::ParsePrintfString(
         Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -1052,24 +1170,11 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
   return false;
 }
 
-// This matcher requires that it is known that the callee `isNormalPrintf`.
-// Then it matches if the first two arguments of the call is a pointer and an
-// integer and they are not in a safe pattern.
-//
-// For the first two arguments: `ptr` and `size`, they are safe if in the
-// following patterns:
-//
-// Pattern 1:
-//    ptr := DRE.data();
-//    size:= DRE.size()/DRE.size_bytes()
-// And DRE is a hardened container or view.
-//
-// Pattern 2:
-//    ptr := Constant-Array-DRE;
-//    size:= any expression that has compile-time constant value equivalent to
-//           sizeof (Constant-Array-DRE)
-static bool hasUnsafeSnprintfBuffer(const CallExpr &Node,
-                                    const ASTContext &Ctx) {
+// This function requires that it is known that the callee `isNormalPrintf`.
+// It returns true iff the first two arguments of the call is a pointer
+// `Ptr` and an unsigned integer `Size` and they are NOT safe, i.e.,
+// `!isPtrBufferSafe(Ptr, Size)`.
+static bool hasUnsafeSnprintfBuffer(const CallExpr &Node, ASTContext &Ctx) {
   const FunctionDecl *FD = Node.getDirectCallee();
 
   assert(FD && "It should have been checked that FD is non-null.");
@@ -1085,57 +1190,12 @@ static bool hasUnsafeSnprintfBuffer(const CallExpr &Node,
   QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
   const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
-  if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
-      !Size->getType()->isIntegerType())
+  if (FirstPteTy.isConstQualified() || !FirstPteTy->isAnyCharacterType() ||
+      !Buf->getType()->isPointerType() ||
+      !Size->getType()->isUnsignedIntegerType())
     return false; // not an snprintf call
 
-  // Pattern 1:
-  static StringRef SizedObjs[] = {SIZED_CONTAINER_OR_VIEW_LIST};
-  Buf = Buf->IgnoreParenImpCasts();
-  Size = Size->IgnoreParenImpCasts();
-  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf))
-    if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
-      auto *DREOfPtr = dyn_cast<DeclRefExpr>(
-          MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
-      auto *DREOfSize = dyn_cast<DeclRefExpr>(
-          MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
-
-      if (!DREOfPtr || !DREOfSize)
-        return true; // not in safe pattern
-      if (DREOfPtr->getDecl() != DREOfSize->getDecl())
-        return true; // not in safe pattern
-      if (MCEPtr->getMethodDecl()->getName() != "data")
-        return true; // not in safe pattern
-
-      if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
-          // Note here the pointer must be a pointer-to-char type unless there
-          // is explicit casting.  If there is explicit casting, this branch
-          // is unreachable. Thus, at this branch "size" and "size_bytes" are
-          // equivalent as the pointer is a char pointer:
-          MCESize->getMethodDecl()->getName() == "size")
-        for (StringRef SizedObj : SizedObjs)
-          if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
-              MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
-                  SizedObj)
-            return false; // It is in fact safe
-    }
-
-  // Pattern 2:
-  if (auto *DRE = dyn_cast<DeclRefExpr>(Buf->IgnoreParenImpCasts())) {
-    if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) {
-      Expr::EvalResult ER;
-      // The array element type must be compatible with `char` otherwise an
-      // explicit cast will be needed, which will make this check unreachable.
-      // Therefore, the array extent is same as its' bytewise size.
-      if (Size->EvaluateAsInt(ER, Ctx)) {
-        llvm::APSInt EVal = ER.Val.getInt(); // Size must have integer type
-
-        return llvm::APSInt::compareValues(
-                   EVal, llvm::APSInt(CAT->getSize(), true)) != 0;
-      }
-    }
-  }
-  return true; // ptr and size are not in safe pattern
+  return !isPtrBufferSafe(Buf, Size, Ctx);
 }
 } // namespace libc_func_matchers
 
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index a7c19bcac1607..6ae329a736d91 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -47,11 +47,24 @@ namespace std {
     T *c_str();
     T *data();
     unsigned size_bytes();
+    unsigned size();
   };
 
   typedef basic_string<char> string;
   typedef basic_string<wchar_t> wstring;
 
+  template<typename T>
+  struct basic_string_view {
+    T *c_str() const noexcept;
+    T *data()  const noexcept;
+    unsigned size();
+    const T* begin() const noexcept;
+    const T* end() const noexcept;
+  };
+
+  typedef basic_string_view<char> string_view;
+  typedef basic_string_view<wchar_t> wstring_view;
+
   // C function under std:
   void memcpy();
   void strcpy();
@@ -125,10 +138,36 @@ void safe_examples(std::string s1, int *p) {
   fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());        // no warn
 
   char a[10];
+  char c = 'c';
 
   snprintf(a, sizeof a, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str());         // no warn
   snprintf(a, sizeof(decltype(a)), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str());          // no warn
   snprintf(a, 10, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str());                // no warn
+  snprintf(&c, 1, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str());                // no warn
+  snprintf(nullptr, 0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str());           // no warn
+}
+
+void test_sarg_precision(std::string Str, std::string_view Sv, std::wstring_view WSv,
+			 std::span<char> SpC, std::span<int> SpI) {
+  printf("%.*s");
+  printf("%.*s", (int)Str.size(), Str.data());
+  printf("%.*s", (int)Str.size_bytes(), Str.data());
+  printf("%.*s", (int)Sv.size(), Sv.data());
+  printf("%.*s", (int)SpC.size(), SpC.data());
+  printf("%.*s", SpC.size(), SpC.data());
+  printf("%.*ls", WSv.size(), WSv.data());
+  printf("%.*s", SpC.data());  // no warn because `SpC.data()` is passed to the precision while the actually string pointer is not given
+
+  printf("%.*s", SpI.size(), SpI.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%.*s", SpI.size(), SpC.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%.*s", WSv.size(), WSv.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+
+  char a[10];
+  int  b[10];
+
+  printf("%.10s", a);
+  printf("%.11s", a); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%.10s", b); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
 }
 
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/145862


More information about the cfe-commits mailing list