[clang] [-Wunsafe-buffer-usage] Support safe patterns of "%.*s" in printf functions (PR #145862)
Ziqing Luo via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 17 02:59:17 PDT 2025
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/145862
>From 28cda3e7ea3f14305342d3ea663646df164ff044 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Tue, 24 Jun 2025 16:45:30 +0800
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Support safe patterns of "%.*s"
in printf functions
The character buffer passed to a "%s" specifier may be safely bound if
the precision is specified, even if the buffer is not null-terminated.
For example,
```
void f(std::span<char> span) {
printf("%.*s", (int)span.size(), span.data()); // `span.data()` may not be null-terminated but is safely bound by `span.size()`,
// so this call is safe
}
```
rdar://154072130
---
clang/lib/Analysis/UnsafeBufferUsage.cpp | 84 +++++++++++++++----
...arn-unsafe-buffer-usage-libc-functions.cpp | 36 ++++++++
2 files changed, 105 insertions(+), 15 deletions(-)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index ac47b12cc8d72..580ae583bc642 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -25,6 +25,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -809,28 +810,81 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
const CallExpr *Call;
unsigned FmtArgIdx;
const Expr *&UnsafeArg;
+ ASTContext &Ctx;
+
+ // Returns an `Expr` representing the precision if specified, null
+ // otherwise:
+ const Expr *
+ getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision,
+ const CallExpr *Call) {
+ unsigned PArgIdx = -1;
+
+ if (Precision.hasDataArgument())
+ PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
+ if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+ const Expr *PArg = Call->getArg(PArgIdx);
+
+ // Strip the cast if `PArg` is a cast-to-int expression:
+ if (auto *CE = dyn_cast<CastExpr>(PArg);
+ CE && CE->getType()->isSignedIntegerType())
+ PArg = CE->getSubExpr();
+ return PArg;
+ }
+ if (Precision.getHowSpecified() ==
+ analyze_printf::OptionalAmount::HowSpecified::Constant) {
+ auto SizeTy = Ctx.getSizeType();
+ llvm::APSInt PArgVal = llvm::APSInt(
+ llvm::APInt(Ctx.getTypeSize(SizeTy), Precision.getConstantAmount()),
+ true);
+
+ return IntegerLiteral::Create(Ctx, PArgVal, Ctx.getSizeType(), {});
+ }
+ return nullptr;
+ }
public:
StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
- const Expr *&UnsafeArg)
- : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
+ const Expr *&UnsafeArg, ASTContext &Ctx)
+ : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg), Ctx(Ctx) {}
bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen,
const TargetInfo &Target) override {
- if (FS.getConversionSpecifier().getKind() ==
- analyze_printf::PrintfConversionSpecifier::sArg) {
- unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
-
- if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
- if (!isNullTermPointer(Call->getArg(ArgIdx))) {
- UnsafeArg = Call->getArg(ArgIdx); // output
- // returning false stops parsing immediately
- return false;
- }
- }
- return true; // continue parsing
+ if (FS.getConversionSpecifier().getKind() !=
+ analyze_printf::PrintfConversionSpecifier::sArg)
+ return true; // continue parsing
+
+ unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+ if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+ // If the `ArgIdx` is invalid, give up.
+ return true; // continue parsing
+
+ const Expr *Arg = Call->getArg(ArgIdx);
+
+ if (isNullTermPointer(Arg))
+ // If Arg is a null-terminated pointer, it is safe anyway.
+ return true; // continue parsing
+
+ // Otherwise, check if the specifier has a precision and if the character
+ // pointer is safely bound by the precision:
+ auto LengthModifier = FS.getLengthModifier();
+ QualType ArgType = Arg->getType();
+ bool IsArgTypeValid = // Is ArgType a character pointer type?
+ ArgType->isPointerType() &&
+ (LengthModifier.getKind() == LengthModifier.AsWideChar
+ ? ArgType->getPointeeType()->isWideCharType()
+ : ArgType->getPointeeType()->isCharType());
+
+ ArgType.dump();
+ if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call);
+ Precision && IsArgTypeValid)
+ if (isPtrBufferSafe(Arg, Precision, Ctx))
+ return true;
+ // Handle unsafe case:
+ UnsafeArg = Call->getArg(ArgIdx); // output
+ return false; // returning false stops parsing immediately
}
};
@@ -846,7 +900,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
else
goto CHECK_UNSAFE_PTR;
- StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
+ StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
return analyze_format_string::ParsePrintfString(
Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index d6c32ca7baa5d..6ae329a736d91 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -47,11 +47,24 @@ namespace std {
T *c_str();
T *data();
unsigned size_bytes();
+ unsigned size();
};
typedef basic_string<char> string;
typedef basic_string<wchar_t> wstring;
+ template<typename T>
+ struct basic_string_view {
+ T *c_str() const noexcept;
+ T *data() const noexcept;
+ unsigned size();
+ const T* begin() const noexcept;
+ const T* end() const noexcept;
+ };
+
+ typedef basic_string_view<char> string_view;
+ typedef basic_string_view<wchar_t> wstring_view;
+
// C function under std:
void memcpy();
void strcpy();
@@ -134,6 +147,29 @@ void safe_examples(std::string s1, int *p) {
snprintf(nullptr, 0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
}
+void test_sarg_precision(std::string Str, std::string_view Sv, std::wstring_view WSv,
+ std::span<char> SpC, std::span<int> SpI) {
+ printf("%.*s");
+ printf("%.*s", (int)Str.size(), Str.data());
+ printf("%.*s", (int)Str.size_bytes(), Str.data());
+ printf("%.*s", (int)Sv.size(), Sv.data());
+ printf("%.*s", (int)SpC.size(), SpC.data());
+ printf("%.*s", SpC.size(), SpC.data());
+ printf("%.*ls", WSv.size(), WSv.data());
+ printf("%.*s", SpC.data()); // no warn because `SpC.data()` is passed to the precision while the actually string pointer is not given
+
+ printf("%.*s", SpI.size(), SpI.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+ printf("%.*s", SpI.size(), SpC.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+ printf("%.*s", WSv.size(), WSv.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+
+ char a[10];
+ int b[10];
+
+ printf("%.10s", a);
+ printf("%.11s", a); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+ printf("%.10s", b); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+}
+
void g(char *begin, char *end, char *p, std::span<char> s) {
std::copy(begin, end, p); // no warn
>From 470e9d95bd06f5a9f55247deead9206e79f1af8d Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 17 Jul 2025 17:50:59 +0800
Subject: [PATCH 2/2] Address comments
---
clang/lib/Analysis/UnsafeBufferUsage.cpp | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 580ae583bc642..40dff7eaf06bc 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -813,7 +813,13 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
ASTContext &Ctx;
// Returns an `Expr` representing the precision if specified, null
- // otherwise:
+ // otherwise.
+ // The parameter `Call` is a printf call and the parameter `Precision` is
+ // the precision of a format specifier of the `Call`.
+ //
+ // For example, for the `printf("%d, %.10s", 10, p)` call
+ // `Precision` can be the precision of either "%d" or "%.10s". The former
+ // one will have `NotSpecified` kind.
const Expr *
getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision,
const CallExpr *Call) {
@@ -877,7 +883,6 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
? ArgType->getPointeeType()->isWideCharType()
: ArgType->getPointeeType()->isCharType());
- ArgType.dump();
if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call);
Precision && IsArgTypeValid)
if (isPtrBufferSafe(Arg, Precision, Ctx))
More information about the cfe-commits
mailing list