[clang] Warn on unbounded scanf and invalid fopen modes (PR #190453)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Apr 4 02:50:48 PDT 2026
https://github.com/Pppp1116 created https://github.com/llvm/llvm-project/pull/190453
Adds a small `-Wformat-libc` subgroup under `-Wformat` for two libc-specific checks:
- warn on `%s` / `%[` without a field width when the destination is a fixed-size character array
- warn on invalid constant `fopen` mode strings
Validation:
- fresh build of `tools/clang/lib/Basic/CMakeFiles/obj.clangBasic.dir/DiagnosticIDs.cpp.o`
- fresh build of `tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/SemaChecking.cpp.o`
>From d683878c5dacfc3b3d62040d32b970d9fc5327c8 Mon Sep 17 00:00:00 2001
From: Pppp1116 <pcaiadoguerreiro at gmail.com>
Date: Sat, 4 Apr 2026 10:50:07 +0100
Subject: [PATCH] Warn on unbounded scanf and invalid fopen modes
---
clang/include/clang/Basic/DiagnosticGroups.td | 4 +-
.../clang/Basic/DiagnosticSemaKinds.td | 7 +
clang/lib/Sema/SemaChecking.cpp | 202 +++++++++++++++++-
clang/test/Sema/warn-format-libc.c | 47 ++++
4 files changed, 258 insertions(+), 2 deletions(-)
create mode 100644 clang/test/Sema/warn-format-libc.c
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index dc7280c66ea79..f458e8d8111e8 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1316,6 +1316,7 @@ def FormatY2K : DiagGroup<"format-y2k">;
def FormatPedantic : DiagGroup<"format-pedantic">;
def FormatSignedness : DiagGroup<"format-signedness">;
def FormatTypeConfusion : DiagGroup<"format-type-confusion">;
+// Libc-specific format checks: scanf string conversions without a field width
+// and invalid constant fopen mode strings.
+def FormatLibc : DiagGroup<"format-libc">;
def MissingFormatAttribute : DiagGroup<"missing-format-attribute">;
def FormatOverflowNonKprintf: DiagGroup<"format-overflow-non-kprintf">;
@@ -1326,7 +1327,8 @@ def FormatTruncation: DiagGroup<"format-truncation", [FormatTruncationNonKprintf
def Format : DiagGroup<"format",
[FormatExtraArgs, FormatZeroLength, NonNull,
FormatSecurity, FormatY2K, FormatInvalidSpecifier,
- FormatInsufficientArgs, FormatOverflow, FormatTruncation]>,
+ FormatInsufficientArgs, FormatOverflow,
+ FormatTruncation, FormatLibc]>,
DiagCategory<"Format String Issue">;
def FormatNonLiteral : DiagGroup<"format-nonliteral", [MissingFormatAttribute]>;
def Format2 : DiagGroup<"format=2",
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eddf9c50033e1..3c30bb8628713 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10688,6 +10688,10 @@ def warn_missing_format_string : Warning<
def warn_scanf_nonzero_width : Warning<
"zero field width in scanf format string is unused">,
InGroup<Format>;
+// %0 is the conversion specifier text and %1 the destination size in elements.
+// '%s2' is the integer plural modifier: it emits 's' unless argument 2 is
+// exactly 1.
+def warn_scanf_no_field_width : Warning<
+ "'%%%0' conversion used without a field width, destination buffer has %1 "
+ "element%s2">,
+ InGroup<FormatLibc>, DefaultIgnore;
def warn_format_conversion_argument_type_mismatch : Warning<
"format specifies type %0 but the argument has "
"%select{type|underlying type}2 %1">,
@@ -10792,6 +10796,9 @@ def warn_printf_narg_not_supported : Warning<
def warn_scanf_scanlist_incomplete : Warning<
"no closing ']' for '%%[' in scanf format string">,
InGroup<Format>;
+// %0 is the constant mode string that failed validation.
+def warn_fopen_invalid_mode : Warning<
+ "invalid fopen mode string '%0'">,
+ InGroup<FormatLibc>, DefaultIgnore;
def warn_format_bool_as_character : Warning<
"using '%0' format specifier, but argument has boolean value">,
InGroup<Format>;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index de8b965144971..9dc7fa4a7bbb4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1141,6 +1141,140 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr,
return false;
}
+/// Computes how many character elements the scanf destination \p Arg can hold.
+///
+/// Two shapes are recognised:
+///  - after stripping parentheses and implicit casts, the argument has a
+///    constant array type with a normal size modifier and a character element
+///    type; the array bound is returned.
+///  - the argument is a pointer to a character type whose object size can be
+///    evaluated; the byte size divided by the element size is returned.
+///
+/// \returns the element count, or std::nullopt when neither shape applies, the
+/// object size cannot be evaluated, or it is not a whole multiple of the
+/// element size.
+static std::optional<uint64_t>
+getScanfDestinationSizeInElements(const Expr *Arg, ASTContext &Context) {
+  // Shape 1: look through the array-to-pointer decay to the array itself.
+  QualType UnderlyingTy = Arg->IgnoreParenImpCasts()->getType();
+  const auto *CAT = Context.getAsConstantArrayType(UnderlyingTy);
+  if (CAT && CAT->getSizeModifier() == ArraySizeModifier::Normal &&
+      CAT->getElementType()->isAnyCharacterType())
+    return CAT->getZExtSize();
+
+  // Shape 2: a character pointer whose pointed-to object size is known.
+  const auto *PtrTy = Arg->getType()->getAs<PointerType>();
+  if (!PtrTy)
+    return std::nullopt;
+  QualType CharTy = PtrTy->getPointeeType();
+  if (!CharTy->isAnyCharacterType())
+    return std::nullopt;
+
+  std::optional<uint64_t> SizeInBytes = Arg->tryEvaluateObjectSize(Context, 0);
+  if (!SizeInBytes)
+    return std::nullopt;
+
+  // Convert bytes to elements; bail out on a zero-sized element type or when
+  // the object is not a whole number of elements.
+  CharUnits CharSize = Context.getTypeSizeInChars(CharTy);
+  if (CharSize.isZero())
+    return std::nullopt;
+  uint64_t BytesPerElement = CharSize.getQuantity();
+  if (*SizeInBytes % BytesPerElement != 0)
+    return std::nullopt;
+
+  return *SizeInBytes / BytesPerElement;
+}
+
+/// Returns true if \p Mode has the shape of an acceptable fopen() mode string.
+///
+/// Accepted shape:
+///  - one access character: 'r', 'w' or 'a';
+///  - then any number of flag characters drawn from "+bxcnemtRSTDN", each
+///    appearing at most once, with 'x' rejected when the access character is
+///    'r';
+///  - optionally a ',' followed by "ccs=<name>", where whitespace after the
+///    ',' is skipped and <name> must be non-empty and contain no further ','.
+static bool isValidFOpenModeString(StringRef Mode) {
+  if (Mode.empty())
+    return false;
+
+  const char Access = Mode.front();
+  if (StringRef("rwa").find(Access) == StringRef::npos)
+    return false;
+
+  // Everything before the first ',' is a run of single-character flags;
+  // everything after it is the encoding suffix.
+  StringRef Tail = Mode.drop_front();
+  const size_t CommaPos = Tail.find(',');
+  StringRef Flags = Tail.substr(0, CommaPos);
+
+  for (size_t I = 0, E = Flags.size(); I != E; ++I) {
+    const char Flag = Flags[I];
+    const bool Known =
+        StringRef("+bxcnemtRSTDN").find(Flag) != StringRef::npos;
+    // A flag is a repeat if it already occurs among the flags before it.
+    const bool Repeated = Flags.take_front(I).find(Flag) != StringRef::npos;
+    if (!Known || Repeated || (Flag == 'x' && Access == 'r'))
+      return false;
+  }
+
+  if (CommaPos == StringRef::npos)
+    return true;
+
+  StringRef Encoding = Tail.substr(CommaPos + 1).ltrim();
+  if (!Encoding.consume_front("ccs="))
+    return false;
+  return !Encoding.empty() && Encoding.find(',') == StringRef::npos;
+}
+
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -1243,6 +1377,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
return std::nullopt;
};
+ auto ComputeStringArgument =
+ [&](unsigned Index) -> std::optional<std::string> {
+ std::optional<unsigned> IndexOptional = TranslateIndex(Index);
+ if (!IndexOptional)
+ return std::nullopt;
+ return TheCall->getArg(*IndexOptional)->tryEvaluateString(getASTContext());
+ };
+
std::optional<llvm::APSInt> SourceSize;
std::optional<llvm::APSInt> DestinationSize;
unsigned DiagID = 0;
@@ -1289,6 +1431,21 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
break;
}
+ case Builtin::BIfopen: {
+ if (Diags.isIgnored(diag::warn_fopen_invalid_mode, TheCall->getBeginLoc()))
+ return;
+
+ std::optional<unsigned> ModeIndex = TranslateIndex(1);
+ std::optional<std::string> Mode = ComputeStringArgument(1);
+ if (!ModeIndex || !Mode || isValidFOpenModeString(*Mode))
+ return;
+
+ Expr *ModeArg = TheCall->getArg(*ModeIndex);
+ Diag(ModeArg->getBeginLoc(), diag::warn_fopen_invalid_mode)
+ << *Mode << ModeArg->getSourceRange();
+ return;
+ }
+
case Builtin::BIscanf:
case Builtin::BIfscanf:
case Builtin::BIsscanf: {
@@ -9452,6 +9609,10 @@ class CheckScanfHandler : public CheckFormatHandler {
unsigned specifierLen) override;
void HandleIncompleteScanList(const char *start, const char *end) override;
+
+ void DiagnoseMissingFieldWidth(const analyze_scanf::ScanfSpecifier &FS,
+ const Expr *Arg, const char *startSpecifier,
+ unsigned specifierLen);
};
} // namespace
@@ -9476,6 +9637,43 @@ bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier(
CS.getStart(), CS.getLength());
}
+/// Warns when a '%s' or '%[' scanf conversion has no field width and the size
+/// of the destination buffer is known.
+///
+/// \param FS the parsed scanf specifier being checked.
+/// \param Arg the data argument that receives the converted string.
+/// \param startSpecifier start of the specifier text inside the format string.
+/// \param specifierLen length of the specifier text.
+///
+/// Nothing is emitted for other conversions, when any field width is given, or
+/// when the destination size cannot be determined. When the buffer holds more
+/// than one element, a fix-it inserts a width of (elements - 1), leaving one
+/// element for the terminating null character that scanf appends.
+void CheckScanfHandler::DiagnoseMissingFieldWidth(
+    const analyze_scanf::ScanfSpecifier &FS, const Expr *Arg,
+    const char *startSpecifier, unsigned specifierLen) {
+  using namespace analyze_format_string;
+
+  // Only the string-producing conversions are of interest.
+  const ConversionSpecifier &CS = FS.getConversionSpecifier();
+  if (CS.getKind() != ConversionSpecifier::sArg &&
+      CS.getKind() != ConversionSpecifier::ScanListArg)
+    return;
+
+  // Any explicitly specified width silences the check.
+  if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified)
+    return;
+
+  std::optional<uint64_t> BufferElements =
+      getScanfDestinationSizeInElements(Arg, S.Context);
+  if (!BufferElements)
+    return;
+
+  // The width goes in front of the length modifier if there is one, otherwise
+  // in front of the conversion character. No width is suggested for buffers
+  // of zero or one element, since no usable width exists for them.
+  SmallVector<FixItHint, 1> FixIts;
+  if (*BufferElements > 1) {
+    const LengthModifier &LM = FS.getLengthModifier();
+    const char *InsertionPoint = LM.getLength() ? LM.getStart() : CS.getStart();
+    FixIts.push_back(
+        FixItHint::CreateInsertion(getLocationOfByte(InsertionPoint),
+                                   std::to_string(*BufferElements - 1)));
+  }
+
+  // The message ends in "element%s2". The 's' modifier prints 's' whenever its
+  // integer argument is not 1, so the element count itself must be passed as
+  // argument 2. Passing a "is plural" boolean would invert the suffix.
+  EmitFormatDiagnostic(S.PDiag(diag::warn_scanf_no_field_width)
+                           << CS.toString() << *BufferElements
+                           << *BufferElements,
+                       getLocationOfByte(CS.getStart()),
+                       /*IsStringLocation*/ true,
+                       getSpecifierRange(startSpecifier, specifierLen),
+                       FixIts);
+}
+
bool CheckScanfHandler::HandleScanfSpecifier(
const analyze_scanf::ScanfSpecifier &FS,
const char *startSpecifier,
@@ -9562,8 +9760,10 @@ bool CheckScanfHandler::HandleScanfSpecifier(
analyze_format_string::ArgType::MatchKind Match =
AT.matchesType(S.Context, Ex->getType());
Match = handleFormatSignedness(Match, S.getDiagnostics(), Ex->getExprLoc());
- if (Match == analyze_format_string::ArgType::Match)
+ if (Match == analyze_format_string::ArgType::Match) {
+ DiagnoseMissingFieldWidth(FS, Ex, startSpecifier, specifierLen);
return true;
+ }
bool Pedantic = Match == analyze_format_string::ArgType::NoMatchPedantic;
bool Signedness = Match == analyze_format_string::ArgType::NoMatchSignedness;
diff --git a/clang/test/Sema/warn-format-libc.c b/clang/test/Sema/warn-format-libc.c
new file mode 100644
index 0000000000000..06bb0fe3da309
--- /dev/null
+++ b/clang/test/Sema/warn-format-libc.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat -verify=expected %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wall -verify=expected %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat-libc -verify=expected %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat -Wno-format-libc -verify=disabled %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -verify=plain %s
+// disabled-no-diagnostics
+// plain-no-diagnostics
+
+// Minimal declarations so the test does not depend on system headers.
+typedef struct FILE FILE;
+
+int scanf(const char *restrict format, ...);
+FILE *fopen(const char *restrict path, const char *restrict mode);
+
+// Wrappers that opt in to the same checks through attributes.
+int wrapped_scanf(const char *format, ...)
+    __attribute__((format(scanf, 1, 2)));
+FILE *wrapped_fopen(const char *path, const char *mode)
+    __attribute__((diagnose_as_builtin(fopen, 1, 2)));
+
+void test_scanf(void) {
+  char buf[8];
+  char one[1];
+
+  // No field width and a fixed-size character array destination.
+  scanf("%s", buf); // expected-warning {{'%s' conversion used without a field width, destination buffer has 8 elements}}
+  scanf("%[a-z]", buf); // expected-warning {{'%[' conversion used without a field width, destination buffer has 8 elements}}
+  wrapped_scanf("%s", buf); // expected-warning {{'%s' conversion used without a field width, destination buffer has 8 elements}}
+  scanf("%s", one); // expected-warning {{'%s' conversion used without a field width, destination buffer has 1 element}}
+
+  // An explicit field width silences the check.
+  scanf("%7s", buf);
+  scanf("%7[a-z]", buf);
+}
+
+// The destination is a pointer parameter, so nothing should be reported.
+void test_scanf_unknown(char *ptr) {
+  scanf("%s", ptr);
+}
+
+void test_fopen(const char *path, const char *mode) {
+  // Accepted mode strings, plus a non-constant mode that cannot be checked.
+  fopen(path, "r");
+  fopen(path, "w+b");
+  // 'x' is rejected in combination with 'r' (see the "rx" case below), so the
+  // multi-flag case is built on 'w'.
+  fopen(path, "wb+cmxe");
+  fopen(path, "w+, ccs=UTF-8");
+  fopen(path, mode);
+
+  // Rejected mode strings.
+  fopen(path, "oo"); // expected-warning {{invalid fopen mode string 'oo'}}
+  fopen(path, "rx"); // expected-warning {{invalid fopen mode string 'rx'}}
+  fopen(path, "r++"); // expected-warning {{invalid fopen mode string 'r++'}}
+  wrapped_fopen(path, ""); // expected-warning {{invalid fopen mode string ''}}
+}
More information about the cfe-commits
mailing list