[clang] [clang][Sema] Add bounds checking for libc poll (PR #177286)
Colin Kinloch via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 22 05:38:20 PST 2026
https://github.com/ColinKinloch updated https://github.com/llvm/llvm-project/pull/177286
>From 6d340f8c2bb04f6646e920d262214c04ef538cc0 Mon Sep 17 00:00:00 2001
From: Colin Kinloch <colin.kinloch at collabora.com>
Date: Wed, 21 Jan 2026 03:45:12 +0000
Subject: [PATCH 1/2] [clang][Sema] Add libc correctness warnings
Define as builtin and add correctness checks for:
* `open` / `open64` / `openat` / `openat64`
* `umask`
* `realpath`
---
clang/include/clang/Basic/BuiltinHeaders.def | 2 +
clang/include/clang/Basic/Builtins.td | 50 +++
clang/include/clang/Basic/DiagnosticGroups.td | 3 +
.../clang/Basic/DiagnosticSemaKinds.td | 25 ++
clang/lib/Sema/SemaChecking.cpp | 294 ++++++++++++++++++
clang/test/Analysis/unix-api.c | 2 +-
clang/test/Analysis/unix-api.cpp | 2 +-
clang/test/Analysis/unix-fns-o_creat.c | 2 +-
clang/test/Analysis/unix-fns.c | 4 +-
clang/test/Sema/enable_if.c | 4 +-
clang/test/Sema/enum-attr.c | 2 +-
clang/test/Sema/type-dependent-attrs.c | 2 +-
clang/test/Sema/warn-libc.c | 83 +++++
clang/utils/TableGen/ClangBuiltinsEmitter.cpp | 1 +
14 files changed, 467 insertions(+), 9 deletions(-)
create mode 100644 clang/test/Sema/warn-libc.c
diff --git a/clang/include/clang/Basic/BuiltinHeaders.def b/clang/include/clang/Basic/BuiltinHeaders.def
index d6012a896eca9..985d98404555c 100644
--- a/clang/include/clang/Basic/BuiltinHeaders.def
+++ b/clang/include/clang/Basic/BuiltinHeaders.def
@@ -17,6 +17,7 @@ HEADER(BLOCKS_H, "Blocks.h")
HEADER(COMPLEX_H, "complex.h")
HEADER(CTYPE_H, "ctype.h")
HEADER(EMMINTRIN_H, "emmintrin.h")
+HEADER(FCNTL_H, "fcntl.h")
HEADER(FOUNDATION_NSOBJCRUNTIME_H, "Foundation/NSObjCRuntime.h")
HEADER(IMMINTRIN_H, "immintrin.h")
HEADER(INTRIN_H, "intrin.h")
@@ -37,6 +38,7 @@ HEADER(STDIO_H, "stdio.h")
HEADER(STDLIB_H, "stdlib.h")
HEADER(STRINGS_H, "strings.h")
HEADER(STRING_H, "string.h")
+HEADER(SYS_STAT_H, "sys/stat.h")
HEADER(UNISTD_H, "unistd.h")
HEADER(UTILITY, "utility")
HEADER(WCHAR_H, "wchar.h")
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 9e00f3aa3020d..9d9863d36b703 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3539,6 +3539,56 @@ def StrnCaseCmp : GNULibBuiltin<"strings.h"> {
let RequiresUndef = 1;
}
+// POSIX fcntl.h
+
+def Open : LibBuiltin<"fcntl.h"> {
+ let Spellings = ["open"];
+ let Attributes = [NoThrow];
+ let Prototype = "int(char const*, int, ...)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+def Open64 : LibBuiltin<"fcntl.h"> {
+ let Spellings = ["open64"];
+ let Attributes = [NoThrow];
+ let Prototype = "int(char const*, int, ...)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+def OpenAt : LibBuiltin<"fcntl.h"> {
+ let Spellings = ["openat"];
+ let Attributes = [NoThrow];
+ let Prototype = "int(int, char const*, int, ...)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+def OpenAt64 : LibBuiltin<"fcntl.h"> {
+ let Spellings = ["openat64"];
+ let Attributes = [NoThrow];
+ let Prototype = "int(int, char const*, int, ...)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+// POSIX stat.h
+
+def UMask : LibBuiltin<"sys/stat.h"> {
+ let Spellings = ["umask"];
+ let Attributes = [NoThrow];
+ let Prototype = "mode_t(mode_t)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+// POSIX stdlib.h
+
+def RealPath : LibBuiltin<"stdlib.h"> {
+ let Spellings = ["realpath"];
+ let Attributes = [NoThrow, NonNull<NonOptimizing, [0]>];
+ let Prototype = "char*(char const* restrict, char* restrict)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+// POSIX unistd.h
+
def GNU_Exit : GNULibBuiltin<"unistd.h"> {
let Spellings = ["_exit"];
let Attributes = [NoReturn];
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index de1d1e13ea712..84ed2d7d49dd5 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1802,3 +1802,6 @@ def TrivialAutoVarInit : DiagGroup<"trivial-auto-var-init">;
// A warning for options that enable a feature that is not yet complete
def ExperimentalOption : DiagGroup<"experimental-option">;
+
+// Warnings about incorrect libc usage
+def IncorrectLibcUse : DiagGroup<"incorrect-libc-use">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a2be7ab3791b9..1e741037dbf64 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1031,6 +1031,31 @@ def err_ptrauth_indirect_goto_addrlabel_arithmetic : Error<
"%select{subtraction|addition}0 of address-of-label expressions is not "
"supported with ptrauth indirect gotos">;
+// libc checks
+def warn_surplus_args : Warning<"too many arguments passed to '%0'; it expects "
+ "a maximum of %1 variadic parameter">,
+ InGroup<IncorrectLibcUse>;
+def warn_open_create_file_without_mode
+ : Warning<"nonzero 'mode' argument must be specified as the flag%s0 "
+ "'%1'%select{|| and '%2'}0 would result in file creation">,
+ InGroup<IncorrectLibcUse>;
+def warn_open_superfluous_mode
+ : Warning<"nonzero 'mode' argument was specified but is unnecessary for "
+ "specified 'flags'">,
+ InGroup<IncorrectLibcUse>;
+
+def warn_libc_invalid_mode_t : Warning<"invalid mode">,
+ InGroup<IncorrectLibcUse>;
+
+def warn_path_max_overflow : Warning<"'%0' distination buffer needs to be "
+ "larger than than PATH_MAX bytes (%2),"
+ " but buffer is %1">,
+ InGroup<IncorrectLibcUse>;
+
+def warn_pollfd_nfds : Warning<"the element count value '%0' is higher than "
+ "the number of elements in the array '%1'">,
+ InGroup<IncorrectLibcUse>;
+
// __ptrauth qualifier
def err_ptrauth_qualifier_invalid : Error<
"%select{return type|parameter type|property}1 may not be qualified with "
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 8e40364faf66c..683ba4efe6424 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -23,6 +23,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclarationName.h"
+#include "clang/AST/DynamicRecursiveASTVisitor.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
@@ -56,6 +57,8 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TypeTraits.h"
#include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Ownership.h"
@@ -1140,6 +1143,152 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr,
return false;
}
+static std::optional<int> getPathMaxValue(const ASTContext &Ctx) {
+ if (Ctx.getTargetInfo().getTriple().isOSGlibc())
+ return {4096};
+
+ if (Ctx.getTargetInfo().getTriple().isOSDarwin())
+ return {1024};
+
+ return std::nullopt;
+}
+
+/* Follow simple references to other macros so we can match the Expr spelling */
+static const MacroInfo *resolveMacroChainAtLoc(Preprocessor &PP,
+ const IdentifierInfo *MII,
+ SourceLocation Loc) {
+ auto *MI = PP.getMacroDefinitionAtLoc(MII, Loc).getMacroInfo();
+ if (!MI)
+ return nullptr;
+ const IdentifierInfo *MIIN = MII;
+ while (MI->getNumTokens() == 1 && MI->tokens_begin()->is(tok::identifier) &&
+ (MIIN = MI->tokens_begin()->getIdentifierInfo()) &&
+ MIIN->hasMacroDefinition()) {
+ MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(MIIN, Loc);
+ MI = MacroDef.getMacroInfo();
+ }
+ return MI;
+}
+
+// Search subexpressions for macros and attempt to evaluate them
+class MacroFlagMatcher : public ConstDynamicRecursiveASTVisitor {
+ const Sema &S;
+ llvm::SmallVectorImpl<std::pair<const IdentifierInfo *, const MacroInfo *>>
+ &Macros;
+ std::map<const IdentifierInfo *, int64_t> &FoundMacros;
+ std::optional<SourceRange> LastMatchRange;
+
+public:
+ MacroFlagMatcher(const Sema &S,
+ llvm::SmallVectorImpl<std::pair<const IdentifierInfo *,
+ const MacroInfo *>> &Macros,
+ std::map<const IdentifierInfo *, int64_t> &FoundMacros)
+ : S(S), Macros(Macros), FoundMacros(FoundMacros) {}
+
+ bool VisitExpr(const Expr *E) override {
+ if (isa<ParenExpr>(E) || isa<IntegerLiteral>(E)) {
+ // Check location
+ SourceLocation SpLoc = S.SourceMgr.getSpellingLoc(E->getExprLoc());
+ for (auto *M = Macros.begin(); M != Macros.end();) {
+ if (M->second->tokens_begin()->getLocation() == SpLoc) {
+ LastMatchRange = E->getSourceRange();
+
+ Expr::EvalResult Result;
+ const Expr *SizeArg = E;
+ if (SizeArg->EvaluateAsInt(Result, S.getASTContext())) {
+ FoundMacros.insert(
+ std::pair(M->first, Result.Val.getInt().getExtValue()));
+ M = Macros.erase(M);
+ continue;
+ }
+ }
+ ++M;
+ }
+ }
+
+ return !Macros.empty();
+ }
+
+ bool dataTraverseStmtPre(const Stmt *Statement) override {
+ /* Ignore the contents of a matched macro */
+ return !(LastMatchRange &&
+ (*LastMatchRange).fullyContains(Statement->getSourceRange()));
+ }
+
+ static void getExpr(const Expr *E, ArrayRef<const IdentifierInfo *> Macros,
+ const Sema &S,
+ std::map<const IdentifierInfo *, int64_t> &FoundMacros) {
+ SmallVector<std::pair<const IdentifierInfo *, const MacroInfo *>, 2>
+ MacrosLoc;
+ for (auto &MII : Macros) {
+ auto *MI = resolveMacroChainAtLoc(S.PP, MII, E->getExprLoc());
+ if (MI)
+ MacrosLoc.push_back(std::make_pair(MII, MI));
+ }
+ MacroFlagMatcher Visitor(S, MacrosLoc, FoundMacros);
+ Visitor.TraverseStmt(E->getExprStmt());
+ }
+};
+
+static std::optional<int>
+evaluateSimpleMacroAtLocation(Preprocessor &PP, const IdentifierInfo *MacroII,
+ SourceLocation Loc) {
+ auto *MI = resolveMacroChainAtLoc(PP, MacroII, Loc);
+ if (!MI)
+ return std::nullopt;
+
+ // Fast path for single digit integer
+ if (MI->getNumTokens() == 1) {
+ const Token &T = MI->tokens().back();
+ if (T.getLength() == 1 || T.getKind() == tok::binary_data) {
+ const uint8_t Val = PP.getSpellingOfSingleCharacterNumericConstant(T);
+ return llvm::APInt(8, Val, /*isSigned=*/true).getSExtValue();
+ }
+ }
+
+ // Filter out parens.
+ std::vector<Token> FilteredTokens;
+ FilteredTokens.reserve(MI->tokens().size());
+ for (auto &T : MI->tokens())
+ if (!T.isOneOf(tok::l_paren, tok::r_paren))
+ FilteredTokens.push_back(T);
+
+ // Parse an integer at the end of the macro definition.
+ const Token &T = FilteredTokens.back();
+
+ if (!T.isLiteral())
+ return std::nullopt;
+
+ bool InvalidSpelling = false;
+ SmallVector<char> Buffer(T.getLength());
+ // `Preprocessor::getSpelling` can get the spelling of the token regardless of
+ // whether the macro is defined in a PCH or not:
+ StringRef ValueStr = PP.getSpelling(T, Buffer, &InvalidSpelling);
+
+ if (InvalidSpelling)
+ return std::nullopt;
+
+ llvm::APSInt IntValue(/*BitWidth=*/0, /*isUnsigned=*/true);
+ constexpr unsigned AutoSenseRadix = 0;
+ if (ValueStr.getAsInteger(AutoSenseRadix,
+ static_cast<llvm::APInt &>(IntValue)))
+ return std::nullopt;
+
+ // Parse an optional minus sign.
+ size_t Size = FilteredTokens.size();
+ if (Size >= 2) {
+ if (FilteredTokens[Size - 2].is(tok::minus)) {
+ // Make sure there's space for a sign bit
+ if (IntValue.isSignBitSet())
+ IntValue = IntValue.extend(IntValue.getBitWidth() + 1);
+ IntValue.setIsUnsigned(false);
+ IntValue = -IntValue;
+ }
+ }
+
+ return IntValue.getExtValue();
+}
+
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -1240,6 +1389,21 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth);
};
+ auto DiagnoseBigMode = [&](unsigned ModeArgIndex) {
+ const Expr *ModeArg = TheCall->getArg(ModeArgIndex);
+ Expr::EvalResult Result;
+ uint32_t Mode = 0;
+
+ if (ModeArg->EvaluateAsInt(Result, Context)) {
+ Mode = Result.Val.getInt().getExtValue();
+ if ((Mode & ~0777) > 0) {
+ DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+ PDiag(diag::warn_libc_invalid_mode_t)
+ << ModeArg->getSourceRange());
+ }
+ }
+ };
+
std::optional<llvm::APSInt> SourceSize;
std::optional<llvm::APSInt> DestinationSize;
unsigned DiagID = 0;
@@ -1450,6 +1614,136 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
}
}
DestinationSize = ComputeSizeArgument(0);
+ break;
+ }
+
+ /* incorrect-libc-use start */
+ case Builtin::BIumask:
+ case Builtin::BI__builtin_umask: {
+ DiagnoseBigMode(0);
+ break;
+ }
+
+ case Builtin::BIopen:
+ case Builtin::BI__builtin_open:
+ case Builtin::BIopen64:
+ case Builtin::BI__builtin_open64:
+ case Builtin::BIopenat:
+ case Builtin::BI__builtin_openat:
+ case Builtin::BIopenat64:
+ case Builtin::BI__builtin_openat64: {
+ /* The param count is the index of the first variadic argument (mode) */
+ unsigned ModeIndex = UseDecl->getNumParams();
+ assert(TheCall->getNumArgs() >= ModeIndex);
+ unsigned NumVarArgs = TheCall->getNumArgs() - ModeIndex;
+
+ if (NumVarArgs > 1) {
+ DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+ PDiag(diag::warn_surplus_args)
+ << GetFunctionName() << 1);
+ }
+
+ std::optional<int> Flags;
+ const Expr *FlagsArg = TheCall->getArg(ModeIndex - 1);
+ Expr::EvalResult Result;
+
+ const Expr *ModeArg = nullptr;
+
+ std::optional<int> Mode;
+
+ if (NumVarArgs >= 1) {
+ /* GNU libc accepts modes outside the 0777 range */
+ if (!Context.getTargetInfo().getTriple().isOSLinux())
+ DiagnoseBigMode(ModeIndex);
+
+ ModeArg = TheCall->getArg(ModeIndex);
+ Expr::EvalResult Result;
+ if (ModeArg->EvaluateAsInt(Result, Context)) {
+ Mode = Result.Val.getInt().getExtValue();
+ }
+ }
+
+ if (FlagsArg->EvaluateAsInt(Result, Context))
+ Flags = Result.Val.getInt().getExtValue();
+
+ if (!Flags)
+ break;
+
+ int64_t OCreatValue = 0;
+ int64_t OTmpFileValue = 0;
+
+ bool IsOCreat = false;
+ bool IsOTmpFile = false;
+ bool ExpectsMode = false;
+
+ const IdentifierInfo *OCreatII = PP.getIdentifierInfo("O_CREAT");
+ const IdentifierInfo *OTmpFileII = PP.getIdentifierInfo("O_TMPFILE");
+
+ OCreatValue =
+ evaluateSimpleMacroAtLocation(PP, OCreatII, FlagsArg->getExprLoc())
+ .value_or(0);
+ OTmpFileValue =
+ evaluateSimpleMacroAtLocation(PP, OTmpFileII, FlagsArg->getExprLoc())
+ .value_or(0);
+
+ // Fallback to searching the argument for an expression to evaluate
+ if (!OCreatValue || !OTmpFileValue) {
+ std::map<const IdentifierInfo *, int64_t> FoundMacros;
+ MacroFlagMatcher::getExpr(FlagsArg, {OCreatII, OTmpFileII}, *this,
+ FoundMacros);
+
+ auto OCreatIt = FoundMacros.find(OCreatII);
+ auto OTmpFileIt = FoundMacros.find(OTmpFileII);
+
+ if (OCreatIt != FoundMacros.end())
+ OCreatValue = OCreatIt->second;
+ if (OTmpFileIt != FoundMacros.end())
+ OTmpFileValue = OTmpFileIt->second;
+ }
+
+ IsOCreat = OCreatValue && (*Flags & OCreatValue) == OCreatValue;
+ IsOTmpFile = OTmpFileValue && (*Flags & OTmpFileValue) == OTmpFileValue;
+ ExpectsMode = IsOCreat || IsOTmpFile;
+
+ /* check if mode should be present for flags */
+
+ // If we failed to evaluate the flags don't diagnose
+ if (!OCreatValue && !OTmpFileValue)
+ break;
+
+ if (ExpectsMode && !Mode) {
+ int Count = 0;
+ if (IsOCreat)
+ Count++;
+ if (IsOTmpFile)
+ Count++;
+ auto D = PDiag(diag::warn_open_create_file_without_mode)
+ << FlagsArg->getSourceRange() << Count;
+ if (IsOCreat)
+ D << OCreatII->getName();
+ if (IsOTmpFile)
+ D << OTmpFileII->getName();
+ DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, D);
+ } else if (!ExpectsMode && (Mode && *Mode != 0)) {
+ DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+ PDiag(diag::warn_open_superfluous_mode)
+ << FlagsArg->getSourceRange()
+ << ModeArg->getSourceRange());
+ }
+ break;
+ }
+
+ case Builtin::BIrealpath:
+ case Builtin::BI__builtin_realpath: {
+ DiagID = diag::warn_fortify_source_overflow;
+ std::optional<int> PathMax = getPathMaxValue(Context);
+ DiagID = diag::warn_path_max_overflow;
+ if (PathMax)
+ SourceSize =
+ llvm::APSInt::getUnsigned(*PathMax).extOrTrunc(SizeTypeWidth);
+ DestinationSize = ComputeSizeArgument(TheCall->getNumArgs() - 1);
+
+ break;
}
}
diff --git a/clang/test/Analysis/unix-api.c b/clang/test/Analysis/unix-api.c
index 64ff3c0fccf42..4361e1c6481c6 100644
--- a/clang/test/Analysis/unix-api.c
+++ b/clang/test/Analysis/unix-api.c
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -Wno-incorrect-libc-use -verify %s
#ifndef O_RDONLY
#define O_RDONLY 0
diff --git a/clang/test/Analysis/unix-api.cpp b/clang/test/Analysis/unix-api.cpp
index 2b07d8807c1f2..36f495cdd0654 100644
--- a/clang/test/Analysis/unix-api.cpp
+++ b/clang/test/Analysis/unix-api.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -Wno-incorrect-libc-use -verify %s
extern "C" {
#ifndef O_RDONLY
#define O_RDONLY 0
diff --git a/clang/test/Analysis/unix-fns-o_creat.c b/clang/test/Analysis/unix-fns-o_creat.c
index 76df3851cfc9c..0d17bb43c74f5 100644
--- a/clang/test/Analysis/unix-fns-o_creat.c
+++ b/clang/test/Analysis/unix-fns-o_creat.c
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -verify -analyzer-checker=core,unix.API -analyzer-output=text %s
+// RUN: %clang_analyze_cc1 -verify -analyzer-checker=core,unix.API -Wno-incorrect-libc-use -analyzer-output=text %s
// Verify that the UnixAPIChecker finds the missing mode value regardless
// of the particular values of these macros, particularly O_CREAT.
diff --git a/clang/test/Analysis/unix-fns.c b/clang/test/Analysis/unix-fns.c
index 77894285bcb69..2356d1480aaac 100644
--- a/clang/test/Analysis/unix-fns.c
+++ b/clang/test/Analysis/unix-fns.c
@@ -1,6 +1,6 @@
-// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -verify -o %t.plist
+// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -Wno-incorrect-libc-use -verify -o %t.plist
// RUN: %normalize_plist <%t.plist | diff -ub %S/Inputs/expected-plists/unix-fns.c.plist -
-// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -verify -o %t.plist
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -Wno-incorrect-libc-use -verify -o %t.plist
// RUN: %normalize_plist <%t.plist | diff -ub %S/Inputs/expected-plists/unix-fns.c.plist -
// RUN: mkdir -p %t.dir
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API,osx.API,optin.portability -analyzer-output=html -analyzer-config faux-bodies=true -fblocks -o %t.dir %s
diff --git a/clang/test/Sema/enable_if.c b/clang/test/Sema/enable_if.c
index 3ef8310a2fef7..a8e8d96b74503 100644
--- a/clang/test/Sema/enable_if.c
+++ b/clang/test/Sema/enable_if.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -verify
-// RUN: %clang_cc1 %s -DCODEGEN -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fno-builtin-open %s -verify
+// RUN: %clang_cc1 -fno-builtin-open %s -DCODEGEN -emit-llvm -o - | FileCheck %s
#define O_CREAT 0x100
typedef int mode_t;
diff --git a/clang/test/Sema/enum-attr.c b/clang/test/Sema/enum-attr.c
index 1cdd7028e03e0..5a439f51d5912 100644
--- a/clang/test/Sema/enum-attr.c
+++ b/clang/test/Sema/enum-attr.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -Wassign-enum -Wswitch-enum -Wcovered-switch-default %s
+// RUN: %clang_cc1 -fno-builtin-open -fsyntax-only -verify -Wassign-enum -Wswitch-enum -Wcovered-switch-default %s
enum Enum {
A0 = 1, A1 = 10
diff --git a/clang/test/Sema/type-dependent-attrs.c b/clang/test/Sema/type-dependent-attrs.c
index 13068b3f94ad4..915190d07adf0 100644
--- a/clang/test/Sema/type-dependent-attrs.c
+++ b/clang/test/Sema/type-dependent-attrs.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fno-builtin-open -std=c23 -fsyntax-only -verify %s
int open() { return 0; }
void close(typeof(open()) *) {}
diff --git a/clang/test/Sema/warn-libc.c b/clang/test/Sema/warn-libc.c
new file mode 100644
index 0000000000000..607cf4a75e891
--- /dev/null
+++ b/clang/test/Sema/warn-libc.c
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify
+// RUN: %clang_cc1 -triple x86_64-linux %s -verify
+// RUN: %clang_cc1 -triple x86_64-linux %s -verify -DO_CREAT='(32 | __linux__)'
+
+
+#define FAUX_CREATE 0100
+#if O_CREAT != FAUX_CREATE
+void call_open_no_creat(void) {
+ __builtin_open("name", FAUX_CREATE, 0777);
+ __builtin_open("name", FAUX_CREATE);
+}
+#endif
+
+#define O_RDONLY 0
+#define O_WRONLY 01
+#define O_RDWR 02
+#ifndef O_CREAT
+#define O_CREAT 0100
+#endif
+#define __O_DIRECTORY 0x10000
+#define __O_TMPFILE (020000000 | __O_DIRECTORY)
+#define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. */
+
+void call_open(void) {
+#if O_CREAT == 64
+ __builtin_open("name", 64); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_CREAT' would result in file creation}}
+#endif
+ __builtin_open("name", O_TMPFILE | O_RDONLY); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_TMPFILE' would result in file creation}}
+ __builtin_open("name", O_TMPFILE + O_RDONLY); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_TMPFILE' would result in file creation}}
+ __builtin_open("name", O_TMPFILE); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_TMPFILE' would result in file creation}}
+ __builtin_open("name", O_CREAT); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_CREAT' would result in file creation}}
+ __builtin_open("name", O_CREAT | O_TMPFILE); // expected-warning {{nonzero 'mode' argument must be specified as the flags 'O_CREAT' and 'O_TMPFILE' would result in file creation}}
+ __builtin_open("name", O_CREAT | O_TMPFILE, 0777);
+ __builtin_open("name", O_CREAT | O_TMPFILE, 0777, 0); // expected-warning {{too many arguments passed to 'open'; it expects a maximum of 1 variadic parameter}}
+ __builtin_open("name", O_CREAT | O_TMPFILE, 0777, 0, 0); // expected-warning {{too many arguments passed to 'open'; it expects a maximum of 1 variadic parameter}}
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned int uint32_t;
+typedef uint32_t mode_t;
+
+mode_t umode(mode_t);
+int open(const char *pathname, int flags, ... /* mode_t mode */ );
+int open64(const char *pathname, int flags, ... /* mode_t mode */ );
+int openat(int fddir, const char *pathname, int flags, ... /* mode_t mode */ );
+int openat64(int fddir, const char *pathname, int flags, ... /* mode_t mode */ );
+
+#ifdef __cplusplus
+}
+#endif
+
+void call_openat(void) {
+ __builtin_openat(0, "name", O_CREAT, 0777);
+ __builtin_openat(0, "name", O_CREAT, 01000);
+#if !defined(__linux__)
+ // expected-warning at -2{{invalid mode}}
+#endif
+}
+
+void call_umask(void) {
+ __builtin_umask(0);
+ __builtin_umask(0777);
+ __builtin_umask(01000); // expected-warning {{invalid mode}}
+}
+
+#if defined(__APPLE__)
+#define PATH_MAX 1024
+#elif defined(__linux__)
+#define PATH_MAX 4096
+#endif
+
+void call_realpath() {
+ char too_small[PATH_MAX - 1];
+ char too_big[PATH_MAX + 1];
+ char too_just_right[PATH_MAX];
+
+ __builtin_realpath("hah", too_small); // expected-warning-re {{'realpath' distination buffer needs to be larger than than PATH_MAX bytes ({{[0-9]+}}), but buffer is {{[0-9]+}}}}
+ __builtin_realpath("hah", too_big);
+ __builtin_realpath("hah", too_just_right);
+}
diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index fb089a811ef92..fb5829ced5f55 100644
--- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -356,6 +356,7 @@ class PrototypeParser {
.Case("int32_t", "Zi")
.Case("int64_t", "Wi")
.Case("jmp_buf", "J")
+ .Case("mode_t", "UZi")
.Case("msint32_t", "Ni")
.Case("msuint32_t", "UNi")
.Case("objc_super", "M")
>From 6d641fe9842542cd2d94210bdd278ba77559912e Mon Sep 17 00:00:00 2001
From: Colin Kinloch <colin.kinloch at collabora.com>
Date: Wed, 21 Jan 2026 03:49:17 +0000
Subject: [PATCH 2/2] [clang][Sema] Add bounds checking for libc poll
Manually identify a call to the libc `poll` function and verify that the
fd count argument isn't greater than the number of elements in the fds
array.
---
clang/lib/Sema/SemaChecking.cpp | 126 ++++++++++++++++++++++++++++++++
clang/test/Sema/warn-libc.c | 42 +++++++++++
2 files changed, 168 insertions(+)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 683ba4efe6424..5b7934d0e0def 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4563,6 +4563,125 @@ void Sema::CheckConstructorCall(FunctionDecl *FDecl, QualType ThisType,
Loc, SourceRange(), CallType);
}
+static std::optional<llvm::APSInt> GetArrayElementCount(Sema &S,
+ const Expr *BaseExpr) {
+ const Type *EffectiveType =
+ BaseExpr->getType()->getPointeeOrArrayElementType();
+ if (EffectiveType->isDependentType())
+ return {};
+
+ BaseExpr = BaseExpr->IgnoreParenCasts();
+ const ConstantArrayType *ArrayTy =
+ S.Context.getAsConstantArrayType(BaseExpr->getType());
+
+ if (!ArrayTy)
+ return {};
+
+ const Type *BaseType = ArrayTy->getElementType().getTypePtr();
+
+ if (BaseType->isDependentType() || BaseType->isIncompleteType())
+ return {};
+
+ LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+ S.getLangOpts().getStrictFlexArraysLevel();
+
+ if (BaseExpr->isFlexibleArrayMemberLike(
+ S.Context, StrictFlexArraysLevel,
+ /*IgnoreTemplateOrMacroSubstitution=*/true))
+ return {};
+
+ llvm::APInt ArrayTySize = ArrayTy->getSize();
+ if (BaseType != EffectiveType) {
+ // Make sure we're comparing apples to apples when comparing index to
+ // size.
+ uint64_t ptrarith_typesize = S.Context.getTypeSize(EffectiveType);
+ uint64_t array_typesize = S.Context.getTypeSize(BaseType);
+
+ // Handle ptrarith_typesize being zero, such as when casting to void*.
+ // Use the size in bits (what "getTypeSize()" returns) rather than bytes.
+ if (!ptrarith_typesize)
+ ptrarith_typesize = S.Context.getCharWidth();
+
+ if (ptrarith_typesize != array_typesize) {
+ // There's a cast to a different size type involved.
+ uint64_t ratio = array_typesize / ptrarith_typesize;
+
+ // TODO: Be smarter about handling cases where array_typesize is not a
+ // multiple of ptrarith_typesize.
+ if (ptrarith_typesize * ratio == array_typesize)
+ ArrayTySize *= llvm::APInt(ArrayTySize.getBitWidth(), ratio);
+ }
+ }
+
+ return llvm::APSInt(std::move(ArrayTySize));
+}
+
+static bool CheckLibcPoll(Sema &S, FunctionDecl *FDecl, CallExpr *TheCall) {
+ // Check that the function resembles libc poll
+ if (!S.getSourceManager().isInSystemHeader(FDecl->getLocation()))
+ return false;
+
+ if (TheCall->getNumArgs() != 3)
+ return false;
+
+ if (!FDecl->getReturnType()->isSignedIntegerType())
+ return false;
+
+ const IdentifierTable::iterator It = S.Context.Idents.find("pollfd");
+
+ // If we can't find pollfd cancel the check
+ if (It == S.Context.Idents.end())
+ return false;
+
+ const IdentifierInfo *II = It->second;
+
+ Expr *FdsArg = TheCall->getArg(0);
+ QualType FdsType = FdsArg->getType();
+
+ if (!FdsType->isPointerOrReferenceType() && !FdsType->isArrayType())
+ return false;
+
+ const Type *elType = FdsType->getPointeeOrArrayElementType();
+
+ if (!elType->isRecordType())
+ return false;
+
+ const RecordDecl *RD = elType->getAsRecordDecl();
+ if (II != RD->getIdentifier())
+ return false;
+
+ // Check size type
+ Expr *NfdsArg = TheCall->getArg(1);
+ auto &ExpectedNfdsType = S.Context.UnsignedLongTy;
+ if (S.Context.getTargetInfo().getTriple().isOSDarwin())
+ ExpectedNfdsType = S.Context.UnsignedIntTy;
+
+ if (!S.Context.hasSameType(NfdsArg->getType().getUnqualifiedType(),
+ ExpectedNfdsType))
+ return false;
+
+ Expr::EvalResult Result;
+ if (!NfdsArg->EvaluateAsInt(Result, S.getASTContext()))
+ return false;
+ llvm::APSInt NfdsValue = Result.Val.getInt();
+ NfdsValue.setIsUnsigned(true);
+
+ std::optional<llvm::APSInt> FdsElCount = GetArrayElementCount(S, FdsArg);
+
+ if (FdsElCount) {
+ if (llvm::APSInt::compareValues(NfdsValue, *FdsElCount) > 0) {
+ SmallString<16> FdsElCountStr;
+ SmallString<16> NfdsValueStr;
+ FdsElCount->toString(FdsElCountStr, /*Radix=*/10);
+ NfdsValue.toString(NfdsValueStr, /*Radix=*/10);
+ S.Diag(TheCall->getBeginLoc(), diag::warn_pollfd_nfds)
+ << NfdsValueStr << FdsElCountStr;
+ }
+ }
+
+ return true;
+}
+
bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
const FunctionProtoType *Proto) {
bool IsMemberOperatorCall = isa<CXXOperatorCallExpr>(TheCall) &&
@@ -4621,6 +4740,13 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
CheckMaxUnsignedZero(TheCall, FDecl);
CheckInfNaNFunction(TheCall, FDecl);
+ if (FDecl->isExternC()) {
+ const IdentifierInfo *II = FDecl->getIdentifier();
+ if (II->isStr("poll")) {
+ CheckLibcPoll(*this, FDecl, TheCall);
+ }
+ }
+
if (getLangOpts().ObjC)
ObjC().DiagnoseCStringFormatDirectiveInCFAPI(FDecl, Args, NumArgs);
diff --git a/clang/test/Sema/warn-libc.c b/clang/test/Sema/warn-libc.c
index 607cf4a75e891..77ee9505d764e 100644
--- a/clang/test/Sema/warn-libc.c
+++ b/clang/test/Sema/warn-libc.c
@@ -81,3 +81,45 @@ void call_realpath() {
__builtin_realpath("hah", too_big);
__builtin_realpath("hah", too_just_right);
}
+
+# 1 "poll.h" 1 3
+# 1 "sys/poll.h" 1 3
+
+#if defined(__APPLE__)
+typedef unsigned int nfds_t;
+#elif defined(__linux__)
+typedef unsigned long int nfds_t;
+#endif
+
+struct pollfd {
+ int fd;
+ short events;
+ short revents;
+};
+extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
+
+# 2 "poll_test.c" 2
+
+#define __builtin_poll poll
+
+void call_poll(void) {
+ struct pollfd fds[] = {
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ };
+ const nfds_t nfds = sizeof(fds) / sizeof(*fds);
+ __builtin_poll(fds, nfds, 0);
+ __builtin_poll(fds, nfds + 1, 0); // expected-warning {{the element count value '10' is higher than the number of elements in the array '9'}}
+ __builtin_poll(fds, nfds - 1, 0);
+ /* Unhandled errors */
+ __builtin_poll(&fds[1], nfds, 0);
+ __builtin_poll(fds + 1, nfds, 0);
+ __builtin_poll(fds - 1, nfds, 0);
+}
More information about the cfe-commits
mailing list