[clang] [analyzer] Clean up list of taint propagation functions (PR #91635)
Donát Nagy via cfe-commits
cfe-commits at lists.llvm.org
Tue May 14 05:01:03 PDT 2024
================
@@ -572,196 +570,236 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
std::vector<std::pair<CallDescription, GenericTaintRule>>;
using TR = GenericTaintRule;
- const Builtin::Context &BI = C.getASTContext().BuiltinInfo;
-
RulesConstructionTy GlobalCRules{
// Sources
- {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})},
- {{{"fopen"}}, TR::Source({{ReturnValueIndex}})},
- {{{"freopen"}}, TR::Source({{ReturnValueIndex}})},
- {{{"getch"}}, TR::Source({{ReturnValueIndex}})},
- {{{"getchar"}}, TR::Source({{ReturnValueIndex}})},
- {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
- {{{"gets"}}, TR::Source({{0}, ReturnValueIndex})},
- {{{"gets_s"}}, TR::Source({{0}, ReturnValueIndex})},
- {{{"scanf"}}, TR::Source({{}, 1})},
- {{{"scanf_s"}}, TR::Source({{}, {1}})},
- {{{"wgetch"}}, TR::Source({{}, ReturnValueIndex})},
+ {{CDM::CLibrary, {"fdopen"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"fopen"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"freopen"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getch"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getchar"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"gets"}}, TR::Source({{0, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"scanf"}}, TR::Source({{}, 1})},
+ {{CDM::CLibrary, {"scanf_s"}}, TR::Source({{}, 1})},
+ {{CDM::CLibrary, {"wgetch"}}, TR::Source({{ReturnValueIndex}})},
// Sometimes the line between taint sources and propagators is blurry.
// _IO_getc is choosen to be a source, but could also be a propagator.
// This way it is simpler, as modeling it as a propagator would require
// to model the possible sources of _IO_FILE * values, which the _IO_getc
// function takes as parameters.
- {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
- {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
- {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
- {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
- {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
- {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})},
- {{{"gethostname"}}, TR::Source({{0}})},
- {{{"getnameinfo"}}, TR::Source({{2, 4}})},
- {{{"getseuserbyname"}}, TR::Source({{1, 2}})},
- {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
- {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})},
- {{{"getlogin_r"}}, TR::Source({{0}})},
+ {{CDM::CLibrary, {"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"get_current_dir_name"}},
+ TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"gethostname"}}, TR::Source({{0}})},
+ {{CDM::CLibrary, {"getnameinfo"}}, TR::Source({{2, 4}})},
+ {{CDM::CLibrary, {"getseuserbyname"}}, TR::Source({{1, 2}})},
+ {{CDM::CLibrary, {"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getlogin"}}, TR::Source({{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getlogin_r"}}, TR::Source({{0}})},
// Props
- {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
- {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
- {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
- {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
- {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
-
- {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"getdelim"}}, TR::Prop({{3}}, {{0}})},
- {{{"getline"}}, TR::Prop({{2}}, {{0}})},
- {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
- {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
- {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
- {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
- {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
-
- {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
-
- {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
- {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-
- {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
- {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
- {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-
- {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
- {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
- {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
- // If memmem was called with a tainted needle and the search was
- // successful, that would mean that the value pointed by the return value
- // has the same content as the needle. If we choose to go by the policy of
- // content equivalence implies taintedness equivalence, that would mean
- // haystack should be considered a propagation source argument.
- {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-
- // The comment for memmem above also applies to strstr.
- {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-
- {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-
- {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibraryMaybeHardened, {"fgets"}},
+ TR::Prop({{2}}, {{0, ReturnValueIndex}})},
+ {{CDM::CLibraryMaybeHardened, {"fgetws"}},
+ TR::Prop({{2}}, {{0, ReturnValueIndex}})},
+ {{CDM::CLibrary, {"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
+ {{CDM::CLibrary, {"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
+ {{CDM::CLibrary, {"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
+ {{CDM::CLibrary, {"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
+
+ {{CDM::CLibrary, {"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getc_unlocked"}},
+ TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{CDM::CLibrary, {"getdelim"}}, TR::Prop({{3}}, {{0}})},
+ // TODO: this intends to match the C function `getline()`, but the call
+ // description also matches the C++ function `std::getline()`; it should
+ // be ruled out by some additional logic.
+ {{CDM::CLibrary, {"getline"}}, TR::Prop({{2}}, {{0}})},
----------------
NagyDonat wrote:
Right now the propagation rule `TR::Prop({{2}}, {{0}})`, which propagates taint from the third argument (index 2) to the first argument (index 0), is applied to both the plain C-style `getline` and the C++ `std::getline`. However, these are completely different functions:
```
ssize_t getline(char **restrict lineptr, size_t *restrict n,
FILE *restrict stream);
```
and
```
template< class CharT, class Traits, class Allocator >
std::basic_istream<CharT, Traits>&
getline( std::basic_istream<CharT, Traits>&& input,
std::basic_string<CharT, Traits, Allocator>& str, CharT delim );
// there is also a 2-argument overload without 'delim'
```
so the propagation behavior is nonsense for the C++ function `std::getline`.
Note that the matching mode `CDM::CLibrary` accepts both functions declared at translation-unit (TU) scope and functions declared within the namespace `std`, because there are many functions for which this is the right thing to do (under C they appear at TU scope, but the C++ headers declare the same function within `std`). The functions named `getline` are an unfortunate exception: the C and C++ standards independently adopted the same name with very different semantics.
https://github.com/llvm/llvm-project/pull/91635
More information about the cfe-commits mailing list