[clang] [llvm] Reapply "[analyzer] Accept C library functions from the `std` namespace" again (PR #85791)

via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 21 09:28:19 PDT 2024


=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/85791 at github.com>


https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/85791

>From 3fbc8da42726aa63ad0aef7ba12141125197279b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Tue, 19 Mar 2024 14:14:19 +0100
Subject: [PATCH 1/3] Reapply "[analyzer] Accept C library functions from the
 `std` namespace" again

This reapplies 80ab8234ac309418637488b97e0a62d8377b2ecf again, after
fixing a name collision warning in the unit tests (see the revert commit
13ccaf9b9d4400bb128b35ff4ac733e4afc3ad1c for details).

Co-authored-by: Balazs Benics <benicsbalazs at gmail.com>
---
 .../Core/PathSensitive/CallDescription.h      |  8 +-
 .../StaticAnalyzer/Core/CheckerContext.cpp    |  8 +-
 clang/unittests/StaticAnalyzer/CMakeLists.txt |  1 +
 .../StaticAnalyzer/IsCLibraryFunctionTest.cpp | 82 +++++++++++++++++++
 .../clang/unittests/StaticAnalyzer/BUILD.gn   |  1 +
 5 files changed, 91 insertions(+), 9 deletions(-)
 create mode 100644 clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
index 3432d2648633c2..b4e1636130ca7c 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
@@ -41,12 +41,8 @@ class CallDescription {
     ///  - We also accept calls where the number of arguments or parameters is
     ///    greater than the specified value.
     /// For the exact heuristics, see CheckerContext::isCLibraryFunction().
-    /// Note that functions whose declaration context is not a TU (e.g.
-    /// methods, functions in namespaces) are not accepted as C library
-    /// functions.
-    /// FIXME: If I understand it correctly, this discards calls where C++ code
-    /// refers a C library function through the namespace `std::` via headers
-    /// like <cstdlib>.
+    /// (This mode only matches functions that are declared either directly
+    /// within a TU or in the namespace `std`.)
     CLibrary,
 
     /// Matches "simple" functions that are not methods. (Static methods are
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index d6d4cec9dd3d4d..1a9bff529e9bb1 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -87,9 +87,11 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
   if (!II)
     return false;
 
-  // Look through 'extern "C"' and anything similar invented in the future.
-  // If this function is not in TU directly, it is not a C library function.
-  if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
+  // C library functions are either declared directly within a TU (the common
+  // case) or they are accessed through the namespace `std` (when they are used
+  // in C++ via headers like <cstdlib>).
+  const DeclContext *DC = FD->getDeclContext()->getRedeclContext();
+  if (!(DC->isTranslationUnit() || DC->isStdNamespace()))
     return false;
 
   // If this function is not externally visible, it is not a C library function.
diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt
index 775f0f8486b8f9..db56e77331b821 100644
--- a/clang/unittests/StaticAnalyzer/CMakeLists.txt
+++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_unittest(StaticAnalysisTests
   CallEventTest.cpp
   ConflictingEvalCallsTest.cpp
   FalsePositiveRefutationBRVisitorTest.cpp
+  IsCLibraryFunctionTest.cpp
   NoStateChangeFuncVisitorTest.cpp
   ParamRegionTest.cpp
   RangeSetTest.cpp
diff --git a/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
new file mode 100644
index 00000000000000..f26acf07e23ac0
--- /dev/null
+++ b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
@@ -0,0 +1,82 @@
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Frontend/ASTUnit.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/Tooling/Tooling.h"
+#include "gtest/gtest.h"
+
+#include <memory>
+
+using namespace clang;
+using namespace ento;
+using namespace ast_matchers;
+
+class IsCLibraryFunctionTest : public testing::Test {
+  std::unique_ptr<ASTUnit> ASTUnitP;
+  const FunctionDecl *Result = nullptr;
+public:
+  const FunctionDecl *getFunctionDecl() const { return Result; }
+
+  testing::AssertionResult buildAST(StringRef Code) {
+    ASTUnitP = tooling::buildASTFromCode(Code);
+    if (!ASTUnitP)
+      return testing::AssertionFailure() << "AST construction failed";
+
+    ASTContext &Context = ASTUnitP->getASTContext();
+    if (Context.getDiagnostics().hasErrorOccurred())
+      return testing::AssertionFailure() << "Compilation error";
+
+    auto Matches = ast_matchers::match(functionDecl().bind("fn"), Context);
+    if (Matches.empty())
+      return testing::AssertionFailure() << "No function declaration found";
+
+    if (Matches.size() > 1)
+      return testing::AssertionFailure()
+             << "Multiple function declarations found";
+
+    Result = Matches[0].getNodeAs<FunctionDecl>("fn");
+    return testing::AssertionSuccess();
+  }
+};
+
+TEST_F(IsCLibraryFunctionTest, AcceptsGlobal) {
+  ASSERT_TRUE(buildAST(R"cpp(void fun();)cpp"));
+  EXPECT_TRUE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, AcceptsExternCGlobal) {
+  ASSERT_TRUE(buildAST(R"cpp(extern "C" { void fun(); })cpp"));
+  EXPECT_TRUE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, RejectsNoInlineNoExternalLinkage) {
+  // Functions that are neither inlined nor externally visible cannot be C library functions.
+  ASSERT_TRUE(buildAST(R"cpp(static void fun();)cpp"));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, RejectsAnonymousNamespace) {
+  ASSERT_TRUE(buildAST(R"cpp(namespace { void fun(); })cpp"));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, AcceptsStdNamespace) {
+  ASSERT_TRUE(buildAST(R"cpp(namespace std { void fun(); })cpp"));
+  EXPECT_TRUE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, RejectsOtherNamespaces) {
+  ASSERT_TRUE(buildAST(R"cpp(namespace stdx { void fun(); })cpp"));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, RejectsClassStatic) {
+  ASSERT_TRUE(buildAST(R"cpp(class A { static void fun(); };)cpp"));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
+
+TEST_F(IsCLibraryFunctionTest, RejectsClassMember) {
+  ASSERT_TRUE(buildAST(R"cpp(class A { void fun(); };)cpp"));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
+}
diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
index 01c2b6ced3366f..9c240cff181634 100644
--- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
@@ -19,6 +19,7 @@ unittest("StaticAnalysisTests") {
     "CallEventTest.cpp",
     "ConflictingEvalCallsTest.cpp",
     "FalsePositiveRefutationBRVisitorTest.cpp",
+    "IsCLibraryFunctionTest.cpp",
     "NoStateChangeFuncVisitorTest.cpp",
     "ParamRegionTest.cpp",
     "RangeSetTest.cpp",

>From f06f093b4beffd0093d9db5b0e2d329392973b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Tue, 19 Mar 2024 15:15:45 +0100
Subject: [PATCH 2/3] Satisfy git-clang-format

---
 clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
index f26acf07e23ac0..d6dfcaac6f3bdc 100644
--- a/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
+++ b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
@@ -15,6 +15,7 @@ using namespace ast_matchers;
 class IsCLibraryFunctionTest : public testing::Test {
   std::unique_ptr<ASTUnit> ASTUnitP;
   const FunctionDecl *Result = nullptr;
+
 public:
   const FunctionDecl *getFunctionDecl() const { return Result; }
 
@@ -51,7 +52,8 @@ TEST_F(IsCLibraryFunctionTest, AcceptsExternCGlobal) {
 }
 
 TEST_F(IsCLibraryFunctionTest, RejectsNoInlineNoExternalLinkage) {
-  // Functions that are neither inlined nor externally visible cannot be C library functions.
+  // Functions that are neither inlined nor externally visible cannot be C
+  // library functions.
   ASSERT_TRUE(buildAST(R"cpp(static void fun();)cpp"));
   EXPECT_FALSE(CheckerContext::isCLibraryFunction(getFunctionDecl()));
 }

>From 0a39be5d36143babe3ef32a82e9757e653f8a3e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Thu, 21 Mar 2024 17:03:59 +0100
Subject: [PATCH 3/3] Fix recognition of POSIX getline in MallocChecker

The name "getline" is used for two completely different functions:
`std::getline` from the C++ standard library works with streams and
strings, while the C function `getline` defined by POSIX and the dynamic
memory TS works with C data structures and `malloc` style allocations.

This commit adds a manual `isInStdNamespace()` check to discard calls to
`std::getline` in MallocChecker. In theory we could keep a separate
matching mode for "only a C function, not from the std namespace" but I
think that situations like this `getline` are so rare that it's better
to handle them individually.

Note that the analyzer was apparently producing correct behavior even
without this fix, because the unexpected `std::getline` calls were
silently discarded (as their arguments had unexpected types and the
callback just bailed out).
---
 .../StaticAnalyzer/Checkers/MallocChecker.cpp | 27 ++++++++++++++-----
 .../Inputs/system-header-simulator-cxx.h      | 11 +++++++-
 clang/test/Analysis/getline-cpp.cpp           | 15 +++++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/Analysis/getline-cpp.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 03cb7696707fe2..3a9b899c720b76 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -394,8 +394,10 @@ class MallocChecker
                                      const CallEvent &Call, CheckerContext &C)>;
 
   const CallDescriptionMap<CheckFn> PreFnMap{
-      {{{"getline"}, 3}, &MallocChecker::preGetdelim},
-      {{{"getdelim"}, 4}, &MallocChecker::preGetdelim},
+      // NOTE: the following CallDescription also matches the C++ standard
+      // library function std::getdelim(); the callback will filter it out.
+      {{CDM::CLibrary, {"getline"}, 3}, &MallocChecker::preGetdelim},
+      {{CDM::CLibrary, {"getdelim"}, 4}, &MallocChecker::preGetdelim},
   };
 
   const CallDescriptionMap<CheckFn> FreeingMemFnMap{
@@ -446,8 +448,11 @@ class MallocChecker
        std::bind(&MallocChecker::checkRealloc, _1, _2, _3, false)},
       {{{"g_realloc_n"}, 3}, &MallocChecker::checkReallocN},
       {{{"g_try_realloc_n"}, 3}, &MallocChecker::checkReallocN},
-      {{{"getline"}, 3}, &MallocChecker::checkGetdelim},
-      {{{"getdelim"}, 4}, &MallocChecker::checkGetdelim},
+
+      // NOTE: the following CallDescription also matches the C++ standard
+      // library function std::getdelim(); the callback will filter it out.
+      {{CDM::CLibrary, {"getline"}, 3}, &MallocChecker::checkGetdelim},
+      {{CDM::CLibrary, {"getdelim"}, 4}, &MallocChecker::checkGetdelim},
   };
 
   bool isMemCall(const CallEvent &Call) const;
@@ -1435,9 +1440,17 @@ void MallocChecker::checkGMallocN0(const CallEvent &Call,
   C.addTransition(State);
 }
 
+static bool isFromStdNamespace(const CallEvent &Call) {
+  const Decl *FD = Call.getDecl();
+  assert(FD && "a CallDescription cannot match a call without a Decl");
+  return (FD->isInStdNamespace());
+}
+
 void MallocChecker::preGetdelim(const CallEvent &Call,
                                 CheckerContext &C) const {
-  if (!Call.isGlobalCFunction())
+  // Discard calls to the C++ standard library function std::getline(), which
+  // is completely unrelated to the POSIX getline() that we're checking.
+  if (isFromStdNamespace(Call))
     return;
 
   ProgramStateRef State = C.getState();
@@ -1458,7 +1471,9 @@ void MallocChecker::preGetdelim(const CallEvent &Call,
 
 void MallocChecker::checkGetdelim(const CallEvent &Call,
                                   CheckerContext &C) const {
-  if (!Call.isGlobalCFunction())
+  // Discard calls to the C++ standard library function std::getline(), which
+  // is completely unrelated to the POSIX getline() that we're checking.
+  if (isFromStdNamespace(Call))
     return;
 
   ProgramStateRef State = C.getState();
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-cxx.h b/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
index 3ef7af2ea6c6ab..85db68d41a6c80 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
@@ -1106,11 +1106,20 @@ using ostream = basic_ostream<char>;
 extern std::ostream cout;
 
 ostream &operator<<(ostream &, const string &);
-
 #if __cplusplus >= 202002L
 template <class T>
 ostream &operator<<(ostream &, const std::unique_ptr<T> &);
 #endif
+
+template <class CharT>
+class basic_istream;
+
+using istream = basic_istream<char>;
+
+extern std::istream cin;
+
+istream &getline(istream &, string &, char);
+istream &getline(istream &, string &);
 } // namespace std
 
 #ifdef TEST_INLINABLE_ALLOCATORS
diff --git a/clang/test/Analysis/getline-cpp.cpp b/clang/test/Analysis/getline-cpp.cpp
new file mode 100644
index 00000000000000..ef9d3186009c7f
--- /dev/null
+++ b/clang/test/Analysis/getline-cpp.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix,debug.ExprInspection -verify %s
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix,alpha.unix,debug.ExprInspection -verify %s
+//
+// expected-no-diagnostics
+
+#include "Inputs/system-header-simulator-cxx.h"
+
+void test_std_getline() {
+  std::string userid, comment;
+  // MallocChecker should not confuse the POSIX function getline() and the
+  // unrelated C++ standard library function std::getline.
+  std::getline(std::cin, userid, ' '); // no-crash
+  std::getline(std::cin, comment); // no-crash
+}



More information about the cfe-commits mailing list