[clang-tools-extra] [clang-tidy] Add bugprone-suspicious-stringview-data-usage check (PR #83716)
Piotr Zegar via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 7 11:58:36 PST 2024
https://github.com/PiotrZSL updated https://github.com/llvm/llvm-project/pull/83716
>From d56e3336a8b4b3434c5f2d69b4ec50cafc01cd8f Mon Sep 17 00:00:00 2001
From: Piotr Zegar <piotr.zegar at nokia.com>
Date: Tue, 20 Feb 2024 18:15:56 +0000
Subject: [PATCH 1/2] [clang-tidy] Add
bugprone-suspicious-stringview-data-usage check
This check identifies suspicious usages of std::string_view::data()
that could lead to reading out-of-bounds data due to inadequate or
incorrect string null termination.
---
.../bugprone/BugproneTidyModule.cpp | 3 +
.../clang-tidy/bugprone/CMakeLists.txt | 1 +
.../SuspiciousStringviewDataUsageCheck.cpp | 100 ++++++++++++++++++
.../SuspiciousStringviewDataUsageCheck.h | 38 +++++++
clang-tools-extra/docs/ReleaseNotes.rst | 7 ++
.../suspicious-stringview-data-usage.rst | 58 ++++++++++
.../docs/clang-tidy/checks/list.rst | 1 +
.../clang-tidy/checkers/Inputs/Headers/string | 7 ++
.../suspicious-stringview-data-usage.cpp | 48 +++++++++
9 files changed, 263 insertions(+)
create mode 100644 clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp
create mode 100644 clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h
create mode 100644 clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst
create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp
diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
index e518a64abc52eb..2931325d8b5798 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
@@ -73,6 +73,7 @@
#include "SuspiciousReallocUsageCheck.h"
#include "SuspiciousSemicolonCheck.h"
#include "SuspiciousStringCompareCheck.h"
+#include "SuspiciousStringviewDataUsageCheck.h"
#include "SwappedArgumentsCheck.h"
#include "SwitchMissingDefaultCaseCheck.h"
#include "TerminatingContinueCheck.h"
@@ -218,6 +219,8 @@ class BugproneModule : public ClangTidyModule {
"bugprone-suspicious-semicolon");
CheckFactories.registerCheck<SuspiciousStringCompareCheck>(
"bugprone-suspicious-string-compare");
+ CheckFactories.registerCheck<SuspiciousStringviewDataUsageCheck>(
+ "bugprone-suspicious-stringview-data-usage");
CheckFactories.registerCheck<SwappedArgumentsCheck>(
"bugprone-swapped-arguments");
CheckFactories.registerCheck<TerminatingContinueCheck>(
diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
index 638fba03a43587..081ba67efe1538 100644
--- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
@@ -26,6 +26,7 @@ add_clang_library(clangTidyBugproneModule
ImplicitWideningOfMultiplicationResultCheck.cpp
InaccurateEraseCheck.cpp
IncorrectEnableIfCheck.cpp
+ SuspiciousStringviewDataUsageCheck.cpp
SwitchMissingDefaultCaseCheck.cpp
IncDecInConditionsCheck.cpp
IncorrectRoundingsCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp
new file mode 100644
index 00000000000000..ffb31840c4c886
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp
@@ -0,0 +1,100 @@
+//===--- SuspiciousStringviewDataUsageCheck.cpp - clang-tidy --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SuspiciousStringviewDataUsageCheck.h"
+#include "../utils/Matchers.h"
+#include "../utils/OptionsUtils.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::bugprone {
+
+SuspiciousStringviewDataUsageCheck::SuspiciousStringviewDataUsageCheck(
+ StringRef Name, ClangTidyContext *Context)
+ : ClangTidyCheck(Name, Context),
+ StringViewTypes(utils::options::parseStringList(Options.get(
+ "StringViewTypes", "::std::basic_string_view;::llvm::StringRef"))),
+ AllowedCallees(
+ utils::options::parseStringList(Options.get("AllowedCallees", ""))) {}
+
+void SuspiciousStringviewDataUsageCheck::storeOptions(
+ ClangTidyOptions::OptionMap &Opts) {
+ Options.store(Opts, "StringViewTypes",
+ utils::options::serializeStringList(StringViewTypes));
+ Options.store(Opts, "AllowedCallees",
+ utils::options::serializeStringList(AllowedCallees));
+}
+
+bool SuspiciousStringviewDataUsageCheck::isLanguageVersionSupported(
+ const LangOptions &LangOpts) const {
+ return LangOpts.CPlusPlus;
+}
+
+std::optional<TraversalKind>
+SuspiciousStringviewDataUsageCheck::getCheckTraversalKind() const {
+ return TK_AsIs;
+}
+
+void SuspiciousStringviewDataUsageCheck::registerMatchers(MatchFinder *Finder) {
+
+ auto AncestorCall = anyOf(
+ cxxConstructExpr(), callExpr(unless(cxxOperatorCallExpr())), lambdaExpr(),
+ initListExpr(
+ hasType(qualType(hasCanonicalType(hasDeclaration(recordDecl()))))));
+
+ auto DataMethod =
+ cxxMethodDecl(hasName("data"),
+ ofClass(matchers::matchesAnyListedName(StringViewTypes)));
+
+ auto DataWithSelfCall =
+ cxxMemberCallExpr(on(ignoringParenImpCasts(expr().bind("self"))),
+ callee(DataMethod))
+ .bind("data-call");
+ auto SizeCall = cxxMemberCallExpr(
+ callee(cxxMethodDecl(hasAnyName("size", "length"))),
+ on(ignoringParenImpCasts(
+ matchers::isStatementIdenticalToBoundNode("self"))));
+
+ Finder->addMatcher(
+ cxxMemberCallExpr(
+ on(ignoringParenImpCasts(expr().bind("self"))), callee(DataMethod),
+ expr().bind("data-call"),
+ hasParent(expr(anyOf(
+ invocation(
+ expr().bind("call"), unless(cxxOperatorCallExpr()),
+ hasAnyArgument(
+ ignoringParenImpCasts(equalsBoundNode("data-call"))),
+ unless(hasAnyArgument(ignoringParenImpCasts(SizeCall))),
+ unless(hasAnyArgument(hasDescendant(expr(
+ SizeCall,
+ hasAncestor(expr(AncestorCall).bind("ancestor-size")),
+ hasAncestor(expr(equalsBoundNode("call"),
+ equalsBoundNode("ancestor-size"))))))),
+ hasDeclaration(namedDecl(
+ unless(matchers::matchesAnyListedName(AllowedCallees))))),
+ initListExpr(expr().bind("init"),
+ hasType(qualType(hasCanonicalType(hasDeclaration(
+ recordDecl(unless(matchers::matchesAnyListedName(
+ AllowedCallees))))))),
+ unless(has(ignoringParenImpCasts(SizeCall)))))))),
+ this);
+}
+
+void SuspiciousStringviewDataUsageCheck::check(
+ const MatchFinder::MatchResult &Result) {
+ const auto *DataCallExpr =
+ Result.Nodes.getNodeAs<CXXMemberCallExpr>("data-call");
+ diag(DataCallExpr->getExprLoc(),
+ "result of a `data()` call may not be null terminated, provide size "
+ "information to the callee to prevent potential issues")
+ << DataCallExpr->getCallee()->getSourceRange();
+}
+
+} // namespace clang::tidy::bugprone
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h
new file mode 100644
index 00000000000000..31eca0a48722fe
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h
@@ -0,0 +1,38 @@
+//===--- SuspiciousStringviewDataUsageCheck.h - clang-tidy -------//C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSSTRINGVIEWDATAUSAGECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSSTRINGVIEWDATAUSAGECHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::bugprone {
+
+/// Identifies suspicious usages of std::string_view::data() that could lead to
+/// reading out-of-bounds data due to inadequate or incorrect string null
+/// termination.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.html
+class SuspiciousStringviewDataUsageCheck : public ClangTidyCheck {
+public:
+ SuspiciousStringviewDataUsageCheck(StringRef Name, ClangTidyContext *Context);
+ void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+ void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+ void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+ bool isLanguageVersionSupported(const LangOptions &LangOpts) const override;
+ std::optional<TraversalKind> getCheckTraversalKind() const override;
+
+private:
+ std::vector<llvm::StringRef> StringViewTypes;
+ std::vector<llvm::StringRef> AllowedCallees;
+};
+
+} // namespace clang::tidy::bugprone
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSSTRINGVIEWDATAUSAGECHECK_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index d98c4ff9a75044..538061267b76fe 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -110,6 +110,13 @@ New checks
Detects error-prone Curiously Recurring Template Pattern usage, when the CRTP
can be constructed outside itself and the derived class.
+- New :doc:`bugprone-suspicious-stringview-data-usage
+ <clang-tidy/checks/bugprone/suspicious-stringview-data-usage>` check.
+
+ Identifies suspicious usages of ``std::string_view::data()`` that could lead
+ to reading out-of-bounds data due to inadequate or incorrect string null
+ termination.
+
- New :doc:`modernize-use-designated-initializers
<clang-tidy/checks/modernize/use-designated-initializers>` check.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst
new file mode 100644
index 00000000000000..9b38d836018103
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst
@@ -0,0 +1,58 @@
+.. title:: clang-tidy - bugprone-suspicious-stringview-data-usage
+
+bugprone-suspicious-stringview-data-usage
+=========================================
+
+Identifies suspicious usages of ``std::string_view::data()`` that could lead to
+reading out-of-bounds data due to inadequate or incorrect string null
+termination.
+
+It warns when the result of ``data()`` is passed to a constructor or function
+without also passing the corresponding result of ``size()`` or ``length()``
+member function. Such usage can lead to unintended behavior, particularly when
+assuming the data pointed to by ``data()`` is null-terminated.
+
+The absence of a ``c_str()`` method in ``std::string_view`` often leads
+developers to use ``data()`` as a substitute, especially when interfacing with
+C APIs that expect null-terminated strings. However, since ``data()`` does not
+guarantee null termination, this can result in unintended behavior if the API
+relies on proper null termination for correct string interpretation.
+
+In today's programming landscape, this scenario can occur when implicitly
+converting an ``std::string_view`` to an ``std::string``. Since the constructor
+in ``std::string`` designed for string-view-like objects is ``explicit``,
+attempting to pass an ``std::string_view`` to a function expecting an
+``std::string`` will result in a compilation error. As a workaround, developers
+may be tempted to utilize the ``.data()`` method to achieve compilation,
+introducing potential risks.
+
+For instance:
+
+.. code-block:: c++
+
+ void printString(const std::string& str) {
+ std::cout << "String: " << str << std::endl;
+ }
+
+ void something(std::string_view sv) {
+ printString(sv.data());
+ }
+
+In this example, directly passing ``sv`` to the ``printString`` function would
+lead to a compilation error due to the explicit nature of the ``std::string``
+constructor. Consequently, developers might opt for ``sv.data()`` to resolve the
+compilation error, albeit introducing potential hazards as discussed.
+
+.. option:: StringViewTypes
+
+ Option allows users to specify custom string view-like types for analysis. It
+ accepts a semicolon-separated list of type names or regular expressions
+ matching these types. Default value is:
+ `::std::basic_string_view;::llvm::StringRef`.
+
+.. option:: AllowedCallees
+
+ Specifies methods, functions, or classes where the result of ``.data()`` is
+ passed to. Allows to exclude such calls from the analysis. Accepts a
+ semicolon-separated list of names or regular expressions matching these
+ entities. Default value is: empty string.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index d03e7af688f007..79e81dd174e4f3 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -139,6 +139,7 @@ Clang-Tidy Checks
:doc:`bugprone-suspicious-realloc-usage <bugprone/suspicious-realloc-usage>`,
:doc:`bugprone-suspicious-semicolon <bugprone/suspicious-semicolon>`, "Yes"
:doc:`bugprone-suspicious-string-compare <bugprone/suspicious-string-compare>`, "Yes"
+ :doc:`bugprone-suspicious-stringview-data-usage <bugprone/suspicious-stringview-data-usage>`,
:doc:`bugprone-swapped-arguments <bugprone/swapped-arguments>`, "Yes"
:doc:`bugprone-switch-missing-default-case <bugprone/switch-missing-default-case>`,
:doc:`bugprone-terminating-continue <bugprone/terminating-continue>`, "Yes"
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
index f2e4159a224513..28e2b4a231e52e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
@@ -24,6 +24,7 @@ struct basic_string {
basic_string();
basic_string(const C *p, const A &a = A());
basic_string(const C *p, size_type count);
+ basic_string(const C *b, const C *e);
~basic_string();
@@ -85,6 +86,12 @@ struct basic_string_view {
const C *str;
constexpr basic_string_view(const C* s) : str(s) {}
+ const C *data() const;
+
+ bool empty() const;
+ size_type size() const;
+ size_type length() const;
+
size_type find(_Type v, size_type pos = 0) const;
size_type find(C ch, size_type pos = 0) const;
size_type find(const C* s, size_type pos, size_type count) const;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp
new file mode 100644
index 00000000000000..638ce8591a24ac
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp
@@ -0,0 +1,48 @@
+// RUN: %check_clang_tidy -std=c++17-or-later %s bugprone-suspicious-stringview-data-usage %t -- -- -isystem %clang_tidy_headers
+#include <string>
+
+struct View {
+ const char* str;
+};
+
+struct ViewWithSize {
+ const char* str;
+ std::string_view::size_type size;
+};
+
+void something(const char*);
+void something(const char*, unsigned);
+void something(const char*, unsigned, const char*);
+void something_str(std::string, unsigned);
+
+void invalid(std::string_view sv, std::string_view sv2) {
+ std::string s(sv.data());
+// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+ std::string si{sv.data()};
+// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+ std::string_view s2(sv.data());
+// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+ something(sv.data());
+// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+ something(sv.data(), sv.size(), sv2.data());
+// CHECK-MESSAGES: :[[@LINE-1]]:39: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+ something_str(sv.data(), sv.size());
+// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+ View view{sv.data()};
+// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
+}
+
+void valid(std::string_view sv) {
+ std::string s1(sv.data(), sv.data() + sv.size());
+ std::string s2(sv.data(), sv.data() + sv.length());
+ std::string s3(sv.data(), sv.size() + sv.data());
+ std::string s4(sv.data(), sv.length() + sv.data());
+ std::string s5(sv.data(), sv.size());
+ std::string s6(sv.data(), sv.length());
+ something(sv.data(), sv.size());
+ something(sv.data(), sv.length());
+ ViewWithSize view1{sv.data(), sv.size()};
+ ViewWithSize view2{sv.data(), sv.length()};
+
+ const char* str{sv.data()};
+}
>From 431071344df198bd0e9d4a1e5bbef972c84e00c6 Mon Sep 17 00:00:00 2001
From: Piotr Zegar <piotr.zegar at nokia.com>
Date: Thu, 7 Mar 2024 19:58:11 +0000
Subject: [PATCH 2/2] Fix initList handling
---
.../SuspiciousStringviewDataUsageCheck.cpp | 17 +++++++++--------
.../suspicious-stringview-data-usage.cpp | 10 +++++++++-
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp
index ffb31840c4c886..f247da6bc5eff6 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp
@@ -62,28 +62,29 @@ void SuspiciousStringviewDataUsageCheck::registerMatchers(MatchFinder *Finder) {
on(ignoringParenImpCasts(
matchers::isStatementIdenticalToBoundNode("self"))));
+ auto DescendantSizeCall = expr(hasDescendant(expr(SizeCall,
+ hasAncestor(expr(AncestorCall).bind("ancestor-size")),
+ hasAncestor(expr(equalsBoundNode("parent"), equalsBoundNode("ancestor-size"))))));
+
Finder->addMatcher(
cxxMemberCallExpr(
on(ignoringParenImpCasts(expr().bind("self"))), callee(DataMethod),
expr().bind("data-call"),
hasParent(expr(anyOf(
invocation(
- expr().bind("call"), unless(cxxOperatorCallExpr()),
+ expr().bind("parent"), unless(cxxOperatorCallExpr()),
hasAnyArgument(
ignoringParenImpCasts(equalsBoundNode("data-call"))),
unless(hasAnyArgument(ignoringParenImpCasts(SizeCall))),
- unless(hasAnyArgument(hasDescendant(expr(
- SizeCall,
- hasAncestor(expr(AncestorCall).bind("ancestor-size")),
- hasAncestor(expr(equalsBoundNode("call"),
- equalsBoundNode("ancestor-size"))))))),
+ unless(hasAnyArgument(DescendantSizeCall)),
hasDeclaration(namedDecl(
unless(matchers::matchesAnyListedName(AllowedCallees))))),
- initListExpr(expr().bind("init"),
+ initListExpr(expr().bind("parent"),
hasType(qualType(hasCanonicalType(hasDeclaration(
recordDecl(unless(matchers::matchesAnyListedName(
AllowedCallees))))))),
- unless(has(ignoringParenImpCasts(SizeCall)))))))),
+ unless(DescendantSizeCall))
+ )))),
this);
}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp
index 638ce8591a24ac..8072d60a2f47a1 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-stringview-data-usage.cpp
@@ -5,6 +5,11 @@ struct View {
const char* str;
};
+struct Pair {
+ const char* begin;
+ const char* end;
+};
+
struct ViewWithSize {
const char* str;
std::string_view::size_type size;
@@ -43,6 +48,9 @@ void valid(std::string_view sv) {
something(sv.data(), sv.length());
ViewWithSize view1{sv.data(), sv.size()};
ViewWithSize view2{sv.data(), sv.length()};
-
+ Pair view3{sv.data(), sv.data() + sv.size()};
+ Pair view4{sv.data(), sv.data() + sv.length()};
+ Pair view5{sv.data(), sv.size() + sv.data()};
+ Pair view6{sv.data(), sv.length() + sv.data()};
const char* str{sv.data()};
}
More information about the cfe-commits
mailing list