[clang-tools-extra] [clang-tidy] Add new modernize-string-find-startswith check (PR #72385)

Nicolas van Kempen via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 15 07:32:40 PST 2023


https://github.com/nicovank updated https://github.com/llvm/llvm-project/pull/72385

>From d31148a9d99004f1a64dc6dc1bce85a58985aa0f Mon Sep 17 00:00:00 2001
From: Nicolas van Kempen <nvankempen at fb.com>
Date: Wed, 15 Nov 2023 01:13:10 -0800
Subject: [PATCH] [clang-tidy] Add new modernize-string-find-startswith check

Matchers are copied over from abseil-string-find-startswith, only the error
message is different and suggests `std::{string|string_view}::starts_with`
instead of the Abseil equivalent.
---
 .../abseil/StringFindStartswithCheck.h        |   5 +-
 .../clang-tidy/modernize/CMakeLists.txt       |   1 +
 .../modernize/ModernizeTidyModule.cpp         |   3 +
 .../modernize/StringFindStartswithCheck.cpp   | 105 ++++++++++++++++++
 .../modernize/StringFindStartswithCheck.h     |  41 +++++++
 clang-tools-extra/docs/ReleaseNotes.rst       |   8 ++
 .../checks/abseil/string-find-startswith.rst  |   4 +
 .../docs/clang-tidy/checks/list.rst           |   1 +
 .../modernize/string-find-startswith.rst      |  32 ++++++
 .../abseil/string-find-startswith.cpp         |   2 +-
 .../modernize/string-find-startswith.cpp      |  77 +++++++++++++
 11 files changed, 277 insertions(+), 2 deletions(-)
 create mode 100644 clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.cpp
 create mode 100644 clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.h
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/modernize/string-find-startswith.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/modernize/string-find-startswith.cpp

diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h
index 923b5caece5439b..5019b7000f1d062 100644
--- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h
@@ -21,7 +21,6 @@ namespace clang::tidy::abseil {
 
 // Find string.find(...) == 0 comparisons and suggest replacing with StartsWith.
 // FIXME(niko): Add similar check for EndsWith
-// FIXME(niko): Add equivalent modernize checks for C++20's std::starts_With
 class StringFindStartswithCheck : public ClangTidyCheck {
 public:
   using ClangTidyCheck::ClangTidyCheck;
@@ -31,6 +30,10 @@ class StringFindStartswithCheck : public ClangTidyCheck {
   void registerMatchers(ast_matchers::MatchFinder *Finder) override;
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
   void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    // Prefer modernize-string-find-startswith when C++20 is available.
+    return LangOpts.CPlusPlus && !LangOpts.CPlusPlus20;
+  }
 
 private:
   const std::vector<StringRef> StringLikeClasses;
diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
index 717c400c4790330..541f58304119856 100644
--- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_library(clangTidyModernizeModule
   ReplaceRandomShuffleCheck.cpp
   ReturnBracedInitListCheck.cpp
   ShrinkToFitCheck.cpp
+  StringFindStartswithCheck.cpp
   TypeTraitsCheck.cpp
   UnaryStaticAssertCheck.cpp
   UseAutoCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
index 73751cf2705068d..1fcbff79ddc6f96 100644
--- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
@@ -26,6 +26,7 @@
 #include "ReplaceRandomShuffleCheck.h"
 #include "ReturnBracedInitListCheck.h"
 #include "ShrinkToFitCheck.h"
+#include "StringFindStartswithCheck.h"
 #include "TypeTraitsCheck.h"
 #include "UnaryStaticAssertCheck.h"
 #include "UseAutoCheck.h"
@@ -66,6 +67,8 @@ class ModernizeModule : public ClangTidyModule {
     CheckFactories.registerCheck<MakeSharedCheck>("modernize-make-shared");
     CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique");
     CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
+    CheckFactories.registerCheck<StringFindStartswithCheck>(
+        "modernize-string-find-startswith");
     CheckFactories.registerCheck<UseStdPrintCheck>("modernize-use-std-print");
     CheckFactories.registerCheck<RawStringLiteralCheck>(
         "modernize-raw-string-literal");
diff --git a/clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.cpp
new file mode 100644
index 000000000000000..ea569f924416dff
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.cpp
@@ -0,0 +1,105 @@
+//===--- StringFindStartswithCheck.cpp - clang-tidy -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "StringFindStartswithCheck.h"
+
+#include "../utils/OptionsUtils.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::modernize {
+
+const auto DefaultStringLikeClasses =
+    "::std::basic_string;::std::basic_string_view";
+
+StringFindStartswithCheck::StringFindStartswithCheck(StringRef Name,
+                                                     ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      StringLikeClasses(utils::options::parseStringList(
+          Options.get("StringLikeClasses", DefaultStringLikeClasses))) {}
+
+void StringFindStartswithCheck::registerMatchers(MatchFinder *Finder) {
+  const auto ZeroLiteral = integerLiteral(equals(0));
+  const auto StringClassMatcher = cxxRecordDecl(hasAnyName(StringLikeClasses));
+  const auto StringType = hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(StringClassMatcher)));
+
+  const auto StringFind = cxxMemberCallExpr(
+      // .find()-call on a string...
+      callee(cxxMethodDecl(hasName("find")).bind("findfun")),
+      on(hasType(StringType)),
+      // ... with some search expression ...
+      hasArgument(0, expr().bind("needle")),
+      // ... and either "0" as second argument or the default argument (also 0).
+      anyOf(hasArgument(1, ZeroLiteral), hasArgument(1, cxxDefaultArgExpr())));
+
+  const auto StringRFind = cxxMemberCallExpr(
+      // .rfind()-call on a string...
+      callee(cxxMethodDecl(hasName("rfind")).bind("findfun")),
+      on(hasType(StringType)),
+      // ... with some search expression ...
+      hasArgument(0, expr().bind("needle")),
+      // ... and "0" as second argument.
+      hasArgument(1, ZeroLiteral));
+
+  Finder->addMatcher(
+      // Match [=!]= with a zero on one side and a string.(r?)find on the other.
+      binaryOperator(
+          hasAnyOperatorName("==", "!="),
+          hasOperands(ignoringParenImpCasts(ZeroLiteral),
+                      ignoringParenImpCasts(
+                          cxxMemberCallExpr(anyOf(StringFind, StringRFind))
+                              .bind("findexpr"))))
+          .bind("expr"),
+      this);
+}
+
+void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) {
+  const auto &Context = *Result.Context;
+  const auto &Source = Context.getSourceManager();
+
+  const auto *ComparisonExpr = Result.Nodes.getNodeAs<BinaryOperator>("expr");
+  const auto *Needle = Result.Nodes.getNodeAs<Expr>("needle");
+  const auto *Haystack = Result.Nodes.getNodeAs<CXXMemberCallExpr>("findexpr")
+                             ->getImplicitObjectArgument();
+  const auto *FindFun = Result.Nodes.getNodeAs<CXXMethodDecl>("findfun");
+
+  if (ComparisonExpr->getBeginLoc().isMacroID()) {
+    return;
+  }
+
+  const auto Rev = FindFun->getName().contains("rfind");
+  const auto Neg = ComparisonExpr->getOpcode() == BO_NE;
+
+  const auto NeedleExprCode = Lexer::getSourceText(
+      CharSourceRange::getTokenRange(Needle->getSourceRange()), Source,
+      Context.getLangOpts());
+  const auto HaystackExprCode = Lexer::getSourceText(
+      CharSourceRange::getTokenRange(Haystack->getSourceRange()), Source,
+      Context.getLangOpts());
+
+  const auto ReplacementCode = ((Neg ? "!" : "") + HaystackExprCode +
+                                ".starts_with(" + NeedleExprCode + ")")
+                                   .str();
+
+  diag(ComparisonExpr->getBeginLoc(),
+       "use starts_with "
+       "instead of %select{find()|rfind()}0 %select{==|!=}1 0")
+      << Rev << Neg
+      << FixItHint::CreateReplacement(ComparisonExpr->getSourceRange(),
+                                      ReplacementCode);
+}
+
+void StringFindStartswithCheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "StringLikeClasses",
+                utils::options::serializeStringList(StringLikeClasses));
+}
+
+} // namespace clang::tidy::modernize
diff --git a/clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.h b/clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.h
new file mode 100644
index 000000000000000..fc12749de82cb3c
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/StringFindStartswithCheck.h
@@ -0,0 +1,41 @@
+//===--- StringFindStartswithCheck.h - clang-tidy ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_STRINGFINDSTARTSWITHCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_STRINGFINDSTARTSWITHCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+#include <vector>
+
+namespace clang::tidy::modernize {
+
+/// Checks whether a ``std::string::find()`` or ``std::string::rfind()`` (and
+/// corresponding ``std::string_view`` methods) result is compared with 0, and
+/// suggests replacing with ``starts_with()``. This is both a readability and a
+/// performance issue.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/string-find-startswith.html
+class StringFindStartswithCheck : public ClangTidyCheck {
+public:
+  StringFindStartswithCheck(StringRef Name, ClangTidyContext *Context);
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus20;
+  }
+
+private:
+  const std::vector<StringRef> StringLikeClasses;
+};
+
+} // namespace clang::tidy::modernize
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_STRINGFINDSTARTSWITHCHECK_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 353c6fe20269274..8f4ab856bf0a696 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -181,6 +181,14 @@ New checks
   points in a coroutine. Such hostile types include scoped-lockable types and
   types belonging to a configurable denylist.
 
+- New :doc:`modernize-string-find-startswith
+  <clang-tidy/checks/modernize/string-find-startswith>` check.
+
+  Checks whether a ``std::string::find()`` or ``std::string::rfind()`` (and
+  corresponding ``std::string_view`` methods) result is compared with 0, and
+  suggests replacing with ``starts_with()``. This is both a readability and a
+  performance issue.
+
 - New :doc:`modernize-use-constraints
   <clang-tidy/checks/modernize/use-constraints>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/abseil/string-find-startswith.rst b/clang-tools-extra/docs/clang-tidy/checks/abseil/string-find-startswith.rst
index c82c38772a5c9a8..a22192a37f07818 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/abseil/string-find-startswith.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/abseil/string-find-startswith.rst
@@ -8,6 +8,10 @@ corresponding ``std::string_view`` methods) result is compared with 0, and
 suggests replacing with ``absl::StartsWith()``. This is both a readability and
 performance issue.
 
+``starts_with`` was added as a built-in function on those types in C++20. If
+available, prefer enabling modernize-string-find-startswith instead of this
+check.
+
 .. code-block:: c++
 
   string s = "...";
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 6f987ba1672e3f2..ecf5c38a62d1a39 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -279,6 +279,7 @@ Clang-Tidy Checks
    :doc:`modernize-replace-random-shuffle <modernize/replace-random-shuffle>`, "Yes"
    :doc:`modernize-return-braced-init-list <modernize/return-braced-init-list>`, "Yes"
    :doc:`modernize-shrink-to-fit <modernize/shrink-to-fit>`, "Yes"
+   :doc:`modernize-string-find-startswith <modernize/string-find-startswith>`, "Yes"
    :doc:`modernize-type-traits <modernize/type-traits>`, "Yes"
    :doc:`modernize-unary-static-assert <modernize/unary-static-assert>`, "Yes"
    :doc:`modernize-use-auto <modernize/use-auto>`, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/string-find-startswith.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/string-find-startswith.rst
new file mode 100644
index 000000000000000..997703dd59c3b3c
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/string-find-startswith.rst
@@ -0,0 +1,32 @@
+.. title:: clang-tidy - modernize-string-find-startswith
+
+modernize-string-find-startswith
+================================
+
+Checks whether a ``std::string::find()`` or ``std::string::rfind()`` (and
+corresponding ``std::string_view`` methods) result is compared with 0, and
+suggests replacing with ``starts_with()``. This is both a readability and a
+performance issue.
+
+.. code-block:: c++
+
+  string s = "...";
+  if (s.find("Hello World") == 0) { /* do something */ }
+  if (s.rfind("Hello World", 0) == 0) { /* do something */ }
+
+becomes
+
+.. code-block:: c++
+
+  string s = "...";
+  if (s.starts_with("Hello World")) { /* do something */ }
+  if (s.starts_with("Hello World")) { /* do something */ }
+
+
+Options
+-------
+
+.. option:: StringLikeClasses
+
+   Semicolon-separated list of names of string-like classes. By default both
+   ``std::basic_string`` and ``std::basic_string_view`` are considered.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil/string-find-startswith.cpp b/clang-tools-extra/test/clang-tidy/checkers/abseil/string-find-startswith.cpp
index 417598790bc007f..aabb30fe34f782c 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/abseil/string-find-startswith.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/abseil/string-find-startswith.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s abseil-string-find-startswith %t -- \
+// RUN: %check_clang_tidy -std=c++17 %s abseil-string-find-startswith %t -- \
 // RUN:   -config="{CheckOptions: \
 // RUN:     {abseil-string-find-startswith.StringLikeClasses: \
 // RUN:       '::std::basic_string;::std::basic_string_view;::basic_string'}}" \
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/string-find-startswith.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/string-find-startswith.cpp
new file mode 100644
index 000000000000000..50e5c0ed93ea066
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/string-find-startswith.cpp
@@ -0,0 +1,77 @@
+// RUN: %check_clang_tidy -std=c++20 %s modernize-string-find-startswith %t -- -- -isystem %clang_tidy_headers
+
+#include <string>
+
+std::string foo(std::string);
+std::string bar();
+
+#define A_MACRO(x, y) ((x) == (y))
+
+void test(std::string s, std::string_view sv) {
+  s.find("a") == 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of find() == 0
+  // CHECK-FIXES: s.starts_with("a");
+
+  s.find(s) == 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: s.starts_with(s);
+
+  s.find("aaa") != 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !s.starts_with("aaa");
+
+  s.find(foo(foo(bar()))) != 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !s.starts_with(foo(foo(bar())));
+
+  if (s.find("....") == 0) { /* do something */ }
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: if (s.starts_with("...."))
+
+  0 != s.find("a");
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !s.starts_with("a");
+
+  s.rfind("a", 0) == 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of rfind() == 0
+  // CHECK-FIXES: s.starts_with("a");
+
+  s.rfind(s, 0) == 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: s.starts_with(s);
+
+  s.rfind("aaa", 0) != 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !s.starts_with("aaa");
+
+  s.rfind(foo(foo(bar())), 0) != 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !s.starts_with(foo(foo(bar())));
+
+  if (s.rfind("....", 0) == 0) { /* do something */ }
+  // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with
+  // CHECK-FIXES: if (s.starts_with("...."))
+
+  0 != s.rfind("a", 0);
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !s.starts_with("a");
+
+  sv.find("a") == 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: sv.starts_with("a");
+
+  sv.rfind("a", 0) != 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with
+  // CHECK-FIXES: !sv.starts_with("a");
+
+  // Expressions that don't trigger the check are here.
+  A_MACRO(s.find("a"), 0);
+  A_MACRO(s.rfind("a", 0), 0);
+  s.find("a", 1) == 0;
+  s.find("a", 1) == 1;
+  s.find("a") == 1;
+  s.rfind("a", 1) == 0;
+  s.rfind("a", 1) == 1;
+  s.rfind("a") == 0;
+  s.rfind("a") == 1;
+}



More information about the cfe-commits mailing list