[clang-tools-extra] [clang-tidy] Enhance modernize-use-starts-ends-with to handle substr patterns (PR #116033)

Helmut Januschka via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 15 01:25:15 PST 2024


https://github.com/hjanuschka updated https://github.com/llvm/llvm-project/pull/116033

>From f79bd08bf910512c6c3b9d8ab2cd5162f9c0e5f8 Mon Sep 17 00:00:00 2001
From: Helmut Januschka <helmut at januschka.com>
Date: Wed, 13 Nov 2024 12:52:36 +0100
Subject: [PATCH 1/2] [clang-tidy] Enhance modernize-use-starts-ends-with with
 substr detection

Enhances the modernize-use-starts-ends-with check to detect substr-based
patterns that can be replaced with starts_with() (C++20). This improves code
readability and efficiency by avoiding temporary string creation.

New patterns detected:
  str.substr(0, n) == "foo"           -> str.starts_with("foo")
  "foo" == str.substr(0, n)           -> str.starts_with("foo")
  str.substr(0, n) != "foo"           -> !str.starts_with("foo")
  str.substr(0, strlen("foo")) == "foo" -> str.starts_with("foo")
  str.substr(0, prefix.size()) == "foo" -> str.starts_with("foo")

The enhancement:
- Integrates with existing starts_with patterns
- Handles substr with zero first argument
- Supports length via literals, strlen(), and size()/length()
- Validates string literal length matches
- Handles both == and != operators

Part of modernize-use-starts-ends-with check.
---
 .../modernize/UseStartsEndsWithCheck.cpp      | 65 ++++++++++++++++++-
 .../modernize/UseStartsEndsWithCheck.h        |  4 ++
 clang-tools-extra/docs/ReleaseNotes.rst       |  5 +-
 .../checks/modernize/use-starts-ends-with.rst | 45 ++++++++-----
 .../clang-tidy/checkers/Inputs/Headers/string |  5 +-
 .../modernize/use-starts-ends-with.cpp        | 36 +++++++++-
 6 files changed, 138 insertions(+), 22 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp
index 1231f954298adc..bcfb7227be722b 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp
@@ -171,10 +171,24 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) {
                              hasRHS(lengthExprForStringNode("needle")))))
           .bind("expr"),
       this);
+      Finder->addMatcher(
+      cxxOperatorCallExpr(
+          hasAnyOperatorName("==", "!="),
+          hasOperands(
+              expr().bind("needle"),
+              cxxMemberCallExpr(
+                  argumentCountIs(2), hasArgument(0, ZeroLiteral),
+                  hasArgument(1, lengthExprForStringNode("needle")),
+                  callee(cxxMethodDecl(hasName("substr"),
+                                       ofClass(OnClassWithStartsWithFunction))
+                             .bind("find_fun")))
+                  .bind("find_expr")))
+          .bind("expr"),
+      this);
 }
 
 void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) {
-  const auto *ComparisonExpr = Result.Nodes.getNodeAs<BinaryOperator>("expr");
+  const auto *ComparisonExpr = Result.Nodes.getNodeAs<Expr>("expr");
   const auto *FindExpr = Result.Nodes.getNodeAs<CXXMemberCallExpr>("find_expr");
   const auto *FindFun = Result.Nodes.getNodeAs<CXXMethodDecl>("find_fun");
   const auto *SearchExpr = Result.Nodes.getNodeAs<Expr>("needle");
@@ -189,7 +203,54 @@ void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) {
   if (ComparisonExpr->getBeginLoc().isMacroID())
     return;
 
-  const bool Neg = ComparisonExpr->getOpcode() == BO_NE;
+  bool Neg;
+  if (const auto *BO = llvm::dyn_cast<BinaryOperator>(ComparisonExpr)) {
+    Neg = BO->getOpcode() == BO_NE;
+  } else {
+    assert(llvm::isa<CXXOperatorCallExpr>(ComparisonExpr));
+    Neg = llvm::cast<CXXOperatorCallExpr>(ComparisonExpr)->getOperator() ==
+          OO_ExclaimEqual;
+  }
+
+  // Check if this is a substr case
+  bool IsSubstr = FindFun->getName() == "substr";
+
+  if (IsSubstr) {
+      const auto *SubstrCall = cast<CXXMemberCallExpr>(FindExpr);
+      const Expr *Object = SubstrCall->getImplicitObjectArgument();
+
+      std::string ObjectStr;
+      std::string SearchStr;
+      bool Invalid = false;
+
+      auto &SM = *Result.SourceManager;
+
+      CharSourceRange ObjectRange = CharSourceRange::getTokenRange(
+          Object->getBeginLoc(), Object->getEndLoc());
+      ObjectStr = Lexer::getSourceText(ObjectRange, SM, getLangOpts(), &Invalid).str();
+      if (Invalid)
+        return;
+
+      CharSourceRange SearchRange = CharSourceRange::getTokenRange(
+          SearchExpr->getBeginLoc(), SearchExpr->getEndLoc());
+      SearchStr = Lexer::getSourceText(SearchRange, SM, getLangOpts(), &Invalid).str();
+      if (Invalid)
+        return;
+
+      // Build the new expression: [!]Object.starts_with(SearchExpr)
+    std::string NewExpr =
+          (llvm::Twine(Neg ? "!" : "") + ObjectStr + "." +
+          ReplacementFunction->getName() + "(" + SearchStr + ")").str();
+    // Replace the entire comparison expression
+    auto Diag = diag(ComparisonExpr->getBeginLoc(),
+                    "use %0 instead of substr() %select{==|!=}1")
+                << ReplacementFunction->getName() << Neg;
+    Diag << FixItHint::CreateReplacement(
+        CharSourceRange::getTokenRange(ComparisonExpr->getSourceRange()),
+        NewExpr);
+      return;
+  }
+
 
   auto Diagnostic =
       diag(FindExpr->getExprLoc(), "use %0 instead of %1() %select{==|!=}2 0")
diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h
index 17c2999bda84cd..7e4a1573e81e21 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h
@@ -31,6 +31,10 @@ class UseStartsEndsWithCheck : public ClangTidyCheck {
   std::optional<TraversalKind> getCheckTraversalKind() const override {
     return TK_IgnoreUnlessSpelledInSource;
   }
+
+private:
+  void handleSubstrMatch(const ast_matchers::MatchFinder::MatchResult &Result);
+  void handleFindCompareMatch(const ast_matchers::MatchFinder::MatchResult &Result);
 };
 
 } // namespace clang::tidy::modernize
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index db971f08ca3dbc..62dbc7646740bc 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -244,7 +244,10 @@ Changes in existing checks
 
 - Improved :doc:`modernize-use-starts-ends-with
   <clang-tidy/checks/modernize/use-starts-ends-with>` check to handle two cases
-  that can be replaced with ``ends_with``
+  that can be replaced with ``ends_with`` and detect patterns using ``substr``
+  that can be replaced with ``starts_with``. Now handles cases like
+  ``str.substr(0, n) == "literal"``, with support for length determination through
+  integer literals, ``strlen()``, and ``size()``/``length()`` member functions.
 
 - Improved :doc:`modernize-use-std-format
   <clang-tidy/checks/modernize/use-std-format>` check to support replacing
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
index 721e927e29849f..29a5c5a7a2817b 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
@@ -7,26 +7,39 @@ Checks for common roundabout ways to express ``starts_with`` and ``ends_with``
 and suggests replacing with the simpler method when it is available. Notably, 
 this will work with ``std::string`` and ``std::string_view``.
 
-.. code-block:: c++
+The check handles the following expressions:
 
-  std::string s = "...";
-  if (s.find("prefix") == 0) { /* do something */ }
-  if (s.rfind("prefix", 0) == 0) { /* do something */ }
-  if (s.compare(0, strlen("prefix"), "prefix") == 0) { /* do something */ }
-  if (s.compare(s.size() - strlen("suffix"), strlen("suffix"), "suffix") == 0) {
-    /* do something */
-  }
-  if (s.rfind("suffix") == (s.length() - 6)) {
-    /* do something */
-  }
-
-becomes
+====================================================== ===================
+Expression                                              Replacement
+------------------------------------------------------ -------------------
+``u.find(v) == 0``                                     ``u.starts_with(v)``
+``u.rfind(v, 0) != 0``                                 ``!u.starts_with(v)``
+``u.compare(0, v.size(), v) == 0``                     ``u.starts_with(v)``
+``u.substr(0, v.size()) == v``                         ``u.starts_with(v)``
+``v == u.substr(0, v.size())``                         ``u.starts_with(v)``
+``u.substr(0, v.size()) != v``                         ``!u.starts_with(v)``
+``u.compare(u.size() - v.size(), v.size(), v) == 0``   ``u.ends_with(v)``
+``u.rfind(v) == u.size() - v.size()``                  ``u.ends_with(v)``
+====================================================== ===================
+
+For example:
 
 .. code-block:: c++
 
   std::string s = "...";
   if (s.starts_with("prefix")) { /* do something */ }
-  if (s.starts_with("prefix")) { /* do something */ }
-  if (s.starts_with("prefix")) { /* do something */ }
-  if (s.ends_with("suffix")) { /* do something */ }
   if (s.ends_with("suffix")) { /* do something */ }
+
+Notes:
+
+* For the ``substr`` pattern, the check ensures that:
+
+  * The substring starts at position 0
+  * The length matches exactly the compared string's length
+  * The length is a constant value
+
+* Non-matching cases (will not be transformed):
+
+  * ``s.substr(1, 5) == "hello"``     // Non-zero start position
+  * ``s.substr(0, 4) == "hello"``     // Length mismatch
+  * ``s.substr(0, len) == "hello"``   // Non-constant length
\ No newline at end of file
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
index a0e8ebbb267cd0..856dfce815f03a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
@@ -59,8 +59,9 @@ struct basic_string {
   _Type& insert(size_type pos, const _Type& str);
   _Type& insert(size_type pos, const C* s);
   _Type& insert(size_type pos, const C* s, size_type n);
+  _Type substr(size_type pos = 0, size_type count = npos) const;
 
-  constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept;
+   constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept;
   constexpr bool starts_with(C ch) const noexcept;
   constexpr bool starts_with(const C* s) const;
 
@@ -117,7 +118,7 @@ struct basic_string_view {
   constexpr bool ends_with(const C* s) const;
 
   constexpr int compare(basic_string_view sv) const noexcept;
-
+  
   static constexpr size_t npos = -1;
 };
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp
index 91477241e82e54..a9b80ad9dc3486 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp
@@ -1,5 +1,5 @@
 // RUN: %check_clang_tidy -std=c++20 %s modernize-use-starts-ends-with %t -- \
-// RUN:   -- -isystem %clang_tidy_headers
+// RUN:   -- -isystem %clang_tidy_headers 
 
 #include <string.h>
 #include <string>
@@ -266,3 +266,37 @@ void test(std::string s, std::string_view sv, sub_string ss, sub_sub_string sss,
   s.compare(0, 1, "ab") == 0;
   s.rfind(suffix, 1) == s.size() - suffix.size();
 }
+
+void test_substr() {
+    std::string str("hello world");
+    std::string prefix = "hello";
+    std::string_view suffix = "world";
+    
+    // Basic pattern
+    str.substr(0, 5) == "hello";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+    
+    // With string literal on left side
+    "hello" == str.substr(0, 5);
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+    
+    // Inequality comparison
+    str.substr(0, 5) != "world";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() != [modernize-use-starts-ends-with]
+    // CHECK-FIXES: !str.starts_with("world");
+    
+    // Ensure non-zero start position is not transformed
+    str.substr(1, 5) == "hello";
+    str.substr(0, 4) == "hello"; // Length mismatch
+    
+    size_t len = 5;
+    str.substr(0, len) == "hello"; // Non-constant length
+
+    // String literal with size calculation
+    str.substr(0, strlen("hello")) == "hello";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+
+}
\ No newline at end of file

>From 75f95123cac402e4603c0840fb9d8c9a4de867c2 Mon Sep 17 00:00:00 2001
From: Helmut Januschka <helmut at januschka.com>
Date: Fri, 15 Nov 2024 10:23:37 +0100
Subject: [PATCH 2/2] up

---
 .../clang-tidy/checks/modernize/use-starts-ends-with.rst  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
index 29a5c5a7a2817b..41e49e19fa03fd 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
@@ -7,11 +7,11 @@ Checks for common roundabout ways to express ``starts_with`` and ``ends_with``
 and suggests replacing with the simpler method when it is available. Notably, 
 this will work with ``std::string`` and ``std::string_view``.
 
-The check handles the following expressions:
+The check handles the follow  ing expressions:
 
-====================================================== ===================
+====================================================== =====================
 Expression                                              Replacement
------------------------------------------------------- -------------------
+------------------------------------------------------ ---------------------
 ``u.find(v) == 0``                                     ``u.starts_with(v)``
 ``u.rfind(v, 0) != 0``                                 ``!u.starts_with(v)``
 ``u.compare(0, v.size(), v) == 0``                     ``u.starts_with(v)``
@@ -20,7 +20,7 @@ Expression                                              Replacement
 ``u.substr(0, v.size()) != v``                         ``!u.starts_with(v)``
 ``u.compare(u.size() - v.size(), v.size(), v) == 0``   ``u.ends_with(v)``
 ``u.rfind(v) == u.size() - v.size()``                  ``u.ends_with(v)``
-====================================================== ===================
+====================================================== =====================
 
 For example:
 



More information about the cfe-commits mailing list