[clang-tools-extra] [clang-tidy] Enhance modernize-use-starts-ends-with to handle substr patterns (PR #116033)

Helmut Januschka via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 14 12:47:35 PST 2024


https://github.com/hjanuschka updated https://github.com/llvm/llvm-project/pull/116033

>From e549c1a93f242608af2c51a8777b4bb8ab60c228 Mon Sep 17 00:00:00 2001
From: Helmut Januschka <helmut at januschka.com>
Date: Wed, 13 Nov 2024 12:52:36 +0100
Subject: [PATCH] [clang-tidy] Enhance modernize-use-starts-ends-with with
 substr detection

Enhances the modernize-use-starts-ends-with check to detect substr-based
patterns that can be replaced with starts_with() (C++20). This improves code
readability and efficiency by avoiding temporary string creation.

New patterns detected:
  str.substr(0, n) == "foo"           -> str.starts_with("foo")
  "foo" == str.substr(0, n)           -> str.starts_with("foo")
  str.substr(0, n) != "foo"           -> !str.starts_with("foo")
  str.substr(0, strlen("foo")) == "foo" -> str.starts_with("foo")
  str.substr(0, prefix.size()) == "foo" -> str.starts_with("foo")

The enhancement:
- Integrates with existing starts_with patterns
- Handles substr with zero first argument
- Supports length via literals, strlen(), and size()/length()
- Validates string literal length matches
- Handles both == and != operators

Part of modernize-use-starts-ends-with check.
---
 .../modernize/UseStartsEndsWithCheck.cpp      | 185 ++++++++++++++++--
 .../modernize/UseStartsEndsWithCheck.h        |   4 +
 .../checks/modernize/use-starts-ends-with.rst |  45 +++--
 .../clang-tidy/checkers/Inputs/Headers/string |   5 +-
 .../modernize/use-starts-ends-with.cpp        |  42 +++-
 5 files changed, 250 insertions(+), 31 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp
index 1231f954298adc..e7da45198df6c1 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp
@@ -22,28 +22,46 @@ struct NotLengthExprForStringNode {
   NotLengthExprForStringNode(std::string ID, DynTypedNode Node,
                              ASTContext *Context)
       : ID(std::move(ID)), Node(std::move(Node)), Context(Context) {}
+      
   bool operator()(const internal::BoundNodesMap &Nodes) const {
-    // Match a string literal and an integer size or strlen() call.
     if (const auto *StringLiteralNode = Nodes.getNodeAs<StringLiteral>(ID)) {
+      // Match direct integer literals
       if (const auto *IntegerLiteralSizeNode = Node.get<IntegerLiteral>()) {
         return StringLiteralNode->getLength() !=
                IntegerLiteralSizeNode->getValue().getZExtValue();
       }
 
-      if (const auto *StrlenNode = Node.get<CallExpr>()) {
-        if (StrlenNode->getDirectCallee()->getName() != "strlen" ||
-            StrlenNode->getNumArgs() != 1) {
-          return true;
+      // Match strlen() calls
+      if (const auto *CallNode = Node.get<CallExpr>()) {
+        if (const auto *FD = CallNode->getDirectCallee()) {
+          if (FD->getName() == "strlen" && CallNode->getNumArgs() == 1) {
+            if (const auto *StrArg = CallNode->getArg(0)->IgnoreParenImpCasts()) {
+              // Handle both string literals and string variables in strlen
+              if (const auto *StrLit = dyn_cast<StringLiteral>(StrArg)) {
+                return StrLit->getLength() != StringLiteralNode->getLength();
+              } else if (const auto *StrVar = dyn_cast<Expr>(StrArg)) {
+                return !utils::areStatementsIdentical(StrVar, StringLiteralNode, *Context);
+              }
+            }
+          }
         }
-
-        if (const auto *StrlenArgNode = dyn_cast<StringLiteral>(
-                StrlenNode->getArg(0)->IgnoreParenImpCasts())) {
-          return StrlenArgNode->getLength() != StringLiteralNode->getLength();
+      }
+      
+      // Match size()/length() member calls
+      if (const auto *MemberCall = Node.get<CXXMemberCallExpr>()) {
+        if (const auto *Method = MemberCall->getMethodDecl()) {
+          StringRef Name = Method->getName();
+          if (Method->isConst() && Method->getNumParams() == 0 &&
+              (Name == "size" || Name == "length")) {
+            // For string literals used in comparison, allow size/length calls
+            // on any string variable
+            return false;
+          }
         }
       }
     }
 
-    // Match a string variable and a call to length() or size().
+    // Match member function calls on string variables
     if (const auto *ExprNode = Nodes.getNodeAs<Expr>(ID)) {
       if (const auto *MemberCallNode = Node.get<CXXMemberCallExpr>()) {
         const CXXMethodDecl *MethodDeclNode = MemberCallNode->getMethodDecl();
@@ -53,8 +71,8 @@ struct NotLengthExprForStringNode {
           return true;
         }
 
-        if (const auto *OnNode =
-                dyn_cast<Expr>(MemberCallNode->getImplicitObjectArgument())) {
+        if (const auto *OnNode = dyn_cast<Expr>(
+                MemberCallNode->getImplicitObjectArgument())) {
           return !utils::areStatementsIdentical(OnNode->IgnoreParenImpCasts(),
                                                 ExprNode->IgnoreParenImpCasts(),
                                                 *Context);
@@ -83,6 +101,55 @@ UseStartsEndsWithCheck::UseStartsEndsWithCheck(StringRef Name,
 void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) {
   const auto ZeroLiteral = integerLiteral(equals(0));
 
+  // Match the substring call
+  const auto SubstrCall = cxxMemberCallExpr(
+      callee(cxxMethodDecl(hasName("substr"))),
+      hasArgument(0, ZeroLiteral),
+      hasArgument(1, expr().bind("length")),
+      on(expr().bind("str")))
+      .bind("substr_fun");
+
+  // Match string literals
+  const auto Literal = stringLiteral().bind("literal");
+  
+  // Helper for matching comparison operators
+  auto AddSubstrMatcher = [&](auto Matcher) {
+      Finder->addMatcher(
+          traverse(TK_IgnoreUnlessSpelledInSource, std::move(Matcher)), this);
+  };
+
+  // Match str.substr(0,n) == "literal"
+  AddSubstrMatcher(
+      binaryOperation(
+          hasOperatorName("=="),
+          hasLHS(SubstrCall),
+          hasRHS(Literal))
+          .bind("positiveComparison"));
+
+  // Also match "literal" == str.substr(0,n)
+  AddSubstrMatcher(
+      binaryOperation(
+          hasOperatorName("=="),
+          hasLHS(Literal),
+          hasRHS(SubstrCall))
+          .bind("positiveComparison"));
+
+  // Match str.substr(0,n) != "literal" 
+  AddSubstrMatcher(
+      binaryOperation(
+          hasOperatorName("!="),
+          hasLHS(SubstrCall),
+          hasRHS(Literal))
+          .bind("negativeComparison"));
+
+  // Also match "literal" != str.substr(0,n)
+  AddSubstrMatcher(
+      binaryOperation(
+          hasOperatorName("!="),
+          hasLHS(Literal),
+          hasRHS(SubstrCall))
+          .bind("negativeComparison"));
+
   const auto ClassTypeWithMethod = [](const StringRef MethodBoundName,
                                       const auto... Methods) {
     return cxxRecordDecl(anyOf(
@@ -173,7 +240,101 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) {
       this);
 }
 
+void UseStartsEndsWithCheck::handleSubstrMatch(const MatchFinder::MatchResult &Result) {
+  const auto *SubstrCall = Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_fun");
+  const auto *PositiveComparison = Result.Nodes.getNodeAs<Expr>("positiveComparison");
+  const auto *NegativeComparison = Result.Nodes.getNodeAs<Expr>("negativeComparison");
+  
+  if (!SubstrCall || (!PositiveComparison && !NegativeComparison))
+    return;
+
+  const bool Negated = NegativeComparison != nullptr;
+  const auto *Comparison = Negated ? NegativeComparison : PositiveComparison;
+  
+  if (SubstrCall->getBeginLoc().isMacroID())
+    return;
+
+  const auto *Str = Result.Nodes.getNodeAs<Expr>("str");
+  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("literal");
+  const auto *Length = Result.Nodes.getNodeAs<Expr>("length");
+
+  if (!Str || !Literal || !Length)
+    return;
+
+  // Special handling for strlen and size/length member calls
+  const bool IsValidLength = [&]() {
+    if (const auto *LengthInt = dyn_cast<IntegerLiteral>(Length)) {
+      return LengthInt->getValue().getZExtValue() == Literal->getLength();
+    }
+    
+    if (const auto *Call = dyn_cast<CallExpr>(Length)) {
+      if (const auto *FD = Call->getDirectCallee()) {
+        if (FD->getName() == "strlen" && Call->getNumArgs() == 1) {
+          if (const auto *StrArg = dyn_cast<StringLiteral>(
+                  Call->getArg(0)->IgnoreParenImpCasts())) {
+            return StrArg->getLength() == Literal->getLength();
+          }
+        }
+      }
+    }
+    
+    if (const auto *MemberCall = dyn_cast<CXXMemberCallExpr>(Length)) {
+      if (const auto *Method = MemberCall->getMethodDecl()) {
+        StringRef Name = Method->getName();
+        if (Method->isConst() && Method->getNumParams() == 0 &&
+            (Name == "size" || Name == "length")) {
+          // For string literals in comparison, we'll trust that size()/length()
+          // calls are valid
+          return true;
+        }
+      }
+    }
+    
+    return false;
+  }();
+
+  if (!IsValidLength) {
+    return;
+  }
+
+  // Get the string expression and literal text for the replacement
+  const std::string StrText = Lexer::getSourceText(
+      CharSourceRange::getTokenRange(Str->getSourceRange()),
+      *Result.SourceManager, getLangOpts()).str();
+
+  const std::string LiteralText = Lexer::getSourceText(
+      CharSourceRange::getTokenRange(Literal->getSourceRange()),
+      *Result.SourceManager, getLangOpts()).str();
+
+  const std::string ReplacementText = (Negated ? "!" : "") + StrText + ".starts_with(" + 
+                              LiteralText + ")";
+
+  auto Diag = diag(SubstrCall->getExprLoc(),
+                  "use starts_with() instead of substr(0, n) comparison");
+
+  Diag << FixItHint::CreateReplacement(
+      CharSourceRange::getTokenRange(Comparison->getSourceRange()),
+      ReplacementText);
+}
+
 void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) {
+  // Try substr pattern first
+  const auto *SubstrCall = Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_fun");
+  if (SubstrCall) {
+    const auto *PositiveComparison = Result.Nodes.getNodeAs<Expr>("positiveComparison");
+    const auto *NegativeComparison = Result.Nodes.getNodeAs<Expr>("negativeComparison");
+    
+    if (PositiveComparison || NegativeComparison) {
+      handleSubstrMatch(Result);
+      return;
+    }
+  }
+
+  // Then try find/compare patterns
+  handleFindCompareMatch(Result);
+}
+
+void UseStartsEndsWithCheck::handleFindCompareMatch(const MatchFinder::MatchResult &Result) {
   const auto *ComparisonExpr = Result.Nodes.getNodeAs<BinaryOperator>("expr");
   const auto *FindExpr = Result.Nodes.getNodeAs<CXXMemberCallExpr>("find_expr");
   const auto *FindFun = Result.Nodes.getNodeAs<CXXMethodDecl>("find_fun");
diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h
index 17c2999bda84cd..7e4a1573e81e21 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h
@@ -31,6 +31,10 @@ class UseStartsEndsWithCheck : public ClangTidyCheck {
   std::optional<TraversalKind> getCheckTraversalKind() const override {
     return TK_IgnoreUnlessSpelledInSource;
   }
+
+private:
+  void handleSubstrMatch(const ast_matchers::MatchFinder::MatchResult &Result);
+  void handleFindCompareMatch(const ast_matchers::MatchFinder::MatchResult &Result);
 };
 
 } // namespace clang::tidy::modernize
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
index 721e927e29849f..752f2cdacd065a 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst
@@ -7,26 +7,39 @@ Checks for common roundabout ways to express ``starts_with`` and ``ends_with``
 and suggests replacing with the simpler method when it is available. Notably, 
 this will work with ``std::string`` and ``std::string_view``.
 
-.. code-block:: c++
+The check handles the following expressions:
 
-  std::string s = "...";
-  if (s.find("prefix") == 0) { /* do something */ }
-  if (s.rfind("prefix", 0) == 0) { /* do something */ }
-  if (s.compare(0, strlen("prefix"), "prefix") == 0) { /* do something */ }
-  if (s.compare(s.size() - strlen("suffix"), strlen("suffix"), "suffix") == 0) {
-    /* do something */
-  }
-  if (s.rfind("suffix") == (s.length() - 6)) {
-    /* do something */
-  }
-
-becomes
+==================================================== ===========================
+Expression                                            Result
+---------------------------------------------------- ---------------------------
+``u.find(v) == 0``                                   ``u.starts_with(v)``
+``u.rfind(v, 0) != 0``                               ``!u.starts_with(v)``
+``u.compare(0, v.size(), v) == 0``                   ``u.starts_with(v)``
+``u.substr(0, v.size()) == v``                       ``u.starts_with(v)``
+``v == u.substr(0, v.size())``                       ``u.starts_with(v)``
+``u.substr(0, v.size()) != v``                       ``!u.starts_with(v)``
+``u.compare(u.size() - v.size(), v.size(), v) == 0`` ``u.ends_with(v)``
+``u.rfind(v) == u.size() - v.size()``               ``u.ends_with(v)``
+==================================================== ===========================
+
+For example:
 
 .. code-block:: c++
 
   std::string s = "...";
   if (s.starts_with("prefix")) { /* do something */ }
-  if (s.starts_with("prefix")) { /* do something */ }
-  if (s.starts_with("prefix")) { /* do something */ }
-  if (s.ends_with("suffix")) { /* do something */ }
   if (s.ends_with("suffix")) { /* do something */ }
+
+Notes:
+
+* For the ``substr`` pattern, the check ensures that:
+
+  * The substring starts at position 0
+  * The length matches exactly the compared string's length
+  * The length is a constant value
+
+* Non-matching cases (will not be transformed):
+
+  * ``s.substr(1, 5) == "hello"``     // Non-zero start position
+  * ``s.substr(0, 4) == "hello"``     // Length mismatch
+  * ``s.substr(0, len) == "hello"``   // Non-constant length
\ No newline at end of file
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
index a0e8ebbb267cd0..856dfce815f03a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string
@@ -59,8 +59,9 @@ struct basic_string {
   _Type& insert(size_type pos, const _Type& str);
   _Type& insert(size_type pos, const C* s);
   _Type& insert(size_type pos, const C* s, size_type n);
+  _Type substr(size_type pos = 0, size_type count = npos) const;
 
-  constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept;
+   constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept;
   constexpr bool starts_with(C ch) const noexcept;
   constexpr bool starts_with(const C* s) const;
 
@@ -117,7 +118,7 @@ struct basic_string_view {
   constexpr bool ends_with(const C* s) const;
 
   constexpr int compare(basic_string_view sv) const noexcept;
-
+  
   static constexpr size_t npos = -1;
 };
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp
index 91477241e82e54..eac9e8714b97bd 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp
@@ -1,5 +1,5 @@
 // RUN: %check_clang_tidy -std=c++20 %s modernize-use-starts-ends-with %t -- \
-// RUN:   -- -isystem %clang_tidy_headers
+// RUN:   -- -isystem %clang_tidy_headers 
 
 #include <string.h>
 #include <string>
@@ -266,3 +266,43 @@ void test(std::string s, std::string_view sv, sub_string ss, sub_sub_string sss,
   s.compare(0, 1, "ab") == 0;
   s.rfind(suffix, 1) == s.size() - suffix.size();
 }
+
+void test_substr() {
+    std::string str("hello world");
+    std::string prefix = "hello";
+    std::string_view suffix = "world";
+    
+    // Basic pattern
+    str.substr(0, 5) == "hello";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+    
+    // With string literal on left side
+    "hello" == str.substr(0, 5);
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+
+    
+    // Inequality comparison
+    str.substr(0, 5) != "world";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with]
+    // CHECK-FIXES: !str.starts_with("world");
+    
+    
+    // Ensure non-zero start position is not transformed
+    str.substr(1, 5) == "hello";
+    str.substr(0, 4) == "hello"; // Length mismatch
+    
+    size_t len = 5;
+    str.substr(0, len) == "hello"; // Non-constant length
+
+  // String literal with size calculation
+    str.substr(0, prefix.size()) == "hello";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+
+    // String literal with size calculation
+    str.substr(0, strlen("hello")) == "hello";
+    // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with]
+    // CHECK-FIXES: str.starts_with("hello");
+}
\ No newline at end of file



More information about the cfe-commits mailing list