[clang-tools-extra] [clang-tidy] Add modernize-substr-to-starts-with check (PR #116033)
Helmut Januschka via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 13 03:56:30 PST 2024
https://github.com/hjanuschka created https://github.com/llvm/llvm-project/pull/116033
Adds a new check that finds calls to substr when its first argument is a zero-equivalent expression and can be replaced with starts_with() (introduced in C++20). This modernization improves code readability by making the intent clearer and can be more efficient as it avoids creating temporary strings.
Converts patterns like:
str.substr(0, 3) == "foo" -> str.starts_with("foo")
str.substr(x-x, 3) == "foo" -> str.starts_with("foo")
str.substr(zero, n) == prefix -> str.starts_with(prefix)
"bar" == str.substr(i-i, 3) -> str.starts_with("bar")
str.substr(0, n) != prefix -> !str.starts_with(prefix)
The check:
- Detects zero-equivalent expressions:
* Direct zero literals (0)
* Variables initialized to zero
* Self-canceling expressions (x-x, i-i)
- Only converts when length matches exactly for string literals
- Warns about standalone substr calls that might be candidates
- Supports both string literals and string variables
- Handles both == and != operators
>From d95e8338d47ef265dd7d9dfacd475329525ea326 Mon Sep 17 00:00:00 2001
From: Helmut Januschka <helmut at januschka.com>
Date: Wed, 13 Nov 2024 12:52:36 +0100
Subject: [PATCH] [clang-tidy] Add modernize-substr-to-starts-with check
Adds a new check that finds calls to substr when its first argument is a
zero-equivalent expression and can be replaced with starts_with()
(introduced in C++20). This modernization improves code readability by making
the intent clearer and can be more efficient as it avoids creating temporary
strings.
Converts patterns like:
str.substr(0, 3) == "foo" -> str.starts_with("foo")
str.substr(x-x, 3) == "foo" -> str.starts_with("foo")
str.substr(zero, n) == prefix -> str.starts_with(prefix)
"bar" == str.substr(i-i, 3) -> str.starts_with("bar")
str.substr(0, n) != prefix -> !str.starts_with(prefix)
The check:
- Detects zero-equivalent expressions:
* Direct zero literals (0)
* Variables initialized to zero
* Self-canceling expressions (x-x, i-i)
- Only converts when length matches exactly for string literals
- Warns about standalone substr calls that might be candidates
- Supports both string literals and string variables
- Handles both == and != operators
---
.../clang-tidy/modernize/CMakeLists.txt | 1 +
.../modernize/ModernizeTidyModule.cpp | 3 +
.../modernize/SubstrToStartsWithCheck.cpp | 121 ++++++++++++++++++
.../modernize/SubstrToStartsWithCheck.h | 30 +++++
.../modernize/substr-to-starts-with.rst | 38 ++++++
.../modernize/substr-to-starts-with.cpp | 41 ++++++
6 files changed, 234 insertions(+)
create mode 100644 clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp
create mode 100644 clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h
create mode 100644 clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst
create mode 100644 clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp
diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
index c919d49b42873a..ed1ba2ab62a90f 100644
--- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
@@ -49,6 +49,7 @@ add_clang_library(clangTidyModernizeModule STATIC
UseTransparentFunctorsCheck.cpp
UseUncaughtExceptionsCheck.cpp
UseUsingCheck.cpp
+ SubstrToStartsWithCheck.cpp
LINK_LIBS
clangTidy
diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
index 18607593320635..7569211d2552ea 100644
--- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
@@ -50,6 +50,7 @@
#include "UseTransparentFunctorsCheck.h"
#include "UseUncaughtExceptionsCheck.h"
#include "UseUsingCheck.h"
+#include "SubstrToStartsWithCheck.h"
using namespace clang::ast_matchers;
@@ -122,6 +123,8 @@ class ModernizeModule : public ClangTidyModule {
CheckFactories.registerCheck<UseUncaughtExceptionsCheck>(
"modernize-use-uncaught-exceptions");
CheckFactories.registerCheck<UseUsingCheck>("modernize-use-using");
+ CheckFactories.registerCheck<SubstrToStartsWithCheck>(
+ "modernize-substr-to-starts-with");
}
};
diff --git a/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp
new file mode 100644
index 00000000000000..b185a2a3b0afc2
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp
@@ -0,0 +1,121 @@
+//===--- SubstrToStartsWithCheck.cpp - clang-tidy ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubstrToStartsWithCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::modernize {
+
+void SubstrToStartsWithCheck::registerMatchers(MatchFinder *Finder) {
+ // Helper matcher to identify expressions that evaluate to 0
+ auto isZeroExpr = expr(anyOf(
+ integerLiteral(equals(0)),
+ ignoringParenImpCasts(declRefExpr(
+ to(varDecl(hasInitializer(integerLiteral(equals(0))))))),
+ binaryOperator(hasOperatorName("-"), hasLHS(expr()), hasRHS(expr()))));
+
+ // Match string literals or string variables
+ auto isStringLike = expr(anyOf(
+ stringLiteral().bind("literal"),
+ implicitCastExpr(hasSourceExpression(stringLiteral().bind("literal"))),
+ declRefExpr(to(varDecl(hasType(qualType(hasDeclaration(
+ namedDecl(hasAnyName("::std::string", "::std::basic_string")))))))).bind("strvar")));
+
+ // Match substr with zero first argument
+ auto isSubstrCall =
+ cxxMemberCallExpr(
+ callee(memberExpr(hasDeclaration(cxxMethodDecl(
+ hasName("substr"),
+ ofClass(hasAnyName("basic_string", "string", "u16string")))))),
+ hasArgument(0, isZeroExpr),
+ hasArgument(1, expr().bind("length")))
+ .bind("substr");
+
+ // Match comparison operations
+ Finder->addMatcher(
+ binaryOperator(
+ anyOf(hasOperatorName("=="), hasOperatorName("!=")),
+ hasEitherOperand(isSubstrCall),
+ hasEitherOperand(isStringLike),
+ unless(hasType(isAnyCharacter())))
+ .bind("comparison"),
+ this);
+
+ // Also match direct substr calls that could be starts_with
+ Finder->addMatcher(
+ cxxMemberCallExpr(
+ callee(memberExpr(hasDeclaration(cxxMethodDecl(
+ hasName("substr"),
+ ofClass(hasAnyName("basic_string", "string", "u16string")))))),
+ hasArgument(0, isZeroExpr),
+ hasArgument(1, expr().bind("direct_length")))
+ .bind("direct_substr"),
+ this);
+}
+
+void SubstrToStartsWithCheck::check(const MatchFinder::MatchResult &Result) {
+ const auto *Comparison = Result.Nodes.getNodeAs<BinaryOperator>("comparison");
+ const auto *DirectSubstr =
+ Result.Nodes.getNodeAs<CXXMemberCallExpr>("direct_substr");
+
+ // Handle comparison cases
+ if (Comparison) {
+ const auto *SubstrCall = Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr");
+ const auto *LengthArg = Result.Nodes.getNodeAs<Expr>("length");
+ const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("literal");
+ const auto *StrVar = Result.Nodes.getNodeAs<DeclRefExpr>("strvar");
+
+ if (!SubstrCall || !LengthArg || (!Literal && !StrVar))
+ return;
+
+ std::string CompareStr;
+ if (Literal) {
+ CompareStr = Literal->getString().str();
+ } else if (StrVar) {
+ CompareStr = Lexer::getSourceText(
+ CharSourceRange::getTokenRange(StrVar->getSourceRange()),
+ *Result.SourceManager, Result.Context->getLangOpts())
+ .str();
+ }
+
+ // Check if the length matches the string literal length
+ if (Literal) {
+ if (const auto *LengthLiteral = dyn_cast<IntegerLiteral>(LengthArg)) {
+ if (LengthLiteral->getValue() != Literal->getLength())
+ return;
+ }
+ }
+
+ // Build replacement
+ std::string Replacement;
+ if (Comparison->getOpcode() == BO_EQ) {
+ Replacement = "starts_with(" + CompareStr + ")";
+ } else { // BO_NE
+ Replacement = "!starts_with(" + CompareStr + ")";
+ }
+
+ diag(Comparison->getBeginLoc(),
+ "use starts_with() instead of substring comparison")
+ << FixItHint::CreateReplacement(Comparison->getSourceRange(),
+ Replacement);
+ }
+
+ // Handle direct substr calls
+ if (DirectSubstr) {
+ diag(DirectSubstr->getBeginLoc(),
+ "consider using starts_with() if this substr is used for prefix "
+ "checking");
+ }
+}
+
+} // namespace clang::tidy::modernize
\ No newline at end of file
diff --git a/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h
new file mode 100644
index 00000000000000..fdb157ca0a24fb
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h
@@ -0,0 +1,30 @@
+//===--- SubstrToStartsWithCheck.h - clang-tidy ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_SUBSTRTOSTARTSWITHCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_SUBSTRTOSTARTSWITHCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::modernize {
+
+/// Finds calls to substr(0, n) that can be replaced with starts_with().
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/substr-to-starts-with.html
+class SubstrToStartsWithCheck : public ClangTidyCheck {
+public:
+ SubstrToStartsWithCheck(StringRef Name, ClangTidyContext *Context)
+ : ClangTidyCheck(Name, Context) {}
+ void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+ void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace clang::tidy::modernize
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_SUBSTRTOSTARTSWITHCHECK_H
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst
new file mode 100644
index 00000000000000..88a9c311eb6737
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst
@@ -0,0 +1,38 @@
+modernize-substr-to-starts-with
+==============================
+
+Finds calls to ``substr(0, n)`` that can be replaced with ``starts_with()`` (introduced in C++20).
+This makes the code's intent clearer and can be more efficient as it avoids creating temporary strings.
+
+For example:
+
+.. code-block:: c++
+
+ str.substr(0, 3) == "foo" // before
+ str.starts_with("foo") // after
+
+ "bar" == str.substr(0, 3) // before
+ str.starts_with("bar") // after
+
+ str.substr(0, n) == prefix // before
+ str.starts_with(prefix) // after
+
+The check handles various ways of expressing zero as the start index:
+
+.. code-block:: c++
+
+ const int zero = 0;
+ str.substr(zero, n) == prefix // converted
+ str.substr(x - x, n) == prefix // converted
+
+The check will only convert cases where:
+* The substr call starts at index 0 (or equivalent)
+* When comparing with string literals, the length matches exactly
+* The comparison is with == or !=
+
+The check will also warn about standalone substr calls that might be candidates
+for conversion to starts_with:
+
+.. code-block:: c++
+
+ auto prefix = str.substr(0, n); // warns about possible use of starts_with
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp
new file mode 100644
index 00000000000000..41b59903e605d7
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp
@@ -0,0 +1,41 @@
+// RUN: %check_clang_tidy %s modernize-substr-to-starts-with %t
+
+#include <string>
+
+void test() {
+ std::string str = "hello world";
+ if (str.substr(0, 5) == "hello") {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison
+ // CHECK-FIXES: if (str.starts_with("hello")) {}
+
+ if ("hello" == str.substr(0, 5)) {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use starts_with() instead of substring comparison
+ // CHECK-FIXES: if (str.starts_with("hello")) {}
+
+ bool b = str.substr(0, 5) != "hello";
+ // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: use starts_with() instead of substring comparison
+ // CHECK-FIXES: bool b = !str.starts_with("hello");
+
+ // Variable length and string refs
+ std::string prefix = "hello";
+ size_t len = 5;
+ if (str.substr(0, len) == prefix) {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison
+ // CHECK-FIXES: if (str.starts_with(prefix)) {}
+
+ // Various zero expressions
+ const int zero = 0;
+ int i = 0;
+ if (str.substr(zero, 5) == "hello") {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison
+ // CHECK-FIXES: if (str.starts_with("hello")) {}
+
+ if (str.substr(i-i, 5) == "hello") {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison
+ // CHECK-FIXES: if (str.starts_with("hello")) {}
+
+ // Should not convert these
+ if (str.substr(1, 5) == "hello") {} // Non-zero start
+ if (str.substr(0, 4) == "hello") {} // Length mismatch
+ if (str.substr(0, 6) == "hello") {} // Length mismatch
+}
More information about the cfe-commits
mailing list