[clang-tools-extra] [clang-tidy] Add readability-string-view-substr check (PR #120055)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 17 00:26:58 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-tools-extra
Author: Helmut Januschka (hjanuschka)
<details>
<summary>Changes</summary>
Add a new check that suggests using `string_view::remove_prefix()` and `remove_suffix()` instead of `substr()` when the intent is to remove characters from either end of a string_view.
edit: have not found an existing check, if there is any, please let me know
---
Full diff: https://github.com/llvm/llvm-project/pull/120055.diff
7 Files Affected:
- (modified) clang-tools-extra/clang-tidy/readability/CMakeLists.txt (+1)
- (modified) clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp (+3)
- (added) clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.cpp (+201)
- (added) clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.h (+39)
- (modified) clang-tools-extra/docs/ReleaseNotes.rst (+7)
- (added) clang-tools-extra/docs/clang-tidy/checks/readability/string-view-substr.rst (+18)
- (added) clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp (+108)
``````````diff
diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
index 8f303c51e1b0da..8b44fc339441ac 100644
--- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
@@ -53,6 +53,7 @@ add_clang_library(clangTidyReadabilityModule STATIC
StaticAccessedThroughInstanceCheck.cpp
StaticDefinitionInAnonymousNamespaceCheck.cpp
StringCompareCheck.cpp
+ StringViewSubstrCheck.cpp
SuspiciousCallArgumentCheck.cpp
UniqueptrDeleteReleaseCheck.cpp
UppercaseLiteralSuffixCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
index d61c0ba39658e5..f36ec8f95ede60 100644
--- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
@@ -56,6 +56,7 @@
#include "StaticAccessedThroughInstanceCheck.h"
#include "StaticDefinitionInAnonymousNamespaceCheck.h"
#include "StringCompareCheck.h"
+#include "StringViewSubstrCheck.h"
#include "SuspiciousCallArgumentCheck.h"
#include "UniqueptrDeleteReleaseCheck.h"
#include "UppercaseLiteralSuffixCheck.h"
@@ -146,6 +147,8 @@ class ReadabilityModule : public ClangTidyModule {
"readability-static-definition-in-anonymous-namespace");
CheckFactories.registerCheck<StringCompareCheck>(
"readability-string-compare");
+ CheckFactories.registerCheck<StringViewSubstrCheck>(
+ "readability-stringview-substr");
CheckFactories.registerCheck<readability::NamedParameterCheck>(
"readability-named-parameter");
CheckFactories.registerCheck<NonConstParameterCheck>(
diff --git a/clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.cpp b/clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.cpp
new file mode 100644
index 00000000000000..5c079e3203c989
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.cpp
@@ -0,0 +1,201 @@
+//===--- StringViewSubstrCheck.cpp - clang-tidy------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "StringViewSubstrCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::readability {
+
+void StringViewSubstrCheck::registerMatchers(MatchFinder *Finder) {
+ const auto HasStringViewType = hasType(hasUnqualifiedDesugaredType(recordType(
+ hasDeclaration(recordDecl(hasName("::std::basic_string_view"))))));
+
+ // Match assignment to string_view's substr
+ Finder->addMatcher(
+ cxxOperatorCallExpr(
+ hasOverloadedOperatorName("="),
+ hasArgument(0, expr(HasStringViewType).bind("target")),
+ hasArgument(
+ 1, cxxMemberCallExpr(callee(memberExpr(hasDeclaration(
+ cxxMethodDecl(hasName("substr"))))),
+ on(expr(HasStringViewType).bind("source")))
+ .bind("substr_call")))
+ .bind("assignment"),
+ this);
+}
+
+void StringViewSubstrCheck::check(const MatchFinder::MatchResult &Result) {
+ const auto *Assignment =
+ Result.Nodes.getNodeAs<CXXOperatorCallExpr>("assignment");
+ const auto *Target = Result.Nodes.getNodeAs<Expr>("target");
+ const auto *Source = Result.Nodes.getNodeAs<Expr>("source");
+ const auto *SubstrCall =
+ Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_call");
+
+ if (!Assignment || !Target || !Source || !SubstrCall) {
+ return;
+ }
+
+ // Get the DeclRefExpr for the target and source
+ const auto *TargetDRE = dyn_cast<DeclRefExpr>(Target->IgnoreParenImpCasts());
+ const auto *SourceDRE = dyn_cast<DeclRefExpr>(Source->IgnoreParenImpCasts());
+
+ if (!TargetDRE || !SourceDRE) {
+ return;
+ }
+
+ const Expr *StartArg = SubstrCall->getArg(0);
+ const Expr *LengthArg =
+ SubstrCall->getNumArgs() > 1 ? SubstrCall->getArg(1) : nullptr;
+
+ std::string StartText =
+ Lexer::getSourceText(
+ CharSourceRange::getTokenRange(StartArg->getSourceRange()),
+ *Result.SourceManager, Result.Context->getLangOpts())
+ .str();
+
+ const bool IsSameVar = (TargetDRE->getDecl() == SourceDRE->getDecl());
+
+ // Case 1: Check for remove_prefix pattern
+ if (!LengthArg || isa<CXXDefaultArgExpr>(LengthArg)) {
+ if (IsSameVar) {
+ std::string Replacement = TargetDRE->getNameInfo().getAsString() +
+ ".remove_prefix(" + StartText + ")";
+ diag(Assignment->getBeginLoc(), "prefer 'remove_prefix' over 'substr' "
+ "for removing characters from the start")
+ << FixItHint::CreateReplacement(Assignment->getSourceRange(),
+ Replacement);
+ }
+ return;
+ }
+
+ // Check if StartArg resolves to 0
+ bool IsZero = false;
+
+ // Handle cases where StartArg is an integer literal
+ if (const auto *IL =
+ dyn_cast<IntegerLiteral>(StartArg->IgnoreParenImpCasts())) {
+ IsZero = IL->getValue() == 0;
+ }
+
+ // Optional: Handle cases where StartArg evaluates to zero
+ if (!IsZero) {
+ // Add logic for other constant evaluation (e.g., constexpr evaluation)
+ const auto &EvalResult = StartArg->EvaluateKnownConstInt(*Result.Context);
+ IsZero = !EvalResult.isNegative() && EvalResult == 0;
+ }
+
+ // If StartArg resolves to 0, handle the case
+ if (IsZero) {
+ bool isFullCopy = false;
+
+ // Check for length() or length() - expr pattern
+ if (const auto *BinOp = dyn_cast<BinaryOperator>(LengthArg)) {
+ if (BinOp->getOpcode() == BO_Sub) {
+ const Expr *LHS = BinOp->getLHS();
+ const Expr *RHS = BinOp->getRHS();
+
+ // Check for length() call
+ if (const auto *LengthCall = dyn_cast<CXXMemberCallExpr>(LHS)) {
+ if (const auto *LengthMethod =
+ dyn_cast<CXXMethodDecl>(LengthCall->getDirectCallee())) {
+ if (LengthMethod->getName() == "length") {
+ const Expr *LengthObject =
+ LengthCall->getImplicitObjectArgument();
+ const auto *LengthDRE =
+ dyn_cast<DeclRefExpr>(LengthObject->IgnoreParenImpCasts());
+
+ if (!LengthDRE || LengthDRE->getDecl() != SourceDRE->getDecl()) {
+ return;
+ }
+
+ // Check if RHS is 0 or evaluates to 0
+ bool IsZero = false;
+ if (const auto *IL =
+ dyn_cast<IntegerLiteral>(RHS->IgnoreParenImpCasts())) {
+ IsZero = IL->getValue() == 0;
+ }
+
+ if (IsZero) {
+ isFullCopy = true;
+ } else if (IsSameVar) {
+ // remove_suffix case (only for self-assignment)
+ std::string RHSText =
+ Lexer::getSourceText(
+ CharSourceRange::getTokenRange(RHS->getSourceRange()),
+ *Result.SourceManager, Result.Context->getLangOpts())
+ .str();
+
+ std::string Replacement =
+ TargetDRE->getNameInfo().getAsString() + ".remove_suffix(" +
+ RHSText + ")";
+ diag(Assignment->getBeginLoc(),
+ "prefer 'remove_suffix' over 'substr' for removing "
+ "characters from the end")
+ << FixItHint::CreateReplacement(
+ Assignment->getSourceRange(), Replacement);
+ return;
+ }
+ }
+ }
+ }
+ }
+ } else if (const auto *LengthCall =
+ dyn_cast<CXXMemberCallExpr>(LengthArg)) {
+ // Handle direct length() call
+ if (const auto *LengthMethod =
+ dyn_cast<CXXMethodDecl>(LengthCall->getDirectCallee())) {
+ if (LengthMethod->getName() == "length") {
+ const Expr *LengthObject = LengthCall->getImplicitObjectArgument();
+ const auto *LengthDRE =
+ dyn_cast<DeclRefExpr>(LengthObject->IgnoreParenImpCasts());
+
+ if (LengthDRE && LengthDRE->getDecl() == SourceDRE->getDecl()) {
+ isFullCopy = true;
+ }
+ }
+ }
+ }
+ if (isFullCopy) {
+ if (IsSameVar) {
+ // Remove redundant self-copy, including the semicolon
+ SourceLocation EndLoc = Assignment->getEndLoc();
+ while (EndLoc.isValid()) {
+ const char *endPtr = Result.SourceManager->getCharacterData(EndLoc);
+ if (*endPtr == ';')
+ break;
+ EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
+ Result.Context->getLangOpts());
+ }
+ if (EndLoc.isValid()) {
+ diag(Assignment->getBeginLoc(), "redundant self-copy")
+ << FixItHint::CreateRemoval(CharSourceRange::getCharRange(
+ Assignment->getBeginLoc(),
+ EndLoc.getLocWithOffset(
+ 1))); // +1 to include the semicolon.
+ }
+ } else {
+ // Simplify copy between different variables
+ std::string Replacement = TargetDRE->getNameInfo().getAsString() +
+ " = " +
+ SourceDRE->getNameInfo().getAsString();
+ diag(Assignment->getBeginLoc(), "prefer direct copy over substr")
+ << FixItHint::CreateReplacement(Assignment->getSourceRange(),
+ Replacement);
+ }
+
+ return;
+ }
+ }
+}
+
+} // namespace clang::tidy::readability
diff --git a/clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.h b/clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.h
new file mode 100644
index 00000000000000..1a2054da1ed9cc
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/StringViewSubstrCheck.h
@@ -0,0 +1,39 @@
+//===--- StringViewSubstrCheck.h - clang-tidy---------------------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::readability {
+
+/// Finds string_view substr() calls that can be replaced with remove_prefix()
+/// or remove_suffix().
+///
+/// For the user-facing documentation see:
+/// https://clang.llvm.org/extra/clang-tidy/checks/readability/string-view-substr.html
+///
+/// The check matches two patterns:
+/// sv = sv.substr(N) -> sv.remove_prefix(N)
+/// sv = sv.substr(0, sv.length() - N) -> sv.remove_suffix(N)
+///
+/// These replacements make the intent clearer and are more efficient as they
+/// modify the string_view in place rather than creating a new one.
+class StringViewSubstrCheck : public ClangTidyCheck {
+public:
+ StringViewSubstrCheck(StringRef Name, ClangTidyContext *Context)
+ : ClangTidyCheck(Name, Context) {}
+ void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+ void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace clang::tidy::readability
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H
\ No newline at end of file
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 6803842106791b..6e0352f8c619cd 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -136,6 +136,13 @@ New checks
Gives warnings for tagged unions, where the number of tags is
different from the number of data members inside the union.
+- New :doc:`readability-string-view-substr
+ <clang-tidy/checks/readability/string-view-substr>` check.
+
+ Finds ``std::string_view::substr()`` calls that can be replaced with clearer
+ alternatives using ``remove_prefix()`` or ``remove_suffix()``. This makes the
+ intent clearer and is more efficient as it modifies the string_view in place.
+
- New :doc:`modernize-use-integer-sign-comparison
<clang-tidy/checks/modernize/use-integer-sign-comparison>` check.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/string-view-substr.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/string-view-substr.rst
new file mode 100644
index 00000000000000..c4e25a1d621c8f
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability/string-view-substr.rst
@@ -0,0 +1,18 @@
+.. title:: clang-tidy - readability-string-view-substr
+
+readability-string-view-substr
+==============================
+
+Finds ``string_view substr()`` calls that can be replaced with clearer alternatives
+using ``remove_prefix()`` or ``remove_suffix()``.
+
+The check suggests the following transformations:
+
+=========================================== =======================
+Expression Replacement
+=========================================== =======================
+``sv = sv.substr(n)`` ``sv.remove_prefix(n)``
+``sv = sv.substr(0, sv.length()-n)`` ``sv.remove_suffix(n)``
+``sv = sv.substr(0, sv.length())`` Redundant self-copy
+``sv1 = sv.substr(0, sv.length())`` ``sv1 = sv``
+=========================================== =======================
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp
new file mode 100644
index 00000000000000..908c664e329b2f
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp
@@ -0,0 +1,108 @@
+// RUN: %check_clang_tidy %s readability-stringview-substr %t
+
+namespace std {
+template <typename T>
+class basic_string_view {
+public:
+ using size_type = unsigned long;
+ static constexpr size_type npos = -1;
+
+ basic_string_view(const char*) {}
+ basic_string_view substr(size_type pos, size_type count = npos) const { return *this; }
+ void remove_prefix(size_type n) {}
+ void remove_suffix(size_type n) {}
+ size_type length() const { return 0; }
+ basic_string_view& operator=(const basic_string_view&) { return *this; }
+};
+
+using string_view = basic_string_view<char>;
+} // namespace std
+
+void test_basic() {
+ std::string_view sv("test");
+ std::string_view sv1("test");
+ std::string_view sv2("test");
+
+ // Should match: remove_prefix
+ sv = sv.substr(5);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_prefix' over 'substr' for removing characters from the start [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_prefix(5)
+
+ // Should match: remove_suffix
+ sv = sv.substr(0, sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+
+ // Should match: remove_suffix with complex expression
+ sv = sv.substr(0, sv.length() - (3 + 2));
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix((3 + 2))
+}
+
+void test_copies() {
+ std::string_view sv("test");
+ std::string_view sv1("test");
+ std::string_view sv2("test");
+
+ // Should match: remove redundant self copies
+ sv = sv.substr(0, sv.length());
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant self-copy [readability-stringview-substr]
+
+ sv = sv.substr(0, sv.length() - 0);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant self-copy [readability-stringview-substr]
+
+ // Should match: simplify copies between different variables
+ sv1 = sv.substr(0, sv.length());
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer direct copy over substr [readability-stringview-substr]
+ // CHECK-FIXES: sv1 = sv
+
+ sv2 = sv.substr(0, sv.length() - 0);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer direct copy over substr [readability-stringview-substr]
+ // CHECK-FIXES: sv2 = sv
+}
+
+void test_zero_forms() {
+ std::string_view sv("test");
+ const int kZero = 0;
+ constexpr std::string_view::size_type Zero = 0;
+ #define START_POS 0
+
+ // Should match: various forms of zero in first argument
+ sv = sv.substr(kZero, sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+
+ sv = sv.substr(Zero, sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+
+ sv = sv.substr(START_POS, sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+
+ sv = sv.substr((0), sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+
+ sv = sv.substr(0u, sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+
+ sv = sv.substr(0UL, sv.length() - 3);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr]
+ // CHECK-FIXES: sv.remove_suffix(3)
+}
+
+void test_should_not_match() {
+ std::string_view sv("test");
+ std::string_view sv1("test");
+ std::string_view sv2("test");
+
+ // No match: substr used for prefix or mid-view
+ sv = sv.substr(1, sv.length() - 3); // No warning
+
+ // No match: Different string_views
+ sv = sv2.substr(0, sv2.length() - 3); // No warning
+
+
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/120055
More information about the cfe-commits
mailing list