[clang-tools-extra] [clang-tidy] add check to suggest replacement of nested std::min or std::max with initializer lists (PR #85572)

Julian Schmidt via cfe-commits cfe-commits at lists.llvm.org
Sat Mar 30 10:14:25 PDT 2024


================
@@ -0,0 +1,342 @@
+//===--- MinMaxUseInitializerListCheck.cpp - clang-tidy -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MinMaxUseInitializerListCheck.h"
+#include "../utils/ASTUtils.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::modernize {
+
+struct FindArgsResult {
+  const Expr *First;
+  const Expr *Last;
+  const Expr *Compare;
+  std::vector<const Expr *> Args;
+};
+
+static const FindArgsResult findArgs(const CallExpr *Call);
+static std::vector<std::pair<int, int>>
+getCommentRanges(const std::string &source);
+static bool
+isPositionInComment(int position,
+                    const std::vector<std::pair<int, int>> &commentRanges);
+static void
+removeCharacterFromSource(std::string &FunctionCallSource,
+                          const std::vector<std::pair<int, int>> &CommentRanges,
+                          char Character, const CallExpr *InnerCall,
+                          std::vector<FixItHint> &Result, bool ReverseSearch);
+static SourceLocation
+getLocForEndOfToken(const Expr *expr, const MatchFinder::MatchResult &Match);
+static const std::vector<FixItHint>
+generateReplacement(const MatchFinder::MatchResult &Match,
+                    const CallExpr *TopCall, const FindArgsResult &Result);
+
+MinMaxUseInitializerListCheck::MinMaxUseInitializerListCheck(
+    StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      Inserter(Options.getLocalOrGlobal("IncludeStyle",
+                                        utils::IncludeSorter::IS_LLVM),
+               areDiagsSelfContained()) {}
+
+void MinMaxUseInitializerListCheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "IncludeStyle", Inserter.getStyle());
+}
+
+void MinMaxUseInitializerListCheck::registerMatchers(MatchFinder *Finder) {
+  auto CreateMatcher = [](const std::string &FunctionName) {
+    auto FuncDecl = functionDecl(hasName(FunctionName));
+    auto Expression = callExpr(callee(FuncDecl));
+
+    return callExpr(callee(FuncDecl),
+                    anyOf(hasArgument(0, Expression),
+                          hasArgument(1, Expression),
+                          hasArgument(0, cxxStdInitializerListExpr())),
+                    unless(hasParent(Expression)))
+        .bind("topCall");
+  };
+
+  Finder->addMatcher(CreateMatcher("::std::max"), this);
+  Finder->addMatcher(CreateMatcher("::std::min"), this);
+}
+
+void MinMaxUseInitializerListCheck::registerPPCallbacks(
+    const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
+  Inserter.registerPreprocessor(PP);
+}
+
+void MinMaxUseInitializerListCheck::check(
+    const MatchFinder::MatchResult &Match) {
+
+  const auto *TopCall = Match.Nodes.getNodeAs<CallExpr>("topCall");
+
+  // if topcall in macro ignore
+  if (TopCall->getBeginLoc().isMacroID()) {
+    return;
+  }
+
+  FindArgsResult Result = findArgs(TopCall);
+  const std::vector<FixItHint> Replacement =
+      generateReplacement(Match, TopCall, Result);
+
+  // if only changes are inserting '{' and '}' then ignore
+  if (Replacement.size() <= 2) {
+    return;
+  }
+
+  const DiagnosticBuilder Diagnostic =
+      diag(TopCall->getBeginLoc(),
+           "do not use nested 'std::%0' calls, use initializer lists instead")
+      << TopCall->getDirectCallee()->getName()
+      << Inserter.createIncludeInsertion(
+             Match.SourceManager->getFileID(TopCall->getBeginLoc()),
+             "<algorithm>");
+
+  for (const auto &FixIt : Replacement) {
+    Diagnostic << FixIt;
+  }
+}
+
+static const FindArgsResult findArgs(const CallExpr *Call) {
+  FindArgsResult Result;
+  Result.First = nullptr;
+  Result.Last = nullptr;
+  Result.Compare = nullptr;
+
+  if (Call->getNumArgs() == 3) {
+    auto ArgIterator = Call->arguments().begin();
+    std::advance(ArgIterator, 2);
+    Result.Compare = *ArgIterator;
+  } else {
+    auto ArgIterator = Call->arguments().begin();
+
+    if (const auto *InitListExpr =
+            dyn_cast<CXXStdInitializerListExpr>(*ArgIterator)) {
+      if (const auto *TempExpr =
+              dyn_cast<MaterializeTemporaryExpr>(InitListExpr->getSubExpr())) {
+        if (const auto *InitList =
+                dyn_cast<clang::InitListExpr>(TempExpr->getSubExpr())) {
+          for (const Expr *Init : InitList->inits()) {
+            Result.Args.push_back(Init);
+          }
+          Result.First = *ArgIterator;
+          Result.Last = *ArgIterator;
+
+          std::advance(ArgIterator, 1);
+          if (ArgIterator != Call->arguments().end()) {
+            Result.Compare = *ArgIterator;
+          }
+          return Result;
+        }
+      }
+    }
+  }
+
+  for (const Expr *Arg : Call->arguments()) {
+    if (!Result.First)
+      Result.First = Arg;
+
+    if (Arg == Result.Compare)
+      continue;
+
+    Result.Args.push_back(Arg);
+    Result.Last = Arg;
+  }
+
+  return Result;
+}
+
+static std::vector<std::pair<int, int>>
+getCommentRanges(const std::string &source) {
+  std::vector<std::pair<int, int>> commentRanges;
+  bool inComment = false;
+  bool singleLineComment = false;
+  int start = -1;
+  for (unsigned long i = 0; i < source.size(); i++) {
+    switch (source[i]) {
+    case '/':
+      if (source[i + 1] == '/')
+        singleLineComment = true;
+      else if (source[i + 1] == '*')
+        inComment = true;
+      break;
+    case '*':
+      if (source[i + 1] == '/')
+        inComment = false;
+      break;
+    case '\n':
+      singleLineComment = false;
+      break;
+    }
+    if (inComment || singleLineComment) {
+      if (start == -1)
+        start = i;
+    } else if (start != -1) {
+      commentRanges.push_back({start, i});
+      start = -1;
+    }
+  }
+  return commentRanges;
+}
+
+static bool
+isPositionInComment(int position,
+                    const std::vector<std::pair<int, int>> &commentRanges) {
+  return std::any_of(commentRanges.begin(), commentRanges.end(),
+                     [position](const auto &range) {
+                       return position >= range.first &&
+                              position <= range.second;
+                     });
+}
+
+static void
+removeCharacterFromSource(std::string &FunctionCallSource,
+                          const std::vector<std::pair<int, int>> &CommentRanges,
+                          char Character, const CallExpr *InnerCall,
+                          std::vector<FixItHint> &Result, bool ReverseSearch) {
+  size_t CurrentSearch = ReverseSearch ? FunctionCallSource.size() : 0;
+  while ((CurrentSearch =
+              ReverseSearch
+                  ? FunctionCallSource.rfind(Character, CurrentSearch - 1)
+                  : FunctionCallSource.find(Character, CurrentSearch + 1)) !=
+         std::string::npos) {
+    if (!isPositionInComment(CurrentSearch, CommentRanges)) {
+      Result.push_back(FixItHint::CreateRemoval(CharSourceRange::getCharRange(
+          InnerCall->getSourceRange().getBegin().getLocWithOffset(
+              CurrentSearch),
+          InnerCall->getSourceRange().getBegin().getLocWithOffset(
+              CurrentSearch + 1))));
+      break;
+    }
+  }
+}
+
+SourceLocation getLocForEndOfToken(const Expr *expr,
+                                   const MatchFinder::MatchResult &Match) {
+  return Lexer::getLocForEndOfToken(expr->getEndLoc(), 0, *Match.SourceManager,
+                                    Match.Context->getLangOpts());
+}
+
+static const std::vector<FixItHint>
+generateReplacement(const MatchFinder::MatchResult &Match,
+                    const CallExpr *TopCall, const FindArgsResult &Result) {
+  std::vector<FixItHint> FixItHints;
+
+  const QualType ResultType = TopCall->getDirectCallee()
+                                  ->getReturnType()
+                                  .getNonReferenceType()
+                                  .getUnqualifiedType()
+                                  .getCanonicalType();
+  const bool IsInitializerList = Result.First == Result.Last;
+
+  if (!IsInitializerList)
+    FixItHints.push_back(
+        FixItHint::CreateInsertion(Result.First->getBeginLoc(), "{"));
+
+  for (const Expr *Arg : Result.Args) {
+    std::string ArgText =
+        Lexer::getSourceText(
+            CharSourceRange::getTokenRange(Arg->getSourceRange()),
+            *Match.SourceManager, Match.Context->getLangOpts())
+            .str();
+
+    if (const auto InnerCall = dyn_cast<CallExpr>(Arg->IgnoreParenImpCasts())) {
+      const auto InnerResult = findArgs(InnerCall);
+      const auto InnerReplacement =
+          generateReplacement(Match, InnerCall, InnerResult);
+      if (InnerCall->getDirectCallee()->getQualifiedNameAsString() ==
+              TopCall->getDirectCallee()->getQualifiedNameAsString() &&
+          ((!Result.Compare && !InnerResult.Compare) ||
+           utils::areStatementsIdentical(Result.Compare, InnerResult.Compare,
+                                         *Match.Context))) {
+        std::vector<std::pair<int, int>> CommentRanges =
+            getCommentRanges(ArgText);
+
+        FixItHints.push_back(
+            FixItHint::CreateRemoval(CharSourceRange::getTokenRange(
+                InnerCall->getCallee()->getSourceRange())));
+
+        removeCharacterFromSource(ArgText, CommentRanges, '(', InnerCall,
+                                  FixItHints, false);
+
+        if (InnerResult.First == InnerResult.Last) {
+          removeCharacterFromSource(ArgText, CommentRanges, '{', InnerCall,
+                                    FixItHints, false);
+          removeCharacterFromSource(ArgText, CommentRanges, '}', InnerCall,
+                                    FixItHints, true);
+          FixItHints.insert(FixItHints.end(), InnerReplacement.begin(),
+                            InnerReplacement.end());
+        } else {
+          FixItHints.insert(FixItHints.end(), InnerReplacement.begin() + 1,
+                            InnerReplacement.end() - 1);
+        }
+
+        if (InnerResult.Compare) {
+          bool CommentFound = false;
+
+          int InnerCallStartOffset = InnerCall->getBeginLoc().getRawEncoding();
+          int CompareStart =
+              getLocForEndOfToken(InnerResult.Last, Match).getRawEncoding() -
+              InnerCallStartOffset;
+          int LastEnd =
+              getLocForEndOfToken(InnerResult.Compare, Match).getRawEncoding() -
+              InnerCallStartOffset;
+
+          for (const auto &Range : CommentRanges) {
+
+            if (Range.first >= CompareStart && Range.second <= LastEnd)
+              CommentFound = true;
+          }
+
+          if (CommentFound) {
+            removeCharacterFromSource(ArgText, CommentRanges, ',', InnerCall,
+                                      FixItHints, true);
+
+            FixItHints.push_back(
+                FixItHint::CreateRemoval(CharSourceRange::getCharRange(
+                    InnerResult.Compare->getBeginLoc(),
+                    getLocForEndOfToken(InnerResult.Compare, Match))));
+          } else {
+            FixItHints.push_back(
+                FixItHint::CreateRemoval(CharSourceRange::getCharRange(
+                    getLocForEndOfToken(InnerResult.Last, Match),
+                    getLocForEndOfToken(InnerResult.Compare, Match))));
+          }
+        }
+
+        removeCharacterFromSource(ArgText, CommentRanges, ')', InnerCall,
+                                  FixItHints, true);
+
+        continue;
----------------
5chmidti wrote:

It's generally not a good idea to use strings like this to calculate removal locations. Instead, you should use the AST nodes as much as possible, and when that is not enough fall back on the Lexer to do this for you, because as you have mentioned, it is hard to figure out what is inside a comment and what is not. Although, I should point out that the Lexer is considered slow and should only be used when required (that was mentioned somewhere by someone).

E.g., to remove from the `CallExpr`: the called function and both parentheses, write:
```c++
FixItHints.push_back(FixItHint::CreateRemoval(InnerCall->getCallee()->getSourceRange()));

const auto LParen = utils::lexer::findNextTokenSkippingComments(
            InnerCall->getCallee()->getEndLoc(), *Match.SourceManager,
            Match.Context->getLangOpts());
if (LParen && LParen->getKind() == tok::l_paren)
    FixItHints.push_back(FixItHint::CreateRemoval(SourceRange(LParen->getLocation())));

FixItHints.push_back(FixItHint::CreateRemoval(SourceRange(InnerCall->getRParenLoc())));
```
I.e., use the AST nodes as much as possible. Sadly, [`CallExpr`](https://clang.llvm.org/doxygen/classclang_1_1CallExpr.html) only provides a location for the right parentheses, not the left one, so this is where the lexer is needed. Clang-Tidy already provides useful Lexer utilities [here](https://github.com/llvm/llvm-project/blob/main/clang-tools-extra/clang-tidy/utils/LexerUtils.h) (I suggest checking out the `utils` folder for the future, it's useful knowing what is already available there).

You can also change how you remove the braces from an existing initializer list argument by `dyn_cast`ing `InnerResult.First` to a `CXXStdInitializerListExpr` and removing its begin and end locations (`FixItHints.push_back(FixItHint::CreateRemoval(SourceRange(InitList->getBeginLoc())));`)

The same goes for the removal of `Compare`: remove the `SourceRange` of `Compare` and remove the previous `tok::comma` with `findPreviousTokenKind`, similar to the `LParen` above.

Afterward, you can move `ArgText` down into the scope inside `if (ArgType != ResultType)` because it is not needed beforehand. My attempt to not require getting the `ArgText` and to use two `CreateInsertion`s failed, possible because of shifted locations.

https://github.com/llvm/llvm-project/pull/85572


More information about the cfe-commits mailing list