[clang-tools-extra] [clang-tidy] Add new performance-expensive-flat-container-operation check (PR #112051)

via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 11 14:46:35 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-tidy

Author: Nicolas van Kempen (nicovank)

<details>
<summary>Changes</summary>



Summary:

Test Plan:


---

Patch is 35.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112051.diff


10 Files Affected:

- (modified) clang-tools-extra/clang-tidy/performance/CMakeLists.txt (+1) 
- (added) clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp (+104) 
- (added) clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h (+37) 
- (modified) clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp (+3) 
- (modified) clang-tools-extra/docs/ReleaseNotes.rst (+5) 
- (modified) clang-tools-extra/docs/clang-tidy/checks/list.rst (+1) 
- (added) clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst (+82) 
- (removed) clang-tools-extra/test/.clang-format (-1) 
- (added) clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp (+478) 
- (added) clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation.cpp (+91) 


``````````diff
diff --git a/clang-tools-extra/clang-tidy/performance/CMakeLists.txt b/clang-tools-extra/clang-tidy/performance/CMakeLists.txt
index c6e547c5089fb0..2def785be0465b 100644
--- a/clang-tools-extra/clang-tidy/performance/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/performance/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
 add_clang_library(clangTidyPerformanceModule STATIC
   AvoidEndlCheck.cpp
   EnumSizeCheck.cpp
+  ExpensiveFlatContainerOperationCheck.cpp
   FasterStringFindCheck.cpp
   ForRangeCopyCheck.cpp
   ImplicitConversionInLoopCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp
new file mode 100644
index 00000000000000..c06cbfd5d4271c
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp
@@ -0,0 +1,104 @@
+//===--- ExpensiveFlatContainerOperationCheck.cpp - clang-tidy ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ExpensiveFlatContainerOperationCheck.h"
+
+#include "../utils/OptionsUtils.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::performance {
+
+namespace {
+// TODO: folly::heap_vector_map?
+const auto DefaultFlatContainers =
+    "::std::flat_map; ::std::flat_multimap;"
+    "::std::flat_set; ::std::flat_multiset;"
+    "::boost::container::flat_map; ::boost::container::flat_multimap;"
+    "::boost::container::flat_set; ::boost::container::flat_multiset;"
+    "::folly::sorted_vector_map; ::folly::sorted_vector_set;";
+} // namespace
+
+ExpensiveFlatContainerOperationCheck::ExpensiveFlatContainerOperationCheck(
+    StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      WarnOutsideLoops(Options.get("WarnOutsideLoops", false)),
+      FlatContainers(utils::options::parseStringList(
+          Options.get("FlatContainers", DefaultFlatContainers))) {}
+
+void ExpensiveFlatContainerOperationCheck::registerMatchers(
+    MatchFinder *Finder) {
+  const auto OnSoughtFlatContainer =
+      callee(cxxMethodDecl(ofClass(cxxRecordDecl(hasAnyName(FlatContainers)))));
+
+  // Any emplace-style or insert_or_assign call is a single-element operation.
+  const auto HasEmplaceOrInsertorAssignCall = callee(cxxMethodDecl(hasAnyName(
+      "emplace", "emplace_hint", "try_emplace", "insert_or_assign")));
+
+  // Erase calls with a single argument are single-element operations.
+  const auto HasEraseCallWithOneArgument = cxxMemberCallExpr(
+      argumentCountIs(1), callee(cxxMethodDecl(hasName("erase"))));
+
+  // TODO: insert.
+
+  const auto SoughtFlatContainerOperation =
+      cxxMemberCallExpr(
+          OnSoughtFlatContainer,
+          anyOf(HasEmplaceOrInsertorAssignCall, HasEraseCallWithOneArgument))
+          .bind("call");
+
+  if (WarnOutsideLoops) {
+    Finder->addMatcher(SoughtFlatContainerOperation, this);
+    return;
+  }
+
+  Finder->addMatcher(
+      mapAnyOf(whileStmt, forStmt, cxxForRangeStmt, doStmt)
+          .with(stmt(
+              stmt().bind("loop"),
+              forEachDescendant(cxxMemberCallExpr(
+                  SoughtFlatContainerOperation,
+                  // Common false positive: variable is declared directly within
+                  // the loop. Note that this won't catch cases where the
+                  // container is a member of a class declared in the loop.
+                  // More robust lifetime analysis would be required to catch
+                  // those cases, but this should filter out the most common
+                  // false positives.
+                  unless(onImplicitObjectArgument(declRefExpr(hasDeclaration(
+                      decl(hasAncestor(stmt(equalsBoundNode("loop")))))))))))),
+      this);
+}
+
+void ExpensiveFlatContainerOperationCheck::check(
+    const MatchFinder::MatchResult &Result) {
+  const auto *Call = Result.Nodes.getNodeAs<CXXMemberCallExpr>("call");
+
+  diag(Call->getExprLoc(),
+       "Single element operations are expensive for flat containers. "
+       "Consider using available bulk operations instead, aggregating values "
+       "beforehand if needed.");
+}
+
+void ExpensiveFlatContainerOperationCheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "WarnOutsideLoops", WarnOutsideLoops);
+  Options.store(Opts, "FlatContainers",
+                utils::options::serializeStringList(FlatContainers));
+}
+
+bool ExpensiveFlatContainerOperationCheck::isLanguageVersionSupported(
+    const LangOptions &LangOpts) const {
+  return LangOpts.CPlusPlus;
+}
+
+std::optional<TraversalKind>
+ExpensiveFlatContainerOperationCheck::getCheckTraversalKind() const {
+  return TK_IgnoreUnlessSpelledInSource;
+}
+} // namespace clang::tidy::performance
diff --git a/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h
new file mode 100644
index 00000000000000..b64ea87b573ffd
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h
@@ -0,0 +1,37 @@
+//===--- ExpensiveFlatContainerOperationCheck.h - clang-tidy ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_EXPENSIVEFLATCONTAINEROPERATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_EXPENSIVEFLATCONTAINEROPERATIONCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::performance {
+
+/// Warns when calling an O(N) operation on a flat container.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/performance/expensive-flat-container-operation.html
+class ExpensiveFlatContainerOperationCheck : public ClangTidyCheck {
+public:
+  ExpensiveFlatContainerOperationCheck(StringRef Name,
+                                       ClangTidyContext *Context);
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override;
+  std::optional<TraversalKind> getCheckTraversalKind() const override;
+
+private:
+  bool WarnOutsideLoops;
+  std::vector<StringRef> FlatContainers;
+};
+
+} // namespace clang::tidy::performance
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_EXPENSIVEFLATCONTAINEROPERATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp b/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp
index 9e0fa6f88b36a0..f6b0ed02389e36 100644
--- a/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp
@@ -11,6 +11,7 @@
 #include "../ClangTidyModuleRegistry.h"
 #include "AvoidEndlCheck.h"
 #include "EnumSizeCheck.h"
+#include "ExpensiveFlatContainerOperationCheck.h"
 #include "FasterStringFindCheck.h"
 #include "ForRangeCopyCheck.h"
 #include "ImplicitConversionInLoopCheck.h"
@@ -37,6 +38,8 @@ class PerformanceModule : public ClangTidyModule {
   void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
     CheckFactories.registerCheck<AvoidEndlCheck>("performance-avoid-endl");
     CheckFactories.registerCheck<EnumSizeCheck>("performance-enum-size");
+    CheckFactories.registerCheck<ExpensiveFlatContainerOperationCheck>(
+        "performance-expensive-flat-container-operation");
     CheckFactories.registerCheck<FasterStringFindCheck>(
         "performance-faster-string-find");
     CheckFactories.registerCheck<ForRangeCopyCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 3f7bcde1eb3014..db396fd61ea636 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -121,6 +121,11 @@ New checks
   Gives warnings for tagged unions, where the number of tags is
   different from the number of data members inside the union.
 
+- New :doc:`performance-expensive-flat-container-operation
+  <clang-tidy/checks/performance/expensive-flat-container-operation>` check.
+
+  Warns when calling an O(N) operation on a flat container.
+
 - New :doc:`portability-template-virtual-member-function
   <clang-tidy/checks/portability/template-virtual-member-function>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 0082234f5ed31b..eaaf23bd5c535b 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -328,6 +328,7 @@ Clang-Tidy Checks
    :doc:`openmp-use-default-none <openmp/use-default-none>`,
    :doc:`performance-avoid-endl <performance/avoid-endl>`, "Yes"
    :doc:`performance-enum-size <performance/enum-size>`,
+   :doc:`performance-expensive-flat-container-operation <performance/expensive-flat-container-operation>`,
    :doc:`performance-faster-string-find <performance/faster-string-find>`, "Yes"
    :doc:`performance-for-range-copy <performance/for-range-copy>`, "Yes"
    :doc:`performance-implicit-conversion-in-loop <performance/implicit-conversion-in-loop>`,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst b/clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst
new file mode 100644
index 00000000000000..259c89fdcd7678
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst
@@ -0,0 +1,82 @@
+.. title:: clang-tidy - performance-expensive-flat-container-operation
+
+performance-expensive-flat-container-operation
+==============================================
+
+Warns when calling an O(N) operation on a flat container.
+
+This check operates on vector-based flat containers such as
+``std::flat_(map|set)``, ``boost::container::flat_(map|set)``, or
+``folly::sorted_vector_(map|set)``. While these containers' behavior is
+identical to usual maps/sets, the insert and erase operations are O(N). This
+check flags such operations, which are a common bad pattern, notably in loops.
+
+Below is an example of a typical bad pattern: inserting some values one by one
+into a flat container. This is O(N^2), as the container will need to shift
+elements right after each insertion.
+
+.. code-block:: c++
+
+    std::random_device generator;
+    std::uniform_int_distribution<int> distribution;
+
+    std::flat_set<int> set;
+    for (auto i = 0; i < N; ++i) {
+        set.insert(distribution(generator));
+    }
+
+The code above can be improved using a temporary vector, later inserting all
+values at once into the ``flat_set``.
+
+.. code-block:: c++
+
+    std::vector<int> temporary;
+    for (auto i = 0; i < N; ++i) {
+        temporary.push_back(distribution(generator));
+    }
+    std::flat_set<int> set(temporary.begin(), temporary.end());
+
+    // Or even better when possible, moving the temporary container:
+    // std::flat_set<int> set(std::move(temporary));
+
+For expensive-to-copy objects, ``std::move_iterator`` should be used.
+When possible, the temporary container can be moved directly into the flat
+container. When it is known that the inserted keys are sorted and uniqued, such
+as cases when they come from another flat container, ``std::sorted_unique`` can
+be used when inserting to save more cycles. Finally, if order is not important,
+hash-based containers can provide better performance.
+
+Limitations
+-----------
+
+This check is not capable of flagging insertions into a map via ``operator[]``,
+as it is not possible at compile-time to know whether it will trigger an
+insertion or a simple lookup. These cases have to be detected using dynamic
+profiling.
+
+This check is also of course not able to detect single element operations in
+loops crossing function boundaries. A more robust static analysis would be
+necessary to detect these cases.
+
+Options
+-------
+
+.. option:: WarnOutsideLoops
+
+    When disabled, the check will only warn when the single element operation is
+    directly enclosed by a loop, hence directly actionable. At the very least,
+    these cases can be improved using some temporary container.
+
+    When enabled, all insert and erase operations will be flagged.
+
+    Default is `false`.
+
+.. option:: FlatContainers
+
+    A semicolon-separated list of flat containers, with ``insert``, ``emplace``
+    and/or ``erase`` operations.
+
+    Default includes ``std::flat_(map|set)``, ``flat_multi(map|set)``,
+    ``boost::container::flat_(map|set)``,
+    ``boost::container::flat_multi(map|set)``, and
+    ``folly::sorted_vector_(map|set)``.
diff --git a/clang-tools-extra/test/.clang-format b/clang-tools-extra/test/.clang-format
deleted file mode 100644
index e3845288a2aece..00000000000000
--- a/clang-tools-extra/test/.clang-format
+++ /dev/null
@@ -1 +0,0 @@
-DisableFormat: true
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp
new file mode 100644
index 00000000000000..0a258a0e48da1f
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp
@@ -0,0 +1,478 @@
+// RUN: %check_clang_tidy %s performance-expensive-flat-container-operation %t -- \
+// RUN:   -config="{CheckOptions: \
+// RUN:             [{key: performance-expensive-flat-container-operation.WarnOutsideLoops, \
+// RUN:               value: true}] \
+// RUN:             }"
+
+#include <stddef.h>
+
+namespace std {
+template <class T1, class T2> struct pair { pair(T1, T2); };
+
+template <class T> struct initializer_list {};
+
+template <class T> struct remove_reference { typedef T type; };
+template <class T> struct remove_reference<T &> { typedef T type; };
+template <class T> struct remove_reference<T &&> { typedef T type; };
+
+template <class T>
+typename std::remove_reference<T>::type &&move(T &&) noexcept;
+
+struct sorted_unique_t {};
+inline constexpr sorted_unique_t sorted_unique{};
+struct sorted_equivalent_t {};
+inline constexpr sorted_equivalent_t sorted_equivalent{};
+
+template <class Key, class T> struct flat_map {
+  using key_type = Key;
+  using mapped_type = T;
+  using value_type = pair<key_type, mapped_type>;
+  using reference = pair<const key_type &, mapped_type &>;
+  using const_reference = pair<const key_type &, const mapped_type &>;
+  using size_type = size_t;
+  using iterator = struct {};
+  using const_iterator = struct {};
+
+  const_iterator begin() const noexcept;
+  const_iterator end() const noexcept;
+
+  template <class... Args> pair<iterator, bool> emplace(Args &&...args);
+  template <class... Args>
+  iterator emplace_hint(const_iterator position, Args &&...args);
+
+  pair<iterator, bool> insert(const value_type &x);
+  pair<iterator, bool> insert(value_type &&x);
+  iterator insert(const_iterator position, const value_type &x);
+  iterator insert(const_iterator position, value_type &&x);
+
+  // template <class P> pair<iterator, bool> insert(P &&x);
+  // template <class P> iterator insert(const_iterator position, P &&);
+  template <class InputIter> void insert(InputIter first, InputIter last);
+  template <class InputIter>
+  void insert(sorted_unique_t, InputIter first, InputIter last);
+  template <class R> void insert_range(R &&rg);
+
+  void insert(initializer_list<value_type> il);
+  void insert(sorted_unique_t s, initializer_list<value_type> il);
+
+  template <class... Args>
+  pair<iterator, bool> try_emplace(const key_type &k, Args &&...args);
+  template <class... Args>
+  pair<iterator, bool> try_emplace(key_type &&k, Args &&...args);
+  template <class K, class... Args>
+  pair<iterator, bool> try_emplace(K &&k, Args &&...args);
+  template <class... Args>
+  iterator try_emplace(const_iterator hint, const key_type &k, Args &&...args);
+  template <class... Args>
+  iterator try_emplace(const_iterator hint, key_type &&k, Args &&...args);
+  template <class K, class... Args>
+  iterator try_emplace(const_iterator hint, K &&k, Args &&...args);
+  template <class M>
+  pair<iterator, bool> insert_or_assign(const key_type &k, M &&obj);
+  template <class M>
+  pair<iterator, bool> insert_or_assign(key_type &&k, M &&obj);
+  template <class K, class M>
+  pair<iterator, bool> insert_or_assign(K &&k, M &&obj);
+  template <class M>
+  iterator insert_or_assign(const_iterator hint, const key_type &k, M &&obj);
+  template <class M>
+  iterator insert_or_assign(const_iterator hint, key_type &&k, M &&obj);
+  template <class K, class M>
+  iterator insert_or_assign(const_iterator hint, K &&k, M &&obj);
+
+  iterator erase(iterator position);
+  iterator erase(const_iterator position);
+  size_type erase(const key_type &x);
+  template <class K> size_type erase(K &&x);
+  iterator erase(const_iterator first, const_iterator last);
+};
+
+template <class Key, class T> struct flat_multimap {
+  using key_type = Key;
+  using mapped_type = T;
+  using value_type = pair<key_type, mapped_type>;
+  using reference = pair<const key_type &, mapped_type &>;
+  using const_reference = pair<const key_type &, const mapped_type &>;
+  using size_type = size_t;
+  using iterator = struct {};
+  using const_iterator = struct {};
+
+  const_iterator begin() const noexcept;
+  const_iterator end() const noexcept;
+
+  template <class... Args> iterator emplace(Args &&...args);
+  template <class... Args>
+  iterator emplace_hint(const_iterator position, Args &&...args);
+
+  iterator insert(const value_type &x);
+  iterator insert(value_type &&x);
+  iterator insert(const_iterator position, const value_type &x);
+  iterator insert(const_iterator position, value_type &&x);
+
+  // template <class P> iterator insert(P &&x);
+  // template <class P> iterator insert(const_iterator position, P &&);
+  template <class InputIter> void insert(InputIter first, InputIter last);
+  template <class InputIter>
+  void insert(sorted_equivalent_t, InputIter first, InputIter last);
+  template <class R> void insert_range(R &&rg);
+
+  void insert(initializer_list<value_type> il);
+  void insert(sorted_equivalent_t s, initializer_list<value_type> il);
+
+  iterator erase(iterator position);
+  iterator erase(const_iterator position);
+  size_type erase(const key_type &x);
+  template <class K> size_type erase(K &&x);
+  iterator erase(const_iterator first, const_iterator last);
+
+  void swap(flat_multimap &) noexcept;
+  void clear() noexcept;
+};
+
+template <class Key> struct flat_set {
+  using key_type = Key;
+  using value_type = Key;
+  using reference = value_type &;
+  using const_reference = const value_type &;
+  using size_type = size_t;
+  using iterator = struct {};
+  using const_iterator = struct {};
+
+  const_iterator begin() const noexcept;
+  const_iterator end() const noexcept;
+
+  template <class... Args> pair<iterator, bool> emplace(Args &&...args);
+  template <class... Args>
+  iterator emplace_hint(const_iterator position, Args &&...args);
+
+  pair<iterator, bool> insert(const value_type &x);
+  pair<iterator, bool> insert(value_type &&x);
+  template <class K> pair<iterator, bool> insert(K &&x);
+  iterator insert(const_iterator position, const value_type &x);
+  iterator insert(const_iterator position, value_type &&x);
+  template <class K> iterator insert(const_iterator hint, K &&x);
+
+  template <class InputIter> void insert(InputIter first, InputIter last);
+  template <class InputIter>
+  void insert(sorted_unique_t, InputIter first, InputIter last);
+  template <class R> void ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/112051


More information about the cfe-commits mailing list