[clang-tools-extra] Add feature extractor tool (PR #149135)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 18 11:31:59 PDT 2025
https://github.com/majidkamali1370 updated https://github.com/llvm/llvm-project/pull/149135
>From aab024f20b301aca4ab5299c88af2e32c72a3277 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Fri, 11 Jul 2025 23:16:27 +0100
Subject: [PATCH 01/16] Follow llvm and clang tutorial to create a sample tool
---
clang-tools-extra/CMakeLists.txt | 1 +
.../feature-extractor/CMakeLists.txt | 15 +++++
clang-tools-extra/feature-extractor/main.cpp | 55 +++++++++++++++++++
3 files changed, 71 insertions(+)
create mode 100644 clang-tools-extra/feature-extractor/CMakeLists.txt
create mode 100644 clang-tools-extra/feature-extractor/main.cpp
diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt
index 6b6f2b1ca2276..11879a03eba74 100644
--- a/clang-tools-extra/CMakeLists.txt
+++ b/clang-tools-extra/CMakeLists.txt
@@ -28,6 +28,7 @@ add_subdirectory(clang-query)
add_subdirectory(include-cleaner)
add_subdirectory(pp-trace)
add_subdirectory(tool-template)
+add_subdirectory(feature-extractor)
option(CLANG_TOOLS_EXTRA_INCLUDE_DOCS "Generate build targets for the Clang Extra Tools docs."
${LLVM_INCLUDE_DOCS})
diff --git a/clang-tools-extra/feature-extractor/CMakeLists.txt b/clang-tools-extra/feature-extractor/CMakeLists.txt
new file mode 100644
index 0000000000000..6ae7b78a7ecad
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_executable(feature-extractor
+ main.cpp
+)
+
+target_link_libraries(feature-extractor
+ PRIVATE
+ clangAST
+ clangASTMatchers
+ clangBasic
+ clangFrontend
+ clangSerialization
+ clangTooling
+)
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
new file mode 100644
index 0000000000000..6fec15a75e203
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -0,0 +1,55 @@
+// Declares clang::SyntaxOnlyAction.
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+// Declares llvm::cl::extrahelp.
+#include "llvm/Support/CommandLine.h"
+
+using namespace clang::tooling;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+// Apply a custom category to all command-line options so that they are the
+// only ones displayed.
+static llvm::cl::OptionCategory MyToolCategory("feature-extractor options");
+
+// CommonOptionsParser declares HelpMessage with a description of the common
+// command-line options related to the compilation database and input files.
+// It's nice to have this help message in all tools.
+static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
+
+// A help message for this specific tool can be added afterwards.
+static cl::extrahelp MoreHelp("\nMore help text...\n");
+
+StatementMatcher LoopMatcher =
+ forStmt(hasLoopInit(declStmt(hasSingleDecl(
+ varDecl(hasInitializer(integerLiteral(equals(0))))))))
+ .bind("forLoop");
+
+class LoopPrinter : public MatchFinder::MatchCallback {
+public:
+ virtual void run(const MatchFinder::MatchResult &Result) override {
+ if (const auto *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
+ FS->dump();
+ }
+};
+
+int main(int argc, const char **argv) {
+ auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
+ if (!ExpectedParser) {
+ // Fail gracefully for unsupported options.
+ llvm::errs() << ExpectedParser.takeError();
+ return 1;
+ }
+ CommonOptionsParser &OptionsParser = ExpectedParser.get();
+ ClangTool Tool(OptionsParser.getCompilations(),
+ OptionsParser.getSourcePathList());
+
+ LoopPrinter Printer;
+ MatchFinder Finder;
+ Finder.addMatcher(LoopMatcher, &Printer);
+
+ return Tool.run(newFrontendActionFactory(&Finder).get());
+}
>From 318b398e3da0e8b931bea849b188a30c6d411791 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Sat, 12 Jul 2025 15:40:31 +0100
Subject: [PATCH 02/16] Add FeatureManager class for easy integration of
extracted features
---
.../feature-extractor/FeatureManager.h | 38 +++++++++++++++++++
1 file changed, 38 insertions(+)
create mode 100644 clang-tools-extra/feature-extractor/FeatureManager.h
diff --git a/clang-tools-extra/feature-extractor/FeatureManager.h b/clang-tools-extra/feature-extractor/FeatureManager.h
new file mode 100644
index 0000000000000..5de1986630b7d
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/FeatureManager.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <tuple>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+using namespace clang::tooling;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+template <typename... Features> class FeatureManager {
+ std::tuple<Features...> features;
+ MatchFinder match_finder;
+
+public:
+ FeatureManager() {
+ (
+ [&]() {
+ for (const auto &matcher : Features::Matchers)
+ match_finder.addMatcher(matcher, &std::get<Features>(features));
+ }(),
+ ...);
+ }
+
+ MatchFinder *get_match_finder() { return &match_finder; }
+
+ ~FeatureManager() {
+ llvm::outs() << "\n";
+
+ (
+ [&]() {
+ llvm::outs() << Features::get_title() << " : "
+ << std::get<Features>(features).get_result() << "\n";
+ }(),
+ ...);
+ }
+};
>From 8978a24b1b9a2fe0c2daa393b65b51dcce7b5528 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Sun, 13 Jul 2025 23:43:35 +0100
Subject: [PATCH 03/16] Add NaryTree to hold items in tree-like structure
---
.../feature-extractor/NaryTree.h | 102 ++++++++++++++++++
1 file changed, 102 insertions(+)
create mode 100644 clang-tools-extra/feature-extractor/NaryTree.h
diff --git a/clang-tools-extra/feature-extractor/NaryTree.h b/clang-tools-extra/feature-extractor/NaryTree.h
new file mode 100644
index 0000000000000..57109bfd2e8f7
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/NaryTree.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+
+template <typename T> class NaryTree {
+ struct Node {
+ Node(const T &t) : value(t) {}
+
+ std::vector<std::unique_ptr<Node>> children;
+ T value;
+ };
+
+ using Element = std::unique_ptr<Node>;
+
+ Element root{nullptr};
+
+ template <typename Func>
+ void traverse_post_order_impl(Element *parent, Element &current, int depth,
+ bool is_leaf, Func &&f) {
+ for (auto &child : current->children)
+ traverse_post_order_impl(&current, child, depth + 1,
+ !child->children.size(), f);
+
+ f(parent, current, depth, !current->children.size());
+ }
+
+ template <typename Func>
+ void traverse_pre_order_impl(Element *parent, Element &current, int depth,
+ bool is_leaf, Func &&f) {
+ f(parent, current, depth, !current->children.size());
+
+ for (auto &child : current->children)
+ traverse_pre_order_impl(&current, child, depth + 1,
+ !child->children.size(), f);
+ }
+
+ const Element *find_node(const Element &current, const T &data) const {
+ if (current) {
+ if (current->value == data)
+ return &current;
+ else
+ for (const auto &child : current->children)
+ return find_node(child, data);
+ }
+
+ return nullptr;
+ }
+
+public:
+ struct TraverseResult {
+ std::optional<T> parent;
+ T &self;
+ int depth;
+ bool is_leaf;
+ };
+
+ template <typename Func> void traverse_post_order(Func &&f) {
+ traverse_post_order_impl(
+ nullptr, root, 0,
+ [&f](Element *parent, Element &n, int depth, bool is_leaf) {
+ std::optional<T> opt;
+
+ if (parent)
+ opt = (*parent)->value;
+
+ f(TraverseResult{opt, n->value, depth, is_leaf});
+ });
+ }
+
+ template <typename Func> void traverse_pre_order(Func &&f) {
+ traverse_pre_order_impl(
+ nullptr, root, 0, !root->children.size(),
+ [&f](Element *parent, Element &n, int depth, bool is_leaf) {
+ std::optional<T> opt;
+
+ if (parent)
+ opt = (*parent)->value;
+
+ f(TraverseResult{opt, n->value, depth, is_leaf});
+ });
+ }
+
+ bool contains(const T &data) const {
+ return find_node(root, data) != nullptr;
+ }
+
+ bool add_node(const T &parentData, const T &data) {
+ if (!root) {
+ root = std::make_unique<Node>(data);
+ return true;
+ }
+
+ if (auto node = find_node(root, parentData)) {
+ (*node)->children.push_back(std::make_unique<Node>(data));
+ return true;
+ }
+
+ return false;
+ }
+};
>From e3b3a8c2fb2bc3d58237c97fb3ddae5d288c615e Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Sun, 13 Jul 2025 23:47:13 +0100
Subject: [PATCH 04/16] Add NumLoops feature and use in FeatureManager
---
.../feature-extractor/features/NumLoops.h | 42 +++++++++++++++++++
clang-tools-extra/feature-extractor/main.cpp | 26 ++++--------
2 files changed, 50 insertions(+), 18 deletions(-)
create mode 100644 clang-tools-extra/feature-extractor/features/NumLoops.h
diff --git a/clang-tools-extra/feature-extractor/features/NumLoops.h b/clang-tools-extra/feature-extractor/features/NumLoops.h
new file mode 100644
index 0000000000000..cad96c7775c23
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/features/NumLoops.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+#include "../utils.h"
+
+using namespace clang;
+using namespace clang::tooling;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+class NumLoops : public MatchFinder::MatchCallback {
+private:
+ std::size_t num_loops{0};
+
+public:
+ static inline std::array Matchers = {forStmt().bind("forLoops"),
+ whileStmt().bind("whileLoops")};
+
+ virtual void run(const MatchFinder::MatchResult &result) override {
+ auto context = result.Context;
+
+ const auto fs = result.Nodes.getNodeAs<ForStmt>("forLoops");
+ const auto ws = result.Nodes.getNodeAs<WhileStmt>("whileLoops");
+
+ // We do not want to convert header files!
+ if ((!fs ||
+ !context->getSourceManager().isWrittenInMainFile(fs->getForLoc())) &&
+ (!ws ||
+ !context->getSourceManager().isWrittenInMainFile(ws->getWhileLoc())))
+ return;
+
+ num_loops++;
+ }
+
+ static const char *get_title() { return "num_loops"; }
+ std::size_t get_result() const { return num_loops; }
+};
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
index 6fec15a75e203..3b75cb6f7d419 100644
--- a/clang-tools-extra/feature-extractor/main.cpp
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -7,6 +7,10 @@
// Declares llvm::cl::extrahelp.
#include "llvm/Support/CommandLine.h"
+#include "FeatureManager.h"
+
+#include "features/NumLoops.h"
+
using namespace clang::tooling;
using namespace clang::ast_matchers;
using namespace llvm;
@@ -23,33 +27,19 @@ static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
// A help message for this specific tool can be added afterwards.
static cl::extrahelp MoreHelp("\nMore help text...\n");
-StatementMatcher LoopMatcher =
- forStmt(hasLoopInit(declStmt(hasSingleDecl(
- varDecl(hasInitializer(integerLiteral(equals(0))))))))
- .bind("forLoop");
-
-class LoopPrinter : public MatchFinder::MatchCallback {
-public:
- virtual void run(const MatchFinder::MatchResult &Result) override {
- if (const auto *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
- FS->dump();
- }
-};
-
int main(int argc, const char **argv) {
auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
+
if (!ExpectedParser) {
// Fail gracefully for unsupported options.
llvm::errs() << ExpectedParser.takeError();
return 1;
}
+
CommonOptionsParser &OptionsParser = ExpectedParser.get();
ClangTool Tool(OptionsParser.getCompilations(),
OptionsParser.getSourcePathList());
+ FeatureManager<NumLoops> manager;
- LoopPrinter Printer;
- MatchFinder Finder;
- Finder.addMatcher(LoopMatcher, &Printer);
-
- return Tool.run(newFrontendActionFactory(&Finder).get());
+ return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get());
}
>From 0093688c74a0efdfc5b29ab9ca46ee5829c13ebe Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Tue, 15 Jul 2025 14:51:08 +0100
Subject: [PATCH 05/16] Add various util functions
---
clang-tools-extra/feature-extractor/utils.h | 80 +++++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 clang-tools-extra/feature-extractor/utils.h
diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
new file mode 100644
index 0000000000000..e30e702e3202c
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+
+#include <optional>
+
+using namespace clang;
+
+///
+/// Check if two ValueDecl pointers refer to the same variable in AST
+///
+static inline bool are_same_variable(const ValueDecl *First,
+ const ValueDecl *Second) {
+ return First && Second &&
+ First->getCanonicalDecl() == Second->getCanonicalDecl();
+}
+
+///
+/// Check if for statement is defined in the translation unit file (not headers)
+///
+static inline bool is_in_main_file(ASTContext *context, const ForStmt *fs) {
+ return fs && context->getSourceManager().isWrittenInMainFile(fs->getForLoc());
+}
+
+///
+/// For a given for statement, tries to extract loop bound in the condition
+///
+static inline std::optional<llvm::APInt>
+get_for_condition_range_value(const ForStmt *fs) {
+ const Expr *cond = fs->getCond();
+
+ if (cond) {
+ if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(cond)) {
+ const Expr *RHS = BO->getRHS()->IgnoreParenImpCasts();
+
+ if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(RHS))
+ return IL->getValue();
+ }
+ }
+
+ return std::nullopt;
+}
+
+///
+/// For a given Stmt \s, tries to return the nearest ancestor of type
+/// StatementType. Return nullptr in case no parent of given type was found.
+///
+template <typename StatementType>
+static inline const StatementType *get_parent_stmt(ASTContext *context,
+ const Stmt *s) {
+ auto parents = context->getParents(*s);
+
+ if (parents.empty())
+ return nullptr;
+
+ for (auto &p : parents)
+ if (const StatementType *parent_stmt = p.get<StatementType>())
+ return parent_stmt;
+ else
+ return get_parent_stmt<StatementType>(context, p.get<Stmt>());
+
+ return nullptr;
+}
+
+///
+/// Run a callable on all parents of type StatementType of \s recursively goes
+/// up.
+///
+template <typename StatementType, typename Func>
+static inline void run_on_all_parents_of_type(ASTContext *context,
+ const Stmt *s, Func &&f) {
+ auto parent = get_parent_stmt<StatementType>(context, s);
+
+ while (parent) {
+ f(context, parent);
+ parent = get_parent_stmt<StatementType>(context,
+ dyn_cast<StatementType>(parent));
+ }
+}
>From eed01f169c10fd5d8cec8d0a47a58cfa82018ff2 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Tue, 15 Jul 2025 14:51:43 +0100
Subject: [PATCH 06/16] Add get_parent API to NaryTree
---
clang-tools-extra/feature-extractor/NaryTree.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/clang-tools-extra/feature-extractor/NaryTree.h b/clang-tools-extra/feature-extractor/NaryTree.h
index 57109bfd2e8f7..5acc9421e3bf8 100644
--- a/clang-tools-extra/feature-extractor/NaryTree.h
+++ b/clang-tools-extra/feature-extractor/NaryTree.h
@@ -86,6 +86,19 @@ template <typename T> class NaryTree {
return find_node(root, data) != nullptr;
}
+ std::optional<T> get_parent(const T &data) {
+ std::optional<T> result;
+
+ traverse_pre_order_impl(
+ nullptr, root, 0, !root->children.size(),
+ [&result, &data](Element *parent, Element &n, int depth, bool is_leaf) {
+ if (parent && n->value == data)
+ result = (*parent)->value;
+ });
+
+ return result;
+ }
+
bool add_node(const T &parentData, const T &data) {
if (!root) {
root = std::make_unique<Node>(data);
>From 2675b130fc81ff1c4b671e87d9816617897ab783 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Tue, 15 Jul 2025 14:54:37 +0100
Subject: [PATCH 07/16] Add LoopsData class to contain for loops AST nodes
---
.../feature-extractor/LoopsData.h | 42 +++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 clang-tools-extra/feature-extractor/LoopsData.h
diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
new file mode 100644
index 0000000000000..58b704732fe0a
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <cstdint>
+#include <unordered_map>
+#include <vector>
+
+#include <clang/AST/Stmt.h>
+
+#include "NaryTree.h"
+
+using namespace clang;
+
+class LoopsData {
+public:
+ using TreeType = NaryTree<const Stmt *>;
+
+ void add_for(clang::ASTContext *context, const Stmt *parent,
+ const Stmt *self) {
+ ids[self] = self->getID(*context);
+
+ if (parent == nullptr) {
+ loops.push_back({});
+ loops.back().add_node(nullptr, self);
+ } else {
+ auto ntree = std::find_if(loops.begin(), loops.end(),
+ [&parent](const NaryTree<const Stmt *> &tree) {
+ return tree.contains(parent);
+ });
+
+ if (ntree != loops.end()) {
+ ntree->add_node(parent, self);
+ }
+ }
+ }
+
+ auto &get_ids() { return ids; }
+ auto &get_loops() { return loops; }
+
+private:
+ std::unordered_map<const Stmt *, std::int64_t> ids;
+ std::vector<TreeType> loops;
+};
>From 07af8dd146fa516bb503929f78a820425d9c71c6 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Tue, 15 Jul 2025 19:05:06 +0100
Subject: [PATCH 08/16] Add OmpRegions feature
---
.../feature-extractor/features/OmpRegions.h | 67 +++++++++++++++++++
clang-tools-extra/feature-extractor/main.cpp | 3 +-
2 files changed, 69 insertions(+), 1 deletion(-)
create mode 100644 clang-tools-extra/feature-extractor/features/OmpRegions.h
diff --git a/clang-tools-extra/feature-extractor/features/OmpRegions.h b/clang-tools-extra/feature-extractor/features/OmpRegions.h
new file mode 100644
index 0000000000000..666b49bd8482c
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/features/OmpRegions.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <numeric>
+#include <unordered_map>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+using namespace clang;
+using namespace clang::ast_matchers;
+
+class OmpRegions : public MatchFinder::MatchCallback {
+ std::unordered_map<std::string, unsigned> regions_count;
+
+public:
+ static inline std::array Matchers = {
+ ompExecutableDirective().bind("ompRegion")};
+
+ virtual void run(const MatchFinder::MatchResult &result) override {
+ if (const auto *omp_directive =
+ result.Nodes.getNodeAs<OMPExecutableDirective>("ompRegion")) {
+ std::string omp_type;
+
+ using namespace llvm;
+ if (isa<OMPParallelDirective>(omp_directive))
+ omp_type = "parallel";
+ else if (isa<OMPForDirective>(omp_directive))
+ omp_type = "for";
+ else if (isa<OMPParallelForDirective>(omp_directive))
+ omp_type = "parallel for";
+ else if (isa<OMPSingleDirective>(omp_directive))
+ omp_type = "single";
+ else if (isa<OMPMasterDirective>(omp_directive))
+ omp_type = "master";
+ else if (isa<OMPCriticalDirective>(omp_directive))
+ omp_type = "critical";
+ else if (isa<OMPTaskDirective>(omp_directive))
+ omp_type = "task";
+ else if (isa<OMPSectionDirective>(omp_directive))
+ omp_type = "section";
+ else if (isa<OMPSectionsDirective>(omp_directive))
+ omp_type = "sections";
+ else if (isa<OMPBarrierDirective>(omp_directive))
+ omp_type = "barrier";
+ else
+ omp_type = "other";
+
+ regions_count[omp_type]++;
+ }
+ }
+
+ static const char *get_title() { return "opm_regions"; }
+ std::size_t get_result() {
+#ifndef NDEBUG
+ llvm::outs() << "\n";
+ for (const auto &pair : regions_count)
+ llvm::outs() << "OMP region [" << pair.first << "]: " << pair.second
+ << "\n";
+#endif
+
+ return std::accumulate(
+ regions_count.cbegin(), regions_count.cend(), std::size_t{0},
+ [](std::size_t acc, const auto &p) { return acc + p.second; });
+ }
+};
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
index 3b75cb6f7d419..4109a46ed3f1e 100644
--- a/clang-tools-extra/feature-extractor/main.cpp
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -10,6 +10,7 @@
#include "FeatureManager.h"
#include "features/NumLoops.h"
+#include "features/OmpRegions.h"
using namespace clang::tooling;
using namespace clang::ast_matchers;
@@ -39,7 +40,7 @@ int main(int argc, const char **argv) {
CommonOptionsParser &OptionsParser = ExpectedParser.get();
ClangTool Tool(OptionsParser.getCompilations(),
OptionsParser.getSourcePathList());
- FeatureManager<NumLoops> manager;
+ FeatureManager<NumLoops, OmpRegions> manager;
return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get());
}
>From 91e806aae264598f7b8e6bda8588f8b38b78e783 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Tue, 15 Jul 2025 23:24:36 +0100
Subject: [PATCH 09/16] Bugfix get_parent_stamt in utils
---
clang-tools-extra/feature-extractor/utils.h | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index e30e702e3202c..ceece1b3fb96b 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -54,11 +54,12 @@ static inline const StatementType *get_parent_stmt(ASTContext *context,
if (parents.empty())
return nullptr;
- for (auto &p : parents)
- if (const StatementType *parent_stmt = p.get<StatementType>())
- return parent_stmt;
- else
- return get_parent_stmt<StatementType>(context, p.get<Stmt>());
+ const auto p = parents[0];
+
+ if (const StatementType *parent_stmt = p.get<StatementType>())
+ return parent_stmt;
+ else if (const auto pStmt = p.get<Stmt>())
+ return get_parent_stmt<StatementType>(context, pStmt);
return nullptr;
}
>From d4894ce223238bb38dcc2cfb756d78afc03afe85 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Tue, 15 Jul 2025 23:58:08 +0100
Subject: [PATCH 10/16] Add function to evaluate more complex for loop
conditions
---
clang-tools-extra/feature-extractor/utils.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index ceece1b3fb96b..6ef00c065a3e7 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -25,6 +25,7 @@ static inline bool is_in_main_file(ASTContext *context, const ForStmt *fs) {
///
/// For a given for statement, tries to extract loop bound in the condition
+/// This function extracts literal conditions from for loop condition
///
static inline std::optional<llvm::APInt>
get_for_condition_range_value(const ForStmt *fs) {
@@ -42,6 +43,26 @@ get_for_condition_range_value(const ForStmt *fs) {
return std::nullopt;
}
+///
+/// For a given for statement, tries to extract loop bound in the condition
+/// This function evaluates macro conditions from for loop condition
+///
+static inline std::optional<llvm::APSInt>
+get_for_condition_range_value(ASTContext *context, const ForStmt *fs) {
+
+ if (const Expr *cond = fs->getCond(); cond) {
+ if (const BinaryOperator *binOp = dyn_cast<BinaryOperator>(cond)) {
+ const Expr *rhs = binOp->getRHS();
+ clang::Expr::EvalResult eval;
+ if (rhs->EvaluateAsInt(eval, *context)) {
+ return eval.Val.getInt();
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
///
/// For a given Stmt \s, tries to return the nearest ancestor of type
/// StatementType. Return nullptr in case no parent of given type was found.
>From 0e3d022638e5fd87b1ebeb73e92b70ac66ef01c1 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Wed, 16 Jul 2025 01:00:54 +0100
Subject: [PATCH 11/16] Improve LoopsData to accept more data other than Stmt
pointer
---
.../feature-extractor/LoopsData.h | 23 +++++++++++++------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
index 58b704732fe0a..aae45298e2e89 100644
--- a/clang-tools-extra/feature-extractor/LoopsData.h
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -12,20 +12,29 @@ using namespace clang;
class LoopsData {
public:
- using TreeType = NaryTree<const Stmt *>;
+ struct MetaData {
+ const Stmt *for_stmt;
+
+ MetaData(const Stmt *fs) : for_stmt(fs) {}
+
+ friend bool operator==(const MetaData &lhs, const MetaData &rhs) {
+ return lhs.for_stmt == rhs.for_stmt;
+ }
+ };
+
+ using TreeType = NaryTree<MetaData>;
void add_for(clang::ASTContext *context, const Stmt *parent,
- const Stmt *self) {
- ids[self] = self->getID(*context);
+ const MetaData &self) {
+ ids[self.for_stmt] = self.for_stmt->getID(*context);
if (parent == nullptr) {
loops.push_back({});
loops.back().add_node(nullptr, self);
} else {
- auto ntree = std::find_if(loops.begin(), loops.end(),
- [&parent](const NaryTree<const Stmt *> &tree) {
- return tree.contains(parent);
- });
+ auto ntree = std::find_if(
+ loops.begin(), loops.end(),
+ [&parent](const TreeType &tree) { return tree.contains(parent); });
if (ntree != loops.end()) {
ntree->add_node(parent, self);
>From 1c47aa72ee4159ec484c02b2b043a4fa7da56a29 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Wed, 16 Jul 2025 17:15:41 +0100
Subject: [PATCH 12/16] Add function to get total repetition count of for loop
considering its parents
---
clang-tools-extra/feature-extractor/utils.h | 36 +++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index 6ef00c065a3e7..63386167be2f9 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -63,6 +63,21 @@ get_for_condition_range_value(ASTContext *context, const ForStmt *fs) {
return std::nullopt;
}
+///
+/// For a given for statement, tries to extract loop bound in the condition. Use
+/// this function instead of two previous ones. This one internally uses the
+/// others
+///
+static inline std::optional<llvm::APInt>
+maybe_get_for_bound(ASTContext *context, const ForStmt *fs) {
+ if (const auto method1 = get_for_condition_range_value(fs))
+ return llvm::APInt(64, method1.value().getSExtValue());
+ else if (const auto method2 = get_for_condition_range_value(context, fs))
+ return method2.value();
+ else
+ return std::nullopt;
+}
+
///
/// For a given Stmt \s, tries to return the nearest ancestor of type
/// StatementType. Return nullptr in case no parent of given type was found.
@@ -100,3 +115,24 @@ static inline void run_on_all_parents_of_type(ASTContext *context,
dyn_cast<StatementType>(parent));
}
}
+
+///
+/// Get repetition of each for loop, considering parent for loops. For example,
+/// for the following two nested for loops, result for the first for is 10, and
+/// the result of nested one is 200
+///
+/// for(int i = 0; i < 10; i++)
+/// for(int j = 0; j < 20; j++)
+/// {}
+///
+llvm::APInt get_total_for_repetition_count(ASTContext *context,
+ const ForStmt *fs) {
+ auto bounds = maybe_get_for_bound(context, fs).value_or(llvm::APInt(64, 1));
+
+ run_on_all_parents_of_type<ForStmt>(
+ context, fs, [&bounds](auto ctx, auto fss) {
+ bounds *= maybe_get_for_bound(ctx, fss).value_or(llvm::APInt(64, 1));
+ });
+
+ return bounds;
+}
>From b5c75d4dedd367ce00ec5ed8aa05cbedeb09c62a Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Wed, 16 Jul 2025 17:22:29 +0100
Subject: [PATCH 13/16] Move utils functions to namespace
---
clang-tools-extra/feature-extractor/utils.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index 63386167be2f9..8237fb159fd68 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -5,6 +5,8 @@
#include <optional>
+namespace Utils {
+
using namespace clang;
///
@@ -136,3 +138,5 @@ llvm::APInt get_total_for_repetition_count(ASTContext *context,
return bounds;
}
+
+} // namespace Utils
>From a48cf44371ce07b4cb0b21f778aedb91113ca6cd Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Wed, 16 Jul 2025 17:23:37 +0100
Subject: [PATCH 14/16] Add loop range to LoopsData meta data
---
clang-tools-extra/feature-extractor/LoopsData.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
index aae45298e2e89..d8992b44b3589 100644
--- a/clang-tools-extra/feature-extractor/LoopsData.h
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -14,8 +14,11 @@ class LoopsData {
public:
struct MetaData {
const Stmt *for_stmt;
+ llvm::APInt loop_range;
MetaData(const Stmt *fs) : for_stmt(fs) {}
+ MetaData(const Stmt *fs, const llvm::APInt &rng)
+ : for_stmt(fs), loop_range(rng) {}
friend bool operator==(const MetaData &lhs, const MetaData &rhs) {
return lhs.for_stmt == rhs.for_stmt;
>From 692a014c3956ad4eee0cd73ab5253c65af668cb1 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Fri, 18 Jul 2025 19:02:44 +0100
Subject: [PATCH 15/16] Add AST visitors for float and int operations
---
.../visitors/FloatOpCounter.h | 75 +++++++++++++++++++
.../visitors/IntegerOpCounter.h | 60 +++++++++++++++
2 files changed, 135 insertions(+)
create mode 100644 clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h
create mode 100644 clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h
diff --git a/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h b/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h
new file mode 100644
index 0000000000000..4daaaa4de78f4
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <cstddef>
+
+#include "clang/AST/RecursiveASTVisitor.h"
+
+class FloatOpCounter : public clang::RecursiveASTVisitor<FloatOpCounter> {
+public:
+ explicit FloatOpCounter() : count(0) {}
+
+ bool VisitBinaryOperator(clang::BinaryOperator *bo) {
+ using namespace clang;
+
+ if (bo->getLHS()->getType()->isFloatingType() &&
+ bo->getRHS()->getType()->isFloatingType()) {
+ ++count;
+ }
+
+ return true;
+ }
+
+ bool VisitCompoundAssignOperator(clang::CompoundAssignOperator *cao) {
+ if (cao->getLHS()->getType()->isFloatingType() &&
+ cao->getRHS()->getType()->isFloatingType()) {
+ ++count;
+ }
+
+ return true;
+ }
+
+ bool VisitUnaryOperator(clang::UnaryOperator *uo) {
+ using namespace clang;
+
+ if (uo->getSubExpr()->getType()->isFloatingType()) {
+ switch (uo->getOpcode()) {
+ case UO_PreInc:
+ case UO_PreDec:
+ case UO_PostInc:
+ case UO_PostDec:
+ case UO_Plus:
+ case UO_Minus:
+ ++count;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ bool VisitFloatingLiteral(clang::FloatingLiteral * /* fl */) {
+ ++count;
+ return true;
+ }
+
+ bool VisitImplicitCastExpr(clang::ImplicitCastExpr *ice) {
+ if (ice->getType()->isFloatingType() &&
+ !ice->getSubExpr()->getType()->isFloatingType()) {
+ ++count;
+ }
+
+ return true;
+ }
+
+ void traverse(clang::Stmt *S) {
+ count = 0;
+ TraverseStmt(S);
+ }
+
+ std::size_t get_count() const { return count; }
+
+private:
+ std::size_t count;
+};
diff --git a/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h b/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h
new file mode 100644
index 0000000000000..a6f4a268b052f
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <cstddef>
+
+#include "clang/AST/RecursiveASTVisitor.h"
+
+/// Recursive AST visitor that tallies integer related nodes: binary operators
+/// and compound assignments with two integer operands, and the unary operators
+/// ++/--/+/-/~ applied to an integer operand.
+class IntegerOpCounter : public clang::RecursiveASTVisitor<IntegerOpCounter> {
+public:
+  explicit IntegerOpCounter() : count(0) {}
+
+  /// Counts \p bo when both of its operand types are integer types.
+  bool VisitBinaryOperator(clang::BinaryOperator *bo) {
+    if (hasIntOperands(bo))
+      ++count;
+    return true;
+  }
+
+  // NOTE(review): CompoundAssignOperator derives from BinaryOperator, so this
+  // node presumably also reaches VisitBinaryOperator -- confirm the double count.
+  /// Counts \p cao when both of its operand types are integer types.
+  bool VisitCompoundAssignOperator(clang::CompoundAssignOperator *cao) {
+    if (hasIntOperands(cao))
+      ++count;
+    return true;
+  }
+
+  /// Counts ++, --, unary plus, unary minus and ~ on an integer operand.
+  bool VisitUnaryOperator(clang::UnaryOperator *uo) {
+    if (!isInt(uo->getSubExpr()))
+      return true;
+    const auto op = uo->getOpcode();
+    if (op == clang::UO_PreInc || op == clang::UO_PostInc ||
+        op == clang::UO_PreDec || op == clang::UO_PostDec ||
+        op == clang::UO_Plus || op == clang::UO_Minus || op == clang::UO_Not)
+      ++count;
+    return true;
+  }
+
+  /// Resets the tally to zero, then traverses \p s.
+  void traverse(clang::Stmt *s) {
+    count = 0;
+    TraverseStmt(s);
+  }
+
+  /// Current value of the tally.
+  std::size_t get_count() const { return count; }
+
+private:
+  static bool isInt(const clang::Expr *e) {
+    return e->getType()->isIntegerType();
+  }
+  static bool hasIntOperands(const clang::BinaryOperator *op) {
+    return isInt(op->getLHS()) && isInt(op->getRHS());
+  }
+
+  std::size_t count;
+};
>From 12e8f99bf8444a7e779ff14344972ba6cab8cfd7 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1370 at gmail.com>
Date: Fri, 18 Jul 2025 19:31:21 +0100
Subject: [PATCH 16/16] Add basic LoopsRange feature
---
.../feature-extractor/LoopsData.h | 8 +-
.../feature-extractor/features/LoopsRange.h | 79 +++++++++++++++++++
clang-tools-extra/feature-extractor/main.cpp | 3 +-
3 files changed, 87 insertions(+), 3 deletions(-)
create mode 100644 clang-tools-extra/feature-extractor/features/LoopsRange.h
diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
index d8992b44b3589..2c6ddd913cd28 100644
--- a/clang-tools-extra/feature-extractor/LoopsData.h
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -15,10 +15,14 @@ class LoopsData {
struct MetaData {
const Stmt *for_stmt;
llvm::APInt loop_range;
+ std::size_t float_ops;
+ std::size_t int_ops;
MetaData(const Stmt *fs) : for_stmt(fs) {}
- MetaData(const Stmt *fs, const llvm::APInt &rng)
- : for_stmt(fs), loop_range(rng) {}
+ MetaData(const Stmt *fs, const llvm::APInt &rng, std::size_t float_ops,
+ std::size_t int_ops)
+ : for_stmt(fs), loop_range(rng), float_ops(float_ops),
+ int_ops(int_ops) {}
friend bool operator==(const MetaData &lhs, const MetaData &rhs) {
return lhs.for_stmt == rhs.for_stmt;
diff --git a/clang-tools-extra/feature-extractor/features/LoopsRange.h b/clang-tools-extra/feature-extractor/features/LoopsRange.h
new file mode 100644
index 0000000000000..c64ab074f6922
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/features/LoopsRange.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <array>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+#include "../LoopsData.h"
+#include "../utils.h"
+#include "../visitors/FloatOpCounter.h"
+#include "../visitors/IntegerOpCounter.h"
+
+using namespace clang;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+/// Match callback that stores each matched `for` loop in loops_data along with
+/// its Utils::get_total_for_repetition_count value and the op tallies of its body.
+class LoopsRange : public MatchFinder::MatchCallback {
+  LoopsData loops_data;
+
+public:
+  /// One matcher for outermost `for` loops, one for loops inside another `for`.
+  static inline std::array Matchers = {
+      forStmt(unless(hasAncestor(forStmt()))).bind("topLevelFor"),
+      forStmt(hasAncestor(forStmt())).bind("nestedFor"),
+  };
+
+  /// Records the loop bound in \p result; a nested loop is attached to the
+  /// parent `for` reported by Utils::get_parent_stmt.
+  virtual void run(const MatchFinder::MatchResult &result) override {
+    // Counts the ops of the loop body and hands the loop to loops_data.
+    static constexpr auto GatherData =
+        [](const MatchFinder::MatchResult &result, LoopsData &loops_data,
+           const clang::ForStmt *parent_for, const clang::ForStmt *fs) {
+          FloatOpCounter fCounter;
+          IntegerOpCounter iCounter;
+          // The counters take a mutable Stmt *, hence the const_cast.
+          fCounter.traverse(const_cast<Stmt *>(fs->getBody()));
+          iCounter.traverse(const_cast<Stmt *>(fs->getBody()));
+          loops_data.add_for(
+              result.Context, parent_for,
+              LoopsData::MetaData{
+                  fs, Utils::get_total_for_repetition_count(result.Context, fs),
+                  fCounter.get_count(), iCounter.get_count()});
+        };
+
+    // getNodeAs() yields nullptr for an id that is not bound in this match,
+    // so the pointer is checked before it is passed to any helper.
+    if (const ForStmt *fs = result.Nodes.getNodeAs<ForStmt>("topLevelFor");
+        fs && Utils::is_in_main_file(result.Context, fs))
+      GatherData(result, loops_data, nullptr, fs);
+
+    if (const ForStmt *fs = result.Nodes.getNodeAs<ForStmt>("nestedFor");
+        fs && Utils::is_in_main_file(result.Context, fs)) {
+      if (auto parent_for =
+              Utils::get_parent_stmt<ForStmt>(result.Context, fs))
+        GatherData(result, loops_data, parent_for, fs);
+    }
+  }
+
+  static const char *get_title() { return "loops_range"; }
+
+  /// Prints every recorded loop to llvm::outs(), indented two spaces per depth
+  /// level, and returns the size of loops_data.get_ids().
+  std::size_t get_result() {
+    llvm::outs() << "\n";
+    for (auto &loop : loops_data.get_loops())
+      loop.traverse_pre_order(
+          [](const LoopsData::TreeType::TraverseResult &result) {
+            const auto &[optParentStmt, selfMetaData, depth, isLeaf] = result;
+            llvm::outs() << std::string(depth * 2, ' ') << "for "
+                         << (isLeaf ? "(leaf) " : "") << selfMetaData.loop_range
+                         << " " << selfMetaData.float_ops << " "
+                         << selfMetaData.int_ops << "\n";
+          });
+    return loops_data.get_ids().size();
+  }
+};
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
index 4109a46ed3f1e..67abb131865f8 100644
--- a/clang-tools-extra/feature-extractor/main.cpp
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -9,6 +9,7 @@
#include "FeatureManager.h"
+#include "features/LoopsRange.h"
#include "features/NumLoops.h"
#include "features/OmpRegions.h"
@@ -40,7 +41,7 @@ int main(int argc, const char **argv) {
CommonOptionsParser &OptionsParser = ExpectedParser.get();
ClangTool Tool(OptionsParser.getCompilations(),
OptionsParser.getSourcePathList());
- FeatureManager<NumLoops, OmpRegions> manager;
+ FeatureManager<NumLoops, OmpRegions, LoopsRange> manager;
return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get());
}
More information about the cfe-commits
mailing list