[clang] [clang][transformer] Add `merge` range-selector for selecting the merge of two ranges. (PR #169560)
Yu Hao via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 25 12:05:26 PST 2025
https://github.com/yuhaouy updated https://github.com/llvm/llvm-project/pull/169560
>From d6a195fea92c992f85d3c9bae3fad379c01f61c1 Mon Sep 17 00:00:00 2001
From: Yu Hao <yuhaoyu at google.com>
Date: Mon, 24 Nov 2025 15:19:46 -0800
Subject: [PATCH] Add merge range-selector
---
.../clang/Tooling/Transformer/RangeSelector.h | 4 ++
clang/lib/Tooling/Transformer/Parsing.cpp | 2 +-
.../lib/Tooling/Transformer/RangeSelector.cpp | 50 +++++++++++++++++++
clang/unittests/Tooling/RangeSelectorTest.cpp | 32 ++++++++++++
4 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h
index 462a9da8f10eb..c76a5106edd65 100644
--- a/clang/include/clang/Tooling/Transformer/RangeSelector.h
+++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h
@@ -37,6 +37,10 @@ RangeSelector enclose(RangeSelector Begin, RangeSelector End);
/// Convenience version of \c range where end-points are bound nodes.
RangeSelector encloseNodes(std::string BeginID, std::string EndID);
+/// Selects the smallest range covering both \p First and \p Second, i.e. from
+/// min(First.begin, Second.begin) to max(First.end, Second.end).
+RangeSelector merge(RangeSelector First, RangeSelector Second);
+
/// DEPRECATED. Use `enclose`.
inline RangeSelector range(RangeSelector Begin, RangeSelector End) {
return enclose(std::move(Begin), std::move(End));
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
index 19a1c7c66df46..f7bffda6967a9 100644
--- a/clang/lib/Tooling/Transformer/Parsing.cpp
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -108,7 +108,7 @@ getBinaryStringSelectors() {
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors() {
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
- M = {{"enclose", enclose}, {"between", between}};
+ M = {{"enclose", enclose}, {"between", between}, {"merge", merge}}; // Adds a "merge" entry mapped to the new merge selector.
return M;
}
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 54a1590d3106d..948cf9c7e712f 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -178,6 +178,56 @@ RangeSelector transformer::encloseNodes(std::string BeginID,
return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
}
+RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) { // Returns a selector spanning from the smaller begin to the larger end of First and Second.
+ return [First,
+ Second](const MatchResult &Result) -> Expected<CharSourceRange> {
+ Expected<CharSourceRange> FirstRange = First(Result); // Evaluate First; on failure its error is returned and Second is not evaluated.
+ if (!FirstRange)
+ return FirstRange.takeError();
+ Expected<CharSourceRange> SecondRange = Second(Result); // Likewise, a failure of Second is returned unchanged.
+ if (!SecondRange)
+ return SecondRange.takeError();
+ // The merged range begins at the smaller of the two begin locations.
+ SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin() // NOTE(review): ordering uses SourceLocation's operator<,
+ ? FirstRange->getBegin() // not a SourceManager query; presumably only meaningful within one file --
+ : SecondRange->getBegin(); // confirm behavior for macro or cross-file locations (same for the '>' comparisons below).
+ if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
+ // Both inputs are token ranges, so the result stays a token range ending at the larger of the two end locations.
+ SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd()
+ ? FirstRange->getEnd()
+ : SecondRange->getEnd();
+ return CharSourceRange::getTokenRange(B, E);
+ }
+ SourceLocation FirstE = FirstRange->getEnd(); // From here on at least one input is not a token range; the result is built as a char range.
+ if (FirstRange->isTokenRange()) {
+ // First is a token range, so its end location refers to a token. Convert
+ // that token to a char range and use the char range's end instead.
+ CharSourceRange EndRange = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(FirstRange->getEnd()),
+ *Result.SourceManager, Result.Context->getLangOpts());
+ if (EndRange.isInvalid()) // The conversion produced no valid range; report an error.
+ return invalidArgumentError(
+ "merge: can't resolve first token range to valid source range");
+ FirstE = EndRange.getEnd();
+ }
+ SourceLocation SecondE = SecondRange->getEnd();
+ if (SecondRange->isTokenRange()) {
+ // Second is a token range, so its end location refers to a token. Convert
+ // that token to a char range and use the char range's end instead.
+ CharSourceRange EndRange = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(SecondRange->getEnd()),
+ *Result.SourceManager, Result.Context->getLangOpts());
+ if (EndRange.isInvalid()) // The conversion produced no valid range; report an error.
+ return invalidArgumentError(
+ "merge: can't resolve second token range to valid source range");
+ SecondE = EndRange.getEnd();
+ }
+ // The merged range ends at the larger of the two resolved end locations.
+ SourceLocation E = FirstE > SecondE ? FirstE : SecondE;
+ return CharSourceRange::getCharRange(B, E);
+ };
+}
+
RangeSelector transformer::member(std::string ID) {
return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index d441da165b09b..9feca3ca0bed8 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -327,6 +327,38 @@ TEST(RangeSelectorTest, EncloseOpGeneralParsed) {
EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
}
+TEST(RangeSelectorTest, MergeOp) { // Checks merge() when built directly from two node selectors.
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), // Bind the three call arguments as a0, a1 and a2.
+ hasArgument(1, expr().bind("a1")),
+ hasArgument(2, expr().bind("a2")));
+ RangeSelector R = merge(node("a0"), node("a1"));
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7")); // Expected text excludes the comments before a0 and after a1.
+ R = merge(node("a2"), node("a1")); // Operands are passed in reverse source order here.
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("7 /* comment */, 9")); // Expected text still runs from a1 through a2, including the comment between them.
+}
+
+TEST(RangeSelectorTest, MergeOpParsed) { // Same expectations as MergeOp, but the selector is produced by parseRangeSelector.
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), // Bind the three call arguments as a0, a1 and a2.
+ hasArgument(1, expr().bind("a1")),
+ hasArgument(2, expr().bind("a2")));
+ auto R = parseRangeSelector(R"rs(merge(node("a0"), node("a1")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); // Parsing must succeed before R is dereferenced below.
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); // Expected text excludes the comments before a0 and after a1.
+ R = parseRangeSelector(R"rs(merge(node("a2"), node("a1")))rs"); // Operands are given in reverse source order here.
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("7 /* comment */, 9")); // Expected text still runs from a1 through a2, including the comment between them.
+}
+
TEST(RangeSelectorTest, NodeOpStatement) {
StringRef Code = "int f() { return 3; }";
TestMatch Match = matchCode(Code, returnStmt().bind("id"));
More information about the cfe-commits
mailing list