[clang] [clang][transformer] Add `merge` range-selector for selecting the merge of two ranges. (PR #169560)
Yu Hao via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 25 17:58:48 PST 2025
https://github.com/yuhaouy updated https://github.com/llvm/llvm-project/pull/169560
>From d6a195fea92c992f85d3c9bae3fad379c01f61c1 Mon Sep 17 00:00:00 2001
From: Yu Hao <yuhaoyu at google.com>
Date: Mon, 24 Nov 2025 15:19:46 -0800
Subject: [PATCH 1/2] Add merge range-selector
---
.../clang/Tooling/Transformer/RangeSelector.h | 4 ++
clang/lib/Tooling/Transformer/Parsing.cpp | 2 +-
.../lib/Tooling/Transformer/RangeSelector.cpp | 50 +++++++++++++++++++
clang/unittests/Tooling/RangeSelectorTest.cpp | 32 ++++++++++++
4 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h
index 462a9da8f10eb..c76a5106edd65 100644
--- a/clang/include/clang/Tooling/Transformer/RangeSelector.h
+++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h
@@ -37,6 +37,10 @@ RangeSelector enclose(RangeSelector Begin, RangeSelector End);
/// Convenience version of \c range where end-points are bound nodes.
RangeSelector encloseNodes(std::string BeginID, std::string EndID);
+/// Selects the merge of the two ranges, i.e. from min(First.begin,
+/// Second.begin) to max(First.end, Second.end).
+RangeSelector merge(RangeSelector First, RangeSelector Second);
+
/// DEPRECATED. Use `enclose`.
inline RangeSelector range(RangeSelector Begin, RangeSelector End) {
return enclose(std::move(Begin), std::move(End));
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
index 19a1c7c66df46..f7bffda6967a9 100644
--- a/clang/lib/Tooling/Transformer/Parsing.cpp
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -108,7 +108,7 @@ getBinaryStringSelectors() {
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors() {
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
- M = {{"enclose", enclose}, {"between", between}};
+ M = {{"enclose", enclose}, {"between", between}, {"merge", merge}};
return M;
}
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 54a1590d3106d..948cf9c7e712f 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -178,6 +178,56 @@ RangeSelector transformer::encloseNodes(std::string BeginID,
return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
}
+RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) {
+ return [First,
+ Second](const MatchResult &Result) -> Expected<CharSourceRange> {
+ Expected<CharSourceRange> FirstRange = First(Result);
+ if (!FirstRange)
+ return FirstRange.takeError();
+ Expected<CharSourceRange> SecondRange = Second(Result);
+ if (!SecondRange)
+ return SecondRange.takeError();
+ // Result begin loc is the minimum of the begin locs of the two ranges.
+ SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin()
+ ? FirstRange->getBegin()
+ : SecondRange->getBegin();
+ if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
+ // Both ranges are token ranges. Just take the maximum of their end locs.
+ SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd()
+ ? FirstRange->getEnd()
+ : SecondRange->getEnd();
+ return CharSourceRange::getTokenRange(B, E);
+ }
+ SourceLocation FirstE = FirstRange->getEnd();
+ if (FirstRange->isTokenRange()) {
+ // The end of the first range is a token. Need to resolve the token to a
+ // char range.
+ CharSourceRange EndRange = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(FirstRange->getEnd()),
+ *Result.SourceManager, Result.Context->getLangOpts());
+ if (EndRange.isInvalid())
+ return invalidArgumentError(
+ "merge: can't resolve first token range to valid source range");
+ FirstE = EndRange.getEnd();
+ }
+ SourceLocation SecondE = SecondRange->getEnd();
+ if (SecondRange->isTokenRange()) {
+ // The end of the second range is a token. Need to resolve the token to a
+ // char range.
+ CharSourceRange EndRange = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(SecondRange->getEnd()),
+ *Result.SourceManager, Result.Context->getLangOpts());
+ if (EndRange.isInvalid())
+ return invalidArgumentError(
+ "merge: can't resolve second token range to valid source range");
+ SecondE = EndRange.getEnd();
+ }
+ // Result end loc is the maximum of the end locs of the two ranges.
+ SourceLocation E = FirstE > SecondE ? FirstE : SecondE;
+ return CharSourceRange::getCharRange(B, E);
+ };
+}
+
RangeSelector transformer::member(std::string ID) {
return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index d441da165b09b..9feca3ca0bed8 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -327,6 +327,38 @@ TEST(RangeSelectorTest, EncloseOpGeneralParsed) {
EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
}
+TEST(RangeSelectorTest, MergeOp) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")),
+ hasArgument(2, expr().bind("a2")));
+ RangeSelector R = merge(node("a0"), node("a1"));
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+ R = merge(node("a2"), node("a1"));
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("7 /* comment */, 9"));
+}
+
+TEST(RangeSelectorTest, MergeOpParsed) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")),
+ hasArgument(2, expr().bind("a2")));
+ auto R = parseRangeSelector(R"rs(merge(node("a0"), node("a1")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
+ R = parseRangeSelector(R"rs(merge(node("a2"), node("a1")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("7 /* comment */, 9"));
+}
+
TEST(RangeSelectorTest, NodeOpStatement) {
StringRef Code = "int f() { return 3; }";
TestMatch Match = matchCode(Code, returnStmt().bind("id"));
>From a9933031791f121631f00ee5d1836ad8aeb32e1d Mon Sep 17 00:00:00 2001
From: Yu Hao <yuhaoyu at google.com>
Date: Tue, 25 Nov 2025 17:58:34 -0800
Subject: [PATCH 2/2] Fix loc comparision and get end-of-token
---
.../lib/Tooling/Transformer/RangeSelector.cpp | 45 +++++++++++--------
1 file changed, 26 insertions(+), 19 deletions(-)
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 948cf9c7e712f..68b16f91652fb 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -187,43 +187,50 @@ RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) {
Expected<CharSourceRange> SecondRange = Second(Result);
if (!SecondRange)
return SecondRange.takeError();
+
+ SourceLocation FirstB = FirstRange->getBegin();
+ SourceLocation FirstE = FirstRange->getEnd();
+ SourceLocation SecondB = SecondRange->getBegin();
+ SourceLocation SecondE = SecondRange->getEnd();
// Result begin loc is the minimum of the begin locs of the two ranges.
- SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin()
- ? FirstRange->getBegin()
- : SecondRange->getBegin();
+ SourceLocation B =
+ Result.SourceManager->isBeforeInTranslationUnit(FirstB, SecondB)
+ ? FirstB
+ : SecondB;
if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
// Both ranges are token ranges. Just take the maximum of their end locs.
- SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd()
- ? FirstRange->getEnd()
- : SecondRange->getEnd();
+ SourceLocation E =
+ Result.SourceManager->isBeforeInTranslationUnit(FirstE, SecondE)
+ ? SecondE
+ : FirstE;
return CharSourceRange::getTokenRange(B, E);
}
- SourceLocation FirstE = FirstRange->getEnd();
+
if (FirstRange->isTokenRange()) {
// The end of the first range is a token. Need to resolve the token to a
// char range.
- CharSourceRange EndRange = Lexer::makeFileCharRange(
- CharSourceRange::getTokenRange(FirstRange->getEnd()),
- *Result.SourceManager, Result.Context->getLangOpts());
- if (EndRange.isInvalid())
+ FirstE = Lexer::getLocForEndOfToken(FirstE, /*Offset=*/0,
+ *Result.SourceManager,
+ Result.Context->getLangOpts());
+ if (FirstE.isInvalid())
return invalidArgumentError(
"merge: can't resolve first token range to valid source range");
- FirstE = EndRange.getEnd();
}
- SourceLocation SecondE = SecondRange->getEnd();
if (SecondRange->isTokenRange()) {
// The end of the second range is a token. Need to resolve the token to a
// char range.
- CharSourceRange EndRange = Lexer::makeFileCharRange(
- CharSourceRange::getTokenRange(SecondRange->getEnd()),
- *Result.SourceManager, Result.Context->getLangOpts());
- if (EndRange.isInvalid())
+ SecondE = Lexer::getLocForEndOfToken(SecondE, /*Offset=*/0,
+ *Result.SourceManager,
+ Result.Context->getLangOpts());
+ if (SecondE.isInvalid())
return invalidArgumentError(
"merge: can't resolve second token range to valid source range");
- SecondE = EndRange.getEnd();
}
// Result end loc is the maximum of the end locs of the two ranges.
- SourceLocation E = FirstE > SecondE ? FirstE : SecondE;
+ SourceLocation E =
+ Result.SourceManager->isBeforeInTranslationUnit(FirstE, SecondE)
+ ? SecondE
+ : FirstE;
return CharSourceRange::getCharRange(B, E);
};
}
More information about the cfe-commits
mailing list