[clang-tools-extra] Add support for renaming objc methods, even those with multiple selector pieces (PR #76466)
kadir çetinkaya via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 2 05:41:45 PST 2024
================
@@ -538,11 +565,254 @@ std::optional<InvalidName> checkName(const NamedDecl &RenameDecl,
Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
}
}
- if (Result)
+ if (Result) {
InvalidNameMetric.record(1, toString(Result->K));
+ return makeError(*Result);
+ }
+ return std::nullopt;
+}
+
+bool isMatchingSelectorName(const syntax::Token &Cur, const syntax::Token &Next,
+ const SourceManager &SM,
+ llvm::StringRef SelectorName) {
+ if (SelectorName.empty())
+ return Cur.kind() == tok::colon;
+ return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+ Cur.text(SM) == SelectorName &&
+ // We require the selector name and : to be contiguous to avoid
+ // potential conflicts with ternary expression.
+ //
+ // e.g. support `foo:` but not `foo :`.
+ Cur.endLocation() == Next.location();
+}
+
+bool isSelectorLike(const syntax::Token &Cur, const syntax::Token &Next) {
+ return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+ // We require the selector name and : to be contiguous.
+ // e.g. support `foo:` but not `foo :`.
+ Cur.endLocation() == Next.location();
+}
+
+bool parseMessageExpression(llvm::ArrayRef<syntax::Token> Tokens,
+ const SourceManager &SM, unsigned Index,
+ unsigned Last, Selector Sel,
+ std::vector<Range> &SelectorPieces) {
+
+ unsigned NumArgs = Sel.getNumArgs();
+ llvm::SmallVector<char, 8> Closes;
+ SelectorPieces.clear();
+ while (Index < Last) {
+ const auto &Tok = Tokens[Index];
+
+ if (Closes.empty()) {
+ auto PieceCount = SelectorPieces.size();
+ if (PieceCount < NumArgs &&
+ isMatchingSelectorName(Tok, Tokens[Index + 1], SM,
+ Sel.getNameForSlot(PieceCount))) {
+ // If 'foo:' instead of ':' (empty selector), we need to skip the ':'
+ // token after the name.
+ if (!Sel.getNameForSlot(PieceCount).empty()) {
+ ++Index;
+ }
+ SelectorPieces.push_back(
+ halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
+ continue;
+ }
+ // If we've found all pieces but the current token looks like another
+ // selector piece, it means the method being renamed is a strict prefix of
+ // the selector we've found - should be skipped.
+ if (SelectorPieces.size() >= NumArgs &&
+ isSelectorLike(Tok, Tokens[Index + 1]))
+ return false;
+ }
+
+ switch (Tok.kind()) {
+ case tok::l_square:
+ Closes.push_back(']');
+ break;
+ case tok::l_paren:
+ Closes.push_back(')');
+ break;
+ case tok::l_brace:
+ Closes.push_back('}');
+ break;
+ case tok::r_square:
+ if (Closes.empty())
+ return SelectorPieces.size() == NumArgs;
+
+ if (Closes.back() != ']')
+ return false;
+ Closes.pop_back();
+ break;
+ case tok::r_paren:
+ if (Closes.empty() || Closes.back() != ')')
+ return false;
+ Closes.pop_back();
+ break;
+ case tok::r_brace:
+ if (Closes.empty() || Closes.back() != '}')
+ return false;
+ Closes.pop_back();
+ break;
+ case tok::semi:
+ // top level ; ends all statements.
+ if (Closes.empty())
+ return false;
+ break;
+ default:
+ break;
+ }
+
+ ++Index;
+ }
+ return false;
+}
+
+/// Collects all ranges of the given identifier/selector in the source code.
+///
+/// If a selector is given, this does a full lex of the given source code in
+/// order to identify all selector fragments (e.g. in method exprs/decls) since
+/// they are non-contiguous.
+std::vector<SymbolRange> collectRenameIdentifierRanges(
+ llvm::StringRef Identifier, llvm::StringRef Content,
+ const LangOptions &LangOpts, std::optional<Selector> Selector) {
+ std::vector<SymbolRange> Ranges;
+ if (!Selector) {
+ auto IdentifierRanges =
+ collectIdentifierRanges(Identifier, Content, LangOpts);
+ for (const auto &R : IdentifierRanges)
+ Ranges.emplace_back(R);
+ return Ranges;
+ }
+ // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
+ std::string NullTerminatedCode = Content.str();
+ SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
+ auto &SM = FileSM.get();
+
+ // We track parens and braces to ensure that we don't accidentally try parsing
+ // a method declaration or definition which isn't at the top level or similar
+ // looking expressions (e.g. an @selector() expression).
+ unsigned ParenCount = 0;
+ unsigned BraceCount = 0;
+ unsigned NumArgs = Selector->getNumArgs();
+
+ std::vector<Range> SelectorPieces;
+ auto Tokens = syntax::tokenize(SM.getMainFileID(), SM, LangOpts);
+ unsigned Last = Tokens.size() - 1;
+ for (unsigned Index = 0; Index < Last; ++Index) {
+ const auto &Tok = Tokens[Index];
+
----------------
kadircet wrote:
Sorry, i am still having a lot of trouble following this logic. I don't really see why we need all this complicated top-levelness handling in both places and can't just try to find relevant identifiers in the code, while skipping anything inside the braces, as we were discussing last time.
what about:
```
for (unsigned Index = 0; Index < Last - 1; ++Index) {
const auto &Tok = Tokens[Index];
// Keep searching for selector(0) to begin a match.
if (!isMatchingSelectorName(Tok, Tokens[Index + 1], SM, Selector->getNameForSlot(0)))
continue;
// We found a candidate for our match, this might be a method call, declaration, or unrelated identifier eg:
// - [obj ^sel0: X sel1: Y ... ]
// or
// @interface Foo
// - int ^sel0: X sel1: Y ...
// @end
// Check if we can find all the relevant selector peices starting from this token
auto SelectorPieces = findAllSelectorPieces(Tokens.slice(Index), Selector);
if (SelectorPieces) Ranges.emplace_back(std::move(*SelectorPieces));
}
```
```
// Searches for all the fragments of the selector starting from Tokens[0]. Returns relevant ranges for those
// if all could be found.
llvm::Optional<SymbolRange> findAllSelectorPieces(llvm::ArrayRef<Token> Tokens, Selector, SM) {
SymbolRange Result;
auto CurMatch = 0;
// Consumes all the tokens from beggining of Tokens until current selector fragment is found.
// It only matches at top-level, i.e. ignores any nested calls/exprs.
auto SkipUntil = [&] {
std::stack<char> Parens;
for(Index = 0; Index < Tokens.size() - 1; ++Index) {
auto &Tok = Tokens[Index];
auto &Next = Tokens[Index+1];
switch (Tok.kind()) {
case l_paren:
case l_square:
case l_brace:
Parens.push(Tok.text(SM)[0]);
break;
case r_paren:
case r_square:
case r_brace:
if (Parens.top() != GetOpenning(Tok.text(SM)[0])) {
// Invalid code, just error-out.
Tokens = {};
CurMatch = 0;
return;
}
Parens.pop();
break;
case identifier:
// Ignore any nested tokens.
if(!Parens.empty()) break;
// Found current segment!
if(isMatchingSelectorName(Tok, Next, SM, Selector(CurMatch))) return;
default:
break;
}
}
};
while(CurMatch != Selector->getNumArgs() && !Tokens.empty()) {
SkipUntil();
// We need at least a selector name and `:`.
if(Tokens.size() < 2) return nullopt;
Result.Ranges.push_back(tokenRangeForLoc(SM, Tokens.front(), ...);
// Drop current selector fragment, keep searching for the rest;
Tokens = Tokens.drop_front(2);
++CurMatch;
}
if (CurMatch == Selector->getNumArgs()) return Result;
return std::nullopt;
}
```
https://github.com/llvm/llvm-project/pull/76466
More information about the cfe-commits
mailing list