[clang-tools-extra] Add support for renaming objc methods, even those with multiple selector pieces (PR #76466)

Alex Hoppen via cfe-commits cfe-commits at lists.llvm.org
Mon Feb 5 18:02:55 PST 2024


================
@@ -538,11 +565,254 @@ std::optional<InvalidName> checkName(const NamedDecl &RenameDecl,
             Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
     }
   }
-  if (Result)
+  if (Result) {
     InvalidNameMetric.record(1, toString(Result->K));
+    return makeError(*Result);
+  }
+  return std::nullopt;
+}
+
+bool isMatchingSelectorName(const syntax::Token &Cur, const syntax::Token &Next,
+                            const SourceManager &SM,
+                            llvm::StringRef SelectorName) {
+  if (SelectorName.empty())
+    return Cur.kind() == tok::colon;
+  return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+         Cur.text(SM) == SelectorName &&
+         // We require the selector name and : to be contiguous to avoid
+         // potential conflicts with ternary expression.
+         //
+         // e.g. support `foo:` but not `foo :`.
+         Cur.endLocation() == Next.location();
+}
+
+bool isSelectorLike(const syntax::Token &Cur, const syntax::Token &Next) {
+  return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+         // We require the selector name and : to be contiguous.
+         // e.g. support `foo:` but not `foo :`.
+         Cur.endLocation() == Next.location();
+}
+
+bool parseMessageExpression(llvm::ArrayRef<syntax::Token> Tokens,
+                            const SourceManager &SM, unsigned Index,
+                            unsigned Last, Selector Sel,
+                            std::vector<Range> &SelectorPieces) {
+
+  unsigned NumArgs = Sel.getNumArgs();
+  llvm::SmallVector<char, 8> Closes;
+  SelectorPieces.clear();
+  while (Index < Last) {
+    const auto &Tok = Tokens[Index];
+
+    if (Closes.empty()) {
+      auto PieceCount = SelectorPieces.size();
+      if (PieceCount < NumArgs &&
+          isMatchingSelectorName(Tok, Tokens[Index + 1], SM,
+                                 Sel.getNameForSlot(PieceCount))) {
+        // If 'foo:' instead of ':' (empty selector), we need to skip the ':'
+        // token after the name.
+        if (!Sel.getNameForSlot(PieceCount).empty()) {
+          ++Index;
+        }
+        SelectorPieces.push_back(
+            halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
+        continue;
+      }
+      // If we've found all pieces but the current token looks like another
+      // selector piece, it means the method being renamed is a strict prefix of
+      // the selector we've found - should be skipped.
+      if (SelectorPieces.size() >= NumArgs &&
+          isSelectorLike(Tok, Tokens[Index + 1]))
+        return false;
+    }
+
+    switch (Tok.kind()) {
+    case tok::l_square:
+      Closes.push_back(']');
+      break;
+    case tok::l_paren:
+      Closes.push_back(')');
+      break;
+    case tok::l_brace:
+      Closes.push_back('}');
+      break;
+    case tok::r_square:
+      if (Closes.empty())
+        return SelectorPieces.size() == NumArgs;
+
+      if (Closes.back() != ']')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::r_paren:
+      if (Closes.empty() || Closes.back() != ')')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::r_brace:
+      if (Closes.empty() || Closes.back() != '}')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::semi:
+      // top level ; ends all statements.
+      if (Closes.empty())
+        return false;
+      break;
+    default:
+      break;
+    }
+
+    ++Index;
+  }
+  return false;
+}
+
+/// Collects all ranges of the given identifier/selector in the source code.
+///
+/// If a selector is given, this does a full lex of the given source code in
+/// order to identify all selector fragments (e.g. in method exprs/decls) since
+/// they are non-contiguous.
+std::vector<SymbolRange> collectRenameIdentifierRanges(
+    llvm::StringRef Identifier, llvm::StringRef Content,
+    const LangOptions &LangOpts, std::optional<Selector> Selector) {
+  std::vector<SymbolRange> Ranges;
+  if (!Selector) {
+    auto IdentifierRanges =
+        collectIdentifierRanges(Identifier, Content, LangOpts);
+    for (const auto &R : IdentifierRanges)
+      Ranges.emplace_back(R);
+    return Ranges;
+  }
+  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
+  std::string NullTerminatedCode = Content.str();
+  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
+  auto &SM = FileSM.get();
+
+  // We track parens and braces to ensure that we don't accidentally try parsing
+  // a method declaration or definition which isn't at the top level or similar
+  // looking expressions (e.g. an @selector() expression).
+  unsigned ParenCount = 0;
+  unsigned BraceCount = 0;
+  unsigned NumArgs = Selector->getNumArgs();
+
+  std::vector<Range> SelectorPieces;
+  auto Tokens = syntax::tokenize(SM.getMainFileID(), SM, LangOpts);
----------------
ahoppen wrote:

I don’t think it’s too bad to force that onto the caller. AFAICT we have two calls to `collectRenameIdentifierRanges` at the moment, one of which already has the tokens and one doesn’t. So it doesn’t make sense to optimize for the case where the tokens aren’t present. FWIW in my PR I introduced [`UnexpandedTokenBuffer`](https://github.com/llvm/llvm-project/pull/78872/files#diff-5b0c3948c18281a93c168fe93010fc5f4fed1a9659de2ef306090003223f152bR148-R160) for that reason and creating it is a [one-liner](https://github.com/llvm/llvm-project/pull/78872/files#diff-26ff7c74af8a1d882abbad43625d3cd4bf8d024ef5c7064993f5ede3eec8752eR717-R718).

https://github.com/llvm/llvm-project/pull/76466


More information about the cfe-commits mailing list