[clang-tools-extra] Add support for renaming objc methods, even those with multiple selector pieces (PR #76466)

Alex Hoppen via cfe-commits cfe-commits at lists.llvm.org
Mon Feb 5 15:01:38 PST 2024


================
@@ -538,11 +565,254 @@ std::optional<InvalidName> checkName(const NamedDecl &RenameDecl,
             Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
     }
   }
-  if (Result)
+  if (Result) {
     InvalidNameMetric.record(1, toString(Result->K));
+    return makeError(*Result);
+  }
+  return std::nullopt;
+}
+
+bool isMatchingSelectorName(const syntax::Token &Cur, const syntax::Token &Next,
+                            const SourceManager &SM,
+                            llvm::StringRef SelectorName) {
+  if (SelectorName.empty())
+    return Cur.kind() == tok::colon;
+  return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+         Cur.text(SM) == SelectorName &&
+         // We require the selector name and : to be contiguous to avoid
+         // potential conflicts with ternary expression.
+         //
+         // e.g. support `foo:` but not `foo :`.
+         Cur.endLocation() == Next.location();
+}
+
+bool isSelectorLike(const syntax::Token &Cur, const syntax::Token &Next) {
+  return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+         // We require the selector name and : to be contiguous.
+         // e.g. support `foo:` but not `foo :`.
+         Cur.endLocation() == Next.location();
+}
+
+bool parseMessageExpression(llvm::ArrayRef<syntax::Token> Tokens,
+                            const SourceManager &SM, unsigned Index,
+                            unsigned Last, Selector Sel,
+                            std::vector<Range> &SelectorPieces) {
+
+  unsigned NumArgs = Sel.getNumArgs();
+  llvm::SmallVector<char, 8> Closes;
+  SelectorPieces.clear();
+  while (Index < Last) {
+    const auto &Tok = Tokens[Index];
+
+    if (Closes.empty()) {
+      auto PieceCount = SelectorPieces.size();
+      if (PieceCount < NumArgs &&
+          isMatchingSelectorName(Tok, Tokens[Index + 1], SM,
+                                 Sel.getNameForSlot(PieceCount))) {
+        // If 'foo:' instead of ':' (empty selector), we need to skip the ':'
+        // token after the name.
+        if (!Sel.getNameForSlot(PieceCount).empty()) {
+          ++Index;
+        }
+        SelectorPieces.push_back(
+            halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
+        continue;
+      }
+      // If we've found all pieces but the current token looks like another
+      // selector piece, it means the method being renamed is a strict prefix of
+      // the selector we've found - should be skipped.
+      if (SelectorPieces.size() >= NumArgs &&
+          isSelectorLike(Tok, Tokens[Index + 1]))
+        return false;
+    }
+
+    switch (Tok.kind()) {
+    case tok::l_square:
+      Closes.push_back(']');
+      break;
+    case tok::l_paren:
+      Closes.push_back(')');
+      break;
+    case tok::l_brace:
+      Closes.push_back('}');
+      break;
+    case tok::r_square:
+      if (Closes.empty())
+        return SelectorPieces.size() == NumArgs;
+
+      if (Closes.back() != ']')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::r_paren:
+      if (Closes.empty() || Closes.back() != ')')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::r_brace:
+      if (Closes.empty() || Closes.back() != '}')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::semi:
+      // top level ; ends all statements.
+      if (Closes.empty())
+        return false;
+      break;
+    default:
+      break;
+    }
+
+    ++Index;
+  }
+  return false;
+}
+
+/// Collects all ranges of the given identifier/selector in the source code.
+///
+/// If a selector is given, this does a full lex of the given source code in
+/// order to identify all selector fragments (e.g. in method exprs/decls) since
+/// they are non-contiguous.
+std::vector<SymbolRange> collectRenameIdentifierRanges(
+    llvm::StringRef Identifier, llvm::StringRef Content,
+    const LangOptions &LangOpts, std::optional<Selector> Selector) {
+  std::vector<SymbolRange> Ranges;
+  if (!Selector) {
+    auto IdentifierRanges =
+        collectIdentifierRanges(Identifier, Content, LangOpts);
+    for (const auto &R : IdentifierRanges)
+      Ranges.emplace_back(R);
+    return Ranges;
+  }
+  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
+  std::string NullTerminatedCode = Content.str();
+  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
+  auto &SM = FileSM.get();
+
+  // We track parens and braces to ensure that we don't accidentally try parsing
+  // a method declaration or definition which isn't at the top level or similar
+  // looking expressions (e.g. an @selector() expression).
+  unsigned ParenCount = 0;
+  unsigned BraceCount = 0;
+  unsigned NumArgs = Selector->getNumArgs();
+
+  std::vector<Range> SelectorPieces;
+  auto Tokens = syntax::tokenize(SM.getMainFileID(), SM, LangOpts);
+  unsigned Last = Tokens.size() - 1;
+  for (unsigned Index = 0; Index < Last; ++Index) {
+    const auto &Tok = Tokens[Index];
+
+    if (BraceCount == 0 && ParenCount == 0) {
+      auto PieceCount = SelectorPieces.size();
+      if (PieceCount < NumArgs &&
+          isMatchingSelectorName(Tok, Tokens[Index + 1], SM,
+                                 Selector->getNameForSlot(PieceCount))) {
+        // If 'foo:' instead of ':' (empty selector), we need to skip the ':'
+        // token after the name.
+        if (!Selector->getNameForSlot(PieceCount).empty()) {
+          ++Index;
+        }
+        SelectorPieces.push_back(
+            halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
+        continue;
+      }
+      // If we've found all pieces, we still need to try to consume more pieces
+      // as it's possible the selector being renamed is a prefix of this method
+      // name.
+      if (PieceCount >= NumArgs && isSelectorLike(Tok, Tokens[Index + 1])) {
+        ++Index;
+        SelectorPieces.push_back(
+            halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
+        continue;
+      }
+    }
+
+    switch (Tok.kind()) {
+    case tok::l_square:
+      if (parseMessageExpression(Tokens, SM, Index + 1, Last, *Selector,
+                                 SelectorPieces)) {
+        Ranges.emplace_back(std::move(SelectorPieces));
+        SelectorPieces.clear();
+      }
+      break;
+    case tok::l_paren:
+      ParenCount++;
+      break;
+    case tok::r_paren:
+      if (ParenCount > 0) {
+        --ParenCount;
+      }
+      break;
+    case tok::r_brace:
+      if (BraceCount > 0) {
+        --BraceCount;
+      }
+      break;
+    case tok::l_brace:
+      // At the top level scope we should only have method defs.
+      if (BraceCount == 0 && ParenCount == 0) {
+        // All the selector pieces in the method def have been found.
+        if (SelectorPieces.size() == NumArgs) {
+          Ranges.emplace_back(std::move(SelectorPieces));
+        }
+        SelectorPieces.clear();
+      }
+      ++BraceCount;
+      break;
+    case tok::semi:
+      // At the top level scope we should only have method decls.
+      if (BraceCount == 0 && ParenCount == 0) {
+        // All the selector pieces in the method decl have been found.
+        if (SelectorPieces.size() == NumArgs) {
+          Ranges.emplace_back(std::move(SelectorPieces));
+        }
+        SelectorPieces.clear();
+      }
+      break;
+    default:
+      break;
+    }
+  }
+  return Ranges;
+}
+
+clangd::Range tokenRangeForLoc(ParsedAST &AST, SourceLocation TokLoc,
+                               const SourceManager &SM,
+                               const LangOptions &LangOpts) {
+  const auto *Token = AST.getTokens().spelledTokenAt(TokLoc);
+  assert(Token && "rename expects spelled tokens");
+  clangd::Range Result;
+  Result.start = sourceLocToPosition(SM, Token->location());
+  Result.end = sourceLocToPosition(SM, Token->endLocation());
   return Result;
 }
 
+// AST-based ObjC method rename, it renames all occurrences in the main file
+// even for selectors which may have multiple tokens.
+llvm::Expected<tooling::Replacements>
+renameObjCMethodWithinFile(ParsedAST &AST, const ObjCMethodDecl *MD,
+                           llvm::StringRef NewName,
+                           std::vector<SourceLocation> SelectorOccurences) {
+  const SourceManager &SM = AST.getSourceManager();
+  auto Code = SM.getBufferData(SM.getMainFileID());
+  auto RenameIdentifier = MD->getSelector().getNameForSlot(0).str();
+  llvm::SmallVector<llvm::StringRef, 8> NewNames;
+  NewName.split(NewNames, ":");
+
+  std::vector<Range> Ranges;
+  const auto &LangOpts = MD->getASTContext().getLangOpts();
+  for (const auto &Loc : SelectorOccurences)
+    Ranges.push_back(tokenRangeForLoc(AST, Loc, SM, LangOpts));
+  auto FilePath = AST.tuPath();
+  auto RenameRanges = collectRenameIdentifierRanges(
+      RenameIdentifier, Code, LangOpts, MD->getSelector());
----------------
ahoppen wrote:

That seems quite error-prone to me. If we have index data that tells us where the selectors appear, we should be using it in my opinion – both for local and global rename.

https://github.com/llvm/llvm-project/pull/76466


More information about the cfe-commits mailing list