[clang-tools-extra] Add support for renaming objc methods, even those with multiple selector pieces (PR #76466)

David Goldman via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 7 08:32:06 PST 2024


================
@@ -538,11 +565,254 @@ std::optional<InvalidName> checkName(const NamedDecl &RenameDecl,
             Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
     }
   }
-  if (Result)
+  if (Result) {
     InvalidNameMetric.record(1, toString(Result->K));
+    return makeError(*Result);
+  }
+  return std::nullopt;
+}
+
+bool isMatchingSelectorName(const syntax::Token &Cur, const syntax::Token &Next,
+                            const SourceManager &SM,
+                            llvm::StringRef SelectorName) {
+  if (SelectorName.empty())
+    return Cur.kind() == tok::colon;
+  return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+         Cur.text(SM) == SelectorName &&
+         // We require the selector name and : to be contiguous to avoid
+         // potential conflicts with ternary expression.
+         //
+         // e.g. support `foo:` but not `foo :`.
+         Cur.endLocation() == Next.location();
+}
+
+bool isSelectorLike(const syntax::Token &Cur, const syntax::Token &Next) {
+  return Cur.kind() == tok::identifier && Next.kind() == tok::colon &&
+         // We require the selector name and : to be contiguous.
+         // e.g. support `foo:` but not `foo :`.
+         Cur.endLocation() == Next.location();
+}
+
+bool parseMessageExpression(llvm::ArrayRef<syntax::Token> Tokens,
+                            const SourceManager &SM, unsigned Index,
+                            unsigned Last, Selector Sel,
+                            std::vector<Range> &SelectorPieces) {
+
+  unsigned NumArgs = Sel.getNumArgs();
+  llvm::SmallVector<char, 8> Closes;
+  SelectorPieces.clear();
+  while (Index < Last) {
+    const auto &Tok = Tokens[Index];
+
+    if (Closes.empty()) {
+      auto PieceCount = SelectorPieces.size();
+      if (PieceCount < NumArgs &&
+          isMatchingSelectorName(Tok, Tokens[Index + 1], SM,
+                                 Sel.getNameForSlot(PieceCount))) {
+        // If 'foo:' instead of ':' (empty selector), we need to skip the ':'
+        // token after the name.
+        if (!Sel.getNameForSlot(PieceCount).empty()) {
+          ++Index;
+        }
+        SelectorPieces.push_back(
+            halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
+        continue;
+      }
+      // If we've found all pieces but the current token looks like another
+      // selector piece, it means the method being renamed is a strict prefix of
+      // the selector we've found - should be skipped.
+      if (SelectorPieces.size() >= NumArgs &&
+          isSelectorLike(Tok, Tokens[Index + 1]))
+        return false;
+    }
+
+    switch (Tok.kind()) {
+    case tok::l_square:
+      Closes.push_back(']');
+      break;
+    case tok::l_paren:
+      Closes.push_back(')');
+      break;
+    case tok::l_brace:
+      Closes.push_back('}');
+      break;
+    case tok::r_square:
+      if (Closes.empty())
+        return SelectorPieces.size() == NumArgs;
+
+      if (Closes.back() != ']')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::r_paren:
+      if (Closes.empty() || Closes.back() != ')')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::r_brace:
+      if (Closes.empty() || Closes.back() != '}')
+        return false;
+      Closes.pop_back();
+      break;
+    case tok::semi:
+      // top level ; ends all statements.
+      if (Closes.empty())
+        return false;
+      break;
+    default:
+      break;
+    }
+
+    ++Index;
+  }
+  return false;
+}
+
+/// Collects all ranges of the given identifier/selector in the source code.
+///
+/// If a selector is given, this does a full lex of the given source code in
+/// order to identify all selector fragments (e.g. in method exprs/decls) since
+/// they are non-contiguous.
+std::vector<SymbolRange> collectRenameIdentifierRanges(
+    llvm::StringRef Identifier, llvm::StringRef Content,
+    const LangOptions &LangOpts, std::optional<Selector> Selector) {
+  std::vector<SymbolRange> Ranges;
+  if (!Selector) {
+    auto IdentifierRanges =
+        collectIdentifierRanges(Identifier, Content, LangOpts);
+    for (const auto &R : IdentifierRanges)
+      Ranges.emplace_back(R);
+    return Ranges;
+  }
+  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
+  std::string NullTerminatedCode = Content.str();
+  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
+  auto &SM = FileSM.get();
+
+  // We track parens and braces to ensure that we don't accidentally try parsing
+  // a method declaration or definition which isn't at the top level or similar
+  // looking expressions (e.g. an @selector() expression).
+  unsigned ParenCount = 0;
+  unsigned BraceCount = 0;
+  unsigned NumArgs = Selector->getNumArgs();
+
+  std::vector<Range> SelectorPieces;
+  auto Tokens = syntax::tokenize(SM.getMainFileID(), SM, LangOpts);
+  unsigned Last = Tokens.size() - 1;
+  for (unsigned Index = 0; Index < Last; ++Index) {
+    const auto &Tok = Tokens[Index];
+
----------------
DavidGoldman wrote:

Done. This lexer logic runs independent of those positions and instead clangd checks afterwards that the lexed positions line up with the index positions, allowing for each one either the line or column # to have changed (e.g. in case the index data is slightly stale).

https://github.com/llvm/llvm-project/pull/76466


More information about the cfe-commits mailing list