r322690 - [Format] Improve ObjC header guessing heuristic
Ben Hamilton via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 17 09:33:08 PST 2018
Author: benhamilton
Date: Wed Jan 17 09:33:08 2018
New Revision: 322690
URL: http://llvm.org/viewvc/llvm-project?rev=322690&view=rev
Log:
[Format] Improve ObjC header guessing heuristic
Summary:
This improves upon the previous Objective-C header guessing heuristic
from rC320479.
Now, we run the lexer on C++ header files and look for Objective-C
keywords and syntax. We also look for Foundation types.
Test Plan: make -j12 FormatTests && ./tools/clang/unittests/Format/FormatTests
Reviewers: jolesiak, krasimir
Reviewed By: jolesiak
Subscribers: klimek, cfe-commits
Differential Revision: https://reviews.llvm.org/D42135
Modified:
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/unittests/Format/FormatTestObjC.cpp
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=322690&r1=322689&r2=322690&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Wed Jan 17 09:33:08 2018
@@ -32,6 +32,7 @@
#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
@@ -40,6 +41,7 @@
#include <algorithm>
#include <memory>
#include <string>
+#include <unordered_set>
#define DEBUG_TYPE "format-formatter"
@@ -48,6 +50,16 @@ using clang::format::FormatStyle;
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat)
+namespace std {
+/// Hash functor specialization that lets llvm::StringRef be used as the key
+/// of std::unordered_set; the hash itself is delegated to llvm::hash_value.
+template <> struct hash<llvm::StringRef> {
+  size_t operator()(const llvm::StringRef &Str) const {
+    return llvm::hash_value(Str);
+  }
+};
+} // namespace std
+
namespace llvm {
namespace yaml {
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
@@ -1400,6 +1412,101 @@ private:
std::set<FormatToken *, FormatTokenLess> DeletedTokens;
};
+/// Token analyzer that guesses whether code lexed as C++ is really
+/// Objective-C.
+///
+/// It produces no replacements; analyze() only records a verdict that can be
+/// read back through isObjC().
+class ObjCHeaderStyleGuesser : public TokenAnalyzer {
+public:
+  ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style), IsObjC(false) {}
+
+  /// Runs the heuristic over \p AnnotatedLines and stores the result.
+  /// Always returns an empty replacement set paired with 0.
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override {
+    assert(Style.Language == FormatStyle::LK_Cpp);
+    IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords());
+    tooling::Replacements Result;
+    return {Result, 0};
+  }
+
+  /// Returns the verdict stored by the most recent analyze() call (false if
+  /// analyze() has not run yet).
+  bool isObjC() const { return IsObjC; }
+
+private:
+  /// Returns true as soon as any token in \p AnnotatedLines looks like
+  /// Objective-C:
+  ///  - an '@' followed by interface/implementation/protocol/end, a numeric
+  ///    constant, '[' or '{';
+  ///  - an identifier naming one of the listed Foundation types;
+  ///  - an Objective-C string literal, NS_ENUM / NS_OPTIONS, or a token the
+  ///    annotator already tagged with an ObjC-specific token type.
+  static bool guessIsObjC(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                          const AdditionalKeywords &Keywords) {
+    static const std::unordered_set<StringRef> FoundationIdentifiers = {
+        "CGFloat",
+        "NSAffineTransform",
+        "NSArray",
+        "NSAttributedString",
+        "NSCache",
+        "NSCharacterSet",
+        "NSCountedSet",
+        "NSData",
+        "NSDataDetector",
+        "NSDecimal",
+        "NSDecimalNumber",
+        "NSDictionary",
+        "NSEdgeInsets",
+        "NSHashTable",
+        "NSIndexPath",
+        "NSIndexSet",
+        "NSInteger",
+        "NSLocale",
+        "NSMapTable",
+        "NSMutableArray",
+        "NSMutableAttributedString",
+        "NSMutableCharacterSet",
+        "NSMutableData",
+        "NSMutableDictionary",
+        "NSMutableIndexSet",
+        "NSMutableOrderedSet",
+        "NSMutableSet",
+        "NSMutableString",
+        "NSNumber",
+        "NSNumberFormatter",
+        "NSOrderedSet",
+        "NSPoint",
+        "NSPointerArray",
+        "NSRange",
+        "NSRect",
+        "NSRegularExpression",
+        "NSSet",
+        "NSSize",
+        "NSString",
+        "NSUInteger",
+        "NSURL",
+        "NSURLComponents",
+        "NSURLQueryItem",
+        "NSUUID",
+    };
+
+    for (const AnnotatedLine *Line : AnnotatedLines) {
+      // Start at the first token of the line, not the second: a line may
+      // begin with a Foundation type or NS_ENUM (e.g. "NSString *Foo();").
+      // Starting at First also avoids dereferencing a null First.
+      for (FormatToken *FormatTok = Line->First; FormatTok;
+           FormatTok = FormatTok->Next) {
+        // Previous is checked before use because the loop now visits the
+        // first token of the line as well.
+        if ((FormatTok->Previous && FormatTok->Previous->is(tok::at) &&
+             (FormatTok->isObjCAtKeyword(tok::objc_interface) ||
+              FormatTok->isObjCAtKeyword(tok::objc_implementation) ||
+              FormatTok->isObjCAtKeyword(tok::objc_protocol) ||
+              FormatTok->isObjCAtKeyword(tok::objc_end) ||
+              FormatTok->isOneOf(tok::numeric_constant, tok::l_square,
+                                 tok::l_brace))) ||
+            (FormatTok->Tok.isAnyIdentifier() &&
+             FoundationIdentifiers.find(FormatTok->TokenText) !=
+                 FoundationIdentifiers.end()) ||
+            FormatTok->is(TT_ObjCStringLiteral) ||
+            FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
+                               TT_ObjCBlockLBrace, TT_ObjCBlockLParen,
+                               TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr,
+                               TT_ObjCMethodSpecifier, TT_ObjCProperty)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool IsObjC;
+};
+
struct IncludeDirective {
StringRef Filename;
StringRef Text;
@@ -2185,14 +2292,15 @@ llvm::Expected<FormatStyle> getStyle(Str
FormatStyle Style = getLLVMStyle();
Style.Language = getLanguageByFileName(FileName);
- // This is a very crude detection of whether a header contains ObjC code that
- // should be improved over time and probably be done on tokens, not one the
- // bare content of the file.
- if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
- (Code.contains("\n- (") || Code.contains("\n+ (") ||
- Code.contains("\n@end\n") || Code.contains("\n@end ") ||
- Code.endswith("@end")))
- Style.Language = FormatStyle::LK_ObjC;
+ if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+ ObjCHeaderStyleGuesser Guesser(*Env, Style);
+ Guesser.process();
+ if (Guesser.isObjC()) {
+ Style.Language = FormatStyle::LK_ObjC;
+ }
+ }
FormatStyle FallbackStyle = getNoStyle();
if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
Modified: cfe/trunk/unittests/Format/FormatTestObjC.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestObjC.cpp?rev=322690&r1=322689&r2=322690&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestObjC.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestObjC.cpp Wed Jan 17 09:33:08 2018
@@ -94,6 +94,66 @@ TEST(FormatTestObjCStyle, DetectsObjCInH
Style = getStyle("LLVM", "a.h", "none", "void f() {}");
ASSERT_TRUE((bool)Style);
EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none", "@interface Foo\n@end\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none",
+ "const int interface = 1;\nconst int end = 2;\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none", "@protocol Foo\n@end\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none",
+ "const int protocol = 1;\nconst int end = 2;\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none", "extern NSString *kFoo;\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style =
+ getStyle("{}", "a.h", "none", "typedef NS_ENUM(NSInteger, Foo) {};\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none", "enum Foo {};");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none", "extern NSInteger Foo();\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style =
+ getStyle("{}", "a.h", "none", "inline void Foo() { Log(@\"Foo\"); }\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style =
+ getStyle("{}", "a.h", "none", "inline void Foo() { Log(\"Foo\"); }\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+ Style =
+ getStyle("{}", "a.h", "none", "inline void Foo() { id = @[1, 2, 3]; }\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none",
+ "inline void Foo() { id foo = @{1: 2, 3: 4, 5: 6}; }\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+ Style = getStyle("{}", "a.h", "none",
+ "inline void Foo() { int foo[] = {1, 2, 3}; }\n");
+ ASSERT_TRUE((bool)Style);
+ EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
}
TEST_F(FormatTestObjC, FormatObjCTryCatch) {
More information about the cfe-commits
mailing list