[cfe-commits] PATCH: In CorrectTypo, use the cached correction as a starting point instead. (issue 5570046)

rikka at google.com rikka at google.com
Mon Jan 23 12:39:24 PST 2012


Reviewers: kyrtzidis_apple.com,

Description:
Previously, for unqualified lookups, a positive cache hit is used as the
only non-keyword correction and a negative cache hit immediately returns
an empty TypoCorrection. With the new callback objects, this behavior
causes false negatives by not accounting for the fact that callback
objects alter the set of potential/allowed corrections. The new behavior
is to seed the set of corrections with the cached correction (for
positive hits) to estabilishing a baseline edit distance. Negative cache
hits are only stored or used when either no callback object is provided
or when it returns true for a call to ValidateCandidate with an empty
TypoCorrection (i.e. when ValidateCandidate does not seem to be doing
any checking of the TypoCorrection, such as when an instance of the base
callback class is used solely to specify the set of keywords to be
accepted).

Please review this at http://codereview.appspot.com/5570046/

Affected files:
   M include/clang/Parse/Parser.h
   M include/clang/Sema/Sema.h
   M lib/Parse/ParseExpr.cpp
   M lib/Parse/ParseExprCXX.cpp
   M lib/Sema/SemaExpr.cpp
   M lib/Sema/SemaLookup.cpp
   M test/SemaCXX/typo-correction.cpp


-------------- next part --------------
Index: include/clang/Parse/Parser.h
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index ea79b987e1b48d6197f6d7e9685518ae461d06c3..07f39603f00648d7995e8b803c17370f8ff62026 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -1260,10 +1260,17 @@ private:
   //===--------------------------------------------------------------------===//
   // C99 6.5: Expressions.
 
-  ExprResult ParseExpression();
+  /// TypeCastState - State whether an expression is or may be a type cast.
+  enum TypeCastState {
+    NotTypeCast = 0,
+    MaybeTypeCast,
+    IsTypeCast
+  };
+
+  ExprResult ParseExpression(TypeCastState isTypeCast = NotTypeCast);
   ExprResult ParseConstantExpression();
   // Expr that doesn't include commas.
-  ExprResult ParseAssignmentExpression();
+  ExprResult ParseAssignmentExpression(TypeCastState isTypeCast = NotTypeCast);
 
   ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc);
 
@@ -1274,10 +1281,10 @@ private:
   ExprResult ParseCastExpression(bool isUnaryExpression,
                                  bool isAddressOfOperand,
                                  bool &NotCastExpr,
-                                 bool isTypeCast);
+                                 TypeCastState isTypeCast);
   ExprResult ParseCastExpression(bool isUnaryExpression,
                                  bool isAddressOfOperand = false,
-                                 bool isTypeCast = false);
+                                 TypeCastState isTypeCast = NotTypeCast);
 
   /// Returns true if the next token would start a postfix-expression
   /// suffix.
Index: include/clang/Sema/Sema.h
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 2055f3bf6ac17a8cdde5fe87b8253452cc426003..238e5ae3e7f404fe6e78dee53f095419e4341f1b 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -2287,7 +2287,8 @@ public:
   SourceRange getExprRange(Expr *E) const;
 
   ExprResult ActOnIdExpression(Scope *S, CXXScopeSpec &SS, UnqualifiedId &Id,
-                               bool HasTrailingLParen, bool IsAddressOfOperand);
+                               bool HasTrailingLParen, bool IsAddressOfOperand,
+                               CorrectionCandidateCallback *CCC = 0);
 
   void DecomposeUnqualifiedId(const UnqualifiedId &Id,
                               TemplateArgumentListInfo &Buffer,
Index: lib/Parse/ParseExpr.cpp
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index bdebb5e6b9492a06859b0383606d1728390fd2e5..c3c5e2611836607e62bdc035740dbc167b9c7d73 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -23,6 +23,7 @@
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ParsedTemplate.h"
+#include "clang/Sema/TypoCorrection.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "RAIIObjectsForParser.h"
 #include "llvm/ADT/SmallVector.h"
@@ -174,8 +175,8 @@ static prec::Level getBinOpPrecedence(tok::TokenKind Kind,
 ///       expression: [C99 6.5.17]
 ///         assignment-expression ...[opt]
 ///         expression ',' assignment-expression ...[opt]
-ExprResult Parser::ParseExpression() {
-  ExprResult LHS(ParseAssignmentExpression());
+ExprResult Parser::ParseExpression(TypeCastState isTypeCast) {
+  ExprResult LHS(ParseAssignmentExpression(isTypeCast));
   return ParseRHSOfBinaryExpression(move(LHS), prec::Comma);
 }
 
@@ -211,7 +212,7 @@ Parser::ParseExpressionWithLeadingExtension(SourceLocation ExtLoc) {
 }
 
 /// ParseAssignmentExpression - Parse an expr that doesn't include commas.
-ExprResult Parser::ParseAssignmentExpression() {
+ExprResult Parser::ParseAssignmentExpression(TypeCastState isTypeCast) {
   if (Tok.is(tok::code_completion)) {
     Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression);
     cutOffParsing();
@@ -221,7 +222,9 @@ ExprResult Parser::ParseAssignmentExpression() {
   if (Tok.is(tok::kw_throw))
     return ParseThrowExpression();
 
-  ExprResult LHS = ParseCastExpression(/*isUnaryExpression=*/false);
+  ExprResult LHS = ParseCastExpression(/*isUnaryExpression=*/false,
+                                       /*isAddressOfOperand=*/false,
+                                       isTypeCast);
   return ParseRHSOfBinaryExpression(move(LHS), prec::Assignment);
 }
 
@@ -417,7 +420,7 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
 ///
 ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
                                        bool isAddressOfOperand,
-                                       bool isTypeCast) {
+                                       TypeCastState isTypeCast) {
   bool NotCastExpr;
   ExprResult Res = ParseCastExpression(isUnaryExpression,
                                        isAddressOfOperand,
@@ -428,6 +431,29 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
   return move(Res);
 }
 
+namespace {
+class CastExpressionIdValidator : public CorrectionCandidateCallback {
+ public:
+  CastExpressionIdValidator(bool AllowTypes, bool AllowNonTypes)
+      : AllowNonTypes(AllowNonTypes) {
+    WantTypeSpecifiers = AllowTypes;
+  }
+
+  virtual bool ValidateCandidate(const TypoCorrection &candidate) {
+    NamedDecl *ND = candidate.getCorrectionDecl();
+    if (!ND)
+      return candidate.isKeyword();
+
+    if (isa<TypeDecl>(ND))
+      return WantTypeSpecifiers;
+    return AllowNonTypes;
+  }
+
+ private:
+  bool AllowNonTypes;
+};
+}
+
 /// ParseCastExpression - Parse a cast-expression, or, if isUnaryExpression is
 /// true, parse a unary-expression. isAddressOfOperand exists because an
 /// id-expression that is the operand of address-of gets special treatment
@@ -592,7 +618,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
 ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
                                        bool isAddressOfOperand,
                                        bool &NotCastExpr,
-                                       bool isTypeCast) {
+                                       TypeCastState isTypeCast) {
   ExprResult Res;
   tok::TokenKind SavedKind = Tok.getKind();
   NotCastExpr = false;
@@ -623,7 +649,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
       ColonProtectionRAIIObject X(*this, false);
 
       Res = ParseParenExpression(ParenExprType, false/*stopIfCastExr*/,
-                                 isTypeCast, CastTy, RParenLoc);
+                                 isTypeCast == IsTypeCast, CastTy, RParenLoc);
     }
 
     switch (ParenExprType) {
@@ -769,9 +795,12 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
     // not.
     UnqualifiedId Name;
     CXXScopeSpec ScopeSpec;
+    CastExpressionIdValidator Validator(isTypeCast != NotTypeCast,
+                                        isTypeCast != IsTypeCast);
     Name.setIdentifier(&II, ILoc);
     Res = Actions.ActOnIdExpression(getCurScope(), ScopeSpec, Name, 
-                                    Tok.is(tok::l_paren), isAddressOfOperand);
+                                    Tok.is(tok::l_paren), isAddressOfOperand,
+                                    &Validator);
     break;
   }
   case tok::char_constant:     // constant: character-constant
@@ -1953,7 +1982,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr,
         // TODO: For cast expression with CastTy.
         Result = ParseCastExpression(/*isUnaryExpression=*/false,
                                      /*isAddressOfOperand=*/false,
-                                     /*isTypeCast=*/true);
+                                     /*isTypeCast=*/IsTypeCast);
         if (!Result.isInvalid()) {
           Result = Actions.ActOnCastExpr(getCurScope(), OpenLoc,
                                          DeclaratorInfo, CastTy, 
@@ -1980,7 +2009,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr,
   } else {
     InMessageExpressionRAIIObject InMessage(*this, false);
     
-    Result = ParseExpression();
+    Result = ParseExpression(MaybeTypeCast);
     ExprType = SimpleExpr;
 
     // Don't build a paren expression unless we actually match a ')'.
Index: lib/Parse/ParseExprCXX.cpp
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index 8046f6b88c7a69d58069562b112d2ec44657e162..cc01d291cde3d85ac75eb708280ea5d35edabaf9 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -2549,7 +2549,7 @@ Parser::ParseCXXAmbiguousParenExpression(ParenParseOption &ExprType,
                                    false/*isAddressofOperand*/,
                                    NotCastExpr,
                                    // type-id has priority.
-                                   true/*isTypeCast*/);
+                                   IsTypeCast);
     }
 
     // If we parsed a cast-expression, it's really a type-id, otherwise it's
Index: lib/Sema/SemaExpr.cpp
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 96496662503c33437e10db34b0c157b6927e1f5d..af3ac8057676d8d38fc27c57fc82f75fbff7b6ef 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -1696,7 +1696,8 @@ ExprResult Sema::ActOnIdExpression(Scope *S,
                                    CXXScopeSpec &SS,
                                    UnqualifiedId &Id,
                                    bool HasTrailingLParen,
-                                   bool IsAddressOfOperand) {
+                                   bool IsAddressOfOperand,
+                                   CorrectionCandidateCallback *CCC) {
   assert(!(IsAddressOfOperand && HasTrailingLParen) &&
          "cannot be direct & operand and have a trailing lparen");
 
@@ -1811,7 +1812,7 @@ ExprResult Sema::ActOnIdExpression(Scope *S,
                                           TemplateArgs);
 
       CorrectionCandidateCallback DefaultValidator;
-      if (DiagnoseEmptyLookup(S, SS, R, DefaultValidator))
+      if (DiagnoseEmptyLookup(S, SS, R, CCC ? *CCC : DefaultValidator))
         return ExprError();
 
       assert(!R.empty() &&
Index: lib/Sema/SemaLookup.cpp
diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp
index dd50cd2a5a25f34ca9226f0178a92303c3bb5f45..23ecb2509030db95d4f80203649a837fb6d96cc1 100644
--- a/lib/Sema/SemaLookup.cpp
+++ b/lib/Sema/SemaLookup.cpp
@@ -3569,6 +3569,11 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
 
   TypoCorrectionConsumer Consumer(*this, Typo);
 
+  // If a callback object returns true for an empty typo correction candidate,
+  // assume it does not do any actual validation of the candidates.
+  TypoCorrection EmptyCorrection;
+  bool ValidatingCallback = CCC && !CCC->ValidateCandidate(EmptyCorrection);
+
   // Perform name lookup to find visible, similarly-named entities.
   bool IsUnqualifiedLookup = false;
   if (MemberContext) {
@@ -3598,41 +3603,46 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
     IsUnqualifiedLookup = true;
     UnqualifiedTyposCorrectedMap::iterator Cached
       = UnqualifiedTyposCorrected.find(Typo);
-    if (Cached == UnqualifiedTyposCorrected.end() ||
-        (Cached->second && CCC && !CCC->ValidateCandidate(Cached->second))) {
+    if (Cached != UnqualifiedTyposCorrected.end()) {
+      // Add the cached value, unless it's a keyword or fails validation. In the
+      // keyword case, we'll end up adding the keyword below.
+      if (Cached->second) {
+        if (!Cached->second.isKeyword() &&
+            (!CCC || CCC->ValidateCandidate(Cached->second)))
+          Consumer.addCorrection(Cached->second);
+      } else {
+        // Only honor no-correction cache hits when a callback that will validate
+        // correction candidates is not being used.
+        if (!ValidatingCallback)
+          return TypoCorrection();
+      }
+    }
+    if (Cached == UnqualifiedTyposCorrected.end()) {
       // Provide a stop gap for files that are just seriously broken.  Trying
       // to correct all typos can turn into a HUGE performance penalty, causing
       // some files to take minutes to get rejected by the parser.
       if (TyposCorrected + UnqualifiedTyposCorrected.size() >= 20)
         return TypoCorrection();
+    }
 
-      // For unqualified lookup, look through all of the names that we have
-      // seen in this translation unit.
-      for (IdentifierTable::iterator I = Context.Idents.begin(),
-                                  IEnd = Context.Idents.end();
-           I != IEnd; ++I)
-        Consumer.FoundName(I->getKey());
-
-      // Walk through identifiers in external identifier sources.
-      if (IdentifierInfoLookup *External
-                              = Context.Idents.getExternalIdentifierLookup()) {
-        llvm::OwningPtr<IdentifierIterator> Iter(External->getIdentifiers());
-        do {
-          StringRef Name = Iter->Next();
-          if (Name.empty())
-            break;
+    // For unqualified lookup, look through all of the names that we have
+    // seen in this translation unit.
+    for (IdentifierTable::iterator I = Context.Idents.begin(),
+                                IEnd = Context.Idents.end();
+         I != IEnd; ++I)
+      Consumer.FoundName(I->getKey());
 
-          Consumer.FoundName(Name);
-        } while (true);
-      }
-    } else {
-      // Use the cached value, unless it's a keyword. In the keyword case, we'll
-      // end up adding the keyword below.
-      if (!Cached->second)
-        return TypoCorrection();
+    // Walk through identifiers in external identifier sources.
+    if (IdentifierInfoLookup *External
+                            = Context.Idents.getExternalIdentifierLookup()) {
+      llvm::OwningPtr<IdentifierIterator> Iter(External->getIdentifiers());
+      do {
+        StringRef Name = Iter->Next();
+        if (Name.empty())
+          break;
 
-      if (!Cached->second.isKeyword())
-        Consumer.addCorrection(Cached->second);
+        Consumer.FoundName(Name);
+      } while (true);
     }
   }
 
@@ -3813,8 +3823,10 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
   ED = Consumer.begin()->first;
 
   if (ED > 0 && Typo->getName().size() / ED < 3) {
-    // If this was an unqualified lookup, note that no correction was found.
-    if (IsUnqualifiedLookup)
+    // If this was an unqualified lookup and we believe the callback
+    // object wouldn't have filtered out possible corrections, note
+    // that no correction was found.
+    if (IsUnqualifiedLookup && !ValidatingCallback)
       (void)UnqualifiedTyposCorrected[Typo];
 
     return TypoCorrection();
@@ -3872,7 +3884,9 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
     return BestResults["super"];
   }
 
-  if (IsUnqualifiedLookup)
+  // If this was an unqualified lookup and we believe the callback object did
+  // not filter out possible corrections, note that no correction was found.
+  if (IsUnqualifiedLookup && !ValidatingCallback)
     (void)UnqualifiedTyposCorrected[Typo];
 
   return TypoCorrection();
Index: test/SemaCXX/typo-correction.cpp
diff --git a/test/SemaCXX/typo-correction.cpp b/test/SemaCXX/typo-correction.cpp
index edf4c6460a7912c30397ba2060a67449c58c77c8..1fd10ac75987a4a08142d2465299387c33624fe1 100644
--- a/test/SemaCXX/typo-correction.cpp
+++ b/test/SemaCXX/typo-correction.cpp
@@ -85,11 +85,26 @@ template<typename ...TypeNames> struct count { // expected-note{{parameter pack
 
 // Test the typo-correction callback in Sema::DiagnoseUnknownTypeName.
 namespace unknown_type_test {
-  class StreamOut {}; // expected-note{{'StreamOut' declared here}}
-  long stream_count;
+  class StreamOut {}; // expected-note 2 {{'StreamOut' declared here}}
+  long stream_count; // expected-note 2 {{'stream_count' declared here}}
 };
 unknown_type_test::stream_out out; // expected-error{{no type named 'stream_out' in namespace 'unknown_type_test'; did you mean 'StreamOut'?}}
 
+// Demonstrate a case where using only the cached value returns the wrong thing
+// when the cached value was the result of a previous callback object that only
+// accepts a subset of the current callback object.
+namespace {
+using namespace unknown_type_test;
+void bar(long i);
+void before_caching_classname() {
+  bar((stream_out)); // expected-error{{use of undeclared identifier 'stream_out'; did you mean 'stream_count'?}}
+}
+stream_out out; // expected-error{{unknown type name 'stream_out'; did you mean 'StreamOut'?}}
+void after_caching_classname() {
+  bar((stream_out)); // expected-error{{use of undeclared identifier 'stream_out'; did you mean 'stream_count'?}}
+}
+}
+
 // Test the typo-correction callback in Sema::DiagnoseInvalidRedeclaration.
 struct BaseDecl {
   void add_in(int i);
@@ -98,3 +113,15 @@ struct TestRedecl : public BaseDecl {
   void add_it(int i); // expected-note{{'add_it' declared here}}
 };
 void TestRedecl::add_in(int i) {} // expected-error{{out-of-line definition of 'add_in' does not match any declaration in 'TestRedecl'; did you mean 'add_it'?}}
+
+// Test the improved typo correction for the Parser::ParseCastExpr =>
+// Sema::ActOnIdExpression => Sema::DiagnoseEmptyLookup call path.
+class SomeNetMessage;
+class Message {};
+void foo(Message&);
+void foo(SomeNetMessage&);
+void doit(void *data) {
+  Message somenetmsg; // expected-note{{'somenetmsg' declared here}}
+  foo(somenetmessage); // expected-error{{use of undeclared identifier 'somenetmessage'; did you mean 'somenetmsg'?}}
+  foo((somenetmessage)data); // expected-error{{use of undeclared identifier 'somenetmessage'; did you mean 'SomeNetMessage'?}}
+}


More information about the cfe-commits mailing list