[cfe-commits] r150495 - in /cfe/trunk: include/clang/Sema/TypoCorrection.h lib/Sema/SemaLookup.cpp test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp

Kaelyn Uhrain rikka at google.com
Tue Feb 14 10:56:48 PST 2012


Author: rikka
Date: Tue Feb 14 12:56:48 2012
New Revision: 150495

URL: http://llvm.org/viewvc/llvm-project?rev=150495&view=rev
Log:
Use several weighted factors to determine typo candidate viablity.

Replace the simple Levenshtein edit distance for typo correction
candidates--and the hacky way adding namespace qualifiers would affect
the edit distance--with a synthetic "edit distance" comprised of several
factors and their relative weights. This also allows the typo correction
callback object to convey more information about the viability of a
correction candidate than simply viable or not viable.

Modified:
    cfe/trunk/include/clang/Sema/TypoCorrection.h
    cfe/trunk/lib/Sema/SemaLookup.cpp
    cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp

Modified: cfe/trunk/include/clang/Sema/TypoCorrection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/TypoCorrection.h?rev=150495&r1=150494&r2=150495&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/TypoCorrection.h (original)
+++ cfe/trunk/include/clang/Sema/TypoCorrection.h Tue Feb 14 12:56:48 2012
@@ -23,32 +23,46 @@
 /// @brief Simple class containing the result of Sema::CorrectTypo
 class TypoCorrection {
 public:
+  // "Distance" for unusable corrections
+  static const unsigned InvalidDistance = ~0U;
+  // The largest distance still considered valid (larger edit distances are
+  // mapped to InvalidDistance by getEditDistance).
+  static const unsigned MaximumDistance = 10000U;
+
+  // Relative weightings of the "edit distance" components. The higher the
+  // weight, the more of a penalty to fitness the component will give (higher
+  // weights mean greater contribution to the total edit distance, with the
+  // best correction candidates having the lowest edit distance).
+  static const unsigned CharDistanceWeight = 100U;
+  static const unsigned QualifierDistanceWeight = 110U;
+  static const unsigned CallbackDistanceWeight = 150U;
+
   TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
-                 NestedNameSpecifier *NNS=0, unsigned distance=0)
-      : CorrectionName(Name),
-        CorrectionNameSpec(NNS),
-        EditDistance(distance) {
+                 NestedNameSpecifier *NNS=0, unsigned CharDistance=0,
+                 unsigned QualifierDistance=0)
+      : CorrectionName(Name), CorrectionNameSpec(NNS),
+      CharDistance(CharDistance), QualifierDistance(QualifierDistance),
+      CallbackDistance(0) {
     if (NameDecl)
       CorrectionDecls.push_back(NameDecl);
   }
 
   TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0,
-                 unsigned distance=0)
-      : CorrectionName(Name->getDeclName()),
-        CorrectionNameSpec(NNS),
-        EditDistance(distance) {
+                 unsigned CharDistance=0)
+      : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
+      CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {
     if (Name)
       CorrectionDecls.push_back(Name);
   }
 
   TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0,
-                 unsigned distance=0)
-      : CorrectionName(Name),
-        CorrectionNameSpec(NNS),
-        EditDistance(distance) {}
+                 unsigned CharDistance=0)
+      : CorrectionName(Name), CorrectionNameSpec(NNS),
+      CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {}
 
   TypoCorrection()
-      : CorrectionNameSpec(0), EditDistance(0) {}
+      : CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0),
+      CallbackDistance(0) {}
 
   /// \brief Gets the DeclarationName of the typo correction
   DeclarationName getCorrection() const { return CorrectionName; }
@@ -64,8 +78,41 @@
     CorrectionNameSpec = NNS;
   }
 
-  /// \brief Gets the "edit distance" of the typo correction from the typo
-  unsigned getEditDistance() const { return EditDistance; }
+  void setQualifierDistance(unsigned ED) {
+    QualifierDistance = ED;
+  }
+
+  void setCallbackDistance(unsigned ED) {
+    CallbackDistance = ED;
+  }
+
+  // Convert the given weighted edit distance to a roughly equivalent number of
+  // single-character edits (typically for comparison to the length of the
+  // string being edited).
+  static unsigned NormalizeEditDistance(unsigned ED) {
+    if (ED > MaximumDistance)
+      return InvalidDistance;
+    return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
+  }
+
+  /// \brief Gets the "edit distance" of the typo correction from the typo.
+  /// If Normalized is true, scale the distance down by the CharDistanceWeight
+  /// to return the edit distance in terms of single-character edits.
+  unsigned getEditDistance(bool Normalized = true) const {
+    if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
+        CallbackDistance > MaximumDistance)
+      return InvalidDistance;
+    unsigned ED =
+        CharDistance * CharDistanceWeight +
+        QualifierDistance * QualifierDistanceWeight +
+        CallbackDistance * CallbackDistanceWeight;
+    if (ED > MaximumDistance)
+      return InvalidDistance;
+    // Half the CharDistanceWeight is added to ED to simulate rounding since
+    // integer division truncates the value (i.e. round-to-nearest-int instead
+    // of round-to-zero).
+    return Normalized ? NormalizeEditDistance(ED) : ED;
+  }
 
   /// \brief Gets the pointer to the declaration of the typo correction
   NamedDecl* getCorrectionDecl() const {
@@ -143,13 +190,17 @@
   DeclarationName CorrectionName;
   NestedNameSpecifier *CorrectionNameSpec;
   llvm::SmallVector<NamedDecl*, 1> CorrectionDecls;
-  unsigned EditDistance;
+  unsigned CharDistance;
+  unsigned QualifierDistance;
+  unsigned CallbackDistance;
 };
 
 /// @brief Base class for callback objects used by Sema::CorrectTypo to check
 /// the validity of a potential typo correction.
 class CorrectionCandidateCallback {
  public:
+  static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
+
   CorrectionCandidateCallback()
       : WantTypeSpecifiers(true), WantExpressionKeywords(true),
         WantCXXNamedCasts(true), WantRemainingKeywords(true),
@@ -158,10 +209,26 @@
 
   virtual ~CorrectionCandidateCallback() {}
 
+  /// \brief Simple predicate used by the default RankCandidate to
+  /// determine whether to return an edit distance of 0 or InvalidDistance.
+  /// This can be overrided by validators that only need to determine if a
+  /// candidate is viable, without ranking potentially viable candidates.
+  /// Only ValidateCandidate or RankCandidate need to be overriden by a
+  /// callback wishing to check the viability of correction candidates.
   virtual bool ValidateCandidate(const TypoCorrection &candidate) {
     return true;
   }
 
+  /// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank
+  /// to a candidate (where a lower value represents a better candidate), or
+  /// returning InvalidDistance if the candidate is not at all viable. For
+  /// validation callbacks that only need to determine if a candidate is viable,
+  /// the default RankCandidate returns either 0 or InvalidDistance depending
+  /// whether ValidateCandidate returns true or false.
+  virtual unsigned RankCandidate(const TypoCorrection &candidate) {
+    return ValidateCandidate(candidate) ? 0 : InvalidDistance;
+  }
+
   // Flags for context-dependent keywords.
   // TODO: Expand these to apply to non-keywords or possibly remove them.
   bool WantTypeSpecifiers;

Modified: cfe/trunk/lib/Sema/SemaLookup.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaLookup.cpp?rev=150495&r1=150494&r2=150495&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaLookup.cpp (original)
+++ cfe/trunk/lib/Sema/SemaLookup.cpp Tue Feb 14 12:56:48 2012
@@ -3109,9 +3109,12 @@
     return (*BestResults.begin()->second)[Name];
   }
 
-  unsigned getBestEditDistance() {
-    return (BestResults.empty()) ?
-        (std::numeric_limits<unsigned>::max)() : BestResults.begin()->first;
+  unsigned getBestEditDistance(bool Normalized) {
+    if (BestResults.empty())
+      return (std::numeric_limits<unsigned>::max)();
+
+    unsigned BestED = BestResults.begin()->first;
+    return Normalized ? TypoCorrection::NormalizeEditDistance(BestED) : BestED;
   }
 };
 
@@ -3167,7 +3170,7 @@
 
 void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
   StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
-  TypoResultsMap *& Map = BestResults[Correction.getEditDistance()];
+  TypoResultsMap *& Map = BestResults[Correction.getEditDistance(false)];
   if (!Map)
     Map = new TypoResultsMap;
 
@@ -3478,6 +3481,12 @@
   }
 }
 
+static bool isCandidateViable(CorrectionCandidateCallback &CCC,
+                              TypoCorrection &Candidate) {
+  Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
+  return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
+}
+
 /// \brief Try to "correct" a typo in the source code by finding
 /// visible declarations whose names are similar to the name that was
 /// present in the source code.
@@ -3545,10 +3554,10 @@
 
   TypoCorrectionConsumer Consumer(*this, Typo);
 
-  // If a callback object returns true for an empty typo correction candidate,
-  // assume it does not do any actual validation of the candidates.
+  // If a callback object considers an empty typo correction candidate to be
+  // viable, assume it does not do any actual validation of the candidates.
   TypoCorrection EmptyCorrection;
-  bool ValidatingCallback = !CCC.ValidateCandidate(EmptyCorrection);
+  bool ValidatingCallback = !isCandidateViable(CCC, EmptyCorrection);
 
   // Perform name lookup to find visible, similarly-named entities.
   bool IsUnqualifiedLookup = false;
@@ -3584,7 +3593,7 @@
       // keyword case, we'll end up adding the keyword below.
       if (Cached->second) {
         if (!Cached->second.isKeyword() &&
-            CCC.ValidateCandidate(Cached->second))
+            isCandidateViable(CCC, Cached->second))
           Consumer.addCorrection(Cached->second);
       } else {
         // Only honor no-correction cache hits when a callback that will validate
@@ -3637,7 +3646,7 @@
 
   // Make sure that the user typed at least 3 characters for each correction
   // made. Otherwise, we don't even both looking at the results.
-  unsigned ED = Consumer.getBestEditDistance();
+  unsigned ED = Consumer.getBestEditDistance(true);
   if (ED > 0 && Typo->getName().size() / ED < 3) {
     // If this was an unqualified lookup, note that no correction was found.
     if (IsUnqualifiedLookup)
@@ -3666,7 +3675,7 @@
 
   // Weed out any names that could not be found by name lookup or, if a
   // CorrectionCandidateCallback object was provided, failed validation.
-  llvm::SmallPtrSet<IdentifierInfo*, 16> QualifiedResults;
+  llvm::SmallVector<TypoCorrection, 16> QualifiedResults;
   LookupResult TmpRes(*this, TypoName, LookupKind);
   TmpRes.suppressDiagnostics();
   while (!Consumer.empty()) {
@@ -3681,7 +3690,7 @@
       if (I->second.isResolved()) {
         TypoCorrectionConsumer::result_iterator Prev = I;
         ++I;
-        if (!CCC.ValidateCandidate(Prev->second))
+        if (!isCandidateViable(CCC, Prev->second))
           DI->second->erase(Prev);
         continue;
       }
@@ -3695,7 +3704,7 @@
       case LookupResult::NotFound:
       case LookupResult::NotFoundInCurrentInstantiation:
       case LookupResult::FoundUnresolvedValue:
-        QualifiedResults.insert(Name);
+        QualifiedResults.push_back(I->second);
         // We didn't find this name in our scope, or didn't like what we found;
         // ignore it.
         {
@@ -3718,7 +3727,7 @@
              TRD != TRDEnd; ++TRD)
           I->second.addCorrectionDecl(*TRD);
         ++I;
-        if (!CCC.ValidateCandidate(Prev->second))
+        if (!isCandidateViable(CCC, Prev->second))
           DI->second->erase(Prev);
         break;
       }
@@ -3727,7 +3736,7 @@
         TypoCorrectionConsumer::result_iterator Prev = I;
         I->second.setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
         ++I;
-        if (!CCC.ValidateCandidate(Prev->second))
+        if (!isCandidateViable(CCC, Prev->second))
           DI->second->erase(Prev);
         break;
       }
@@ -3744,7 +3753,7 @@
     // Only perform the qualified lookups for C++
     if (getLangOptions().CPlusPlus) {
       TmpRes.suppressDiagnostics();
-      for (llvm::SmallPtrSet<IdentifierInfo*,
+      for (llvm::SmallVector<TypoCorrection,
                              16>::iterator QRI = QualifiedResults.begin(),
                                         QRIEnd = QualifiedResults.end();
            QRI != QRIEnd; ++QRI) {
@@ -3752,33 +3761,33 @@
                                           NIEnd = Namespaces.end();
              NI != NIEnd; ++NI) {
           DeclContext *Ctx = NI->DeclCtx;
-          unsigned QualifiedED = ED + NI->EditDistance;
 
           // FIXME: Stop searching once the namespaces are too far away to create
           // acceptable corrections for this identifier (since the namespaces
           // are sorted in ascending order by edit distance).
 
           TmpRes.clear();
-          TmpRes.setLookupName(*QRI);
+          TmpRes.setLookupName(QRI->getCorrectionAsIdentifierInfo());
           if (!LookupQualifiedName(TmpRes, Ctx)) continue;
 
           // Any corrections added below will be validated in subsequent
           // iterations of the main while() loop over the Consumer's contents.
           switch (TmpRes.getResultKind()) {
           case LookupResult::Found:
-            Consumer.addName((*QRI)->getName(), TmpRes.getAsSingle<NamedDecl>(),
-                             QualifiedED, NI->NameSpecifier);
+            QRI->setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
+            QRI->setCorrectionSpecifier(NI->NameSpecifier);
+            QRI->setQualifierDistance(NI->EditDistance);
+            Consumer.addCorrection(*QRI);
             break;
-          case LookupResult::FoundOverloaded: {
-            TypoCorrection corr(&Context.Idents.get((*QRI)->getName()), NULL,
-                                NI->NameSpecifier, QualifiedED);
+          case LookupResult::FoundOverloaded:
+            QRI->setCorrectionSpecifier(NI->NameSpecifier);
+            QRI->setQualifierDistance(NI->EditDistance);
             for (LookupResult::iterator TRD = TmpRes.begin(),
                                      TRDEnd = TmpRes.end();
                  TRD != TRDEnd; ++TRD)
-              corr.addCorrectionDecl(*TRD);
-            Consumer.addCorrection(corr);
+              QRI->addCorrectionDecl(*TRD);
+            Consumer.addCorrection(*QRI);
             break;
-          }
           case LookupResult::NotFound:
           case LookupResult::NotFoundInCurrentInstantiation:
           case LookupResult::Ambiguous:
@@ -3796,7 +3805,7 @@
   if (Consumer.empty()) return TypoCorrection();
 
   TypoResultsMap &BestResults = *Consumer.begin()->second;
-  ED = Consumer.begin()->first;
+  ED = TypoCorrection::NormalizeEditDistance(Consumer.begin()->first);
 
   if (ED > 0 && Typo->getName().size() / ED < 3) {
     // If this was an unqualified lookup and we believe the callback
@@ -3808,22 +3817,6 @@
     return TypoCorrection();
   }
 
-  // If we have multiple possible corrections, eliminate the ones where we
-  // added namespace qualifiers to try to resolve the ambiguity (and to favor
-  // corrections without additional namespace qualifiers)
-  if (getLangOptions().CPlusPlus && BestResults.size() > 1) {
-    TypoCorrectionConsumer::distance_iterator DI = Consumer.begin();
-    for (TypoCorrectionConsumer::result_iterator I = DI->second->begin(),
-                                              IEnd = DI->second->end();
-         I != IEnd; /* Increment in loop. */) {
-      if (I->second.getCorrectionSpecifier() != NULL) {
-        TypoCorrectionConsumer::result_iterator Cur = I;
-        ++I;
-        DI->second->erase(Cur);
-      } else ++I;
-    }
-  }
-
   // If only a single name remains, return that result.
   if (BestResults.size() == 1) {
     const llvm::StringMapEntry<TypoCorrection> &Correction = *(BestResults.begin());

Modified: cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp?rev=150495&r1=150494&r2=150495&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp (original)
+++ cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp Tue Feb 14 12:56:48 2012
@@ -74,3 +74,24 @@
  (void)new llvm::GraphWriter; // expected-error {{expected a type}}
  (void)new llvm::Graphwriter<S>; // expected-error {{no template named 'Graphwriter' in namespace 'llvm'; did you mean 'GraphWriter'?}}
 }
+
+// If namespace prefixes and character edits have the same weight, correcting
+// "fimish" to "N::famish" would have the same edit distance as correcting
+// "fimish" to "Finish". The result would be no correction being suggested
+// unless one of the corrections is given precedence (e.g. by filtering out
+// suggestions with added namespace qualifiers).
+namespace N { void famish(int); }
+void Finish(int); // expected-note {{'Finish' declared here}}
+void Start() {
+  fimish(7); // expected-error {{use of undeclared identifier 'fimish'; did you mean 'Finish'?}}
+}
+
+// But just eliminating the corrections containing added namespace qualifiers
+// won't work if both of the tied corrections have namespace qualifiers added.
+namespace N {
+void someCheck(int); // expected-note {{'N::someCheck' declared here}}
+namespace O { void somechock(int); }
+}
+void confusing() {
+  somechick(7); // expected-error {{use of undeclared identifier 'somechick'; did you mean 'N::someCheck'?}}
+}





More information about the cfe-commits mailing list