[cfe-commits] r150495 - in /cfe/trunk: include/clang/Sema/TypoCorrection.h lib/Sema/SemaLookup.cpp test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp
Kaelyn Uhrain
rikka at google.com
Tue Feb 14 10:56:48 PST 2012
Author: rikka
Date: Tue Feb 14 12:56:48 2012
New Revision: 150495
URL: http://llvm.org/viewvc/llvm-project?rev=150495&view=rev
Log:
Use several weighted factors to determine typo candidate viablity.
Replace the simple Levenshtein edit distance for typo correction
candidates--and the hacky way adding namespace qualifiers would affect
the edit distance--with a synthetic "edit distance" comprised of several
factors and their relative weights. This also allows the typo correction
callback object to convey more information about the viability of a
correction candidate than simply viable or not viable.
Modified:
cfe/trunk/include/clang/Sema/TypoCorrection.h
cfe/trunk/lib/Sema/SemaLookup.cpp
cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp
Modified: cfe/trunk/include/clang/Sema/TypoCorrection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/TypoCorrection.h?rev=150495&r1=150494&r2=150495&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/TypoCorrection.h (original)
+++ cfe/trunk/include/clang/Sema/TypoCorrection.h Tue Feb 14 12:56:48 2012
@@ -23,32 +23,46 @@
/// @brief Simple class containing the result of Sema::CorrectTypo
class TypoCorrection {
public:
+ // "Distance" for unusable corrections
+ static const unsigned InvalidDistance = ~0U;
+ // The largest distance still considered valid (larger edit distances are
+ // mapped to InvalidDistance by getEditDistance).
+ static const unsigned MaximumDistance = 10000U;
+
+ // Relative weightings of the "edit distance" components. The higher the
+ // weight, the more of a penalty to fitness the component will give (higher
+ // weights mean greater contribution to the total edit distance, with the
+ // best correction candidates having the lowest edit distance).
+ static const unsigned CharDistanceWeight = 100U;
+ static const unsigned QualifierDistanceWeight = 110U;
+ static const unsigned CallbackDistanceWeight = 150U;
+
TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
- NestedNameSpecifier *NNS=0, unsigned distance=0)
- : CorrectionName(Name),
- CorrectionNameSpec(NNS),
- EditDistance(distance) {
+ NestedNameSpecifier *NNS=0, unsigned CharDistance=0,
+ unsigned QualifierDistance=0)
+ : CorrectionName(Name), CorrectionNameSpec(NNS),
+ CharDistance(CharDistance), QualifierDistance(QualifierDistance),
+ CallbackDistance(0) {
if (NameDecl)
CorrectionDecls.push_back(NameDecl);
}
TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0,
- unsigned distance=0)
- : CorrectionName(Name->getDeclName()),
- CorrectionNameSpec(NNS),
- EditDistance(distance) {
+ unsigned CharDistance=0)
+ : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
+ CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {
if (Name)
CorrectionDecls.push_back(Name);
}
TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0,
- unsigned distance=0)
- : CorrectionName(Name),
- CorrectionNameSpec(NNS),
- EditDistance(distance) {}
+ unsigned CharDistance=0)
+ : CorrectionName(Name), CorrectionNameSpec(NNS),
+ CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {}
TypoCorrection()
- : CorrectionNameSpec(0), EditDistance(0) {}
+ : CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0),
+ CallbackDistance(0) {}
/// \brief Gets the DeclarationName of the typo correction
DeclarationName getCorrection() const { return CorrectionName; }
@@ -64,8 +78,41 @@
CorrectionNameSpec = NNS;
}
- /// \brief Gets the "edit distance" of the typo correction from the typo
- unsigned getEditDistance() const { return EditDistance; }
+ void setQualifierDistance(unsigned ED) {
+ QualifierDistance = ED;
+ }
+
+ void setCallbackDistance(unsigned ED) {
+ CallbackDistance = ED;
+ }
+
+ // Convert the given weighted edit distance to a roughly equivalent number of
+ // single-character edits (typically for comparison to the length of the
+ // string being edited).
+ static unsigned NormalizeEditDistance(unsigned ED) {
+ if (ED > MaximumDistance)
+ return InvalidDistance;
+ return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
+ }
+
+ /// \brief Gets the "edit distance" of the typo correction from the typo.
+ /// If Normalized is true, scale the distance down by the CharDistanceWeight
+ /// to return the edit distance in terms of single-character edits.
+ unsigned getEditDistance(bool Normalized = true) const {
+ if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
+ CallbackDistance > MaximumDistance)
+ return InvalidDistance;
+ unsigned ED =
+ CharDistance * CharDistanceWeight +
+ QualifierDistance * QualifierDistanceWeight +
+ CallbackDistance * CallbackDistanceWeight;
+ if (ED > MaximumDistance)
+ return InvalidDistance;
+ // Half the CharDistanceWeight is added to ED to simulate rounding since
+ // integer division truncates the value (i.e. round-to-nearest-int instead
+ // of round-to-zero).
+ return Normalized ? NormalizeEditDistance(ED) : ED;
+ }
/// \brief Gets the pointer to the declaration of the typo correction
NamedDecl* getCorrectionDecl() const {
@@ -143,13 +190,17 @@
DeclarationName CorrectionName;
NestedNameSpecifier *CorrectionNameSpec;
llvm::SmallVector<NamedDecl*, 1> CorrectionDecls;
- unsigned EditDistance;
+ unsigned CharDistance;
+ unsigned QualifierDistance;
+ unsigned CallbackDistance;
};
/// @brief Base class for callback objects used by Sema::CorrectTypo to check
/// the validity of a potential typo correction.
class CorrectionCandidateCallback {
public:
+ static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
+
CorrectionCandidateCallback()
: WantTypeSpecifiers(true), WantExpressionKeywords(true),
WantCXXNamedCasts(true), WantRemainingKeywords(true),
@@ -158,10 +209,26 @@
virtual ~CorrectionCandidateCallback() {}
+ /// \brief Simple predicate used by the default RankCandidate to
+ /// determine whether to return an edit distance of 0 or InvalidDistance.
+ /// This can be overrided by validators that only need to determine if a
+ /// candidate is viable, without ranking potentially viable candidates.
+ /// Only ValidateCandidate or RankCandidate need to be overriden by a
+ /// callback wishing to check the viability of correction candidates.
virtual bool ValidateCandidate(const TypoCorrection &candidate) {
return true;
}
+ /// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank
+ /// to a candidate (where a lower value represents a better candidate), or
+ /// returning InvalidDistance if the candidate is not at all viable. For
+ /// validation callbacks that only need to determine if a candidate is viable,
+ /// the default RankCandidate returns either 0 or InvalidDistance depending
+ /// whether ValidateCandidate returns true or false.
+ virtual unsigned RankCandidate(const TypoCorrection &candidate) {
+ return ValidateCandidate(candidate) ? 0 : InvalidDistance;
+ }
+
// Flags for context-dependent keywords.
// TODO: Expand these to apply to non-keywords or possibly remove them.
bool WantTypeSpecifiers;
Modified: cfe/trunk/lib/Sema/SemaLookup.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaLookup.cpp?rev=150495&r1=150494&r2=150495&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaLookup.cpp (original)
+++ cfe/trunk/lib/Sema/SemaLookup.cpp Tue Feb 14 12:56:48 2012
@@ -3109,9 +3109,12 @@
return (*BestResults.begin()->second)[Name];
}
- unsigned getBestEditDistance() {
- return (BestResults.empty()) ?
- (std::numeric_limits<unsigned>::max)() : BestResults.begin()->first;
+ unsigned getBestEditDistance(bool Normalized) {
+ if (BestResults.empty())
+ return (std::numeric_limits<unsigned>::max)();
+
+ unsigned BestED = BestResults.begin()->first;
+ return Normalized ? TypoCorrection::NormalizeEditDistance(BestED) : BestED;
}
};
@@ -3167,7 +3170,7 @@
void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
- TypoResultsMap *& Map = BestResults[Correction.getEditDistance()];
+ TypoResultsMap *& Map = BestResults[Correction.getEditDistance(false)];
if (!Map)
Map = new TypoResultsMap;
@@ -3478,6 +3481,12 @@
}
}
+static bool isCandidateViable(CorrectionCandidateCallback &CCC,
+ TypoCorrection &Candidate) {
+ Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
+ return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
+}
+
/// \brief Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
@@ -3545,10 +3554,10 @@
TypoCorrectionConsumer Consumer(*this, Typo);
- // If a callback object returns true for an empty typo correction candidate,
- // assume it does not do any actual validation of the candidates.
+ // If a callback object considers an empty typo correction candidate to be
+ // viable, assume it does not do any actual validation of the candidates.
TypoCorrection EmptyCorrection;
- bool ValidatingCallback = !CCC.ValidateCandidate(EmptyCorrection);
+ bool ValidatingCallback = !isCandidateViable(CCC, EmptyCorrection);
// Perform name lookup to find visible, similarly-named entities.
bool IsUnqualifiedLookup = false;
@@ -3584,7 +3593,7 @@
// keyword case, we'll end up adding the keyword below.
if (Cached->second) {
if (!Cached->second.isKeyword() &&
- CCC.ValidateCandidate(Cached->second))
+ isCandidateViable(CCC, Cached->second))
Consumer.addCorrection(Cached->second);
} else {
// Only honor no-correction cache hits when a callback that will validate
@@ -3637,7 +3646,7 @@
// Make sure that the user typed at least 3 characters for each correction
// made. Otherwise, we don't even both looking at the results.
- unsigned ED = Consumer.getBestEditDistance();
+ unsigned ED = Consumer.getBestEditDistance(true);
if (ED > 0 && Typo->getName().size() / ED < 3) {
// If this was an unqualified lookup, note that no correction was found.
if (IsUnqualifiedLookup)
@@ -3666,7 +3675,7 @@
// Weed out any names that could not be found by name lookup or, if a
// CorrectionCandidateCallback object was provided, failed validation.
- llvm::SmallPtrSet<IdentifierInfo*, 16> QualifiedResults;
+ llvm::SmallVector<TypoCorrection, 16> QualifiedResults;
LookupResult TmpRes(*this, TypoName, LookupKind);
TmpRes.suppressDiagnostics();
while (!Consumer.empty()) {
@@ -3681,7 +3690,7 @@
if (I->second.isResolved()) {
TypoCorrectionConsumer::result_iterator Prev = I;
++I;
- if (!CCC.ValidateCandidate(Prev->second))
+ if (!isCandidateViable(CCC, Prev->second))
DI->second->erase(Prev);
continue;
}
@@ -3695,7 +3704,7 @@
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::FoundUnresolvedValue:
- QualifiedResults.insert(Name);
+ QualifiedResults.push_back(I->second);
// We didn't find this name in our scope, or didn't like what we found;
// ignore it.
{
@@ -3718,7 +3727,7 @@
TRD != TRDEnd; ++TRD)
I->second.addCorrectionDecl(*TRD);
++I;
- if (!CCC.ValidateCandidate(Prev->second))
+ if (!isCandidateViable(CCC, Prev->second))
DI->second->erase(Prev);
break;
}
@@ -3727,7 +3736,7 @@
TypoCorrectionConsumer::result_iterator Prev = I;
I->second.setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
++I;
- if (!CCC.ValidateCandidate(Prev->second))
+ if (!isCandidateViable(CCC, Prev->second))
DI->second->erase(Prev);
break;
}
@@ -3744,7 +3753,7 @@
// Only perform the qualified lookups for C++
if (getLangOptions().CPlusPlus) {
TmpRes.suppressDiagnostics();
- for (llvm::SmallPtrSet<IdentifierInfo*,
+ for (llvm::SmallVector<TypoCorrection,
16>::iterator QRI = QualifiedResults.begin(),
QRIEnd = QualifiedResults.end();
QRI != QRIEnd; ++QRI) {
@@ -3752,33 +3761,33 @@
NIEnd = Namespaces.end();
NI != NIEnd; ++NI) {
DeclContext *Ctx = NI->DeclCtx;
- unsigned QualifiedED = ED + NI->EditDistance;
// FIXME: Stop searching once the namespaces are too far away to create
// acceptable corrections for this identifier (since the namespaces
// are sorted in ascending order by edit distance).
TmpRes.clear();
- TmpRes.setLookupName(*QRI);
+ TmpRes.setLookupName(QRI->getCorrectionAsIdentifierInfo());
if (!LookupQualifiedName(TmpRes, Ctx)) continue;
// Any corrections added below will be validated in subsequent
// iterations of the main while() loop over the Consumer's contents.
switch (TmpRes.getResultKind()) {
case LookupResult::Found:
- Consumer.addName((*QRI)->getName(), TmpRes.getAsSingle<NamedDecl>(),
- QualifiedED, NI->NameSpecifier);
+ QRI->setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
+ QRI->setCorrectionSpecifier(NI->NameSpecifier);
+ QRI->setQualifierDistance(NI->EditDistance);
+ Consumer.addCorrection(*QRI);
break;
- case LookupResult::FoundOverloaded: {
- TypoCorrection corr(&Context.Idents.get((*QRI)->getName()), NULL,
- NI->NameSpecifier, QualifiedED);
+ case LookupResult::FoundOverloaded:
+ QRI->setCorrectionSpecifier(NI->NameSpecifier);
+ QRI->setQualifierDistance(NI->EditDistance);
for (LookupResult::iterator TRD = TmpRes.begin(),
TRDEnd = TmpRes.end();
TRD != TRDEnd; ++TRD)
- corr.addCorrectionDecl(*TRD);
- Consumer.addCorrection(corr);
+ QRI->addCorrectionDecl(*TRD);
+ Consumer.addCorrection(*QRI);
break;
- }
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
@@ -3796,7 +3805,7 @@
if (Consumer.empty()) return TypoCorrection();
TypoResultsMap &BestResults = *Consumer.begin()->second;
- ED = Consumer.begin()->first;
+ ED = TypoCorrection::NormalizeEditDistance(Consumer.begin()->first);
if (ED > 0 && Typo->getName().size() / ED < 3) {
// If this was an unqualified lookup and we believe the callback
@@ -3808,22 +3817,6 @@
return TypoCorrection();
}
- // If we have multiple possible corrections, eliminate the ones where we
- // added namespace qualifiers to try to resolve the ambiguity (and to favor
- // corrections without additional namespace qualifiers)
- if (getLangOptions().CPlusPlus && BestResults.size() > 1) {
- TypoCorrectionConsumer::distance_iterator DI = Consumer.begin();
- for (TypoCorrectionConsumer::result_iterator I = DI->second->begin(),
- IEnd = DI->second->end();
- I != IEnd; /* Increment in loop. */) {
- if (I->second.getCorrectionSpecifier() != NULL) {
- TypoCorrectionConsumer::result_iterator Cur = I;
- ++I;
- DI->second->erase(Cur);
- } else ++I;
- }
- }
-
// If only a single name remains, return that result.
if (BestResults.size() == 1) {
const llvm::StringMapEntry<TypoCorrection> &Correction = *(BestResults.begin());
Modified: cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp?rev=150495&r1=150494&r2=150495&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp (original)
+++ cfe/trunk/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp Tue Feb 14 12:56:48 2012
@@ -74,3 +74,24 @@
(void)new llvm::GraphWriter; // expected-error {{expected a type}}
(void)new llvm::Graphwriter<S>; // expected-error {{no template named 'Graphwriter' in namespace 'llvm'; did you mean 'GraphWriter'?}}
}
+
+// If namespace prefixes and character edits have the same weight, correcting
+// "fimish" to "N::famish" would have the same edit distance as correcting
+// "fimish" to "Finish". The result would be no correction being suggested
+// unless one of the corrections is given precedence (e.g. by filtering out
+// suggestions with added namespace qualifiers).
+namespace N { void famish(int); }
+void Finish(int); // expected-note {{'Finish' declared here}}
+void Start() {
+ fimish(7); // expected-error {{use of undeclared identifier 'fimish'; did you mean 'Finish'?}}
+}
+
+// But just eliminating the corrections containing added namespace qualifiers
+// won't work if both of the tied corrections have namespace qualifiers added.
+namespace N {
+void someCheck(int); // expected-note {{'N::someCheck' declared here}}
+namespace O { void somechock(int); }
+}
+void confusing() {
+ somechick(7); // expected-error {{use of undeclared identifier 'somechick'; did you mean 'N::someCheck'?}}
+}
More information about the cfe-commits
mailing list