r279056 - [analyzer] Teach CloneDetector to find clones that look like copy-paste errors.
Artem Dergachev via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 18 05:29:42 PDT 2016
Author: dergachev
Date: Thu Aug 18 07:29:41 2016
New Revision: 279056
URL: http://llvm.org/viewvc/llvm-project?rev=279056&view=rev
Log:
[analyzer] Teach CloneDetector to find clones that look like copy-paste errors.
The original clone checker tries to find copy-pasted code that is exactly
identical to the original code, up to minor details.
As an example, if the copy-pasted code has all references to variable 'a'
replaced with references to variable 'b', it is still considered to be
an exact clone.
The new check finds copy-pasted code in which exactly one variable seems
out of place compared to the original code, which likely indicates
a copy-paste error (a variable was forgotten to be renamed in one place).
Patch by Raphael Isemann!
Differential Revision: https://reviews.llvm.org/D23314
Added:
cfe/trunk/test/Analysis/copypaste/suspicious-clones.cpp
Modified:
cfe/trunk/include/clang/Analysis/CloneDetection.h
cfe/trunk/lib/Analysis/CloneDetection.cpp
cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
Modified: cfe/trunk/include/clang/Analysis/CloneDetection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/CloneDetection.h?rev=279056&r1=279055&r2=279056&view=diff
==============================================================================
--- cfe/trunk/include/clang/Analysis/CloneDetection.h (original)
+++ cfe/trunk/include/clang/Analysis/CloneDetection.h Thu Aug 18 07:29:41 2016
@@ -24,6 +24,7 @@ namespace clang {
class Stmt;
class Decl;
+class VarDecl;
class ASTContext;
class CompoundStmt;
@@ -222,7 +223,43 @@ public:
/// that were identified to be clones of each other.
/// \param MinGroupComplexity Only return clones which have at least this
/// complexity value.
- void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity);
+ /// \param CheckPatterns Returns only clone groups in which the referenced
+ /// variables follow the same pattern.
+ void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity,
+ bool CheckPatterns = true);
+
+ /// \brief Describes two clones that reference their variables in a different
+ /// pattern which could indicate a programming error.
+ struct SuspiciousClonePair {
+ /// \brief Utility class holding the relevant information about a single
+ /// clone in this pair.
+ struct SuspiciousCloneInfo {
+ /// The variable which referencing in this clone was against the pattern.
+ const VarDecl *Variable;
+ /// Where the variable was referenced.
+ SourceRange VarRange;
+ /// The variable that should have been referenced to follow the pattern.
+ /// If Suggestion is a nullptr then it's not possible to fix the pattern
+ /// by referencing a different variable in this clone.
+ const VarDecl *Suggestion;
+ SuspiciousCloneInfo(const VarDecl *Variable, SourceRange Range,
+ const VarDecl *Suggestion)
+ : Variable(Variable), VarRange(Range), Suggestion(Suggestion) {}
+ SuspiciousCloneInfo() {}
+ };
+ /// The first clone in the pair which always has a suggested variable.
+ SuspiciousCloneInfo FirstCloneInfo;
+ /// This other clone in the pair which can have a suggested variable.
+ SuspiciousCloneInfo SecondCloneInfo;
+ };
+
+ /// \brief Searches the provided statements for pairs of clones that don't
+ /// follow the same pattern when referencing variables.
+ /// \param Result Output parameter that will contain the clone pairs.
+ /// \param MinGroupComplexity Only clone pairs in which the clones have at
+ /// least this complexity value.
+ void findSuspiciousClones(std::vector<SuspiciousClonePair> &Result,
+ unsigned MinGroupComplexity);
private:
/// Stores all found clone groups including invalid groups with only a single
Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=279056&r1=279055&r2=279056&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (original)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Thu Aug 18 07:29:41 2016
@@ -88,8 +88,11 @@ class VariablePattern {
struct VariableOccurence {
/// The index of the associated VarDecl in the Variables vector.
size_t KindID;
+ /// The source range in the code where the variable was referenced.
+ SourceRange Range;
- VariableOccurence(size_t KindID) : KindID(KindID) {}
+ VariableOccurence(size_t KindID, SourceRange Range)
+ : KindID(KindID), Range(Range) {}
};
/// All occurences of referenced variables in the order of appearance.
@@ -100,19 +103,20 @@ class VariablePattern {
/// \brief Adds a new variable referenced to this pattern.
/// \param VarDecl The declaration of the variable that is referenced.
- void addVariableOccurence(const VarDecl *VarDecl) {
+ /// \param Range The SourceRange where this variable is referenced.
+ void addVariableOccurence(const VarDecl *VarDecl, SourceRange Range) {
// First check if we already reference this variable
for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
if (Variables[KindIndex] == VarDecl) {
// If yes, add a new occurence that points to the existing entry in
// the Variables vector.
- Occurences.emplace_back(KindIndex);
+ Occurences.emplace_back(KindIndex, Range);
return;
}
}
// If this variable wasn't already referenced, add it to the list of
// referenced variables and add a occurence that points to this new entry.
- Occurences.emplace_back(Variables.size());
+ Occurences.emplace_back(Variables.size(), Range);
Variables.push_back(VarDecl);
}
@@ -127,7 +131,7 @@ class VariablePattern {
// Check if S is a reference to a variable. If yes, add it to the pattern.
if (auto D = dyn_cast<DeclRefExpr>(S)) {
if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
- addVariableOccurence(VD);
+ addVariableOccurence(VD, D->getSourceRange());
}
// Recursively check all children of the given statement.
@@ -144,33 +148,93 @@ public:
addVariables(S);
}
- /// \brief Compares this pattern with the given one.
+ /// \brief Counts the differences between this pattern and the given one.
/// \param Other The given VariablePattern to compare with.
- /// \return Returns true if and only if the references variables in this
- /// object follow the same pattern than the ones in the given
- /// VariablePattern.
+ /// \param FirstMismatch Output parameter that will be filled with information
+ /// about the first difference between the two patterns. This parameter
+ /// can be a nullptr, in which case it will be ignored.
+ /// \return Returns the number of differences between the pattern this object
+ /// is following and the given VariablePattern.
///
- /// For example, the following statements all have the same pattern:
+ /// For example, the following statements all have the same pattern and this
+ /// function would return zero:
///
/// if (a < b) return a; return b;
/// if (x < y) return x; return y;
/// if (u2 < u1) return u2; return u1;
///
- /// but the following statement has a different pattern (note the changed
- /// variables in the return statements).
+ /// But the following statement has a different pattern (note the changed
+ /// variables in the return statements) and would have two differences when
+ /// compared with one of the statements above.
///
/// if (a < b) return b; return a;
///
/// This function should only be called if the related statements of the given
/// pattern and the statements of this objects are clones of each other.
- bool comparePattern(const VariablePattern &Other) {
+ unsigned countPatternDifferences(
+ const VariablePattern &Other,
+ CloneDetector::SuspiciousClonePair *FirstMismatch = nullptr) {
+ unsigned NumberOfDifferences = 0;
+
assert(Other.Occurences.size() == Occurences.size());
for (unsigned i = 0; i < Occurences.size(); ++i) {
- if (Occurences[i].KindID != Other.Occurences[i].KindID) {
- return false;
- }
+ auto ThisOccurence = Occurences[i];
+ auto OtherOccurence = Other.Occurences[i];
+ if (ThisOccurence.KindID == OtherOccurence.KindID)
+ continue;
+
+ ++NumberOfDifferences;
+
+ // If FirstMismatch is not a nullptr, we need to store information about
+ // the first difference between the two patterns.
+ if (FirstMismatch == nullptr)
+ continue;
+
+ // Only proceed if we just found the first difference as we only store
+ // information about the first difference.
+ if (NumberOfDifferences != 1)
+ continue;
+
+ const VarDecl *FirstSuggestion = nullptr;
+ // If there is a variable available in the list of referenced variables
+ // which wouldn't break the pattern if it is used in place of the
+ // current variable, we provide this variable as the suggested fix.
+ if (OtherOccurence.KindID < Variables.size())
+ FirstSuggestion = Variables[OtherOccurence.KindID];
+
+ // Store information about the first clone.
+ FirstMismatch->FirstCloneInfo =
+ CloneDetector::SuspiciousClonePair::SuspiciousCloneInfo(
+ Variables[ThisOccurence.KindID], ThisOccurence.Range,
+ FirstSuggestion);
+
+ // Same as above but with the other clone. We do this for both clones as
+ // we don't know which clone is the one containing the unintended
+ // pattern error.
+ const VarDecl *SecondSuggestion = nullptr;
+ if (ThisOccurence.KindID < Other.Variables.size())
+ SecondSuggestion = Other.Variables[ThisOccurence.KindID];
+
+ // Store information about the second clone.
+ FirstMismatch->SecondCloneInfo =
+ CloneDetector::SuspiciousClonePair::SuspiciousCloneInfo(
+ Variables[ThisOccurence.KindID], OtherOccurence.Range,
+ SecondSuggestion);
+
+ // SuspiciousClonePair guarantees that the first clone always has a
+ // suggested variable associated with it. As we know that one of the two
+ // clones in the pair always has suggestion, we swap the two clones
+ // in case the first clone has no suggested variable which means that
+ // the second clone has a suggested variable and should be first.
+ if (!FirstMismatch->FirstCloneInfo.Suggestion)
+ std::swap(FirstMismatch->FirstCloneInfo,
+ FirstMismatch->SecondCloneInfo);
+
+ // This ensures that we always have at least one suggestion in a pair.
+ assert(FirstMismatch->FirstCloneInfo.Suggestion);
}
- return true;
+
+ return NumberOfDifferences;
}
};
}
@@ -529,7 +593,8 @@ static void createCloneGroups(std::vecto
// and assign them to our new clone group.
auto I = UnassignedSequences.begin(), E = UnassignedSequences.end();
while (I != E) {
- if (VariablePattern(*I).comparePattern(PrototypeFeatures)) {
+
+ if (VariablePattern(*I).countPatternDifferences(PrototypeFeatures) == 0) {
FilteredGroup.Sequences.push_back(*I);
I = UnassignedSequences.erase(I);
continue;
@@ -546,11 +611,15 @@ static void createCloneGroups(std::vecto
}
void CloneDetector::findClones(std::vector<CloneGroup> &Result,
- unsigned MinGroupComplexity) {
+ unsigned MinGroupComplexity,
+ bool CheckPatterns) {
// Add every valid clone group that fulfills the complexity requirement.
for (const CloneGroup &Group : CloneGroups) {
if (Group.isValid() && Group.Complexity >= MinGroupComplexity) {
- createCloneGroups(Result, Group);
+ if (CheckPatterns)
+ createCloneGroups(Result, Group);
+ else
+ Result.push_back(Group);
}
}
@@ -580,3 +649,37 @@ void CloneDetector::findClones(std::vect
Result.erase(Result.begin() + *I);
}
}
+
+void CloneDetector::findSuspiciousClones(
+ std::vector<CloneDetector::SuspiciousClonePair> &Result,
+ unsigned MinGroupComplexity) {
+ std::vector<CloneGroup> Clones;
+ // Reuse the normal search for clones but specify that the clone groups don't
+ // need to have a common referenced variable pattern so that we can manually
+ // search for the kind of pattern errors this function is supposed to find.
+ findClones(Clones, MinGroupComplexity, false);
+
+ for (const CloneGroup &Group : Clones) {
+ for (unsigned i = 0; i < Group.Sequences.size(); ++i) {
+ VariablePattern PatternA(Group.Sequences[i]);
+
+ for (unsigned j = i + 1; j < Group.Sequences.size(); ++j) {
+ VariablePattern PatternB(Group.Sequences[j]);
+
+ CloneDetector::SuspiciousClonePair ClonePair;
+ // For now, we only report clones which break the variable pattern just
+ // once because multiple differences in a pattern are an indicator that
+ // those differences are maybe intended (e.g. because it's actually
+ // a different algorithm).
+ // TODO: In very big clones even multiple variables can be unintended,
+ // so replacing this number with a percentage could better handle such
+ // cases. On the other hand it could increase the false-positive rate
+ // for all clones if the percentage is too high.
+ if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) {
+ Result.push_back(ClonePair);
+ break;
+ }
+ }
+ }
+ }
+}
Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp?rev=279056&r1=279055&r2=279056&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp Thu Aug 18 07:29:41 2016
@@ -34,6 +34,15 @@ public:
void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
AnalysisManager &Mgr, BugReporter &BR) const;
+
+ /// \brief Reports all clones to the user.
+ void reportClones(SourceManager &SM, AnalysisManager &Mgr,
+ int MinComplexity) const;
+
+ /// \brief Reports only suspicious clones to the user along with informaton
+ /// that explain why they are suspicious.
+ void reportSuspiciousClones(SourceManager &SM, AnalysisManager &Mgr,
+ int MinComplexity) const;
};
} // end anonymous namespace
@@ -52,10 +61,23 @@ void CloneChecker::checkEndOfTranslation
int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger(
"MinimumCloneComplexity", 10, this);
-
assert(MinComplexity >= 0);
- SourceManager &SM = BR.getSourceManager();
+ bool ReportSuspiciousClones = Mgr.getAnalyzerOptions().getBooleanOption(
+ "ReportSuspiciousClones", true, this);
+
+ bool ReportNormalClones = Mgr.getAnalyzerOptions().getBooleanOption(
+ "ReportNormalClones", true, this);
+
+ if (ReportSuspiciousClones)
+ reportSuspiciousClones(BR.getSourceManager(), Mgr, MinComplexity);
+
+ if (ReportNormalClones)
+ reportClones(BR.getSourceManager(), Mgr, MinComplexity);
+}
+
+void CloneChecker::reportClones(SourceManager &SM, AnalysisManager &Mgr,
+ int MinComplexity) const {
std::vector<CloneDetector::CloneGroup> CloneGroups;
Detector.findClones(CloneGroups, MinComplexity);
@@ -87,6 +109,52 @@ void CloneChecker::checkEndOfTranslation
}
}
+void CloneChecker::reportSuspiciousClones(SourceManager &SM,
+ AnalysisManager &Mgr,
+ int MinComplexity) const {
+
+ std::vector<CloneDetector::SuspiciousClonePair> Clones;
+ Detector.findSuspiciousClones(Clones, MinComplexity);
+
+ DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+ auto SuspiciousCloneWarning = DiagEngine.getCustomDiagID(
+ DiagnosticsEngine::Warning, "suspicious code clone detected; did you "
+ "mean to use %0?");
+
+ auto RelatedCloneNote = DiagEngine.getCustomDiagID(
+ DiagnosticsEngine::Note, "suggestion is based on the usage of this "
+ "variable in a similar piece of code");
+
+ auto RelatedSuspiciousCloneNote = DiagEngine.getCustomDiagID(
+ DiagnosticsEngine::Note, "suggestion is based on the usage of this "
+ "variable in a similar piece of code; did you "
+ "mean to use %0?");
+
+ for (CloneDetector::SuspiciousClonePair &Pair : Clones) {
+ // The first clone always has a suggestion and we report it to the user
+ // along with the place where the suggestion should be used.
+ DiagEngine.Report(Pair.FirstCloneInfo.VarRange.getBegin(),
+ SuspiciousCloneWarning)
+ << Pair.FirstCloneInfo.VarRange << Pair.FirstCloneInfo.Suggestion;
+
+ // The second clone can have a suggestion and if there is one, we report
+ // that suggestion to the user.
+ if (Pair.SecondCloneInfo.Suggestion) {
+ DiagEngine.Report(Pair.SecondCloneInfo.VarRange.getBegin(),
+ RelatedSuspiciousCloneNote)
+ << Pair.SecondCloneInfo.VarRange << Pair.SecondCloneInfo.Suggestion;
+ continue;
+ }
+
+ // If there isn't a suggestion in the second clone, we only inform the
+ // user where we got the idea that his code could contain an error.
+ DiagEngine.Report(Pair.SecondCloneInfo.VarRange.getBegin(),
+ RelatedCloneNote)
+ << Pair.SecondCloneInfo.VarRange;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Register CloneChecker
//===----------------------------------------------------------------------===//
Added: cfe/trunk/test/Analysis/copypaste/suspicious-clones.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/suspicious-clones.cpp?rev=279056&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/suspicious-clones.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/suspicious-clones.cpp Thu Aug 18 07:29:41 2016
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:ReportSuspiciousClones=true -analyzer-config alpha.clone.CloneChecker:ReportNormalClones=false -verify %s
+
+// Tests finding a suspicious clone that references local variables.
+
+void log();
+
+int max(int a, int b) {
+ log();
+ if (a > b)
+ return a;
+ return b; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code}}
+}
+
+int maxClone(int x, int y, int z) {
+ log();
+ if (x > y)
+ return x;
+ return z; // expected-warning{{suspicious code clone detected; did you mean to use 'y'?}}
+}
+
+// Tests finding a suspicious clone that references global variables.
+
+struct mutex {
+ bool try_lock();
+ void unlock();
+};
+
+mutex m1;
+mutex m2;
+int i;
+
+void busyIncrement() {
+ while (true) {
+ if (m1.try_lock()) {
+ ++i;
+ m1.unlock(); // expected-note{{suggestion is based on the usage of this variable in a similar piece of code}}
+ if (i > 1000) {
+ return;
+ }
+ }
+ }
+}
+
+void faultyBusyIncrement() {
+ while (true) {
+ if (m1.try_lock()) {
+ ++i;
+ m2.unlock(); // expected-warning{{suspicious code clone detected; did you mean to use 'm1'?}}
+ if (i > 1000) {
+ return;
+ }
+ }
+ }
+}
+
+// Tests that we provide two suggestions in cases where two fixes are possible.
+
+int foo(int a, int b, int c) {
+ a += b + c;
+ b /= a + b;
+ c -= b * a; // expected-warning{{suspicious code clone detected; did you mean to use 'a'?}}
+ return c;
+}
+
+int fooClone(int a, int b, int c) {
+ a += b + c;
+ b /= a + b;
+ c -= a * a; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code; did you mean to use 'b'?}}
+ return c;
+}
+
+
+// Tests that for clone groups with a many possible suspicious clone pairs, at
+// most one warning per clone group is generated and every relevant clone is
+// reported through either a warning or a note.
+
+long bar1(long a, long b, long c, long d) {
+ c = a - b;
+ c = c / d * a;
+ d = b * b - c; // expected-warning{{suspicious code clone detected; did you mean to use 'c'?}}
+ return d;
+}
+
+long bar2(long a, long b, long c, long d) {
+ c = a - b;
+ c = c / d * a;
+ d = c * b - c; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code; did you mean to use 'b'?}} \
+ // expected-warning{{suspicious code clone detected; did you mean to use 'a'?}}
+ return d;
+}
+
+long bar3(long a, long b, long c, long d) {
+ c = a - b;
+ c = c / d * a;
+ d = a * b - c; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code; did you mean to use 'c'?}}
+ return d;
+}
More information about the cfe-commits
mailing list