[clang] 84bf5e3 - Fix various problems found by fuzzing.
Manuel Klimek via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 22 02:10:34 PST 2021
Author: Manuel Klimek
Date: 2021-11-22T11:08:38+01:00
New Revision: 84bf5e328664db2e744c4651c52d2460b1733d09
URL: https://github.com/llvm/llvm-project/commit/84bf5e328664db2e744c4651c52d2460b1733d09
DIFF: https://github.com/llvm/llvm-project/commit/84bf5e328664db2e744c4651c52d2460b1733d09.diff
LOG: Fix various problems found by fuzzing.
1. IndexedTokenSource::getNextToken cannot return nullptr; some code was
still written assuming it can; make getNextToken more resilient against
incorrect input and fix its call-sites.
2. Change various asserts that can be triggered by user-provided input
into conditionals in the code.
Added:
Modified:
clang/lib/Format/ContinuationIndenter.cpp
clang/lib/Format/TokenAnnotator.cpp
clang/lib/Format/UnwrappedLineParser.cpp
clang/lib/Format/WhitespaceManager.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 1e4f5690ef241..f56b7c70d18e7 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1986,7 +1986,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
Current.Previous->isNot(TT_ImplicitStringLiteral))) {
if (!Style.ReflowComments ||
CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
- switchesFormatting(Current))
+ switchesFormatting(Current) ||
+ !(Current.TokenText.startswith("//") ||
+ Current.TokenText.startswith("#")))
return nullptr;
return std::make_unique<BreakableLineCommentSection>(
Current, StartColumn, /*InPPDirective=*/false, Encoding, Style);
@@ -2195,11 +2197,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// When breaking before a tab character, it may be moved by a few columns,
// but will still be expanded to the next tab stop, so we don't save any
// columns.
- if (NewRemainingTokenColumns == RemainingTokenColumns) {
+ if (NewRemainingTokenColumns >= RemainingTokenColumns) {
// FIXME: Do we need to adjust the penalty?
break;
}
- assert(NewRemainingTokenColumns < RemainingTokenColumns);
LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first
<< ", " << Split.second << "\n");
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 3897241cb8589..f3f63b4cad234 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -946,11 +946,15 @@ class AnnotatingParser {
!Line.First->isOneOf(tok::kw_enum, tok::kw_case,
tok::kw_default)) {
FormatToken *Prev = Tok->getPreviousNonComment();
+ if (!Prev)
+ break;
if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept))
Tok->setType(TT_CtorInitializerColon);
else if (Prev->is(tok::kw_try)) {
// Member initializer list within function try block.
FormatToken *PrevPrev = Prev->getPreviousNonComment();
+ if (!PrevPrev)
+ break;
if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
Tok->setType(TT_CtorInitializerColon);
} else
@@ -1578,6 +1582,8 @@ class AnnotatingParser {
if (TemplateCloser->is(tok::l_paren)) {
// No Matching Paren yet so skip to matching paren
TemplateCloser = untilMatchingParen(TemplateCloser);
+ if (!TemplateCloser)
+ break;
}
if (TemplateCloser->is(tok::less))
NestingLevel++;
@@ -2639,8 +2645,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
if (Current->Role)
Current->Role->precomputeFormattingInfos(Current);
if (Current->MatchingParen &&
- Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
- assert(IndentLevel > 0);
+ Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
+ IndentLevel > 0) {
--IndentLevel;
}
Current->IndentLevel = IndentLevel;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 28d925858f776..c12c7c6ecfa69 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -199,6 +199,8 @@ class IndexedTokenSource : public FormatTokenSource {
: Tokens(Tokens), Position(-1) {}
FormatToken *getNextToken() override {
+ if (Position >= 0 && Tokens[Position]->is(tok::eof))
+ return Tokens[Position];
++Position;
return Tokens[Position];
}
@@ -399,7 +401,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
FormatToken *Next;
do {
Next = Tokens->getNextToken();
- } while (Next && Next->is(tok::comment));
+ } while (Next->is(tok::comment));
FormatTok = Tokens->setPosition(StoredPosition);
if (Next && Next->isNot(tok::colon)) {
// default not followed by ':' is not a case label; treat it like
@@ -1097,7 +1099,6 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
}
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
- assert(!FormatTok->is(tok::l_brace));
if (Style.Language == FormatStyle::LK_TableGen &&
FormatTok->is(tok::pp_include)) {
nextToken();
@@ -1488,7 +1489,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
unsigned StoredPosition = Tokens->getPosition();
FormatToken *Next = Tokens->getNextToken();
FormatTok = Tokens->setPosition(StoredPosition);
- if (Next && !mustBeJSIdent(Keywords, Next)) {
+ if (!mustBeJSIdent(Keywords, Next)) {
nextToken();
break;
}
@@ -2653,23 +2654,25 @@ bool UnwrappedLineParser::tryToParseSimpleAttribute() {
ScopedTokenPosition AutoPosition(Tokens);
FormatToken *Tok = Tokens->getNextToken();
// We already read the first [ check for the second.
- if (Tok && !Tok->is(tok::l_square)) {
+ if (!Tok->is(tok::l_square)) {
return false;
}
// Double check that the attribute is just something
// fairly simple.
- while (Tok) {
+ while (Tok->isNot(tok::eof)) {
if (Tok->is(tok::r_square)) {
break;
}
Tok = Tokens->getNextToken();
}
+ if (Tok->is(tok::eof))
+ return false;
Tok = Tokens->getNextToken();
- if (Tok && !Tok->is(tok::r_square)) {
+ if (!Tok->is(tok::r_square)) {
return false;
}
Tok = Tokens->getNextToken();
- if (Tok && Tok->is(tok::semi)) {
+ if (Tok->is(tok::semi)) {
return false;
}
return true;
@@ -2682,7 +2685,7 @@ void UnwrappedLineParser::parseJavaEnumBody() {
unsigned StoredPosition = Tokens->getPosition();
bool IsSimple = true;
FormatToken *Tok = Tokens->getNextToken();
- while (Tok) {
+ while (!Tok->is(tok::eof)) {
if (Tok->is(tok::r_brace))
break;
if (Tok->isOneOf(tok::l_brace, tok::semi)) {
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index 74136d2f5caa1..7a00e93789191 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -372,8 +372,6 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
if (ContinuedStringLiteral)
Changes[i].Spaces += Shift;
- assert(Shift >= 0);
-
Changes[i].StartOfTokenColumn += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
@@ -915,7 +913,7 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
Changes[i].StartOfBlockComment->StartOfTokenColumn -
Changes[i].StartOfTokenColumn;
}
- assert(Shift >= 0);
+ if (Shift < 0) continue;
Changes[i].Spaces += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
@@ -1270,10 +1268,10 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) {
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
- if (i > 0) {
- assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
- C.OriginalWhitespaceRange.getBegin() &&
- "Generating two replacements for the same location");
+ if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
+ C.OriginalWhitespaceRange.getBegin()) {
+ // Do not generate two replacements for the same location.
+ continue;
}
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;
More information about the cfe-commits
mailing list