[PATCH] Add CRLF support to LineIterator.

Justin Bogner mail at justinbogner.com
Sun Nov 2 12:30:57 PST 2014


Rafael Ávila de Espíndola <rafael.espindola at gmail.com> writes:
> Hi chandlerc,
>
> The MRI scripts have to work with CRLF, and in general it is probably
> a good idea to support this in a core utility like LineIterator.

This can read past the end of the buffer in a few places if the buffer
ends with '\r' for some reason: both isAtLineEnd and skipLineEnd look at
*(P + 1) whenever *P is '\r'. We should probably handle that possibility.

> http://reviews.llvm.org/D5844
>
> Files:
>   lib/Support/LineIterator.cpp
>   test/Object/Inputs/mri-crlf.mri
>   test/Object/mri-crlf.test
>
> Index: lib/Support/LineIterator.cpp
> ===================================================================
> --- lib/Support/LineIterator.cpp
> +++ lib/Support/LineIterator.cpp
> @@ -12,6 +12,25 @@
>  
>  using namespace llvm;
>  
> +static bool isAtLineEnd(const char *P) {
> +  if (*P == '\n')
> +    return true;
> +  if (*P == '\r' && *(P + 1) == '\n')
> +    return true;
> +  return false;
> +}
> +
> +static void skipLineEnd(const char *&P) {
> +  if (*P == '\n') {
> +    ++P;
> +    return;
> +  }
> +  if (*P == '\r' && *(P + 1) == '\n') {
> +    P += 2;
> +    return;
> +  }
> +}
> +
>  line_iterator::line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks,
>                               char CommentMarker)
>      : Buffer(Buffer.getBufferSize() ? &Buffer : nullptr),
> @@ -23,42 +42,41 @@
>    if (Buffer.getBufferSize()) {
>      assert(Buffer.getBufferEnd()[0] == '\0');
>      // Make sure we don't skip a leading newline if we're keeping blanks
> -    if (SkipBlanks || *Buffer.getBufferStart() != '\n')
> +    if (SkipBlanks || !isAtLineEnd(Buffer.getBufferStart()))
>        advance();
>    }
>  }
>  
>  void line_iterator::advance() {
>    assert(Buffer && "Cannot advance past the end!");
>  
>    const char *Pos = CurrentLine.end();
> -  assert(Pos == Buffer->getBufferStart() || *Pos == '\n' || *Pos == '\0');
> +  assert(Pos == Buffer->getBufferStart() || isAtLineEnd(Pos) || *Pos == '\0');
>  
> -  if (*Pos == '\n') {
> -    ++Pos;
> +  if (isAtLineEnd(Pos)) {
> +    skipLineEnd(Pos);
>      ++LineNumber;
>    }
> -  if (!SkipBlanks && *Pos == '\n') {
> +  if (!SkipBlanks && isAtLineEnd(Pos)) {
>      // Nothing to do for a blank line.
>    } else if (CommentMarker == '\0') {
>      // If we're not stripping comments, this is simpler.
> -    size_t Blanks = 0;
> -    while (Pos[Blanks] == '\n')
> -      ++Blanks;
> -    Pos += Blanks;
> -    LineNumber += Blanks;
> +    while (isAtLineEnd(Pos)) {
> +      skipLineEnd(Pos);
> +      ++LineNumber;
> +    }
>    } else {
>      // Skip comments and count line numbers, which is a bit more complex.
>      for (;;) {
> -      if (*Pos == '\n' && !SkipBlanks)
> +      if (isAtLineEnd(Pos) && !SkipBlanks)
>          break;
>        if (*Pos == CommentMarker)
>          do {
>            ++Pos;
> -        } while (*Pos != '\0' && *Pos != '\n');
> -      if (*Pos != '\n')
> +        } while (*Pos != '\0' && !isAtLineEnd(Pos));
> +      if (!isAtLineEnd(Pos))
>          break;
> -      ++Pos;
> +      skipLineEnd(Pos);
>        ++LineNumber;
>      }
>    }
> @@ -72,7 +90,7 @@
>  
>    // Measure the line.
>    size_t Length = 0;
> -  while (Pos[Length] != '\0' && Pos[Length] != '\n') {
> +  while (Pos[Length] != '\0' && !isAtLineEnd(&Pos[Length])) {
>      ++Length;
>    }
>  
> Index: test/Object/Inputs/mri-crlf.mri
> ===================================================================
> --- /dev/null
> +++ test/Object/Inputs/mri-crlf.mri
> @@ -0,0 +1,2 @@
> +; this file intentionally has crlf line endings
> +end
> Index: test/Object/mri-crlf.test
> ===================================================================
> --- /dev/null
> +++ test/Object/mri-crlf.test
> @@ -0,0 +1 @@
> +; RUN: llvm-ar -M  < %S/Inputs/mri-crlf.mri
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list