[compiler-rt] r182563 - Performance improvement.

Chandler Carruth chandlerc at google.com
Mon Jun 24 04:12:19 PDT 2013


On Thu, May 23, 2013 at 12:18 AM, Bill Wendling <isanbard at gmail.com> wrote:

> Author: void
> Date: Thu May 23 02:18:59 2013
> New Revision: 182563
>
> URL: http://llvm.org/viewvc/llvm-project?rev=182563&view=rev
> Log:
> Performance improvement.
>
> Using fwrite and fread was very *very* slow. The resulting code was
> multiple
> times slower than GCC's implementation of gcov. Replace the fwrite/fread
> system
> with an mmap() version.
>

Hey Bill,

I generally like this approach but I have a few questions and I also have
some comments below. I'm hitting lots of bugs with this version when really
hitting the profiling engine hard that I didn't see with the old version.
I'm not done debugging them yet, but some of the things I'm commenting on
below are likely related. I just spotted them by inspection.


>
> If the `.gcda' file doesn't exist, we (re)allocate a buffer that we write
> into. That gets written to the `.gcda' file in one chunk. If the `.gcda'
> file
> already exists, we simply mmap() the file, modify the mapped data, and use
> msync() to write the contents out to disk. It's much easier than
> implementing
> our own buffering scheme, and we don't have to use fwrite's and fread's
> buffering.
>

The use of mmap and msync makes sense if we're OK requiring those system
calls (and I am). However, I don't understand why we need a buffer in any
of these cases? Can't we always just mmap and modify the mapped data? It
would seem simpler, and I'm somewhat worried about the call to 'realloc' in
this implementation as these routines are called during program shutdown.

Some comments on the code below:

Modified: compiler-rt/trunk/lib/profile/GCDAProfiling.c
> URL:
> http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/profile/GCDAProfiling.c?rev=182563&r1=182562&r2=182563&view=diff
>
> ==============================================================================
> --- compiler-rt/trunk/lib/profile/GCDAProfiling.c (original)
> +++ compiler-rt/trunk/lib/profile/GCDAProfiling.c Thu May 23 02:18:59 2013
> @@ -20,10 +20,12 @@
>  |*
>
>  \*===----------------------------------------------------------------------===*/
>
> +#include <fcntl.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
>  #include <sys/stat.h>
> +#include <sys/mman.h>
>  #include <sys/types.h>
>  #ifdef _WIN32
>  #include <direct.h>
> @@ -50,11 +52,13 @@ static FILE *output_file = NULL;
>  /*
>   * Buffer that we write things into.
>   */
> -#define WRITE_BUFFER_SIZE (1 << 12)
> -static char write_buffer[WRITE_BUFFER_SIZE];
> -static int64_t cur_file_pos = 0;
> -static uint32_t cur_offset = 0;
> +#define NEW_WRITE_BUFFER_SIZE (128 * 1024)
> +static char *new_write_buffer = NULL;
> +static uint64_t cur_buffer_size = 0;
> +static uint64_t cur_pos = 0;
> +static uint64_t file_size = 0;
>  static int new_file = 0;
> +static int fd = -1;
>
>  /*
>   * A list of functions to write out the data.
> @@ -82,30 +86,20 @@ struct flush_fn_node {
>  static struct flush_fn_node *flush_fn_head = NULL;
>  static struct flush_fn_node *flush_fn_tail = NULL;
>
> -static void flush_buffer() {
> -  /* Flush the buffer to file. */
> -  fwrite(write_buffer, cur_offset, 1, output_file);
> -
> -  if (new_file) {
> -    cur_offset = 0;
> -    return;
> -  }
> -
> -  /* Read in more of the file. */
> -  fread(write_buffer,1, WRITE_BUFFER_SIZE, output_file);
> -  fseek(output_file, cur_file_pos, SEEK_SET);
> -
> -  cur_offset = 0;
> +static void resize_write_buffer(uint64_t size) {
> +  if (!new_file) return;
>

Where do you grow the mmap-ed region? It seems like you mmap a fixed size
when you first open the file, and don't re-compute after that, but I may be
missing something.

+  size += cur_pos;
> +  if (size <= cur_buffer_size) return;
> +  size = (size - 1) / NEW_WRITE_BUFFER_SIZE + 1;
> +  size *= NEW_WRITE_BUFFER_SIZE;
> +  new_write_buffer = realloc(new_write_buffer, size);
> +  cur_buffer_size = size;
>  }
>
>  static void write_bytes(const char *s, size_t len) {
> -  if (cur_offset + len > WRITE_BUFFER_SIZE)
> -    flush_buffer();
> -
> -  cur_file_pos += len;
> -
> -  for (; len > 0; --len)
> -    write_buffer[cur_offset++] = *s++;
> +  resize_write_buffer(len);
> +  memcpy(&new_write_buffer[cur_pos], s, len);
> +  cur_pos += len;
>  }
>
>  static void write_32bit_value(uint32_t i) {
> @@ -130,30 +124,22 @@ static void write_string(const char *s)
>  static uint32_t read_32bit_value() {
>    uint32_t val;
>
> -  if (cur_offset + 4 > WRITE_BUFFER_SIZE)
> -    flush_buffer();
> -
>    if (new_file)
>      return (uint32_t)-1;
>
> -  val = *(uint32_t*)&write_buffer[cur_offset];
> -  cur_file_pos += 4;
> -  cur_offset += 4;
> +  val = *(uint32_t*)&new_write_buffer[cur_pos];
> +  cur_pos += 4;
>    return val;
>  }
>
>  static uint64_t read_64bit_value() {
>    uint64_t val;
>
> -  if (cur_offset + 8 > WRITE_BUFFER_SIZE)
> -    flush_buffer();
> -
>    if (new_file)
> -    return (uint32_t)-1;
> +    return (uint64_t)-1;
>
> -  val = *(uint64_t*)&write_buffer[cur_offset];
> -  cur_file_pos += 8;
> -  cur_offset += 8;
> +  val = *(uint64_t*)&new_write_buffer[cur_pos];
> +  cur_pos += 8;
>    return val;
>  }
>
> @@ -210,6 +196,21 @@ static void recursive_mkdir(char *filena
>    }
>  }
>
> +static void map_file() {
> +  fseek(output_file, 0L, SEEK_END);
> +  file_size = ftell(output_file);
>

This seems a strange way to compute the size of a file unless you're about
to use fwrite. Why not fstat?

+
> +  new_write_buffer = mmap(0, file_size, PROT_READ | PROT_WRITE,
> +                          MAP_FILE | MAP_SHARED, fd, 0);
>

There is no error checking here. If mmap fails, we end up blindly setting
the write buffer to be (void*)-1 and getting really strange segfaults
later. I'm actually seeing this in the wild.


> +}
> +
> +static void unmap_file() {
> +  msync(new_write_buffer, file_size, MS_SYNC);
> +  munmap(new_write_buffer, file_size);
>

Missing error checking here as well.


> +  new_write_buffer = NULL;
> +  file_size = 0;
> +}
> +
>  /*
>   * --- LLVM line counter API ---
>   */
> @@ -220,18 +221,21 @@ static void recursive_mkdir(char *filena
>   */
>  void llvm_gcda_start_file(const char *orig_filename, const char
> version[4]) {
>    char *filename = mangle_filename(orig_filename);
> +  const char *mode = "r+b";
>
>    /* Try just opening the file. */
> -  output_file = fopen(filename, "r+b");
> +  new_file = 0;
> +  fd = open(filename, O_RDWR);
>
> -  if (!output_file) {
> +  if (fd == -1) {
>      /* Try opening the file, creating it if necessary. */
>      new_file = 1;
> -    output_file = fopen(filename, "w+b");
> -    if (!output_file) {
> +    mode = "w+b";
> +    fd = open(filename, O_RDWR | O_CREAT);
>

Why do you try to open, then try again to open with O_CREAT? It would seem
more natural to me to do a single open and just always specify O_CREAT.


> +    if (fd == -1) {
>        /* Try creating the directories first then opening the file. */
>        recursive_mkdir(filename);
> -      output_file = fopen(filename, "w+b");
> +      fd = open(filename, O_RDWR | O_CREAT);
>        if (!output_file) {
>

This condition is now wrong -- it still tests output_file rather than fd.


>          /* Bah! It's hopeless. */
>          fprintf(stderr, "profiling:%s: cannot open\n", filename);
> @@ -241,14 +245,19 @@ void llvm_gcda_start_file(const char *or
>      }
>    }
>
> -  setbuf(output_file, 0);
> +  output_file = fdopen(fd, mode);
>
>    /* Initialize the write buffer. */
> -  memset(write_buffer, 0, WRITE_BUFFER_SIZE);
> -  fread(write_buffer, 1, WRITE_BUFFER_SIZE, output_file);
> -  fseek(output_file, 0L, SEEK_SET);
> -  cur_file_pos = 0;
> -  cur_offset = 0;
> +  new_write_buffer = NULL;
> +  cur_buffer_size = 0;
> +  cur_pos = 0;
> +
> +  if (new_file) {
> +    resize_write_buffer(NEW_WRITE_BUFFER_SIZE);
> +    memset(new_write_buffer, 0, NEW_WRITE_BUFFER_SIZE);
> +  } else {
> +    map_file();
> +  }
>
>    /* gcda file, version, stamp LLVM. */
>    write_bytes("adcg", 4);
> @@ -315,12 +324,11 @@ void llvm_gcda_emit_function(uint32_t id
>  void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
>    uint32_t i;
>    uint64_t *old_ctrs = NULL;
> -  int64_t save_cur_file_pos = cur_file_pos;
>    uint32_t val = 0;
> +  uint64_t save_cur_pos = cur_pos;
>
>    if (!output_file) return;
>
> -  flush_buffer();
>    val = read_32bit_value();
>
>    if (val != (uint32_t)-1) {
> @@ -339,15 +347,10 @@ void llvm_gcda_emit_arcs(uint32_t num_co
>      old_ctrs = malloc(sizeof(uint64_t) * num_counters);
>      for (i = 0; i < num_counters; ++i)
>        old_ctrs[i] = read_64bit_value();
> -
> -    /* Reset the current buffer and file position. */
> -    memset(write_buffer, 0, WRITE_BUFFER_SIZE);
> -    cur_file_pos = save_cur_file_pos;
> -    cur_offset = 0;
> -
> -    fseek(output_file, cur_file_pos, SEEK_SET);
>    }
>
> +  cur_pos = save_cur_pos;
> +
>    /* Counter #1 (arcs) tag */
>    write_bytes("\0\0\xa1\1", 4);
>    write_32bit_value(num_counters * 2);
> @@ -369,9 +372,17 @@ void llvm_gcda_end_file() {
>    /* Write out EOF record. */
>    if (!output_file) return;
>    write_bytes("\0\0\0\0\0\0\0\0", 8);
> -  flush_buffer();
> +
> +  if (new_file) {
> +    fwrite(new_write_buffer, cur_pos, 1, output_file);
> +    free(new_write_buffer);
> +  } else {
> +    unmap_file();
> +  }
> +
>    fclose(output_file);
>    output_file = NULL;
> +  new_write_buffer = NULL;
>
>  #ifdef DEBUG_GCDAPROFILING
>    fprintf(stderr, "llvmgcda: -----\n");
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130624/b6657867/attachment.html>


More information about the llvm-commits mailing list