[llvm-commits] [poolalloc] r57322 - in /poolalloc/trunk: runtime/FL2Allocator/PoolAllocator.cpp runtime/Makefile runtime/PreRT/ runtime/PreRT/Makefile runtime/PreRT/qsort.c runtime/PreRT/strdup.c test/TEST.poolalloc.Makefile

Andrew Lenharth alenhar2 at cs.uiuc.edu
Wed Oct 8 23:31:27 PDT 2008


Author: alenhar2
Date: Thu Oct  9 01:31:27 2008
New Revision: 57322

URL: http://llvm.org/viewvc/llvm-project?rev=57322&view=rev
Log:
libc impl for annoying functions

Added:
    poolalloc/trunk/runtime/PreRT/
    poolalloc/trunk/runtime/PreRT/Makefile   (with props)
    poolalloc/trunk/runtime/PreRT/qsort.c
    poolalloc/trunk/runtime/PreRT/strdup.c
Modified:
    poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp
    poolalloc/trunk/runtime/Makefile
    poolalloc/trunk/test/TEST.poolalloc.Makefile

Modified: poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp?rev=57322&r1=57321&r2=57322&view=diff

==============================================================================
--- poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp (original)
+++ poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp Thu Oct  9 01:31:27 2008
@@ -27,6 +27,8 @@
 #define INITIAL_SLAB_SIZE 4096
 #define LARGE_SLAB_SIZE   4096
 
+#define NDEBUG
+
 #ifndef NDEBUG
 // Configuration macros.  Define up to one of these.
 #define PRINT_NUM_POOLS          // Print use dynamic # pools info

Modified: poolalloc/trunk/runtime/Makefile
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/Makefile?rev=57322&r1=57321&r2=57322&view=diff

==============================================================================
--- poolalloc/trunk/runtime/Makefile (original)
+++ poolalloc/trunk/runtime/Makefile Thu Oct  9 01:31:27 2008
@@ -6,6 +6,6 @@
 #
 # List all of the subdirectories that we will compile.
 #
-DIRS=FreeListAllocator FL2Allocator
+DIRS=FreeListAllocator FL2Allocator PreRT
 
 include $(LEVEL)/Makefile.common

Added: poolalloc/trunk/runtime/PreRT/Makefile
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/PreRT/Makefile?rev=57322&view=auto

==============================================================================
--- poolalloc/trunk/runtime/PreRT/Makefile (added)
+++ poolalloc/trunk/runtime/PreRT/Makefile Thu Oct  9 01:31:27 2008
@@ -0,0 +1,6 @@
+LEVEL = ../..
+BYTECODE_LIBRARY=1
+LIBRARYNAME=pa_pre_rt
+
+include $(LEVEL)/Makefile.common
+

Propchange: poolalloc/trunk/runtime/PreRT/Makefile

------------------------------------------------------------------------------
    svn:executable = *

Added: poolalloc/trunk/runtime/PreRT/qsort.c
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/PreRT/qsort.c?rev=57322&view=auto

==============================================================================
--- poolalloc/trunk/runtime/PreRT/qsort.c (added)
+++ poolalloc/trunk/runtime/PreRT/qsort.c Thu Oct  9 01:31:27 2008
@@ -0,0 +1,284 @@
+/* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt at ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* If you consider tuning this algorithm, you should consult first:
+   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
+   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */
+
+#include <alloca.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Byte-wise swap two items of size SIZE.  SIZE must be nonzero: the inner do-while swaps one byte before it first tests the count. */
+#define SWAP(a, b, size)						      \
+  do									      \
+    {									      \
+      register size_t __size = (size);					      \
+      register char *__a = (a), *__b = (b);				      \
+      do								      \
+	{								      \
+	  char __tmp = *__a;						      \
+	  *__a++ = *__b;						      \
+	  *__b++ = __tmp;						      \
+	} while (--__size > 0);						      \
+    } while (0)
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+   This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4 /* counted in elements; qsort multiplies it by the element size before comparing byte distances */
+
+/* Stack node declarations used to store unfulfilled partition obligations. */
+typedef struct
+  {
+    char *lo; /* first element of the pending partition */
+    char *hi; /* last element of the pending partition (inclusive, not one past it) */
+  } stack_node;
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
+   upper bound for log (total_elements):
+   bits per byte (CHAR_BIT) * sizeof(size_t).  */
+#define STACK_SIZE	(CHAR_BIT * sizeof(size_t)) /* slot count: one stack_node per bit of size_t */
+#define PUSH(low, high)	((void) ((top->lo = (low)), (top->hi = (high)), ++top)) /* store at top, then advance top to the next free slot */
+#define	POP(low, high)	((void) (--top, (low = top->lo), (high = top->hi))) /* step top back, then read that slot into LOW and HIGH */
+#define	STACK_NOT_EMPTY	(stack < top) /* relies on locals named `stack' and `top' at the point of use */
+
+
+/* Order size using quicksort.  This implementation incorporates
+   four optimizations discussed in Sedgewick:
+
+   1. Non-recursive, using an explicit stack of pointers that store the
+      next array partition to sort.  To save time, this maximum amount
+      of space required to store an array of SIZE_MAX is allocated on the
+      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
+      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+      Pretty cheap, actually.
+
+   2. Choose the pivot element using a median-of-three decision tree.
+      This reduces the probability of selecting a bad pivot value and
+      eliminates certain extraneous comparisons.
+
+   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+      insertion sort to order the MAX_THRESH items within each partition.
+      This is a big win, since insertion sort is faster for small, mostly
+      sorted array segments.
+
+   4. The larger of the two sub-partitions is always pushed onto the
+      stack first, with the algorithm then concentrating on the
+      smaller partition.  This *guarantees* no more than log (total_elems)
+      stack size is needed (actually O(1) in this case)!  */
+
+void
+qsort (void *const pbase, size_t total_elems, size_t size,
+       int(*cmp)(const void*, const void*))
+{ /* Sorts, in place, the TOTAL_ELEMS items of SIZE bytes each starting at PBASE; CMP (a, b) < 0 places a before b. */
+  register char *base_ptr = (char *) pbase;
+
+  const size_t max_thresh = MAX_THRESH * size; /* threshold in bytes, to compare against pointer differences */
+
+  if (total_elems == 0)
+    /* Avoid lossage with unsigned arithmetic below.  */
+    return;
+
+  if (total_elems > MAX_THRESH) /* smaller arrays skip quicksort and go straight to the insertion sort below */
+    {
+      char *lo = base_ptr;
+      char *hi = &lo[size * (total_elems - 1)];
+      stack_node stack[STACK_SIZE];
+      stack_node *top = stack;
+
+      PUSH (NULL, NULL); /* sentinel: popping it makes top == stack, which ends the loop */
+
+      while (STACK_NOT_EMPTY)
+        {
+          char *left_ptr;
+          char *right_ptr;
+
+	  /* Select median value from among LO, MID, and HI. Rearrange
+	     LO and HI so the three values are sorted. This lowers the
+	     probability of picking a pathological pivot value and
+	     skips a comparison for both the LEFT_PTR and RIGHT_PTR in
+	     the while loops. */
+
+	  char *mid = lo + size * ((hi - lo) / size >> 1); /* NOTE(review): divides by SIZE; nothing here guards against SIZE == 0 */
+
+	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
+	    SWAP (mid, lo, size);
+	  if ((*cmp) ((void *) hi, (void *) mid) < 0)
+	    SWAP (mid, hi, size);
+	  else
+	    goto jump_over; /* MID <= HI already, so the third comparison is unnecessary */
+	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
+	    SWAP (mid, lo, size);
+	jump_over:;
+
+	  left_ptr  = lo + size;
+	  right_ptr = hi - size;
+
+	  /* Here's the famous ``collapse the walls'' section of quicksort.
+	     Gotta like those tight inner loops!  They are the main reason
+	     that this algorithm runs much faster than others. */
+	  do
+	    {
+	      while ((*cmp) ((void *) left_ptr, (void *) mid) < 0)
+		left_ptr += size;
+
+	      while ((*cmp) ((void *) mid, (void *) right_ptr) < 0)
+		right_ptr -= size;
+
+	      if (left_ptr < right_ptr)
+		{
+		  SWAP (left_ptr, right_ptr, size);
+		  if (mid == left_ptr) /* the pivot element itself was just swapped; keep MID pointing at it */
+		    mid = right_ptr;
+		  else if (mid == right_ptr)
+		    mid = left_ptr;
+		  left_ptr += size;
+		  right_ptr -= size;
+		}
+	      else if (left_ptr == right_ptr)
+		{
+		  left_ptr += size;
+		  right_ptr -= size;
+		  break;
+		}
+	    }
+	  while (left_ptr <= right_ptr);
+
+          /* Set up pointers for next iteration.  First determine whether
+             left and right partitions are below the threshold size.  If so,
+             ignore one or both.  Otherwise, push the larger partition's
+             bounds on the stack and continue sorting the smaller one. */
+
+          if ((size_t) (right_ptr - lo) <= max_thresh)
+            {
+              if ((size_t) (hi - left_ptr) <= max_thresh)
+		/* Ignore both small partitions. */
+                POP (lo, hi);
+              else
+		/* Ignore small left partition. */
+                lo = left_ptr;
+            }
+          else if ((size_t) (hi - left_ptr) <= max_thresh)
+	    /* Ignore small right partition. */
+            hi = right_ptr;
+          else if ((right_ptr - lo) > (hi - left_ptr))
+            {
+	      /* Push larger left partition indices. */
+              PUSH (lo, right_ptr);
+              lo = left_ptr;
+            }
+          else
+            {
+	      /* Push larger right partition indices. */
+              PUSH (left_ptr, hi);
+              hi = right_ptr;
+            }
+        }
+    }
+
+  /* Once the BASE_PTR array is partially sorted by quicksort the rest
+     is completely sorted using insertion sort, since this is efficient
+     for partitions below MAX_THRESH size. BASE_PTR points to the beginning
+     of the array to sort, and END_PTR points at the very last element in
+     the array (*not* one beyond it!). */
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+  {
+    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+    char *tmp_ptr = base_ptr;
+    char *thresh = min(end_ptr, base_ptr + max_thresh); /* never scan past the last element of a short array */
+    register char *run_ptr;
+
+    /* Find smallest element in first threshold and place it at the
+       array's beginning.  This is the smallest array element,
+       and the operation speeds up insertion sort's inner loop. */
+
+    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
+        tmp_ptr = run_ptr;
+
+    if (tmp_ptr != base_ptr)
+      SWAP (tmp_ptr, base_ptr, size);
+
+    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
+
+    run_ptr = base_ptr + size;
+    while ((run_ptr += size) <= end_ptr) /* advanced before the first test, so the scan starts at the third element */
+      {
+	tmp_ptr = run_ptr - size;
+	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0) /* no lower-bound check: relies on the minimum placed at BASE_PTR above to stop the scan */
+	  tmp_ptr -= size;
+
+	tmp_ptr += size;
+        if (tmp_ptr != run_ptr)
+          {
+            char *trav;
+
+	    trav = run_ptr + size;
+	    while (--trav >= run_ptr) /* for each byte of the element at RUN_PTR, last byte first */
+              {
+                char c = *trav;
+                char *hi, *lo;
+
+                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) /* shift that byte column up by one element */
+                  *hi = *lo;
+                *hi = c; /* drop the saved byte into the slot opened at TMP_PTR */
+              }
+          }
+      }
+  }
+}
+
+#if 0 /* disabled ad-hoc test driver; never compiled as committed */
+int lt(const void* x, const void* y) { /* comparator over ints, ascending: negative when *x < *y */
+  int xv = *(int*)x;
+  int yv = *(int*)y;
+  if (xv < yv) return -1;
+  if (xv > yv) return 1;
+  return 0;
+}
+int gt(const void* x, const void* y) { /* comparator over ints, descending: negative when *x > *y */
+  int xv = *(int*)x;
+  int yv = *(int*)y;
+  if (xv > yv) return -1;
+  if (xv < yv) return 1;
+  return 0;
+}
+
+int main() { /* fills an array with rand() values, sorts it four times, then prints it */
+  int size = 100000;
+  int* arr = malloc(size*sizeof(int)); /* result is not checked for NULL */
+  srand(0); /* fixed seed */
+  for (int x = 0; x < size; ++x)
+    arr[x] = rand();
+  
+  qsort(arr, size, sizeof(int), lt); /* random input, ascending */
+  qsort(arr, size, sizeof(int), gt); /* ascending input re-sorted descending */
+  qsort(arr, size, sizeof(int), lt); /* descending input re-sorted ascending */
+  qsort(arr, size, sizeof(int), lt); /* already-sorted input */
+  for (int x = 0 ; x < size; ++x)
+    printf("%d\n", arr[x]); /* NOTE(review): printf needs <stdio.h>, which is not among this file's includes */
+  return 0;
+}
+#endif
+

Added: poolalloc/trunk/runtime/PreRT/strdup.c
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/PreRT/strdup.c?rev=57322&view=auto

==============================================================================
--- poolalloc/trunk/runtime/PreRT/strdup.c (added)
+++ poolalloc/trunk/runtime/PreRT/strdup.c Thu Oct  9 01:31:27 2008
@@ -0,0 +1,16 @@
+#include <stdlib.h>
+#include <string.h>
+
+#undef strdup
+
+char* strdup(const char *s) /* Returns a malloc'd copy of the string S, or NULL if malloc fails. */
+{
+  size_t len = strlen (s) + 1; /* +1 so the terminating NUL byte is copied as well */
+  void *new = malloc (len);
+  
+  if (new == NULL)
+    return NULL;
+  
+  return (char *) memcpy (new, s, len); /* memcpy returns its destination, i.e. the new copy */
+}
+

Modified: poolalloc/trunk/test/TEST.poolalloc.Makefile
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/test/TEST.poolalloc.Makefile?rev=57322&r1=57321&r2=57322&view=diff

==============================================================================
--- poolalloc/trunk/test/TEST.poolalloc.Makefile (original)
+++ poolalloc/trunk/test/TEST.poolalloc.Makefile Thu Oct  9 01:31:27 2008
@@ -21,6 +21,9 @@
 RELDIR  := $(subst $(PROGDIR),,$(CURDIR))
 PADIR   := /home/andrewl/Research/llvm/projects/poolalloc
 
+# Bits of runtime to improve analysis
+PA_PRE_RT := $(PADIR)/Release/lib/libpa_pre_rt.bca
+
 # Pool allocator pass shared object
 PA_SO    := $(PADIR)/Debug/lib/libpoolalloc$(SHLIBEXT)
 DSA_SO   := $(PADIR)/Debug/lib/libLLVMDataStructure$(SHLIBEXT)
@@ -28,7 +31,7 @@
 # Pool allocator runtime library
 #PA_RT    := $(PADIR)/Debug/lib/libpoolalloc_fl_rt.bc
 #PA_RT_O  := $(PROJECT_DIR)/lib/$(CONFIGURATION)/poolalloc_rt.o
-PA_RT_O  := $(PADIR)/Release/lib/poolalloc_rt.o
+PA_RT_O  := $(PADIR)/Debug/lib/poolalloc_rt.o
 #PA_RT_O  := $(PROJECT_DIR)/lib/Release/poolalloc_fl_rt.o
 
 # Command to run opt with the pool allocator pass loaded
@@ -41,31 +44,39 @@
 OPTZN_PASSES := -globaldce -ipsccp -deadargelim -adce -instcombine -simplifycfg
 
 
-# This rule runs the pool allocator on the .llvm.bc file to produce a new .bc
+$(PROGRAMS_TO_TEST:%=Output/%.temp.bc): \
+Output/%.temp.bc: Output/%.llvm.bc 
+	-$(LLVMLD) -link-as-library $< $(PA_PRE_RT) -o $@
+
+$(PROGRAMS_TO_TEST:%=Output/%.base.bc): \
+Output/%.base.bc: Output/%.temp.bc $(LOPT)
+	-$(LOPT) -instnamer -internalize -globaldce $< -f -o $@ 
+
+# This rule runs the pool allocator on the .base.bc file to produce a new .bc
 # file
 $(PROGRAMS_TO_TEST:%=Output/%.poolalloc.bc): \
-Output/%.poolalloc.bc: Output/%.llvm.bc $(PA_SO) $(LOPT)
+Output/%.poolalloc.bc: Output/%.base.bc $(PA_SO) $(LOPT)
 	-@rm -f $(CURDIR)/$@.info
 	-$(OPT_PA_STATS) -poolalloc $(EXTRA_PA_FLAGS) $(OPTZN_PASSES) -pooloptimize $< -o $@ -f 2>&1 > $@.out
 
 $(PROGRAMS_TO_TEST:%=Output/%.basepa.bc): \
-Output/%.basepa.bc: Output/%.llvm.bc $(PA_SO) $(LOPT)
+Output/%.basepa.bc: Output/%.base.bc $(PA_SO) $(LOPT)
 	-@rm -f $(CURDIR)/$@.info
 	-$(OPT_PA_STATS) -poolalloc -poolalloc-disable-alignopt -poolalloc-force-all-poolfrees -poolalloc-heuristic=AllNodes $(OPTZN_PASSES) $< -o $@ -f 2>&1 > $@.out
 
 
 $(PROGRAMS_TO_TEST:%=Output/%.mallocrepl.bc): \
-Output/%.mallocrepl.bc: Output/%.llvm.bc $(PA_SO) $(LOPT)
+Output/%.mallocrepl.bc: Output/%.base.bc $(PA_SO) $(LOPT)
 	-@rm -f $(CURDIR)/$@.info
 	-$(OPT_PA_STATS) -poolalloc -poolalloc-heuristic=AllInOneGlobalPool $(OPTZN_PASSES) $< -o $@ -f 2>&1 > $@.out
 
 $(PROGRAMS_TO_TEST:%=Output/%.onlyoverhead.bc): \
-Output/%.onlyoverhead.bc: Output/%.llvm.bc $(PA_SO) $(LOPT)
+Output/%.onlyoverhead.bc: Output/%.base.bc $(PA_SO) $(LOPT)
 	-@rm -f $(CURDIR)/$@.info
 	-$(OPT_PA_STATS) -poolalloc -poolalloc-heuristic=OnlyOverhead $(OPTZN_PASSES) $< -o $@ -f 2>&1 > $@.out
 
 $(PROGRAMS_TO_TEST:%=Output/%.nonpa.bc): \
-Output/%.nonpa.bc: Output/%.llvm.bc $(LOPT)
+Output/%.nonpa.bc: Output/%.base.bc $(LOPT)
 	-@rm -f $(CURDIR)/$@.info
 	-$(LOPT) $(OPTZN_PASSES) $< -o $@ -f 2>&1 > $@.out
 





More information about the llvm-commits mailing list