[llvm-commits] [vector_llvm] CVS: llvm/examples/SIMD/MADFilter/Makefile mad.h mad_filter.altivec.handwritten.c mad_filter.sse.handwritten.c mad_filter.vectorc.c main.c

Sun Oct 23 15:50:20 PDT 2005

Changes in directory llvm/examples/SIMD/MADFilter:

Makefile added (r1.1.2.1)
mad.h added (r1.1.2.1)
mad_filter.altivec.handwritten.c added (r1.1.2.1)
mad_filter.sse.handwritten.c added (r1.1.2.1)
mad_filter.vectorc.c added (r1.1.2.1)
main.c added (r1.1.2.1)
---
Log message:

Examples to illustrate Vector LLVM's SIMD support.

---
Diffs of the changes:  (+1109 -0)

 Makefile                         |    4 
 mad.h                            |  932 +++++++++++++++++++++++++++++++++++++++
 mad_filter.altivec.handwritten.c |   15 
 mad_filter.sse.handwritten.c     |   16 
 mad_filter.vectorc.c             |   13 
 main.c                           |  129 +++++
 6 files changed, 1109 insertions

Index: llvm/examples/SIMD/MADFilter/Makefile
diff -c /dev/null llvm/examples/SIMD/MADFilter/Makefile:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:00 2005
--- llvm/examples/SIMD/MADFilter/Makefile	Sun Oct 23 17:49:40 2005
***************
*** 0 ****
--- 1,4 ----
+ NAME= mad_filter
+ 
+ include ../Makefile.common
+ 

Index: llvm/examples/SIMD/MADFilter/mad.h
diff -c /dev/null llvm/examples/SIMD/MADFilter/mad.h:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:17 2005
--- llvm/examples/SIMD/MADFilter/mad.h	Sun Oct 23 17:49:40 2005
***************
*** 0 ****
--- 1,932 ----
+ /*
+  * libmad - MPEG audio decoder library
+  * Copyright (C) 2000-2001 Robert Leslie
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation; either version 2 of the License, or
+  * (at your option) any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, write to the Free Software
+  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+  *
+  * If you would like to negotiate alternate licensing terms, you may do
+  * so by contacting the author: Robert Leslie <rob at mars.org>
+  */
+ 
+ # ifdef __cplusplus
+ extern "C" {
+ # endif
+ 
+ # define FPM_INTEL
+ 
+ # define SIZEOF_INT 4
+ # define SIZEOF_LONG 4
+ # define SIZEOF_LONG_LONG 8
+ 
+ /* Id: version.h,v 1.20 2001/10/27 22:47:32 rob Exp */
+ 
+ # ifndef LIBMAD_VERSION_H
+ # define LIBMAD_VERSION_H
+ 
+ # define MAD_VERSION_MAJOR	0
+ # define MAD_VERSION_MINOR	14
+ # define MAD_VERSION_PATCH	2
+ # define MAD_VERSION_EXTRA	" (beta)"
+ 
+ # define MAD_VERSION_STRINGIZE(str)	#str
+ # define MAD_VERSION_STRING(num)	MAD_VERSION_STRINGIZE(num)
+ 
+ # define MAD_VERSION		MAD_VERSION_STRING(MAD_VERSION_MAJOR) "."  \
+ 				MAD_VERSION_STRING(MAD_VERSION_MINOR) "."  \
+ 				MAD_VERSION_STRING(MAD_VERSION_PATCH)  \
+ 				MAD_VERSION_EXTRA
+ 
+ # define MAD_PUBLISHYEAR	"2000-2001"
+ # define MAD_AUTHOR		"Robert Leslie"
+ # define MAD_EMAIL		"rob at mars.org"
+ 
+ extern char const mad_version[];
+ extern char const mad_copyright[];
+ extern char const mad_author[];
+ extern char const mad_build[];
+ 
+ # endif
+ 
+ /* Id: fixed.h,v 1.30 2001/11/02 09:51:06 rob Exp */
+ 
+ # ifndef LIBMAD_FIXED_H
+ # define LIBMAD_FIXED_H
+ 
+ # if SIZEOF_INT >= 4
+ typedef   signed int mad_fixed_t;
+ 
+ typedef   signed int mad_fixed64hi_t;
+ typedef unsigned int mad_fixed64lo_t;
+ # else
+ typedef   signed long mad_fixed_t;
+ 
+ typedef   signed long mad_fixed64hi_t;
+ typedef unsigned long mad_fixed64lo_t;
+ # endif
+ 
+ # if defined(_MSC_VER)
+ #  define mad_fixed64_t  signed __int64
+ # elif 1 || defined(__GNUC__)
+ #  define mad_fixed64_t  signed long long
+ # endif
+ 
+ # if defined(FPM_FLOAT)
+ typedef double mad_sample_t;
+ # else
+ typedef mad_fixed_t mad_sample_t;
+ # endif
+ 
+ /*
+  * Fixed-point format: 0xABBBBBBB
+  * A == whole part      (sign + 3 bits)
+  * B == fractional part (28 bits)
+  *
+  * Values are signed two's complement, so the effective range is:
+  * 0x80000000 to 0x7fffffff
+  *       -8.0 to +7.9999999962747097015380859375
+  *
+  * The smallest representable value is:
+  * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
+  *
+  * 28 bits of fractional accuracy represent about
+  * 8.6 digits of decimal accuracy.
+  *
+  * Fixed-point numbers can be added or subtracted as normal
+  * integers, but multiplication requires shifting the 64-bit result
+  * from 56 fractional bits back to 28 (and rounding.)
+  *
+  * Changing the definition of MAD_F_FRACBITS is only partially
+  * supported, and must be done with care.
+  */
+ 
+ # define MAD_F_FRACBITS		28
+ 
+ # if MAD_F_FRACBITS == 28
+ #  define MAD_F(x)		((mad_fixed_t) (x##L))
+ # else
+ #  if MAD_F_FRACBITS < 28
+ #   warning "MAD_F_FRACBITS < 28"
+ #   define MAD_F(x)		((mad_fixed_t)  \
+ 				 (((x##L) +  \
+ 				   (1L << (28 - MAD_F_FRACBITS - 1))) >>  \
+ 				  (28 - MAD_F_FRACBITS)))
+ #  elif MAD_F_FRACBITS > 28
+ #   error "MAD_F_FRACBITS > 28 not currently supported"
+ #   define MAD_F(x)		((mad_fixed_t)  \
+ 				 ((x##L) << (MAD_F_FRACBITS - 28)))
+ #  endif
+ # endif
+ 
+ # define MAD_F_MIN		((mad_fixed_t) -0x80000000L)
+ # define MAD_F_MAX		((mad_fixed_t) +0x7fffffffL)
+ 
+ # define MAD_F_ONE		MAD_F(0x10000000)
+ 
+ # define mad_f_tofixed(x)	((mad_fixed_t)  \
+ 				 ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
+ # define mad_f_todouble(x)	((double)  \
+ 				 ((x) / (double) (1L << MAD_F_FRACBITS)))
+ 
+ # define mad_f_intpart(x)	((x) >> MAD_F_FRACBITS)
+ # define mad_f_fracpart(x)	((x) & ((1L << MAD_F_FRACBITS) - 1))
+ 				/* (x should be positive) */
+ 
+ # define mad_f_fromint(x)	((x) << MAD_F_FRACBITS)
+ 
+ # define mad_f_add(x, y)	((x) + (y))
+ # define mad_f_sub(x, y)	((x) - (y))
+ 
+ # if defined(FPM_FLOAT)
+ #  error "FPM_FLOAT not yet supported"
+ 
+ #  undef MAD_F
+ #  define MAD_F(x)		mad_f_todouble(x)
+ 
+ #  define mad_f_mul(x, y)	((x) * (y))
+ #  define mad_f_scale64
+ 
+ #  undef ASO_ZEROCHECK
+ 
+ # elif defined(FPM_64BIT)
+ 
+ /*
+  * This version should be the most accurate if 64-bit types are supported by
+  * the compiler, although it may not be the most efficient.
+  */
+ #  if defined(OPT_ACCURACY)
+ #   define mad_f_mul(x, y)  \
+     ((mad_fixed_t)  \
+      ((((mad_fixed64_t) (x) * (y)) +  \
+        (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
+ #  else
+ #   define mad_f_mul(x, y)  \
+     ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
+ #  endif
+ 
+ #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
+ 
+ /* --- Intel --------------------------------------------------------------- */
+ 
+ # elif defined(FPM_INTEL)
+ 
+ #  if defined(_MSC_VER)
+ #   pragma warning(push)
+ #   pragma warning(disable: 4035)  /* no return value */
+ static __forceinline  /* MSVC-only fixed-point multiply of x and y */
+ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
+ {
+   enum {
+     fracbits = MAD_F_FRACBITS  /* NOTE(review): presumably an enum so the __asm block sees a constant shift count - confirm */
+   };
+ 
+   __asm {
+     mov eax, x  /* eax = x */
+     imul y  /* edx:eax = eax * y (signed 64-bit product) */
+     shrd eax, edx, fracbits  /* eax = low 32 bits of (edx:eax >> fracbits); no rounding */
+   }
+ 
+   /* implicit return of eax */
+ }
+ #   pragma warning(pop)
+ 
+ #   define mad_f_mul		mad_f_mul_inline
+ #   define mad_f_scale64
+ #  else
+ /*
+  * This Intel version is fast and accurate; the disposition of the least
+  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
+  */
+ #   define MAD_F_MLX(hi, lo, x, y)  \
+     asm ("imull %3"  \
+ 	 : "=a" (lo), "=d" (hi)  \
+ 	 : "%a" (x), "rm" (y)  \
+ 	 : "cc")
+ 
+ #   if defined(OPT_ACCURACY)
+ /*
+  * This gives best accuracy but is not very fast.
+  */
+ #    define MAD_F_MLA(hi, lo, x, y)  \
+     ({ mad_fixed64hi_t __hi;  \
+        mad_fixed64lo_t __lo;  \
+        MAD_F_MLX(__hi, __lo, (x), (y));  \
+        asm ("addl %2,%0\n\t"  \
+ 	    "adcl %3,%1"  \
+ 	    : "=rm" (lo), "=rm" (hi)  \
+ 	    : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi)  \
+ 	    : "cc");  \
+     })
+ #   endif  /* OPT_ACCURACY */
+ 
+ #   if defined(OPT_ACCURACY)
+ /*
+  * Surprisingly, this is faster than SHRD followed by ADC.
+  */
+ #    define mad_f_scale64(hi, lo)  \
+     ({ mad_fixed64hi_t __hi_;  \
+        mad_fixed64lo_t __lo_;  \
+        mad_fixed_t __result;  \
+        asm ("addl %4,%2\n\t"  \
+ 	    "adcl %5,%3"  \
+ 	    : "=rm" (__lo_), "=rm" (__hi_)  \
+ 	    : "0" (lo), "1" (hi),  \
+ 	      "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
+ 	    : "cc");  \
+        asm ("shrdl %3,%2,%1"  \
+ 	    : "=rm" (__result)  \
+ 	    : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
+ 	    : "cc");  \
+        __result;  \
+     })
+ #   else
+ #    define mad_f_scale64(hi, lo)  \
+     ({ mad_fixed_t __result;  \
+        asm ("shrdl %3,%2,%1"  \
+ 	    : "=rm" (__result)  \
+ 	    : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
+ 	    : "cc");  \
+        __result;  \
+     })
+ #   endif  /* OPT_ACCURACY */
+ 
+ #   define MAD_F_SCALEBITS  MAD_F_FRACBITS
+ #  endif
+ 
+ /* --- ARM ----------------------------------------------------------------- */
+ 
+ # elif defined(FPM_ARM)
+ 
+ /* 
+  * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
+  * least significant bit is properly rounded at no CPU cycle cost!
+  */
+ # if 1
+ /*
+  * There's a bug somewhere, possibly in the compiler, that sometimes makes
+  * this necessary instead of the default implementation via MAD_F_MLX and
+  * mad_f_scale64. It may be related to the use (or lack) of
+  * -finline-functions and/or -fstrength-reduce.
+  *
+  * This is also apparently faster than MAD_F_MLX/mad_f_scale64.
+  */
+ #  define mad_f_mul(x, y)  \
+     ({ mad_fixed64hi_t __hi;  \
+        mad_fixed64lo_t __lo;  \
+        mad_fixed_t __result;  \
+        asm ("smull	%0, %1, %3, %4\n\t"  \
+ 	    "movs	%0, %0, lsr %5\n\t"  \
+ 	    "adc	%2, %0, %1, lsl %6"  \
+ 	    : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+ 	    : "%r" (x), "r" (y),  \
+ 	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
+ 	    : "cc");  \
+        __result;  \
+     })
+ # endif
+ 
+ #  define MAD_F_MLX(hi, lo, x, y)  \
+     asm ("smull	%0, %1, %2, %3"  \
+ 	 : "=&r" (lo), "=&r" (hi)  \
+ 	 : "%r" (x), "r" (y))
+ 
+ #  define MAD_F_MLA(hi, lo, x, y)  \
+     asm ("smlal	%0, %1, %2, %3"  \
+ 	 : "+r" (lo), "+r" (hi)  \
+ 	 : "%r" (x), "r" (y))
+ 
+ #  define MAD_F_MLN(hi, lo)  \
+     asm ("rsbs	%0, %2, #0\n\t"  \
+ 	 "rsc	%1, %3, #0"  \
+ 	 : "=r" (lo), "=r" (hi)  \
+ 	 : "0" (lo), "1" (hi)  \
+ 	 : "cc")
+ 
+ #  define mad_f_scale64(hi, lo)  \
+     ({ mad_fixed_t __result;  \
+        asm ("movs	%0, %1, lsr %3\n\t"  \
+ 	    "adc	%0, %0, %2, lsl %4"  \
+ 	    : "=r" (__result)  \
+ 	    : "r" (lo), "r" (hi),  \
+ 	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
+ 	    : "cc");  \
+        __result;  \
+     })
+ 
+ #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
+ 
+ /* --- MIPS ---------------------------------------------------------------- */
+ 
+ # elif defined(FPM_MIPS)
+ 
+ /*
+  * This MIPS version is fast and accurate; the disposition of the least
+  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
+  */
+ #  define MAD_F_MLX(hi, lo, x, y)  \
+     asm ("mult	%2,%3"  \
+ 	 : "=l" (lo), "=h" (hi)  \
+ 	 : "%r" (x), "r" (y))
+ 
+ # if defined(HAVE_MADD_ASM)
+ #  define MAD_F_MLA(hi, lo, x, y)  \
+     asm ("madd	%2,%3"  \
+ 	 : "+l" (lo), "+h" (hi)  \
+ 	 : "%r" (x), "r" (y))
+ # elif defined(HAVE_MADD16_ASM)
+ /*
+  * This loses significant accuracy due to the 16-bit integer limit in the
+  * multiply/accumulate instruction.
+  */
+ #  define MAD_F_ML0(hi, lo, x, y)  \
+     asm ("mult	%2,%3"  \
+ 	 : "=l" (lo), "=h" (hi)  \
+ 	 : "%r" ((x) >> 12), "r" ((y) >> 16))
+ #  define MAD_F_MLA(hi, lo, x, y)  \
+     asm ("madd16	%2,%3"  \
+ 	 : "+l" (lo), "+h" (hi)  \
+ 	 : "%r" ((x) >> 12), "r" ((y) >> 16))
+ #  define MAD_F_MLZ(hi, lo)  ((mad_fixed_t) (lo))
+ # endif
+ 
+ # if defined(OPT_SPEED)
+ #  define mad_f_scale64(hi, lo)  \
+     ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
+ #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
+ # endif
+ 
+ /* --- SPARC --------------------------------------------------------------- */
+ 
+ # elif defined(FPM_SPARC)
+ 
+ /*
+  * This SPARC V8 version is fast and accurate; the disposition of the least
+  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
+  */
+ #  define MAD_F_MLX(hi, lo, x, y)  \
+     asm ("smul %2, %3, %0\n\t"  \
+ 	 "rd %%y, %1"  \
+ 	 : "=r" (lo), "=r" (hi)  \
+ 	 : "%r" (x), "rI" (y))
+ 
+ /* --- PowerPC ------------------------------------------------------------- */
+ 
+ # elif defined(FPM_PPC)
+ 
+ /*
+  * This PowerPC version is tuned for the 4xx embedded processors. It is
+  * effectively a tuned version of FPM_64BIT. It is a little faster and just
+  * as accurate. The disposition of the least significant bit depends on
+  * OPT_ACCURACY via mad_f_scale64().
+  */
+ #  define MAD_F_MLX(hi, lo, x, y)  \
+     asm ("mulhw %1, %2, %3\n\t"  \
+ 	 "mullw %0, %2, %3"  \
+ 	 : "=&r" (lo), "=&r" (hi)  \
+ 	 : "%r" (x), "r" (y))
+ 
+ #  define MAD_F_MLA(hi, lo, x, y)  \
+     ({ mad_fixed64hi_t __hi;  \
+        mad_fixed64lo_t __lo;  \
+        MAD_F_MLX(__hi, __lo, (x), (y));  \
+        asm ("addc %0, %2, %3\n\t"  \
+ 	    "adde %1, %4, %5"  \
+ 	    : "=r" (lo), "=r" (hi)  \
+ 	    : "%r" (__lo), "0" (lo), "%r" (__hi), "1" (hi));  \
+     })
+ 
+ #  if defined(OPT_ACCURACY)
+ /*
+  * This is accurate and ~2 - 2.5 times slower than the unrounded version.
+  *
+  * The __volatile__ qualifiers improve the generated code by another 5% (fewer
+  * spills to memory); eventually they should be removed.
+  */
+ #   define mad_f_scale64(hi, lo)  \
+     ({ mad_fixed_t __result;  \
+        mad_fixed64hi_t __hi_;  \
+        mad_fixed64lo_t __lo_;  \
+        asm __volatile__ ("addc %0, %2, %4\n\t"  \
+ 			 "addze %1, %3"  \
+ 	    : "=r" (__lo_), "=r" (__hi_)  \
+ 	    : "r" (lo), "r" (hi), "r" (1 << (MAD_F_SCALEBITS - 1)));  \
+        asm __volatile__ ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
+ 			 "rlwimi %0, %1,32-%3,%3,31"  \
+ 	    : "=&r" (__result)  \
+ 	    : "r" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS));  \
+ 	    __result;  \
+     })
+ #  else
+ #   define mad_f_scale64(hi, lo)  \
+     ({ mad_fixed_t __result;  \
+        asm ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
+ 	    "rlwimi %0, %1,32-%3,%3,31"  \
+ 	    : "=r" (__result)  \
+ 	    : "r" (lo), "r" (hi), "I" (MAD_F_SCALEBITS));  \
+ 	    __result;  \
+     })
+ #  endif  /* OPT_ACCURACY */
+ 
+ #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
+ 
+ /* --- Default ------------------------------------------------------------- */
+ 
+ # elif defined(FPM_DEFAULT)
+ 
+ /*
+  * This version is the most portable but it loses significant accuracy.
+  * Furthermore, accuracy is biased against the second argument, so care
+  * should be taken when ordering operands.
+  *
+  * The scale factors are constant as this is not used with SSO.
+  *
+  * Pre-rounding is required to stay within the limits of compliance.
+  */
+ #  if defined(OPT_SPEED)
+ #   define mad_f_mul(x, y)	(((x) >> 12) * ((y) >> 16))
+ #  else
+ #   define mad_f_mul(x, y)	((((x) + (1L << 11)) >> 12) *  \
+ 				 (((y) + (1L << 15)) >> 16))
+ #  endif
+ 
+ /* ------------------------------------------------------------------------- */
+ 
+ # else
+ #  error "no FPM selected"
+ # endif
+ 
+ /* default implementations */
+ 
+ # if !defined(mad_f_mul)
+ #  define mad_f_mul(x, y)  \
+     ({ mad_fixed64hi_t __hi;  \
+        mad_fixed64lo_t __lo;  \
+        MAD_F_MLX(__hi, __lo, (x), (y));  \
+        mad_f_scale64(__hi, __lo);  \
+     })
+ # endif
+ 
+ # if !defined(MAD_F_MLA)
+ #  define MAD_F_ML0(hi, lo, x, y)	((lo)  = mad_f_mul((x), (y)))
+ #  define MAD_F_MLA(hi, lo, x, y)	((lo) += mad_f_mul((x), (y)))
+ #  define MAD_F_MLN(hi, lo)		((lo)  = -(lo))
+ #  define MAD_F_MLZ(hi, lo)		((void) (hi), (mad_fixed_t) (lo))
+ # endif
+ 
+ # if !defined(MAD_F_ML0)
+ #  define MAD_F_ML0(hi, lo, x, y)	MAD_F_MLX((hi), (lo), (x), (y))
+ # endif
+ 
+ # if !defined(MAD_F_MLN)
+ #  define MAD_F_MLN(hi, lo)		((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
+ # endif
+ 
+ # if !defined(MAD_F_MLZ)
+ #  define MAD_F_MLZ(hi, lo)		mad_f_scale64((hi), (lo))
+ # endif
+ 
+ # if !defined(mad_f_scale64)
+ #  if defined(OPT_ACCURACY)
+ #   define mad_f_scale64(hi, lo)  \
+     ((((mad_fixed_t)  \
+        (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
+ 	((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
+ #  else
+ #   define mad_f_scale64(hi, lo)  \
+     ((mad_fixed_t)  \
+      (((hi) << (32 - MAD_F_SCALEBITS)) |  \
+       ((lo) >> MAD_F_SCALEBITS)))
+ #  endif
+ #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
+ # endif
+ 
+ /* miscellaneous C routines */
+ 
+ mad_fixed_t mad_f_abs(mad_fixed_t);
+ 
+ # endif
+ 
+ /* Id: bit.h,v 1.8 2001/10/17 19:14:47 rob Exp */
+ 
+ # ifndef LIBMAD_BIT_H
+ # define LIBMAD_BIT_H
+ 
+ struct mad_bitptr {
+   unsigned char const *byte;  /* NOTE(review): presumably the current byte position in the bitstream - confirm in bit.c */
+   unsigned short cache;  /* NOTE(review): looks like buffered bits of the current byte - confirm in bit.c */
+   unsigned short left;  /* value reported by mad_bit_bitsleft(); presumably the unread bits left in cache */
+ };
+ 
+ void mad_bit_init(struct mad_bitptr *, unsigned char const *);
+ 
+ # define mad_bit_finish(bitptr)		/* nothing */
+ 
+ unsigned int mad_bit_length(struct mad_bitptr const *,
+ 			    struct mad_bitptr const *);
+ 
+ # define mad_bit_bitsleft(bitptr)  ((bitptr)->left)
+ unsigned char const *mad_bit_nextbyte(struct mad_bitptr const *);
+ 
+ void mad_bit_skip(struct mad_bitptr *, unsigned int);
+ unsigned long mad_bit_read(struct mad_bitptr *, unsigned int);
+ void mad_bit_write(struct mad_bitptr *, unsigned int, unsigned long);
+ 
+ unsigned short mad_bit_crc(struct mad_bitptr, unsigned int, unsigned short);
+ 
+ # endif
+ 
+ /* Id: timer.h,v 1.12 2001/11/03 03:57:11 rob Exp */
+ 
+ # ifndef LIBMAD_TIMER_H
+ # define LIBMAD_TIMER_H
+ 
+ typedef struct {
+   signed long seconds;		/* whole seconds */
+   unsigned long fraction;	/* 1/MAD_TIMER_RESOLUTION seconds */
+ } mad_timer_t;
+ 
+ extern mad_timer_t const mad_timer_zero;
+ 
+ # define MAD_TIMER_RESOLUTION	352800000UL
+ 
+ enum mad_units {
+   MAD_UNITS_HOURS	 =    -2,
+   MAD_UNITS_MINUTES	 =    -1,
+   MAD_UNITS_SECONDS	 =     0,
+ 
+   /* metric units */
+ 
+   MAD_UNITS_DECISECONDS	 =    10,
+   MAD_UNITS_CENTISECONDS =   100,
+   MAD_UNITS_MILLISECONDS =  1000,
+ 
+   /* audio sample units */
+ 
+   MAD_UNITS_8000_HZ	 =  8000,
+   MAD_UNITS_11025_HZ	 = 11025,
+   MAD_UNITS_12000_HZ	 = 12000,
+ 
+   MAD_UNITS_16000_HZ	 = 16000,
+   MAD_UNITS_22050_HZ	 = 22050,
+   MAD_UNITS_24000_HZ	 = 24000,
+ 
+   MAD_UNITS_32000_HZ	 = 32000,
+   MAD_UNITS_44100_HZ	 = 44100,
+   MAD_UNITS_48000_HZ	 = 48000,
+ 
+   /* video frame/field units */
+ 
+   MAD_UNITS_24_FPS	 =    24,
+   MAD_UNITS_25_FPS	 =    25,
+   MAD_UNITS_30_FPS	 =    30,
+   MAD_UNITS_48_FPS	 =    48,
+   MAD_UNITS_50_FPS	 =    50,
+   MAD_UNITS_60_FPS	 =    60,
+ 
+   /* CD audio frames */
+ 
+   MAD_UNITS_75_FPS	 =    75,
+ 
+   /* video drop-frame units */
+ 
+   MAD_UNITS_23_976_FPS	 =   -24,
+   MAD_UNITS_24_975_FPS	 =   -25,
+   MAD_UNITS_29_97_FPS	 =   -30,
+   MAD_UNITS_47_952_FPS	 =   -48,
+   MAD_UNITS_49_95_FPS	 =   -50,
+   MAD_UNITS_59_94_FPS	 =   -60
+ };
+ 
+ # define mad_timer_reset(timer)	((void) (*(timer) = mad_timer_zero))
+ 
+ int mad_timer_compare(mad_timer_t, mad_timer_t);
+ 
+ # define mad_timer_sign(timer)	mad_timer_compare((timer), mad_timer_zero)
+ 
+ void mad_timer_negate(mad_timer_t *);
+ mad_timer_t mad_timer_abs(mad_timer_t);
+ 
+ void mad_timer_set(mad_timer_t *, unsigned long, unsigned long, unsigned long);
+ void mad_timer_add(mad_timer_t *, mad_timer_t);
+ void mad_timer_multiply(mad_timer_t *, signed long);
+ 
+ signed long mad_timer_count(mad_timer_t, enum mad_units);
+ unsigned long mad_timer_fraction(mad_timer_t, unsigned long);
+ void mad_timer_string(mad_timer_t, char *, char const *,
+ 		      enum mad_units, enum mad_units, unsigned long);
+ 
+ # endif
+ 
+ /* Id: stream.h,v 1.15 2001/11/08 23:28:03 rob Exp */
+ 
+ # ifndef LIBMAD_STREAM_H
+ # define LIBMAD_STREAM_H
+ 
+ # define MAD_BUFFER_GUARD	8
+ # define MAD_BUFFER_MDLEN	(511 + 2048 + MAD_BUFFER_GUARD)
+ 
+ enum mad_error {
+   MAD_ERROR_NONE	   = 0x0000,	/* no error */
+ 
+   MAD_ERROR_BUFLEN	   = 0x0001,	/* input buffer too small (or EOF) */
+   MAD_ERROR_BUFPTR	   = 0x0002,	/* invalid (null) buffer pointer */
+ 
+   MAD_ERROR_NOMEM	   = 0x0031,	/* not enough memory */
+ 
+   MAD_ERROR_LOSTSYNC	   = 0x0101,	/* lost synchronization */
+   MAD_ERROR_BADLAYER	   = 0x0102,	/* reserved header layer value */
+   MAD_ERROR_BADBITRATE	   = 0x0103,	/* forbidden bitrate value */
+   MAD_ERROR_BADSAMPLERATE  = 0x0104,	/* reserved sample frequency value */
+   MAD_ERROR_BADEMPHASIS	   = 0x0105,	/* reserved emphasis value */
+ 
+   MAD_ERROR_BADCRC	   = 0x0201,	/* CRC check failed */
+   MAD_ERROR_BADBITALLOC	   = 0x0211,	/* forbidden bit allocation value */
+   MAD_ERROR_BADSCALEFACTOR = 0x0221,	/* bad scalefactor index */
+   MAD_ERROR_BADFRAMELEN	   = 0x0231,	/* bad frame length */
+   MAD_ERROR_BADBIGVALUES   = 0x0232,	/* bad big_values count */
+   MAD_ERROR_BADBLOCKTYPE   = 0x0233,	/* reserved block_type */
+   MAD_ERROR_BADSCFSI	   = 0x0234,	/* bad scalefactor selection info */
+   MAD_ERROR_BADDATAPTR	   = 0x0235,	/* bad main_data_begin pointer */
+   MAD_ERROR_BADPART3LEN	   = 0x0236,	/* bad audio data length */
+   MAD_ERROR_BADHUFFTABLE   = 0x0237,	/* bad Huffman table select */
+   MAD_ERROR_BADHUFFDATA	   = 0x0238,	/* Huffman data overrun */
+   MAD_ERROR_BADSTEREO	   = 0x0239	/* incompatible block_type for JS */
+ };
+ 
+ # define MAD_RECOVERABLE(error)	((error) & 0xff00)
+ 
+ struct mad_stream {
+   unsigned char const *buffer;		/* input bitstream buffer */
+   unsigned char const *bufend;		/* end of buffer */
+   unsigned long skiplen;		/* bytes to skip before next frame */
+ 
+   int sync;				/* stream sync found */
+   unsigned long freerate;		/* free bitrate (fixed) */
+ 
+   unsigned char const *this_frame;	/* start of current frame */
+   unsigned char const *next_frame;	/* start of next frame */
+   struct mad_bitptr ptr;		/* current processing bit pointer */
+ 
+   struct mad_bitptr anc_ptr;		/* ancillary bits pointer */
+   unsigned int anc_bitlen;		/* number of ancillary bits */
+ 
+   unsigned char (*main_data)[MAD_BUFFER_MDLEN];
+ 					/* Layer III main_data() */
+   unsigned int md_len;			/* bytes in main_data */
+ 
+   int options;				/* decoding options (see below) */
+   enum mad_error error;			/* error code (see above) */
+ };
+ 
+ enum {
+   MAD_OPTION_IGNORECRC      = 0x0001,	/* ignore CRC errors */
+   MAD_OPTION_HALFSAMPLERATE = 0x0002	/* generate PCM at 1/2 sample rate */
+ # if 0  /* not yet implemented */
+   MAD_OPTION_LEFTCHANNEL    = 0x0010,	/* decode left channel only */
+   MAD_OPTION_RIGHTCHANNEL   = 0x0020,	/* decode right channel only */
+   MAD_OPTION_SINGLECHANNEL  = 0x0030	/* combine channels */
+ # endif
+ };
+ 
+ void mad_stream_init(struct mad_stream *);
+ void mad_stream_finish(struct mad_stream *);
+ 
+ # define mad_stream_options(stream, opts)  \
+     ((void) ((stream)->options = (opts)))
+ 
+ void mad_stream_buffer(struct mad_stream *,
+ 		       unsigned char const *, unsigned long);
+ void mad_stream_skip(struct mad_stream *, unsigned long);
+ 
+ int mad_stream_sync(struct mad_stream *);
+ 
+ char const *mad_stream_errorstr(struct mad_stream const *);
+ 
+ # endif
+ 
+ /* Id: frame.h,v 1.16 2001/10/17 19:13:41 rob Exp */
+ 
+ # ifndef LIBMAD_FRAME_H
+ # define LIBMAD_FRAME_H
+ 
+ enum mad_layer {
+   MAD_LAYER_I   = 1,			/* Layer I */
+   MAD_LAYER_II  = 2,			/* Layer II */
+   MAD_LAYER_III = 3			/* Layer III */
+ };
+ 
+ enum mad_mode {
+   MAD_MODE_SINGLE_CHANNEL = 0,		/* single channel */
+   MAD_MODE_DUAL_CHANNEL	  = 1,		/* dual channel */
+   MAD_MODE_JOINT_STEREO	  = 2,		/* joint (MS/intensity) stereo */
+   MAD_MODE_STEREO	  = 3		/* normal LR stereo */
+ };
+ 
+ enum mad_emphasis {
+   MAD_EMPHASIS_NONE	  = 0,		/* no emphasis */
+   MAD_EMPHASIS_50_15_US	  = 1,		/* 50/15 microseconds emphasis */
+   MAD_EMPHASIS_CCITT_J_17 = 3		/* CCITT J.17 emphasis */
+ };
+ 
+ struct mad_header {
+   enum mad_layer layer;			/* audio layer (1, 2, or 3) */
+   enum mad_mode mode;			/* channel mode (see above) */
+   int mode_extension;			/* additional mode info */
+   enum mad_emphasis emphasis;		/* de-emphasis to use (see above) */
+ 
+   unsigned long bitrate;		/* stream bitrate (bps) */
+   unsigned int samplerate;		/* sampling frequency (Hz) */
+ 
+   unsigned short crc_check;		/* frame CRC accumulator */
+   unsigned short crc_target;		/* final target CRC checksum */
+ 
+   int flags;				/* flags (see below) */
+   int private_bits;			/* private bits (see below) */
+ 
+   mad_timer_t duration;			/* audio playing time of frame */
+ };
+ 
+ struct mad_frame {
+   struct mad_header header;		/* MPEG audio header */
+ 
+   int options;				/* decoding options (from stream) */
+ 
+   mad_fixed_t sbsample[2][36][32];	/* synthesis subband filter samples */
+   mad_fixed_t (*overlap)[2][32][18];	/* Layer III block overlap data */
+ };
+ 
+ # define MAD_NCHANNELS(header)		((header)->mode ? 2 : 1)
+ # define MAD_NSBSAMPLES(header)  \
+   ((header)->layer == MAD_LAYER_I ? 12 :  \
+    (((header)->layer == MAD_LAYER_III &&  \
+      ((header)->flags & MAD_FLAG_LSF_EXT)) ? 18 : 36))
+ 
+ enum {
+   MAD_FLAG_NPRIVATE_III	= 0x0007,	/* number of Layer III private bits */
+   MAD_FLAG_INCOMPLETE	= 0x0008,	/* header but not data is decoded */
+ 
+   MAD_FLAG_PROTECTION	= 0x0010,	/* frame has CRC protection */
+   MAD_FLAG_COPYRIGHT	= 0x0020,	/* frame is copyright */
+   MAD_FLAG_ORIGINAL	= 0x0040,	/* frame is original (else copy) */
+   MAD_FLAG_PADDING	= 0x0080,	/* frame has additional slot */
+ 
+   MAD_FLAG_I_STEREO	= 0x0100,	/* uses intensity joint stereo */
+   MAD_FLAG_MS_STEREO	= 0x0200,	/* uses middle/side joint stereo */
+   MAD_FLAG_FREEFORMAT	= 0x0400,	/* uses free format bitrate */
+ 
+   MAD_FLAG_LSF_EXT	= 0x1000,	/* lower sampling freq. extension */
+   MAD_FLAG_MC_EXT	= 0x2000,	/* multichannel audio extension */
+   MAD_FLAG_MPEG_2_5_EXT	= 0x4000	/* MPEG 2.5 (unofficial) extension */
+ };
+ 
+ enum {
+   MAD_PRIVATE_HEADER	= 0x0100,	/* header private bit */
+   MAD_PRIVATE_III	= 0x001f	/* Layer III private bits (up to 5) */
+ };
+ 
+ void mad_header_init(struct mad_header *);
+ 
+ # define mad_header_finish(header)  /* nothing */
+ 
+ int mad_header_decode(struct mad_header *, struct mad_stream *);
+ 
+ void mad_frame_init(struct mad_frame *);
+ void mad_frame_finish(struct mad_frame *);
+ 
+ int mad_frame_decode(struct mad_frame *, struct mad_stream *);
+ 
+ void mad_frame_mute(struct mad_frame *);
+ 
+ # endif
+ 
+ /* Id: synth.h,v 1.11 2001/11/08 23:28:03 rob Exp */
+ 
+ # ifndef LIBMAD_SYNTH_H
+ # define LIBMAD_SYNTH_H
+ 
+ struct mad_pcm {
+   unsigned int samplerate;		/* sampling frequency (Hz) */
+   unsigned short channels;		/* number of channels */
+   unsigned short length;		/* number of samples per channel */
+   mad_fixed_t samples[2][1152];		/* PCM output samples [ch][sample] */
+ };
+ 
+ struct mad_synth {
+   mad_fixed_t filter[2][2][2][16][8];	/* polyphase filterbank outputs */
+   					/* [ch][eo][peo][s][v] */
+ 
+   unsigned int phase;			/* current processing phase */
+ 
+   struct mad_pcm pcm;			/* PCM output */
+ };
+ 
+ /* single channel PCM selector */
+ enum {
+   MAD_PCM_CHANNEL_SINGLE = 0
+ };
+ 
+ /* dual channel PCM selector */
+ enum {
+   MAD_PCM_CHANNEL_DUAL_1 = 0,
+   MAD_PCM_CHANNEL_DUAL_2 = 1
+ };
+ 
+ /* stereo PCM selector */
+ enum {
+   MAD_PCM_CHANNEL_STEREO_LEFT  = 0,
+   MAD_PCM_CHANNEL_STEREO_RIGHT = 1
+ };
+ 
+ void mad_synth_init(struct mad_synth *);
+ 
+ # define mad_synth_finish(synth)  /* nothing */
+ 
+ void mad_synth_mute(struct mad_synth *);
+ 
+ void mad_synth_frame(struct mad_synth *, struct mad_frame const *);
+ 
+ # endif
+ 
+ /* Id: decoder.h,v 1.13 2001/11/03 03:57:11 rob Exp */
+ 
+ # ifndef LIBMAD_DECODER_H
+ # define LIBMAD_DECODER_H
+ 
+ enum mad_decoder_mode {
+   MAD_DECODER_MODE_SYNC  = 0,
+   MAD_DECODER_MODE_ASYNC
+ };
+ 
+ enum mad_flow {
+   MAD_FLOW_CONTINUE = 0x0000,	/* continue normally */
+   MAD_FLOW_STOP     = 0x0010,	/* stop decoding normally */
+   MAD_FLOW_BREAK    = 0x0011,	/* stop decoding and signal an error */
+   MAD_FLOW_IGNORE   = 0x0020	/* ignore the current frame */
+ };
+ 
+ struct mad_decoder {
+   enum mad_decoder_mode mode;  /* MAD_DECODER_MODE_SYNC or MAD_DECODER_MODE_ASYNC */
+ 
+   int options;  /* assigned by mad_decoder_options() */
+ 
+   struct {
+     long pid;  /* NOTE(review): presumably the id of a helper process used in async mode - confirm in decoder.c */
+     int in;  /* NOTE(review): presumably a descriptor for the inbound channel - confirm */
+     int out;  /* NOTE(review): presumably a descriptor for the outbound channel - confirm */
+   } async;
+ 
+   struct {
+     struct mad_stream stream;
+     struct mad_frame frame;
+     struct mad_synth synth;
+   } *sync;  /* pointer to the stream/frame/synth working state */
+ 
+   void *cb_data;  /* NOTE(review): presumably passed to each callback as its first (void *) argument - confirm */
+ 
+   enum mad_flow (*input_func)(void *, struct mad_stream *);  /* callback types match the mad_decoder_init() parameters, in this order */
+   enum mad_flow (*header_func)(void *, struct mad_header const *);
+   enum mad_flow (*filter_func)(void *,
+ 			       struct mad_stream const *, struct mad_frame *);
+   enum mad_flow (*output_func)(void *,
+ 			       struct mad_header const *, struct mad_pcm *);
+   enum mad_flow (*error_func)(void *, struct mad_stream *, struct mad_frame *);
+   enum mad_flow (*message_func)(void *, void *, unsigned int *);
+ };
+ 
+ void mad_decoder_init(struct mad_decoder *, void *,
+ 		      enum mad_flow (*)(void *, struct mad_stream *),
+ 		      enum mad_flow (*)(void *, struct mad_header const *),
+ 		      enum mad_flow (*)(void *,
+ 					struct mad_stream const *,
+ 					struct mad_frame *),
+ 		      enum mad_flow (*)(void *,
+ 					struct mad_header const *,
+ 					struct mad_pcm *),
+ 		      enum mad_flow (*)(void *,
+ 					struct mad_stream *,
+ 					struct mad_frame *),
+ 		      enum mad_flow (*)(void *, void *, unsigned int *));
+ int mad_decoder_finish(struct mad_decoder *);
+ 
+ # define mad_decoder_options(decoder, opts)  \
+     ((void) ((decoder)->options = (opts)))
+ 
+ int mad_decoder_run(struct mad_decoder *, enum mad_decoder_mode);
+ int mad_decoder_message(struct mad_decoder *, void *, unsigned int *);
+ 
+ # endif
+ 
+ # ifdef __cplusplus
+ }
+ # endif

Index: llvm/examples/SIMD/MADFilter/mad_filter.altivec.handwritten.c
diff -c /dev/null llvm/examples/SIMD/MADFilter/mad_filter.altivec.handwritten.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:18 2005
--- llvm/examples/SIMD/MADFilter/mad_filter.altivec.handwritten.c	Sun Oct 23 17:49:40 2005
***************
*** 0 ****
--- 1,15 ----
+ void experimental_filter_vector(short *left_ch, short *right_ch, unsigned n) {
+   /* In place, 8 shorts per step: left -= right >> 2 and right -= left >> 2. */
+   vector signed short *lchan = (vector signed short*) left_ch;
+   vector signed short *rchan = (vector signed short*) right_ch;
+   vector unsigned short shift = (vector unsigned short) (2);  /* shift count for vec_sra */
+   unsigned blk, nblk = n/8;  /* whole vectors only; n % 8 trailing samples are untouched */
+   for (blk = 0; blk < nblk; ++blk) {
+     vector signed short l = lchan[blk];  /* read both inputs before any store */
+     vector signed short r = rchan[blk];
+     vector signed short r_out = vec_sub(r, vec_sra(l, shift));
+     vector signed short l_out = vec_sub(l, vec_sra(r, shift));
+     rchan[blk] = r_out;  /* store order: right, then left */
+     lchan[blk] = l_out;
+   }
+ }

Index: llvm/examples/SIMD/MADFilter/mad_filter.sse.handwritten.c
diff -c /dev/null llvm/examples/SIMD/MADFilter/mad_filter.sse.handwritten.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:18 2005
--- llvm/examples/SIMD/MADFilter/mad_filter.sse.handwritten.c	Sun Oct 23 17:49:40 2005
***************
*** 0 ****
--- 1,16 ----
+ #include "SSE.h"
+ 
+ void experimental_filter_vector(short *left_ch, short *right_ch, unsigned n) {
+   unsigned int i;  /* index of the current 8-sample group */
+   __m128i *left_vp = (__m128i*) left_ch;  /* NOTE(review): assumes 16-byte-aligned buffers - confirm against callers */
+   __m128i *right_vp = (__m128i*) right_ch;
+   /* In place: left -= right >> 2 and right -= left >> 2, both from the old values. */
+   for (i = 0; i < n/8; ++i) {  /* whole groups only; n % 8 trailing samples are untouched */
+     __m128i left  = left_vp[i];
+     __m128i right = right_vp[i];
+     __m128i left_sub = _mm_sub_epi16(left, _mm_srai_epi16(right, 2));
+     __m128i right_sub = _mm_sub_epi16(right, _mm_srai_epi16(left, 2));
+     right_vp[i] = right_sub;
+     left_vp[i] = left_sub;
+   }
+ }

Index: llvm/examples/SIMD/MADFilter/mad_filter.vectorc.c
diff -c /dev/null llvm/examples/SIMD/MADFilter/mad_filter.vectorc.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:18 2005
--- llvm/examples/SIMD/MADFilter/mad_filter.vectorc.c	Sun Oct 23 17:49:40 2005
***************
*** 0 ****
--- 1,13 ----
+ #include "VectorC.h"
+ 
+ void experimental_filter_vector(short *left_ch, short *right_ch, unsigned n) { /* Vector C variant of the filter */
+   unsigned i;
+   for (i = 0; i < n/8; ++i) { /* n/8 iterations; n % 8 trailing samples are not visited */
+     short left = vllvm_load_short(left_ch, 8, i); /* presumably loads 8-short block i - confirm in VectorC.h */
+     short right = vllvm_load_short(right_ch, 8, i);
+     short left_sub = left - (right >> 2); /* both results are computed from the loaded values */
+     short right_sub = right - (left >> 2);
+     vllvm_store_short(right_sub, right_ch, i);
+     vllvm_store_short(left_sub, left_ch, i);
+   }
+ }

Index: llvm/examples/SIMD/MADFilter/main.c
diff -c /dev/null llvm/examples/SIMD/MADFilter/main.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:18 2005
--- llvm/examples/SIMD/MADFilter/main.c	Sun Oct 23 17:49:41 2005
***************
*** 0 ****
--- 1,129 ----
+ /*
+  * This program adapted from
+  * mad - MPEG audio decoder
+  * Copyright (C) 2000-2001 Robert Leslie
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation; either version 2 of the License, or
+  * (at your option) any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, write to the Free Software
+  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+  *
+  * $Id: main.c,v 1.1.2.1 2005/10/23 22:49:41 bocchino Exp $
+  */
+ 
+ # include <stdlib.h>
+ # include <stdio.h>
+ 
+ # include "mad.h"
+ #include "../_malloc.h"
+ #include <sys/time.h>
+ #include <sys/times.h>
+ 
+ void experimental_filter_scalar(short*, short*, unsigned);
+ void experimental_filter_vector(short*, short*, unsigned);
+ void init_channel(short*, unsigned);
+ int compare_channels(short *ch1, short *ch2, unsigned);
+ /* Working buffers for the scalar and the vector filter runs. */
+ short *scalar_left, *scalar_right;
+ short *vector_left, *vector_right;
+ /* Samples per channel, and that size in bytes (parenthesized so it expands safely). */
+ #define CHANNEL_SIZE 1152
+ #define ARRAY_SIZE (CHANNEL_SIZE * sizeof(short))
+ 
+ void run(long *scalar_time, long *vector_time) {
+   /* Benchmarks both filters: times 1000000 calls of each with times(),
+    * stores the tms_utime deltas through the two out-pointers, prints them,
+    * and exits with status 1 if the scalar and vector results differ.
+    * The four buffers allocated here are not released by this function. */
+   struct tms buf_s, buf_e;
+   unsigned i;
+ 
+   scalar_left = _malloc(ARRAY_SIZE);
+   scalar_right = _malloc(ARRAY_SIZE);
+   vector_left = _malloc(ARRAY_SIZE);
+   vector_right = _malloc(ARRAY_SIZE);
+ 
+   init_channel(scalar_left, CHANNEL_SIZE);
+   init_channel(scalar_right, CHANNEL_SIZE);
+   times(&buf_s);
+   for (i = 0; i < 1000000; ++i)
+     experimental_filter_scalar(scalar_left, scalar_right, CHANNEL_SIZE);
+   times(&buf_e);
+   *scalar_time = buf_e.tms_utime - buf_s.tms_utime;
+   printf("scalar time=%ld, ", *scalar_time);  /* long needs %ld, not %d */
+ 
+   init_channel(vector_left, CHANNEL_SIZE);
+   init_channel(vector_right, CHANNEL_SIZE);
+   times(&buf_s);
+   for (i = 0; i < 1000000; ++i)
+     experimental_filter_vector(vector_left, vector_right, CHANNEL_SIZE);
+   times(&buf_e);
+   *vector_time = buf_e.tms_utime - buf_s.tms_utime;
+   printf("vector time=%ld, ", *vector_time);
+ 
+   float speedup = (float) *scalar_time / *vector_time;
+   printf("speedup=%f\n", speedup);
+   if (!compare_channels(scalar_left, vector_left, CHANNEL_SIZE) ||
+       !compare_channels(scalar_right, vector_right, CHANNEL_SIZE)) {
+     printf("FAILED\n");
+     exit(1);
+   }
+ }
+ 
+ int
+ main (void) {
+   /* Calls run() NRUNS times (NRUNS is not defined in this file), keeps the
+    * smallest scalar and vector timings, and prints them with their ratio. */
+   long best_scalar = -1, best_vector = -1;  /* -1 means no sample yet */
+   long scalar, vector;
+   unsigned i;
+   for (i = 0; i < NRUNS; ++i) {
+     run (&scalar, &vector);
+     if (best_scalar < 0 || best_scalar > scalar)
+       best_scalar = scalar;
+     if (best_vector < 0 || best_vector > vector)
+       best_vector = vector;
+   }
+   printf("best scalar=%ld, ", best_scalar);  /* long needs %ld, not %d */
+   printf("best vector=%ld, ", best_vector);
+   printf("speedup=%f\n", ((float) best_scalar)/best_vector);
+   printf ("PASSED\n");
+   return 0;
+ }
+ 
+ void init_channel(short *channel, unsigned n) {
+   /* Fills channel[0..n-1] with the ramp 0, 1, ..., n-1 (each index narrowed to short). */
+   unsigned i;
+   for (i = 0; i < n; ++i) channel[i] = (short) i;
+ }
+ 
+ int compare_channels(short *ch1, short *ch2, unsigned n) {
+   /* Returns 1 if the first n samples of ch1 and ch2 are equal, 0 otherwise. */
+   unsigned i = 0;
+   while (i < n && ch1[i] == ch2[i])
+     ++i;
+   return i == n;
+ }
+ 
+ void experimental_filter_scalar(short *left_ch, short *right_ch, unsigned n) {
+   /* Scalar reference filter: in place, each channel's sample is reduced by
+    * the other channel's original sample shifted right by 2. */
+   unsigned int i;
+   for (i = 0; i < n; ++i) {
+     /* Load both inputs first so each update uses the pre-update value. */
+     short left = left_ch[i];
+     short right = right_ch[i];
+     right_ch[i] -= left >> 2;
+     left_ch[i] -= right >> 2;
+   }
+ }
+