[llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/lencod/.cvsignore Makefile annexb.c annexb.h biariencode.c biariencode.h block.c block.h cabac.c cabac.h configfile.c configfile.h context_ini.c context_ini.h contributors.h ctx_tables.h decoder.c defines.h elements.h explicit_gop.c explicit_gop.h filehandle.c fmo.c fmo.h global.h header.c header.h ifunctions.h image.c image.h img_chroma.c img_chroma.h img_luma.c img_luma.h intrarefresh.c intrarefresh.h leaky_bucket.c leaky_bucket.h lencod.c loopFilter.c macroblock.c macroblock.h mb_access.c mb_access.h mbuffer.c mbuffer.h md_high.c md_highfast.c md_highloss.c md_low.c me_distortion.c me_distortion.h me_epzs.c me_epzs.h me_fullfast.c me_fullfast.h me_fullsearch.c me_fullsearch.h me_umhex.c me_umhex.h me_umhexsmp.c me_umhexsmp.h memalloc.c memalloc.h minmax.h mode_decision.c mode_decision.h mv-search.c mv-search.h nal.c nalu.c nalu.h nalucommon.c nalucommon.h output.c output.h parset.c parset.h parsetcommon.c parsetcommon.h q_matrix.c q_matrix.h q_offsets.c q_offsets.h ratectl.c ratectl.h rc_quadratic.c rc_quadratic.h rdopt.c rdopt_coding_state.c rdopt_coding_state.h rdpicdecision.c refbuf.c refbuf.h rtp.c rtp.h sei.c sei.h slice.c symbol.c symbol.h transform8x8.c transform8x8.h vlc.c vlc.h weighted_prediction.c win32.h

Anton Korobeynikov asl at math.spbu.ru
Sun Feb 4 06:38:55 PST 2007



Changes in directory llvm-test/MultiSource/Applications/JM/lencod:

.cvsignore updated: 1.3 -> 1.4
Makefile updated: 1.2 -> 1.3
annexb.c updated: 1.2 -> 1.3
annexb.h updated: 1.2 -> 1.3
biariencode.c updated: 1.2 -> 1.3
biariencode.h updated: 1.2 -> 1.3
block.c updated: 1.2 -> 1.3
block.h updated: 1.2 -> 1.3
cabac.c updated: 1.2 -> 1.3
cabac.h updated: 1.2 -> 1.3
configfile.c updated: 1.2 -> 1.3
configfile.h updated: 1.2 -> 1.3
context_ini.c updated: 1.2 -> 1.3
context_ini.h updated: 1.2 -> 1.3
contributors.h updated: 1.2 -> 1.3
ctx_tables.h updated: 1.2 -> 1.3
decoder.c updated: 1.2 -> 1.3
defines.h updated: 1.2 -> 1.3
elements.h updated: 1.2 -> 1.3
explicit_gop.c updated: 1.2 -> 1.3
explicit_gop.h updated: 1.2 -> 1.3
filehandle.c updated: 1.2 -> 1.3
fmo.c updated: 1.2 -> 1.3
fmo.h updated: 1.2 -> 1.3
global.h updated: 1.2 -> 1.3
header.c updated: 1.2 -> 1.3
header.h updated: 1.2 -> 1.3
ifunctions.h added (r1.1)
image.c updated: 1.2 -> 1.3
image.h updated: 1.2 -> 1.3
img_chroma.c added (r1.1)
img_chroma.h added (r1.1)
img_luma.c added (r1.1)
img_luma.h added (r1.1)
intrarefresh.c updated: 1.2 -> 1.3
intrarefresh.h updated: 1.2 -> 1.3
leaky_bucket.c updated: 1.2 -> 1.3
leaky_bucket.h updated: 1.2 -> 1.3
lencod.c updated: 1.2 -> 1.3
loopFilter.c updated: 1.2 -> 1.3
macroblock.c updated: 1.2 -> 1.3
macroblock.h updated: 1.2 -> 1.3
mb_access.c updated: 1.2 -> 1.3
mb_access.h updated: 1.2 -> 1.3
mbuffer.c updated: 1.2 -> 1.3
mbuffer.h updated: 1.2 -> 1.3
md_high.c added (r1.1)
md_highfast.c added (r1.1)
md_highloss.c added (r1.1)
md_low.c added (r1.1)
me_distortion.c added (r1.1)
me_distortion.h added (r1.1)
me_epzs.c added (r1.1)
me_epzs.h added (r1.1)
me_fullfast.c added (r1.1)
me_fullfast.h added (r1.1)
me_fullsearch.c added (r1.1)
me_fullsearch.h added (r1.1)
me_umhex.c added (r1.1)
me_umhex.h added (r1.1)
me_umhexsmp.c added (r1.1)
me_umhexsmp.h added (r1.1)
memalloc.c updated: 1.2 -> 1.3
memalloc.h updated: 1.2 -> 1.3
minmax.h updated: 1.2 -> 1.3
mode_decision.c updated: 1.2 -> 1.3
mode_decision.h updated: 1.2 -> 1.3
mv-search.c updated: 1.2 -> 1.3
mv-search.h updated: 1.2 -> 1.3
nal.c updated: 1.2 -> 1.3
nalu.c updated: 1.2 -> 1.3
nalu.h updated: 1.2 -> 1.3
nalucommon.c updated: 1.2 -> 1.3
nalucommon.h updated: 1.2 -> 1.3
output.c updated: 1.2 -> 1.3
output.h updated: 1.2 -> 1.3
parset.c updated: 1.2 -> 1.3
parset.h updated: 1.2 -> 1.3
parsetcommon.c updated: 1.2 -> 1.3
parsetcommon.h updated: 1.2 -> 1.3
q_matrix.c updated: 1.3 -> 1.4
q_matrix.h updated: 1.2 -> 1.3
q_offsets.c updated: 1.3 -> 1.4
q_offsets.h updated: 1.2 -> 1.3
ratectl.c updated: 1.2 -> 1.3
ratectl.h updated: 1.2 -> 1.3
rc_quadratic.c added (r1.1)
rc_quadratic.h added (r1.1)
rdopt.c updated: 1.2 -> 1.3
rdopt_coding_state.c updated: 1.2 -> 1.3
rdopt_coding_state.h updated: 1.2 -> 1.3
rdpicdecision.c updated: 1.2 -> 1.3
refbuf.c updated: 1.2 -> 1.3
refbuf.h updated: 1.2 -> 1.3
rtp.c updated: 1.2 -> 1.3
rtp.h updated: 1.2 -> 1.3
sei.c updated: 1.2 -> 1.3
sei.h updated: 1.2 -> 1.3
slice.c updated: 1.2 -> 1.3
symbol.c added (r1.1)
symbol.h added (r1.1)
transform8x8.c updated: 1.2 -> 1.3
transform8x8.h updated: 1.2 -> 1.3
vlc.c updated: 1.2 -> 1.3
vlc.h updated: 1.2 -> 1.3
weighted_prediction.c updated: 1.2 -> 1.3
win32.h added (r1.1)
---
Log message:

Updated JM to version 12.1. Also fixed input files. This 
(finally!) closes PR963: http://llvm.org/PR963 


---
Diffs of the changes:  (+63255 -0)

 .cvsignore            |    7 
 Makefile              |    9 
 annexb.c              |  114 +
 annexb.h              |   25 
 biariencode.c         |  339 +++
 biariencode.h         |  138 +
 block.c               | 3071 ++++++++++++++++++++++++++++++++++++
 block.h               |  173 ++
 cabac.c               | 1583 ++++++++++++++++++
 cabac.h               |   62 
 configfile.c          | 1241 ++++++++++++++
 configfile.h          |  324 +++
 context_ini.c         |  365 ++++
 context_ini.h         |   32 
 contributors.h        |  194 ++
 ctx_tables.h          |  729 ++++++++
 decoder.c             |  647 +++++++
 defines.h             |  201 ++
 elements.h            |   89 +
 explicit_gop.c        |  475 +++++
 explicit_gop.h        |   25 
 filehandle.c          |  190 ++
 fmo.c                 |  730 ++++++++
 fmo.h                 |   39 
 global.h              | 1483 +++++++++++++++++
 header.c              |  559 ++++++
 header.h              |   21 
 ifunctions.h          |  125 +
 image.c               | 2875 +++++++++++++++++++++++++++++++++
 image.h               |   34 
 img_chroma.c          |  127 +
 img_chroma.h          |   23 
 img_luma.c            |  557 ++++++
 img_luma.h            |   29 
 intrarefresh.c        |  136 +
 intrarefresh.h        |   26 
 leaky_bucket.c        |  292 +++
 leaky_bucket.h        |   29 
 lencod.c              | 2751 ++++++++++++++++++++++++++++++++
 loopFilter.c          |  480 +++++
 macroblock.c          | 4262 ++++++++++++++++++++++++++++++++++++++++++++++++++
 macroblock.h          |   58 
 mb_access.c           |  653 +++++++
 mb_access.h           |   33 
 mbuffer.c             | 3848 +++++++++++++++++++++++++++++++++++++++++++++
 mbuffer.h             |  194 ++
 md_high.c             |  459 +++++
 md_highfast.c         |  614 +++++++
 md_highloss.c         |  466 +++++
 md_low.c              |  620 +++++++
 me_distortion.c       | 1331 +++++++++++++++
 me_distortion.h       |   65 
 me_epzs.c             | 3123 ++++++++++++++++++++++++++++++++++++
 me_epzs.h             |   90 +
 me_fullfast.c         |  901 ++++++++++
 me_fullfast.h         |   29 
 me_fullsearch.c       |  750 ++++++++
 me_fullsearch.h       |   37 
 me_umhex.c            | 1550 ++++++++++++++++++
 me_umhex.h            |  191 ++
 me_umhexsmp.c         | 1251 ++++++++++++++
 me_umhexsmp.h         |  123 +
 memalloc.c            | 1145 +++++++++++++
 memalloc.h            |   80 
 minmax.h              |   17 
 mode_decision.c       | 1008 +++++++++++
 mode_decision.h       |   91 +
 mv-search.c           | 1854 +++++++++++++++++++++
 mv-search.h           |   77 
 nal.c                 |  147 +
 nalu.c                |   78 
 nalu.h                |   28 
 nalucommon.c          |   73 
 nalucommon.h          |   62 
 output.c              |  498 +++++
 output.h              |   28 
 parset.c              | 1048 ++++++++++++
 parset.h              |   48 
 parsetcommon.c        |  100 +
 parsetcommon.h        |  198 ++
 q_matrix.c            |  654 +++++++
 q_matrix.h            |   43 
 q_offsets.c           |  544 ++++++
 q_offsets.h           |   33 
 ratectl.c             |  247 ++
 ratectl.h             |   80 
 rc_quadratic.c        | 2355 +++++++++++++++++++++++++++
 rc_quadratic.h        |  164 +
 rdopt.c               | 3163 +++++++++++++++++++++++++++++++++++++
 rdopt_coding_state.c  |  195 ++
 rdopt_coding_state.h  |   52 
 rdpicdecision.c       |   64 
 refbuf.c              |   75 
 refbuf.h              |   23 
 rtp.c                 |  629 +++++++
 rtp.h                 |   72 
 sei.c                 | 1642 +++++++++++++++++++
 sei.h                 |  325 +++
 slice.c               | 1344 +++++++++++++++
 symbol.c              |   29 
 symbol.h              |   26 
 transform8x8.c        | 1579 ++++++++++++++++++
 transform8x8.h        |   32 
 vlc.c                 | 1445 ++++++++++++++++
 vlc.h                 |   57 
 weighted_prediction.c |  737 ++++++++
 win32.h               |   69 
 107 files changed, 63255 insertions(+)


Index: llvm-test/MultiSource/Applications/JM/lencod/.cvsignore
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/.cvsignore:1.4
--- /dev/null	Sun Feb  4 08:38:42 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/.cvsignore	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,7 @@
+Output
+data.txt
+leakybucketparam.cfg
+log.dat
+stats.dat
+test.264
+test_rec.yuv


Index: llvm-test/MultiSource/Applications/JM/lencod/Makefile
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/Makefile:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/Makefile	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,9 @@
+LEVEL = ../../../..
+PROG     = lencod
+CPPFLAGS = -D __USE_LARGEFILE64 -D _FILE_OFFSET_BITS=64
+LDFLAGS  = -lm $(TOOLLINKOPTS)
+FP_TOLERANCE = 0.025
+
+RUN_OPTIONS = -d $(PROJ_SRC_DIR)/data/encoder.cfg -p InputFile=$(PROJ_SRC_DIR)/data/foreman_part_qcif_444.yuv
+
+include ../../../Makefile.multisrc


Index: llvm-test/MultiSource/Applications/JM/lencod/annexb.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/annexb.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/annexb.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,114 @@
+
+/*!
+ *************************************************************************************
+ * \file annexb.c
+ *
+ * \brief
+ *    Annex B Byte Stream format NAL Unit writing routines
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ *************************************************************************************
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#include "global.h"
+#include "nalucommon.h"
+
+static FILE *f = NULL;    // the output file
+
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Writes a NALU to the Annex B Byte Stream: start code prefix, then n->len bytes of n->buf
+ * \param n
+ *    NALU to write; n->buf[0] is overwritten here with the assembled NAL header byte
+ * \return
+ *    number of bits written (start code prefix plus payload); exits on a short write
+ ********************************************************************************************
+*/
+int WriteAnnexbNALU (NALU_t *n)
+{
+  int BitsWritten = 0;
+
+  assert (n != NULL);
+  assert (n->forbidden_bit == 0);
+  assert (f != NULL);
+  assert (n->startcodeprefix_len == 3 || n->startcodeprefix_len == 4);
+
+// printf ("WriteAnnexbNALU: writing %d bytes w/ startcode_len %d\n", n->len+1, n->startcodeprefix_len);
+  if (n->startcodeprefix_len > 3)
+  {
+    putc (0, f);
+    BitsWritten += 8;   // was "=+ 8" (plain assignment of +8); accumulate like the other counters
+  }
+  putc (0, f);
+  putc (0, f);
+  putc (1, f);
+  BitsWritten += 24;
+
+  n->buf[0] = (unsigned char) ((n->forbidden_bit << 7) | (n->nal_reference_idc << 5) | n->nal_unit_type);
+
+// printf ("First Byte %x, nal_ref_idc %x, nal_unit_type %d\n", n->buf[0], n->nal_reference_idc, n->nal_unit_type);
+
+  if (n->len != fwrite (n->buf, 1, n->len, f))
+  {
+    printf ("Fatal: cannot write %d bytes to bitstream file, exit (-1)\n", n->len);
+    exit (-1);
+  }
+  BitsWritten += n->len * 8;
+
+  fflush (f);
+#if TRACE
+  fprintf (p_trace, "\n\nAnnex B NALU w/ %s startcode, len %d, forbidden_bit %d, nal_reference_idc %d, nal_unit_type %d\n\n",
+    n->startcodeprefix_len == 4?"long":"short", n->len, n->forbidden_bit, n->nal_reference_idc, n->nal_unit_type);
+  fflush (p_trace);
+#endif
+  return BitsWritten;
+}
+
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Opens the output file for the bytestream
+ *    (fopen mode "wb"); the stream is kept in the file-scope variable f
+ * \param Filename
+ *    The filename of the file to be opened
+ *
+ * \return
+ *    none.  Function terminates the program in case of an error
+ *
+ ********************************************************************************************
+*/
+void OpenAnnexbFile (char *Filename)
+{
+  if ((f = fopen (Filename, "wb")) == NULL)
+  {
+    printf ("Fatal: cannot open Annex B bytestream file '%s', exit (-1)\n", Filename);
+    exit (-1);
+  }
+}
+
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Closes the output bit stream file
+ *    (the file-scope stream f; note that f is not reset to NULL afterwards)
+ * \return
+ *    none.  Function terminates the program in case of an error
+ ********************************************************************************************
+*/
+void CloseAnnexbFile(void) {
+  if (fclose (f))
+  {
+    printf ("Fatal: cannot close Annex B bytestream file, exit (-1)\n");
+    exit (-1);
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/annexb.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/annexb.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/annexb.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,25 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    annexb.h
+ * \brief
+ *    Byte stream operations support
+ *    This code reflects JVT version xxx
+ *  \date 7 December 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+#ifndef _ANNEXB_H_
+#define _ANNEXB_H_
+
+#include "nalucommon.h"
+
+int WriteAnnexbNALU (NALU_t *n);       // returns the number of bits written
+void CloseAnnexbFile(void);            // (void) makes this a real prototype, matching the definition
+void OpenAnnexbFile (char *Filename);  // exits the program if the file cannot be opened
+
+#endif //_ANNEXB_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/biariencode.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/biariencode.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/biariencode.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,339 @@
+
+/*!
+ *************************************************************************************
+ * \file biariencode.c
+ *
+ * \brief
+ *    Routines for binary arithmetic encoding
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Gabi Blaettermann               <blaetter at hhi.de>
+ *************************************************************************************
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "global.h"
+#include "biariencode.h"
+
+int binCount = 0;
+
+/*!
+ ************************************************************************
+ * Macro for writing bytes of code
+ ***********************************************************************
+ */
+
+#define put_byte() { \
+                     Ecodestrm[(*Ecodestrm_len)++] = Ebuffer; \
+                     Ebits_to_go = 8; \
+                     while (eep->C > 7) { \
+                       eep->C-=8; \
+                       eep->E++; \
+                     } \
+                    }
+
+#define put_one_bit(b) { \
+                         Ebuffer <<= 1; Ebuffer |= (b); \
+                         if (--Ebits_to_go == 0) \
+                           put_byte(); \
+                       }
+
+#define put_one_bit_plus_outstanding(b) { \
+                                          put_one_bit(b); \
+                                          while (Ebits_to_follow > 0) \
+                                          { \
+                                            Ebits_to_follow--; \
+                                            put_one_bit(!(b)); \
+                                          } \
+                                         }
+
+int pic_bin_count;  // bin counter; increased by arienco_done_encoding()
+
+void reset_pic_bin_count(void)  // sets the counter back to zero
+{
+  pic_bin_count = 0;
+}
+
+int get_pic_bin_count(void)  // returns the current counter value
+{
+  return pic_bin_count;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocates one zero-filled EncodingEnvironment (calloc) and returns it
+ ************************************************************************
+ */
+EncodingEnvironmentPtr arienco_create_encoding_environment(void)
+{
+  EncodingEnvironmentPtr eep;
+
+  if ( (eep = (EncodingEnvironmentPtr) calloc(1,sizeof(EncodingEnvironment))) == NULL)
+    no_mem_exit("arienco_create_encoding_environment: eep");  // presumably does not return - confirm
+
+  return eep;  // released with arienco_delete_encoding_environment()
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Frees memory of the EncodingEnvironment struct; a NULL eep is reported via error()
+ ************************************************************************
+ */
+void arienco_delete_encoding_environment(EncodingEnvironmentPtr eep)
+{
+  if (eep == NULL)
+  {
+    snprintf(errortext, ET_SIZE, "Error freeing eep (NULL pointer)");
+    error (errortext, 200);  // reports the message with code 200
+  }
+  else
+    free(eep);
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initializes the EncodingEnvironment for the arithmetic coder
+ ************************************************************************
+ */
+void arienco_start_encoding(EncodingEnvironmentPtr eep,  // environment to reset
+                            unsigned char *code_buffer,  // output byte buffer, stored in Ecodestrm
+                            int *code_len )              // pointer to the byte count, stored in Ecodestrm_len
+{
+  Elow = 0;
+  Ebits_to_follow = 0;
+  Ebuffer = 0;
+  Ebits_to_go = 9; // to swallow first redundant bit
+
+  Ecodestrm = code_buffer;
+  Ecodestrm_len = code_len;
+
+  Erange = HALF-2;
+
+  eep->C = 0;  // C and E together count the processed bins (E*8 + C)
+  eep->E = 0;
+
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Returns the number of currently written bits: completed bytes, plus
+ *    outstanding (to-follow) bits, plus the bits already held in Ebuffer
+ */
+int arienco_bits_written(EncodingEnvironmentPtr eep)
+{
+   return (8 * (*Ecodestrm_len) + Ebits_to_follow + 8  - Ebits_to_go);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Terminates the arithmetic codeword, writes stop bit and stuffing bytes (if any)
+ ************************************************************************
+ */
+void arienco_done_encoding(EncodingEnvironmentPtr eep)
+{
+  put_one_bit_plus_outstanding((unsigned char) ((Elow >> (B_BITS-1)) & 1));  // top bit of Elow, then pending bits
+  put_one_bit((unsigned char) (Elow >> (B_BITS-2))&1);                        // next bit of Elow
+  put_one_bit((unsigned char) 1);                                             // stop bit
+
+  stats->bit_use_stuffingBits[img->type]+=(8-Ebits_to_go);  // statistics only
+
+  while (Ebits_to_go != 8)  // zero-pad until the current byte has been flushed
+    put_one_bit(0);
+
+  pic_bin_count += eep->E*8 + eep->C; // no of processed bins
+}
+
+extern int cabac_encoding;
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Actual arithmetic encoding of one binary symbol by using
+ *    the probability estimate of its associated context model
+ ************************************************************************
+ */
+void biari_encode_symbol(EncodingEnvironmentPtr eep, signed short symbol, BiContextTypePtr bi_ct )
+{
+  register unsigned int range = Erange;
+  register unsigned int low = Elow;
+  unsigned int rLPS = rLPS_table_64x4[bi_ct->state][(range>>6) & 3];  // LPS sub-range for this state and range quarter
+
+#if (2==TRACE)
+  if (cabac_encoding)
+    fprintf(p_trace, "%d  0x%04x  %d  %d\n", binCount++, Erange , bi_ct->state, bi_ct->MPS );
+#endif
+
+  range -= rLPS;  // range now holds the MPS sub-range
+  bi_ct->count += cabac_encoding;  // presumably counts uses only while cabac_encoding is set - confirm
+
+  /* covers all cases where code does not bother to shift down symbol to be
+   * either 0 or 1, e.g. in some cases for cbp, mb_Type etc the code simply
+   * masks off the bit position and passes in the resulting value */
+  symbol = (short) (symbol != 0);
+
+  if (symbol != bi_ct->MPS)
+  {
+    low += range;  // LPS path: skip over the MPS sub-range
+    range = rLPS;
+
+    if (!bi_ct->state)
+      bi_ct->MPS = (unsigned char) (bi_ct->MPS ^ 0x01);               // switch LPS if necessary
+    bi_ct->state = AC_next_state_LPS_64[bi_ct->state]; // next state
+  }
+  else
+    bi_ct->state = AC_next_state_MPS_64[bi_ct->state]; // next state
+
+  /* renormalisation */
+  while (range < QUARTER)
+  {
+    if (low >= HALF)
+    {
+      put_one_bit_plus_outstanding(1);
+      low -= HALF;
+    }
+    else if (low < QUARTER)
+    {
+      put_one_bit_plus_outstanding(0);
+    }
+    else
+    {
+      Ebits_to_follow++;  // bit value not yet known; resolved by the next emitted bit
+      low -= QUARTER;
+    }
+    low <<= 1;
+    range <<= 1;
+  }
+  Erange = range;
+  Elow = low;
+  eep->C++;  // one more processed bin
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Arithmetic encoding of one binary symbol assuming
+ *    a fixed prob. distribution with p(symbol) = 0.5 (no context is used)
+ ************************************************************************
+ */
+void biari_encode_symbol_eq_prob(EncodingEnvironmentPtr eep, signed short symbol)
+{
+  register unsigned int low = (Elow<<1);
+
+#if (2==TRACE)
+  extern int cabac_encoding;
+  if (cabac_encoding)
+    fprintf(p_trace, "%d  0x%04x\n", binCount++, Erange );
+#endif
+
+  if (symbol != 0)
+    low += Erange;
+
+  /* renormalisation as for biari_encode_symbol;
+     note that low has already been doubled */
+  if (low >= ONE)
+  {
+    put_one_bit_plus_outstanding(1);
+    low -= ONE;
+  }
+  else if (low < HALF)
+  {
+    put_one_bit_plus_outstanding(0);
+  }
+  else
+  {
+    Ebits_to_follow++;
+    low -= HALF;
+  }
+
+  Elow = low;   // runs for every branch above (was indented as if part of the else)
+  eep->C++;     // one more processed bin
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Arithmetic encoding for last symbol before termination
+ ************************************************************************
+ */
+void biari_encode_symbol_final(EncodingEnvironmentPtr eep, signed short symbol)
+{
+  register unsigned int range = Erange-2;
+  register unsigned int low = Elow;
+
+#if (2==TRACE)
+  extern int cabac_encoding;
+  if (cabac_encoding)
+    fprintf(p_trace, "%d  0x%04x\n", binCount++, Erange);
+#endif
+
+  if (symbol) {
+    low += range;
+    range = 2;
+  }
+
+  while (range < QUARTER)
+  {
+    if (low >= HALF)
+    {
+      put_one_bit_plus_outstanding(1);
+      low -= HALF;
+    }
+    else if (low < QUARTER)
+    {
+      put_one_bit_plus_outstanding(0);
+    }
+    else
+    {
+      Ebits_to_follow++;
+      low -= QUARTER;
+    }
+
+    low <<= 1;     // runs on every iteration (was indented as if part of the else)
+    range <<= 1;
+  }
+  Erange = range;
+  Elow = low;
+  eep->C++;        // one more processed bin
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initializes a given context from the pair ini[0], ini[1] and the current slice qp
+ ************************************************************************
+ */
+void biari_init_context (BiContextTypePtr ctx, const int* ini)
+{
+  int pstate = iClip3 ( 1, 126, ((ini[0]* imax(0, img->currentSlice->qp)) >> 4) + ini[1]);
+
+  if ( pstate >= 64 )  // upper half: MPS is 1, state counts up from 64
+  {
+    ctx->state  = (unsigned short) (pstate - 64);
+    ctx->MPS    = 1;
+  }
+  else                 // lower half: MPS is 0, state counts down from 63
+  {
+    ctx->state  = (unsigned short) (63 - pstate);
+    ctx->MPS    = 0;
+  }
+
+  ctx->count = 0;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/biariencode.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/biariencode.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/biariencode.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,138 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ *    biariencode.h
+ *
+ * \brief
+ *    Headerfile for binary arithmetic encoding routines
+ *
+ * \author
+ *    Detlev Marpe,
+ *    Gabi Blaettermann
+ *    Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+ *
+ * \date
+ *    21. Oct 2000
+ **************************************************************************
+ */
+
+
+#ifndef _BIARIENCOD_H_
+#define _BIARIENCOD_H_
+
+
+/************************************************************************
+ * D e f i n i t i o n s
+ ***********************************************************************
+ */
+
+// some definitions to increase the readability of the source code;
+// the E* shorthands expand to fields of a variable named eep, which must be in scope where they are used
+#define Elow                      (eep->Elow)
+#define Erange                    (eep->Erange)
+#define Ebits_to_follow           (eep->Ebits_to_follow)
+#define Ebuffer                   (eep->Ebuffer)
+#define Ebits_to_go               (eep->Ebits_to_go)
+#define Ecodestrm                 (eep->Ecodestrm)
+#define Ecodestrm_len             (eep->Ecodestrm_len)
+#define Ecodestrm_laststartcode   (eep->Ecodestrm_laststartcode)
+#define B_BITS                    10 // Number of bits to represent the whole coding interval
+#define ONE                       (1 << B_BITS)
+#define HALF                      (1 << (B_BITS-1))
+#define QUARTER                   (1 << (B_BITS-2))
+
+// Range table for LPS, read as rLPS_table_64x4[state][(range>>6) & 3]. NOTE(review): defined (not only declared) in a header - confirm it is included by a single .c file
+const byte rLPS_table_64x4[64][4]=
+{
+        { 128, 176, 208, 240},
+        { 128, 167, 197, 227},
+        { 128, 158, 187, 216},
+        { 123, 150, 178, 205},
+        { 116, 142, 169, 195},
+        { 111, 135, 160, 185},
+        { 105, 128, 152, 175},
+        { 100, 122, 144, 166},
+        {  95, 116, 137, 158},
+        {  90, 110, 130, 150},
+        {  85, 104, 123, 142},
+        {  81,  99, 117, 135},
+        {  77,  94, 111, 128},
+        {  73,  89, 105, 122},
+        {  69,  85, 100, 116},
+        {  66,  80,  95, 110},
+        {  62,  76,  90, 104},
+        {  59,  72,  86,  99},
+        {  56,  69,  81,  94},
+        {  53,  65,  77,  89},
+        {  51,  62,  73,  85},
+        {  48,  59,  69,  80},
+        {  46,  56,  66,  76},
+        {  43,  53,  63,  72},
+        {  41,  50,  59,  69},
+        {  39,  48,  56,  65},
+        {  37,  45,  54,  62},
+        {  35,  43,  51,  59},
+        {  33,  41,  48,  56},
+        {  32,  39,  46,  53},
+        {  30,  37,  43,  50},
+        {  29,  35,  41,  48},
+        {  27,  33,  39,  45},
+        {  26,  31,  37,  43},
+        {  24,  30,  35,  41},
+        {  23,  28,  33,  39},
+        {  22,  27,  32,  37},
+        {  21,  26,  30,  35},
+        {  20,  24,  29,  33},
+        {  19,  23,  27,  31},
+        {  18,  22,  26,  30},
+        {  17,  21,  25,  28},
+        {  16,  20,  23,  27},
+        {  15,  19,  22,  25},
+        {  14,  18,  21,  24},
+        {  14,  17,  20,  23},
+        {  13,  16,  19,  22},
+        {  12,  15,  18,  21},
+        {  12,  14,  17,  20},
+        {  11,  14,  16,  19},
+        {  11,  13,  15,  18},
+        {  10,  12,  15,  17},
+        {  10,  12,  14,  16},
+        {   9,  11,  13,  15},
+        {   9,  11,  12,  14},
+        {   8,  10,  12,  14},
+        {   8,   9,  11,  13},
+        {   7,   9,  11,  12},
+        {   7,   9,  10,  12},
+        {   7,   8,  10,  11},
+        {   6,   8,   9,  11},
+        {   6,   7,   9,  10},
+        {   6,   7,   8,   9},
+        {   2,   2,   2,   2}
+};
+// Next context state after coding a symbol equal to the context's MPS (see biari_encode_symbol)
+const unsigned short AC_next_state_MPS_64[64] =
+{
+                1,2,3,4,5,6,7,8,9,10,
+                11,12,13,14,15,16,17,18,19,20,
+                21,22,23,24,25,26,27,28,29,30,
+                31,32,33,34,35,36,37,38,39,40,
+                41,42,43,44,45,46,47,48,49,50,
+                51,52,53,54,55,56,57,58,59,60,
+                61,62,62,63
+};
+// Next context state after coding a symbol different from the context's MPS (see biari_encode_symbol)
+const unsigned short AC_next_state_LPS_64[64] =
+{
+                 0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
+                 8, 9, 9,11,11,12,13,13,15,15,
+                 16,16,18,18,19,19,21,21,22,22,
+                 23,24,24,25,26,26,27,27,28,29,
+                 29,30,30,30,31,32,32,33,33,33,
+                 34,34,35,35,35,36,36,36,37,37,
+                 37,38,38,63
+};
+
+
+#endif  // _BIARIENCOD_H_
+


Index: llvm-test/MultiSource/Applications/JM/lencod/block.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/block.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/block.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,3071 @@
+
+/*!
+ *************************************************************************************
+ * \file block.c
+ *
+ * \brief
+ *    Process one block
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *    - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+ *    - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ *    - Jani Lainema                    <jani.lainema at nokia.com>
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+ *    - Ragip Kurceren                  <ragip.kurceren at nokia.com>
+ *    - Greg Conklin                    <gregc at real.com>
+ *************************************************************************************
+ */
+
+#include "contributors.h"
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+#include "global.h"
+
+#include "image.h"
+#include "mb_access.h"
+#include "block.h"
+#include "vlc.h"
+
+
+const int quant_coef[6][4][4] = {  // six 4x4 sets of integer factors; each set holds three distinct values in a 2x2-periodic layout. Presumably forward-quantisation multipliers selected by (QP % 6) -- confirm with the users of this table
+  {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}},
+  {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}},
+  {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}},
+  {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}},
+  {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}},
+  {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}}
+};
+
+const int dequant_coef[6][4][4] = {  // six 4x4 sets of integer factors with the same 2x2-periodic layout as quant_coef. Presumably inverse-quantisation multipliers selected by (QP % 6) -- confirm with the users of this table
+  {{10, 13, 10, 13},{ 13, 16, 13, 16},{10, 13, 10, 13},{ 13, 16, 13, 16}},
+  {{11, 14, 11, 14},{ 14, 18, 14, 18},{11, 14, 11, 14},{ 14, 18, 14, 18}},
+  {{13, 16, 13, 16},{ 16, 20, 16, 20},{13, 16, 13, 16},{ 16, 20, 16, 20}},
+  {{14, 18, 14, 18},{ 18, 23, 18, 23},{14, 18, 14, 18},{ 18, 23, 18, 23}},
+  {{16, 20, 16, 20},{ 20, 25, 20, 25},{16, 20, 16, 20},{ 20, 25, 20, 25}},
+  {{18, 23, 18, 23},{ 23, 29, 23, 29},{18, 23, 18, 23},{ 23, 29, 23, 29}}
+};
+static const int A[4][4] = {  // file-local 4x4 table; its values are identical to dequant_coef[4]. NOTE(review): its users are not visible in this part of the file
+  { 16, 20, 16, 20},
+  { 20, 25, 20, 25},
+  { 16, 20, 16, 20},
+  { 20, 25, 20, 25}
+};
+
+
+const byte QP_SCALE_CR[52]=  // 52-entry lookup: identity for indices 0..29, then grows more slowly and ends at 39 for index 51
+{  // presumably maps a luma-range QP index to the QP used for chroma (per the name) -- confirm with the users of this table
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,
+  12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
+  28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,
+  37,38,38,38,39,39,39,39
+};
+
+// Notation for comments regarding prediction and predictors.
+// The pels of the 4x4 block are labelled a..p. The predictor pels above
+// are labelled A..H, from the left I..L, and from above left X, as follows:
+//
+//  X A B C D E F G H
+//  I a b c d
+//  J e f g h
+//  K i j k l
+//  L m n o p
+//
+
+// Predictor array index definitions.
+// Each macro names one slot of a 13-entry array called PredPel that must be
+// in scope where the macro is used. The slots are consecutive, so code may
+// index from a group's first element, e.g. (&P_A)[i] or (&P_I)[i].
+#define P_X (PredPel[0])   // above-left pel
+#define P_A (PredPel[1])   // A..D: the four pels directly above the block
+#define P_B (PredPel[2])
+#define P_C (PredPel[3])
+#define P_D (PredPel[4])
+#define P_E (PredPel[5])   // E..H: the four pels above and to the right
+#define P_F (PredPel[6])
+#define P_G (PredPel[7])
+#define P_H (PredPel[8])
+#define P_I (PredPel[9])   // I..L: the four pels to the left of the block
+#define P_J (PredPel[10])
+#define P_K (PredPel[11])
+#define P_L (PredPel[12])
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Make intra 4x4 prediction according to all 9 prediction modes.
+ *    The routine uses left and upper neighbouring points from
+ *    previously coded blocks to do this (if available). Each candidate
+ *    prediction is written to img->mprr[mode] as a 4x4 block. Element
+ *    [0][0] of every mode is first set to -1; a mode whose required
+ *    neighbours are missing keeps (or, for the vertical and horizontal
+ *    modes, is reset to) that -1 marker so callers can tell it was not
+ *    computed.
+ *
+ *  \par Input:
+ *     img_x, img_y: starting point of the current 4x4 block in the image;
+ *     only the low four bits (position inside the 16x16 macroblock) are
+ *     used here.
+ *
+ *  \par Output:
+ *     *left_available : availability of the left neighbour              \n
+ *     *up_available   : availability of the upper neighbour             \n
+ *     *all_available  : non-zero only if the upper, left and upper-left
+ *                       neighbours are all available                    \n
+ *     With input->UseConstrainedIntraPred set, a neighbour additionally
+ *     counts as available only if img->intra_block[] is non-zero for its
+ *     macroblock.
+ ************************************************************************
+ */
+void intrapred_luma(int img_x,int img_y, int *left_available, int *up_available, int *all_available)
+{
+  int i,j;
+  int s0;
+  imgpel PredPel[13];  // array of predictor pels
+  imgpel **imgY = enc_picture->imgY;  // For MB level frame/field coding tools -- set default to imgY
+  imgpel *imgYpel;
+  imgpel (*cur_pred)[16];
+
+  int ioff = (img_x & 15);
+  int joff = (img_y & 15);
+  int mb_nr=img->current_mb_nr;
+
+  PixelPos pix_a[4];
+  PixelPos pix_b, pix_c, pix_d;
+
+  int block_available_up;
+  int block_available_left;
+  int block_available_up_left;
+  int block_available_up_right;
+
+  for (i=0;i<4;i++)
+  {
+    getNeighbour(mb_nr, ioff -1 , joff +i , IS_LUMA, &pix_a[i]);  // pel to the left of row i
+  }
+
+  getNeighbour(mb_nr, ioff    , joff -1 , IS_LUMA, &pix_b);  // first pel above
+  getNeighbour(mb_nr, ioff +4 , joff -1 , IS_LUMA, &pix_c);  // first pel above-right
+  getNeighbour(mb_nr, ioff -1 , joff -1 , IS_LUMA, &pix_d);  // pel above-left
+
+  pix_c.available = pix_c.available && !((ioff==4) && ((joff==4)||(joff==12)));  // above-right is forced unavailable for the blocks at (4,4) and (4,12) inside the macroblock
+
+  if (input->UseConstrainedIntraPred)
+  {
+    for (i=0, block_available_left=1; i<4;i++)  // all four left pels must be available and lie in intra blocks
+      block_available_left  &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0;
+    block_available_up       = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? img->intra_block [pix_d.mb_addr] : 0;
+  }
+  else
+  {
+    block_available_left     = pix_a[0].available;  // only the first left pel is consulted here
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+
+  *left_available = block_available_left;
+  *up_available   = block_available_up;
+  *all_available  = block_available_up && block_available_left && block_available_up_left;
+
+  i = (img_x & 15);  // NOTE(review): i and j are reassigned by the loops below before being read; these two stores look unused
+  j = (img_y & 15);
+
+  // form predictor pels
+  if (block_available_up)
+  {
+    imgYpel = &imgY[pix_b.pos_y][pix_b.pos_x];
+    P_A = *(imgYpel++);
+    P_B = *(imgYpel++);
+    P_C = *(imgYpel++);
+    P_D = *(imgYpel);
+
+  }
+  else
+  {
+    P_A = P_B = P_C = P_D = img->dc_pred_value_luma;
+  }
+
+  if (block_available_up_right)
+  {
+    imgYpel = &imgY[pix_c.pos_y][pix_c.pos_x];
+    P_E = *(imgYpel++);
+    P_F = *(imgYpel++);
+    P_G = *(imgYpel++);
+    P_H = *(imgYpel);
+  }
+  else
+  {
+    P_E = P_F = P_G = P_H = P_D;  // above-right missing: replicate D
+  }
+
+  if (block_available_left)
+  {
+    P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+    P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+    P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+    P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+  }
+  else
+  {
+    P_I = P_J = P_K = P_L = img->dc_pred_value_luma;
+  }
+
+  if (block_available_up_left)
+  {
+    P_X = imgY[pix_d.pos_y][pix_d.pos_x];
+  }
+  else
+  {
+    P_X = img->dc_pred_value_luma;
+  }
+
+  for(i=0;i<9;i++)  // mark all nine modes as "not computed"; modes that can be formed overwrite [0][0] below
+    img->mprr[i][0][0]=-1;
+
+  ///////////////////////////////
+  // make DC prediction
+  ///////////////////////////////
+  s0 = 0;
+  if (block_available_up && block_available_left)
+  {
+    // no edge
+    s0 = (P_A + P_B + P_C + P_D + P_I + P_J + P_K + P_L + 4) >> (BLOCK_SHIFT + 1);
+  }
+  else if (!block_available_up && block_available_left)
+  {
+    // upper edge
+    s0 = (P_I + P_J + P_K + P_L + 2) >> BLOCK_SHIFT;;  // NOTE(review): stray second ';' (harmless empty statement)
+  }
+  else if (block_available_up && !block_available_left)
+  {
+    // left edge
+    s0 = (P_A + P_B + P_C + P_D + 2) >> BLOCK_SHIFT;
+  }
+  else //if (!block_available_up && !block_available_left)
+  {
+    // top left corner, nothing to predict from
+    s0 = img->dc_pred_value_luma;
+  }
+
+  // store DC prediction
+  cur_pred = img->mprr[DC_PRED];
+  for (j=0; j < BLOCK_SIZE; j++)
+  {
+    for (i=0; i < BLOCK_SIZE; i++)
+      cur_pred[j][i] = (imgpel) s0;
+  }
+
+  ///////////////////////////////
+  // make horiz and vert prediction
+  ///////////////////////////////
+
+  //Mode vertical
+  cur_pred = img->mprr[VERT_PRED];
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    cur_pred[0][i] =
+    cur_pred[1][i] =
+    cur_pred[2][i] =
+    cur_pred[3][i] = (imgpel) (&P_A)[i];  // PredPel[1+i], i.e. A..D copied down each column
+  }
+  if(!block_available_up)
+    cur_pred [0][0]=-1;  // restore the "not computed" marker
+
+  //Mode horizontal
+  cur_pred = img->mprr[HOR_PRED];
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    cur_pred[i][0]  =
+    cur_pred[i][1]  =
+    cur_pred[i][2]  =
+    cur_pred[i][3]  = (imgpel) (&P_I)[i];  // PredPel[9+i], i.e. I..L copied across each row
+  }
+  if(!block_available_left)
+    cur_pred[0][0]=-1;  // restore the "not computed" marker
+
+  if (block_available_up)
+  {
+    // Mode DIAG_DOWN_LEFT_PRED
+    cur_pred = img->mprr[DIAG_DOWN_LEFT_PRED];
+    cur_pred[0][0] = (imgpel) ((P_A + P_C + 2*(P_B) + 2) >> 2);
+    cur_pred[0][1] =
+    cur_pred[1][0] = (imgpel) ((P_B + P_D + 2*(P_C) + 2) >> 2);
+    cur_pred[0][2] =
+    cur_pred[1][1] =
+    cur_pred[2][0] = (imgpel) ((P_C + P_E + 2*(P_D) + 2) >> 2);
+    cur_pred[0][3] =
+    cur_pred[1][2] =
+    cur_pred[2][1] =
+    cur_pred[3][0] = (imgpel) ((P_D + P_F + 2*(P_E) + 2) >> 2);
+    cur_pred[1][3] =
+    cur_pred[2][2] =
+    cur_pred[3][1] = (imgpel) ((P_E + P_G + 2*(P_F) + 2) >> 2);
+    cur_pred[2][3] =
+    cur_pred[3][2] = (imgpel) ((P_F + P_H + 2*(P_G) + 2) >> 2);
+    cur_pred[3][3] = (imgpel) ((P_G + 3*(P_H) + 2) >> 2);
+
+    // Mode VERT_LEFT_PRED
+    cur_pred = img->mprr[VERT_LEFT_PRED];
+    cur_pred[0][0] = (imgpel) ((P_A + P_B + 1) >> 1);
+    cur_pred[0][1] =
+    cur_pred[2][0] = (imgpel) ((P_B + P_C + 1) >> 1);
+    cur_pred[0][2] =
+    cur_pred[2][1] = (imgpel) ((P_C + P_D + 1) >> 1);
+    cur_pred[0][3] =
+    cur_pred[2][2] = (imgpel) ((P_D + P_E + 1) >> 1);
+    cur_pred[2][3] = (imgpel) ((P_E + P_F + 1) >> 1);
+    cur_pred[1][0] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+    cur_pred[1][1] =
+    cur_pred[3][0] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2);
+    cur_pred[1][2] =
+    cur_pred[3][1] = (imgpel) ((P_C + 2*P_D + P_E + 2) >> 2);
+    cur_pred[1][3] =
+    cur_pred[3][2] = (imgpel) ((P_D + 2*P_E + P_F + 2) >> 2);
+    cur_pred[3][3] = (imgpel) ((P_E + 2*P_F + P_G + 2) >> 2);
+
+  }
+
+  /*  Prediction according to 'diagonal' modes */
+  if (block_available_left)
+  {
+    // Mode HOR_UP_PRED
+    cur_pred = img->mprr[HOR_UP_PRED];
+    cur_pred[0][0] = (imgpel) ((P_I + P_J + 1) >> 1);
+    cur_pred[0][1] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2);
+    cur_pred[0][2] =
+    cur_pred[1][0] = (imgpel) ((P_J + P_K + 1) >> 1);
+    cur_pred[0][3] =
+    cur_pred[1][1] = (imgpel) ((P_J + 2*P_K + P_L + 2) >> 2);
+    cur_pred[1][2] =
+    cur_pred[2][0] = (imgpel) ((P_K + P_L + 1) >> 1);
+    cur_pred[1][3] =
+    cur_pred[2][1] = (imgpel) ((P_K + 2*P_L + P_L + 2) >> 2);  // L appears twice: there is no pel below L, so it is reused
+    cur_pred[3][0] =
+    cur_pred[2][2] =
+    cur_pred[2][3] =
+    cur_pred[3][1] =
+    cur_pred[3][2] =
+    cur_pred[3][3] = (imgpel) P_L;
+  }
+
+  /*  Prediction according to 'diagonal' modes */
+  if (block_available_up && block_available_left && block_available_up_left)
+  {
+    // Mode DIAG_DOWN_RIGHT_PRED
+    cur_pred = img->mprr[DIAG_DOWN_RIGHT_PRED];
+    cur_pred[3][0] = (imgpel) ((P_L + 2*P_K + P_J + 2) >> 2);
+    cur_pred[2][0] =
+    cur_pred[3][1] = (imgpel) ((P_K + 2*P_J + P_I + 2) >> 2);
+    cur_pred[1][0] =
+    cur_pred[2][1] =
+    cur_pred[3][2] = (imgpel) ((P_J + 2*P_I + P_X + 2) >> 2);
+    cur_pred[0][0] =
+    cur_pred[1][1] =
+    cur_pred[2][2] =
+    cur_pred[3][3] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2);
+    cur_pred[0][1] =
+    cur_pred[1][2] =
+    cur_pred[2][3] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2);
+    cur_pred[0][2] =
+    cur_pred[1][3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+    cur_pred[0][3] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2);
+
+     // Mode VERT_RIGHT_PRED
+    cur_pred = img->mprr[VERT_RIGHT_PRED];
+    cur_pred[0][0] =
+    cur_pred[2][1] = (imgpel) ((P_X + P_A + 1) >> 1);
+    cur_pred[0][1] =
+    cur_pred[2][2] = (imgpel) ((P_A + P_B + 1) >> 1);
+    cur_pred[0][2] =
+    cur_pred[2][3] = (imgpel) ((P_B + P_C + 1) >> 1);
+    cur_pred[0][3] = (imgpel) ((P_C + P_D + 1) >> 1);
+    cur_pred[1][0] =
+    cur_pred[3][1] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2);
+    cur_pred[1][1] =
+    cur_pred[3][2] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2);
+    cur_pred[1][2] =
+    cur_pred[3][3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+    cur_pred[1][3] = (imgpel) ((P_B + 2*P_C + P_D + 2) >> 2);
+    cur_pred[2][0] = (imgpel) ((P_X + 2*P_I + P_J + 2) >> 2);
+    cur_pred[3][0] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2);
+
+    // Mode HOR_DOWN_PRED
+    cur_pred = img->mprr[HOR_DOWN_PRED];
+    cur_pred[0][0] =
+    cur_pred[1][2] = (imgpel) ((P_X + P_I + 1) >> 1);
+    cur_pred[0][1] =
+    cur_pred[1][3] = (imgpel) ((P_I + 2*P_X + P_A + 2) >> 2);
+    cur_pred[0][2] = (imgpel) ((P_X + 2*P_A + P_B + 2) >> 2);
+    cur_pred[0][3] = (imgpel) ((P_A + 2*P_B + P_C + 2) >> 2);
+    cur_pred[1][0] =
+    cur_pred[2][2] = (imgpel) ((P_I + P_J + 1) >> 1);
+    cur_pred[1][1] =
+    cur_pred[2][3] = (imgpel) ((P_X + 2*P_I + P_J + 2) >> 2);
+    cur_pred[2][0] =
+    cur_pred[3][2] = (imgpel) ((P_J + P_K + 1) >> 1);
+    cur_pred[2][1] =
+    cur_pred[3][3] = (imgpel) ((P_I + 2*P_J + P_K + 2) >> 2);
+    cur_pred[3][0] = (imgpel) ((P_K + P_L + 1) >> 1);
+    cur_pred[3][1] = (imgpel) ((P_J + 2*P_K + P_L + 2) >> 2);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    16x16 based luma prediction.
+ *    Fills img->mprr_2[] with the vertical, horizontal and DC predictions
+ *    for the current macroblock and, when the upper, left and upper-left
+ *    neighbours are all available, also with the plane prediction.
+ *
+ * \par Input:
+ *    Image parameters (img->current_mb_nr, enc_picture->imgY,
+ *    input->UseConstrainedIntraPred)
+ *
+ * \par Output:
+ *    none (results are stored in img->mprr_2[VERT_PRED_16], [HOR_PRED_16],
+ *    [DC_PRED_16] and, if computed, [PLANE_16]).
+ *    The vertical / horizontal buffers are filled with zeros when the
+ *    corresponding neighbour row / column is not available.
+ ************************************************************************
+ */
+void intrapred_luma_16x16()
+{
+  int s0=0,s1,s2;
+  // s[0]: pel row above the MB, s[1]: pel column left of the MB.
+  // Zero-initialised so that the copies into img->mprr_2 below never read
+  // indeterminate stack contents when a neighbour is unavailable.
+  imgpel s[2][16] = {{0}};
+  int i,j;
+
+  int ih,iv;
+  int ib,ic,iaa;
+
+  imgpel   **imgY_pred = enc_picture->imgY;  // For Mb level field/frame coding tools -- default to frame pred
+  int          mb_nr = img->current_mb_nr;
+
+  PixelPos up;          //!< pixel position p(0,-1)
+  PixelPos left[17];    //!< pixel positions p(-1, -1..15)
+
+  int up_avail, left_avail, left_up_avail;
+
+  // left[0] is the upper-left corner, left[1..16] the 16 pels of the left column
+  for (i=0;i<17;i++)
+  {
+    getNeighbour(mb_nr, -1,  i-1, IS_LUMA, &left[i]);
+  }
+
+  getNeighbour(mb_nr,    0,   -1, IS_LUMA, &up);
+
+  if (!(input->UseConstrainedIntraPred))
+  {
+    up_avail      = up.available;
+    left_avail    = left[1].available;
+    left_up_avail = left[0].available;
+  }
+  else
+  {
+    // constrained intra prediction: neighbours must also belong to intra blocks
+    up_avail      = up.available ? img->intra_block[up.mb_addr] : 0;
+    for (i=1, left_avail=1; i<17;i++)
+      left_avail  &= left[i].available ? img->intra_block[left[i].mb_addr]: 0;
+    left_up_avail = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+  }
+
+  s1=s2=0;
+  // make DC prediction
+  if (up_avail)
+  {
+    for (i=up.pos_x; i < up.pos_x + MB_BLOCK_SIZE; i++)
+      s1 += imgY_pred[up.pos_y][i];    // sum hor pix
+  }
+
+  if (left_avail)
+  {
+    for (i=1; i < MB_BLOCK_SIZE + 1; i++)
+      s2 += imgY_pred[left[i].pos_y][left[i].pos_x];    // sum vert pix
+  }
+
+  if (up_avail)
+  {
+    s0= left_avail
+      ? rshift_rnd_sf((s1+s2),(MB_BLOCK_SHIFT + 1)) // no edge
+      : rshift_rnd_sf(s1, MB_BLOCK_SHIFT);          // left edge
+  }
+  else
+  {
+    s0=left_avail
+      ? rshift_rnd_sf(s2, MB_BLOCK_SHIFT)           // upper edge
+      : img->dc_pred_value_luma;                        // top left corner, nothing to predict from
+  }
+
+  // vertical prediction
+  if (up_avail)
+    memcpy(s[0], &imgY_pred[up.pos_y][up.pos_x], MB_BLOCK_SIZE * sizeof(imgpel));
+
+  // horizontal prediction
+  if (left_avail)
+  {
+    for (i=1; i < MB_BLOCK_SIZE + 1; i++)
+      s[1][i - 1]=imgY_pred[left[i].pos_y][left[i].pos_x];
+  }
+
+  for (j=0; j < MB_BLOCK_SIZE; j++)
+  {
+    memcpy(img->mprr_2[VERT_PRED_16][j], s[0], MB_BLOCK_SIZE * sizeof(imgpel)); // store vertical prediction
+    for (i=0; i < MB_BLOCK_SIZE; i++)
+    {
+      img->mprr_2[HOR_PRED_16 ][j][i] = s[1][j]; // store horizontal prediction
+      img->mprr_2[DC_PRED_16  ][j][i] = s0;      // store DC prediction
+    }
+  }
+  if (!up_avail || !left_avail || !left_up_avail) // edge
+    return;
+
+  // 16 bit integer plane pred
+
+  ih=0;
+  iv=0;
+  for (i=1;i<9;i++)
+  {
+    // horizontal gradient: for i == 8 the left-hand sample is the upper-left corner pel
+    if (i<8)
+      ih += i*(imgY_pred[up.pos_y][up.pos_x+7+i] - imgY_pred[up.pos_y][up.pos_x+7-i]);
+    else
+      ih += i*(imgY_pred[up.pos_y][up.pos_x+7+i] - imgY_pred[left[0].pos_y][left[0].pos_x]);
+
+    // vertical gradient: left[8-i] reaches left[0] (the corner) for i == 8
+    iv += i*(imgY_pred[left[8+i].pos_y][left[8+i].pos_x] - imgY_pred[left[8-i].pos_y][left[8-i].pos_x]);
+  }
+  ib=(5*ih+32)>>6;
+  ic=(5*iv+32)>>6;
+
+  iaa=16*(imgY_pred[up.pos_y][up.pos_x+15]+imgY_pred[left[16].pos_y][left[16].pos_x]);
+
+  for (j=0;j< MB_BLOCK_SIZE;j++)
+  {
+    for (i=0;i< MB_BLOCK_SIZE;i++)
+    {
+      img->mprr_2[PLANE_16][j][i]= iClip3( 0, img->max_imgpel_value,rshift_rnd_sf((iaa+(i-7)*ib +(j-7)*ic), 5));// store plane prediction
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Residual coding of one 16x16 intra-predicted luma macroblock.
+ *    Forms the difference between imgY_org and the chosen 16x16
+ *    prediction, and (unless the lossless qpprime path is active)
+ *    transforms each 4x4 block, applies a second 4x4 transform to the
+ *    16 DC coefficients, quantises, and reconstructs. The reconstructed
+ *    pels are written to enc_picture->imgY; DC levels/runs go to
+ *    img->cofDC[0] and AC levels/runs to img->cofAC[b8][b4].
+ *    Uses static scratch buffers, so the function keeps state in static
+ *    storage between calls.
+ *
+ * \par Input:
+ *    new_intra_mode: 16x16 intra prediction mode, used as the index
+ *    into img->mprr_2[]
+ *
+ * \par Output:
+ *    return value: 15 if any AC level is non-zero, otherwise 0
+ ************************************************************************
+ */
+int dct_luma_16x16(int new_intra_mode)
+{
+  //int qp_const;
+  int i,j;
+  int ii,jj;
+  int jdiv, jmod;
+  static int M1[16][16];  // residual (later: reconstructed difference) in raster order
+  static int M4[4][4];
+  static int M5[4],M6[4];
+  static int M0[4][4][4][4];  // same data as M1, indexed [block row][block col][row][col]
+  int run,scan_pos,coeff_ctr,level;
+  int qp_per,qp_rem,q_bits;
+  int ac_coef = 0;
+
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  int   b8, b4;
+  int*  DCLevel = img->cofDC[0][0];
+  int*  DCRun   = img->cofDC[0][1];
+  int*  ACLevel;
+  int*  ACRun;
+  int **levelscale,**leveloffset;
+  int **invlevelscale;
+  Boolean lossless_qpprime = (Boolean) ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+  const byte (*pos_scan)[2] = currMB->is_field_mode ? FIELD_SCAN : SNGL_SCAN;
+
+  // Note that we could just use currMB->qp here
+  qp_per = qp_per_matrix[(currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)];
+  qp_rem = qp_rem_matrix[(currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)];
+  q_bits = Q_BITS + qp_per;
+
+  // select scaling parameters
+  levelscale    = LevelScale4x4Luma   [1][qp_rem];
+  invlevelscale = InvLevelScale4x4Luma[1][qp_rem];
+  leveloffset   = LevelOffset4x4Luma  [1][qp_per];
+
+  for (j=0;j<16;j++)
+  {
+    jdiv = j >> 2;
+    jmod = j & 0x03;
+    jj = img->opix_y+j;
+    for (i=0;i<16;i++)
+    {
+      M1[j][i] = imgY_org[jj][img->opix_x+i] - img->mprr_2[new_intra_mode][j][i];
+      M0[jdiv][i >> 2][jmod][i & 0x03] = M1[j][i];
+    }
+  }
+
+  if (!lossless_qpprime)
+  {
+    for (jj=0;jj<4;jj++)
+    {
+      for (ii=0;ii<4;ii++)
+      {
+        for (j=0;j<4;j++)
+        {
+          M5[0] = M0[jj][ii][j][0] + M0[jj][ii][j][3];
+          M5[1] = M0[jj][ii][j][1] + M0[jj][ii][j][2];
+          M5[2] = M0[jj][ii][j][1] - M0[jj][ii][j][2];
+          M5[3] = M0[jj][ii][j][0] - M0[jj][ii][j][3];
+
+          M4[j][0] = M5[0]   + M5[1];
+          M4[j][2] = M5[0]   - M5[1];
+          M4[j][1] = (M5[3] << 1) + M5[2];
+          M4[j][3] = M5[3]   - (M5[2] << 1);
+        }
+        // vertical
+        for (i=0;i<4;i++)
+        {
+          M5[0] = M4[0][i] + M4[3][i];
+          M5[1] = M4[1][i] + M4[2][i];
+          M5[2] = M4[1][i] - M4[2][i];
+          M5[3] = M4[0][i] - M4[3][i];
+
+          M0[jj][ii][0][i] =  M5[0]   + M5[1];
+          M0[jj][ii][2][i] =  M5[0]   - M5[1];
+          M0[jj][ii][1][i] = (M5[3] << 1) + M5[2];
+          M0[jj][ii][3][i] =  M5[3]   - (M5[2] << 1);
+        }
+      }
+    }
+
+    // pick out DC coeff
+
+    for (j=0;j<4;j++)
+    {
+      for (i=0;i<4;i++)
+        M4[j][i]= M0[j][i][0][0];
+    }
+
+    for (j=0;j<4;j++)
+    {
+      M5[0] = M4[j][0]+M4[j][3];
+      M5[1] = M4[j][1]+M4[j][2];
+      M5[2] = M4[j][1]-M4[j][2];
+      M5[3] = M4[j][0]-M4[j][3];
+
+      M4[j][0] = M5[0]+M5[1];
+      M4[j][2] = M5[0]-M5[1];
+      M4[j][1] = M5[3]+M5[2];
+      M4[j][3] = M5[3]-M5[2];
+    }
+
+    // vertical
+    for (i=0;i<4;i++)
+    {
+      M5[0] = M4[0][i]+M4[3][i];
+      M5[1] = M4[1][i]+M4[2][i];
+      M5[2] = M4[1][i]-M4[2][i];
+      M5[3] = M4[0][i]-M4[3][i];
+
+      M4[0][i]=(M5[0]+M5[1])>>1;
+      M4[2][i]=(M5[0]-M5[1])>>1;
+      M4[1][i]=(M5[3]+M5[2])>>1;
+      M4[3][i]=(M5[3]-M5[2])>>1;
+    }
+
+    // quant
+    run=-1;
+    scan_pos=0;
+
+    for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++)
+    {
+      i=pos_scan[coeff_ctr][0];
+      j=pos_scan[coeff_ctr][1];
+
+      run++;
+      level= (iabs(M4[j][i]) * levelscale[0][0] + (leveloffset[0][0]<<1)) >> (q_bits+1);  // every DC coefficient uses the [0][0] scale and offset
+
+      if (input->symbol_mode == UVLC && img->qp < 10)
+      {
+        if (level > CAVLC_LEVEL_LIMIT)
+          level = CAVLC_LEVEL_LIMIT;
+      }
+
+      if (level != 0)
+      {
+        DCLevel[scan_pos] = isignab(level,M4[j][i]);
+        DCRun  [scan_pos] = run;
+        ++scan_pos;
+        run=-1;
+      }
+      M4[j][i]=isignab(level,M4[j][i]);
+    }
+    DCLevel[scan_pos]=0;  // a zero level terminates the list
+
+    // inverse DC transform
+    for (j=0;j<4;j++)
+    {
+      M6[0] = M4[j][0] + M4[j][2];
+      M6[1] = M4[j][0] - M4[j][2];
+      M6[2] = M4[j][1] - M4[j][3];
+      M6[3] = M4[j][1] + M4[j][3];
+
+      M4[j][0] = M6[0] + M6[3];
+      M4[j][1] = M6[1] + M6[2];
+      M4[j][2] = M6[1] - M6[2];
+      M4[j][3] = M6[0] - M6[3];
+    }
+
+    for (i=0;i<4;i++)
+    {
+
+      M6[0] = M4[0][i] + M4[2][i];
+      M6[1] = M4[0][i] - M4[2][i];
+      M6[2] = M4[1][i] - M4[3][i];
+      M6[3] = M4[1][i] + M4[3][i];
+
+      M0[0][i][0][0] = rshift_rnd_sf(((M6[0]+M6[3])*invlevelscale[0][0])<<qp_per,6);
+      M0[1][i][0][0] = rshift_rnd_sf(((M6[1]+M6[2])*invlevelscale[0][0])<<qp_per,6);
+      M0[2][i][0][0] = rshift_rnd_sf(((M6[1]-M6[2])*invlevelscale[0][0])<<qp_per,6);
+      M0[3][i][0][0] = rshift_rnd_sf(((M6[0]-M6[3])*invlevelscale[0][0])<<qp_per,6);
+    }
+  }
+  else  // lossless_qpprime
+  {
+
+    // pick out DC coeff
+    for (j=0;j<4;j++)
+    {
+      for (i=0;i<4;i++)
+        M4[j][i]= M0[j][i][0][0];
+    }
+
+    run=-1;
+    scan_pos=0;
+
+    for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++)
+    {
+      i=pos_scan[coeff_ctr][0];
+      j=pos_scan[coeff_ctr][1];
+
+      run++;
+
+      level= iabs(M4[j][i]);
+
+      if (input->symbol_mode == UVLC && img->qp < 10 && level > CAVLC_LEVEL_LIMIT)
+        level = CAVLC_LEVEL_LIMIT;
+
+      if (level != 0)
+      {
+        DCLevel[scan_pos] = isignab(level,M4[j][i]);
+        DCRun  [scan_pos] = run;
+        ++scan_pos;
+        run=-1;
+      }
+    }
+    DCLevel[scan_pos]=0;
+  }
+
+  // AC inverse trans/quant for MB
+  for (jj=0;jj<4;jj++)
+  {
+    for (ii=0;ii<4;ii++)
+    {
+      for (j=0;j<4;j++)
+      {
+        memcpy(M4[j],M0[jj][ii][j], BLOCK_SIZE * sizeof(int));
+      }
+
+      run      = -1;
+      scan_pos =  0;
+      b8       = 2*(jj >> 1) + (ii >> 1);
+      b4       = 2*(jj & 0x01) + (ii & 0x01);
+      ACLevel  = img->cofAC [b8][b4][0];
+      ACRun    = img->cofAC [b8][b4][1];
+
+      if(!lossless_qpprime)
+      {
+        for (coeff_ctr=1;coeff_ctr<16;coeff_ctr++) // set in AC coeff (scan position 0, the DC, is skipped)
+        {
+
+          i=pos_scan[coeff_ctr][0];
+          j=pos_scan[coeff_ctr][1];
+
+          run++;
+          level= ( iabs( M4[j][i]) * levelscale[j][i] + leveloffset[j][i]) >> q_bits;
+
+          if (img->AdaptiveRounding)
+          {
+            img->fadjust4x4[2][jj*BLOCK_SIZE+j][ii*BLOCK_SIZE+i] = (level == 0) ? 0
+              : rshift_rnd_sf((AdaptRndWeight * (iabs(M4[j][i]) * levelscale[j][i] - (level << q_bits))),(q_bits + 1));
+          }
+
+          if (level != 0)
+          {
+            ac_coef = 15;
+            ACLevel[scan_pos] = isignab(level,M4[j][i]);
+            ACRun  [scan_pos] = run;
+            ++scan_pos;
+            run=-1;
+          }
+
+          level=isignab(level, M4[j][i]);
+
+          M4[j][i]=rshift_rnd_sf((level*invlevelscale[j][i])<<qp_per, 4);
+        }
+
+        ACLevel[scan_pos] = 0;
+
+        // IDCT horizontal
+        for (j=0;j<4 ;j++)
+        {
+          M6[0] = M4[j][0]     +  M4[j][2];
+          M6[1] = M4[j][0]     -  M4[j][2];
+          M6[2] =(M4[j][1]>>1) -  M4[j][3];
+          M6[3] = M4[j][1]     + (M4[j][3]>>1);
+
+          M4[j][0] = M6[0] + M6[3];
+          M4[j][1] = M6[1] + M6[2];
+          M4[j][2] = M6[1] - M6[2];
+          M4[j][3] = M6[0] - M6[3];
+        }
+
+        // vertical
+        for (i=0;i<4;i++)
+        {
+          M6[0]= M4[0][i]     +  M4[2][i];
+          M6[1]= M4[0][i]     -  M4[2][i];
+          M6[2]=(M4[1][i]>>1) -  M4[3][i];
+          M6[3]= M4[1][i]     + (M4[3][i]>>1);
+
+          M0[jj][ii][0][i] = M6[0] + M6[3];
+          M0[jj][ii][1][i] = M6[1] + M6[2];
+          M0[jj][ii][2][i] = M6[1] - M6[2];
+          M0[jj][ii][3][i] = M6[0] - M6[3];
+        }
+      }
+      else  // Lossless qpprime code
+      {
+        for (coeff_ctr=1;coeff_ctr<16;coeff_ctr++) // set in AC coeff
+        {
+
+          i=pos_scan[coeff_ctr][0];
+          j=pos_scan[coeff_ctr][1];
+
+          run++;
+
+          level= iabs( M4[j][i]);
+
+          if (level != 0)
+          {
+            ac_coef = 15;
+            ACLevel[scan_pos] = isignab(level,M4[j][i]);
+            ACRun  [scan_pos] = run;
+            ++scan_pos;
+            run=-1;
+          }
+          // set adaptive rounding params to 0 since process is not meaningful here.
+          if (img->AdaptiveRounding)
+          {
+            img->fadjust4x4[2][jj*BLOCK_SIZE+j][ii*BLOCK_SIZE+i] = 0;
+          }
+        }
+        ACLevel[scan_pos] = 0;
+      }
+    }
+  }
+
+  for (jj=0;jj<BLOCK_MULTIPLE; jj++)  // copy the per-block data in M0 back into raster order (M1)
+  {
+    for (ii=0;ii<BLOCK_MULTIPLE; ii++)
+      for (j=0;j<BLOCK_SIZE;j++)
+      {
+        memcpy(&M1[jj*BLOCK_SIZE + j][ii*BLOCK_SIZE], M0[jj][ii][j], BLOCK_SIZE * sizeof(int));
+      }
+  }
+
+  if(lossless_qpprime)
+  {
+    if(img->type!=SP_SLICE)
+    {
+      for (j=0;j<16;j++)
+      {
+        jj = img->pix_y+j;
+        for (i=0;i<16;i++)
+          enc_picture->imgY[jj][img->pix_x+i]=(imgpel)(M1[j][i]+img->mprr_2[new_intra_mode][j][i]);  // lossless: residual + prediction, no scaling or clipping
+      }
+    }
+    else
+    {
+      for (j = 0; j < MB_BLOCK_SIZE; j++)
+      {
+        jj = img->pix_y+j;
+        for (i = 0; i < MB_BLOCK_SIZE ; i++)
+        {
+          enc_picture->imgY[jj][img->pix_x+i]=(imgpel)(M1[j][i]+img->mprr_2[new_intra_mode][j][i]);
+          lrec[jj][img->pix_x+i] = -16; //signals an I16 block in the SP frame
+        }
+      }
+    }
+  }
+  else
+  {
+    if(img->type!=SP_SLICE)
+    {
+      for (j=0;j<16;j++)
+      {
+        jj = img->pix_y+j;
+        for (i=0;i<16;i++)
+          enc_picture->imgY[jj][img->pix_x+i] =
+          iClip1( img->max_imgpel_value, rshift_rnd_sf((M1[j][i]+((long)img->mprr_2[new_intra_mode][j][i]<<DQ_BITS)),DQ_BITS));
+      }
+    }
+    else
+    {
+      for (j=0;j<16;j++)
+      {
+        jj = img->pix_y+j;
+        for (i=0;i<16;i++)
+        {
+          enc_picture->imgY[jj][img->pix_x+i] =
+            iClip1( img->max_imgpel_value, rshift_rnd_sf((M1[j][i]+((long)img->mprr_2[new_intra_mode][j][i]<<DQ_BITS)),DQ_BITS));
+          lrec[jj][img->pix_x+i]=-16; //signals an I16 block in the SP frame
+        }
+      }
+    }
+  }
+
+  return ac_coef;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    The routine performs transform,quantization,inverse transform, adds the diff.
+*    to the prediction and writes the result to the decoded luma frame. Includes the
+*    RD constrained quantization also.
+*    In the lossless qpprime case no transform or quantization is applied:
+*    the residual in img->m7 is coded directly and the reconstruction is
+*    residual + prediction.
+*
+* \par Input:
+*    block_x,block_y: Block position inside a macro block (0,4,8,12).            \n
+*    intra: selects the intra/inter set of the 4x4 luma scaling tables and of
+*           img->fadjust4x4.                                                      \n
+*    img->m7: residual of the 4x4 block (overwritten with the reconstructed
+*             pels in the non-lossless case).
+*
+* \par Output:
+*    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.             \n
+*    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+*                It is incremented, not reset, by this routine.                      \n
+*    Levels and runs are stored in img->cofAC[b8][b4], terminated by a zero level.
+************************************************************************
+*/
+int dct_luma(int block_x,int block_y,int *coeff_cost, int intra)
+{
+  int i,j, ii, ilev, coeff_ctr;
+  static int m4[4][4], m5[4], m6[4];
+  int level,scan_pos = 0,run = -1;
+  int nonzero = FALSE;
+  int qp_per, qp_rem, q_bits;
+
+  int   pos_x   = block_x >> BLOCK_SHIFT;
+  int   pos_y   = block_y >> BLOCK_SHIFT;
+  int   b8      = 2*(pos_y >> 1) + (pos_x >> 1);
+  int   b4      = 2*(pos_y & 0x01) + (pos_x & 0x01);
+  int*  ACLevel = img->cofAC[b8][b4][0];
+  int*  ACRun   = img->cofAC[b8][b4][1];
+  int   pix_y, pix_x;
+
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  Boolean lossless_qpprime = (Boolean) ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+
+  int **levelscale,**leveloffset;
+  int **invlevelscale;
+
+  const byte (*pos_scan)[2] = currMB->is_field_mode ? FIELD_SCAN : SNGL_SCAN;
+
+  qp_per    = qp_per_matrix[(currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)];
+  qp_rem    = qp_rem_matrix[(currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)];
+  q_bits    = Q_BITS+qp_per;
+
+  levelscale    = LevelScale4x4Luma[intra][qp_rem];
+  leveloffset   = LevelOffset4x4Luma[intra][qp_per];
+  invlevelscale = InvLevelScale4x4Luma[intra][qp_rem];
+
+  //  Horizontal transform
+  if (!lossless_qpprime)
+  {
+    for (j=0; j < BLOCK_SIZE; j++)
+    {
+      m5[0] = img->m7[j][0]+img->m7[j][3];
+      m5[1] = img->m7[j][1]+img->m7[j][2];
+      m5[2] = img->m7[j][1]-img->m7[j][2];
+      m5[3] = img->m7[j][0]-img->m7[j][3];
+
+      m4[j][0] = m5[0]   + m5[1];
+      m4[j][2] = m5[0]   - m5[1];
+      m4[j][1] = (m5[3]<<1) + m5[2];
+      m4[j][3] = m5[3]   - (m5[2]<<1);
+    }
+
+    //  Vertical transform
+    for (i=0; i < BLOCK_SIZE; i++)
+    {
+      m5[0] = m4[0][i] + m4[3][i];
+      m5[1] = m4[1][i] + m4[2][i];
+      m5[2] = m4[1][i] - m4[2][i];
+      m5[3] = m4[0][i] - m4[3][i];
+
+      m4[0][i] = m5[0]   + m5[1];
+      m4[2][i] = m5[0]   - m5[1];
+      m4[1][i] = (m5[3]<<1) + m5[2];
+      m4[3][i] = m5[3]   - (m5[2]<<1);
+    }
+
+    // Quant
+    for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++)
+    {
+
+      i=pos_scan[coeff_ctr][0];
+      j=pos_scan[coeff_ctr][1];
+
+      run++;
+      ilev=0;
+
+      level = (iabs (m4[j][i]) * levelscale[j][i] + leveloffset[j][i]) >> q_bits;
+
+      if (img->AdaptiveRounding)
+      {
+        img->fadjust4x4[intra][block_y+j][block_x+i] = (level == 0)
+          ? 0
+          : rshift_rnd_sf((AdaptRndWeight * (iabs(m4[j][i]) * levelscale[j][i] - (level << q_bits))), q_bits + 1);
+      }
+
+      if (level != 0)
+      {
+        nonzero=TRUE;
+
+        *coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST[input->disthres][run];
+
+        ACLevel[scan_pos] = isignab(level,m4[j][i]);
+
+        ACRun  [scan_pos] = run;
+        ++scan_pos;
+        run=-1;                     // reset zero level counter
+
+        level = isignab(level, m4[j][i]);
+        ilev  = rshift_rnd_sf(((level*invlevelscale[j][i])<< qp_per), 4);
+      }
+      m4[j][i]=ilev;                // replace the coefficient by its dequantised value (0 if the level is 0)
+    }
+
+    ACLevel[scan_pos] = 0;
+
+    //     IDCT.
+    //     horizontal
+    for (j=0; j < BLOCK_SIZE; j++)
+    {
+      m6[0]=(m4[j][0]     +  m4[j][2]);
+      m6[1]=(m4[j][0]     -  m4[j][2]);
+      m6[2]=(m4[j][1]>>1) -  m4[j][3];
+      m6[3]= m4[j][1]     + (m4[j][3]>>1);
+
+      m4[j][0] = m6[0] + m6[3];
+      m4[j][1] = m6[1] + m6[2];
+      m4[j][2] = m6[1] - m6[2];
+      m4[j][3] = m6[0] - m6[3];
+    }
+
+    //  vertical
+    for (i=0; i < BLOCK_SIZE; i++)
+    {
+
+      m6[0]=(m4[0][i]     +  m4[2][i]);
+      m6[1]=(m4[0][i]     -  m4[2][i]);
+      m6[2]=(m4[1][i]>>1) -  m4[3][i];
+      m6[3]= m4[1][i]     + (m4[3][i]>>1);
+
+      ii = i + block_x;
+
+      // add the prediction (scaled up by DQ_BITS), round, scale back and clip
+      img->m7[0][i] = iClip1( img->max_imgpel_value, rshift_rnd_sf((m6[0]+m6[3]+((long)img->mpr[    block_y][ii] << DQ_BITS)),DQ_BITS));
+      img->m7[1][i] = iClip1( img->max_imgpel_value, rshift_rnd_sf((m6[1]+m6[2]+((long)img->mpr[1 + block_y][ii] << DQ_BITS)),DQ_BITS));
+      img->m7[2][i] = iClip1( img->max_imgpel_value, rshift_rnd_sf((m6[1]-m6[2]+((long)img->mpr[2 + block_y][ii] << DQ_BITS)),DQ_BITS));
+      img->m7[3][i] = iClip1( img->max_imgpel_value, rshift_rnd_sf((m6[0]-m6[3]+((long)img->mpr[3 + block_y][ii] << DQ_BITS)),DQ_BITS));
+    }
+    //  Decoded block moved to frame memory
+    for (j=0; j < BLOCK_SIZE; j++)
+    {
+      pix_y = img->pix_y + block_y + j;
+      pix_x = img->pix_x + block_x;
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        enc_picture->imgY[pix_y][pix_x + i]=img->m7[j][i];
+      }
+    }
+  }
+  else // Lossless qpprime code
+  {
+    // The residual in img->m7 is coded as-is; the static buffer m4 is not
+    // written on this path and must therefore not be read here.
+    for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++)
+    {
+      i=pos_scan[coeff_ctr][0];
+      j=pos_scan[coeff_ctr][1];
+
+      run++;
+
+      level = iabs (img->m7[j][i]);
+
+      // adaptive rounding is not meaningful without quantization
+      if (img->AdaptiveRounding)
+      {
+        img->fadjust4x4[intra][block_y+j][block_x+i] = 0;
+      }
+
+      if (level != 0)
+      {
+        nonzero=TRUE;
+
+        *coeff_cost += MAX_VALUE;
+
+        ACLevel[scan_pos] = isignab(level,img->m7[j][i]);
+        ACRun  [scan_pos] = run;
+        ++scan_pos;
+        run=-1;                     // reset zero level counter
+      }
+    }
+    ACLevel[scan_pos] = 0;
+
+    for (j=0; j < BLOCK_SIZE; j++)
+    {
+      pix_y = img->pix_y + block_y + j;
+      pix_x = img->pix_x+block_x;
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        enc_picture->imgY[pix_y][pix_x+i]=img->m7[j][i]+img->mpr[j+block_y][i+block_x];
+      }
+    }
+  }
+
+  return nonzero;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Transform,quantization,inverse transform for chroma.
+ *    The main reason why this is done in a separate routine is the
+ *    additional transform of the DC-coeffs (2x2 for YUV420, 2x4 for
+ *    YUV422, 4x4 for YUV444). This routine is called once for each of
+ *    the chroma components.
+ *
+ *    Processing order:
+ *      1. forward 4x4 transform of every chroma block in img->m7
+ *         (skipped when lossless_qpprime is set),
+ *      2. DC transform / quantization / inverse DC transform, selected
+ *         by img->yuv_format,
+ *      3. quantization of the 15 AC coefficients of every 4x4 block,
+ *      4. thresholding of cheap AC coefficients, inverse 4x4 transform
+ *         and reconstruction into enc_picture->imgUV[uv].
+ *
+ * \par Input:
+ *    uv    : Make difference between the U and V chroma component
+ *            (used as index into cofDC[uv+1], imgUV[uv], ...)          \n
+ *    cr_cbp: chroma coded block pattern
+ *
+ * \par Output:
+ *    cr_cbp: Updated chroma coded block pattern. Raised to at least 1
+ *            when a DC level is non-zero and set to 2 when AC levels
+ *            remain after thresholding (non-lossless path).
+ ************************************************************************
+ */
+int dct_chroma(int uv,int cr_cbp)
+{
+  int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,level ,scan_pos,run;
+  static int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
+  int coeff_cost;
+  int cr_cbp_tmp;
+  int DCcoded=0 ;
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  int qp_per,qp_rem,q_bits;
+
+  int   b4;
+  int*  DCLevel = img->cofDC[uv+1][0];
+  int*  DCRun   = img->cofDC[uv+1][1];
+  int*  ACLevel;
+  int*  ACRun;
+  int   intra = IS_INTRA (currMB);
+  int   uv_scale = uv*(img->num_blk8x8_uv >> 1);
+
+  //FRExt
+  // cbp_blk masks covering all chroma blocks of component 0, indexed by yuv_format
+  static const int64 cbpblk_pattern[4]={0, 0xf0000, 0xff0000, 0xffff0000};
+  int yuv = img->yuv_format;
+  int b8;
+  static int m3[4][4];
+  static int m4[4][4];
+  int qp_per_dc = 0;
+  int qp_rem_dc = 0;
+  int q_bits_422 = 0;
+  int ***levelscale, ***leveloffset;
+  int ***invlevelscale;
+  short pix_c_x, pix_c_y;
+  const byte (*pos_scan)[2] = currMB->is_field_mode ? FIELD_SCAN : SNGL_SCAN;
+
+  // Lossless mode: no transform or quantization is applied, levels are the raw values in img->m7
+  Boolean lossless_qpprime = (Boolean) ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+
+  qp_per = qp_per_matrix[(currMB->qpc[uv] + img->bitdepth_chroma_qp_scale)];
+  qp_rem = qp_rem_matrix[(currMB->qpc[uv] + img->bitdepth_chroma_qp_scale)];
+  q_bits = Q_BITS+qp_per;
+
+  levelscale    = LevelScale4x4Chroma[uv][intra];
+  leveloffset   = LevelOffset4x4Chroma[uv][intra];
+  invlevelscale = InvLevelScale4x4Chroma[uv][intra];
+
+  if (img->yuv_format == YUV422)
+  {
+    //for YUV422 only
+    // the DC coefficients use a quantizer parameter offset by 3
+    qp_per_dc = qp_per_matrix[(currMB->qpc[uv] + 3 + img->bitdepth_chroma_qp_scale)];
+    qp_rem_dc = qp_rem_matrix[(currMB->qpc[uv] + 3 + img->bitdepth_chroma_qp_scale)];
+
+    q_bits_422 = Q_BITS+qp_per_dc;
+  }
+
+
+  //============= dct transform ===============
+  if (!lossless_qpprime)
+  {
+    for (n2=0; n2 < img->mb_cr_size_y; n2 += BLOCK_SIZE)
+    {
+      for (n1=0; n1 < img->mb_cr_size_x; n1 += BLOCK_SIZE)
+      {
+        //  Horizontal transform.
+        for (j=0; j < BLOCK_SIZE; j++)
+        {
+          mb_y=n2+j;
+
+          m5[0]=img->m7[mb_y][n1  ]+img->m7[mb_y][n1+3];
+          m5[1]=img->m7[mb_y][n1+1]+img->m7[mb_y][n1+2];
+          m5[2]=img->m7[mb_y][n1+1]-img->m7[mb_y][n1+2];
+          m5[3]=img->m7[mb_y][n1  ]-img->m7[mb_y][n1+3];
+
+          img->m7[mb_y][n1  ] = (m5[0]   + m5[1]);
+          img->m7[mb_y][n1+2] = (m5[0]   - m5[1]);
+          img->m7[mb_y][n1+1] = (m5[3]<<1) + m5[2];
+          img->m7[mb_y][n1+3] =  m5[3]   - (m5[2]<<1);
+        }
+
+        //  Vertical transform.
+        for (i=0; i < BLOCK_SIZE; i++)
+        {
+          j1=n1+i;
+          m5[0] = img->m7[n2  ][j1] + img->m7[n2+3][j1];
+          m5[1] = img->m7[n2+1][j1] + img->m7[n2+2][j1];
+          m5[2] = img->m7[n2+1][j1] - img->m7[n2+2][j1];
+          m5[3] = img->m7[n2  ][j1] - img->m7[n2+3][j1];
+
+          img->m7[n2  ][j1] = (m5[0]     + m5[1]);
+          img->m7[n2+2][j1] = (m5[0]     - m5[1]);
+          img->m7[n2+1][j1] = (m5[3]<<1) + m5[2];
+          img->m7[n2+3][j1] =  m5[3]     - (m5[2]<<1);
+        }
+      }
+    }
+  }
+
+  if (yuv == YUV420)
+  {
+    //================== CHROMA DC YUV420 ===================
+    //     2X2 transform of DC coeffs.
+    run=-1;
+    scan_pos=0;
+    if(!lossless_qpprime)
+    {
+      m1[0]=(img->m7[0][0] + img->m7[0][4] + img->m7[4][0] + img->m7[4][4]);
+      m1[1]=(img->m7[0][0] - img->m7[0][4] + img->m7[4][0] - img->m7[4][4]);
+      m1[2]=(img->m7[0][0] + img->m7[0][4] - img->m7[4][0] - img->m7[4][4]);
+      m1[3]=(img->m7[0][0] - img->m7[0][4] - img->m7[4][0] + img->m7[4][4]);
+
+      //     Quant of chroma 2X2 coeffs.
+      for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
+      {
+        run++;
+        ilev=0;
+
+        level =(iabs(m1[coeff_ctr]) * levelscale[qp_rem][0][0] + (leveloffset[qp_per][0][0]<<1)) >> (q_bits+1);
+
+        if (input->symbol_mode == UVLC && img->qp < 4)
+        {
+          if (level > CAVLC_LEVEL_LIMIT)
+            level = CAVLC_LEVEL_LIMIT;
+        }
+
+        if (level  != 0)
+        {
+          currMB->cbp_blk |= 0xf0000 << (uv << 2) ;    // if one of the 2x2-DC levels is != 0 set the
+          cr_cbp=imax(1,cr_cbp);                     // coded-bit all 4 4x4 blocks (bit 16-19 or 20-23)
+          DCcoded = 1 ;
+          DCLevel[scan_pos] = isignab(level ,m1[coeff_ctr]);
+          DCRun  [scan_pos] = run;
+          scan_pos++;
+          run=-1;
+
+          ilev=isignab(level, m1[coeff_ctr]);
+        }
+        m1[coeff_ctr]=ilev;
+      }
+
+      DCLevel[scan_pos] = 0;
+      //  Inverse transform of 2x2 DC levels
+      m5[0]=(m1[0] + m1[1] + m1[2] + m1[3]);
+      m5[1]=(m1[0] - m1[1] + m1[2] - m1[3]);
+      m5[2]=(m1[0] + m1[1] - m1[2] - m1[3]);
+      m5[3]=(m1[0] - m1[1] - m1[2] + m1[3]);
+
+      m1[0]=((m5[0] * invlevelscale[qp_rem][0][0])<<qp_per)>>5;
+      m1[1]=((m5[1] * invlevelscale[qp_rem][0][0])<<qp_per)>>5;
+      m1[2]=((m5[2] * invlevelscale[qp_rem][0][0])<<qp_per)>>5;
+      m1[3]=((m5[3] * invlevelscale[qp_rem][0][0])<<qp_per)>>5;
+
+      img->m7[0][0] = m1[0];
+      img->m7[0][4] = m1[1];
+      img->m7[4][0] = m1[2];
+      img->m7[4][4] = m1[3];
+    }
+    else // Lossless qpprime
+    {
+      m1[0]=img->m7[0][0];
+      m1[1]=img->m7[0][4];
+      m1[2]=img->m7[4][0];
+      m1[3]=img->m7[4][4];
+
+      for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
+      {
+        run++;
+        ilev=0;
+
+        level =iabs(m1[coeff_ctr]);
+
+        if (input->symbol_mode == UVLC && img->qp < 4)
+        {
+          if (level > CAVLC_LEVEL_LIMIT) level = CAVLC_LEVEL_LIMIT;
+        }
+
+        if (level  != 0)
+        {
+          currMB->cbp_blk |= 0xf0000 << (uv << 2) ;    // if one of the 2x2-DC levels is != 0 set the
+          cr_cbp=imax(1,cr_cbp);                     // coded-bit all 4 4x4 blocks (bit 16-19 or 20-23)
+          DCcoded = 1 ;
+          DCLevel[scan_pos] = isignab(level ,m1[coeff_ctr]);
+          DCRun  [scan_pos] = run;
+          scan_pos++;
+          run=-1;
+
+          ilev=isignab(level, m1[coeff_ctr]);
+        }
+      }
+      DCLevel[scan_pos] = 0;
+    }
+  }
+  else if(yuv == YUV422)
+  {
+    //================== CHROMA DC YUV422 ===================
+    //transform DC coeff
+    //horizontal
+
+    //pick out DC coeff
+    for (j=0; j < img->mb_cr_size_y; j+=BLOCK_SIZE)
+    {
+      for (i=0; i < img->mb_cr_size_x; i+=BLOCK_SIZE)
+        m3[i>>2][j>>2]= img->m7[j][i];
+    }
+    //horizontal
+    if(!lossless_qpprime)
+    {
+      m4[0][0] = m3[0][0] + m3[1][0];
+      m4[0][1] = m3[0][1] + m3[1][1];
+      m4[0][2] = m3[0][2] + m3[1][2];
+      m4[0][3] = m3[0][3] + m3[1][3];
+
+      m4[1][0] = m3[0][0] - m3[1][0];
+      m4[1][1] = m3[0][1] - m3[1][1];
+      m4[1][2] = m3[0][2] - m3[1][2];
+      m4[1][3] = m3[0][3] - m3[1][3];
+
+      // vertical
+      for (i=0;i<2;i++)
+      {
+        m5[0] = m4[i][0] + m4[i][3];
+        m5[1] = m4[i][1] + m4[i][2];
+        m5[2] = m4[i][1] - m4[i][2];
+        m5[3] = m4[i][0] - m4[i][3];
+
+        m4[i][0] = (m5[0] + m5[1]);
+        m4[i][2] = (m5[0] - m5[1]);
+        m4[i][1] = (m5[3] + m5[2]);
+        m4[i][3] = (m5[3] - m5[2]);
+      }
+    }
+
+    run=-1;
+    scan_pos=0;
+
+    //quant of chroma DC-coeffs
+    for (coeff_ctr=0;coeff_ctr<8;coeff_ctr++)
+    {
+      i=SCAN_YUV422[coeff_ctr][0];
+      j=SCAN_YUV422[coeff_ctr][1];
+
+      run++;
+
+      if(lossless_qpprime)
+      {
+        level = iabs(m3[i][j]);
+        m4[i][j]=m3[i][j];
+      }
+      else
+        level =(iabs(m4[i][j]) * levelscale[qp_rem_dc][0][0] + (leveloffset[qp_per_dc][0][0]*2)) >> (q_bits_422+1);
+
+      if (level != 0)
+      {
+        //YUV422
+        // Widen to int64 before shifting: for uv == 1 the mask becomes 0xff000000, which does
+        // not fit in a signed 32-bit int; the unwidened shift overflowed and the negative
+        // result sign-extended into the upper bits when OR-ed into cbp_blk (same cast as YUV444 below).
+        currMB->cbp_blk |= ((int64)0xff0000) << (uv << 3) ;   // if one of the DC levels is != 0 set the
+        cr_cbp=imax(1,cr_cbp);                       // coded-bit all 4 4x4 blocks (bit 16-31 or 32-47) //YUV444
+        DCcoded = 1 ;
+
+        DCLevel[scan_pos] = isignab(level,m4[i][j]);
+        DCRun  [scan_pos] = run;
+        ++scan_pos;
+        run=-1;
+      }
+      if(!lossless_qpprime)
+        m3[i][j]=isignab(level,m4[i][j]);
+    }
+    DCLevel[scan_pos]=0;
+
+    //inverse DC transform
+    //horizontal
+    if(!lossless_qpprime)
+    {
+      m4[0][0] = m3[0][0] + m3[1][0];
+      m4[0][1] = m3[0][1] + m3[1][1];
+      m4[0][2] = m3[0][2] + m3[1][2];
+      m4[0][3] = m3[0][3] + m3[1][3];
+
+      m4[1][0] = m3[0][0] - m3[1][0];
+      m4[1][1] = m3[0][1] - m3[1][1];
+      m4[1][2] = m3[0][2] - m3[1][2];
+      m4[1][3] = m3[0][3] - m3[1][3];
+
+      // vertical
+      for (i=0;i<2;i++)
+      {
+        m6[0]=m4[i][0]+m4[i][2];
+        m6[1]=m4[i][0]-m4[i][2];
+        m6[2]=m4[i][1]-m4[i][3];
+        m6[3]=m4[i][1]+m4[i][3];
+
+        // for small qp_per_dc the scaling is a rounded right shift, otherwise a left shift
+        if(qp_per_dc<4)
+        {
+          img->m7[0 ][i*4]=((((m6[0]+m6[3])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+          img->m7[4 ][i*4]=((((m6[1]+m6[2])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+          img->m7[8 ][i*4]=((((m6[1]-m6[2])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+          img->m7[12][i*4]=((((m6[0]-m6[3])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+        }
+        else
+        {
+          img->m7[0 ][i*4]=((((m6[0]+m6[3])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+          img->m7[4 ][i*4]=((((m6[1]+m6[2])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+          img->m7[8 ][i*4]=((((m6[1]-m6[2])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+          img->m7[12][i*4]=((((m6[0]-m6[3])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+        }
+      }//for (i=0;i<2;i++)
+    }
+  }
+  else if(yuv == YUV444)
+  {
+    //================== CHROMA DC YUV444 ===================
+    //transform DC coeff
+    //pick out DC coeff
+    for (j=0; j < img->mb_cr_size_y; j+=BLOCK_SIZE)
+    {
+      for (i=0; i < img->mb_cr_size_x; i+=BLOCK_SIZE)
+        m4[i>>2][j>>2]= img->m7[j][i];
+    }
+
+    //horizontal
+    for (j=0;j<4 && !lossless_qpprime;j++)
+    {
+      m5[0] = m4[0][j] + m4[3][j];
+      m5[1] = m4[1][j] + m4[2][j];
+      m5[2] = m4[1][j] - m4[2][j];
+      m5[3] = m4[0][j] - m4[3][j];
+
+      m4[0][j]=m5[0]+m5[1];
+      m4[2][j]=m5[0]-m5[1];
+      m4[1][j]=m5[3]+m5[2];
+      m4[3][j]=m5[3]-m5[2];
+    }
+    // vertical
+    for (i=0;i<4 && !lossless_qpprime;i++)
+    {
+      m5[0] = m4[i][0] + m4[i][3];
+      m5[1] = m4[i][1] + m4[i][2];
+      m5[2] = m4[i][1] - m4[i][2];
+      m5[3] = m4[i][0] - m4[i][3];
+
+      m4[i][0]=(m5[0]+m5[1])>>1;
+      m4[i][2]=(m5[0]-m5[1])>>1;
+      m4[i][1]=(m5[3]+m5[2])>>1;
+      m4[i][3]=(m5[3]-m5[2])>>1;
+    }
+
+    run=-1;
+    scan_pos=0;
+
+    //quant of chroma DC-coeffs
+    for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++)
+    {
+      i=SNGL_SCAN[coeff_ctr][0];
+      j=SNGL_SCAN[coeff_ctr][1];
+
+      run++;
+
+      if(lossless_qpprime)
+        level = iabs(m4[i][j]);
+      else
+        level =(iabs(m4[i][j]) * levelscale[qp_rem][0][0] + (leveloffset[qp_per][0][0]*2)) >> (q_bits+1);
+
+      if (level != 0)
+      {
+        //YUV444
+        currMB->cbp_blk |= ((int64)0xffff0000) << (uv << 4) ;   // if one of the DC levels is != 0 set the
+        cr_cbp=imax(1,cr_cbp);                                  // coded-bit all 4 4x4 blocks (bit 16-31 or 32-47) //YUV444
+        DCcoded = 1 ;
+
+        DCLevel[scan_pos] = isignab(level,m4[i][j]);
+        DCRun  [scan_pos] = run;
+        ++scan_pos;
+        run=-1;
+      }
+      if(!lossless_qpprime)
+        m4[i][j]=isignab(level,m4[i][j]);
+    }
+    DCLevel[scan_pos]=0;
+
+    // inverse DC transform
+    //horizontal
+    if (!lossless_qpprime)
+    {
+      for (j = 0; j < 4; j++)
+      {
+        m6[0] = m4[0][j] + m4[2][j];
+        m6[1] = m4[0][j] - m4[2][j];
+        m6[2] = m4[1][j] - m4[3][j];
+        m6[3] = m4[1][j] + m4[3][j];
+
+        m4[0][j] = m6[0] + m6[3];
+        m4[1][j] = m6[1] + m6[2];
+        m4[2][j] = m6[1] - m6[2];
+        m4[3][j] = m6[0] - m6[3];
+      }
+
+      //vertical
+      for (i=0;i<4;i++)
+      {
+        m6[0]=m4[i][0]+m4[i][2];
+        m6[1]=m4[i][0]-m4[i][2];
+        m6[2]=m4[i][1]-m4[i][3];
+        m6[3]=m4[i][1]+m4[i][3];
+
+        if(qp_per<4)
+        {
+          img->m7[0 ][i*4] = ((((m6[0] + m6[3])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+          img->m7[4 ][i*4] = ((((m6[1] + m6[2])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+          img->m7[8 ][i*4] = ((((m6[1] - m6[2])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+          img->m7[12][i*4] = ((((m6[0] - m6[3])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+        }
+        else
+        {
+          img->m7[0 ][i*4] = ((((m6[0]+m6[3])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+          img->m7[4 ][i*4] = ((((m6[1]+m6[2])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+          img->m7[8 ][i*4] = ((((m6[1]-m6[2])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+          img->m7[12][i*4] = ((((m6[0]-m6[3])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+        }
+      }
+    }
+  }
+
+  //     Quant of chroma AC-coeffs.
+  coeff_cost=0;
+  cr_cbp_tmp=0;
+
+  for (b8=0; b8 < (img->num_blk8x8_uv >> 1); b8++)
+  {
+    for (b4=0; b4 < 4; b4++)
+    {
+      int64 uv_cbpblk = ((int64)1) << cbp_blk_chroma[b8 + uv_scale][b4];
+      n1 = hor_offset[yuv][b8][b4];
+      n2 = ver_offset[yuv][b8][b4];
+      ACLevel = img->cofAC[4+b8+uv_scale][b4][0];
+      ACRun   = img->cofAC[4+b8+uv_scale][b4][1];
+      run=-1;
+      scan_pos=0;
+
+      if(!lossless_qpprime)
+      {
+        // scan starts at 1: position 0 is the DC coefficient handled above
+        for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)  // start change rd_quant
+        {
+          i=pos_scan[coeff_ctr][0];
+          j=pos_scan[coeff_ctr][1];
+
+          ++run;
+          ilev=0;
+
+          level=(iabs(img->m7[n2+j][n1+i])*levelscale[qp_rem][j][i]+leveloffset[qp_per][j][i])>>q_bits;
+
+          if (img->AdaptiveRounding)
+          {
+            img->fadjust4x4Cr[intra][uv][n2+j][n1+i] = (level == 0)
+              ? 0
+              : rshift_rnd_sf((AdaptRndCrWeight * (iabs(img->m7[n2+j][n1+i]) * levelscale[qp_rem][j][i] - (level << q_bits))), (q_bits + 1));
+          }
+
+          if (level  != 0)
+          {
+            currMB->cbp_blk |= uv_cbpblk;
+            // if level > 1 set high cost to avoid thresholding
+            coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST[input->disthres][run];
+
+            cr_cbp_tmp=2;
+            ACLevel[scan_pos] = isignab(level,img->m7[n2+j][n1+i]);
+            ACRun  [scan_pos] = run;
+            ++scan_pos;
+            run=-1;
+
+            level=isignab(level, img->m7[n2+j][n1+i]);
+
+            ilev = rshift_rnd_sf((level*invlevelscale[qp_rem][j][i])<<qp_per, 4);
+            // inverse scale can be alternative performed as follows to ensure 16bit
+            // arithmetic is satisfied.
+            // ilev = (qp_per<4)
+            //      ? rshift_rnd_sf((level*invlevelscale[qp_rem][j][i]),4-qp_per);
+            //      : (level*invlevelscale[qp_rem][j][i])<<(qp_per-4);
+          }
+          img->m7[n2+j][n1+i]=ilev;
+        }
+      }
+      else
+      {
+        for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant
+        {
+          i=pos_scan[coeff_ctr][0];
+          j=pos_scan[coeff_ctr][1];
+
+          ++run;
+          ilev=0;
+
+          level = iabs(img->m7[n2+j][n1+i]);
+
+          if (img->AdaptiveRounding)
+          {
+            img->fadjust4x4Cr[intra][uv][n2+j][n1+i] = 0;
+          }
+
+          if (level  != 0)
+          {
+            currMB->cbp_blk |= uv_cbpblk;
+            coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+
+            cr_cbp_tmp=2;
+            ACLevel[scan_pos] = isignab(level,img->m7[n2+j][n1+i]);
+            ACRun  [scan_pos] = run;
+            ++scan_pos;
+            run=-1;
+
+            level=isignab(level, img->m7[n2+j][n1+i]);
+            ilev = level;
+          }
+        }
+      }
+      ACLevel[scan_pos] = 0;
+    }
+  }
+
+  // NOTE(review): cr_cbp is only raised to 2 inside the non-lossless branch below;
+  // in the lossless path AC levels may be stored while cr_cbp stays unchanged - confirm intended.
+  if(!lossless_qpprime)
+  {
+    // Perform thresholding
+    // * reset chroma coeffs
+    if(coeff_cost < _CHROMA_COEFF_COST_)
+    {
+      int64 uv_cbpblk = ((int64)cbpblk_pattern[yuv] << (uv << (1+yuv)));
+      cr_cbp_tmp = 0;
+
+      for (b8=0; b8 < (img->num_blk8x8_uv >> 1); b8++)
+      {
+        for (b4=0; b4 < 4; b4++)
+        {
+          n1 = hor_offset[yuv][b8][b4];
+          n2 = ver_offset[yuv][b8][b4];
+          ACLevel = img->cofAC[4 + b8 + uv_scale][b4][0];
+          ACRun   = img->cofAC[4 + b8 + uv_scale][b4][1];
+          if( DCcoded == 0)
+            currMB->cbp_blk &= ~(uv_cbpblk);  // if no chroma DC's: then reset coded-bits of this chroma subblock
+
+          ACLevel[0] = 0;
+          for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// ac coeff
+          {
+            i = pos_scan[coeff_ctr][0];
+            j = pos_scan[coeff_ctr][1];
+
+            img->m7[n2+j][n1+i] = 0;
+            ACLevel[coeff_ctr]  = 0;
+          }
+        }
+      }
+    }
+
+    //     IDCT.
+    //     Horizontal.
+    if(cr_cbp_tmp==2)
+      cr_cbp = 2;
+
+    for (n2=0; n2 < img->mb_cr_size_y; n2 += BLOCK_SIZE)
+    {
+      for (n1=0; n1 < img->mb_cr_size_x; n1 += BLOCK_SIZE)
+      {
+        for (j=0; j < BLOCK_SIZE; j++)
+        {
+          j2 = n2 + j;
+          memcpy(&m5[0],&img->m7[j2][n1], BLOCK_SIZE * sizeof(int));
+
+          m6[0] = (m5[0]     +  m5[2]);
+          m6[1] = (m5[0]     -  m5[2]);
+          m6[2] = (m5[1]>>1) -  m5[3];
+          m6[3] =  m5[1]     + (m5[3]>>1);
+
+          img->m7[j2][n1  ] = m6[0] + m6[3];
+          img->m7[j2][n1+1] = m6[1] + m6[2];
+          img->m7[j2][n1+2] = m6[1] - m6[2];
+          img->m7[j2][n1+3] = m6[0] - m6[3];
+        }
+
+        //     Vertical.
+        for (i=0; i < BLOCK_SIZE; i++)
+        {
+          i1 = n1 + i;
+          for (j=0; j < BLOCK_SIZE; j++)
+          {
+            m5[j]=img->m7[n2+j][i1];
+          }
+          m6[0] = (m5[0] + m5[2]);
+          m6[1] = (m5[0] - m5[2]);
+          m6[2] = (m5[1]>>1) - m5[3];
+          m6[3] =  m5[1] + (m5[3]>>1);
+
+          // add the prediction (scaled up by DQ_BITS), round, shift down and clip to the chroma sample range
+          img->m7[n2  ][i1] = iClip1(img->max_imgpel_value_uv, rshift_rnd_sf((m6[0]+m6[3]+((long)img->mpr[n2  ][i1] << DQ_BITS)),DQ_BITS));
+          img->m7[n2+1][i1] = iClip1(img->max_imgpel_value_uv, rshift_rnd_sf((m6[1]+m6[2]+((long)img->mpr[n2+1][i1] << DQ_BITS)),DQ_BITS));
+          img->m7[n2+2][i1] = iClip1(img->max_imgpel_value_uv, rshift_rnd_sf((m6[1]-m6[2]+((long)img->mpr[n2+2][i1] << DQ_BITS)),DQ_BITS));
+          img->m7[n2+3][i1] = iClip1(img->max_imgpel_value_uv, rshift_rnd_sf((m6[0]-m6[3]+((long)img->mpr[n2+3][i1] << DQ_BITS)),DQ_BITS));
+        }
+      }
+    }
+
+    //  Decoded block moved to memory
+    for (j=0; j < img->mb_cr_size_y; j++)
+    {
+      pix_c_y = img->pix_c_y+j;
+      for (i=0; i < img->mb_cr_size_x; i++)
+      {
+        pix_c_x = img->pix_c_x + i;
+        enc_picture->imgUV[uv][pix_c_y][pix_c_x]= (imgpel) img->m7[j][i];
+      }
+    }
+  }
+  else
+  {
+    // Lossless: reconstruction is simply residual + prediction
+    for (j=0; j < img->mb_cr_size_y; j++)
+    {
+      pix_c_y = img->pix_c_y + j;
+      for (i=0; i < img->mb_cr_size_x; i++)
+      {
+        pix_c_x = img->pix_c_x + i;
+        enc_picture->imgUV[uv][pix_c_y][pix_c_x]= (imgpel) img->m7[j][i] + img->mpr[j][i];
+      }
+    }
+  }
+
+  return cr_cbp;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    SP-picture variant of the luma 4x4 routine. Adds the prediction to
+ *    img->m7, forward transforms both that block and the prediction,
+ *    quantizes the difference in the transform domain (choosing between
+ *    two candidate levels by a distortion + lambda * length cost),
+ *    re-quantizes the reconstructed coefficients with the SP quantizer,
+ *    inverse transforms and writes the result to the decoded luma frame.
+ *
+ * \par Input:
+ *    block_x,block_y: Block position inside a macro block (0,4,8,12).
+ *
+ * \par Output:
+ *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.              \n
+ *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+ ************************************************************************
+ */
+int dct_luma_sp(int block_x,int block_y,int *coeff_cost)
+{
+  int col, row, k;
+  int bf[4];                              // forward butterfly scratch
+  int in[4], ib[4];                       // inverse butterfly input / scratch
+  int pred[BLOCK_SIZE][BLOCK_SIZE];       // prediction, indexed [x][y]
+
+  int level, level1, level2;
+  int c_err, c_err1, c_err2;
+  int ilev, requant;
+  int len, info;
+  double D_dis1, D_dis2;
+
+  int run      = -1;
+  int scan_pos = 0;
+  int nonzero  = FALSE;
+
+  int   pos_x   = block_x >> BLOCK_SHIFT;
+  int   pos_y   = block_y >> BLOCK_SHIFT;
+  int   b8      = 2*(pos_y >> 1) + (pos_x >> 1);
+  int   b4      = 2*(pos_y & 0x01) + (pos_x & 0x01);
+  int*  ACLevel = img->cofAC[b8][b4][0];
+  int*  ACRun   = img->cofAC[b8][b4][1];
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  // Lagrangian weight used when choosing between the two candidate levels
+  double lambda_mode   = 0.85 * pow (2, (currMB->qp - SHIFT_QP)/3.0) * 4;
+
+  // quantizer for the residual (qp) and for the SP re-quantization (qpsp)
+  int qp_per    = qp_per_matrix[(currMB->qp - MIN_QP)];
+  int qp_rem    = qp_rem_matrix[(currMB->qp - MIN_QP)];
+  int q_bits    = Q_BITS + qp_per;
+  int qp_per_sp = qp_per_matrix[(currMB->qpsp - MIN_QP)];
+  int qp_rem_sp = qp_rem_matrix[(currMB->qpsp - MIN_QP)];
+  int q_bits_sp = Q_BITS + qp_per_sp;
+
+  int qp_const  = (1<<q_bits)/6;    // inter
+  int qp_const2 = (1<<q_bits_sp)/2;  //sp_pred
+
+  // Add the prediction to the block in img->m7 and keep a (transposed) copy of the prediction
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    for (col = 0; col < BLOCK_SIZE; col++)
+    {
+      img->m7[row][col] += img->mpr[row+block_y][col+block_x];
+      pred[col][row]     = img->mpr[row+block_y][col+block_x];
+    }
+  }
+
+  //  Forward transform of img->m7: horizontal pass
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    bf[0] = img->m7[row][0] + img->m7[row][3];
+    bf[3] = img->m7[row][0] - img->m7[row][3];
+    bf[1] = img->m7[row][1] + img->m7[row][2];
+    bf[2] = img->m7[row][1] - img->m7[row][2];
+
+    img->m7[row][0] = (bf[0]+bf[1]);
+    img->m7[row][2] = (bf[0]-bf[1]);
+    img->m7[row][1] = bf[3]*2+bf[2];
+    img->m7[row][3] = bf[3]-bf[2]*2;
+  }
+
+  //  Forward transform of img->m7: vertical pass
+  for (col = 0; col < BLOCK_SIZE; col++)
+  {
+    bf[0] = img->m7[0][col] + img->m7[3][col];
+    bf[3] = img->m7[0][col] - img->m7[3][col];
+    bf[1] = img->m7[1][col] + img->m7[2][col];
+    bf[2] = img->m7[1][col] - img->m7[2][col];
+
+    img->m7[0][col] = (bf[0]+bf[1]);
+    img->m7[2][col] = (bf[0]-bf[1]);
+    img->m7[1][col] = bf[3]*2+bf[2];
+    img->m7[3][col] = bf[3]-bf[2]*2;
+  }
+
+  //  Forward transform of the prediction: pass over the first index
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    bf[0] = pred[0][row] + pred[3][row];
+    bf[3] = pred[0][row] - pred[3][row];
+    bf[1] = pred[1][row] + pred[2][row];
+    bf[2] = pred[1][row] - pred[2][row];
+
+    pred[0][row] = (bf[0]+bf[1]);
+    pred[2][row] = (bf[0]-bf[1]);
+    pred[1][row] = bf[3]*2+bf[2];
+    pred[3][row] = bf[3]-bf[2]*2;
+  }
+
+  //  Forward transform of the prediction: pass over the second index
+  for (col = 0; col < BLOCK_SIZE; col++)
+  {
+    bf[0] = pred[col][0] + pred[col][3];
+    bf[3] = pred[col][0] - pred[col][3];
+    bf[1] = pred[col][1] + pred[col][2];
+    bf[2] = pred[col][1] - pred[col][2];
+
+    pred[col][0] = (bf[0]+bf[1]);
+    pred[col][2] = (bf[0]-bf[1]);
+    pred[col][1] = bf[3]*2+bf[2];
+    pred[col][3] = bf[3]-bf[2]*2;
+  }
+
+  // Quant
+  for (k = 0; k < 16; k++)     // 8 times if double scan, 16 normal scan
+  {
+    // Alternate scan for field coding, zig-zag scan otherwise
+    const byte *pos = currMB->is_field_mode ? FIELD_SCAN[k] : SNGL_SCAN[k];
+
+    col = pos[0];
+    row = pos[1];
+
+    run++;
+
+    // decide prediction
+
+    // case 1: error against the prediction quantized/dequantized with the SP quantizer
+    level1 = (iabs (pred[col][row]) * quant_coef[qp_rem_sp][col][row] + qp_const2) >> q_bits_sp;
+    level1 = (level1 << q_bits_sp) / quant_coef[qp_rem_sp][col][row];
+    c_err1 = img->m7[row][col]-isignab(level1, pred[col][row]);
+    level1 = (iabs (c_err1) * quant_coef[qp_rem][col][row] + qp_const) >> q_bits;
+
+    // case 2: error against the unquantized prediction
+    c_err2 = img->m7[row][col]-pred[col][row];
+    level2 = (iabs (c_err2) * quant_coef[qp_rem][col][row] + qp_const) >> q_bits;
+
+    // select prediction
+    if (level1 == level2)
+    {
+      level = level1;
+      c_err = c_err1;
+    }
+    else if ((level1 == 0) || (level2 == 0))
+    {
+      // exactly one candidate is zero
+      if (level1 == 0)
+      {
+        level = level1;
+        c_err = c_err1;
+      }
+      else
+      {
+        level = level2;
+        c_err = c_err2;
+      }
+    }
+    else
+    {
+      // both candidates are non-zero and differ: compare squared error plus weighted code length
+      D_dis1 = img->m7[row][col] - ((isignab(level1,c_err1)*dequant_coef[qp_rem][col][row]*A[col][row]<< qp_per) >>6) - pred[col][row];
+      levrun_linfo_inter(level1, run, &len, &info);
+      D_dis1 = D_dis1*D_dis1 + lambda_mode * len;
+
+      D_dis2 = img->m7[row][col] - ((isignab(level2,c_err2)*dequant_coef[qp_rem][col][row]*A[col][row]<< qp_per) >>6) - pred[col][row];
+      levrun_linfo_inter(level2, run, &len, &info);
+      D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;
+
+      level = (D_dis1 == D_dis2)
+        ? ((iabs(level1) < iabs(level2)) ? level1 : level2)
+        : ((D_dis1 < D_dis2) ? level1 : level2);
+
+      c_err = (level == level1) ? c_err1 : c_err2;
+    }
+
+    // reconstructed coefficient starts from the transformed prediction
+    ilev = pred[col][row];
+
+    if (level != 0)
+    {
+      nonzero = TRUE;
+
+      // set high cost for levels > 1, shall not be discarded
+      *coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST[input->disthres][run];
+
+      ACLevel[scan_pos] = isignab(level,c_err);
+      ACRun  [scan_pos] = run;
+      ++scan_pos;
+      run = -1;                     // reset zero level counter
+
+      ilev += ((isignab(level,c_err)*dequant_coef[qp_rem][col][row]*A[col][row]<< qp_per) >>6);
+    }
+
+    // re-quantize the reconstructed coefficient with the SP quantizer
+    requant = isignab((iabs(ilev) * quant_coef[qp_rem_sp][col][row] + qp_const2) >> q_bits_sp, ilev);
+
+    //stores the SP frame coefficients in lrec, will be useful to encode these and create SI or SP switching frame
+    if (!(si_frame_indicator || sp2_frame_indicator))
+    {
+      lrec[img->pix_y+block_y+row][img->pix_x+block_x+col] = requant;
+    }
+
+    img->m7[row][col] = requant * dequant_coef[qp_rem_sp][col][row] << qp_per_sp;
+  }
+  ACLevel[scan_pos] = 0;
+
+
+  //     IDCT.
+  //     horizontal
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    in[0] = img->m7[row][0];
+    in[1] = img->m7[row][1];
+    in[2] = img->m7[row][2];
+    in[3] = img->m7[row][3];
+
+    ib[0] = (in[0]+in[2]);
+    ib[1] = (in[0]-in[2]);
+    ib[2] = (in[1]>>1)-in[3];
+    ib[3] = in[1]+(in[3]>>1);
+
+    img->m7[row][0] = ib[0]+ib[3];
+    img->m7[row][3] = ib[0]-ib[3];
+    img->m7[row][1] = ib[1]+ib[2];
+    img->m7[row][2] = ib[1]-ib[2];
+  }
+
+  //  vertical, followed by rounding, down-shift and clipping to the luma sample range
+  for (col = 0; col < BLOCK_SIZE; col++)
+  {
+    in[0] = img->m7[0][col];
+    in[1] = img->m7[1][col];
+    in[2] = img->m7[2][col];
+    in[3] = img->m7[3][col];
+
+    ib[0] = (in[0]+in[2]);
+    ib[1] = (in[0]-in[2]);
+    ib[2] = (in[1]>>1)-in[3];
+    ib[3] = in[1]+(in[3]>>1);
+
+    img->m7[0][col] = iClip3(0,img->max_imgpel_value,(ib[0]+ib[3]+DQ_ROUND)>>DQ_BITS);
+    img->m7[3][col] = iClip3(0,img->max_imgpel_value,(ib[0]-ib[3]+DQ_ROUND)>>DQ_BITS);
+    img->m7[1][col] = iClip3(0,img->max_imgpel_value,(ib[1]+ib[2]+DQ_ROUND)>>DQ_BITS);
+    img->m7[2][col] = iClip3(0,img->max_imgpel_value,(ib[1]-ib[2]+DQ_ROUND)>>DQ_BITS);
+  }
+
+  //  Decoded block moved to frame memory
+  for (row = 0; row < BLOCK_SIZE; row++)
+  {
+    for (col = 0; col < BLOCK_SIZE; col++)
+    {
+      enc_picture->imgY[img->pix_y+block_y+row][img->pix_x+block_x+col] = (imgpel) img->m7[row][col];
+    }
+  }
+
+  return nonzero;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Transform,quantization,inverse transform for chroma.
+ *    The main reason why this is done in a separate routine is the
+ *    additional 2x2 transform of DC-coeffs. This routine is called
+ *    once for each of the chroma components.
+ *
+ * \par Input:
+ *    uv    : Make difference between the U and V chroma component               \n
+ *    cr_cbp: chroma coded block pattern
+ *
+ * \par Output:
+ *    cr_cbp: Updated chroma coded block pattern.
+ ************************************************************************
+ */
+int dct_chroma_sp(int uv,int cr_cbp)
+{
+  int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,qp_const,c_err,level ,scan_pos,run;
+  int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
+  int coeff_cost;
+  int cr_cbp_tmp;
+  int predicted_chroma_block[MB_BLOCK_SIZE>>1][MB_BLOCK_SIZE>>1],qp_const2,mp1[BLOCK_SIZE];
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  int qp_per,qp_rem,q_bits;
+  int qp_per_sp,qp_rem_sp,q_bits_sp;
+
+  int   b4;
+  int*  DCLevel = img->cofDC[uv+1][0];
+  int*  DCRun   = img->cofDC[uv+1][1];
+  int*  ACLevel;
+  int*  ACRun;
+
+  int c_err1, c_err2, level1, level2;
+  int len, info;
+  double D_dis1, D_dis2;
+  double lambda_mode   = 0.85 * pow (2, (currMB->qp -SHIFT_QP)/3.0) * 4;
+
+
+  int qpChroma = iClip3(-img->bitdepth_chroma_qp_scale, 51, currMB->qp + active_pps->chroma_qp_index_offset);
+  int qpChromaSP=iClip3(-img->bitdepth_chroma_qp_scale, 51, currMB->qpsp + active_pps->chroma_qp_index_offset);
+
+  qp_per    = ((qpChroma<0?qpChroma:QP_SCALE_CR[qpChroma])-MIN_QP)/6;
+  qp_rem    = ((qpChroma<0?qpChroma:QP_SCALE_CR[qpChroma])-MIN_QP)%6;
+  q_bits    = Q_BITS+qp_per;
+  qp_const=(1<<q_bits)/6;    // inter
+  qp_per_sp    = ((qpChromaSP<0?currMB->qpsp:QP_SCALE_CR[qpChromaSP])-MIN_QP)/6;
+  qp_rem_sp    = ((qpChromaSP<0?currMB->qpsp:QP_SCALE_CR[qpChromaSP])-MIN_QP)%6;
+  q_bits_sp    = Q_BITS+qp_per_sp;
+  qp_const2=(1<<q_bits_sp)/2;  //sp_pred
+
+
+  for (j=0; j < MB_BLOCK_SIZE>>1; j++)
+    for (i=0; i < MB_BLOCK_SIZE>>1; i++)
+    {
+      img->m7[j][i]+=img->mpr[j][i];
+      predicted_chroma_block[i][j]=img->mpr[j][i];
+    }
+
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+
+      //  Horizontal transform.
+      for (j=0; j < BLOCK_SIZE; j++)
+      {
+        mb_y=n2+j;
+        for (i=0; i < 2; i++)
+        {
+          i1=3-i;
+          m5[i]=img->m7[mb_y][i+n1]+img->m7[mb_y][i1+n1];
+          m5[i1]=img->m7[mb_y][i+n1]-img->m7[mb_y][i1+n1];
+        }
+        img->m7[mb_y][n1]  =(m5[0]+m5[1]);
+        img->m7[mb_y][n1+2]=(m5[0]-m5[1]);
+        img->m7[mb_y][n1+1]=m5[3]*2+m5[2];
+        img->m7[mb_y][n1+3]=m5[3]-m5[2]*2;
+      }
+
+      //  Vertical transform.
+
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        j1=n1+i;
+        for (j=0; j < 2; j++)
+        {
+          j2=3-j;
+          m5[j]=img->m7[n2+j][j1]+img->m7[n2+j2][j1];
+          m5[j2]=img->m7[n2+j][j1]-img->m7[n2+j2][j1];
+        }
+        img->m7[n2  ][j1]=(m5[0]+m5[1]);
+        img->m7[n2+2][j1]=(m5[0]-m5[1]);
+        img->m7[n2+1][j1]=m5[3]*2+m5[2];
+        img->m7[n2+3][j1]=m5[3]-m5[2]*2;
+      }
+    }
+  }
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+
+      //  Horizontal transform.
+      for (j=0; j < BLOCK_SIZE; j++)
+      {
+        mb_y=n2+j;
+        for (i=0; i < 2; i++)
+        {
+          i1=3-i;
+          m5[i]=predicted_chroma_block[i+n1][mb_y]+predicted_chroma_block[i1+n1][mb_y];
+          m5[i1]=predicted_chroma_block[i+n1][mb_y]-predicted_chroma_block[i1+n1][mb_y];
+        }
+        predicted_chroma_block[n1][mb_y]  =(m5[0]+m5[1]);
+        predicted_chroma_block[n1+2][mb_y]=(m5[0]-m5[1]);
+        predicted_chroma_block[n1+1][mb_y]=m5[3]*2+m5[2];
+        predicted_chroma_block[n1+3][mb_y]=m5[3]-m5[2]*2;
+      }
+
+      //  Vertical transform.
+
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        j1=n1+i;
+        for (j=0; j < 2; j++)
+        {
+          j2=3-j;
+          m5[j]=predicted_chroma_block[j1][n2+j]+predicted_chroma_block[j1][n2+j2];
+          m5[j2]=predicted_chroma_block[j1][n2+j]-predicted_chroma_block[j1][n2+j2];
+        }
+        predicted_chroma_block[j1][n2  ]=(m5[0]+m5[1]);
+        predicted_chroma_block[j1][n2+2]=(m5[0]-m5[1]);
+        predicted_chroma_block[j1][n2+1]=m5[3]*2+m5[2];
+        predicted_chroma_block[j1][n2+3]=m5[3]-m5[2]*2;
+      }
+    }
+  }
+
+  //     2X2 transform of DC coeffs.
+  m1[0]=(img->m7[0][0]+img->m7[0][4]+img->m7[4][0]+img->m7[4][4]);
+  m1[1]=(img->m7[0][0]-img->m7[0][4]+img->m7[4][0]-img->m7[4][4]);
+  m1[2]=(img->m7[0][0]+img->m7[0][4]-img->m7[4][0]-img->m7[4][4]);
+  m1[3]=(img->m7[0][0]-img->m7[0][4]-img->m7[4][0]+img->m7[4][4]);
+
+  //     2X2 transform of DC coeffs.
+  mp1[0]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]+predicted_chroma_block[0][4]+predicted_chroma_block[4][4]);
+  mp1[1]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]+predicted_chroma_block[0][4]-predicted_chroma_block[4][4]);
+  mp1[2]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]-predicted_chroma_block[0][4]-predicted_chroma_block[4][4]);
+  mp1[3]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]-predicted_chroma_block[0][4]+predicted_chroma_block[4][4]);
+
+  run=-1;
+  scan_pos=0;
+
+  for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
+  {
+    run++;
+    ilev=0;
+
+  // case 1
+    c_err1 = (iabs (mp1[coeff_ctr]) * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp + 1);
+    c_err1 = (c_err1 << (q_bits_sp + 1)) / quant_coef[qp_rem_sp][0][0];
+    c_err1 = m1[coeff_ctr] - isignab(c_err1, mp1[coeff_ctr]);
+    level1 = (iabs(c_err1) * quant_coef[qp_rem][0][0] + 2 * qp_const) >> (q_bits+1);
+
+  // case 2
+    c_err2 = m1[coeff_ctr] - mp1[coeff_ctr];
+    level2 = (iabs(c_err2) * quant_coef[qp_rem][0][0] + 2 * qp_const) >> (q_bits+1);
+
+    if (level1 != level2 && level1 != 0 && level2 != 0)
+    {
+      D_dis1 = m1[coeff_ctr] - ((isignab(level1,c_err1)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)- mp1[coeff_ctr];
+      levrun_linfo_c2x2(level1, run, &len, &info);
+      D_dis1 = D_dis1 * D_dis1 + lambda_mode * len;
+
+      D_dis2 = m1[coeff_ctr] - ((isignab(level2,c_err2)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)- mp1[coeff_ctr];
+      levrun_linfo_c2x2(level2, run, &len, &info);
+      D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;
+
+      if (D_dis1 == D_dis2)
+        level = (iabs(level1) < iabs(level2)) ? level1 : level2;
+      else
+      {
+        if (D_dis1 < D_dis2)
+          level = level1;
+        else
+          level = level2;
+      }
+      c_err = (level == level1) ? c_err1 : c_err2;
+    }
+    else if (level1 == level2)
+    {
+      level = level1;
+      c_err = c_err1;
+    }
+    else
+    {
+      level = (level1 == 0) ? level1 : level2;
+      c_err = (level1 == 0) ? c_err1 : c_err2;
+    }
+
+    if (input->symbol_mode == UVLC && img->qp < 4)
+    {
+      if (level > CAVLC_LEVEL_LIMIT)
+      {
+        level = CAVLC_LEVEL_LIMIT;
+      }
+    }
+
+    if (level  != 0)
+    {
+      currMB->cbp_blk |= 0xf0000 << (uv << 2) ;  // if one of the 2x2-DC levels is != 0 the coded-bit
+      cr_cbp=imax(1,cr_cbp);
+      DCLevel[scan_pos] = isignab(level ,c_err);
+      DCRun  [scan_pos] = run;
+      scan_pos++;
+      run=-1;
+      ilev=((isignab(level,c_err)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5);
+    }
+    ilev+= mp1[coeff_ctr];
+    m1[coeff_ctr]=isignab((iabs(ilev)  * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp+1), ilev) * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
+    if(!si_frame_indicator && !sp2_frame_indicator)
+      lrec_uv[uv][img->pix_c_y+4*(coeff_ctr%2)][img->pix_c_x+4*(coeff_ctr/2)]=isignab((iabs(ilev)  * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp+1), ilev);// stores the SP frames coefficients, will be useful to encode SI or switching SP frame
+  }
+  DCLevel[scan_pos] = 0;
+
+  //  Inverse transform of 2x2 DC levels
+
+  img->m7[0][0]=(m1[0]+m1[1]+m1[2]+m1[3])/2;
+  img->m7[0][4]=(m1[0]-m1[1]+m1[2]-m1[3])/2;
+  img->m7[4][0]=(m1[0]+m1[1]-m1[2]-m1[3])/2;
+  img->m7[4][4]=(m1[0]-m1[1]-m1[2]+m1[3])/2;
+
+  //     Quant of chroma AC-coeffs.
+  coeff_cost=0;
+  cr_cbp_tmp=0;
+
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+      b4      = 2*(n2 >> 2) + (n1 >> 2);
+      ACLevel = img->cofAC[uv+4][b4][0];
+      ACRun   = img->cofAC[uv+4][b4][1];
+
+      run      = -1;
+      scan_pos =  0;
+
+      for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant
+      {
+
+        if (currMB->is_field_mode)
+        {  // Alternate scan for field coding
+          i=FIELD_SCAN[coeff_ctr][0];
+          j=FIELD_SCAN[coeff_ctr][1];
+        }
+        else
+        {
+          i=SNGL_SCAN[coeff_ctr][0];
+          j=SNGL_SCAN[coeff_ctr][1];
+        }
+        ++run;
+        ilev=0;
+
+    // quantization on prediction
+    c_err1 = (iabs(predicted_chroma_block[n1+i][n2+j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp;
+    c_err1 = (c_err1 << q_bits_sp) / quant_coef[qp_rem_sp][i][j];
+    c_err1 = img->m7[n2+j][n1+i] - isignab(c_err1, predicted_chroma_block[n1+i][n2+j]);
+    level1 = (iabs(c_err1) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;
+
+    // no quantization on prediction
+    c_err2 = img->m7[n2+j][n1+i] - predicted_chroma_block[n1+i][n2+j];
+    level2 = (iabs(c_err2) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;
+
+    if (level1 != level2 && level1 != 0 && level2 != 0)
+    {
+      D_dis1 = img->m7[n2+j][n1+i] - ((isignab(level1,c_err1)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_chroma_block[n1+i][n2+j];
+
+      levrun_linfo_inter(level1, run, &len, &info);
+      D_dis1 = D_dis1 * D_dis1 + lambda_mode * len;
+
+      D_dis2 = img->m7[n2+j][n1+i] - ((isignab(level2,c_err2)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_chroma_block[n1+i][n2+j];
+      levrun_linfo_inter(level2, run, &len, &info);
+      D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;
+
+      if (D_dis1 == D_dis2)
+        level = (iabs(level1) < iabs(level2)) ? level1 : level2;
+      else
+      {
+        if (D_dis1 < D_dis2)
+          level = level1;
+        else
+          level = level2;
+      }
+      c_err = (level == level1) ? c_err1 : c_err2;
+    }
+    else if (level1 == level2)
+    {
+      level = level1;
+      c_err = c_err1;
+    }
+    else
+    {
+      level = (level1 == 0) ? level1 : level2;
+      c_err = (level1 == 0) ? c_err1 : c_err2;
+    }
+
+        if (level  != 0)
+        {
+          currMB->cbp_blk |=  (int64)1 << (16 + (uv << 2) + ((n2 >> 1) + (n1 >> 2))) ;
+          if (level > 1)
+            coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+          else
+            coeff_cost += COEFF_COST[input->disthres][run];
+
+          cr_cbp_tmp=2;
+          ACLevel[scan_pos] = isignab(level,c_err);
+          ACRun  [scan_pos] = run;
+          ++scan_pos;
+          run=-1;
+          ilev=((isignab(level,c_err)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6);
+        }
+        ilev+=predicted_chroma_block[n1+i][n2+j];
+        if(!si_frame_indicator && !sp2_frame_indicator)
+          if(!( (n2+j) % 4==0 && (n1+i)%4 ==0 ))
+            lrec_uv[uv][img->pix_c_y+n1+j][img->pix_c_x+n2+i]=isignab((iabs(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp,ilev);//stores the SP frames coefficients, will be useful to encode SI or switching SP frame
+        img->m7[n2+j][n1+i] = isignab((iabs(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp,ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp;
+      }
+      ACLevel[scan_pos] = 0;
+    }
+  }
+
+  // * reset chroma coeffs
+
+  if(cr_cbp_tmp==2)
+      cr_cbp=2;
+  //     IDCT.
+
+      //     Horizontal.
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+      for (j=0; j < BLOCK_SIZE; j++)
+      {
+        for (i=0; i < BLOCK_SIZE; i++)
+        {
+          m5[i]=img->m7[n2+j][n1+i];
+        }
+        m6[0]=(m5[0]+m5[2]);
+        m6[1]=(m5[0]-m5[2]);
+        m6[2]=(m5[1]>>1)-m5[3];
+        m6[3]=m5[1]+(m5[3]>>1);
+
+        for (i=0; i < 2; i++)
+        {
+          i1=3-i;
+          img->m7[n2+j][n1+i]=m6[i]+m6[i1];
+          img->m7[n2+j][n1+i1]=m6[i]-m6[i1];
+        }
+      }
+
+      //     Vertical.
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        for (j=0; j < BLOCK_SIZE; j++)
+        {
+          m5[j]=img->m7[n2+j][n1+i];
+        }
+        m6[0]=(m5[0]+m5[2]);
+        m6[1]=(m5[0]-m5[2]);
+        m6[2]=(m5[1]>>1)-m5[3];
+        m6[3]=m5[1]+(m5[3]>>1);
+
+        for (j=0; j < 2; j++)
+        {
+          j2=3-j;
+          img->m7[n2+j][n1+i] =iClip3(0,img->max_imgpel_value_uv,(m6[j]+m6[j2]+DQ_ROUND)>>DQ_BITS);
+          img->m7[n2+j2][n1+i]=iClip3(0,img->max_imgpel_value_uv,(m6[j]-m6[j2]+DQ_ROUND)>>DQ_BITS);
+        }
+      }
+    }
+  }
+
+  //  Decoded block moved to memory
+  for (j=0; j < BLOCK_SIZE*2; j++)
+    for (i=0; i < BLOCK_SIZE*2; i++)
+    {
+      enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x+i]= (imgpel) img->m7[j][i];
+    }
+
+  return cr_cbp;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reconstructs one 4x4 luma block from the prediction alone: img->mpr is
+ *    transformed, quantized and rescaled with currMB->qpsp, inverse transformed,
+ *    clipped and written to enc_picture->imgY. No levels are produced.
+ *
+ * \par Input:
+ *    block_x,block_y: Block position inside a macro block (0,4,8,12).
+ *
+ * \par Output:
+ *    None. Side effects: img->m7[0..3][0..3], enc_picture->imgY and, unless
+ *    si_frame_indicator or sp2_frame_indicator is set, lrec are overwritten.
+ ************************************************************************
+ */
+void copyblock_sp(int block_x,int block_y)
+{
+  int i,j,i1,j1,m5[4],m6[4];
+
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  int predicted_block[BLOCK_SIZE][BLOCK_SIZE];
+  int qp_per = (currMB->qpsp-MIN_QP)/6;
+  int qp_rem = (currMB->qpsp-MIN_QP)%6;
+  int q_bits    = Q_BITS+qp_per;
+  int qp_const2=(1<<q_bits)/2;  // rounding offset: half of (1<<q_bits)
+
+  //  Copy the prediction; note the index order predicted_block[i][j] <- mpr[j][i]
+  for (j=0; j< BLOCK_SIZE; j++)
+    for (i=0; i< BLOCK_SIZE; i++)
+    {
+      predicted_block[i][j]=img->mpr[j+block_y][i+block_x];
+    }
+  //  Horizontal transform (butterfly over the first index of predicted_block)
+  for (j=0; j < BLOCK_SIZE; j++)
+  {
+    for (i=0; i < 2; i++)
+    {
+      i1=3-i;
+      m5[i]=predicted_block[i][j]+predicted_block[i1][j];
+      m5[i1]=predicted_block[i][j]-predicted_block[i1][j];
+    }
+    predicted_block[0][j]=(m5[0]+m5[1]);
+    predicted_block[2][j]=(m5[0]-m5[1]);
+    predicted_block[1][j]=m5[3]*2+m5[2];
+    predicted_block[3][j]=m5[3]-m5[2]*2;
+  }
+
+  //  Vertical transform (same butterfly over the second index)
+
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    for (j=0; j < 2; j++)
+    {
+      j1=3-j;
+      m5[j]=predicted_block[i][j]+predicted_block[i][j1];
+      m5[j1]=predicted_block[i][j]-predicted_block[i][j1];
+    }
+    predicted_block[i][0]=(m5[0]+m5[1]);
+    predicted_block[i][2]=(m5[0]-m5[1]);
+    predicted_block[i][1]=m5[3]*2+m5[2];
+    predicted_block[i][3]=m5[3]-m5[2]*2;
+  }
+
+  // Quant: img->m7 receives the quantized and rescaled coefficient, lrec the quantized level only
+  for (j=0;j < BLOCK_SIZE; j++)
+  {
+    for (i=0; i < BLOCK_SIZE; i++)
+    {
+      img->m7[j][i]=isignab((iabs(predicted_block[i][j])* quant_coef[qp_rem][i][j]+qp_const2)>> q_bits,predicted_block[i][j])*dequant_coef[qp_rem][i][j]<<qp_per;
+      if(!si_frame_indicator && !sp2_frame_indicator)
+      {
+        lrec[img->pix_y+block_y+j][img->pix_x+block_x+i] =
+        isignab((iabs(predicted_block[i][j]) * quant_coef[qp_rem][i][j] + qp_const2) >> q_bits, predicted_block[i][j]);// stores the SP frames coefficients, will be useful to encode SI or switching SP frame
+      }
+    }
+  }
+
+  //     IDCT.
+  //     horizontal (each row of img->m7 is transformed in place)
+
+  for (j=0;j<BLOCK_SIZE;j++)
+  {
+    for (i=0;i<BLOCK_SIZE;i++)
+    {
+      m5[i]=img->m7[j][i];
+    }
+    m6[0]=(m5[0]+m5[2]);
+    m6[1]=(m5[0]-m5[2]);
+    m6[2]=(m5[1]>>1)-m5[3];
+    m6[3]=m5[1]+(m5[3]>>1);
+
+    for (i=0;i<2;i++)
+    {
+      i1=3-i;
+      img->m7[j][i]=m6[i]+m6[i1];
+      img->m7[j][i1]=m6[i]-m6[i1];
+    }
+  }
+  // vertical; the result is rounded with DQ_ROUND, shifted by DQ_BITS and clipped to [0, img->max_imgpel_value]
+  for (i=0;i<BLOCK_SIZE;i++)
+  {
+    for (j=0;j<BLOCK_SIZE;j++)
+      m5[j]=img->m7[j][i];
+
+    m6[0]=(m5[0]+m5[2]);
+    m6[1]=(m5[0]-m5[2]);
+    m6[2]=(m5[1]>>1)-m5[3];
+    m6[3]=m5[1]+(m5[3]>>1);
+
+    for (j=0;j<2;j++)
+    {
+      j1=3-j;
+      img->m7[j][i] =iClip3(0,img->max_imgpel_value,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS);
+      img->m7[j1][i]=iClip3(0,img->max_imgpel_value,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS);
+    }
+  }
+
+  //  Decoded block moved to frame memory
+
+  for (j=0; j < BLOCK_SIZE; j++)
+    for (i=0; i < BLOCK_SIZE; i++)
+      enc_picture->imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=(imgpel) img->m7[j][i];
+}
+
+
+
+int writeIPCMBytes(Bitstream *currStream)
+{
+  // Emits the samples of the current macroblock from enc_picture as fixed-length
+  // codes: 16x16 luma first, then both chroma planes. Returns the summed code lengths.
+  SyntaxElement   se;
+  int total_bits = 0;
+  int plane, row, col;
+
+  for (row = 0; row < 16; row++)
+  {
+    for (col = 0; col < 16; col++)
+    {
+      se.len        = img->bitdepth_luma;
+      se.bitpattern = enc_picture->imgY[img->pix_y + row][img->pix_x + col];
+      total_bits   += se.len;
+      writeSyntaxElement2Buf_Fixed(&se, currStream);
+    }
+  }
+
+  for (plane = 0; plane < 2; plane++)
+  {
+    for (row = 0; row < img->mb_cr_size_y; row++)
+    {
+      for (col = 0; col < img->mb_cr_size_x; col++)
+      {
+        se.len        = img->bitdepth_chroma;
+        se.bitpattern = enc_picture->imgUV[plane][img->pix_c_y + row][img->pix_c_x + col];
+        total_bits   += se.len;
+        writeSyntaxElement2Buf_Fixed(&se, currStream);
+      }
+    }
+  }
+  return total_bits;
+}
+
+int writePCMByteAlign(Bitstream *currStream)
+{
+  // Flushes a partially filled byte_buf to streamBuffer: the buffered bits are
+  // shifted left by bits_to_go and the vacated low positions are filled with ones.
+  int free_bits = currStream->bits_to_go;
+  if (free_bits >= 8)
+    return 0;   // nothing to flush, stream left untouched
+  currStream->byte_buf = (currStream->byte_buf << free_bits) | (0xff >> (8 - free_bits));
+  stats->bit_use_stuffingBits[img->type] += free_bits;
+  currStream->streamBuffer[currStream->byte_pos++] = currStream->byte_buf;
+  currStream->bits_to_go = 8;
+  return 8 - free_bits;  // 8 minus the bits_to_go value seen on entry
+}
+
+/*!
+ ************************************************************************
+ * \brief Eric Setton
+ * Encoding of one 4x4 luma block of a secondary SP / SI frame.
+ * For an SI frame the predicted block should only come from spatial pred.
+ * The input signal is the set of quantized levels of the primary SP frame, read from lrec.
+ * The differences from the primary SP are:
+ *  - the prediction signal is transformed and quantized (qpsp) but not dequantized
+ *  - only one kind of prediction is considered and not two
+ *  - the resulting error coefficients are not quantized before being sent to the VLC
+ *
+ * \par Input:
+ *    block_x,block_y: Block position inside a macro block (0,4,8,12).
+ *
+ * \par Output:
+ *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.
+ *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+ *    Side effects: fills img->cofAC[b8][b4] with (level,run) pairs and writes the
+ *    reconstructed block, clipped to [0, img->max_imgpel_value], to enc_picture->imgY.
+ ************************************************************************
+ */
+
+int dct_luma_sp2(int block_x,int block_y,int *coeff_cost)
+{
+  int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr;
+  int level,scan_pos,run;
+  int nonzero;
+
+  int predicted_block[BLOCK_SIZE][BLOCK_SIZE],c_err,qp_const2;
+  // quantizer parameters of the prediction quantizer, all derived from img->qpsp
+  int qp_per_sp,qp_rem_sp,q_bits_sp;
+
+  int   pos_x   = block_x >> BLOCK_SHIFT;
+  int   pos_y   = block_y >> BLOCK_SHIFT;
+  int   b8      = 2*(pos_y >> 1) + (pos_x >> 1);
+  int   b4      = 2*(pos_y & 0x01) + (pos_x & 0x01);
+  int*  ACLevel = img->cofAC[b8][b4][0];
+  int*  ACRun   = img->cofAC[b8][b4][1];
+
+  int level1;
+
+  // The difference to the stored primary-SP levels is coded as is, without a
+  // second quantization, so only the quantizer applied to the prediction has
+  // to be set up.
+  qp_per_sp    = (img->qpsp-MIN_QP)/6;
+  qp_rem_sp    = (img->qpsp-MIN_QP)%6;
+  q_bits_sp    = Q_BITS+qp_per_sp;
+
+  // rounding offset of the prediction quantizer: half of (1<<q_bits_sp)
+  qp_const2=(1<<q_bits_sp)/2;  //sp_pred
+
+  //  Load the stored levels and the prediction (prediction index order is [i][j] <- mpr[j][i])
+  for (j=0; j< BLOCK_SIZE; j++)
+    for (i=0; i< BLOCK_SIZE; i++)
+    {
+      //Coefficients obtained from the prior encoding of the SP frame
+      img->m7[j][i]=lrec[img->pix_y+block_y+j][img->pix_x+block_x+i];
+      //Predicted block
+      predicted_block[i][j]=img->mpr[j+block_y][i+block_x];
+    }
+
+  //  Horizontal transform of the predicted block
+  for (j=0; j < BLOCK_SIZE; j++)
+  {
+    for (i=0; i < 2; i++)
+    {
+      i1=3-i;
+      m5[i]=predicted_block[i][j]+predicted_block[i1][j];
+      m5[i1]=predicted_block[i][j]-predicted_block[i1][j];
+    }
+    predicted_block[0][j]=(m5[0]+m5[1]);
+    predicted_block[2][j]=(m5[0]-m5[1]);
+    predicted_block[1][j]=m5[3]*2+m5[2];
+    predicted_block[3][j]=m5[3]-m5[2]*2;
+  }
+
+  //  Vertical transform of the predicted block
+
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    for (j=0; j < 2; j++)
+    {
+      j1=3-j;
+      m5[j]=predicted_block[i][j]+predicted_block[i][j1];
+      m5[j1]=predicted_block[i][j]-predicted_block[i][j1];
+    }
+    predicted_block[i][0]=(m5[0]+m5[1]);
+    predicted_block[i][2]=(m5[0]-m5[1]);
+    predicted_block[i][1]=m5[3]*2+m5[2];
+    predicted_block[i][3]=m5[3]-m5[2]*2;
+  }
+
+  // Quant
+  nonzero=FALSE;
+
+  run=-1;
+  scan_pos=0;
+
+  for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++)     // 8 times if double scan, 16 normal scan
+  {
+
+    if (img->field_picture || ( mb_adaptive && img->field_mode ))
+    {  // Alternate scan for field coding
+        i=FIELD_SCAN[coeff_ctr][0];
+        j=FIELD_SCAN[coeff_ctr][1];
+    }
+    else
+    {
+        i=SNGL_SCAN[coeff_ctr][0];
+        j=SNGL_SCAN[coeff_ctr][1];
+    }
+
+    run++;
+    ilev=0;
+
+    //quantization of the predicted block
+    level1 = (iabs (predicted_block[i][j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp;
+    //subtracted from lrec
+    c_err = img->m7[j][i]-isignab(level1, predicted_block[i][j]);   //subtracting the predicted block
+
+
+    level = iabs(c_err);
+    if (level != 0)
+    {
+      nonzero=TRUE;
+      if (level > 1)
+        *coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+      else
+        *coeff_cost += COEFF_COST[input->disthres][run];
+      ACLevel[scan_pos] = isignab(level,c_err);
+      ACRun  [scan_pos] = run;
+      ++scan_pos;
+      run=-1;                     // reset zero level counter
+    }
+    //from now on we are in decoder land
+    ilev=c_err + isignab(level1,predicted_block[i][j]) ;  // adding the quantized predicted block
+    img->m7[j][i] = ilev  *dequant_coef[qp_rem_sp][i][j] << qp_per_sp;
+
+  }
+  ACLevel[scan_pos] = 0;          // a zero level terminates the (level,run) list
+
+
+  //     IDCT.
+  //     horizontal
+
+  for (j=0; j < BLOCK_SIZE; j++)
+  {
+    for (i=0; i < BLOCK_SIZE; i++)
+    {
+      m5[i]=img->m7[j][i];
+    }
+    m6[0]=(m5[0]+m5[2]);
+    m6[1]=(m5[0]-m5[2]);
+    m6[2]=(m5[1]>>1)-m5[3];
+    m6[3]=m5[1]+(m5[3]>>1);
+
+    for (i=0; i < 2; i++)
+    {
+      i1=3-i;
+      img->m7[j][i]=m6[i]+m6[i1];
+      img->m7[j][i1]=m6[i]-m6[i1];
+    }
+  }
+
+  //  vertical; clip to the luma sample range used by copyblock_sp instead of a fixed 255
+
+  for (i=0; i < BLOCK_SIZE; i++)
+  {
+    for (j=0; j < BLOCK_SIZE; j++)
+    {
+      m5[j]=img->m7[j][i];
+    }
+    m6[0]=(m5[0]+m5[2]);
+    m6[1]=(m5[0]-m5[2]);
+    m6[2]=(m5[1]>>1)-m5[3];
+    m6[3]=m5[1]+(m5[3]>>1);
+
+    for (j=0; j < 2; j++)
+    {
+      j1=3-j;
+      img->m7[j][i] =iClip3(0,img->max_imgpel_value,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS);
+      img->m7[j1][i]=iClip3(0,img->max_imgpel_value,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS);
+    }
+  }
+
+  //  Decoded block moved to frame memory
+  for (j=0; j < BLOCK_SIZE; j++)
+    for (i=0; i < BLOCK_SIZE; i++){
+      enc_picture->imgY[img->pix_y+block_y+i][img->pix_x+block_x+j]=(imgpel) img->m7[i][j];
+    }
+  return nonzero;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief Eric Setton
+ * Encoding of the 8x8 chroma block (one component) of a secondary SP / SI frame.
+ * For an SI frame the predicted block should only come from spatial pred.
+ * The input signal is the set of quantized levels of the primary SP frame, read from lrec_uv.
+ * The differences from the primary SP are:
+ *  - the prediction signal is transformed and quantized (qpsp) but not dequantized
+ *  - the resulting error coefficients are not quantized before being sent to the VLC
+ *
+ * \par Input:
+ *    uv    : Selects the chroma component (indexes lrec_uv, imgUV, cofDC[uv+1], cofAC[uv+4])
+ *    cr_cbp: chroma coded block pattern
+ *
+ * \par Output:
+ *    cr_cbp: Updated chroma coded block pattern (at least 1 if a DC level, 2 if an AC level is nonzero).
+ *    Side effects: currMB->cbp_blk, img->cofDC/cofAC, img->m7 and enc_picture->imgUV[uv] are written.
+ ************************************************************************
+ */
+int dct_chroma_sp2(int uv,int cr_cbp)
+{
+  int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,c_err,level ,scan_pos,run;
+  int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
+  int coeff_cost;   // accumulated in the AC loop but not read in this function
+  int cr_cbp_tmp;
+  int predicted_chroma_block[MB_BLOCK_SIZE/2][MB_BLOCK_SIZE/2],qp_const2,mp1[BLOCK_SIZE];
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  // quantizer parameters of the prediction quantizer, all derived from img->qpsp
+  int qp_per_sp,qp_rem_sp,q_bits_sp;
+
+  int   b4;
+  int*  DCLevel = img->cofDC[uv+1][0];
+  int*  DCRun   = img->cofDC[uv+1][1];
+  int*  ACLevel;
+  int*  ACRun;
+  int  level1;
+
+  // The difference to the stored primary-SP levels is coded without a second
+  // quantization, so only the quantizer applied to the prediction is set up.
+  // NOTE(review): dct_chroma_sp derives its SP chroma QP from currMB->qpsp plus
+  // active_pps->chroma_qp_index_offset; here img->qpsp is used directly - confirm.
+
+  qp_per_sp    = ((img->qpsp<0?img->qpsp:QP_SCALE_CR[img->qpsp])-MIN_QP)/6;
+  qp_rem_sp    = ((img->qpsp<0?img->qpsp:QP_SCALE_CR[img->qpsp])-MIN_QP)%6;
+  q_bits_sp    = Q_BITS+qp_per_sp;
+  qp_const2=(1<<q_bits_sp)/2;  //sp_pred
+
+
+  for (j=0; j < MB_BLOCK_SIZE>>1; j++)
+    for (i=0; i < MB_BLOCK_SIZE>>1; i++)
+    {
+      predicted_chroma_block[i][j]=img->mpr[j][i];
+      img->m7[j][i]=lrec_uv[uv][img->pix_c_y+j][img->pix_c_x+i];
+    }
+
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+
+      //  Horizontal transform.
+      for (j=0; j < BLOCK_SIZE; j++)
+      {
+        mb_y=n2+j;
+        for (i=0; i < 2; i++)
+        {
+          i1=3-i;
+          m5[i]=predicted_chroma_block[i+n1][mb_y]+predicted_chroma_block[i1+n1][mb_y];
+          m5[i1]=predicted_chroma_block[i+n1][mb_y]-predicted_chroma_block[i1+n1][mb_y];
+        }
+        predicted_chroma_block[n1][mb_y]  =(m5[0]+m5[1]);
+        predicted_chroma_block[n1+2][mb_y]=(m5[0]-m5[1]);
+        predicted_chroma_block[n1+1][mb_y]=m5[3]*2+m5[2];
+        predicted_chroma_block[n1+3][mb_y]=m5[3]-m5[2]*2;
+      }
+
+      //  Vertical transform.
+
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        j1=n1+i;
+        for (j=0; j < 2; j++)
+        {
+          j2=3-j;
+          m5[j]=predicted_chroma_block[j1][n2+j]+predicted_chroma_block[j1][n2+j2];
+          m5[j2]=predicted_chroma_block[j1][n2+j]-predicted_chroma_block[j1][n2+j2];
+        }
+        predicted_chroma_block[j1][n2  ]=(m5[0]+m5[1]);
+        predicted_chroma_block[j1][n2+2]=(m5[0]-m5[1]);
+        predicted_chroma_block[j1][n2+1]=m5[3]*2+m5[2];
+        predicted_chroma_block[j1][n2+3]=m5[3]-m5[2]*2;
+      }
+    }
+  }
+
+  //   DC coefficients already transformed and quantized
+  m1[0]= img->m7[0][0];
+  m1[1]= img->m7[4][0];
+  m1[2]= img->m7[0][4];
+  m1[3]= img->m7[4][4];
+
+  //     2X2 transform of predicted DC coeffs.
+  mp1[0]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]+predicted_chroma_block[0][4]+predicted_chroma_block[4][4]);
+  mp1[1]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]+predicted_chroma_block[0][4]-predicted_chroma_block[4][4]);
+  mp1[2]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]-predicted_chroma_block[0][4]-predicted_chroma_block[4][4]);
+  mp1[3]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]-predicted_chroma_block[0][4]+predicted_chroma_block[4][4]);
+
+  run=-1;
+  scan_pos=0;
+
+  for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
+  {
+    run++;
+    ilev=0;
+
+    //quantization of predicted DC coeff
+    level1 = (iabs (mp1[coeff_ctr]) * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp + 1);
+    //subtracted from lrecUV
+    c_err = m1[coeff_ctr] - isignab(level1, mp1[coeff_ctr]);
+    level = iabs(c_err);
+
+    if (level  != 0)
+    {
+      currMB->cbp_blk |= 0xf0000 << (uv << 2) ;  // if one of the 2x2-DC levels is != 0 the coded-bit
+      cr_cbp=imax(1,cr_cbp);
+      DCLevel[scan_pos] = isignab(level ,c_err);
+      DCRun  [scan_pos] = run;
+      scan_pos++;
+      run=-1;
+    }
+
+    //from now on decoder world
+    ilev = c_err + isignab(level1,mp1[coeff_ctr]) ; // we have perfect reconstruction here
+
+    m1[coeff_ctr]= ilev  * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
+
+  }
+  DCLevel[scan_pos] = 0;          // a zero level terminates the (level,run) list
+
+  //  Inverse transform of 2x2 DC levels
+
+  img->m7[0][0]=(m1[0]+m1[1]+m1[2]+m1[3])/2;
+  img->m7[4][0]=(m1[0]-m1[1]+m1[2]-m1[3])/2;
+  img->m7[0][4]=(m1[0]+m1[1]-m1[2]-m1[3])/2;
+  img->m7[4][4]=(m1[0]-m1[1]-m1[2]+m1[3])/2;
+
+  //     Quant of chroma AC-coeffs.
+  coeff_cost=0;
+  cr_cbp_tmp=0;
+
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+      b4      = 2*(n2/4) + (n1/4);
+      ACLevel = img->cofAC[uv+4][b4][0];
+      ACRun   = img->cofAC[uv+4][b4][1];
+
+      run      = -1;
+      scan_pos =  0;
+
+      for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant
+      {
+
+        if (img->field_picture || ( mb_adaptive && img->field_mode ))
+        {  // Alternate scan for field coding
+          j=FIELD_SCAN[coeff_ctr][0];
+          i=FIELD_SCAN[coeff_ctr][1];
+        }
+        else
+        {
+          j=SNGL_SCAN[coeff_ctr][0];
+          i=SNGL_SCAN[coeff_ctr][1];
+        }
+        ++run;
+        ilev=0;
+        // quantization on prediction
+        level1 = (iabs(predicted_chroma_block[n1+j][n2+i]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp;
+        //subtracted from lrec
+        c_err  = img->m7[n1+i][n2+j] - isignab(level1, predicted_chroma_block[n1+j][n2+i]);
+        level  = iabs(c_err) ;
+
+        if (level  != 0)
+        {
+          currMB->cbp_blk |=  (int64)1 << (16 + (uv << 2) + ((n2 >> 1) + (n1 >> 2))) ;
+          if (level > 1)
+            coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+          else
+            coeff_cost += COEFF_COST[input->disthres][run];
+
+          cr_cbp_tmp=2;
+          ACLevel[scan_pos] = isignab(level,c_err);
+          ACRun  [scan_pos] = run;
+          ++scan_pos;
+          run=-1;
+        }
+
+        //from now on decoder land
+        ilev=c_err + isignab(level1,predicted_chroma_block[n1+j][n2+i]);
+        img->m7[n1+i][n2+j] = ilev * dequant_coef[qp_rem_sp][i][j] << qp_per_sp;
+      }
+      ACLevel[scan_pos] = 0;      // a zero level terminates the (level,run) list
+    }
+  }
+  // * reset chroma coeffs
+
+  if(cr_cbp_tmp==2)
+      cr_cbp=2;
+  //     IDCT.
+
+      //     Horizontal.
+  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+  {
+    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+    {
+      for (j=0; j < BLOCK_SIZE; j++)
+      {
+        for (i=0; i < BLOCK_SIZE; i++)
+        {
+          m5[i]=img->m7[n1+i][n2+j];
+        }
+        m6[0]=(m5[0]+m5[2]);
+        m6[1]=(m5[0]-m5[2]);
+        m6[2]=(m5[1]>>1)-m5[3];
+        m6[3]=m5[1]+(m5[3]>>1);
+
+        for (i=0; i < 2; i++)
+        {
+          i1=3-i;
+          img->m7[n1+i][n2+j]=m6[i]+m6[i1];
+          img->m7[n1+i1][n2+j]=m6[i]-m6[i1];
+        }
+      }
+
+      //     Vertical; clip to the chroma sample range used by dct_chroma_sp instead of a fixed 255.
+      for (i=0; i < BLOCK_SIZE; i++)
+      {
+        for (j=0; j < BLOCK_SIZE; j++)
+        {
+          m5[j]=img->m7[n1+i][n2+j];
+        }
+        m6[0]=(m5[0]+m5[2]);
+        m6[1]=(m5[0]-m5[2]);
+        m6[2]=(m5[1]>>1)-m5[3];
+        m6[3]=m5[1]+(m5[3]>>1);
+
+        for (j=0; j < 2; j++)
+        {
+          j2=3-j;
+          img->m7[n1+i][n2+j] =iClip3(0,img->max_imgpel_value_uv,(m6[j]+m6[j2]+DQ_ROUND)>>DQ_BITS);
+          img->m7[n1+i][n2+j2]=iClip3(0,img->max_imgpel_value_uv,(m6[j]-m6[j2]+DQ_ROUND)>>DQ_BITS);
+        }
+      }
+    }
+  }
+
+  //  Decoded block moved to memory; the two off-diagonal 4x4 blocks of img->m7 are exchanged on the way
+  for (j=0; j < BLOCK_SIZE; j++)
+    for (i=0; i < BLOCK_SIZE; i++)
+    {
+      enc_picture->imgUV[uv][img->pix_c_y+i][img->pix_c_x+j]= (imgpel) img->m7[i][j];
+      enc_picture->imgUV[uv][img->pix_c_y+i][img->pix_c_x+j+4]= (imgpel) img->m7[i+4][j];
+      enc_picture->imgUV[uv][img->pix_c_y+i+4][img->pix_c_x+j]= (imgpel) img->m7[i][j+4];
+      enc_picture->imgUV[uv][img->pix_c_y+i+4][img->pix_c_x+j+4]= (imgpel) img->m7[i+4][j+4];
+    }
+
+  return cr_cbp;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/block.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/block.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/block.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,173 @@
+
+/*!
+ ************************************************************************
+ * \file block.h
+ *
+ * \brief
+ *    constant arrays for single block processing
+ *
+ * \author
+ *    Inge Lille-Langoy               <inge.lille-langoy at telenor.com>    \n
+ *    Telenor Satellite Services                                         \n
+ *    P.O.Box 6914 St.Olavs plass                                        \n
+ *    N-0130 Oslo, Norway
+ *
+ ************************************************************************
+ */
+
+#ifndef _BLOCK_H_
+#define _BLOCK_H_
+
+//! make chroma QP from quant
+extern const byte QP_SCALE_CR[52] ;
+
+
+//! single scan pattern
+const byte SNGL_SCAN[16][2] =
+{
+  {0,0},{1,0},{0,1},{0,2},
+  {1,1},{2,0},{3,0},{2,1},
+  {1,2},{0,3},{1,3},{2,2},
+  {3,1},{3,2},{2,3},{3,3}
+};
+
+//! field scan pattern
+const byte FIELD_SCAN[16][2] =
+{
+  {0,0},{0,1},{1,0},{0,2},
+  {0,3},{1,1},{1,2},{1,3},
+  {2,0},{2,1},{2,2},{2,3},
+  {3,0},{3,1},{3,2},{3,3}
+};
+
+
+//! array used to find expensive coefficients
+const byte COEFF_COST[2][16] =
+{
+  {3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0},
+  {9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
+};
+
+
+
+//! bit cost for coefficients: three 16x16 tables, one per scan type (2x2, double, single)
+const byte COEFF_BIT_COST[3][16][16]=
+{
+  { // 2x2 scan (corrected per Gisle's email of 11/23/2000, by StW)
+    { 3, 5, 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13},
+    { 5, 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13},
+    { 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13,15},
+    { 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13,15},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+    { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+  },
+  {  // double scan (only 14 rows are listed; the remaining two rows are zero-initialised)
+    { 3, 5, 7, 7, 7, 9, 9, 9, 9,11,11,13,13,13,13,15},
+    { 5, 9, 9,11,11,13,13,13,13,15,15,15,15,15,15,15},
+    { 7,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17},
+    { 9,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17},
+    { 9,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+    {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+  },
+  {    // single scan
+    { 3, 7, 9, 9,11,13,13,15,15,15,15,17,17,17,17,17},
+    { 5, 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17},
+    { 5, 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17},
+    { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+    {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+    {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+    {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+    {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+    {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+    {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+  },
+};
+
+//! scan pattern for YUV 4:2:2 (8 positions)
+const byte SCAN_YUV422  [8][2] =
+{
+  {0,0},{0,1},
+  {1,0},{0,2},
+  {0,3},{1,1},
+  {1,2},{1,3}
+};
+
+//! look up tables for FRExt-chroma support
+const unsigned char hor_offset[4][4][4] =
+  {{{0, 0, 0, 0},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0}},
+
+  {{0, 4, 0, 4},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0}},
+
+  {{0, 4, 0, 4},
+  {0, 4, 0, 4},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0}},
+
+  {{0, 4, 0, 4},
+  {8,12, 8,12},
+  {0, 4, 0, 4},
+  {8,12, 8,12}}};
+
+const unsigned char ver_offset[4][4][4] =
+  { {{0, 0, 0, 0},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0}},
+
+  {{0, 0, 4, 4},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0}},
+
+  {{0, 0, 4, 4},
+  {8, 8,12,12},
+  {0, 0, 0, 0},
+  {0, 0, 0, 0}},
+
+  {{0, 0, 4, 4},
+  {0, 0, 4, 4},
+  {8, 8,12,12},
+  {8, 8,12,12}}};
+
+static unsigned char cbp_blk_chroma[8][4] =
+  { {16, 17, 18, 19},
+    {20, 21, 22, 23},
+    {24, 25, 26, 27},
+    {28, 29, 30, 31},
+    {32, 33, 34, 35},
+    {36, 37, 38, 39},
+    {40, 41, 42, 43},
+    {44, 45, 46, 47} };
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/cabac.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/cabac.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/cabac.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1583 @@
+
+/*!
+ *************************************************************************************
+ * \file cabac.c
+ *
+ * \brief
+ *    CABAC entropy coding routines
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ **************************************************************************************
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+#include "global.h"
+
+#include "cabac.h"
+#include "image.h"
+#include "mb_access.h"
+
+int last_dquant = 0;
+
+#if TRACE
+#define CABAC_TRACE if (dp->bitstream->trace_enabled) trace2out_cabac (se)
+#else
+  #define CABAC_TRACE
+#endif
+
+/***********************************************************************
+ * L O C A L L Y   D E F I N E D   F U N C T I O N   P R O T O T Y P E S
+ ***********************************************************************
+ */
+
+
+void unary_bin_encode(EncodingEnvironmentPtr eep_frame,
+                      unsigned int symbol,
+                      BiContextTypePtr ctx,
+                      int ctx_offset);
+
+void unary_bin_max_encode(EncodingEnvironmentPtr eep_frame,
+                          unsigned int symbol,
+                          BiContextTypePtr ctx,
+                          int ctx_offset,
+                          unsigned int max_symbol);
+
+void unary_exp_golomb_level_encode( EncodingEnvironmentPtr eep_dp,
+                                    unsigned int symbol,
+                                    BiContextTypePtr ctx);
+
+void unary_exp_golomb_mv_encode(EncodingEnvironmentPtr eep_dp,
+                                unsigned int symbol,
+                                BiContextTypePtr ctx,
+                                unsigned int max_bin);
+
+
+void cabac_new_slice(void) // per-slice reset hook for this file's CABAC state
+{
+  last_dquant=0; // clear the file-level last_dquant value
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Look up the left and upper luma neighbours of the current
+ *    macroblock and store pointers to them (NULL when unavailable)
+ ************************************************************************
+ */
+void CheckAvailabilityOfNeighborsCABAC(void)
+{
+  const int   cur_addr = img->current_mb_nr;
+  Macroblock *cur      = &img->mb_data[cur_addr];
+  PixelPos    above;
+  PixelPos    beside;
+
+  // Query order is left neighbour first, then the one above.
+  getNeighbour(cur_addr, -1,  0, IS_LUMA, &beside);
+  getNeighbour(cur_addr,  0, -1, IS_LUMA, &above);
+
+  // A neighbour that is not available is stored as NULL.
+  cur->mb_available_up =
+    above.available ? &img->mb_data[above.mb_addr] : NULL;
+
+  // The left pointer follows exactly the same rule.
+  cur->mb_available_left =
+    beside.available ? &img->mb_data[beside.mb_addr] : NULL;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate one zero-initialised MotionInfoContexts structure
+ *    (motion-info context models for arithmetic encoding)
+ ************************************************************************
+ */
+MotionInfoContexts* create_contexts_MotionInfo(void)
+{
+  // calloc returns zero-filled storage for exactly one context set
+  MotionInfoContexts *models = calloc(1, sizeof *models);
+
+  if (!models)
+    no_mem_exit("create_contexts_MotionInfo: enco_ctx");
+
+  return models;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate one zero-initialised TextureInfoContexts structure
+ *    (texture-info context models for arithmetic encoding)
+ ************************************************************************
+ */
+TextureInfoContexts* create_contexts_TextureInfo(void)
+{
+  // calloc returns zero-filled storage for exactly one context set
+  TextureInfoContexts *models = calloc(1, sizeof *models);
+
+  if (!models)
+    no_mem_exit("create_contexts_TextureInfo: enco_ctx");
+
+  return models;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a MotionInfoContexts structure (motion-info context
+ *    models for arithmetic encoding); a NULL argument is accepted.
+ ************************************************************************
+ */
+void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx)
+{
+  /*
+   * free() is defined to do nothing for a NULL pointer, so no
+   * explicit NULL guard is needed: a single call covers both the
+   * "nothing to release" and the "release the storage" case.
+   */
+  free(enco_ctx);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a TextureInfoContexts structure (texture-info context
+ *    models for arithmetic encoding); a NULL argument is accepted.
+ ************************************************************************
+ */
+void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx)
+{
+  /*
+   * free() is defined to do nothing for a NULL pointer, so no
+   * explicit NULL guard is needed: a single call covers both the
+   * "nothing to release" and the "release the storage" case.
+   */
+  free(enco_ctx);
+}
+
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Arithmetically encode the field-mode flag (se->value1 != 0) of the
+ *    current MB, as needed for macroblock-level frame/field decisions
+ ***************************************************************************
+ */
+void writeFieldModeInfo_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr  coder       = &dp->ee_cabac;
+  const int               bits_before = arienco_bits_written(coder);
+  MotionInfoContexts     *models      = img->currentSlice->mot_ctx;
+  Macroblock             *mb          = &img->mb_data[img->current_mb_nr];
+  int                     ctx_idx     = 0;
+
+  // Each available neighbour (A, then B) adds its mb_field value to the index.
+  if (mb->mbAvailA)
+    ctx_idx += img->mb_data[mb->mbAddrA].mb_field;
+  if (mb->mbAvailB)
+    ctx_idx += img->mb_data[mb->mbAddrB].mb_field;
+
+  // Any non-zero value1 is coded as bin 1.
+  biari_encode_symbol(coder, (signed short) (se->value1 != 0), &models->mb_aff_contexts[ctx_idx]);
+  se->context = ctx_idx;
+
+  // Set the bitstream write_flag and report the bits this element cost.
+  dp->bitstream->write_flag = 1;
+  se->len = arienco_bits_written(coder) - bits_before;
+  CABAC_TRACE;
+}
+
+/*!
+***************************************************************************
+* \brief
+*    Arithmetically encode the mb_skip_flag of the current macroblock.
+***************************************************************************
+*/
+void writeMB_skip_flagInfo_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr  coder       = &dp->ee_cabac;
+  const int               bits_before = arienco_bits_written(coder);
+  MotionInfoContexts     *models      = img->currentSlice->mot_ctx;
+  Macroblock             *mb          = &img->mb_data[img->current_mb_nr];
+  int                     ctx_row;    // first index into mb_type_contexts
+  int                     ctx_idx;    // second index, also stored in se->context
+  int                     is_skip;    // bin value that gets encoded
+
+  /*
+   * Context selection: start from zero and add one for every neighbour
+   * (above, left) that is available and whose own skip_flag is 0.
+   */
+  ctx_idx = 0;
+
+  if (mb->mb_available_up != NULL)
+  {
+    if (mb->mb_available_up->skip_flag == 0)
+      ctx_idx++;
+  }
+
+  if (mb->mb_available_left != NULL)
+  {
+    if (mb->mb_available_left->skip_flag == 0)
+      ctx_idx++;
+  }
+
+  if (img->type == B_SLICE)
+  {
+    /*
+     * B slices use model row 2 with the index shifted by 7.  The
+     * macroblock counts as skipped when value1 and value2 are both
+     * zero (DIRECT mode, no coefficients).
+     */
+    ctx_row  = 2;
+    ctx_idx += 7;
+    is_skip  = (se->value1 == 0 && se->value2 == 0) ? 1 : 0;
+  }
+  else
+  {
+    /*
+     * All other slice types use model row 1 with the plain index;
+     * the macroblock counts as skipped when value1 is zero (SKIP).
+     */
+    ctx_row = 1;
+    is_skip = (se->value1 == 0) ? 1 : 0;
+  }
+
+  biari_encode_symbol(coder, (signed short) is_skip, &models->mb_type_contexts[ctx_row][ctx_idx]);
+  mb->skip_flag = is_skip;
+  se->context   = ctx_idx;
+
+  dp->bitstream->write_flag = 1;
+  se->len = arienco_bits_written(coder) - bits_before;
+  CABAC_TRACE;
+}
+
+/*!
+***************************************************************************
+* \brief
+*    Arithmetically encode the luma_transform_size_8x8_flag of the
+*    current macroblock.
+***************************************************************************
+*/
+
+void writeMB_transform_size_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr  coder       = &dp->ee_cabac;
+  const int               bits_before = arienco_bits_written(coder);
+  MotionInfoContexts     *models      = img->currentSlice->mot_ctx;
+  Macroblock             *mb          = &img->mb_data[img->current_mb_nr];
+  int                     ctx_idx     = 0;
+
+  // A missing neighbour contributes 0, an existing one contributes its flag.
+  if (mb->mb_available_up != NULL)
+    ctx_idx += mb->mb_available_up->luma_transform_size_8x8_flag;
+  if (mb->mb_available_left != NULL)
+    ctx_idx += mb->mb_available_left->luma_transform_size_8x8_flag;
+
+  se->context = ctx_idx;
+
+  // The bin is 1 whenever the current macroblock's flag is non-zero.
+  biari_encode_symbol(coder,
+                      (signed short) (mb->luma_transform_size_8x8_flag != 0),
+                      &models->transform_size_contexts[ctx_idx]);
+
+  dp->bitstream->write_flag = 1;
+  se->len = arienco_bits_written(coder) - bits_before;
+  CABAC_TRACE;
+}
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Arithmetically encode the macroblock type (se->value1) of the current
+ *    MB; the binarization depends on img->type (I slice, B slice, otherwise).
+ ***************************************************************************
+ */
+
+void writeMB_typeInfo_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  int a, b;
+  int act_ctx = 0;
+  int act_sym;
+  signed short csym;
+  int bframe   = (img->type==B_SLICE);
+  int mode_sym = 0;
+  int mode16x16; // first mb-type value folded into the intra 16x16 symbol (24 for B slices, 7 otherwise)
+
+
+  MotionInfoContexts *ctx         = (img->currentSlice)->mot_ctx;
+  Macroblock         *currMB      = &img->mb_data[img->current_mb_nr];
+  int                curr_mb_type = se->value1;
+
+  if(img->type == I_SLICE)  // INTRA-frame
+  {
+    if (currMB->mb_available_up == NULL)
+      b = 0;
+    else
+      b = ((currMB->mb_available_up->mb_type != I4MB &&  currMB->mb_available_up->mb_type != I8MB) ? 1 : 0 );
+
+    if (currMB->mb_available_left == NULL)
+      a = 0;
+    else
+      a = ((currMB->mb_available_left->mb_type != I4MB &&  currMB->mb_available_left->mb_type != I8MB) ? 1 : 0 );
+
+    act_ctx     = a + b;
+    act_sym     = curr_mb_type;
+    se->context = act_ctx; // store context
+
+    if (act_sym==0) // 4x4 Intra
+    {
+      biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[0] + act_ctx );
+    }
+    else if( act_sym == 25 ) // PCM-MODE
+    {
+      biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx );
+      biari_encode_symbol_final(eep_dp, 1);
+    }
+    else // 16x16 Intra
+    {
+      biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx );
+
+      biari_encode_symbol_final(eep_dp, 0);
+
+      mode_sym = act_sym-1; // Values in the range of 0...23
+      act_ctx  = 4;
+      act_sym  = mode_sym/12;
+      biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx ); // coding of AC/no AC
+      mode_sym = mode_sym % 12;
+      act_sym  = mode_sym / 4; // coding of cbp: 0,1,2
+      act_ctx  = 5;
+      if (act_sym==0)
+      {
+        biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[0] + act_ctx );
+      }
+      else
+      {
+        biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx );
+        act_ctx=6;
+        biari_encode_symbol(eep_dp, (signed short) (act_sym!=1), ctx->mb_type_contexts[0] + act_ctx );
+      }
+      mode_sym = mode_sym & 0x03; // coding of I pred-mode: 0,1,2,3
+      act_sym  = mode_sym >> 1;
+      act_ctx  = 7;
+      biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx );
+      act_ctx  = 8;
+      act_sym  = mode_sym & 0x01;
+      biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx );
+    }
+  }
+  else // INTER
+  {
+    // act_ctx is derived from the neighbours (and stored in se->context) only for B slices; otherwise it keeps its initial 0
+    if (bframe)
+    {
+      if (currMB->mb_available_up == NULL)
+        b = 0;
+      else
+        b = ((currMB->mb_available_up->mb_type != 0) ? 1 : 0 );
+
+      if (currMB->mb_available_left == NULL)
+        a = 0;
+      else
+        a = ((currMB->mb_available_left->mb_type != 0) ? 1 : 0 );
+      act_ctx = a + b;
+      se->context = act_ctx; // store context
+    }
+    act_sym = curr_mb_type;
+    // every value >= mode16x16 is folded into the single symbol mode16x16; the excess is kept in mode_sym
+    if (act_sym>=(mode16x16=(bframe?24:7)))
+    {
+      mode_sym = act_sym-mode16x16;
+      act_sym  = mode16x16; // 16x16 mode info
+    }
+
+    if (!bframe)
+    {
+      switch (act_sym)
+      {
+      case 0:
+        break;
+      case 1:
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][5]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][6]);
+        break;
+      case 2:
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][5]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][7]);
+        break;
+      case 3:
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][5]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][7]);
+        break;
+      case 4:
+      case 5:
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][5]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][6]);
+        break;
+      case 6:
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][4]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][7]);
+        break;
+      case 7:
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][4]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][7]);
+        break;
+      default:
+        printf ("Unsupported MB-MODE in writeMB_typeInfo_CABAC!\n");
+        exit (1);
+      }
+    }
+    else //===== B-FRAMES =====
+    {
+      if (act_sym==0)
+      {
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][act_ctx]);
+      }
+      else if (act_sym<=2)
+      {
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][4]);
+        csym = (act_sym-1 != 0);
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+      }
+      else if (act_sym<=10)
+      {
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]);
+        biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][5]);
+        csym=(((act_sym-3)>>2)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+        csym=(((act_sym-3)>>1)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+        csym=((act_sym-3)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+      }
+      else if (act_sym==11 || act_sym==22)
+      {
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][5]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][6]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][6]);
+        csym = (act_sym != 11);
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+      }
+      else
+      {
+        if (act_sym > 22) act_sym--; // close the gap left by 22 (handled above) before taking the 4-bit suffix
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]);
+        biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][5]);
+        csym=(((act_sym-12)>>3)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+        csym=(((act_sym-12)>>2)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+        csym=(((act_sym-12)>>1)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+        csym=((act_sym-12)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+        if (act_sym >=22) act_sym++; // undo the shift so the comparison with mode16x16 below sees the original value
+      }
+    }
+    // NOTE(review): mode_sym is act_sym-mode16x16 here; the I_SLICE branch tests the raw symbol for 25 and maps 1..24 to 0..23, so confirm with the caller that 25 (not 24) marks PCM below
+    if(act_sym==mode16x16) // additional info for 16x16 Intra-mode
+    {
+      if( mode_sym==25 )
+      {
+        biari_encode_symbol_final(eep_dp, 1 );
+        dp->bitstream->write_flag = 1;
+        se->len = (arienco_bits_written(eep_dp) - curr_len);
+        CABAC_TRACE;
+        return;
+      }
+      biari_encode_symbol_final(eep_dp, 0 );
+
+      act_ctx = 8;
+      act_sym = mode_sym/12;
+      biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx ); // coding of AC/no AC
+      mode_sym = mode_sym % 12;
+
+      act_sym = mode_sym / 4; // coding of cbp: 0,1,2
+      act_ctx = 9;
+      if (act_sym==0)
+      {
+        biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[1] + act_ctx );
+      }
+      else
+      {
+        biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[1] + act_ctx );
+        biari_encode_symbol(eep_dp, (signed short) (act_sym!=1), ctx->mb_type_contexts[1] + act_ctx ); // same model (index 9) for the second cbp bin
+      }
+
+      mode_sym = mode_sym % 4; // coding of I pred-mode: 0,1,2,3
+      act_ctx  = 10;
+      act_sym  = mode_sym/2;
+      biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx );
+      act_sym  = mode_sym%2;
+      biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx ); // same model (index 10) for both pred-mode bins
+    }
+  }
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len);
+  CABAC_TRACE;
+  return;
+}
+
+
+/*!
+ ***************************************************************************
+ * \brief
+ *    Arithmetically encode the 8x8 block type given in se->value1;
+ *    B slices and all other slice types use different bin strings
+ ***************************************************************************
+ */
+void writeB8_typeInfo_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  int act_ctx; // assigned below but never read in this function
+  int act_sym;
+  signed short csym;
+  int bframe=(img->type==B_SLICE);
+
+  MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx;
+
+  act_sym = se->value1;
+  act_ctx = 0;
+  // non-B slices: fixed bin strings for act_sym 0..3, all taken from b8_type_contexts[0]
+  if (!bframe)
+  {
+    switch (act_sym)
+    {
+    case 0:
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][1]);
+      break;
+    case 1:
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][1]);
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][3]);
+      break;
+    case 2:
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][1]);
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][3]);
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][4]);
+      break;
+    case 3:
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][1]);
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][3]);
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][4]);
+      break;
+    } // no default: any other act_sym emits no bins here
+  }
+  else //===== B-FRAME =====
+  {
+    if (act_sym==0)
+    {
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][0]);
+      dp->bitstream->write_flag = 1;
+      se->len = (arienco_bits_written(eep_dp) - curr_len);
+      CABAC_TRACE;
+      return;
+    }
+    else
+    {
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][0]);
+      act_sym--; // the ranges tested below are relative to this decremented value
+    }
+    if (act_sym<2)
+    {
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][1]);
+      biari_encode_symbol (eep_dp, (signed short) (act_sym!=0), &ctx->b8_type_contexts[1][3]);
+    }
+    else if (act_sym<6)
+    {
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][1]);
+      biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][2]);
+      csym=(((act_sym-2)>>1)&0x01) != 0;
+      biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]);
+      csym=((act_sym-2)&0x01) != 0;
+      biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]);
+    }
+    else
+    {
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][1]);
+      biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][2]);
+      csym=(((act_sym-6)>>2)&0x01); // bit 2 of (act_sym-6) picks the short (two-bin) or long (three-bin) suffix
+      if (csym)
+      {
+        biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][3]);
+        csym=((act_sym-6)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]);
+      }
+      else
+      {
+        biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][3]);
+        csym=(((act_sym-6)>>1)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]);
+        csym=((act_sym-6)&0x01) != 0;
+        biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]);
+      }
+    }
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len);
+  CABAC_TRACE;
+  return;
+}
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Arithmetically encode one intra prediction mode element
+ *    (se->value1; -1 selects the most probable mode).
+ ****************************************************************************
+ */
+void writeIntraPredMode_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr  coder       = &dp->ee_cabac;
+  const int               bits_before = arienco_bits_written(coder);
+  TextureInfoContexts    *models      = img->currentSlice->tex_ctx;
+  const int               use_mpm     = (se->value1 == -1);
+  int                     bit;
+
+  // use_most_probable_mode: bin 1 when value1 is -1, bin 0 otherwise
+  biari_encode_symbol(coder, (signed short) use_mpm, &models->ipr_contexts[0]);
+
+  if (!use_mpm)
+  {
+    // remaining_mode_selector: bits 0, 1 and 2 of value1, in that order
+    for (bit = 0; bit < 3; bit++)
+      biari_encode_symbol(coder,
+                          (signed short) ((se->value1 & (1 << bit)) >> bit),
+                          &models->ipr_contexts[1]);
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = arienco_bits_written(coder) - bits_before;
+  CABAC_TRACE;
+}
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Arithmetically encode the reference parameter se->value1; the context
+ *    comes from neighbour entries of enc_picture->ref_idx[se->value2].
+ ****************************************************************************
+ */
+void writeRefFrame_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  int   mb_nr = img->current_mb_nr;
+  MotionInfoContexts  *ctx    = img->currentSlice->mot_ctx;
+  Macroblock          *currMB = &img->mb_data[mb_nr];
+  int                 addctx  = 0; // never changed: only row 0 of ref_no_contexts is used here
+
+  int   a, b;
+  int   act_ctx;
+  int   act_sym;
+  char** refframe_array = enc_picture->ref_idx[se->value2];
+
+  int bslice = (img->type==B_SLICE);
+
+  int   b8a, b8b; // 8x8 block index (0..3) of the left / upper neighbour inside its macroblock
+
+  PixelPos block_a, block_b;
+
+  getLuma4x4Neighbour(mb_nr, (img->subblock_x << 2) - 1, (img->subblock_y << 2), &block_a);
+  getLuma4x4Neighbour(mb_nr, (img->subblock_x << 2),     (img->subblock_y << 2) - 1, &block_b);
+
+  b8a=((block_a.x >> 1) & 0x01)+2*((block_a.y >> 1) & 0x01);
+  b8b=((block_b.x >> 1) & 0x01)+2*((block_b.y >> 1) & 0x01);
+
+
+  if (!block_b.available)
+    b=0;
+  // a DIRECT neighbour counts as 0 only while giRDOpt_B8OnlyFlag is clear; an 8x8 block with b8mode 0 counts as 0 in B slices
+  else if ((IS_DIRECT(&img->mb_data[block_b.mb_addr]) && !giRDOpt_B8OnlyFlag) || (img->mb_data[block_b.mb_addr].b8mode[b8b]==0 && bslice))
+    b=0;
+  else
+  {
+    if (img->MbaffFrameFlag && (currMB->mb_field == 0) && (img->mb_data[block_b.mb_addr].mb_field == 1))
+      b = (refframe_array[block_b.pos_y][block_b.pos_x] > 1 ? 1 : 0); // frame MB next to a field MB: threshold is 1 instead of 0
+    else
+      b = (refframe_array[block_b.pos_y][block_b.pos_x] > 0 ? 1 : 0);
+  }
+
+  if (!block_a.available)
+    a=0;
+  // same rule as for the upper neighbour, applied to the left neighbour
+  else if ((IS_DIRECT(&img->mb_data[block_a.mb_addr]) && !giRDOpt_B8OnlyFlag) || (img->mb_data[block_a.mb_addr].b8mode[b8a]==0 && bslice))
+    a=0;
+  else
+  {
+    if (img->MbaffFrameFlag && (currMB->mb_field == 0) && (img->mb_data[block_a.mb_addr].mb_field == 1))
+      a = (refframe_array[block_a.pos_y][block_a.pos_x] > 1 ? 1 : 0);
+    else
+      a = (refframe_array[block_a.pos_y][block_a.pos_x] > 0 ? 1 : 0);
+  }
+
+  act_ctx     = a + 2*b;
+  se->context = act_ctx; // store context
+  act_sym     = se->value1;
+
+  if (act_sym==0)
+  {
+    biari_encode_symbol(eep_dp, 0, ctx->ref_no_contexts[addctx] + act_ctx );
+  }
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, ctx->ref_no_contexts[addctx] + act_ctx);
+    act_sym--;
+    act_ctx=4; // the remainder (value1 - 1) is passed to unary_bin_encode starting at model 4
+    unary_bin_encode(eep_dp, act_sym,ctx->ref_no_contexts[addctx]+act_ctx,1);
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len);
+  CABAC_TRACE;
+  return;
+}
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Arithmetically encode the delta quant value (se->value1) of the
+ *    current macroblock.
+ ****************************************************************************
+ */
+void writeDquant_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr  coder       = &dp->ee_cabac;
+  const int               bits_before = arienco_bits_written(coder);
+  MotionInfoContexts     *models      = img->currentSlice->mot_ctx;
+  Macroblock             *mb          = &img->mb_data[img->current_mb_nr];
+  const int               delta       = se->value1;
+  int                     first_ctx;
+  int                     mapped;
+
+  // The file-level last_dquant is refreshed from the macroblock on every call.
+  last_dquant = mb->prev_delta_qp;
+
+  /*
+   * Map the signed delta onto the coded symbol,
+   * i.e. (iabs(d) << 1) + (d <= 0) - 1; with iabs presumably |d| this is
+   *   0 -> 0,  positive d -> 2*d - 1,  negative d -> 2*|d|
+   */
+  mapped = (iabs(delta) << 1) - 1;
+  if (delta <= 0)
+    mapped++;
+
+  // The first bin uses model 1 when last_dquant is non-zero, else model 0.
+  first_ctx = (last_dquant != 0) ? 1 : 0;
+
+  if (mapped != 0)
+  {
+    biari_encode_symbol(coder, 1, &models->delta_qp_contexts[first_ctx]);
+
+    // The remainder (mapped - 1) is handed to unary_bin_encode with model 2.
+    unary_bin_encode(coder, mapped - 1, &models->delta_qp_contexts[2], 1);
+  }
+  else
+  {
+    biari_encode_symbol(coder, 0, &models->delta_qp_contexts[first_ctx]);
+  }
+
+  // Flag the write and record how many bits this element produced.
+  dp->bitstream->write_flag = 1;
+  se->len = arienco_bits_written(coder) - bits_before;
+  CABAC_TRACE;
+}
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    This function is used to arithmetically encode the motion
+ *    vector data of a B-frame MB. The value to encode is se->value1.
+ ****************************************************************************
+ */
+void writeMVD_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  int i = (img->subblock_x << 2); // subblock index times 4; passed as x position to getLuma4x4Neighbour
+  int j = (img->subblock_y << 2); // subblock index times 4; passed as y position to getLuma4x4Neighbour
+  int a, b;
+  int act_ctx;
+  int act_sym;
+  int mv_pred_res;
+  int mv_local_err;
+  int mv_sign;
+  int list_idx = se->value2 & 0x01; // bit 0 of value2 selects the mvd list
+  int k = (se->value2>>1); // MVD component
+  int mb_nr = img->current_mb_nr;
+
+  PixelPos block_a, block_b;
+
+  MotionInfoContexts  *ctx    = img->currentSlice->mot_ctx;
+  Macroblock          *currMB = &img->mb_data[mb_nr];
+
+  getLuma4x4Neighbour(mb_nr, i - 1, j, &block_a); // block_a: neighbour at x-1 (left)
+  getLuma4x4Neighbour(mb_nr, i    , j - 1, &block_b); // block_b: neighbour at y-1 (above)
+
+  if (block_b.available)
+  {
+    b = iabs(img->mb_data[block_b.mb_addr].mvd[list_idx][block_b.y][block_b.x][k]);
+    if (img->MbaffFrameFlag && (k==1)) // only component k==1 is rescaled when MbaffFrameFlag is set
+    {
+      if ((currMB->mb_field==0) && (img->mb_data[block_b.mb_addr].mb_field==1))
+        b *= 2; // current MB has mb_field==0, neighbour has mb_field==1
+      else if ((currMB->mb_field==1) && (img->mb_data[block_b.mb_addr].mb_field==0))
+        b /= 2; // current MB has mb_field==1, neighbour has mb_field==0
+    }
+  }
+  else
+    b=0; // unavailable neighbour contributes 0
+
+  if (block_a.available)
+  {
+    a = iabs(img->mb_data[block_a.mb_addr].mvd[list_idx][block_a.y][block_a.x][k]);
+    if (img->MbaffFrameFlag && (k==1)) // same rescaling as for block_b
+    {
+      if ((currMB->mb_field==0) && (img->mb_data[block_a.mb_addr].mb_field==1))
+        a *= 2;
+      else if ((currMB->mb_field==1) && (img->mb_data[block_a.mb_addr].mb_field==0))
+        a /= 2;
+    }
+  }
+  else
+    a = 0; // unavailable neighbour contributes 0
+
+  if ((mv_local_err=a+b)<3) // first-bin context from the summed neighbour magnitudes: <3, 3..32, >32
+    act_ctx = 5*k;
+  else
+  {
+    if (mv_local_err>32)
+      act_ctx=5*k+3;
+    else
+      act_ctx=5*k+2;
+  }
+
+  mv_pred_res = se->value1;
+  se->context = act_ctx; // the chosen first-bin context index is stored back into the syntax element
+
+  act_sym = iabs(mv_pred_res);
+
+  if (act_sym == 0)
+    biari_encode_symbol(eep_dp, 0, &ctx->mv_res_contexts[0][act_ctx] );
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, &ctx->mv_res_contexts[0][act_ctx] );
+    act_sym--; // the first bin already signalled "non-zero"
+    act_ctx=5*k; // magnitude bins use mv_res_contexts[1] starting at offset 5*k
+    unary_exp_golomb_mv_encode(eep_dp,act_sym,ctx->mv_res_contexts[1]+act_ctx,3);
+    mv_sign = (mv_pred_res<0) ? 1: 0;
+    biari_encode_symbol_eq_prob(eep_dp, (signed short) mv_sign); // sign goes through the equal-probability routine
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len); // difference of arienco_bits_written() after and before
+  CABAC_TRACE;
+  return;
+}
+
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    This function is used to arithmetically encode the chroma
+ *    intra prediction mode of an 8x8 block (mode value taken from se->value1)
+ ****************************************************************************
+ */
+void writeCIPredMode_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  TextureInfoContexts *ctx     = img->currentSlice->tex_ctx;
+  Macroblock          *currMB  = &img->mb_data[img->current_mb_nr];
+  int                 act_ctx,a,b;
+  int                 act_sym  = se->value1;
+
+  if (currMB->mb_available_up == NULL) b = 0; // no MB above: contributes 0
+  else  b = ( ((currMB->mb_available_up)->c_ipred_mode != 0) ? 1 : 0); // 1 if the MB above has a non-zero c_ipred_mode
+
+  if (currMB->mb_available_left == NULL) a = 0; // no MB to the left: contributes 0
+  else  a = ( ((currMB->mb_available_left)->c_ipred_mode != 0) ? 1 : 0); // 1 if the left MB has a non-zero c_ipred_mode
+
+  act_ctx = a+b; // first-bin context index in 0..2
+
+  if (act_sym==0)
+    biari_encode_symbol(eep_dp, 0, ctx->cipr_contexts + act_ctx );
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, ctx->cipr_contexts + act_ctx );
+    unary_bin_max_encode(eep_dp,(unsigned int) (act_sym-1),ctx->cipr_contexts+3,0,2); // remainder: context index 3, ctx_offset 0, max_symbol 2
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len); // difference of arienco_bits_written() after and before
+  CABAC_TRACE;
+  return;
+}
+
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    This function is used to arithmetically encode one bit of the coded
+ *    block pattern, for 8x8 block b8; the parameter 'inter' is not used here
+ ****************************************************************************
+ */
+void writeCBP_BIT_CABAC (int b8, int bit, int cbp, Macroblock* currMB, int inter, EncodingEnvironmentPtr eep_dp)
+{
+  PixelPos block_a;
+  int a, b;
+
+  int mb_x=(b8 & 0x01)<<1; // 0 or 2, from bit 0 of b8
+  int mb_y=(b8 >> 1)<<1; // 0 or 2 for b8 in 0..3
+
+  if (mb_y == 0) // block above belongs to the MB above
+  {
+    if (currMB->mb_available_up == NULL)
+      b = 0;
+    else
+    {
+      if((currMB->mb_available_up)->mb_type==IPCM)
+        b=0;
+      else
+        b = (( ((currMB->mb_available_up)->cbp & (1<<(2+(mb_x>>1)))) == 0) ? 1 : 0);   //VG-ADD  (b = 1 when that cbp bit of the MB above is zero)
+    }
+
+  }
+  else
+    b = ( ((cbp & (1<<(mb_x/2))) == 0) ? 1: 0); // block above lies in the current MB: test the bit in cbp
+
+  if (mb_x == 0) // left block belongs to a neighbouring MB
+  {
+    getLuma4x4Neighbour(img->current_mb_nr, (mb_x << 2) - 1, (mb_y << 2), &block_a);
+    if (block_a.available)
+    {
+      {
+        if(img->mb_data[block_a.mb_addr].mb_type==IPCM)
+          a=0;
+        else
+          a = (( (img->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y>>1)+1))) == 0) ? 1 : 0); //VG-ADD  (a = 1 when that cbp bit of the left MB is zero)
+      }
+
+    }
+    else
+      a=0;
+  }
+  else
+    a = ( ((cbp & (1<<mb_y)) == 0) ? 1: 0); // left block lies in the current MB: test the bit in cbp
+
+  //===== WRITE BIT =====
+  biari_encode_symbol (eep_dp, (signed short) bit,
+    img->currentSlice->tex_ctx->cbp_contexts[0] + a+2*b); // context index = a + 2*b
+}
+
+/*!
+****************************************************************************
+* \brief
+*    This function is used to arithmetically encode the coded
+*    block pattern of a macroblock (se->value1): four bits via writeCBP_BIT_CABAC, then chroma bins
+****************************************************************************
+*/
+void writeCBP_CABAC(SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  TextureInfoContexts *ctx = img->currentSlice->tex_ctx;
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  int a, b;
+  int curr_cbp_ctx, curr_cbp_idx;
+  int cbp = se->value1; // symbol to encode
+  int cbp_bit;
+  int b8;
+
+  for (b8=0; b8<4; b8++) // bits 0..3 of cbp, one per 8x8 block
+  {
+    curr_cbp_idx = (currMB->b8mode[b8] == IBLOCK ? 0 : 1); // 0 for IBLOCK, 1 otherwise; passed as the 'inter' argument
+    writeCBP_BIT_CABAC (b8, cbp&(1<<b8), cbp, currMB, curr_cbp_idx, eep_dp);
+  }
+
+  if (img->yuv_format != YUV400) // the chroma bins are skipped for YUV400
+  {
+    // coding of chroma part
+    b = 0; // stays 0 when there is no MB above
+    if (currMB->mb_available_up != NULL)
+    {
+      if((currMB->mb_available_up)->mb_type==IPCM)
+        b=1;
+      else
+        b = ((currMB->mb_available_up)->cbp > 15) ? 1 : 0;
+    }
+
+
+    a = 0; // stays 0 when there is no MB to the left
+    if (currMB->mb_available_left != NULL)
+    {
+      if((currMB->mb_available_left)->mb_type==IPCM)
+        a=1;
+      else
+        a = ((currMB->mb_available_left)->cbp > 15) ? 1 : 0;
+    }
+
+    curr_cbp_ctx = a+2*b;
+    cbp_bit = (cbp > 15 ) ? 1 : 0; // first chroma bin: 1 when cbp > 15
+    biari_encode_symbol(eep_dp, (signed short) cbp_bit, ctx->cbp_contexts[1] + curr_cbp_ctx );
+
+    if (cbp > 15)
+    {
+      b = 0; // stays 0 unless the MB above is IPCM or has (cbp >> 4) == 2
+      if (currMB->mb_available_up != NULL)
+      {
+        if((currMB->mb_available_up)->mb_type==IPCM)
+          b=1;
+        else
+          if ((currMB->mb_available_up)->cbp > 15)
+            b = (( ((currMB->mb_available_up)->cbp >> 4) == 2) ? 1 : 0);
+      }
+
+
+      a = 0; // stays 0 unless the left MB is IPCM or has (cbp >> 4) == 2
+      if (currMB->mb_available_left != NULL)
+      {
+        if((currMB->mb_available_left)->mb_type==IPCM)
+          a=1;
+        else
+          if ((currMB->mb_available_left)->cbp > 15)
+            a = (( ((currMB->mb_available_left)->cbp >> 4) == 2) ? 1 : 0);
+      }
+
+      curr_cbp_ctx = a+2*b;
+      cbp_bit = ((cbp>>4) == 2) ? 1 : 0; // second chroma bin: 1 when (cbp >> 4) == 2
+      biari_encode_symbol(eep_dp, (signed short) cbp_bit, ctx->cbp_contexts[2] + curr_cbp_ctx );
+    }
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len); // difference of arienco_bits_written() after and before
+  CABAC_TRACE;
+  return;
+}
+
+static const int maxpos       [] = {16, 15, 64, 32, 32, 16,  4, 15,  8, 16}; // indexed by block type; bounds the coefficient loops
+static const int c1isdc       [] = { 1,  0,  1,  1,  1,  1,  1,  0,  1,  1}; // 0: write_significance_map starts its scan at index 1
+
+static const int type2ctx_bcbp[] = { 0,  1,  2,  2,  3,  4,  5,  6,  5,  5}; // 7  (block type -> index into bcbp_contexts)
+static const int type2ctx_map [] = { 0,  1,  2,  3,  4,  5,  6,  7,  6,  6}; // 8  (block type -> index into map_contexts / fld_map_contexts)
+static const int type2ctx_last[] = { 0,  1,  2,  3,  4,  5,  6,  7,  6,  6}; // 8  (block type -> index into last_contexts / fld_last_contexts)
+static const int type2ctx_one [] = { 0,  1,  2,  3,  3,  4,  5,  6,  5,  5}; // 7  (block type -> index into one_contexts)
+static const int type2ctx_abs [] = { 0,  1,  2,  3,  3,  4,  5,  6,  5,  5}; // 7  (block type -> index into abs_contexts)
+static const int max_c2       [] = { 4,  4,  4,  4,  4,  4,  3,  4,  3,  3}; // 9  (block type -> cap applied to c2 in write_significant_coefficients)
+
+
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Write CBP4-BIT: store cbp_bit in currMB->cbp_bits and, unless type is LUMA_8x8, encode it
+ ****************************************************************************
+ */
+void write_and_store_CBP_block_bit (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int cbp_bit)
+{
+#define BIT_SET(x,n)  ((int)(((x)&((int64)1<<(n)))>>(n)))
+
+  int y_ac        = (type==LUMA_16AC || type==LUMA_8x8 || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4);
+  int y_dc        = (type==LUMA_16DC);
+  int u_ac        = (type==CHROMA_AC && !img->is_v_block);
+  int v_ac        = (type==CHROMA_AC &&  img->is_v_block);
+  int chroma_dc   = (type==CHROMA_DC || type==CHROMA_DC_2x4 || type==CHROMA_DC_4x4);
+  int u_dc        = (chroma_dc && !img->is_v_block);
+  int v_dc        = (chroma_dc &&  img->is_v_block);
+  int j           = (y_ac || u_ac || v_ac ? img->subblock_y : 0); // subblock position is only used for AC types
+  int i           = (y_ac || u_ac || v_ac ? img->subblock_x : 0);
+  int bit         = (y_dc ? 0 : y_ac ? 1 : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 : 23); // overwritten below before it is first read
+  int default_bit = (img->is_intra_block ? 1 : 0); // value used for a neighbour that is not available
+  int upper_bit   = default_bit;
+  int left_bit    = default_bit;
+  int ctx;
+
+  int bit_pos_a   = 0;
+  int bit_pos_b   = 0;
+
+  PixelPos block_a, block_b;
+
+  if (y_ac || y_dc)
+  {
+    getLuma4x4Neighbour(img->current_mb_nr, (i<<2) - 1, (j << 2),     &block_a); // block_a: neighbour at x-1 (left)
+    getLuma4x4Neighbour(img->current_mb_nr, (i<<2),     (j << 2) -1, &block_b); // block_b: neighbour at y-1 (above)
+    if (y_ac)
+    {
+      if (block_a.available)
+        bit_pos_a = 4*block_a.y + block_a.x; // offset of the neighbouring 4x4 block, added to the base bit below
+      if (block_b.available)
+        bit_pos_b = 4*block_b.y + block_b.x;
+    }
+  }
+  else
+  {
+    getChroma4x4Neighbour(img->current_mb_nr, (i<<2)-1, (j<<2),  &block_a);
+    getChroma4x4Neighbour(img->current_mb_nr, (i<<2),   (j<<2)-1,&block_b);
+    if (u_ac||v_ac)
+    {
+      if (block_a.available)
+        bit_pos_a = 4*block_a.y + block_a.x;
+      if (block_b.available)
+        bit_pos_b = 4*block_b.y + block_b.x;
+    }
+  }
+
+  bit = (y_dc ? 0 : y_ac ? 1+4*j+i : u_dc ? 17 : v_dc ? 18 : u_ac ? 19+4*j+i : 35+4*j+i); // bit index of the current block inside cbp_bits
+  //--- set bits for current block ---
+  if (cbp_bit)
+  {
+    if (type==LUMA_8x8) // sets the bits at offsets 0, 1, 4 and 5 from 'bit'
+    {
+      currMB->cbp_bits   |= ((int64)1<< bit   );
+      currMB->cbp_bits   |= ((int64)1<<(bit+1));
+      currMB->cbp_bits   |= ((int64)1<<(bit+4));
+      currMB->cbp_bits   |= ((int64)1<<(bit+5));
+    }
+    else if (type==LUMA_8x4) // sets the bits at offsets 0 and 1
+    {
+      currMB->cbp_bits   |= ((int64)1<< bit   );
+      currMB->cbp_bits   |= ((int64)1<<(bit+1));
+    }
+    else if (type==LUMA_4x8) // sets the bits at offsets 0 and 4
+    {
+      currMB->cbp_bits   |= ((int64)1<< bit   );
+      currMB->cbp_bits   |= ((int64)1<<(bit+4));
+    }
+    else
+    {
+      currMB->cbp_bits   |= ((int64)1<<bit);
+    }
+  }
+
+  bit = (y_dc ? 0 : y_ac ? 1 : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 : 35); // base bit index used when reading the neighbours' cbp_bits
+  if (type!=LUMA_8x8) // nothing is encoded for LUMA_8x8; only the bits above are stored
+  {
+    if (block_b.available)
+    {
+      if(img->mb_data[block_b.mb_addr].mb_type==IPCM)
+        upper_bit=1;
+      else
+        upper_bit = BIT_SET(img->mb_data[block_b.mb_addr].cbp_bits,bit+bit_pos_b);
+    }
+
+
+    if (block_a.available)
+    {
+      if(img->mb_data[block_a.mb_addr].mb_type==IPCM)
+        left_bit=1;
+      else
+        left_bit = BIT_SET(img->mb_data[block_a.mb_addr].cbp_bits,bit+bit_pos_a);
+    }
+
+    ctx = 2*upper_bit+left_bit; // context index in 0..3
+
+    //===== encode symbol =====
+    biari_encode_symbol (eep_dp, (short)cbp_bit, img->currentSlice->tex_ctx->bcbp_contexts[type2ctx_bcbp[type]]+ctx);
+  }
+}
+
+
+
+
+//===== position -> ctx for MAP =====
+//--- zig-zag scan ----
+static const int  pos2ctx_map8x8 [] = { 0,  1,  2,  3,  4,  5,  5,  4,  4,  3,  3,  4,  4,  4,  5,  5,
+                                        4,  4,  4,  4,  3,  3,  6,  7,  7,  7,  8,  9, 10,  9,  8,  7,
+                                        7,  6, 11, 12, 13, 11,  6,  7,  8,  9, 14, 10,  9,  8,  6, 11,
+                                       12, 13, 11,  6,  9, 14, 10,  9, 11, 12, 13, 11 ,14, 10, 12, 14}; // 15 CTX
+static const int  pos2ctx_map8x4 [] = { 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11,  9,  8,  6,  7,  8,
+                                        9, 10, 11,  9,  8,  6, 12,  8,  9, 10, 11,  9, 13, 13, 14, 14}; // 15 CTX
+static const int  pos2ctx_map4x4 [] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 14}; // 15 CTX
+static const int  pos2ctx_map2x4c[] = { 0,  0,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+static const int  pos2ctx_map4x4c[] = { 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+static const int* pos2ctx_map    [] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8, pos2ctx_map8x4,
+                                       pos2ctx_map8x4, pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4,
+                                       pos2ctx_map2x4c, pos2ctx_map4x4c}; // indexed as pos2ctx_map[type][k] when fld is 0
+
+//--- interlace scan ----
+//Taken from ABT
+static const int  pos2ctx_map8x8i[] = { 0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  7,  7,  8,  4,  5,
+                                        6,  9, 10, 10,  8, 11, 12, 11,  9,  9, 10, 10,  8, 11, 12, 11,
+                                        9,  9, 10, 10,  8, 11, 12, 11,  9,  9, 10, 10,  8, 13, 13,  9,
+                                        9, 10, 10,  8, 13, 13,  9,  9, 10, 10, 14, 14, 14, 14, 14, 14}; // 15 CTX
+
+static const int  pos2ctx_map8x4i[] = { 0,  1,  2,  3,  4,  5,  6,  3,  4,  5,  6,  3,  4,  7,  6,  8,
+                                        9,  7,  6,  8,  9, 10, 11, 12, 12, 10, 11, 13, 13, 14, 14, 14}; // 15 CTX
+static const int  pos2ctx_map4x8i[] = { 0,  1,  1,  1,  2,  3,  3,  4,  4,  4,  5,  6,  2,  7,  7,  8,
+                                        8,  8,  5,  6,  9, 10, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14}; // 15 CTX
+static const int* pos2ctx_map_int[] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i,
+                                       pos2ctx_map4x8i,pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4,
+                                       pos2ctx_map2x4c, pos2ctx_map4x4c}; // indexed as pos2ctx_map_int[type][k] when fld is non-zero
+
+
+//===== position -> ctx for LAST =====
+static const int  pos2ctx_last8x8 [] = { 0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+                                         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+                                         3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
+                                         5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8}; //  9 CTX
+static const int  pos2ctx_last8x4 [] = { 0,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,
+                                         3,  3,  3,  3,  4,  4,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8}; //  9 CTX
+static const int  pos2ctx_last4x4 [] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15}; // 15 CTX
+static const int  pos2ctx_last2x4c[] = { 0,  0,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+static const int  pos2ctx_last4x4c[] = { 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+static const int* pos2ctx_last    [] = {pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8, pos2ctx_last8x4,
+                                        pos2ctx_last8x4, pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last4x4,
+                                        pos2ctx_last2x4c, pos2ctx_last4x4c}; // indexed as pos2ctx_last[type][k] in both the frame and the field branch
+
+
+
+
+/*!
+****************************************************************************
+* \brief
+*    Write Significance MAP: one "significant" flag per scanned position, plus a "last" flag after each significant one
+****************************************************************************
+*/
+void write_significance_map (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int coeff[], int coeff_ctr)
+{
+  int   k;
+  unsigned short sig, last;
+  int   k0 = 0;
+  int   k1 = maxpos[type]-1; // the final position is never signalled (see loop comment below)
+  TextureInfoContexts*  tex_ctx = img->currentSlice->tex_ctx;
+
+  int               fld       = ( img->structure!=FRAME || currMB->mb_field ); // non-zero selects the fld_* context sets and pos2ctx_map_int
+  BiContextTypePtr  map_ctx   = ( fld
+    ? tex_ctx->fld_map_contexts[type2ctx_map [type]]
+    : tex_ctx->map_contexts[type2ctx_map [type]] );
+  BiContextTypePtr  last_ctx  = ( fld
+    ? tex_ctx->fld_last_contexts[type2ctx_last[type]]
+    : tex_ctx->last_contexts[type2ctx_last[type]] );
+
+  if (!c1isdc[type])
+  {
+    k0++; k1++; coeff--; // scan indices shift up by one; coeff-- makes coeff[k] address the caller's element k-1
+  }
+
+  if (!fld)
+  {
+    for (k=k0; k<k1; k++) // if last coeff is reached, it has to be significant
+    {
+      sig   = (coeff[k] != 0);
+      biari_encode_symbol  (eep_dp, sig,  map_ctx + pos2ctx_map  [type][k]);
+      if (sig)
+      {
+        last = (--coeff_ctr == 0); // 1 once every non-zero coefficient counted in coeff_ctr has been signalled
+
+        biari_encode_symbol(eep_dp, last, last_ctx + pos2ctx_last[type][k]);
+        if (last) return;
+      }
+    }
+    return;
+  }
+  else
+  {
+    for (k=k0; k<k1; k++) // if last coeff is reached, it has to be significant
+    {
+      sig   = (coeff[k] != 0);
+
+      biari_encode_symbol  (eep_dp, sig,  map_ctx + pos2ctx_map_int [type][k]); // only the map table differs from the frame branch
+      if (sig)
+      {
+        last = (--coeff_ctr == 0);
+
+        biari_encode_symbol(eep_dp, last, last_ctx + pos2ctx_last[type][k]);
+        if (last) return;
+      }
+    }
+  }
+}
+
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Write Levels: magnitude and sign of every non-zero entry of coeff[], highest index first
+ ****************************************************************************
+ */
+void write_significant_coefficients (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int coeff[])
+{
+  int   i;
+  int   absLevel;
+  int   ctx;
+  short sign;
+  short greater_one;
+  int   c1 = 1; // selects the context of the "greater than one" bin (capped at 4)
+  int   c2 = 0; // number of levels > 1 encoded so far; selects the abs context (capped at max_c2[type])
+
+  for (i=maxpos[type]-1; i>=0; i--) // currMB is not referenced in this function
+  {
+    if (coeff[i]!=0)
+    {
+      if (coeff[i]>0) {absLevel =  coeff[i];  sign = 0;}
+      else            {absLevel = -coeff[i];  sign = 1;}
+
+      greater_one = (absLevel>1);
+
+      //--- if coefficient is one ---
+      ctx = imin(c1,4);
+      biari_encode_symbol (eep_dp, greater_one, img->currentSlice->tex_ctx->one_contexts[type2ctx_one[type]] + ctx);
+
+      if (greater_one)
+      {
+        ctx = imin(c2, max_c2[type]);
+        unary_exp_golomb_level_encode(eep_dp, absLevel-2, img->currentSlice->tex_ctx->abs_contexts[type2ctx_abs[type]] + ctx);
+        c1 = 0; // after the first level > 1, c1 stays 0 for the rest of the block
+        c2++;
+      }
+      else if (c1)
+      {
+        c1++; // counts levels equal to one while no level > 1 has been seen
+      }
+      biari_encode_symbol_eq_prob (eep_dp, sign); // sign goes through the equal-probability routine
+    }
+  }
+}
+
+
+
+/*!
+ ****************************************************************************
+ * \brief
+ *    Write Block-Transform Coefficients: buffers (level=value1, run=value2) pairs; a call with value1==0 encodes the block
+ ****************************************************************************
+ */
+void writeRunLevel_CABAC (SyntaxElement *se, DataPartition *dp)
+{
+  EncodingEnvironmentPtr eep_dp = &(dp->ee_cabac);
+  int curr_len = arienco_bits_written(eep_dp);
+  static int  coeff[64]; // static: the buffer persists between calls until a value1==0 call resets it
+  static int  coeff_ctr = 0; // number of non-zero levels buffered so far
+  static int  pos       = 0; // next write index into coeff[]
+
+  //--- accumulate run-level information ---
+  if (se->value1 != 0)
+  {
+    pos += se->value2; // skip 'run' entries, which stay zero
+    coeff[pos++] = se->value1; // NOTE(review): pos is not checked against the 64-entry buffer
+    coeff_ctr++;
+    //return;
+  }
+  else
+  {
+    Macroblock* currMB    = &img->mb_data[img->current_mb_nr];
+    //===== encode CBP-BIT =====
+    if (coeff_ctr>0)
+    {
+      write_and_store_CBP_block_bit  (currMB, eep_dp, se->context, 1); // se->context is passed as the 'type' argument
+      //===== encode significance map =====
+      write_significance_map         (currMB, eep_dp, se->context, coeff, coeff_ctr);
+      //===== encode significant coefficients =====
+      write_significant_coefficients (currMB, eep_dp, se->context, coeff);
+    }
+    else
+      write_and_store_CBP_block_bit  (currMB, eep_dp, se->context, 0); // no levels buffered: only the CBP bit (0) is handled
+
+    //--- reset counters ---
+    pos = coeff_ctr = 0;
+    memset(coeff, 0 , 64 * sizeof(int));
+  }
+
+  dp->bitstream->write_flag = 1;
+  se->len = (arienco_bits_written(eep_dp) - curr_len); // difference of arienco_bits_written() after and before
+  CABAC_TRACE;
+  return;
+}
+
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Unary binarization and encoding of a symbol by using
+ *    one model (ctx) for the first bin and the model at
+ *    ctx+ctx_offset for all remaining bins
+*
+************************************************************************/
+void unary_bin_encode(EncodingEnvironmentPtr eep_dp,
+                      unsigned int symbol,
+                      BiContextTypePtr ctx,
+                      int ctx_offset)
+{
+  unsigned int l;
+  BiContextTypePtr ictx;
+
+  if (symbol==0)
+  {
+    biari_encode_symbol(eep_dp, 0, ctx ); // symbol 0 is a single 0 bin
+    return;
+  }
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, ctx );
+    l = symbol;
+    ictx = ctx+ctx_offset;
+    while ((--l)>0) // symbol-1 further 1 bins, so 'symbol' 1 bins in total
+      biari_encode_symbol(eep_dp, 1, ictx);
+    biari_encode_symbol(eep_dp, 0, ictx); // terminating 0 bin
+  }
+  return;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Unary binarization and encoding of a symbol by using
+ *    one model (ctx) for the first bin and the model at ctx+ctx_offset
+ *    for all remaining bins; no terminating "0" unless symbol < max_symbol
+ *    (finite symbol alphabet)
+ ************************************************************************
+ */
+void unary_bin_max_encode(EncodingEnvironmentPtr eep_dp,
+                          unsigned int symbol,
+                          BiContextTypePtr ctx,
+                          int ctx_offset,
+                          unsigned int max_symbol)
+{
+  unsigned int l;
+  BiContextTypePtr ictx;
+
+  if (symbol==0)
+  {
+    biari_encode_symbol(eep_dp, 0, ctx ); // symbol 0 is a single 0 bin
+    return;
+  }
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, ctx );
+    l=symbol;
+    ictx=ctx+ctx_offset;
+    while ((--l)>0) // symbol-1 further 1 bins, so 'symbol' 1 bins in total
+      biari_encode_symbol(eep_dp, 1, ictx);
+    if (symbol<max_symbol) // the terminating 0 bin is written only below max_symbol
+      biari_encode_symbol(eep_dp, 0, ictx);
+  }
+  return;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Exp Golomb binarization and encoding (starting order k), all bins via biari_encode_symbol_eq_prob
+ ************************************************************************
+ */
+void exp_golomb_encode_eq_prob( EncodingEnvironmentPtr eep_dp,
+                                unsigned int symbol,
+                                int k)
+{
+  while(1)
+  {
+    if (symbol >= (unsigned int)(1<<k))
+    {
+      biari_encode_symbol_eq_prob(eep_dp, 1);   //first unary part
+      symbol = symbol - (1<<k); // subtract the range covered by this prefix step
+      k++; // each 1 bin of the prefix raises the suffix length by one
+    }
+    else
+    {
+      biari_encode_symbol_eq_prob(eep_dp, 0);   //now terminated zero of unary part
+      while (k--)                               //next binary part
+        biari_encode_symbol_eq_prob(eep_dp, (signed short)((symbol>>k)&1)); // k suffix bits, most significant first
+      break;
+    }
+  }
+
+  return;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Exp-Golomb for Level Encoding: unary bins with model ctx up to exp_start, then an order-0 Exp-Golomb suffix
+*
+************************************************************************/
+void unary_exp_golomb_level_encode( EncodingEnvironmentPtr eep_dp,
+                                    unsigned int symbol,
+                                    BiContextTypePtr ctx)
+{
+  unsigned int l,k;
+  unsigned int exp_start = 13; // 15-2 : 0,1 level decision always sent
+
+  if (symbol==0)
+  {
+    biari_encode_symbol(eep_dp, 0, ctx ); // symbol 0 is a single 0 bin
+    return;
+  }
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, ctx );
+    l=symbol;
+    k=1;
+    while (((--l)>0) && (++k <= exp_start)) // at most exp_start 1 bins in total, including the one above
+      biari_encode_symbol(eep_dp, 1, ctx);
+    if (symbol < exp_start) biari_encode_symbol(eep_dp, 0, ctx); // short symbol: terminating 0 bin
+    else exp_golomb_encode_eq_prob(eep_dp,symbol-exp_start,0); // otherwise the remainder is sent as a suffix with k=0
+  }
+  return;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Exp-Golomb for MV Encoding: unary bins with advancing models up to exp_start, then an order-3 Exp-Golomb suffix
+*
+************************************************************************/
+void unary_exp_golomb_mv_encode(EncodingEnvironmentPtr eep_dp,
+                                unsigned int symbol,
+                                BiContextTypePtr ctx,
+                                unsigned int max_bin)
+{
+  unsigned int l,k;
+  unsigned int bin=1;
+  BiContextTypePtr ictx=ctx;
+  unsigned int exp_start = 8; // 9-1 : 0 mvd decision always sent
+
+  if (symbol==0)
+  {
+    biari_encode_symbol(eep_dp, 0, ictx ); // symbol 0 is a single 0 bin
+    return;
+  }
+  else
+  {
+    biari_encode_symbol(eep_dp, 1, ictx );
+    l=symbol;
+    k=1;
+    ictx++; // bins after the first use the next model
+    while (((--l)>0) && (++k <= exp_start)) // at most exp_start 1 bins in total, including the one above
+    {
+      biari_encode_symbol(eep_dp, 1, ictx  );
+      if ((++bin)==2) ictx++; // advance the model once bin reaches 2
+      if (bin==max_bin) ictx++; // and once more when bin reaches max_bin
+    }
+    if (symbol < exp_start) biari_encode_symbol(eep_dp, 0, ictx); // short symbol: terminating 0 bin
+    else exp_golomb_encode_eq_prob(eep_dp,symbol-exp_start,3); // otherwise the remainder is sent as a suffix with k=3
+  }
+  return;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/cabac.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/cabac.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/cabac.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,62 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ *    cabac.h
+ *
+ * \brief
+ *    Header file for the CABAC entropy coding routines
+ *
+ * \author
+ *    Detlev Marpe                                                         \n
+ *    Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+ *
+ * \date
+ *    21. Oct 2000 (Changes by Tobias Oelbaum 28.08.2001)
+ ***************************************************************************
+ */
+
+
+#ifndef _CABAC_H_
+#define _CABAC_H_
+
+// CABAC
+int get_pic_bin_count(void);
+void reset_pic_bin_count(void);
+
+void arienco_start_encoding(EncodingEnvironmentPtr eep, unsigned char *code_buffer, int *code_len);
+int  arienco_bits_written(EncodingEnvironmentPtr eep);
+void arienco_done_encoding(EncodingEnvironmentPtr eep);
+void biari_init_context (BiContextTypePtr ctx, const int* ini);
+void rescale_cum_freq(BiContextTypePtr bi_ct);
+void biari_encode_symbol(EncodingEnvironmentPtr eep, signed short symbol, BiContextTypePtr bi_ct );
+void biari_encode_symbol_eq_prob(EncodingEnvironmentPtr eep, signed short symbol);
+void biari_encode_symbol_final(EncodingEnvironmentPtr eep, signed short symbol);
+MotionInfoContexts* create_contexts_MotionInfo(void);
+TextureInfoContexts* create_contexts_TextureInfo(void);
+void init_contexts_MotionInfo (MotionInfoContexts  *enco_ctx);
+void init_contexts_TextureInfo(TextureInfoContexts *enco_ctx);
+void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx);
+void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx);
+void writeHeaderToBuffer(void);
+void writeMB_typeInfo_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeIntraPredMode_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeB8_typeInfo_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeRefFrame_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeMVD_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeCBP_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeDquant_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeRunLevel_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeCIPredMode_CABAC(SyntaxElement *se, DataPartition *dp);
+void print_ctx_TextureInfo(TextureInfoContexts *enco_ctx);
+void writeMB_skip_flagInfo_CABAC(SyntaxElement *se, DataPartition *dp);
+void writeFieldModeInfo_CABAC(SyntaxElement *se, DataPartition *dp); //GB
+void writeCBP_BIT_CABAC (int b8, int bit, int cbp, Macroblock* currMB, int inter, EncodingEnvironmentPtr eep_dp); // 'inter' is not read by the definition in cabac.c
+void cabac_new_slice(void);
+void CheckAvailabilityOfNeighborsCABAC(void);
+
+void writeMB_transform_size_CABAC(SyntaxElement *se, DataPartition *dp);
+
+
+#endif  // _CABAC_H_
+


Index: llvm-test/MultiSource/Applications/JM/lencod/configfile.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/configfile.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/configfile.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1241 @@
+
+/*!
+ ***********************************************************************
+ * \file
+ *    configfile.c
+ * \brief
+ *    Configuration handling.
+ * \author
+ *  Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Stephan Wenger           <stewe at cs.tu-berlin.de>
+ * \note
+ *    In the future this module should hide the Parameters and offer only
+ *    Functions for their access.  Modules which make frequent use of some parameters
+ *    (e.g. picture size in macroblocks) are free to buffer them on local variables.
+ *    This will not only avoid global variable and make the code more readable, but also
+ *    speed it up.  It will also greatly facilitate future enhancements such as the
+ *    handling of different picture sizes in the same sequence.                         \n
+ *                                                                                      \n
+ *    For now, everything is just copied to the inp_par structure (gulp)
+ *
+ **************************************************************************************
+ * \par Configuration File Format
+ **************************************************************************************
+ * Format is line oriented, maximum of one parameter per line                           \n
+ *                                                                                      \n
+ * Lines have the following format:                                                     \n
+ * \<ParameterName\> = \<ParameterValue\> # Comments \\n                                    \n
+ * Whitespace is space and \\t
+ * \par
+ * \<ParameterName\> are the predefined names for Parameters and are case sensitive.
+ *   See configfile.h for the definition of those names and their mapping to
+ *   configinput->values.
+ * \par
+ * \<ParameterValue\> are either integers [0..9]* or strings.
+ *   Integers must fit into the wordlengths, signed values are generally assumed.
+ *   Strings containing no whitespace characters can be used directly.  Strings containing
+ *   whitespace characters are to be enclosed in double quotes ("string with whitespace").
+ *   The double quote character is forbidden (may want to implement something smarter here).
+ * \par
+ * Any Parameters whose ParameterName is undefined lead to the termination of the program
+ * with an error message.
+ *
+ * \par Known bug/Shortcoming:
+ *    zero-length strings (i.e. to signal a non-existing file)
+ *    have to be coded as "".
+ *
+ * \par Rules for using command files
+ *                                                                                      \n
+ * All Parameters are initially taken from DEFAULTCONFIGFILENAME, defined in configfile.h.
+ * If an -f \<config\> parameter is present in the command line then this file is used to
+ * update the defaults of DEFAULTCONFIGFILENAME.  There can be more than one -f parameters
+ * present.  If -p \<ParameterName = ParameterValue\> parameters are present then these
+ * override the default and the additional config file's settings, and are themselves
+ * overridden by future -p parameters.  There must be whitespace between -f and -p commands
+ * and their respective parameters
+ ***********************************************************************
+ */
+
+#define INCLUDED_BY_CONFIGFILE_C
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "global.h"
+#include "configfile.h"
+#include "fmo.h"
+
+// Forward declarations of the configuration helpers of this translation unit.
+// GetConfigFileContent is the only one declared without 'static'.
+char *GetConfigFileContent (char *Filename);
+static void ParseContent (char *buf, int bufsize);
+static int ParameterNameToMapIndex (char *s);
+static int InitEncoderParams(void);
+static int TestEncoderParams(int bitdepth_qp_scale);
+static int DisplayEncoderParams(void);
+static void PatchInp (void);
+static void ProfileCheck(void);
+static void LevelCheck(void);
+
+
+// Number of entries in the items[] token array that ParseContent fills.
+#define MAX_ITEMS_TO_PARSE  10000
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Write the command line usage summary to stderr and terminate the
+ *    process with exit status -1.
+ ***********************************************************************
+ */
+void JMHelpExit (void)
+{
+  // The usage text is a plain string (it holds no conversion
+  // specifications), so it is written to stderr verbatim.
+  static const char usage[] =
+    "\n   lencod [-h] [-d defenc.cfg] {[-f curenc1.cfg]...[-f curencN.cfg]}"
+    " {[-p EncParam1=EncValue1]..[-p EncParamM=EncValueM]}\n\n"
+    "## Parameters\n\n"
+
+    "## Options\n"
+    "   -h :  prints function usage\n"
+    "   -d :  use <defenc.cfg> as default file for parameter initializations.\n"
+    "         If not used then file defaults to encoder.cfg in local directory.\n"
+    "   -f :  read <curencM.cfg> for reseting selected encoder parameters.\n"
+    "         Multiple files could be used that set different parameters\n"
+    "   -p :  Set parameter <EncParamM> to <EncValueM>.\n"
+    "         See default encoder.cfg file for description of all parameters.\n\n"
+
+    "## Supported video file formats\n"
+    "   RAW:  .yuv -> YUV 4:2:0\n\n"
+
+    "## Examples of usage:\n"
+    "   lencod\n"
+    "   lencod  -h\n"
+    "   lencod  -d default.cfg\n"
+    "   lencod  -f curenc1.cfg\n"
+    "   lencod  -f curenc1.cfg -p InputFile=\"e:\\data\\container_qcif_30.yuv\" -p SourceWidth=176 -p SourceHeight=144\n"
+    "   lencod  -f curenc1.cfg -p FramesToBeEncoded=30 -p QPISlice=28 -p QPPSlice=28 -p QPBSlice=30\n";
+
+  fputs (usage, stderr);
+  exit (-1);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Parse the command line parameters and read the config files.
+ *
+ *    Processing order:
+ *      -# configinput is zeroed and filled with the Map[] defaults.
+ *      -# The default config file is parsed: DEFAULTCONFIGFILENAME, or
+ *         the file named after a leading "-d".
+ *      -# The remaining arguments are handled left to right:
+ *         "-h" prints the usage and exits, "-f <file>" parses a further
+ *         config file, "-p <text> ..." parses the arguments that follow
+ *         (up to the next one starting with '-') as config text.
+ *      -# PatchInp() is run and, if input->DisplayEncParams is set, the
+ *         parameters are printed.
+ *
+ *    Failures are reported through error().
+ * \param ac
+ *    number of command line parameters
+ * \param av
+ *    command line parameters
+ ***********************************************************************
+ */
+void Configure (int ac, char *av[])
+{
+  char *content;
+  int CLcount, ContentLen, NumberParams;
+  char *filename=DEFAULTCONFIGFILENAME;
+
+  memset (&configinput, 0, sizeof (InputParameters));
+  //Set default parameters.
+  printf ("Setting Default Parameters...\n");
+  InitEncoderParams();
+
+  // Process default config file
+  CLcount = 1;
+
+  // "-h" as the first argument always wins
+  if (ac >= 2 && 0 == strncmp (av[1], "-h", 2))
+  {
+    JMHelpExit();
+  }
+
+  // "-d <file>" is only honoured as the very first option
+  if (ac >= 3 && 0 == strncmp (av[1], "-d", 2))
+  {
+    filename=av[2];
+    CLcount = 3;
+  }
+
+  printf ("Parsing Configfile %s", filename);
+  content = GetConfigFileContent (filename);
+  if (NULL==content)
+    error (errortext, 300);
+  ParseContent (content, strlen(content));
+  printf ("\n");
+  free (content);
+
+  // Parse the command line
+
+  while (CLcount < ac)
+  {
+    if (0 == strncmp (av[CLcount], "-h", 2))
+    {
+      JMHelpExit();
+    }
+
+    if (0 == strncmp (av[CLcount], "-f", 2))  // A file parameter?
+    {
+      // av[ac] is a null pointer, so a trailing "-f" must be rejected
+      // before its (missing) file name is used.
+      if (CLcount + 1 >= ac)
+      {
+        snprintf (errortext, ET_SIZE, "Error in command line, ac %d, '%s' must be followed by a config file name.", CLcount, av[CLcount]);
+        error (errortext, 300);
+        break;                                // only reached if error() returns
+      }
+      content = GetConfigFileContent (av[CLcount+1]);
+      if (NULL==content)
+        error (errortext, 300);
+      printf ("Parsing Configfile %s", av[CLcount+1]);
+      ParseContent (content, strlen (content));
+      printf ("\n");
+      free (content);
+      CLcount += 2;
+    } else
+    {
+      if (0 == strncmp (av[CLcount], "-p", 2))  // A config change?
+      {
+        // Collect all data until next parameter (starting with -<x> (x is any character)),
+        // put it into content, and parse content.
+
+        CLcount++;
+        ContentLen = 0;
+        NumberParams = CLcount;
+
+        // Determine the necessary size for content.  Every '=' is expanded
+        // to " = " below, so a character can grow to at most three.
+        while (NumberParams < ac && av[NumberParams][0] != '-')
+          ContentLen += 3 * strlen (av[NumberParams++]);
+        ContentLen += 1;                        // terminating '\0'
+
+        if ((content = malloc (ContentLen))==NULL) no_mem_exit("Configure: content");
+        content[0] = '\0';
+
+        // Concatenate all parameters identified before.  The arguments are
+        // appended back to back; no separator is inserted between them.
+
+        while (CLcount < NumberParams)
+        {
+          char *source = &av[CLcount][0];
+          char *destin = &content[strlen (content)];
+
+          while (*source != '\0')
+          {
+            if (*source == '=')  // The Parser expects whitespace before and after '='
+            {
+              *destin++=' '; *destin++='='; *destin++=' ';  // Hence make sure we add it
+            } else
+              *destin++=*source;
+            source++;
+          }
+          *destin = '\0';
+          CLcount++;
+        }
+        printf ("Parsing command line string '%s'", content);
+        ParseContent (content, strlen(content));
+        free (content);
+        printf ("\n");
+      }
+      else
+      {
+        snprintf (errortext, ET_SIZE, "Error in command line, ac %d, around string '%s', missing -f or -p parameters?", CLcount, av[CLcount]);
+        error (errortext, 300);
+      }
+    }
+  }
+  printf ("\n");
+  PatchInp();
+  if (input->DisplayEncParams)
+    DisplayEncoderParams();
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Opens file Filename in text mode, reads its whole content into a
+ *    freshly allocated, NUL-terminated buffer and returns that buffer.
+ *    The file is closed again on every path once it has been opened.
+ * \param Filename
+ *    name of config file
+ * \return
+ *    if successful, content of config file (allocated with malloc; the
+ *    caller releases it with free)                                      \n
+ *    NULL in case of error. Error message will be set in errortext
+ * \note
+ *    Files for which ftell reports more than 60000 bytes are rejected.
+ ***********************************************************************
+ */
+char *GetConfigFileContent (char *Filename)
+{
+  long FileSize;
+  FILE *f;
+  char *buf;
+
+  if (NULL == (f = fopen (Filename, "r")))
+  {
+    snprintf (errortext, ET_SIZE, "Cannot open configuration file %s.", Filename);
+    return NULL;
+  }
+
+  if (0 != fseek (f, 0, SEEK_END))
+  {
+    snprintf (errortext, ET_SIZE, "Cannot fseek in configuration file %s.", Filename);
+    fclose (f);
+    return NULL;
+  }
+
+  FileSize = ftell (f);
+  if (FileSize < 0 || FileSize > 60000)
+  {
+    snprintf (errortext, ET_SIZE, "Unreasonable Filesize %ld reported by ftell for configuration file %s.", FileSize, Filename);
+    fclose (f);
+    return NULL;
+  }
+  if (0 != fseek (f, 0, SEEK_SET))
+  {
+    snprintf (errortext, ET_SIZE, "Cannot fseek in configuration file %s.", Filename);
+    fclose (f);
+    return NULL;
+  }
+
+  if ((buf = malloc (FileSize + 1))==NULL) no_mem_exit("GetConfigFileContent: buf");
+
+  // Note that ftell() gives us the file size as the file system sees it.  The actual file size,
+  // as reported by fread() below will be often smaller due to CR/LF to CR conversion and/or
+  // control characters after the dos EOF marker in the file.
+
+  FileSize = fread (buf, 1, FileSize, f);
+  buf[FileSize] = '\0';   // fread never returns more than requested, so this stays inside buf
+
+  fclose (f);
+  return buf;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Parses the character array buf and writes global variable input, which is defined in
+ *    configfile.h.  This hack will continue to be necessary to facilitate the addition of
+ *    new parameters through the Map[] mechanism (Need compiler-generated addresses in map[]).
+ *
+ *    Stage one tokenizes buf in place: separators (space, tab, newline,
+ *    double quote and the '#' that starts a comment) are overwritten
+ *    with '\0' and the start of every token is recorded in items[].
+ *    Stage two walks items[] three entries at a time (parameter name,
+ *    "=", value) and stores each value through Map[].Place.  Finally
+ *    configinput is copied to *input.
+ *
+ *    Unknown parameter names, a missing '=', a missing or non-numeric
+ *    value and more than MAX_ITEMS_TO_PARSE tokens are reported through
+ *    error().
+ * \param buf
+ *    buffer to be parsed; it is modified.  buf[bufsize] has to be a
+ *    readable '\0' because a token may start exactly there.
+ * \param bufsize
+ *    buffer size of buffer
+ ***********************************************************************
+ */
+void ParseContent (char *buf, int bufsize)
+{
+
+  char *items[MAX_ITEMS_TO_PARSE];
+  int MapIdx;
+  int item = 0;                   // number of tokens seen so far
+  int InString = 0, InItem = 0;
+  char *p = buf;
+  char *bufend = &buf[bufsize];
+  int IntContent;
+  double DoubleContent;
+  int i;
+
+// Stage one: Generate an argc/argv-type list in items[], without comments and whitespace.
+// This is context insensitive and could be done most easily with lex(1).
+
+  while (p < bufend)
+  {
+    switch (*p)
+    {
+      case 13:                // Carriage return is skipped
+        p++;
+        break;
+      case '#':                 // Found comment
+        *p = '\0';              // Replace '#' with '\0' in case of comment immediately following integer or string
+        while (p < bufend && *p != '\n')  // Skip till EOL or EOF, whichever comes first
+          p++;
+        InString = 0;
+        InItem = 0;
+        break;
+      case '\n':
+        InItem = 0;
+        InString = 0;
+        *p++='\0';
+        break;
+      case ' ':
+      case '\t':              // Skip whitespace, leave state unchanged
+        if (InString)
+          p++;
+        else
+        {                     // Terminate non-strings once whitespace is found
+          *p++ = '\0';
+          InItem = 0;
+        }
+        break;
+
+      case '"':               // Begin/End of String
+        *p++ = '\0';
+        if (!InString)
+        {
+          if (item < MAX_ITEMS_TO_PARSE)  // tokens beyond the array are counted, not stored
+            items[item] = p;
+          item++;
+          InItem = ~InItem;
+        }
+        else
+          InItem = 0;
+        InString = ~InString; // Toggle
+        break;
+
+      default:
+        if (!InItem)
+        {
+          if (item < MAX_ITEMS_TO_PARSE)  // tokens beyond the array are counted, not stored
+            items[item] = p;
+          item++;
+          InItem = ~InItem;
+        }
+        p++;
+    }
+  }
+
+  if (item > MAX_ITEMS_TO_PARSE)
+  {
+    snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d tokens found.", MAX_ITEMS_TO_PARSE);
+    error (errortext, 300);
+    item = MAX_ITEMS_TO_PARSE;      // only reached if error() returns
+  }
+
+// Stage two: interpret the tokens as "Name = Value" triples.  A single
+// left-over token after the last complete triple is ignored.
+
+  for (i=0; i + 1 < item; i+= 3)
+  {
+    if (0 > (MapIdx = ParameterNameToMapIndex (items[i])))
+    {
+      snprintf (errortext, ET_SIZE, " Parsing error in config file: Parameter Name '%s' not recognized.", items[i]);
+      error (errortext, 300);
+    }
+    if (strcmp ("=", items[i+1]))
+    {
+      snprintf (errortext, ET_SIZE, " Parsing error in config file: '=' expected as the second token in each line.");
+      error (errortext, 300);
+    }
+    if (i + 2 >= item)              // "Name =" at the very end: there is no value token to read
+    {
+      snprintf (errortext, ET_SIZE, " Parsing error in config file: no value found for Parameter '%s'.", items[i]);
+      error (errortext, 300);
+      break;                        // only reached if error() returns
+    }
+
+    // Now interpret the Value, context sensitive...
+
+    switch (Map[MapIdx].Type)
+    {
+      case 0:           // Numerical
+        if (1 != sscanf (items[i+2], "%d", &IntContent))
+        {
+          snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+2]);
+          error (errortext, 300);
+        }
+        * (int *) (Map[MapIdx].Place) = IntContent;
+        printf (".");
+        break;
+      case 1:           // String
+        // strncpy does not terminate the destination when the source fills
+        // it completely, so the last byte is reserved for the '\0'.
+        strncpy ((char *) Map[MapIdx].Place, items [i+2], FILE_NAME_SIZE - 1);
+        ((char *) Map[MapIdx].Place)[FILE_NAME_SIZE - 1] = '\0';
+        printf (".");
+        break;
+      case 2:           // Numerical double
+        if (1 != sscanf (items[i+2], "%lf", &DoubleContent))
+        {
+          snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+2]);
+          error (errortext, 300);
+        }
+        * (double *) (Map[MapIdx].Place) = DoubleContent;
+        printf (".");
+        break;
+      default:
+        error ("Unknown value type in the map definition of configfile.h",-1);
+    }
+  }
+  memcpy (input, &configinput, sizeof (InputParameters));
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Looks up a parameter name in Map[].  The comparison ignores case
+ *    and the search stops at the entry whose TokenName is NULL.
+ * \param s
+ *    parameter name string
+ * \return
+ *    index of the first matching Map[] entry,                          \n
+ *    -1 if no entry matches
+ ***********************************************************************
+ */
+static int ParameterNameToMapIndex (char *s)
+{
+  int idx;
+
+  for (idx = 0; Map[idx].TokenName != NULL; idx++)
+  {
+    if (strcasecmp (Map[idx].TokenName, s) == 0)
+      return idx;
+  }
+
+  return -1;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Sets initial values for encoding parameters: every Map[] entry of
+ *    Type 0 (int) or Type 2 (double) receives its Default value.
+ *    Entries of any other Type are left untouched.
+ * \return
+ *    always -1
+ ***********************************************************************
+ */
+static int InitEncoderParams(void)
+{
+  int idx;
+
+  for (idx = 0; Map[idx].TokenName != NULL; idx++)
+  {
+    switch (Map[idx].Type)
+    {
+      case 0:           // Numerical
+        * (int *) (Map[idx].Place) = (int) Map[idx].Default;
+        break;
+      case 2:           // Numerical double
+        * (double *) (Map[idx].Place) = Map[idx].Default;
+        break;
+      default:          // nothing to initialize
+        break;
+    }
+  }
+  return -1;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Validates encoding parameters against the limits stored in Map[].
+ *    Map[].param_limits selects the kind of check:
+ *      - 1: value has to lie in [min_limit, max_limit]
+ *      - 2: value must not be smaller than min_limit
+ *      - 3: like 1 for int parameters, with the lower bound reduced by
+ *           bitdepth_qp_scale (only used for QPs)
+ *    Any other param_limits value means the entry is not checked.
+ *    Only Type 0 (int) and Type 2 (double) entries are examined.
+ *    A violation is reported through error() with code 400.
+ * \param bitdepth_qp_scale
+ *    amount subtracted from min_limit for param_limits == 3
+ * \return
+ *    always -1
+ ***********************************************************************
+ */
+static int TestEncoderParams(int bitdepth_qp_scale)
+{
+  int i;
+
+  for (i = 0; Map[i].TokenName != NULL; i++)
+  {
+    if (Map[i].param_limits == 1)
+    {
+      if (Map[i].Type == 0)
+      {
+        int value = * (int *) (Map[i].Place);
+
+        if ( value < (int) Map[i].min_limit || value > (int) Map[i].max_limit )
+        {
+          snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%d, %d] range.", Map[i].TokenName, (int) Map[i].min_limit,(int)Map[i].max_limit );
+          error (errortext, 400);
+        }
+      }
+      else if (Map[i].Type == 2)
+      {
+        double value = * (double *) (Map[i].Place);
+
+        if ( value < Map[i].min_limit || value > Map[i].max_limit )
+        {
+          snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%.2f, %.2f] range.", Map[i].TokenName,Map[i].min_limit ,Map[i].max_limit );
+          error (errortext, 400);
+        }
+      }
+    }
+    else if (Map[i].param_limits == 2)
+    {
+      if (Map[i].Type == 0)
+      {
+        if ( * (int *) (Map[i].Place) < (int) Map[i].min_limit )
+        {
+          snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should not be smaller than %d.", Map[i].TokenName, (int) Map[i].min_limit);
+          error (errortext, 400);
+        }
+      }
+      else if (Map[i].Type == 2)
+      {
+        if ( * (double *) (Map[i].Place) < Map[i].min_limit )
+        {
+          // "%.2f" (two decimals) as in the range message above; "%2.f"
+          // would print the limit rounded to an integer.
+          snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should not be smaller than %.2f.", Map[i].TokenName,Map[i].min_limit);
+          error (errortext, 400);
+        }
+      }
+    }
+    else if (Map[i].param_limits == 3) // Only used for QPs
+    {
+      if (Map[i].Type == 0)
+      {
+        int value    = * (int *) (Map[i].Place);
+        int lower_qp = (int) (Map[i].min_limit - bitdepth_qp_scale);
+
+        if ( value < lower_qp || value > (int) Map[i].max_limit )
+        {
+          snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%d, %d] range.", Map[i].TokenName, lower_qp,(int)Map[i].max_limit );
+          error (errortext, 400);
+        }
+      }
+    }
+  }
+  return -1;
+}
+
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Outputs encoding parameters: one "Parameter <name> = <value>" line
+ *    on stdout per Map[] entry of Type 0 (int), 1 (string) or
+ *    2 (double), framed by a banner.
+ * \return
+ *    always -1
+ ***********************************************************************
+ */
+static int DisplayEncoderParams(void)
+{
+  int idx;
+
+  printf("******************************************************\n");
+  printf("*               Encoder Parameters                   *\n");
+  printf("******************************************************\n");
+
+  for (idx = 0; Map[idx].TokenName != NULL; idx++)
+  {
+    switch (Map[idx].Type)
+    {
+      case 0:           // Numerical
+        printf("Parameter %s = %d\n",Map[idx].TokenName,* (int *) (Map[idx].Place));
+        break;
+      case 1:           // String
+        // NOTE: the adjacent literals below are concatenated by the
+        // compiler, so the string value is printed without quotes.
+        printf("Parameter %s = ""%s""\n",Map[idx].TokenName,(char *)  (Map[idx].Place));
+        break;
+      case 2:           // Numerical double
+        printf("Parameter %s = %.2f\n",Map[idx].TokenName,* (double *) (Map[idx].Place));
+        break;
+      default:          // other types are not displayed
+        break;
+    }
+  }
+
+  printf("******************************************************\n");
+  return -1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    calculate Ceil(Log2(uiVal)), i.e. the number of bits needed to
+ *    represent uiVal-1
+ * \param uiVal
+ *    value to take the logarithm of.  For 0 the unsigned subtraction
+ *    wraps around and the bit width of unsigned is returned.
+ * \return
+ *    smallest n with (1 << n) >= uiVal for uiVal >= 1
+ ************************************************************************
+ */
+unsigned CeilLog2( unsigned uiVal)
+{
+  unsigned uiBits = 0;
+  unsigned uiRest;
+
+  // Count how often uiVal-1 can be halved before it reaches zero
+  for (uiRest = uiVal - 1; uiRest != 0; uiRest >>= 1)
+    uiBits++;
+
+  return uiBits;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Checks the input parameters for consistency.
+ ***********************************************************************
+ */
+static void PatchInp (void)
+{
+  int bitdepth_qp_scale = 6*(input->BitDepthLuma - 8);
+
+  // These variables are added for FMO
+  FILE * sgfile=NULL;
+  int i,j;
+  int frame_mb_only;
+  int mb_width, mb_height, mapunit_height;
+  int storedBplus1;
+
+  TestEncoderParams(bitdepth_qp_scale);
+
+  if (input->FrameRate == 0.0)
+    input->FrameRate = INIT_FRAME_RATE;
+
+  // Set block sizes
+
+    // Skip/Direct16x16
+    input->part_size[0][0] = 4;
+    input->part_size[0][1] = 4;
+  // 16x16
+    input->part_size[1][0] = 4;
+    input->part_size[1][1] = 4;
+  // 16x8
+    input->part_size[2][0] = 4;
+    input->part_size[2][1] = 2;
+  // 8x16
+    input->part_size[3][0] = 2;
+    input->part_size[3][1] = 4;
+  // 8x8
+    input->part_size[4][0] = 2;
+    input->part_size[4][1] = 2;
+  // 8x4
+    input->part_size[5][0] = 2;
+    input->part_size[5][1] = 1;
+  // 4x8
+    input->part_size[6][0] = 1;
+    input->part_size[6][1] = 2;
+  // 4x4
+    input->part_size[7][0] = 1;
+    input->part_size[7][1] = 1;
+
+    input->blocktype_lut[0][0] = 7; // 4x4
+    input->blocktype_lut[0][1] = 6; // 4x8
+    input->blocktype_lut[1][0] = 5; // 8x4
+    input->blocktype_lut[1][1] = 4; // 8x8
+    input->blocktype_lut[1][3] = 3; // 8x16
+    input->blocktype_lut[3][1] = 2; // 16x8
+    input->blocktype_lut[3][3] = 1; // 16x16
+
+  for (j = 0; j<8;j++)
+  {
+    for (i = 0; i<2; i++)
+    {
+      input->blc_size[j][i] = input->part_size[j][i] * BLOCK_SIZE;
+    }
+  }
+
+  // set proper log2_max_frame_num_minus4.
+  storedBplus1 = (input->BRefPictures ) ? input->successive_Bframe + 1: 1;
+
+  if (input->Log2MaxFNumMinus4 == -1)
+    log2_max_frame_num_minus4 = iClip3(0,12, (int) (CeilLog2(input->no_frames * storedBplus1) - 4));
+  else
+    log2_max_frame_num_minus4 = input->Log2MaxFNumMinus4;
+
+  if (log2_max_frame_num_minus4 == 0 && input->num_ref_frames == 16)
+  {
+    snprintf(errortext, ET_SIZE, " NumberReferenceFrames=%d and Log2MaxFNumMinus4=%d may lead to an invalid value of frame_num.", input->num_ref_frames, input-> Log2MaxFNumMinus4);
+    error (errortext, 500);
+  }
+
+  // set proper log2_max_pic_order_cnt_lsb_minus4.
+  if (input->Log2MaxPOCLsbMinus4 == - 1)
+    log2_max_pic_order_cnt_lsb_minus4 = iClip3(0,12, (int) (CeilLog2( 2*input->no_frames * (input->jumpd + 1)) - 4));
+  else
+    log2_max_pic_order_cnt_lsb_minus4 = input->Log2MaxPOCLsbMinus4;
+
+  if (((1<<(log2_max_pic_order_cnt_lsb_minus4 + 3)) < input->jumpd * 4) && input->Log2MaxPOCLsbMinus4 != -1)
+    error("log2_max_pic_order_cnt_lsb_minus4 might not be sufficient for encoding. Increase value.",400);
+
+  // B picture consistency check
+  if(input->successive_Bframe > input->jumpd)
+  {
+    snprintf(errortext, ET_SIZE, "Number of B-frames %d can not exceed the number of frames skipped", input->successive_Bframe);
+    error (errortext, 400);
+  }
+
+  // Direct Mode consistency check
+  if(input->successive_Bframe && input->direct_spatial_mv_pred_flag != DIR_SPATIAL && input->direct_spatial_mv_pred_flag != DIR_TEMPORAL)
+  {
+    snprintf(errortext, ET_SIZE, "Unsupported direct mode=%d, use TEMPORAL=0 or SPATIAL=1", input->direct_spatial_mv_pred_flag);
+    error (errortext, 400);
+  }
+
+  if (input->PicInterlace>0 || input->MbInterlace>0)
+  {
+    if (input->directInferenceFlag==0)
+      printf("\nDirectInferenceFlag set to 1 due to interlace coding.");
+    input->directInferenceFlag = 1;
+  }
+
+  if (input->PicInterlace>0)
+  {
+    if (input->IntraBottom!=0 && input->IntraBottom!=1)
+    {
+      snprintf(errortext, ET_SIZE, "Incorrect value %d for IntraBottom. Use 0 (disable) or 1 (enable).", input->IntraBottom);
+      error (errortext, 400);
+    }
+  }
+  // Cabac/UVLC consistency check
+  if (input->symbol_mode != UVLC && input->symbol_mode != CABAC)
+  {
+    snprintf (errortext, ET_SIZE, "Unsupported symbol mode=%d, use UVLC=0 or CABAC=1",input->symbol_mode);
+    error (errortext, 400);
+  }
+
+  // Open Files
+  if ((p_in=open(input->infile, OPENFLAGS_READ))==-1)
+  {
+    snprintf(errortext, ET_SIZE, "Input file %s does not exist",input->infile);
+    error (errortext, 500);
+  }
+
+  if (strlen (input->ReconFile) > 0 && (p_dec=open(input->ReconFile, OPENFLAGS_WRITE, OPEN_PERMISSIONS))==-1)
+  {
+    snprintf(errortext, ET_SIZE, "Error open file %s", input->ReconFile);
+    error (errortext, 500);
+  }
+
+#if TRACE
+  if (strlen (input->TraceFile) > 0 && (p_trace=fopen(input->TraceFile,"w"))==NULL)
+  {
+    snprintf(errortext, ET_SIZE, "Error open file %s", input->TraceFile);
+    error (errortext, 500);
+  }
+#endif
+
+  if (input->img_width % 16 != 0)
+  {
+    img->auto_crop_right = 16-(input->img_width % 16);
+  }
+  else
+  {
+    img->auto_crop_right=0;
+  }
+  if (input->PicInterlace || input->MbInterlace)
+  {
+    if (input->img_height % 2 != 0)
+    {
+      error ("even number of lines required for interlaced coding", 500);
+    }
+    if (input->img_height % 32 != 0)
+    {
+      img->auto_crop_bottom = 32-(input->img_height % 32);
+    }
+    else
+    {
+      img->auto_crop_bottom=0;
+    }
+  }
+  else
+  {
+    if (input->img_height % 16 != 0)
+    {
+      img->auto_crop_bottom = 16-(input->img_height % 16);
+    }
+    else
+    {
+      img->auto_crop_bottom=0;
+    }
+  }
+  if (img->auto_crop_bottom || img->auto_crop_right)
+  {
+    fprintf (stderr, "Warning: Automatic cropping activated: Coded frame Size: %dx%d\n", input->img_width+img->auto_crop_right, input->img_height+img->auto_crop_bottom);
+  }
+
+  if ((input->slice_mode==1)&&(input->MbInterlace!=0))
+  {
+    if ((input->slice_argument%2)!=0)
+    {
+      fprintf ( stderr, "Warning: slice border within macroblock pair. ");
+      if (input->slice_argument > 1)
+      {
+        input->slice_argument--;
+      }
+      else
+      {
+        input->slice_argument++;
+      }
+      fprintf ( stderr, "Using %d MBs per slice.\n", input->slice_argument);
+    }
+  }
+  /*
+  // add check for MAXSLICEGROUPIDS
+  if(input->num_slice_groups_minus1>=MAXSLICEGROUPIDS)
+  {
+    snprintf(errortext, ET_SIZE, "num_slice_groups_minus1 exceeds MAXSLICEGROUPIDS");
+    error (errortext, 500);
+  }
+  */
+
+  // Following codes are to read slice group configuration from SliceGroupConfigFileName for slice group type 0,2 or 6
+  if( (input->num_slice_groups_minus1!=0)&&
+    ((input->slice_group_map_type == 0) || (input->slice_group_map_type == 2) || (input->slice_group_map_type == 6)) )
+  {
+    if (strlen (input->SliceGroupConfigFileName) > 0 && (sgfile=fopen(input->SliceGroupConfigFileName,"r"))==NULL)
+    {
+      snprintf(errortext, ET_SIZE, "Error open file %s", input->SliceGroupConfigFileName);
+      error (errortext, 500);
+    }
+    else
+    {
+      if (input->slice_group_map_type == 0)
+      {
+        input->run_length_minus1=(int *)malloc(sizeof(int)*(input->num_slice_groups_minus1+1));
+        if (NULL==input->run_length_minus1)
+          no_mem_exit("PatchInp: input->run_length_minus1");
+
+        // each line contains one 'run_length_minus1' value
+        for(i=0;i<=input->num_slice_groups_minus1;i++)
+        {
+          fscanf(sgfile,"%d",(input->run_length_minus1+i));
+          fscanf(sgfile,"%*[^\n]");
+        }
+      }
+      else if (input->slice_group_map_type == 2)
+      {
+        input->top_left=(int *)malloc(sizeof(int)*input->num_slice_groups_minus1);
+        input->bottom_right=(int *)malloc(sizeof(int)*input->num_slice_groups_minus1);
+        if (NULL==input->top_left)
+          no_mem_exit("PatchInp: input->top_left");
+        if (NULL==input->bottom_right)
+          no_mem_exit("PatchInp: input->bottom_right");
+
+        // every two lines contain 'top_left' and 'bottom_right' value
+        for(i=0;i<input->num_slice_groups_minus1;i++)
+        {
+          fscanf(sgfile,"%d",(input->top_left+i));
+          fscanf(sgfile,"%*[^\n]");
+          fscanf(sgfile,"%d",(input->bottom_right+i));
+          fscanf(sgfile,"%*[^\n]");
+        }
+      }
+      else if (input->slice_group_map_type == 6)
+      {
+        int tmp;
+
+        frame_mb_only = !(input->PicInterlace || input->MbInterlace);
+        mb_width= (input->img_width+img->auto_crop_right)>>4;
+        mb_height= (input->img_height+img->auto_crop_bottom)>>4;
+        mapunit_height=mb_height/(2-frame_mb_only);
+
+        input->slice_group_id=(byte * ) malloc(sizeof(byte)*mapunit_height*mb_width);
+        if (NULL==input->slice_group_id)
+          no_mem_exit("PatchInp: input->slice_group_id");
+
+        // each line contains slice_group_id for one Macroblock
+        for (i=0;i<mapunit_height*mb_width;i++)
+        {
+          fscanf(sgfile,"%d", &tmp);
+          input->slice_group_id[i]= (byte) tmp;
+          if ( *(input->slice_group_id+i) > input->num_slice_groups_minus1 )
+          {
+            snprintf(errortext, ET_SIZE, "Error read slice group information from file %s", input->SliceGroupConfigFileName);
+            error (errortext, 500);
+          }
+          fscanf(sgfile,"%*[^\n]");
+        }
+      }
+      fclose(sgfile);
+    }
+  }
+
+
+  if (input->ReferenceReorder && (input->PicInterlace || input->MbInterlace))
+  {
+    snprintf(errortext, ET_SIZE, "ReferenceReorder Not supported with Interlace encoding methods\n");
+    error (errortext, 400);
+  }
+
+  if (input->PocMemoryManagement && (input->PicInterlace || input->MbInterlace))
+  {
+    snprintf(errortext, ET_SIZE, "PocMemoryManagement not supported with Interlace encoding methods\n");
+    error (errortext, 400);
+  }
+
+  // frame/field consistency check
+  if (input->PicInterlace != FRAME_CODING && input->PicInterlace != ADAPTIVE_CODING && input->PicInterlace != FIELD_CODING)
+  {
+    snprintf (errortext, ET_SIZE, "Unsupported PicInterlace=%d, use frame based coding=0 or field based coding=1 or adaptive=2",input->PicInterlace);
+    error (errortext, 400);
+  }
+
+  // frame/field consistency check
+  if (input->MbInterlace != FRAME_CODING && input->MbInterlace != ADAPTIVE_CODING && input->MbInterlace != FIELD_CODING && input->MbInterlace != FRAME_MB_PAIR_CODING)
+  {
+    snprintf (errortext, ET_SIZE, "Unsupported MbInterlace=%d, use frame based coding=0 or field based coding=1 or adaptive=2 or frame MB pair only=3",input->MbInterlace);
+    error (errortext, 400);
+  }
+
+
+  if ((!input->rdopt)&&(input->MbInterlace))
+  {
+    snprintf(errortext, ET_SIZE, "MB AFF is not compatible with non-rd-optimized coding.");
+    error (errortext, 500);
+  }
+
+  /*if (input->rdopt>2)
+  {
+    snprintf(errortext, ET_SIZE, "RDOptimization=3 mode has been deactivated do to diverging of real and simulated decoders.");
+    error (errortext, 500);
+  }*/
+
+  // check RDoptimization mode and profile. FMD does not support Frex Profiles.
+  if (input->rdopt==2 && input->ProfileIDC>=FREXT_HP)
+  {
+    snprintf(errortext, ET_SIZE, "Fast Mode Decision methods does not support FREX Profiles");
+    error (errortext, 500);
+  }
+
+  if ( (input->MEErrorMetric[Q_PEL] == ERROR_SATD && input->MEErrorMetric[H_PEL] == ERROR_SAD && input->MEErrorMetric[F_PEL] == ERROR_SAD)
+    && input->SearchMode > FAST_FULL_SEARCH && input->SearchMode < EPZS)
+  {
+    snprintf(errortext, ET_SIZE, "MEDistortionQPel=2, MEDistortionHPel=0, MEDistortionFPel=0 is not allowed when SearchMode is set to 1 or 2.");
+    error (errortext, 500);
+  }
+
+  // Tian Dong: May 31, 2002
+  // The number of frames in one sub-seq in enhanced layer should not exceed
+  // the number of reference frame number.
+  if ( input->NumFramesInELSubSeq >= input->num_ref_frames || input->NumFramesInELSubSeq < 0 )
+  {
+    snprintf(errortext, ET_SIZE, "NumFramesInELSubSeq (%d) is out of range [0,%d).", input->NumFramesInELSubSeq, input->num_ref_frames);
+    error (errortext, 500);
+  }
+  // Tian Dong: Enhanced GOP is not supported in bitstream mode. September, 2002
+  if ( input->NumFramesInELSubSeq > 0 && input->of_mode == PAR_OF_ANNEXB )
+  {
+    snprintf(errortext, ET_SIZE, "Enhanced GOP is not supported in bitstream mode and RTP mode yet.");
+    error (errortext, 500);
+  }
+  // Tian Dong (Sept 2002)
+  // The AFF is not compatible with spare picture for the time being.
+  if ((input->PicInterlace || input->MbInterlace) && input->SparePictureOption == TRUE)
+  {
+    snprintf(errortext, ET_SIZE, "AFF is not compatible with spare picture.");
+    error (errortext, 500);
+  }
+
+  // Only the RTP mode is compatible with spare picture for the time being.
+  if (input->of_mode != PAR_OF_RTP && input->SparePictureOption == TRUE)
+  {
+    snprintf(errortext, ET_SIZE, "Only RTP output mode is compatible with spare picture features.");
+    error (errortext, 500);
+  }
+
+  if( (input->WeightedPrediction > 0 || input->WeightedBiprediction > 0) && (input->MbInterlace))
+  {
+    snprintf(errortext, ET_SIZE, "Weighted prediction coding is not supported for MB AFF currently.");
+    error (errortext, 500);
+  }
+  if ( input->NumFramesInELSubSeq > 0 && input->WeightedPrediction > 0)
+  {
+    snprintf(errortext, ET_SIZE, "Enhanced GOP is not supported in weighted prediction coding mode yet.");
+    error (errortext, 500);
+  }
+
+  //! the number of slice groups is forced to be 1 for slice group type 3-5
+  if(input->num_slice_groups_minus1 > 0)
+  {
+    if( (input->slice_group_map_type >= 3) && (input->slice_group_map_type<=5) )
+      input->num_slice_groups_minus1 = 1;
+  }
+
+  // Rate control
+  if(input->RCEnable)
+  {
+    if ( ((input->img_height+img->auto_crop_bottom)*(input->img_width+img->auto_crop_right)/256)%input->basicunit!=0)
+    {
+      snprintf(errortext, ET_SIZE, "Frame size in macroblocks must be a multiple of BasicUnit.");
+      error (errortext, 500);
+    }
+
+    if ( (input->successive_Bframe || input->jumpd) && input->RCUpdateMode == RC_MODE_1 )
+    {
+      snprintf(errortext, ET_SIZE, "Use RC_MODE_1 only for all-intra coding.");
+      error (errortext, 500);
+    }
+  }
+
+  if ((input->successive_Bframe)&&(input->BRefPictures)&&(input->idr_enable)&&(input->intra_period)&&(input->pic_order_cnt_type!=0))
+  {
+    error("Stored B pictures combined with IDR pictures only supported in Picture Order Count type 0\n",-1000);
+  }
+
+  if( !input->direct_spatial_mv_pred_flag && input->num_ref_frames<2 && input->successive_Bframe >0)
+    error("temporal direct needs at least 2 ref frames\n",-1000);
+
+  // frext
+  if(input->Transform8x8Mode && input->sp_periodicity /*SP-frames*/)
+  {
+    snprintf(errortext, ET_SIZE, "\nThe new 8x8 mode is not implemented for sp-frames.");
+    error (errortext, 500);
+  }
+
+  if(input->Transform8x8Mode && (input->ProfileIDC<FREXT_HP || input->ProfileIDC>FREXT_Hi444))
+  {
+    snprintf(errortext, ET_SIZE, "\nTransform8x8Mode may be used only with ProfileIDC %d to %d.", FREXT_HP, FREXT_Hi444);
+    error (errortext, 500);
+  }
+  if(input->ScalingMatrixPresentFlag && (input->ProfileIDC<FREXT_HP || input->ProfileIDC>FREXT_Hi444))
+  {
+    snprintf(errortext, ET_SIZE, "\nScalingMatrixPresentFlag may be used only with ProfileIDC %d to %d.", FREXT_HP, FREXT_Hi444);
+    error (errortext, 500);
+  }
+
+  if(input->yuv_format==YUV422 && input->ProfileIDC < FREXT_Hi422)
+  {
+    snprintf(errortext, ET_SIZE, "\nFRExt Profile(YUV Format) Error!\nYUV422 can be used only with ProfileIDC %d or %d\n",FREXT_Hi422, FREXT_Hi444);
+    error (errortext, 500);
+  }
+  if(input->yuv_format==YUV444 && input->ProfileIDC < FREXT_Hi444)
+  {
+    snprintf(errortext, ET_SIZE, "\nFRExt Profile(YUV Format) Error!\nYUV444 can be used only with ProfileIDC %d.\n",FREXT_Hi444);
+    error (errortext, 500);
+  }
+
+  if (input->successive_Bframe && ((input->BiPredMotionEstimation) && (input->search_range < input->BiPredMESearchRange)))
+  {
+    snprintf(errortext, ET_SIZE, "\nBiPredMESearchRange must be smaller or equal SearchRange.");
+    error (errortext, 500);
+  }
+
+  // check consistency
+  if ( input->ChromaMEEnable && !(input->ChromaMCBuffer) ) {
+    snprintf(errortext, ET_SIZE, "\nChromaMCBuffer must be set to 1 if ChromaMEEnable is set.");
+    error (errortext, 500);
+  }
+
+  if ( input->ChromaMEEnable && input->yuv_format ==  YUV400) {
+    snprintf(errortext, ET_SIZE, "\nChromaMEEnable cannot be used with YUV400 color format.");
+    input->ChromaMEEnable = 0;
+  }
+
+  if (input->EnableOpenGOP && input->PicInterlace)
+  {
+    snprintf(errortext, ET_SIZE, "Open GOP currently not supported for Field coded pictures.");
+    error (errortext, 500);
+  }
+
+  if (input->EnableOpenGOP)
+    input->ReferenceReorder = 1;
+
+  if (input->redundant_pic_flag)
+  {
+    if (input->PicInterlace || input->MbInterlace)
+    {
+      snprintf(errortext, ET_SIZE, "Redundant pictures cannot be used with interlaced tools.");
+      error (errortext, 500);
+    }
+    if (input->RDPictureDecision)
+    {
+      snprintf(errortext, ET_SIZE, "Redundant pictures cannot be used with RDPictureDecision.");
+      error (errortext, 500);
+    }
+    if (input->successive_Bframe)
+    {
+      snprintf(errortext, ET_SIZE, "Redundant pictures cannot be used with B frames.");
+      error (errortext, 500);
+    }
+    if (input->PrimaryGOPLength < (1 << input->NumRedundantHierarchy))
+    {
+      snprintf(errortext, ET_SIZE, "PrimaryGOPLength must be equal or greater than 2^NumRedundantHierarchy.");
+      error (errortext, 500);
+    }
+    if (input->num_ref_frames < input->PrimaryGOPLength)
+    {
+      snprintf(errortext, ET_SIZE, "NumberReferenceFrames must be greater than or equal to PrimaryGOPLength.");
+      error (errortext, 500);
+    }
+  }
+
+  if (input->num_ref_frames == 1 && input->successive_Bframe)
+  {
+    fprintf( stderr, "\nWarning: B slices used but only one reference allocated within reference buffer.\n");
+    fprintf( stderr, "         Performance may be considerably compromised! \n");
+    fprintf( stderr, "         2 or more references recommended for use with B slices.\n");
+  }
+  if ((input->HierarchicalCoding || input->BRefPictures) && input->successive_Bframe)
+  {
+    fprintf( stderr, "\nWarning: Hierarchical coding or Referenced B slices used.\n");
+    fprintf( stderr, "         Make sure that you have allocated enough references\n");
+    fprintf( stderr, "         in reference buffer to achieve best performance.\n");
+  }
+
+  ProfileCheck();
+  LevelCheck();
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Converts the configured frame counts from base-layer frames to
+ *    total frames.
+ *
+ *    When the frames are grouped into two layers, "FramesToBeEncoded"
+ *    in the config file counts only the base-layer frames. Each base
+ *    layer interval carries NumFramesInELSubSeq additional frames, so
+ *    both input->no_frames and (if non-zero) input->NumFrameIn2ndIGOP
+ *    are rescaled accordingly. FirstFrameIn2ndIGOP is then set to the
+ *    rescaled total frame count.
+ ***********************************************************************
+ */
+void PatchInputNoFrames(void)
+{
+  // One base-layer frame plus the enhancement-layer frames that follow it.
+  const int frames_per_interval = input->NumFramesInELSubSeq + 1;
+
+  input->no_frames = 1 + (input->no_frames - 1) * frames_per_interval;
+
+  // A value of zero is left untouched.
+  if (input->NumFrameIn2ndIGOP != 0)
+  {
+    input->NumFrameIn2ndIGOP = 1 + (input->NumFrameIn2ndIGOP - 1) * frames_per_interval;
+  }
+
+  // Must follow the update above: uses the rescaled total.
+  FirstFrameIn2ndIGOP = input->no_frames;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Checks the configured coding tools against the selected profile.
+ *
+ *    Each violated rule formats a message into errortext and reports
+ *    it through error() with code 500. The one exception is the
+ *    ChromaIntraDisable / FastCrIntraDecision conflict, which only
+ *    prints a warning to stderr and clears FastCrIntraDecision.
+ *
+ *    Profile numbers used below: 66 = baseline, 77 = main,
+ *    88 = extended, FREXT_HP .. FREXT_Hi444 = FRExt profiles.
+ ***********************************************************************
+ */
+static void ProfileCheck(void)
+{
+  // The profile itself must be one of the known values.
+  if((input->ProfileIDC != 66 ) &&
+     (input->ProfileIDC != 77 ) &&
+     (input->ProfileIDC != 88 ) &&
+     (input->ProfileIDC != FREXT_HP    ) &&
+     (input->ProfileIDC != FREXT_Hi10P ) &&
+     (input->ProfileIDC != FREXT_Hi422 ) &&
+     (input->ProfileIDC != FREXT_Hi444 ))
+  {
+    snprintf(errortext, ET_SIZE, "Profile must be baseline(66)/main(77)/extended(88) or FRExt (%d to %d).", FREXT_HP,FREXT_Hi444);
+    error (errortext, 500);
+  }
+
+  // Profile-independent restriction: data partitioning excludes CABAC.
+  if ((input->partition_mode) && (input->symbol_mode==CABAC))
+  {
+    snprintf(errortext, ET_SIZE, "Data partitioning and CABAC is not supported in any profile.");
+    error (errortext, 500);
+  }
+
+  // Redundant pictures: baseline only.
+  if (input->redundant_pic_flag)
+  {
+    if (input->ProfileIDC != 66)
+    {
+      snprintf(errortext, ET_SIZE, "Redundant pictures are only allowed in Baseline profile.");
+      error (errortext, 500);
+    }
+  }
+
+  // Data partitioning: extended only.
+  if (input->partition_mode)
+  {
+    if (input->ProfileIDC != 88)
+    {
+      snprintf(errortext, ET_SIZE, "Data partitioning is only allowed in extended profile.");
+      error (errortext, 500);
+    }
+  }
+
+  // Not fatal: warn and fall back by disabling FastCrIntraDecision.
+  if (input->ChromaIntraDisable && input->FastCrIntraDecision)
+  {
+    fprintf( stderr, "\n----------------------------------------------------------------------------------\n");
+    fprintf( stderr, "\n Warning: ChromaIntraDisable and FastCrIntraDecision cannot be combined together.\n Using only Chroma Intra DC mode.\n");
+    fprintf( stderr, "\n----------------------------------------------------------------------------------\n");
+    input->FastCrIntraDecision=0;
+  }
+
+  // baseline
+  if (input->ProfileIDC == 66 )
+  {
+    // B slices are tolerated only when PReplaceBSlice is set.
+    if ((input->successive_Bframe || input->BRefPictures==2) && input->PReplaceBSlice == 0)
+    {
+      snprintf(errortext, ET_SIZE, "B slices are not allowed in baseline.");
+      error (errortext, 500);
+    }
+    if (input->sp_periodicity)
+    {
+      snprintf(errortext, ET_SIZE, "SP pictures are not allowed in baseline.");
+      error (errortext, 500);
+    }
+    if (input->WeightedPrediction)
+    {
+      snprintf(errortext, ET_SIZE, "Weighted prediction is not allowed in baseline.");
+      error (errortext, 500);
+    }
+    if (input->WeightedBiprediction)
+    {
+      // Name the offending parameter; this check used to repeat the
+      // WeightedPrediction message and so pointed at the wrong setting.
+      snprintf(errortext, ET_SIZE, "Weighted biprediction is not allowed in baseline.");
+      error (errortext, 500);
+    }
+    if (input->symbol_mode == CABAC)
+    {
+      snprintf(errortext, ET_SIZE, "CABAC is not allowed in baseline.");
+      error (errortext, 500);
+    }
+  }
+
+  // main
+  if (input->ProfileIDC == 77 )
+  {
+    if (input->sp_periodicity)
+    {
+      snprintf(errortext, ET_SIZE, "SP pictures are not allowed in main.");
+      error (errortext, 500);
+    }
+    if (input->num_slice_groups_minus1)
+    {
+      snprintf(errortext, ET_SIZE, "num_slice_groups_minus1>0 (FMO) is not allowed in main.");
+      error (errortext, 500);
+    }
+  }
+
+  // extended
+  if (input->ProfileIDC == 88 )
+  {
+    if (!input->directInferenceFlag)
+    {
+      snprintf(errortext, ET_SIZE, "direct_8x8_inference flag must be equal to 1 in extended.");
+      error (errortext, 500);
+    }
+
+    if (input->symbol_mode == CABAC)
+    {
+      snprintf(errortext, ET_SIZE, "CABAC is not allowed in extended.");
+      error (errortext, 500);
+    }
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Checks the configured parameters against the selected level.
+ *
+ *    For LevelIDC >= 30 a cleared directInferenceFlag is forced to 1
+ *    after printing a warning to stderr. Interlace modes
+ *    (PicInterlace or MbInterlace > 0) are reported through error()
+ *    with code 500 when LevelIDC lies outside [21, 41].
+ ***********************************************************************
+ */
+static void LevelCheck(void)
+{
+  const int level_idc      = input->LevelIDC;
+  const int uses_interlace = (input->PicInterlace > 0) || (input->MbInterlace > 0);
+
+  // Correct the setting rather than abort.
+  if (level_idc >= 30 && !input->directInferenceFlag)
+  {
+    fprintf( stderr, "\nWarning: LevelIDC 3.0 and above require direct_8x8_inference to be set to 1. Please check your settings.\n");
+    input->directInferenceFlag = 1;
+  }
+
+  if (uses_interlace && (level_idc < 21 || level_idc > 41))
+  {
+    snprintf(errortext, ET_SIZE, "\nInterlace modes only supported for LevelIDC in the range of 2.1 and 4.1. Please check your settings.\n");
+    error (errortext, 500);
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/configfile.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/configfile.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/configfile.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,324 @@
+
+/*!
+ ***********************************************************************
+ *  \file
+ *     configfile.h
+ *  \brief
+ *     Prototypes for configfile.c and definitions of used structures.
+ ***********************************************************************
+ */
+
+#include "fmo.h"
+
+#ifndef _CONFIGFILE_H_
+#define _CONFIGFILE_H_
+
+#define DEFAULTCONFIGFILENAME "encoder.cfg"
+
+#define PROFILE_IDC     88
+#define LEVEL_IDC       21
+
+
+//! One entry of the configuration table: ties a config-file token to the
+//! variable that receives its value, with its type, default and limits.
+//! Field order matters: Map[] initializes these entries positionally.
+typedef struct {
+  char *TokenName;   //! parameter name as written in the config file
+  void *Place;       //! address of the variable that receives the value (Map[] uses members of configinput)
+  int Type;          //! 0: int, 1: text, 2: double
+  double Default;    //! initial value
+  int param_limits; //! 0: no limits, 1: both min and max, 2: only min (i.e. no negatives), 3: special case for QPs since min needs bitdepth_qp_scale
+  double min_limit;  //! lower bound; which limits apply is selected by param_limits
+  double max_limit;  //! upper bound; which limits apply is selected by param_limits
+} Mapping;
+
+
+
+// Object the Place pointers of the Map[] entries point into.
+// NOTE(review): this is an object definition (not an extern declaration) in a
+// header, so every file that includes it gets its own tentative definition -
+// confirm this is intended / that only one translation unit includes it.
+InputParameters configinput;
+
+
+#ifdef INCLUDED_BY_CONFIGFILE_C
+// Mapping_Map Syntax:
+// {NAMEinConfigFile,  &configinput.VariableName, Type, InitialValue, LimitType, MinLimit, MaxLimit}
+// Types : {0:int, 1:text, 2: double}
+// LimitType: {0:none, 1:both, 2:minimum, 3: QP based}
+// We could separate this based on types to make it more flexible and allow also defaults for text types.
+Mapping Map[] = {
+    {"ProfileIDC",               &configinput.ProfileIDC,                   0,   (double) PROFILE_IDC,      0,  0.0,              0.0              },
+    {"LevelIDC",                 &configinput.LevelIDC,                     0,   (double) LEVEL_IDC,        0,  0.0,              0.0              },
+    {"FrameRate",                &configinput.FrameRate,                    2,   (double) INIT_FRAME_RATE,  1,  0.0,            100.0              },
+    {"IDRIntraEnable",           &configinput.idr_enable,                   0,   0.0,                       1,  0.0,              1.0              },
+    {"ResendSPS",                &configinput.ResendSPS,                    0,   0.0,                       1,  0.0,              1.0              },
+    {"StartFrame",               &configinput.start_frame,                  0,   0.0,                       2,  0.0,              0.0              },
+    {"IntraPeriod",              &configinput.intra_period,                 0,   0.0,                       2,  0.0,              0.0              },
+    {"EnableOpenGOP",            &configinput.EnableOpenGOP,                0,   0.0,                       1,  0.0,              1.0              },
+    {"FramesToBeEncoded",        &configinput.no_frames,                    0,   1.0,                       2,  1.0,              0.0              },
+    {"QPISlice",                 &configinput.qp0,                          0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"QPPSlice",                 &configinput.qpN,                          0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"QPBSlice",                 &configinput.qpB,                          0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"FrameSkip",                &configinput.jumpd,                        0,   0.0,                       2,  0.0,              0.0              },
+    {"DisableSubpelME",          &configinput.DisableSubpelME,              0,   0.0,                       1,  0.0,              1.0              },
+    {"SearchRange",              &configinput.search_range,                 0,   16.0,                      2,  0.0,              0.0              },
+    {"NumberReferenceFrames",    &configinput.num_ref_frames,               0,   1.0,                       1,  1.0,             16.0              },
+    {"PList0References",         &configinput.P_List0_refs,                 0,   0.0,                       1,  0.0,             16.0              },
+    {"BList0References",         &configinput.B_List0_refs,                 0,   0.0,                       1,  0.0,             16.0              },
+    {"BList1References",         &configinput.B_List1_refs,                 0,   1.0,                       1,  0.0,             16.0              },
+    {"Log2MaxFNumMinus4",        &configinput.Log2MaxFNumMinus4,            0,   0.0,                       1, -1.0,             12.0              },
+    {"Log2MaxPOCLsbMinus4",      &configinput.Log2MaxPOCLsbMinus4,          0,   2.0,                       1, -1.0,             12.0              },
+    {"GenerateMultiplePPS",      &configinput.GenerateMultiplePPS,          0,   0.0,                       1,  0.0,              1.0              },
+    {"Generate_SEIVUI",          &configinput.Generate_SEIVUI,              0,   0.0,                       1,  0.0,              1.0              },
+    {"SEIMessageText",           &configinput.SEIMessageText,               1,   0.0,                       0,  0.0,              0.0              },
+    {"ResendPPS",                &configinput.ResendPPS,                    0,   0.0,                       1,  0.0,              1.0              },
+    {"SourceWidth",              &configinput.img_width,                    0,   176.0,                     2, 16.0,              0.0              },
+    {"SourceHeight",             &configinput.img_height,                   0,   144.0,                     2, 16.0,              0.0              },
+    {"MbLineIntraUpdate",        &configinput.intra_upd,                    0,   0.0,                       1,  0.0,              1.0              },
+    {"SliceMode",                &configinput.slice_mode,                   0,   0.0,                       1,  0.0,              3.0              },
+    {"SliceArgument",            &configinput.slice_argument,               0,   1.0,                       2,  1.0,              1.0              },
+    {"UseConstrainedIntraPred",  &configinput.UseConstrainedIntraPred,      0,   0.0,                       1,  0.0,              1.0              },
+    {"InputFile",                &configinput.infile,                       1,   0.0,                       0,  0.0,              0.0              },
+    {"InputHeaderLength",        &configinput.infile_header,                0,   0.0,                       2,  0.0,              1.0              },
+    {"OutputFile",               &configinput.outfile,                      1,   0.0,                       0,  0.0,              0.0              },
+    {"ReconFile",                &configinput.ReconFile,                    1,   0.0,                       0,  0.0,              0.0              },
+    {"TraceFile",                &configinput.TraceFile,                    1,   0.0,                       0,  0.0,              0.0              },
+    {"DisposableP",              &configinput.DisposableP,                  0,   0.0,                       1,  0.0,              1.0              },
+    {"DispPQPOffset",            &configinput.DispPQPOffset,                0,   0.0,                       0,-51.0,             51.0              },
+    {"NumberBFrames",            &configinput.successive_Bframe,            0,   0.0,                       2,  0.0,              0.0              },
+    {"PReplaceBSlice",           &configinput.PReplaceBSlice,               0,   0.0,                       1,  0.0,              1.0              },
+    {"BRefPicQPOffset",          &configinput.qpBRSOffset,                  0,   0.0,                       0,-51.0,             51.0              },
+    {"DirectModeType",           &configinput.direct_spatial_mv_pred_flag,  0,   0.0,                       1,  0.0,              1.0              },
+    {"DirectInferenceFlag",      &configinput.directInferenceFlag,          0,   1.0,                       1,  0.0,              1.0              },
+    {"SPPicturePeriodicity",     &configinput.sp_periodicity,               0,   0.0,                       2,  0.0,              0.0              },
+    {"QPSPSlice",                &configinput.qpsp,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"QPSP2Slice",               &configinput.qpsp_pred,                    0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"SI_FRAMES",                &configinput.si_frame_indicator,           0,   0.0,                       1,  0.0,              1.0              },
+    {"SP_output",                &configinput.sp_output_indicator,          0,   0.0,                       1,  0.0,              1.0              },
+    {"SP_output_name",           &configinput.sp_output_filename,           1,   0.0,                       0,  0.0,              0.0              },
+    {"SP2_FRAMES",               &configinput.sp2_frame_indicator,          0,   0.0,                       1,  0.0,              1.0              },
+    {"SP2_input_name1",          &configinput.sp2_input_filename1,          1,   0.0,                       0,  0.0,              0.0              },
+    {"SP2_input_name2",          &configinput.sp2_input_filename2,          1,   0.0,                       0,  0.0,              0.0              },
+    {"SymbolMode",               &configinput.symbol_mode,                  0,   0.0,                       1,  (double) UVLC,    (double) CABAC   },
+    {"OutFileMode",              &configinput.of_mode,                      0,   0.0,                       1,  0.0,              1.0              },
+    {"PartitionMode",            &configinput.partition_mode,               0,   0.0,                       1,  0.0,              1.0              },
+    {"InterSearch16x16",         &configinput.InterSearch16x16,             0,   1.0,                       1,  0.0,              1.0              },
+    {"InterSearch16x8",          &configinput.InterSearch16x8 ,             0,   1.0,                       1,  0.0,              1.0              },
+    {"InterSearch8x16",          &configinput.InterSearch8x16,              0,   1.0,                       1,  0.0,              1.0              },
+    {"InterSearch8x8",           &configinput.InterSearch8x8 ,              0,   1.0,                       1,  0.0,              1.0              },
+    {"InterSearch8x4",           &configinput.InterSearch8x4,               0,   1.0,                       1,  0.0,              1.0              },
+    {"InterSearch4x8",           &configinput.InterSearch4x8,               0,   1.0,                       1,  0.0,              1.0              },
+    {"InterSearch4x4",           &configinput.InterSearch4x4,               0,   1.0,                       1,  0.0,              1.0              },
+    {"IntraDisableInterOnly",    &configinput.IntraDisableInterOnly,        0,   0.0,                       1,  0.0,              1.0              },
+    {"Intra4x4ParDisable",       &configinput.Intra4x4ParDisable,           0,   0.0,                       1,  0.0,              1.0              },
+    {"Intra4x4DiagDisable",      &configinput.Intra4x4DiagDisable,          0,   0.0,                       1,  0.0,              1.0              },
+    {"Intra4x4DirDisable",       &configinput.Intra4x4DirDisable,           0,   0.0,                       1,  0.0,              1.0              },
+    {"Intra16x16ParDisable",     &configinput.Intra16x16ParDisable,         0,   0.0,                       1,  0.0,              1.0              },
+    {"Intra16x16PlaneDisable",   &configinput.Intra16x16PlaneDisable,       0,   0.0,                       1,  0.0,              1.0              },
+    {"EnableIPCM",               &configinput.EnableIPCM,                   0,   1.0,                       1,  0.0,              1.0              },
+    {"ChromaIntraDisable",       &configinput.ChromaIntraDisable,           0,   0.0,                       1,  0.0,              1.0              },
+
+#ifdef _FULL_SEARCH_RANGE_
+    {"RestrictSearchRange",      &configinput.full_search,                  0,   2.0,                       1,  0.0,              2.0              },
+#endif
+#ifdef _ADAPT_LAST_GROUP_
+    {"LastFrameNumber",          &configinput.last_frame,                   0,   0.0,                       2,  0.0,              0.0              },
+#endif
+#ifdef _CHANGE_QP_
+    {"ChangeQPI",                &configinput.qp02,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"ChangeQPP",                &configinput.qpN2,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"ChangeQPB",                &configinput.qpB2,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"ChangeQPBSRefOffset",      &configinput.qpBRS2Offset,                 0,   0.0,                       1,-51.0,             51.0              },
+    {"ChangeQPStart",            &configinput.qp2start,                     0,   0.0,                       2,  0.0,              0.0              },
+#endif
+    {"RDOptimization",           &configinput.rdopt,                        0,   0.0,                       1,  0.0,              3.0              },
+    {"CtxAdptLagrangeMult",      &configinput.CtxAdptLagrangeMult,          0,   0.0,                       1,  0.0,              1.0              },
+    {"FastCrIntraDecision",      &configinput.FastCrIntraDecision,          0,   0.0,                       1,  0.0,              1.0              },
+    {"DisableThresholding",      &configinput.disthres,                     0,   0.0,                       1,  0.0,              1.0              },
+    {"DisableBSkipRDO",          &configinput.nobskip,                      0,   0.0,                       1,  0.0,              1.0              },
+    {"LossRateA",                &configinput.LossRateA,                    0,   0.0,                       2,  0.0,              0.0              },
+    {"LossRateB",                &configinput.LossRateB,                    0,   0.0,                       2,  0.0,              0.0              },
+    {"LossRateC",                &configinput.LossRateC,                    0,   0.0,                       2,  0.0,              0.0              },
+    {"NumberOfDecoders",         &configinput.NoOfDecoders,                 0,   0.0,                       2,  0.0,              0.0              },
+    {"RestrictRefFrames",        &configinput.RestrictRef ,                 0,   0.0,                       1,  0.0,              1.0              },
+#ifdef _LEAKYBUCKET_
+    {"NumberofLeakyBuckets",     &configinput.NumberLeakyBuckets,           0,   2.0,                       1,  2.0,              255.0            },
+    {"LeakyBucketRateFile",      &configinput.LeakyBucketRateFile,          1,   0.0,                       0,  0.0,              0.0              },
+    {"LeakyBucketParamFile",     &configinput.LeakyBucketParamFile,         1,   0.0,                       0,  0.0,              0.0              },
+#endif
+    {"PicInterlace",             &configinput.PicInterlace,                 0,   0.0,                       1,  0.0,              2.0              },
+    {"MbInterlace",              &configinput.MbInterlace,                  0,   0.0,                       1,  0.0,              3.0              },
+
+    {"IntraBottom",              &configinput.IntraBottom,                  0,   0.0,                       1,  0.0,              1.0              },
+
+    {"NumberFramesInEnhancementLayerSubSequence", &configinput.NumFramesInELSubSeq, 0,   0.0,               2,  0.0,              0.0              },
+    {"NumberOfFrameInSecondIGOP",&configinput.NumFrameIn2ndIGOP,            0,   0.0,                       2,  0.0,              0.0              },
+    {"RandomIntraMBRefresh",     &configinput.RandomIntraMBRefresh,         0,   0.0,                       2,  0.0,              0.0              },
+
+    {"WeightedPrediction",       &configinput.WeightedPrediction,           0,   0.0,                       1,  0.0,              1.0              },
+    {"WeightedBiprediction",     &configinput.WeightedBiprediction,         0,   0.0,                       1,  0.0,              2.0              },
+    {"UseWeightedReferenceME",   &configinput.UseWeightedReferenceME,       0,   0.0,                       1,  0.0,              1.0              },
+    {"RDPictureDecision",        &configinput.RDPictureDecision,            0,   0.0,                       1,  0.0,              1.0              },
+    {"RDPictureIntra",           &configinput.RDPictureIntra,               0,   0.0,                       1,  0.0,              1.0              },
+    {"RDPSliceWeightOnly",       &configinput.RDPSliceWeightOnly,           0,   1.0,                       1,  0.0,              1.0              },
+    {"RDPSliceBTest",            &configinput.RDPSliceBTest,                0,   0.0,                       1,  0.0,              1.0              },
+    {"RDBSliceWeightOnly",       &configinput.RDBSliceWeightOnly,           0,   0.0,                       1,  0.0,              1.0              },
+
+    {"SkipIntraInInterSlices",   &configinput.SkipIntraInInterSlices,       0,   0.0,                       1,  0.0,              1.0              },
+    {"BReferencePictures",       &configinput.BRefPictures,                 0,   0.0,                       1,  0.0,              2.0              },
+    {"HierarchicalCoding",       &configinput.HierarchicalCoding,           0,   0.0,                       1,  0.0,              3.0              },
+    {"HierarchyLevelQPEnable",   &configinput.HierarchyLevelQPEnable,       0,   0.0,                       1,  0.0,              1.0              },
+    {"ExplicitHierarchyFormat",  &configinput.ExplicitHierarchyFormat,      1,   0.0,                       0,  0.0,              0.0              },
+    {"ReferenceReorder",         &configinput.ReferenceReorder,             0,   0.0,                       1,  0.0,              1.0              },
+    {"PocMemoryManagement",      &configinput.PocMemoryManagement,          0,   0.0,                       1,  0.0,              1.0              },
+
+    //Bipredicting Motion Estimation parameters
+    {"BiPredMotionEstimation",   &configinput.BiPredMotionEstimation,       0,   0.0,                       1,  0.0,              1.0              },
+    {"BiPredMERefinements",      &configinput.BiPredMERefinements,          0,   0.0,                       1,  0.0,              5.0              },
+    {"BiPredMESearchRange",      &configinput.BiPredMESearchRange,          0,   8.0,                       2,  0.0,              0.0              },
+    {"BiPredMESubPel",           &configinput.BiPredMESubPel,               0,   1.0,                       1,  0.0,              2.0              },
+
+    {"LoopFilterParametersFlag", &configinput.LFSendParameters,             0,   0.0,                       1,  0.0,              1.0              },
+    {"LoopFilterDisable",        &configinput.LFDisableIdc,                 0,   0.0,                       1,  0.0,              2.0              },
+    {"LoopFilterAlphaC0Offset",  &configinput.LFAlphaC0Offset,              0,   0.0,                       1, -6.0,              6.0              },
+    {"LoopFilterBetaOffset",     &configinput.LFBetaOffset,                 0,   0.0,                       1, -6.0,              6.0              },
+    {"SparePictureOption",       &configinput.SparePictureOption,           0,   0.0,                       1,  0.0,              1.0              },
+    {"SparePictureDetectionThr", &configinput.SPDetectionThreshold,         0,   0.0,                       2,  0.0,              0.0              },
+    {"SparePicturePercentageThr",&configinput.SPPercentageThreshold,        0,   0.0,                       2,  0.0,            100.0              },
+
+    {"num_slice_groups_minus1",  &configinput.num_slice_groups_minus1,      0,   0.0,                       1,  0.0,  (double)MAXSLICEGROUPIDS - 1 },
+    {"slice_group_map_type",     &configinput.slice_group_map_type,         0,   0.0,                       1,  0.0,              6.0              },
+    {"slice_group_change_direction_flag", &configinput.slice_group_change_direction_flag, 0,   0.0,         1,  0.0,              2.0              },
+    {"slice_group_change_rate_minus1",    &configinput.slice_group_change_rate_minus1,    0,   0.0,         2,  0.0,              1.0              },
+    {"SliceGroupConfigFileName", &configinput.SliceGroupConfigFileName,     1,   0.0,                       0,  0.0,              0.0              },
+
+    {"UseRedundantPicture",      &configinput.redundant_pic_flag,           0,   0.0,                       1,  0.0,              1.0              },
+    {"NumRedundantHierarchy",    &configinput.NumRedundantHierarchy,        0,   0.0,                       1,  0.0,              4.0              },
+    {"PrimaryGOPLength",         &configinput.PrimaryGOPLength,             0,   1.0,                       1,  1.0,              16.0             },
+    {"NumRefPrimary",            &configinput.NumRefPrimary,                0,   1.0,                       1,  1.0,              16.0             },
+
+    {"PicOrderCntType",          &configinput.pic_order_cnt_type,           0,   0.0,                       1,  0.0,              2.0              },
+
+    {"ContextInitMethod",        &configinput.context_init_method,          0,   0.0,                       1,  0.0,              1.0              },
+    {"FixedModelNumber",         &configinput.model_number,                 0,   0.0,                       1,  0.0,              2.0              },
+
+    {"Transform8x8Mode",         &configinput.Transform8x8Mode,             0,   0.0,                       1,  0.0,              2.0              },
+    {"ReportFrameStats",         &configinput.ReportFrameStats,             0,   0.0,                       1,  0.0,              1.0              },
+    {"DisplayEncParams",         &configinput.DisplayEncParams,             0,   0.0,                       1,  0.0,              1.0              },
+    {"Verbose",                  &configinput.Verbose,                      0,   1.0,                       1,  0.0,              2.0              },
+    // Rate Control
+    {"RateControlEnable",        &configinput.RCEnable,                     0,   0.0,                       1,  0.0,              1.0              },
+    {"Bitrate",                  &configinput.bit_rate,                     0,   0.0,                       2,  0.0,              0.0              },
+    {"InitialQP",                &configinput.SeinitialQP,                  0,   0.0,                       3,  (double) MIN_QP,  (double) MAX_QP  },
+    {"BasicUnit",                &configinput.basicunit,                    0,   0.0,                       2,  0.0,              0.0              },
+    {"ChannelType",              &configinput.channel_type,                 0,   0.0,                       1,  0.0,              1.0              },
+    {"RCUpdateMode",             &configinput.RCUpdateMode,                 0,   0.0,                       1,  0.0,              4.0              },
+    {"RCISliceBitRatio",         &configinput.RCISliceBitRatio,             2,   1.0,                       1,  0.0,              1.0              },
+    {"RCBSliceBitRatio0",        &configinput.RCBSliceBitRatio[0],          2,   0.5,                       1,  0.0,              1.0              },
+    {"RCBSliceBitRatio1",        &configinput.RCBSliceBitRatio[1],          2,   0.25,                      1,  0.0,              1.0              },
+    {"RCBSliceBitRatio2",        &configinput.RCBSliceBitRatio[2],          2,   0.25,                      1,  0.0,              1.0              },
+    {"RCBSliceBitRatio3",        &configinput.RCBSliceBitRatio[3],          2,   0.25,                      1,  0.0,              1.0              },
+    {"RCBSliceBitRatio4",        &configinput.RCBSliceBitRatio[4],          2,   0.25,                      1,  0.0,              1.0              },
+    {"RCBoverPRatio",            &configinput.RCBoverPRatio,                2,   0.45,                      1,  0.0,              1000.0           },
+    {"RCIoverPRatio",            &configinput.RCIoverPRatio,                2,   3.80,                      1,  0.0,              1000.0           },
+    // Q_Matrix
+    {"QmatrixFile",              &configinput.QmatrixFile,                  1,   0.0,                       0,  0.0,              0.0              },
+    {"ScalingMatrixPresentFlag", &configinput.ScalingMatrixPresentFlag,     0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag0",  &configinput.ScalingListPresentFlag[0],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag1",  &configinput.ScalingListPresentFlag[1],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag2",  &configinput.ScalingListPresentFlag[2],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag3",  &configinput.ScalingListPresentFlag[3],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag4",  &configinput.ScalingListPresentFlag[4],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag5",  &configinput.ScalingListPresentFlag[5],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag6",  &configinput.ScalingListPresentFlag[6],    0,   0.0,                       1,  0.0,              3.0              },
+    {"ScalingListPresentFlag7",  &configinput.ScalingListPresentFlag[7],    0,   0.0,                       1,  0.0,              3.0              },
+    // Fast ME enable
+    {"SearchMode",               &configinput.SearchMode,                   0,   0.0,                       1, -1.0,              3.0              },
+    // Parameters for UMHEX control
+    {"UMHexDSR",                 &configinput.UMHexDSR,                     0,   1.0,                       1,  0.0,              1.0              },
+    {"UMHexScale",               &configinput.UMHexScale,                   0,   1.0,                       0,  0.0,              0.0              },
+    // Parameters for EPZS control
+    {"EPZSPattern",              &configinput.EPZSPattern,                  0,   2.0,                       1,  0.0,              5.0              },
+    {"EPZSDualRefinement",       &configinput.EPZSDual,                     0,   3.0,                       1,  0.0,              6.0              },
+    {"EPZSFixedPredictors",      &configinput.EPZSFixed,                    0,   2.0,                       1,  0.0,              2.0              },
+    {"EPZSTemporal",             &configinput.EPZSTemporal,                 0,   1.0,                       1,  0.0,              1.0              },
+    {"EPZSSpatialMem",           &configinput.EPZSSpatialMem,               0,   1.0,                       1,  0.0,              1.0              },
+    {"EPZSMinThresScale",        &configinput.EPZSMinThresScale,            0,   0.0,                       0,  0.0,              0.0              },
+    {"EPZSMaxThresScale",        &configinput.EPZSMaxThresScale,            0,   2.0,                       0,  0.0,              0.0              },
+    {"EPZSMedThresScale",        &configinput.EPZSMedThresScale,            0,   1.0,                       0,  0.0,              0.0              },
+    {"EPZSSubPelME",             &configinput.EPZSSubPelME,                 0,   1.0,                       1,  0.0,              1.0              },
+    {"EPZSSubPelMEBiPred",       &configinput.EPZSSubPelMEBiPred,           0,   1.0,                       1,  0.0,              1.0              },
+    {"EPZSSubPelGrid",           &configinput.EPZSSubPelGrid,               0,   0.0,                       1,  0.0,              1.0              },
+    {"EPZSSubPelThresScale",     &configinput.EPZSSubPelThresScale,         0,   2.0,                       0,  0.0,              0.0              },
+
+    // Chroma QP Offset
+    {"ChromaQPOffset",           &configinput.chroma_qp_index_offset,       0,   0.0,                       1,-51.0,             51.0              },
+
+    // Fidelity Range Extensions
+    {"BitDepthLuma",             &configinput.BitDepthLuma,                 0,   8.0,                       1,  8.0,             12.0              },
+    {"BitDepthChroma",           &configinput.BitDepthChroma,               0,   8.0,                       1,  8.0,             12.0              },
+    {"YUVFormat",                &configinput.yuv_format,                   0,   1.0,                       1,  0.0,              3.0              },
+    {"RGBInput",                 &configinput.rgb_input_flag,               0,   0.0,                       1,  0.0,              1.0              },
+    {"CbQPOffset",               &configinput.cb_qp_index_offset,           0,   0.0,                       1,-51.0,             51.0              },
+    {"CrQPOffset",               &configinput.cr_qp_index_offset,           0,   0.0,                       1,-51.0,             51.0              },
+
+    // Lossless Coding
+    {"QPPrimeYZeroTransformBypassFlag", &configinput.lossless_qpprime_y_zero_flag,      0,   0.0,           1,  0.0,              1.0              },
+
+    // Explicit Lambda Usage
+    {"UseExplicitLambdaParams",  &configinput.UseExplicitLambdaParams,      0,   0.0,                       1,  0.0,              3.0              },
+    {"FixedLambdaPslice",        &configinput.FixedLambda[0],               2,   0.1,                       2,  0.0,              0.0              },
+    {"FixedLambdaBslice",        &configinput.FixedLambda[1],               2,   0.1,                       2,  0.0,              0.0              },
+    {"FixedLambdaIslice",        &configinput.FixedLambda[2],               2,   0.1,                       2,  0.0,              0.0              },
+    {"FixedLambdaSPslice",       &configinput.FixedLambda[3],               2,   0.1,                       2,  0.0,              0.0              },
+    {"FixedLambdaSIslice",       &configinput.FixedLambda[4],               2,   0.1,                       2,  0.0,              0.0              },
+    {"FixedLambdaRefBslice",     &configinput.FixedLambda[5],               2,   0.1,                       2,  0.0,              0.0              },
+
+    {"LambdaWeightPslice",       &configinput.LambdaWeight[0],              2,   0.68,                      2,  0.0,              0.0              },
+    {"LambdaWeightBslice",       &configinput.LambdaWeight[1],              2,   2.00,                      2,  0.0,              0.0              },
+    {"LambdaWeightIslice",       &configinput.LambdaWeight[2],              2,   0.65,                      2,  0.0,              0.0              },
+    {"LambdaWeightSPslice",      &configinput.LambdaWeight[3],              2,   1.50,                      2,  0.0,              0.0              },
+    {"LambdaWeightSIslice",      &configinput.LambdaWeight[4],              2,   0.65,                      2,  0.0,              0.0              },
+    {"LambdaWeightRefBslice",    &configinput.LambdaWeight[5],              2,   1.50,                      2,  0.0,              0.0              },
+    {"QOffsetMatrixFile",        &configinput.QOffsetMatrixFile,            1,   0.0,                       0,  0.0,              0.0              },
+    {"OffsetMatrixPresentFlag",  &configinput.OffsetMatrixPresentFlag,      0,   0.0,                       1,  0.0,              1.0              },
+
+    // Fast Mode Decision
+    {"EarlySkipEnable",          &configinput.EarlySkipEnable,              0,   0.0,                       1,  0.0,              1.0              },
+    {"SelectiveIntraEnable",     &configinput.SelectiveIntraEnable,         0,   0.0,                       1,  0.0,              1.0              },
+
+    // Adaptive rounding technique based on JVT-N011
+    {"AdaptiveRounding",         &configinput.AdaptiveRounding,             0,   0.0,                       1,  0.0,              1.0              },
+    {"AdaptRndPeriod",           &configinput.AdaptRndPeriod,               0,  16.0,                       2,  0.0,              0.0              },
+    {"AdaptRndChroma",           &configinput.AdaptRndChroma,               0,   0.0,                       1,  0.0,              1.0              },
+    {"AdaptRndWFactorIRef",      &configinput.AdaptRndWFactor[1][I_SLICE],  0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndWFactorPRef",      &configinput.AdaptRndWFactor[1][P_SLICE],  0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndWFactorBRef",      &configinput.AdaptRndWFactor[1][B_SLICE],  0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndWFactorINRef",     &configinput.AdaptRndWFactor[0][I_SLICE],  0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndWFactorPNRef",     &configinput.AdaptRndWFactor[0][P_SLICE],  0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndWFactorBNRef",     &configinput.AdaptRndWFactor[0][B_SLICE],  0,   4.0,                       1,  0.0,           4096.0              },
+
+    {"AdaptRndCrWFactorIRef",    &configinput.AdaptRndCrWFactor[1][I_SLICE],0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndCrWFactorPRef",    &configinput.AdaptRndCrWFactor[1][P_SLICE],0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndCrWFactorBRef",    &configinput.AdaptRndCrWFactor[1][B_SLICE],0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndCrWFactorINRef",   &configinput.AdaptRndCrWFactor[0][I_SLICE],0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndCrWFactorPNRef",   &configinput.AdaptRndCrWFactor[0][P_SLICE],0,   4.0,                       1,  0.0,           4096.0              },
+    {"AdaptRndCrWFactorBNRef",   &configinput.AdaptRndCrWFactor[0][B_SLICE],0,   4.0,                       1,  0.0,           4096.0              },
+
+    {"VUISupport",               &configinput.VUISupport,                   0,   0.0,                       1,  0.0,              1.0              },
+    {"ChromaMCBuffer",           &configinput.ChromaMCBuffer,               0,   0.0,                       1,  0.0,              1.0              },
+    {"ChromaMEEnable",           &configinput.ChromaMEEnable,               0,   0.0,                       1,  0.0,              2.0              },
+    {"MEDistortionFPel",         &configinput.MEErrorMetric[F_PEL],         0,   0.0,                       1,  0.0,              2.0              },
+    {"MEDistortionHPel",         &configinput.MEErrorMetric[H_PEL],         0,   0.0,                       1,  0.0,              2.0              },
+    {"MEDistortionQPel",         &configinput.MEErrorMetric[Q_PEL],         0,   0.0,                       1,  0.0,              2.0              },
+    {"MDDistortion",             &configinput.ModeDecisionMetric,           0,   2.0,                       1,  0.0,              2.0              },
+
+    {NULL,                       NULL,                                     -1,   0.0,                       0,  0.0,              0.0              }
+};
+
+#endif
+
+#ifndef INCLUDED_BY_CONFIGFILE_C
+extern Mapping Map[];  // declaration for files that do not define INCLUDED_BY_CONFIGFILE_C
+#endif
+
+// Prototypes only; the definitions are not part of this header.
+void Configure (int ac, char *av[]);
+void PatchInputNoFrames(void);
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/context_ini.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/context_ini.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/context_ini.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,365 @@
+
+/*!
+ *************************************************************************************
+ * \file context_ini.c
+ *
+ * \brief
+ *    CABAC context initializations
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Heiko Schwarz                   <hschwarz at hhi.de>
+ **************************************************************************************
+ */
+
+#define CONTEXT_INI_C
+
+#include <stdlib.h>
+#include <math.h>
+
+#include "global.h"
+
+#include "ctx_tables.h"
+#include "cabac.h"
+
+#define DEFAULT_CTX_MODEL   0      // model used for I slices and when no stored model applies (SetCtxModelNumber)
+#define RELIABLE_COUNT      32.0   // XRate() weights a context by min(1, count/RELIABLE_COUNT)
+#define FRAME_TYPES         4      // size of the second dimension of initialized / model_number
+#define FIXED               0      // context_init_method value that selects input->model_number directly
+
+// Set up by create_context_memory(); initialized/model_number are [3][FRAME_TYPES][number_of_slices].
+int                     num_mb_per_slice;
+int                     number_of_slices;
+int***                  initialized;
+int***                  model_number;
+
+// entropy[] and probability[0..63] are filled in by create_context_memory() from probability[64..127].
+double entropy    [128];
+double probability[128] =
+{
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+  //--------------------------------------------------------------------------------
+  0.500000, 0.474609, 0.450507, 0.427629,    0.405912, 0.385299, 0.365732, 0.347159,
+  0.329530, 0.312795, 0.296911, 0.281833,    0.267520, 0.253935, 0.241039, 0.228799,
+  0.217180, 0.206151, 0.195682, 0.185744,    0.176312, 0.167358, 0.158859, 0.150792,
+  0.143134, 0.135866, 0.128966, 0.122417,    0.116200, 0.110299, 0.104698, 0.099381,
+  0.094334, 0.089543, 0.084996, 0.080680,    0.076583, 0.072694, 0.069002, 0.065498,
+  0.062172, 0.059014, 0.056018, 0.053173,    0.050473, 0.047909, 0.045476, 0.043167,
+  0.040975, 0.038894, 0.036919, 0.035044,    0.033264, 0.031575, 0.029972, 0.028450,
+  0.027005, 0.025633, 0.024332, 0.023096,    0.021923, 0.020810, 0.019753, 0.018750
+};
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocates initialized / model_number as [3][FRAME_TYPES][number_of_slices]
+ *    tables, clears every "initialized" entry and derives the lower half of
+ *    probability[] plus all of entropy[].
+ ************************************************************************
+ */
+void create_context_memory ()
+{
+  int set_idx, type_idx, slice_idx, tab_idx;
+  int total_mbs = img->FrameSizeInMbs; // number of macroblocks for frame
+
+  // slice size: input->slice_argument when slice_mode is 1, otherwise the whole frame
+  if (input->slice_mode == 1)
+    num_mb_per_slice = input->slice_argument;
+  else
+    num_mb_per_slice = total_mbs;
+  number_of_slices = (total_mbs + num_mb_per_slice - 1) / num_mb_per_slice;
+
+  //----- allocate the three nesting levels, outermost first -----
+  initialized = (int***) malloc (3 * sizeof(int**));
+  if (initialized == NULL)
+    no_mem_exit ("create_context_memory: initialized");
+  model_number = (int***) malloc (3 * sizeof(int**));
+  if (model_number == NULL)
+    no_mem_exit ("create_context_memory: model_number");
+
+  for (set_idx = 0; set_idx < 3; set_idx++)
+  {
+    initialized[set_idx] = (int**) malloc (FRAME_TYPES * sizeof(int*));
+    if (initialized[set_idx] == NULL)
+      no_mem_exit ("create_context_memory: initialized");
+    model_number[set_idx] = (int**) malloc (FRAME_TYPES * sizeof(int*));
+    if (model_number[set_idx] == NULL)
+      no_mem_exit ("create_context_memory: model_number");
+
+    for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+    {
+      initialized[set_idx][type_idx] = (int*) malloc (number_of_slices * sizeof(int));
+      if (initialized[set_idx][type_idx] == NULL)
+        no_mem_exit ("create_context_memory: initialized");
+      model_number[set_idx][type_idx] = (int*) malloc (number_of_slices * sizeof(int));
+      if (model_number[set_idx][type_idx] == NULL)
+        no_mem_exit ("create_context_memory: model_number");
+    }
+  }
+
+  //===== set all context sets as "uninitialized" =====
+  for (set_idx = 0; set_idx < 3; set_idx++)
+    for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+      for (slice_idx = 0; slice_idx < number_of_slices; slice_idx++)
+        initialized[set_idx][type_idx][slice_idx] = 0;
+
+  //----- init tables: mirror the upper half of probability[], then take log2 of both halves -----
+  for (tab_idx = 0; tab_idx < 64; tab_idx++)
+  {
+    int mirror = 127 - tab_idx;
+    probability[tab_idx] = 1.0 - probability[mirror];
+    entropy[tab_idx]     = log10(probability[tab_idx])/log10(2.0);
+    entropy[mirror]      = log10(probability[mirror])/log10(2.0);
+  }
+}
+
+
+/*!
+ * \brief
+ *    Releases the tables allocated by create_context_memory(), innermost arrays first.
+ */
+void free_context_memory ()
+{
+  int set_idx, type_idx;
+  for (set_idx = 0; set_idx < 3; set_idx++)
+  {
+    int** init_rows  = initialized [set_idx];
+    int** model_rows = model_number[set_idx];
+    for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+    {
+      free (init_rows [type_idx]);
+      free (model_rows[type_idx]);
+    }
+    free (init_rows);
+    free (model_rows);
+  }
+  free (initialized);
+  free (model_number);
+}
+
+// BIARI_CTX_INIT2/1: call biari_init_context() on each entry of a 2-D/1-D context array, taking the
+// values from tab_I (when img->type==I_SLICE) or tab_P at model `num`; both use the caller's i and j.
+#define BIARI_CTX_INIT2(ii,jj,ctx,tab,num) \
+{ \
+  for (i=0; i<ii; i++) \
+  for (j=0; j<jj; j++) \
+  { \
+    if      (img->type==I_SLICE)  biari_init_context (&(ctx[i][j]), &(tab ## _I[num][i][j][0])); \
+    else                            biari_init_context (&(ctx[i][j]), &(tab ## _P[num][i][j][0])); \
+  } \
+}
+#define BIARI_CTX_INIT1(jj,ctx,tab,num) \
+{ \
+  for (j=0; j<jj; j++) \
+  { \
+    if      (img->type==I_SLICE)  biari_init_context (&(ctx[j]), &(tab ## _I[num][0][j][0])); \
+    else                            biari_init_context (&(ctx[j]), &(tab ## _P[num][0][j][0])); \
+  } \
+}
+
+/*!
+ * \brief
+ *    Chooses img->model_number for the current slice: the default for I slices,
+ *    input->model_number when context_init_method is FIXED, otherwise the model stored
+ *    for this slice position (or for the preceding one), falling back to the default.
+ */
+void SetCtxModelNumber ()
+{
+  int field_idx = img->field_picture;
+  int type_idx  = img->type;
+  int slice_idx = img->currentSlice->start_mb_nr / num_mb_per_slice;
+  int chosen    = DEFAULT_CTX_MODEL;
+
+  if (img->type != I_SLICE)
+  {
+    if (input->context_init_method == FIXED)
+    {
+      chosen = input->model_number;
+    }
+    else
+    {
+      int* flags = initialized[field_idx][type_idx];
+
+      if (flags[slice_idx])
+        chosen = model_number[field_idx][type_idx][slice_idx];
+      else if (slice_idx && flags[slice_idx-1])
+        chosen = model_number[field_idx][type_idx][slice_idx-1];
+    }
+  }
+
+  img->model_number = chosen;
+}
+
+/*!
+ * \brief Initializes every motion and texture context of the current slice from the INIT_* tables,
+ *        using model img->model_number; i and j are the loop indices the BIARI_CTX_INIT macros expect.
+ */
+void init_contexts ()
+{
+  MotionInfoContexts*  mc = img->currentSlice->mot_ctx;
+  TextureInfoContexts* tc = img->currentSlice->tex_ctx;
+  int i, j;
+
+  //--- motion coding contexts ---
+  BIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX,   mc->mb_type_contexts,     INIT_MB_TYPE,    img->model_number);
+  BIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX,   mc->b8_type_contexts,     INIT_B8_TYPE,    img->model_number);
+  BIARI_CTX_INIT2 (2, NUM_MV_RES_CTX,    mc->mv_res_contexts,      INIT_MV_RES,     img->model_number);
+  BIARI_CTX_INIT2 (2, NUM_REF_NO_CTX,    mc->ref_no_contexts,      INIT_REF_NO,     img->model_number);
+  BIARI_CTX_INIT1 (   NUM_DELTA_QP_CTX,  mc->delta_qp_contexts,    INIT_DELTA_QP,   img->model_number);
+  BIARI_CTX_INIT1 (   NUM_MB_AFF_CTX,    mc->mb_aff_contexts,      INIT_MB_AFF,     img->model_number);
+  BIARI_CTX_INIT1 (   NUM_TRANSFORM_SIZE_CTX,  mc->transform_size_contexts,    INIT_TRANSFORM_SIZE,   img->model_number);
+
+  //--- texture coding contexts ---
+  BIARI_CTX_INIT1 (                 NUM_IPR_CTX,  tc->ipr_contexts,     INIT_IPR,       img->model_number);
+  BIARI_CTX_INIT1 (                 NUM_CIPR_CTX, tc->cipr_contexts,    INIT_CIPR,      img->model_number);
+  BIARI_CTX_INIT2 (3,               NUM_CBP_CTX,  tc->cbp_contexts,     INIT_CBP,       img->model_number);
+  BIARI_CTX_INIT2 (8,               NUM_BCBP_CTX, tc->bcbp_contexts,    INIT_BCBP,      img->model_number);
+  BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX,  tc->map_contexts,     INIT_MAP,       img->model_number);
+  BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts,    INIT_LAST,      img->model_number);
+  BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX,  tc->one_contexts,     INIT_ONE,       img->model_number);
+  BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX,  tc->abs_contexts,     INIT_ABS,       img->model_number);
+  BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX,  tc->fld_map_contexts, INIT_FLD_MAP,   img->model_number);
+  BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->fld_last_contexts,INIT_FLD_LAST,  img->model_number);
+}
+
+/*! \brief Returns -w*(probability[s]*entropy[m] + probability[127-s]*entropy[127-m]); s from ctx, m from model and QP. */
+double XRate (BiContextTypePtr ctx, const int* model)
+{
+  int     qp_nonneg   = imax(0,img->qp);
+  int     model_state = ((model[0]*qp_nonneg)>>4)+model[1];
+  int     coded_state;
+  double  reliability = dmin (1.0, (double)ctx->count/(double)RELIABLE_COUNT);  // w, capped at 1.0
+  double  rate        = 0.0;
+
+  model_state = iClip3(0, 127, model_state);
+  if (ctx->MPS) coded_state = 64+ctx->state;
+  else          coded_state = 63-ctx->state;
+
+  rate -= reliability * probability[    coded_state] * entropy[    model_state];
+  rate -= reliability * probability[127-coded_state] * entropy[127-model_state];
+  return rate;
+}
+
+// ADD_XRATE2/1: add XRate() of each entry of a 2-D/1-D context array to the caller's xr, using the
+// tab_I (img->type==I_SLICE) or tab_P values of model `num`; both rely on the caller's i and j.
+#define ADD_XRATE2(ii,jj,ctx,tab,num) \
+{ \
+  for (i=0; i<ii; i++) \
+  for (j=0; j<jj; j++) \
+  { \
+    if      (img->type==I_SLICE)  xr += XRate (&(ctx[i][j]), &(tab ## _I[num][i][j][0])); \
+    else                            xr += XRate (&(ctx[i][j]), &(tab ## _P[num][i][j][0])); \
+  } \
+}
+#define ADD_XRATE1(jj,ctx,tab,num) \
+{ \
+  for (j=0; j<jj; j++) \
+  { \
+    if      (img->type==I_SLICE)  xr += XRate (&(ctx[j]), &(tab ## _I[num][0][j][0])); \
+    else                            xr += XRate (&(ctx[j]), &(tab ## _P[num][0][j][0])); \
+  } \
+}
+
+// GetCtxModelNumber: sums XRate() over all motion (mc) and texture (tc) contexts for each model
+// 0..num_models-1 and stores the index with the smallest sum in *mnumber.
+// *mnumber is written only when a sum is below the initial min_xr of 1e30.
+void GetCtxModelNumber (int* mnumber, MotionInfoContexts* mc, TextureInfoContexts* tc)
+{
+  int     model, j, i;
+  int     num_models = (img->type==I_SLICE ? NUM_CTX_MODELS_I : NUM_CTX_MODELS_P);
+  double  xr, min_xr = 1e30;
+
+  for (model=0; model<num_models; model++)
+  {
+    xr = 0.0;
+    //--- motion coding contexts ---
+    ADD_XRATE2 (3, NUM_MB_TYPE_CTX,   mc->mb_type_contexts,     INIT_MB_TYPE,   model);
+    ADD_XRATE2 (2, NUM_B8_TYPE_CTX,   mc->b8_type_contexts,     INIT_B8_TYPE,   model);
+    ADD_XRATE2 (2, NUM_MV_RES_CTX,    mc->mv_res_contexts,      INIT_MV_RES,    model);
+    ADD_XRATE2 (2, NUM_REF_NO_CTX,    mc->ref_no_contexts,      INIT_REF_NO,    model);
+    ADD_XRATE1 (   NUM_DELTA_QP_CTX,  mc->delta_qp_contexts,    INIT_DELTA_QP,  model);
+    ADD_XRATE1 (   NUM_MB_AFF_CTX,    mc->mb_aff_contexts,      INIT_MB_AFF,    model);
+    ADD_XRATE1 (   NUM_TRANSFORM_SIZE_CTX,  mc->transform_size_contexts, INIT_TRANSFORM_SIZE,  model);
+
+    //--- texture coding contexts ---
+    ADD_XRATE1 (                  NUM_IPR_CTX,  tc->ipr_contexts,       INIT_IPR,       model);
+    ADD_XRATE1 (                  NUM_CIPR_CTX, tc->cipr_contexts,      INIT_CIPR,      model);
+    ADD_XRATE2 (3,                NUM_CBP_CTX,  tc->cbp_contexts,       INIT_CBP,       model);
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_BCBP_CTX, tc->bcbp_contexts,      INIT_BCBP,      model); // NOTE(review): init_contexts uses 8 as the first bound here - confirm which is intended
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_MAP_CTX,  tc->map_contexts,       INIT_MAP,       model);
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_LAST_CTX, tc->last_contexts,      INIT_LAST,      model);
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_ONE_CTX,  tc->one_contexts,       INIT_ONE,       model);
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_ABS_CTX,  tc->abs_contexts,       INIT_ABS,       model);
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_MAP_CTX,  tc->fld_map_contexts,   INIT_FLD_MAP,   model);
+    ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_LAST_CTX, tc->fld_last_contexts,  INIT_FLD_LAST,  model);
+
+    if (xr<min_xr)
+    {
+      min_xr    = xr;
+      *mnumber  = model;
+    }
+  }
+}
+
+#undef ADD_XRATE2
+#undef ADD_XRATE1
+
+
+/*! \brief When input->context_init_method is non-zero, flags the current slice position as
+ *         initialized and records the model chosen for it by GetCtxModelNumber().
+ */
+void store_contexts ()
+{
+  int  field_idx = img->field_picture;
+  int  type_idx  = img->type;
+  int  slice_idx = img->currentSlice->start_mb_nr / num_mb_per_slice;
+  int* model_slot;
+
+  if (!input->context_init_method)
+    return;   // do nothing
+
+  model_slot = &model_number[field_idx][type_idx][slice_idx];
+  initialized[field_idx][type_idx][slice_idx] = 1;
+  GetCtxModelNumber (model_slot, img->currentSlice->mot_ctx, img->currentSlice->tex_ctx);
+}
+
+/*!
+ * \brief
+ *    Copies the stored flags and model numbers between set 0 and set 1:
+ *    field != 0 fills set 0 from set 1 (entry i from i>>1); field == 0 fills set 1
+ *    from set 0 (entry i from i<<1, for the first (number_of_slices+1)>>1 entries).
+ */
+void update_field_frame_contexts (int field)
+{
+  int type_idx, slice_idx;
+  int half_slices = (number_of_slices+1)>>1;
+
+  for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+  {
+    if (field)
+      // set frame contexts
+      for (slice_idx = 0; slice_idx < number_of_slices; slice_idx++)
+      {
+        initialized [0][type_idx][slice_idx] = initialized [1][type_idx][slice_idx>>1];
+        model_number[0][type_idx][slice_idx] = model_number[1][type_idx][slice_idx>>1];
+      }
+    else
+      // set field contexts
+      for (slice_idx = 0; slice_idx < half_slices; slice_idx++)
+      {
+        initialized [1][type_idx][slice_idx] = initialized [0][type_idx][slice_idx<<1];
+        model_number[1][type_idx][slice_idx] = model_number[0][type_idx][slice_idx<<1];
+      }
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/context_ini.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/context_ini.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/context_ini.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,32 @@
+
+/*!
+ *************************************************************************************
+ * \file context_ini.h
+ *
+ * \brief
+ *    CABAC context initializations
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Heiko Schwarz                   <hschwarz at hhi.de>
+ **************************************************************************************
+ */
+
+#ifndef _CONTEXT_INI_
+#define _CONTEXT_INI_
+
+
+void  create_context_memory (void);   // allocate the initialized / model_number tables
+void  free_context_memory   (void);   // release those tables
+
+void  init_contexts  (void);          // initialize the current slice's contexts using img->model_number
+void  store_contexts (void);          // record the model selected for the current slice position
+
+void  update_field_frame_contexts (int);  // copy stored models between set 0 and set 1; argument picks the direction
+void  update_rd_picture_contexts  (int);  // NOTE(review): no definition in the context_ini.c shown alongside - confirm where it lives
+
+void  SetCtxModelNumber (void);       // set img->model_number for the current slice
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/contributors.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/contributors.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/contributors.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,194 @@
+
+/*! \file
+ *     contributors.h
+ *  \brief
+ *     List of contributors and copyright information.
+ *
+ *  \par Copyright statements
+    \verbatim
+   H.264 JM coder/decoder
+
+   Copyright (C) 2000 by
+      Telenor Broadband Services, Norway
+      Ericsson Radio Systems, Sweden
+      TELES AG, Germany
+      Nokia Inc., USA
+      Nokia Corporation, Finland
+      Siemens AG, Germany
+      Heinrich-Hertz-Institute for Communication Technology GmbH, Germany
+      University of Hannover, Institut of Communication Theory and Signal Processing,Germany
+      Videolocus, Canada
+      LSI Logic, Canada
+      Motorola Inc., USA
+      Microsoft Corp., USA
+      Apple Computer, Inc.
+      RealNetworks, Inc., USA
+      Thomson, Inc., USA
+      Dolby Laboratories, Inc., USA
+   \endverbatim
+   \par Full Contact Information
+   \verbatim
+
+      Lowell Winger                   <lwinger at videolocus.com><lwinger at uwaterloo.ca><lwinger at lsil.com>
+      Guy Côté                        <gcote at videolocus.com>
+      Michael Gallant                 <mgallant at videolocus.com>
+      VideoLocus Inc.
+      97 Randall Dr.
+      Waterloo, ON, Canada  N2V1C5
+
+      Inge Lille-Langøy               <inge.lille-langoy at telenor.com>
+      Telenor Broadband Services
+      P.O.Box 6914 St.Olavs plass
+      N-0130 Oslo, Norway
+
+      Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+      Ericsson Radio Systems
+      KI/ERA/T/VV
+      164 80 Stockholm, Sweden
+
+      Stephan Wenger                  <stewe at cs.tu-berlin.de>
+      TU Berlin / TELES AG
+      Sekr. FR 6-3
+      Franklinstr. 28-29
+      D-10587 Berlin, Germany
+
+      Jani Lainema                    <jani.lainema at nokia.com>
+      Ragip Kurceren                  <ragip.kurceren at nokia.com>
+      Nokia Inc. / Nokia Research Center
+      6000 Connection Drive
+      Irving, TX 75039, USA
+
+      Miska M. Hannuksela             <miska.hannuksela at nokia.com>
+      Nokia Corporation / Nokia Mobile Phones
+      P.O. Box 88
+      33721 Tampere, Finland
+
+      Sebastian Purreiter             <sebastian.purreiter at mch.siemens.de>
+      Siemens AG
+      ICM MD MP RD MCH 83
+      P.O.Box 80 17 07
+      D-81617 Munich, Germany
+
+      Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+      University of Hannover
+      Institut of Communication Theory and Signal Processing
+      Appelstr. 9a
+      30167 Hannover
+
+      Thomas Stockhammer              <stockhammer at ei.tum.de>
+      Tobias Oelbaum (TO)             <drehvial at gmx.net>
+      Institute for Communications Engineering
+      Munich University of Technology
+      80290 Munich
+      Germany
+
+      Yann Le Maguet                  <yann.lemaguet at philips.com>
+      Philips Research France
+
+      Dong Tian                       <tian at cs.tut.fi>
+      Ye-Kui Wang                     <wyk at ieee.org>
+      Tampere University of Technology
+      Tampere International Center for Signal Processing
+      33720 Tampere, Finland
+
+      Karsten Suehring                <suehring at hhi.de>
+      Heiko Schwarz                   <hschwarz at hhi.de>
+      Detlev Marpe                    <marpe at hhi.de>
+      Guido Heising                   <heising at hhi.de>
+      Heinrich-Hertz-Institute
+      Einsteinufer 37
+      10587 Berlin
+      Germany
+
+      Limin Wang                      <liwang at gi.com>
+      Krit Panusopone                 <kpanusopone at gi.com>
+      Rajeev Gandhi                   <rgandhi at gi.com>
+      Yue Yu                          <yyu at gi.com>
+      Motorola Inc.
+      6450 Sequence Drive
+      San Diego, CA 92121 USA
+
+      Feng Wu                         <fengwu at microsoft.com>
+      Xiaoyan Sun                     <sunxiaoyan at msrchina.research.microsoft.com>
+      Microsoft Research Asia
+      3/F, Beijing Sigma Center
+      No.49, Zhichun Road, Hai Dian District,
+      Beijing China 100080
+
+      Mathias Wien                    <wien at ient.rwth-aachen.de>
+      Achim Dahlhoff                  <dahlhoff at ient.rwth-aachen.de>
+      Institut und Lehrstuhl für Nachrichtentechnik
+      RWTH Aachen University
+      52072 Aachen
+      Germany
+
+      Yoshihiro Kikuchi               <yoshihiro.kikuchi at toshiba.co.jp>
+      Takeshi Chujoh                  <takeshi.chujoh at toshiba.co.jp>
+      Toshiba Corporation
+      Research and Development Center
+      Kawasaki 212-8582, Japan
+
+      Shinya Kadono                   <kadono at drl.mei.co.jp>
+      Matsushita Electric Industrial Co., Ltd.
+      1006 Kadoma, Kadoma
+      Osaka 663-8113, Japan
+
+      Dzung Hoang                     <dthoang at yahoo.com>
+      10533 Roy Butler Dr.
+      Austin, TX 78717
+
+      Eric Viscito                    <eric at ev-consulting.com>
+      eV Consulting
+      52 Tracy Ln
+      Shelburne, VT 05482 USA
+
+      Dzung Hoang                     <dzung.hoang at xilient.com>
+      Xilient Inc.
+      10181 Bubb Rd.
+      Cupertino, CA 95014
+
+      Barry Haskell
+      Apple Computer, Inc.            <bhaskell at apple.com>
+      2 Infinite Loop
+      Cupertino, California 95014
+
+      Greg Conklin
+      RealNetworks, Inc.              <gregc at real.com>
+      2601 Elliott Ave
+      Seattle, WA 98101
+
+      Jill Boyce                      <jill.boyce at thomson.net>
+      Cristina Gomila                 <cristina.gomila at thomson.net>
+      Thomson
+      2 Independence Way
+      Princeton, NJ 08540
+
+      Siwei Ma                        <swma at jdl.ac.cn>
+      Institute of Computing Technology
+      Chinese Academy of Sciences
+      Kexueyuan South Road 6
+      Haidian District
+      Beijing, China
+
+      Zhibo Chen                      <chenzhibo at tsinghua.org.cn>
+      JianFeng Xu                     <fenax at video.mdc.tsinghua.edu.cn>
+      Wenfang Fu                      <fwf at video.mdc.tsinghua.edu.cn>
+      Dept.of E&E, Tsinghua Univ
+      Haidian District
+      Beijing China, 100084
+
+      Alexis Michael Tourapis         <alexismt at ieee.org><atour at dolby.com>
+      Athanasios Leontaris            <aleon at dolby.com>
+      Dolby Laboratories Inc.
+      3601 West Alameda Ave.
+      Burbank, CA 91505
+
+      Xiaoquan Yi                     <xyi at engr.scu.edu>
+      Jun Zhang                       <jzhang2 at engr.scu.edu>
+      Dept. of C.E. Santa Clara Univ.
+      500 El Camino Real
+      Santa Clara, CA 95053
+
+   \endverbatim
+ */
+


Index: llvm-test/MultiSource/Applications/JM/lencod/ctx_tables.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/ctx_tables.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/ctx_tables.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,729 @@
+
+/*!
+ *************************************************************************************
+ * \file ctx_tables.h
+ *
+ * \brief
+ *    CABAC context initialization tables
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Heiko Schwarz                   <hschwarz at hhi.de>
+ **************************************************************************************
+ */
+
+#define CTX_UNUSED          {0,64}  // initializer pair written into table slots that carry no real values
+#define CTX_UNDEF           {0,63}  // second placeholder pair; not referenced by the tables visible in this part of the file
+
+#ifdef CONTEXT_INI_C
+
+
+#define NUM_CTX_MODELS_I     1  // equals the leading dimension [1] of the INIT_*_I tables below
+#define NUM_CTX_MODELS_P     3  // equals the leading dimension [3] of the INIT_*_P tables below
+
+
+static const int INIT_MB_TYPE_I[1][3][11][2] =  // [model][3][11][2]: one model, int pairs; presumably CABAC init parameters -- TODO confirm against the consumer
+{
+  //----- model 0 -----
+  {
+    { {  20, -15} , {   2,  54} , {   3,  74} ,  CTX_UNUSED , { -28, 127} , { -23, 104} , {  -6,  53} , {  -1,  54} , {   7,  51} ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  20, -15} , {   2,  54} , {   3,  74} , {  20, -15} , {   2,  54} , {   3,  74} , { -28, 127} , { -23, 104} , {  -6,  53} , {  -1,  54} , {   7,  51} }, // SI (unused at the moment)
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_MB_TYPE_P[3][3][11][2] =  // [model][3][11][2]: three models; row 0 of every model holds only CTX_UNUSED placeholders
+{
+  //----- model 0 -----
+  {
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+  { {  23,  33} , {  23,   2} , {  21,   0} ,  CTX_UNUSED , {   1,   9} , {   0,  49} , { -37, 118} , {   5,  57} , { -13,  78} , { -11,  65} , {   1,  62} },
+  { {  26,  67} , {  16,  90} , {   9, 104} ,  CTX_UNUSED , { -46, 127} , { -20, 104} , {   1,  67} , {  18,  64} , {   9,  43} , {  29,   0} ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  22,  25} , {  34,   0} , {  16,   0} ,  CTX_UNUSED , {  -2,   9} , {   4,  41} , { -29, 118} , {   2,  65} , {  -6,  71} , { -13,  79} , {   5,  52} },
+    { {  57,   2} , {  41,  36} , {  26,  69} ,  CTX_UNUSED , { -45, 127} , { -15, 101} , {  -4,  76} , {  26,  34} , {  19,  22} , {  40,   0} ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  29,  16} , {  25,   0} , {  14,   0} ,  CTX_UNUSED , { -10,  51} , {  -3,  62} , { -27,  99} , {  26,  16} , {  -4,  85} , { -24, 102} , {   5,  57} },
+  { {  54,   0} , {  37,  42} , {  12,  97} ,  CTX_UNUSED , { -32, 127} , { -22, 117} , {  -2,  74} , {  20,  40} , {  20,  10} , {  29,   0} ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_B8_TYPE_I[1][2][9][2] =  // [model][2][9][2]: one model; every slot is a CTX_UNUSED placeholder
+{
+  //----- model 0 -----
+  {
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_B8_TYPE_P[3][2][9][2] =  // [model][2][9][2]: three models of int pairs; the trailing slots of each row are CTX_UNUSED
+{
+  //----- model 0 -----
+  {
+    {  CTX_UNUSED , {  12,  49} ,  CTX_UNUSED , {  -4,  73} , {  17,  50} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -6,  86} , { -17,  95} , {  -6,  61} , {   9,  45} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    {  CTX_UNUSED , {   9,  50} ,  CTX_UNUSED , {  -3,  70} , {  10,  54} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {   6,  69} , { -13,  90} , {   0,  52} , {   8,  43} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    {  CTX_UNUSED , {   6,  57} ,  CTX_UNUSED , { -17,  73} , {  14,  57} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -6,  93} , { -14,  88} , {  -6,  44} , {   4,  55} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_MV_RES_I[1][2][10][2] =  // [model][2][10][2]: one model; every slot is a CTX_UNUSED placeholder
+{
+  //----- model 0 -----
+  {
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_MV_RES_P[3][2][10][2] =  // [model][2][10][2]: three models of int pairs; slots 4 and 9 of every row are CTX_UNUSED
+{
+  //----- model 0 -----
+  {
+    { {  -3,  69} ,  CTX_UNUSED , {  -6,  81} , { -11,  96} ,  CTX_UNUSED , {   0,  58} ,  CTX_UNUSED , {  -3,  76} , { -10,  94} ,  CTX_UNUSED },
+    { {   6,  55} , {   7,  67} , {  -5,  86} , {   2,  88} ,  CTX_UNUSED , {   5,  54} , {   4,  69} , {  -3,  81} , {   0,  88} ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    { {  -2,  69} ,  CTX_UNUSED , {  -5,  82} , { -10,  96} ,  CTX_UNUSED , {   1,  56} ,  CTX_UNUSED , {  -3,  74} , {  -6,  85} ,  CTX_UNUSED },
+    { {   2,  59} , {   2,  75} , {  -3,  87} , {  -3, 100} ,  CTX_UNUSED , {   0,  59} , {  -3,  81} , {  -7,  86} , {  -5,  95} ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    { { -11,  89} ,  CTX_UNUSED , { -15, 103} , { -21, 116} ,  CTX_UNUSED , {   1,  63} ,  CTX_UNUSED , {  -5,  85} , { -13, 106} ,  CTX_UNUSED },
+    { {  19,  57} , {  20,  58} , {   4,  84} , {   6,  96} ,  CTX_UNUSED , {   5,  63} , {   6,  75} , {  -3,  90} , {  -1, 101} ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_REF_NO_I[1][2][6][2] =  // [model][2][6][2]: one model; every slot is a CTX_UNUSED placeholder
+{
+  //----- model 0 -----
+  {
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_REF_NO_P[3][2][6][2] =  // [model][2][6][2]: three models; only row 0 carries values, row 1 is all CTX_UNUSED
+{
+  //----- model 0 -----
+  {
+    { {  -7,  67} , {  -5,  74} , {  -4,  74} , {  -5,  80} , {  -7,  72} , {   1,  58} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    { {  -1,  66} , {  -1,  77} , {   1,  70} , {  -2,  86} , {  -5,  72} , {   0,  61} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    { {   3,  55} , {  -4,  79} , {  -2,  75} , { -12,  97} , {  -7,  50} , {   1,  60} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+
+
+static const int INIT_TRANSFORM_SIZE_I[1][1][3][2]=  // [model][1][3][2]: one model with a single row of three int pairs
+{
+  //----- model 0 -----
+  {
+    {  {  31,  21} , {  31,  31} , {  25,  50} },
+//    { {   0,  41} , {   0,  63} , {   0,  63} },
+  }
+};
+
+static const int INIT_TRANSFORM_SIZE_P[3][1][3][2]=  // [model][1][3][2]: three models, each a single row of three int pairs
+{
+  //----- model 0 -----
+  {
+    {  {  12,  40} , {  11,  51} , {  14,  59} },
+//    { {   0,  41} , {   0,  63} , {   0,  63} },
+  },
+  //----- model 1 -----
+  {
+    {  {  25,  32} , {  21,  49} , {  21,  54} },
+//    { {   0,  41} , {   0,  63} , {   0,  63} },
+  },
+  //----- model 2 -----
+  {
+    {  {  21,  33} , {  19,  50} , {  17,  61} },
+//    { {   0,  41} , {   0,  63} , {   0,  63} },
+  }
+};
+
+static const int INIT_DELTA_QP_I[1][1][4][2]=  // [model][1][4][2]: one model with a single row of four int pairs
+{
+  //----- model 0 -----
+  {
+    { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+  }
+};
+static const int INIT_DELTA_QP_P[3][1][4][2]=  // [model][1][4][2]: three models, all holding the same four pairs
+{
+  //----- model 0 -----
+  {
+    { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+  },
+  //----- model 1 -----
+  {
+    { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+  },
+  //----- model 2 -----
+  {
+    { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+  }
+};
+
+
+
+
+
+static const int INIT_MB_AFF_I[1][1][4][2] =  // [model][1][4][2]: one model; three int pairs followed by a CTX_UNUSED slot
+{
+  //----- model 0 -----
+  {
+    { {   0,  11} , {   1,  55} , {   0,  69} ,  CTX_UNUSED }
+  }
+};
+static const int INIT_MB_AFF_P[3][1][4][2] =  // [model][1][4][2]: three models; the last slot of each row is CTX_UNUSED
+{
+  //----- model 0 -----
+  {
+    { {   0,  45} , {  -4,  78} , {  -3,  96} ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    { {  13,  15} , {   7,  51} , {   2,  80} ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    { {   7,  34} , {  -9,  88} , { -20, 127} ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_IPR_I[1][1][2][2] =  // [model][1][2][2]: one model with a single row of two int pairs
+{
+  //----- model 0 -----
+  {
+    { { 13,  41} , {   3,  62} }
+  }
+};
+static const int INIT_IPR_P[3][1][2][2] =  // [model][1][2][2]: three models, all holding the same two pairs
+{
+  //----- model 0 -----
+  {
+    { { 13,  41} , {   3,  62} }
+  },
+  //----- model 1 -----
+  {
+    { { 13,  41} , {   3,  62} }
+  },
+  //----- model 2 -----
+  {
+    { { 13,  41} , {   3,  62} }
+  }
+};
+
+
+
+
+
+static const int INIT_CIPR_I[1][1][4][2] =  // [model][1][4][2]: one model with a single row of four int pairs
+{
+  //----- model 0 -----
+  {
+    { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+  }
+};
+static const int INIT_CIPR_P[3][1][4][2] =  // [model][1][4][2]: three models, all holding the same four pairs
+{
+  //----- model 0 -----
+  {
+    { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+  },
+  //----- model 1 -----
+  {
+    { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+  },
+  //----- model 2 -----
+  {
+    { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+  }
+};
+
+
+
+
+
+
+static const int INIT_CBP_I[1][3][4][2] =  // [model][3][4][2]: one model, three rows of four int pairs, no placeholders
+{
+  //----- model 0 -----
+  {
+    { { -17, 127} , { -13, 102} , {   0,  82} , {  -7,  74} },
+    { { -21, 107} , { -27, 127} , { -31, 127} , { -24, 127} },
+    { { -18,  95} , { -27, 127} , { -21, 114} , { -30, 127} }
+  }
+};
+static const int INIT_CBP_P[3][3][4][2] =  // [model][3][4][2]: three models, each three rows of four int pairs, no placeholders
+{
+  //----- model 0 -----
+  {
+    { { -27, 126} , { -28,  98} , { -25, 101} , { -23,  67} },
+    { { -28,  82} , { -20,  94} , { -16,  83} , { -22, 110} },
+    { { -21,  91} , { -18, 102} , { -13,  93} , { -29, 127} }
+  },
+  //----- model 1 -----
+  {
+    { { -39, 127} , { -18,  91} , { -17,  96} , { -26,  81} },
+    { { -35,  98} , { -24, 102} , { -23,  97} , { -27, 119} },
+    { { -24,  99} , { -21, 110} , { -18, 102} , { -36, 127} }
+  },
+  //----- model 2 -----
+  {
+    { { -36, 127} , { -17,  91} , { -14,  95} , { -25,  84} },
+    { { -25,  86} , { -12,  89} , { -17,  91} , { -31, 127} },
+    { { -14,  76} , { -18, 103} , { -13,  90} , { -37, 127} }
+  }
+};
+
+
+
+
+
+static const int INIT_BCBP_I[1][8][4][2] =  // [model][8][4][2]: one model; rows 2, 3 and 7 hold only CTX_UNUSED placeholders
+{
+  //----- model 0 -----
+  {
+    { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} },
+    { { -12,  63} , {  -2,  68} , { -15,  84} , { -13, 104} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -3,  70} , {  -8,  93} , { -10,  90} , { -30, 127} },
+    { {  -1,  74} , {  -6,  97} , {  -7,  91} , { -20, 127} },
+    { {  -4,  56} , {  -5,  82} , {  -7,  76} , { -22, 125} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_BCBP_P[3][8][4][2] =  // [model][8][4][2]: three models; rows 2, 3 and 7 of each hold only CTX_UNUSED placeholders
+{
+  //----- model 0 -----
+  {
+    { {  -7,  92} , {  -5,  89} , {  -7,  96} , { -13, 108} },
+    { {  -3,  46} , {  -1,  65} , {  -1,  57} , {  -9,  93} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -3,  74} , {  -9,  92} , {  -8,  87} , { -23, 126} },
+    { {   5,  54} , {   6,  60} , {   6,  59} , {   6,  69} },
+    { {  -1,  48} , {   0,  68} , {  -4,  69} , {  -8,  88} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    { {   0,  80} , {  -5,  89} , {  -7,  94} , {  -4,  92} },
+    { {   0,  39} , {   0,  65} , { -15,  84} , { -35, 127} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -2,  73} , { -12, 104} , {  -9,  91} , { -31, 127} },
+    { {   3,  55} , {   7,  56} , {   7,  55} , {   8,  61} },
+    { {  -3,  53} , {   0,  68} , {  -7,  74} , {  -9,  88} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    { {  11,  80} , {   5,  76} , {   2,  84} , {   5,  78} },
+    { {  -6,  55} , {   4,  61} , { -14,  83} , { -37, 127} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -5,  79} , { -11, 104} , { -11,  91} , { -30, 127} },
+    { {   0,  65} , {  -2,  79} , {   0,  72} , {  -4,  92} },
+    { {  -6,  56} , {   3,  68} , {  -8,  71} , { -13,  98} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_MAP_I[1][8][15][2] =  // [model][8][15][2]: one model; rows 3 and 4 are all CTX_UNUSED, the '//' row is a disabled alternative for row 2
+{
+  //----- model 0 -----
+  {
+    { {  -7,  93} , { -11,  87} , {  -3,  77} , {  -5,  71} , {  -4,  63} , {  -4,  68} , { -12,  84} , {  -7,  62} , {  -7,  65} , {   8,  61} , {   5,  56} , {  -2,  66} , {   1,  64} , {   0,  61} , {  -2,  78} },
+    {  CTX_UNUSED , {   1,  50} , {   7,  52} , {  10,  35} , {   0,  44} , {  11,  38} , {   1,  45} , {   0,  46} , {   5,  44} , {  31,  17} , {   1,  51} , {   7,  50} , {  28,  19} , {  16,  33} , {  14,  62} },
+    {  { -17, 120} , { -20, 112} , { -18, 114} , { -11,  85} , { -15,  92} , { -14,  89} , { -26,  71} , { -15,  81} , { -14,  80} , {   0,  68} , { -14,  70} , { -24,  56} , { -23,  68} , { -24,  50} , { -11,  74} },
+//    { {  -1,  73} , {  -7,  73} , {  -6,  76} , {  -7,  71} , {  -9,  72} , {  -5,  65} , { -14,  83} , {  -8,  72} , { -10,  75} , {  -5,  64} , {  -4,  59} , { -13,  79} , {  -9,  69} , {  -8,  66} , {   3,  55} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -13, 108} , { -15, 100} , { -13, 101} , { -13,  91} , { -12,  94} , { -10,  88} , { -16,  84} , { -10,  86} , {  -7,  83} , { -13,  87} , { -19,  94} , {   1,  70} , {   0,  72} , {  -5,  74} , {  18,  59} },
+    { {  -8, 102} , { -15, 100} , {   0,  95} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  -4,  75} , {   2,  72} , { -11,  75} , {  -3,  71} , {  15,  46} , { -13,  69} , {   0,  62} , {   0,  65} , {  21,  37} , { -15,  72} , {   9,  57} , {  16,  54} , {   0,  62} , {  12,  72} }
+  }
+};
+static const int INIT_MAP_P[3][8][15][2] =  // [model][8][15][2]: three models; rows 3 and 4 of each are all CTX_UNUSED, the '//' rows are disabled alternatives
+{
+  //----- model 0 -----
+  {
+    { {  -2,  85} , {  -6,  78} , {  -1,  75} , {  -7,  77} , {   2,  54} , {   5,  50} , {  -3,  68} , {   1,  50} , {   6,  42} , {  -4,  81} , {   1,  63} , {  -4,  70} , {   0,  67} , {   2,  57} , {  -2,  76} },
+    {  CTX_UNUSED , {  11,  35} , {   4,  64} , {   1,  61} , {  11,  35} , {  18,  25} , {  12,  24} , {  13,  29} , {  13,  36} , { -10,  93} , {  -7,  73} , {  -2,  73} , {  13,  46} , {   9,  49} , {  -7, 100} },
+    {  {  -4,  79} , {  -7,  71} , {  -5,  69} , {  -9,  70} , {  -8,  66} , { -10,  68} , { -19,  73} , { -12,  69} , { -16,  70} , { -15,  67} , { -20,  62} , { -19,  70} , { -16,  66} , { -22,  65} , { -20,  63} },
+//    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {   9,  53} , {   2,  53} , {   5,  53} , {  -2,  61} , {   0,  56} , {   0,  56} , { -13,  63} , {  -5,  60} , {  -1,  62} , {   4,  57} , {  -6,  69} , {   4,  57} , {  14,  39} , {   4,  51} , {  13,  68} },
+    { {   3,  64} , {   1,  61} , {   9,  63} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {   7,  50} , {  16,  39} , {   5,  44} , {   4,  52} , {  11,  48} , {  -5,  60} , {  -1,  59} , {   0,  59} , {  22,  33} , {   5,  44} , {  14,  43} , {  -1,  78} , {   0,  60} , {   9,  69} }
+  },
+  //----- model 1 -----
+  {
+    { { -13, 103} , { -13,  91} , {  -9,  89} , { -14,  92} , {  -8,  76} , { -12,  87} , { -23, 110} , { -24, 105} , { -10,  78} , { -20, 112} , { -17,  99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} },
+    {  CTX_UNUSED , {  -4,  66} , {  -5,  78} , {  -4,  71} , {  -8,  72} , {   2,  59} , {  -1,  55} , {  -7,  70} , {  -6,  75} , {  -8,  89} , { -34, 119} , {  -3,  75} , {  32,  20} , {  30,  22} , { -44, 127} },
+    {  {  -5,  85} , {  -6,  81} , { -10,  77} , {  -7,  81} , { -17,  80} , { -18,  73} , {  -4,  74} , { -10,  83} , {  -9,  71} , {  -9,  67} , {  -1,  61} , {  -8,  66} , { -14,  66} , {   0,  59} , {   2,  59} },
+//    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {   0,  54} , {  -5,  61} , {   0,  58} , {  -1,  60} , {  -3,  61} , {  -8,  67} , { -25,  84} , { -14,  74} , {  -5,  65} , {   5,  52} , {   2,  57} , {   0,  61} , {  -9,  69} , { -11,  70} , {  18,  55} },
+    { {  -4,  71} , {   0,  58} , {   7,  61} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {   9,  41} , {  18,  25} , {   9,  32} , {   5,  43} , {   9,  47} , {   0,  44} , {   0,  51} , {   2,  46} , {  19,  38} , {  -4,  66} , {  15,  38} , {  12,  42} , {   9,  34} , {   0,  89} }
+  },
+  //----- model 2 -----
+  {
+    { {  -4,  86} , { -12,  88} , {  -5,  82} , {  -3,  72} , {  -4,  67} , {  -8,  72} , { -16,  89} , {  -9,  69} , {  -1,  59} , {   5,  66} , {   4,  57} , {  -4,  71} , {  -2,  71} , {   2,  58} , {  -1,  74} },
+    {  CTX_UNUSED , {  -4,  44} , {  -1,  69} , {   0,  62} , {  -7,  51} , {  -4,  47} , {  -6,  42} , {  -3,  41} , {  -6,  53} , {   8,  76} , {  -9,  78} , { -11,  83} , {   9,  52} , {   0,  67} , {  -5,  90} },
+    {  {  -3,  78} , {  -8,  74} , {  -9,  72} , { -10,  72} , { -18,  75} , { -12,  71} , { -11,  63} , {  -5,  70} , { -17,  75} , { -14,  72} , { -16,  67} , {  -8,  53} , { -14,  59} , {  -9,  52} , { -11,  68} },
+//    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {   1,  67} , { -15,  72} , {  -5,  75} , {  -8,  80} , { -21,  83} , { -21,  64} , { -13,  31} , { -25,  64} , { -29,  94} , {   9,  75} , {  17,  63} , {  -8,  74} , {  -5,  35} , {  -2,  27} , {  13,  91} },
+    { {   3,  65} , {  -7,  69} , {   8,  77} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , { -10,  66} , {   3,  62} , {  -3,  68} , { -20,  81} , {   0,  30} , {   1,   7} , {  -3,  23} , { -21,  74} , {  16,  66} , { -23, 124} , {  17,  37} , {  44, -18} , {  50, -34} , { -22, 127} }
+  }
+};
+
+
+
+
+static const int INIT_LAST_I[1][8][15][2] =  // [model][8][15][2]: one model; rows 3 and 4 are all CTX_UNUSED, the '//' row is a disabled alternative for row 2
+{
+  //----- model 0 -----
+  {
+    { {  24,   0} , {  15,   9} , {   8,  25} , {  13,  18} , {  15,   9} , {  13,  19} , {  10,  37} , {  12,  18} , {   6,  29} , {  20,  33} , {  15,  30} , {   4,  45} , {   1,  58} , {   0,  62} , {   7,  61} },
+    {  CTX_UNUSED , {  12,  38} , {  11,  45} , {  15,  39} , {  11,  42} , {  13,  44} , {  16,  45} , {  12,  41} , {  10,  49} , {  30,  34} , {  18,  42} , {  10,  55} , {  17,  51} , {  17,  46} , {   0,  89} },
+    {  {  23, -13} , {  26, -13} , {  40, -15} , {  49, -14} , {  44,   3} , {  45,   6} , {  44,  34} , {  33,  54} , {  19,  82} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  12,  33} , {   5,  38} , {   9,  34} , {  18,  22} , {  19,  22} , {  23,  19} , {  26,  16} , {  14,  44} , {  40,  14} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  26, -19} , {  22, -17} , {  26, -17} , {  30, -25} , {  28, -20} , {  33, -23} , {  37, -27} , {  33, -23} , {  40, -28} , {  38, -17} , {  33, -11} , {  40, -15} , {  41,  -6} , {  38,   1} , {  41,  17} },
+    { {  30,  -6} , {  27,   3} , {  26,  22} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  37, -16} , {  35,  -4} , {  38,  -8} , {  38,  -3} , {  37,   3} , {  38,   5} , {  42,   0} , {  35,  16} , {  39,  22} , {  14,  48} , {  27,  37} , {  21,  60} , {  12,  68} , {   2,  97} }
+  }
+};
+static const int INIT_LAST_P[3][8][15][2] =
+{
+  //----- model 0 -----
+  {
+    { {  11,  28} , {   2,  40} , {   3,  44} , {   0,  49} , {   0,  46} , {   2,  44} , {   2,  51} , {   0,  47} , {   4,  39} , {   2,  62} , {   6,  46} , {   0,  54} , {   3,  54} , {   2,  58} , {   4,  63} },
+    {  CTX_UNUSED , {   6,  51} , {   6,  57} , {   7,  53} , {   6,  52} , {   6,  55} , {  11,  45} , {  14,  36} , {   8,  53} , {  -1,  82} , {   7,  55} , {  -3,  78} , {  15,  46} , {  22,  31} , {  -1,  84} },
+    {  {   9,  -2} , {  26,  -9} , {  33,  -9} , {  39,  -7} , {  41,  -2} , {  45,   3} , {  49,   9} , {  45,  27} , {  36,  59} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  25,   7} , {  30,  -7} , {  28,   3} , {  28,   4} , {  32,   0} , {  34,  -1} , {  30,   6} , {  30,   6} , {  32,   9} , {  31,  19} , {  26,  27} , {  26,  30} , {  37,  20} , {  28,  34} , {  17,  70} },
+    { {   1,  67} , {   5,  59} , {   9,  67} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  16,  30} , {  18,  32} , {  18,  35} , {  22,  29} , {  24,  31} , {  23,  38} , {  18,  43} , {  20,  41} , {  11,  63} , {   9,  59} , {   9,  64} , {  -1,  94} , {  -2,  89} , {  -9, 108} }
+  },
+  //----- model 1 -----
+  {
+    { {   4,  45} , {  10,  28} , {  10,  31} , {  33, -11} , {  52, -43} , {  18,  15} , {  28,   0} , {  35, -22} , {  38, -25} , {  34,   0} , {  39, -18} , {  32, -12} , { 102, -94} , {   0,   0} , {  56, -15} },
+    {  CTX_UNUSED , {  33,  -4} , {  29,  10} , {  37,  -5} , {  51, -29} , {  39,  -9} , {  52, -34} , {  69, -58} , {  67, -63} , {  44,  -5} , {  32,   7} , {  55, -29} , {  32,   1} , {   0,   0} , {  27,  36} },
+    {  {  17, -10} , {  32, -13} , {  42,  -9} , {  49,  -5} , {  53,   0} , {  64,   3} , {  68,  10} , {  66,  27} , {  47,  57} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  33, -25} , {  34, -30} , {  36, -28} , {  38, -28} , {  38, -27} , {  34, -18} , {  35, -16} , {  34, -14} , {  32,  -8} , {  37,  -6} , {  35,   0} , {  30,  10} , {  28,  18} , {  26,  25} , {  29,  41} },
+    { {   0,  75} , {   2,  72} , {   8,  77} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  14,  35} , {  18,  31} , {  17,  35} , {  21,  30} , {  17,  45} , {  20,  42} , {  18,  45} , {  27,  26} , {  16,  54} , {   7,  66} , {  16,  56} , {  11,  73} , {  10,  67} , { -10, 116} }
+  },
+  //----- model 2 -----
+  {
+    { {   4,  39} , {   0,  42} , {   7,  34} , {  11,  29} , {   8,  31} , {   6,  37} , {   7,  42} , {   3,  40} , {   8,  33} , {  13,  43} , {  13,  36} , {   4,  47} , {   3,  55} , {   2,  58} , {   6,  60} },
+    {  CTX_UNUSED , {   8,  44} , {  11,  44} , {  14,  42} , {   7,  48} , {   4,  56} , {   4,  52} , {  13,  37} , {   9,  49} , {  19,  58} , {  10,  48} , {  12,  45} , {   0,  69} , {  20,  33} , {   8,  63} },
+    {  {   9,  -2} , {  30, -10} , {  31,  -4} , {  33,  -1} , {  33,   7} , {  31,  12} , {  37,  23} , {  31,  38} , {  20,  64} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  35, -18} , {  33, -25} , {  28,  -3} , {  24,  10} , {  27,   0} , {  34, -14} , {  52, -44} , {  39, -24} , {  19,  17} , {  31,  25} , {  36,  29} , {  24,  33} , {  34,  15} , {  30,  20} , {  22,  73} },
+    { {  20,  34} , {  19,  31} , {  27,  44} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  19,  16} , {  15,  36} , {  15,  36} , {  21,  28} , {  25,  21} , {  30,  20} , {  31,  12} , {  27,  16} , {  24,  42} , {   0,  93} , {  14,  56} , {  15,  57} , {  26,  38} , { -24, 127} }
+  }
+};
+
+
+
+
+
+static const int INIT_ONE_I[1][8][5][2] =  // indexed [model][row][slot][2]: 1 model, 8 rows, 5 integer pairs per row
+{  // NOTE(review): presumably CABAC context-initialisation pairs for I slices -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // the row prefixed with // below is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {  -3,  71} , {  -6,  42} , {  -5,  50} , {  -3,  54} , {  -2,  62} },
+    { {  -5,  67} , {  -5,  27} , {  -3,  39} , {  -2,  44} , {   0,  46} },
+    {  {  -3,  75} , {  -1,  23} , {   1,  34} , {   1,  43} , {   0,  54} },
+//    { {  -9,  75} , {  -1,  44} , {  -2,  49} , {  -2,  51} , {  -1,  51} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -12,  92} , { -15,  55} , { -10,  60} , {  -6,  62} , {  -4,  65} },
+    { { -11,  97} , { -20,  84} , { -11,  79} , {  -6,  73} , {  -4,  74} },
+    { {  -8,  78} , {  -5,  33} , {  -4,  48} , {  -2,  53} , {  -3,  62} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_ONE_P[3][8][5][2] =  // indexed [model][row][slot][2]: 3 models, 8 rows each, 5 integer pairs per row
+{  // NOTE(review): presumably the inter-slice (P) counterpart of INIT_ONE_I, one value set per model -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // in every model the row prefixed with // is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {  -6,  76} , {  -2,  44} , {   0,  45} , {   0,  52} , {  -3,  64} },
+    { {  -9,  77} , {   3,  24} , {   0,  42} , {   0,  48} , {   0,  55} },
+    {  {  -6,  66} , {  -7,  35} , {  -7,  42} , {  -8,  45} , {  -5,  48} },
+//    { {  -3,  58} , {  -1,  28} , {   0,  29} , {   2,  30} , {   1,  35} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {   1,  58} , {  -3,  29} , {  -1,  36} , {   1,  38} , {   2,  43} },
+    { {   0,  70} , {  -4,  29} , {   5,  31} , {   7,  42} , {   1,  59} },
+    { {   0,  58} , {   8,   5} , {  10,  14} , {  14,  18} , {  13,  27} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    { { -23, 112} , { -15,  71} , {  -7,  61} , {   0,  53} , {  -5,  66} },
+    { { -21, 101} , {  -3,  39} , {  -5,  53} , {  -7,  61} , { -11,  75} },
+    {  {  -5,  71} , {   0,  24} , {  -1,  36} , {  -2,  42} , {  -2,  52} },
+//    { {  -3,  58} , {  -1,  28} , {   0,  29} , {   2,  30} , {   1,  35} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -11,  76} , { -10,  44} , { -10,  52} , { -10,  57} , {  -9,  58} },
+    { {   2,  66} , {  -9,  34} , {   1,  32} , {  11,  31} , {   5,  52} },
+    { {   3,  52} , {   7,   4} , {  10,   8} , {  17,   8} , {  16,  19} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    { { -24, 115} , { -22,  82} , {  -9,  62} , {   0,  53} , {   0,  59} },
+    { { -21, 100} , { -14,  57} , { -12,  67} , { -11,  71} , { -10,  77} },
+    {  {  -9,  71} , {  -7,  37} , {  -8,  44} , { -11,  49} , { -10,  56} },
+//    { {  -3,  58} , {  -1,  28} , {   0,  29} , {   2,  30} , {   1,  35} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -10,  82} , {  -8,  48} , {  -8,  61} , {  -8,  66} , {  -7,  70} },
+    { {  -4,  79} , { -22,  69} , { -16,  75} , {  -2,  58} , {   1,  58} },
+    { { -13,  81} , {  -6,  38} , { -13,  62} , {  -6,  58} , {  -2,  59} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_ABS_I[1][8][5][2] =  // indexed [model][row][slot][2]: 1 model, 8 rows, 5 integer pairs per row
+{  // NOTE(review): presumably CABAC context-initialisation pairs for I slices -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // the row prefixed with // below is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {   0,  58} , {   1,  63} , {  -2,  72} , {  -1,  74} , {  -9,  91} },
+    { { -16,  64} , {  -8,  68} , { -10,  78} , {  -6,  77} , { -10,  86} },
+    {  {  -2,  55} , {   0,  61} , {   1,  64} , {   0,  68} , {  -9,  92} },
+//    { {  -4,  56} , {  -1,  59} , {  -6,  71} , {  -8,  74} , { -11,  85} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -12,  73} , {  -8,  76} , {  -7,  80} , {  -9,  88} , { -17, 110} },
+    { { -13,  86} , { -13,  96} , { -11,  97} , { -19, 117} ,  CTX_UNUSED },
+    { { -13,  71} , { -10,  79} , { -12,  86} , { -13,  90} , { -14,  97} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+static const int INIT_ABS_P[3][8][5][2] =  // indexed [model][row][slot][2]: 3 models, 8 rows each, 5 integer pairs per row
+{  // NOTE(review): presumably the inter-slice (P) counterpart of INIT_ABS_I, one value set per model -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // in every model the row prefixed with // is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {  -2,  59} , {  -4,  70} , {  -4,  75} , {  -8,  82} , { -17, 102} },
+    { {  -6,  59} , {  -7,  71} , { -12,  83} , { -11,  87} , { -30, 119} },
+    {  { -12,  56} , {  -6,  60} , {  -5,  62} , {  -8,  66} , {  -8,  76} },
+//    { {  -7,  54} , {  -2,  58} , {  -4,  63} , {  -5,  66} , {   1,  64} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -6,  55} , {   0,  58} , {   0,  64} , {  -3,  74} , { -10,  90} },
+    { {  -2,  58} , {  -3,  72} , {  -3,  81} , { -11,  97} ,  CTX_UNUSED },
+    { {   2,  40} , {   0,  58} , {  -3,  70} , {  -6,  79} , {  -8,  85} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 1 -----
+  {
+    { { -11,  77} , {  -9,  80} , {  -9,  84} , { -10,  87} , { -34, 127} },
+    { { -15,  77} , { -17,  91} , { -25, 107} , { -25, 111} , { -28, 122} },
+    {  {  -9,  57} , {  -6,  63} , {  -4,  65} , {  -4,  67} , {  -7,  82} },
+//    { {  -7,  54} , {  -2,  58} , {  -4,  63} , {  -5,  66} , {   1,  64} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -16,  72} , {  -7,  69} , {  -4,  69} , {  -5,  74} , {  -9,  86} },
+    { {  -2,  55} , {  -2,  67} , {   0,  73} , {  -8,  89} ,  CTX_UNUSED },
+    { {   3,  37} , {  -1,  61} , {  -5,  73} , {  -1,  70} , {  -4,  78} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  },
+  //----- model 2 -----
+  {
+    { { -14,  85} , { -13,  89} , { -13,  94} , { -11,  92} , { -29, 127} },
+    { { -21,  85} , { -16,  88} , { -23, 104} , { -15,  98} , { -37, 127} },
+    {  { -12,  59} , {  -8,  63} , {  -9,  67} , {  -6,  68} , { -10,  79} },
+//    { {  -7,  54} , {  -2,  58} , {  -4,  63} , {  -5,  66} , {   1,  64} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -14,  75} , { -10,  79} , {  -9,  83} , { -12,  92} , { -18, 108} },
+    { { -13,  78} , {  -9,  83} , {  -4,  81} , { -13,  99} ,  CTX_UNUSED },
+    { { -16,  73} , { -10,  76} , { -13,  86} , {  -9,  83} , { -10,  87} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+  }
+};
+
+
+
+
+
+static const int INIT_FLD_MAP_I[1][8][15][2] =  // indexed [model][row][slot][2]: 1 model, 8 rows, 15 integer pairs per row
+{  // NOTE(review): presumably CABAC context-initialisation pairs for I slices -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // the row prefixed with // below is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {  -6,  93} , {  -6,  84} , {  -8,  79} , {   0,  66} , {  -1,  71} , {   0,  62} , {  -2,  60} , {  -2,  59} , {  -5,  75} , {  -3,  62} , {  -4,  58} , {  -9,  66} , {  -1,  79} , {   0,  71} , {   3,  68} },
+    {  CTX_UNUSED , {  10,  44} , {  -7,  62} , {  15,  36} , {  14,  40} , {  16,  27} , {  12,  29} , {   1,  44} , {  20,  36} , {  18,  32} , {   5,  42} , {   1,  48} , {  10,  62} , {  17,  46} , {   9,  64} },
+    {  { -14, 106} , { -13,  97} , { -15,  90} , { -12,  90} , { -18,  88} , { -10,  73} , {  -9,  79} , { -14,  86} , { -10,  73} , { -10,  70} , { -10,  69} , {  -5,  66} , {  -9,  64} , {  -5,  58} , {   2,  59} },
+//    { {  -1,  73} , {  -7,  73} , {  -6,  76} , {  -7,  71} , {  -9,  72} , {  -5,  65} , { -14,  83} , {  -8,  72} , { -10,  75} , {  -5,  64} , {  -4,  59} , { -13,  79} , {  -9,  69} , {  -8,  66} , {   3,  55} },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { { -12, 104} , { -11,  97} , { -16,  96} , {  -7,  88} , {  -8,  85} , {  -7,  85} , {  -9,  85} , { -13,  88} , {   4,  66} , {  -3,  77} , {  -3,  76} , {  -6,  76} , {  10,  58} , {  -1,  76} , {  -1,  83} },
+    { {  -7,  99} , { -14,  95} , {   2,  95} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {   0,  76} , {  -5,  74} , {   0,  70} , { -11,  75} , {   1,  68} , {   0,  65} , { -14,  73} , {   3,  62} , {   4,  62} , {  -1,  68} , { -13,  75} , {  11,  55} , {   5,  64} , {  12,  70} }
+  }
+};
+static const int INIT_FLD_MAP_P[3][8][15][2] =  // indexed [model][row][slot][2]: 3 models, 8 rows each, 15 integer pairs per row
+{  // NOTE(review): presumably the inter-slice (P) counterpart of INIT_FLD_MAP_I, one value set per model -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // in every model the row prefixed with // is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { { -13, 106} , { -16, 106} , { -10,  87} , { -21, 114} , { -18, 110} , { -14,  98} , { -22, 110} , { -21, 106} , { -18, 103} , { -21, 107} , { -23, 108} , { -26, 112} , { -10,  96} , { -12,  95} , {  -5,  91} },
+    {  CTX_UNUSED , {  -9,  93} , { -22,  94} , {  -5,  86} , {   9,  67} , {  -4,  80} , { -10,  85} , {  -1,  70} , {   7,  60} , {   9,  58} , {   5,  61} , {  12,  50} , {  15,  50} , {  18,  49} , {  17,  54} },
+    {  {  -5,  85} , {  -6,  81} , { -10,  77} , {  -7,  81} , { -17,  80} , { -18,  73} , {  -4,  74} , { -10,  83} , {  -9,  71} , {  -9,  67} , {  -1,  61} , {  -8,  66} , { -14,  66} , {   0,  59} , {   2,  59} },
+//    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  10,  41} , {   7,  46} , {  -1,  51} , {   7,  49} , {   8,  52} , {   9,  41} , {   6,  47} , {   2,  55} , {  13,  41} , {  10,  44} , {   6,  50} , {   5,  53} , {  13,  49} , {   4,  63} , {   6,  64} },
+    { {  -2,  69} , {  -2,  59} , {   6,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  10,  44} , {   9,  31} , {  12,  43} , {   3,  53} , {  14,  34} , {  10,  38} , {  -3,  52} , {  13,  40} , {  17,  32} , {   7,  44} , {   7,  38} , {  13,  50} , {  10,  57} , {  26,  43} }
+  },
+  //----- model 1 -----
+  {
+    { { -21, 126} , { -23, 124} , { -20, 110} , { -26, 126} , { -25, 124} , { -17, 105} , { -27, 121} , { -27, 117} , { -17, 102} , { -26, 117} , { -27, 116} , { -33, 122} , { -10,  95} , { -14, 100} , {  -8,  95} },
+    {  CTX_UNUSED , { -17, 111} , { -28, 114} , {  -6,  89} , {  -2,  80} , {  -4,  82} , {  -9,  85} , {  -8,  81} , {  -1,  72} , {   5,  64} , {   1,  67} , {   9,  56} , {   0,  69} , {   1,  69} , {   7,  69} },
+    {  {  -3,  81} , {  -3,  76} , {  -7,  72} , {  -6,  78} , { -12,  72} , { -14,  68} , {  -3,  70} , {  -6,  76} , {  -5,  66} , {  -5,  62} , {   0,  57} , {  -4,  61} , {  -9,  60} , {   1,  54} , {   2,  58} },
+//    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  -7,  69} , {  -6,  67} , { -16,  77} , {  -2,  64} , {   2,  61} , {  -6,  67} , {  -3,  64} , {   2,  57} , {  -3,  65} , {  -3,  66} , {   0,  62} , {   9,  51} , {  -1,  66} , {  -2,  71} , {  -2,  75} },
+    { {  -1,  70} , {  -9,  72} , {  14,  60} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  16,  37} , {   0,  47} , {  18,  35} , {  11,  37} , {  12,  41} , {  10,  41} , {   2,  48} , {  12,  41} , {  13,  41} , {   0,  59} , {   3,  50} , {  19,  40} , {   3,  66} , {  18,  50} }
+  },
+  //----- model 2 -----
+  {
+    { { -22, 127} , { -25, 127} , { -25, 120} , { -27, 127} , { -19, 114} , { -23, 117} , { -25, 118} , { -26, 117} , { -24, 113} , { -28, 118} , { -31, 120} , { -37, 124} , { -10,  94} , { -15, 102} , { -10,  99} },
+    {  CTX_UNUSED , { -13, 106} , { -50, 127} , {  -5,  92} , {  17,  57} , {  -5,  86} , { -13,  94} , { -12,  91} , {  -2,  77} , {   0,  71} , {  -1,  73} , {   4,  64} , {  -7,  81} , {   5,  64} , {  15,  57} },
+    {  {  -3,  78} , {  -8,  74} , {  -9,  72} , { -10,  72} , { -18,  75} , { -12,  71} , { -11,  63} , {  -5,  70} , { -17,  75} , { -14,  72} , { -16,  67} , {  -8,  53} , { -14,  59} , {  -9,  52} , { -11,  68} },
+//    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {   1,  67} , {   0,  68} , { -10,  67} , {   1,  68} , {   0,  77} , {   2,  64} , {   0,  68} , {  -5,  78} , {   7,  55} , {   5,  59} , {   2,  65} , {  14,  54} , {  15,  44} , {   5,  60} , {   2,  70} },
+    { {  -2,  76} , { -18,  86} , {  12,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {   5,  64} , { -12,  70} , {  11,  55} , {   5,  56} , {   0,  69} , {   2,  65} , {  -6,  74} , {   5,  54} , {   7,  54} , {  -6,  76} , { -11,  82} , {  -2,  77} , {  -2,  77} , {  25,  42} }
+  }
+};
+
+
+
+
+
+static const int INIT_FLD_LAST_I[1][8][15][2] =  // indexed [model][row][slot][2]: 1 model, 8 rows, 15 integer pairs per row
+{  // NOTE(review): presumably CABAC context-initialisation pairs for I slices -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // the row prefixed with // below is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {  15,   6} , {   6,  19} , {   7,  16} , {  12,  14} , {  18,  13} , {  13,  11} , {  13,  15} , {  15,  16} , {  12,  23} , {  13,  23} , {  15,  20} , {  14,  26} , {  14,  44} , {  17,  40} , {  17,  47} },
+    {  CTX_UNUSED , {  24,  17} , {  21,  21} , {  25,  22} , {  31,  27} , {  22,  29} , {  19,  35} , {  14,  50} , {  10,  57} , {   7,  63} , {  -2,  77} , {  -4,  82} , {  -3,  94} , {   9,  69} , { -12, 109} },
+    {  {  21, -10} , {  24, -11} , {  28,  -8} , {  28,  -1} , {  29,   3} , {  29,   9} , {  35,  20} , {  29,  36} , {  14,  67} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  12,  33} , {   5,  38} , {   9,  34} , {  18,  22} , {  19,  22} , {  23,  19} , {  26,  16} , {  14,  44} , {  40,  14} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  36, -35} , {  36, -34} , {  32, -26} , {  37, -30} , {  44, -32} , {  34, -18} , {  34, -15} , {  40, -15} , {  33,  -7} , {  35,  -5} , {  33,   0} , {  38,   2} , {  33,  13} , {  23,  35} , {  13,  58} },
+    { {  29,  -3} , {  26,   0} , {  22,  30} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  31,  -7} , {  35, -15} , {  34,  -3} , {  34,   3} , {  36,  -1} , {  34,   5} , {  32,  11} , {  35,   5} , {  34,  12} , {  39,  11} , {  30,  29} , {  34,  26} , {  29,  39} , {  19,  66} }
+  }
+};
+static const int INIT_FLD_LAST_P[3][8][15][2] =  // indexed [model][row][slot][2]: 3 models, 8 rows each, 15 integer pairs per row
+{  // NOTE(review): presumably the inter-slice (P) counterpart of INIT_FLD_LAST_I, one value set per model -- confirm against the code that reads this table
+  //----- model 0 -----
+  {  // in every model the row prefixed with // is commented out, so exactly 8 rows are compiled; CTX_UNUSED is a macro defined outside this view
+    { {  14,  11} , {  11,  14} , {   9,  11} , {  18,  11} , {  21,   9} , {  23,  -2} , {  32, -15} , {  32, -15} , {  34, -21} , {  39, -23} , {  42, -33} , {  41, -31} , {  46, -28} , {  38, -12} , {  21,  29} },
+    {  CTX_UNUSED , {  45, -24} , {  53, -45} , {  48, -26} , {  65, -43} , {  43, -19} , {  39, -10} , {  30,   9} , {  18,  26} , {  20,  27} , {   0,  57} , { -14,  82} , {  -5,  75} , { -19,  97} , { -35, 125} },
+    {  {  21, -13} , {  33, -14} , {  39,  -7} , {  46,  -2} , {  51,   2} , {  60,   6} , {  61,  17} , {  55,  34} , {  42,  62} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  27,   0} , {  28,   0} , {  31,  -4} , {  27,   6} , {  34,   8} , {  30,  10} , {  24,  22} , {  33,  19} , {  22,  32} , {  26,  31} , {  21,  41} , {  26,  44} , {  23,  47} , {  16,  65} , {  14,  71} },
+    { {   8,  60} , {   6,  63} , {  17,  65} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  21,  24} , {  23,  20} , {  26,  23} , {  27,  32} , {  28,  23} , {  28,  24} , {  23,  40} , {  24,  32} , {  28,  29} , {  23,  42} , {  19,  57} , {  22,  53} , {  22,  61} , {  11,  86} }
+  },
+  //----- model 1 -----
+  {
+    { {  19,  -6} , {  18,  -6} , {  14,   0} , {  26, -12} , {  31, -16} , {  33, -25} , {  33, -22} , {  37, -28} , {  39, -30} , {  42, -30} , {  47, -42} , {  45, -36} , {  49, -34} , {  41, -17} , {  32,   9} },
+    {  CTX_UNUSED , {  69, -71} , {  63, -63} , {  66, -64} , {  77, -74} , {  54, -39} , {  52, -35} , {  41, -10} , {  36,   0} , {  40,  -1} , {  30,  14} , {  28,  26} , {  23,  37} , {  12,  55} , {  11,  65} },
+    {  {  17, -10} , {  32, -13} , {  42,  -9} , {  49,  -5} , {  53,   0} , {  64,   3} , {  68,  10} , {  66,  27} , {  47,  57} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  37, -33} , {  39, -36} , {  40, -37} , {  38, -30} , {  46, -33} , {  42, -30} , {  40, -24} , {  49, -29} , {  38, -12} , {  40, -10} , {  38,  -3} , {  46,  -5} , {  31,  20} , {  29,  30} , {  25,  44} },
+    { {  12,  48} , {  11,  49} , {  26,  45} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  22,  22} , {  23,  22} , {  27,  21} , {  33,  20} , {  26,  28} , {  30,  24} , {  27,  34} , {  18,  42} , {  25,  39} , {  18,  50} , {  12,  70} , {  21,  54} , {  14,  71} , {  11,  83} }
+  },
+  //----- model 2 -----
+  {
+    { {  17, -13} , {  16,  -9} , {  17, -12} , {  27, -21} , {  37, -30} , {  41, -40} , {  42, -41} , {  48, -47} , {  39, -32} , {  46, -40} , {  52, -51} , {  46, -41} , {  52, -39} , {  43, -19} , {  32,  11} },
+    {  CTX_UNUSED , {  61, -55} , {  56, -46} , {  62, -50} , {  81, -67} , {  45, -20} , {  35,  -2} , {  28,  15} , {  34,   1} , {  39,   1} , {  30,  17} , {  20,  38} , {  18,  45} , {  15,  54} , {   0,  79} },
+    {  {   9,  -2} , {  30, -10} , {  31,  -4} , {  33,  -1} , {  33,   7} , {  31,  12} , {  37,  23} , {  31,  38} , {  20,  64} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+//    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    { {  36, -16} , {  37, -14} , {  37, -17} , {  32,   1} , {  34,  15} , {  29,  15} , {  24,  25} , {  34,  22} , {  31,  16} , {  35,  18} , {  31,  28} , {  33,  41} , {  36,  28} , {  27,  47} , {  21,  62} },
+    { {  18,  31} , {  19,  26} , {  36,  24} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+    {  CTX_UNUSED , {  24,  23} , {  27,  16} , {  24,  30} , {  31,  29} , {  22,  41} , {  22,  42} , {  16,  60} , {  15,  52} , {  14,  60} , {   3,  78} , { -16, 123} , {  21,  53} , {  22,  56} , {  25,  61} }
+  }
+};
+
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/decoder.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/decoder.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/decoder.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,647 @@
+
+/*!
+ *************************************************************************************
+ * \file decoder.c
+ *
+ * \brief
+ *    Contains functions that implement the "decoders in the encoder" concept for the
+ *    rate-distortion optimization with losses.
+ * \date
+ *    October 22nd, 2001
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and
+ *    affiliation details)
+ *    - Dimitrios Kontopodis                    <dkonto at eikon.tum.de>
+ *************************************************************************************
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+
+#include "global.h"
+#include "refbuf.h"
+#include "image.h"
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Decodes one 8x8 luma partition of the current macroblock for one simulated decoder
+ *
+ * \note
+ *    Gives the expected value in the decoder of one 8x8 block. This is done based on the
+ *    stored reconstructed residue decs->resY[][], the reconstructed values imgY[][]
+ *    and the motion vectors. The decoded 8x8 block is written to decs->decY[decoder][][].
+ *************************************************************************************
+ */
+void decode_one_b8block (int decoder, int mbmode, int b8block, int b8mode, int b8ref)
+{
+  int i,j,block_y,block_x,bx,by;
+  int ref_inx = (IMG_NUMBER-1)%img->num_ref_frames;   // default reference buffer index; recomputed below for some B slices
+
+  int mv[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE];   // per 4x4 block: mv[0] is passed as horizontal, mv[1] as vertical component
+  int resY_tmp[MB_BLOCK_SIZE][MB_BLOCK_SIZE];  // residue added to the prediction; only this partition's area is filled and read
+
+  int i0 = (b8block%2)<<3,   i1 = i0+8,   bx0 = i0>>2,   bx1 = bx0+2;   // horizontal pixel range [i0,i1) and 4x4-block range [bx0,bx1)
+  int j0 = (b8block/2)<<3,   j1 = j0+8,   by0 = j0>>2,   by1 = by0+2;   // vertical pixel range [j0,j1) and 4x4-block range [by0,by1)
+
+  if (img->type==I_SLICE)   // I slice: take the encoder's reconstruction unchanged
+  {
+    for(i=i0;i<i1;i++)
+    for(j=j0;j<j1;j++)
+    {
+      decs->decY[decoder][img->pix_y+j][img->pix_x+i]=enc_picture->imgY[img->pix_y+j][img->pix_x+i];
+    }
+  }
+  else
+  {
+    if (mbmode==0 && (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0)))   // mode 0: zero residue, zero motion
+    {
+      for(i=i0;i<i1;i++)
+      for(j=j0;j<j1;j++)
+      {
+        resY_tmp[j][i]=0;
+      }
+      for (by=by0; by<by1; by++)
+      for (bx=bx0; bx<bx1; bx++)
+      {
+        mv[0][by][bx] = mv[1][by][bx] = 0;
+      }
+    }
+    else
+    {
+      if (b8mode>=1 && b8mode<=7)   // modes 1..7: use the LIST_0 motion vectors stored for (b8ref, b8mode)
+      {
+        for (by=by0; by<by1; by++)
+        for (bx=bx0; bx<bx1; bx++)
+        {
+          mv[0][by][bx] = img->all_mv[by][bx][LIST_0][b8ref][b8mode][0];
+          mv[1][by][bx] = img->all_mv[by][bx][LIST_0][b8ref][b8mode][1];
+        }
+      }
+      else   // any other b8mode: zero motion (these vectors are not used by the intra path below)
+      {
+        for (by=by0; by<by1; by++)
+        for (bx=bx0; bx<bx1; bx++)
+        {
+          mv[0][by][bx] = mv[1][by][bx] = 0;
+        }
+      }
+
+      for(i=i0;i<i1;i++)   // take the stored residue of this partition from decs->resY
+      for(j=j0;j<j1;j++)
+      {
+        resY_tmp[j][i]=decs->resY[j][i];
+      }
+    }
+
+    // Decode Luminance: motion-compensated prediction plus residue, one 4x4 block at a time
+    if ((b8mode>=1 && b8mode<=7) || (mbmode==0 && (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0))))
+    {
+      for (by=by0; by<by1; by++)
+      for (bx=bx0; bx<bx1; bx++)
+      {
+        block_x = img->block_x+bx;
+        block_y = img->block_y+by;
+        if (img->type == B_SLICE && enc_picture != enc_frame_picture)   // in this case the reference index depends on b8ref
+          ref_inx = (IMG_NUMBER-b8ref-2)%img->num_ref_frames;
+
+        Get_Reference_Block (decs->decref[decoder][ref_inx],
+                             block_y, block_x,
+                             mv[0][by][bx],
+                             mv[1][by][bx],
+                             decs->RefBlock);
+        for (j=0; j<4; j++)
+        for (i=0; i<4; i++)
+        {
+          decs->decY[decoder][block_y*4+j][block_x*4+i] = resY_tmp[by*4+j][bx*4+i] + decs->RefBlock[j][i];   // the sum is stored without clipping
+        }
+      }
+    }
+    else
+    {
+      // Intra Refresh - Assume no spatial prediction
+      for(i=i0;i<i1;i++)
+      for(j=j0;j<j1;j++)
+      {
+        decs->decY[decoder][img->pix_y+j][img->pix_x+i] = enc_picture->imgY[img->pix_y+j][img->pix_x+i];
+      }
+    }
+  }
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Decodes one macroblock by decoding its four 8x8 partitions (b8block 0..3) in turn
+ *************************************************************************************
+ */
+void decode_one_mb (int decoder, Macroblock* currMB)
+{
+  decode_one_b8block (decoder, currMB->mb_type, 0, currMB->b8mode[0], enc_picture->ref_idx[LIST_0][img->block_y+0][img->block_x+0]);//refFrArr[img->block_y+0][img->block_x+0]);
+  decode_one_b8block (decoder, currMB->mb_type, 1, currMB->b8mode[1], enc_picture->ref_idx[LIST_0][img->block_y+0][img->block_x+2]);//refFrArr[img->block_y+0][img->block_x+2]);
+  decode_one_b8block (decoder, currMB->mb_type, 2, currMB->b8mode[2], enc_picture->ref_idx[LIST_0][img->block_y+2][img->block_x+0]);//refFrArr[img->block_y+2][img->block_x+0]);
+  decode_one_b8block (decoder, currMB->mb_type, 3, currMB->b8mode[3], enc_picture->ref_idx[LIST_0][img->block_y+2][img->block_x+2]);//refFrArr[img->block_y+2][img->block_x+2]);
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Finds the reference block (BLOCK_SIZE x BLOCK_SIZE) given the decoded reference frame
+ * \note
+ *    This is based on the function UnifiedOneForthPix, only it is modified to
+ *    be used at the "many decoders in the encoder" RD optimization. In this case
+ *    we do not want to keep full upsampled reference frames for all decoders, so
+ *    we just upsample when it is necessary.
+ * \param imY
+ *    The frame to be upsampled
+ * \param block_y
+ *    The row of the block, whose prediction we want to find
+ * \param block_x
+ *    The column of the block, whose prediction we want to find
+ * \param mvhor
+ *    Motion vector, horizontal part
+ * \param mvver
+ *    Motion vector, vertical part
+ * \param out
+ *    Output: The prediction for the block (block_y, block_x)
+ *************************************************************************************
+ */
+void Get_Reference_Block(imgpel **imY,
+                         int block_y,
+                         int block_x,
+                         int mvhor,
+                         int mvver,
+                         imgpel **out)
+{
+  int i,j,y,x;
+
+  y = block_y * BLOCK_SIZE * 4 + mvver;   // top-left position in quarter-sample units: block index * BLOCK_SIZE pixels * 4, plus the vector
+  x = block_x * BLOCK_SIZE * 4 + mvhor;
+
+  for (j=0; j<BLOCK_SIZE; j++)
+    for (i=0; i<BLOCK_SIZE; i++)
+      out[j][i] = Get_Reference_Pixel(imY, y+j*4, x+i*4);   // a step of 4 quarter-sample units is one full pixel
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Finds a pixel (y,x) of the upsampled reference frame
+ * \note
+ *    Based on UnifiedOneForthPix, but modified for the "many decoders in the encoder" RD
+ *    optimization: full upsampled reference frames are not kept for all decoders, so each
+ *    sample is interpolated when needed. y_pos/x_pos are in quarter-sample units; integer
+ *    coordinates are clipped to [0,height-1] x [0,width-1] before every read of imY.
+ *************************************************************************************
+ */
+byte Get_Reference_Pixel(imgpel **imY, int y_pos, int x_pos)
+{
+
+  int dx, x;
+  int dy, y;
+  int maxold_x,maxold_y;
+
+  int result = 0, result1, result2;
+  int pres_x;
+  int pres_y;
+
+  int tmp_res[6];   // unnormalized first-pass filter outputs for the two-pass cases
+
+  static const int COEF[6] = {   // 6-tap filter; the taps sum to 32, hence the (x+16)/32 rounding below
+    1, -5, 20, 20, -5, 1
+  };
+
+
+  dx = x_pos&3;            // fractional part, 0..3 quarter samples
+  dy = y_pos&3;
+  x_pos = (x_pos-dx)/4;    // integer sample position
+  y_pos = (y_pos-dy)/4;
+  maxold_x = img->width-1;
+  maxold_y = img->height-1;
+
+  if (dx == 0 && dy == 0) { /* fullpel position */
+    result = imY[iClip3(0,maxold_y,y_pos)][iClip3(0,maxold_x,x_pos)];
+  }
+  else { /* other positions */
+
+    if (dy == 0) {   // horizontal-only fraction: filter along the row
+
+      pres_y = iClip3(0,maxold_y,y_pos);
+      for(x=-2;x<4;x++) {
+        pres_x = iClip3(0,maxold_x,x_pos+x);
+        result += imY[pres_y][pres_x]*COEF[x+2];
+      }
+
+      result = iClip3(0, img->max_imgpel_value, (result+16)/32);   // half-sample value (dx == 2 stops here)
+
+      if (dx == 1) {   // quarter positions: average the half sample with the nearer full sample
+        result = (result + imY[pres_y][iClip3(0,maxold_x,x_pos)])/2;
+      }
+      else if (dx == 3) {
+        result = (result + imY[pres_y][iClip3(0,maxold_x,x_pos+1)])/2;
+      }
+    }
+    else if (dx == 0) {   // vertical-only fraction: filter along the column
+
+      pres_x = iClip3(0,maxold_x,x_pos);
+      for(y=-2;y<4;y++) {
+        pres_y = iClip3(0,maxold_y,y_pos+y);
+        result += imY[pres_y][pres_x]*COEF[y+2];
+      }
+
+      result = iClip3(0, img->max_imgpel_value, (result+16)/32);
+
+      if (dy == 1) {
+        result = (result + imY[iClip3(0,maxold_y,y_pos)][pres_x])/2;
+      }
+      else if (dy == 3) {
+        result = (result + imY[iClip3(0,maxold_y,y_pos+1)][pres_x])/2;
+      }
+    }
+    else if (dx == 2) {   // horizontal half sample with dy != 0: filter six rows horizontally, then vertically
+
+      for(y=-2;y<4;y++) {
+        result = 0;
+        pres_y = iClip3(0,maxold_y,y_pos+y);
+        for(x=-2;x<4;x++) {
+          pres_x = iClip3(0,maxold_x,x_pos+x);
+          result += imY[pres_y][pres_x]*COEF[x+2];
+        }
+        tmp_res[y+2] = result;
+      }
+
+      result = 0;
+      for(y=-2;y<4;y++) {
+        result += tmp_res[y+2]*COEF[y+2];
+      }
+
+      result = iClip3(0, img->max_imgpel_value, (result+512)/1024);   // two passes: normalize by 32*32
+
+      if (dy == 1) {   // average with the horizontal half sample of row y_pos (tmp_res[2])
+        result = (result + iClip3(0, img->max_imgpel_value, (tmp_res[2]+16)/32))/2;
+      }
+      else if (dy == 3) {   // average with the horizontal half sample of row y_pos+1 (tmp_res[3])
+        result = (result + iClip3(0, img->max_imgpel_value, (tmp_res[3]+16)/32))/2;
+      }
+    }
+    else if (dy == 2) {   // vertical half sample with dx of 1 or 3: filter six columns vertically, then horizontally
+
+      for(x=-2;x<4;x++) {
+        result = 0;
+        pres_x = iClip3(0,maxold_x,x_pos+x);
+        for(y=-2;y<4;y++) {
+          pres_y = iClip3(0,maxold_y,y_pos+y);
+          result += imY[pres_y][pres_x]*COEF[y+2];
+        }
+        tmp_res[x+2] = result;
+      }
+
+      result = 0;
+      for(x=-2;x<4;x++) {
+        result += tmp_res[x+2]*COEF[x+2];
+      }
+
+      result = iClip3(0, img->max_imgpel_value, (result+512)/1024);
+
+      if (dx == 1) {
+        result = (result + iClip3(0, img->max_imgpel_value, (tmp_res[2]+16)/32))/2;
+      }
+      else {   // dx == 3 here, since dx == 0 and dx == 2 were handled by earlier branches
+        result = (result + iClip3(0, img->max_imgpel_value, (tmp_res[3]+16)/32))/2;
+      }
+    }
+    else {   // dx and dy both 1 or 3: average one horizontal and one vertical half sample
+
+      result = 0;
+      pres_y = dy == 1 ? y_pos : y_pos+1;   // row used for the horizontal half sample
+      pres_y = iClip3(0,maxold_y,pres_y);
+
+      for(x=-2;x<4;x++) {
+        pres_x = iClip3(0,maxold_x,x_pos+x);
+        result += imY[pres_y][pres_x]*COEF[x+2];
+      }
+
+      result1 = iClip3(0, img->max_imgpel_value, (result+16)/32);
+
+      result = 0;
+      pres_x = dx == 1 ? x_pos : x_pos+1;   // column used for the vertical half sample
+      pres_x = iClip3(0,maxold_x,pres_x);
+
+      for(y=-2;y<4;y++) {
+        pres_y = iClip3(0,maxold_y,y_pos+y);
+        result += imY[pres_y][pres_x]*COEF[y+2];
+      }
+
+      result2 = iClip3(0, img->max_imgpel_value, (result+16)/32);
+      result = (result1+result2)/2;
+    }
+  }
+
+  return result;   // NOTE(review): int narrowed to byte on return; confirm img->max_imgpel_value never exceeds 255 on this path
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Performs the simulation of the packet losses, calls the error concealment funcs
+ *    and copies the decoded images to the reference frame buffers of the decoders
+ *    (done once for each of the input->NoOfDecoders simulated decoders)
+ *************************************************************************************
+ */
+void UpdateDecoders()
+{
+  int k;
+  for (k=0; k<input->NoOfDecoders; k++)
+  {
+    Build_Status_Map(decs->status_map); // simulates the packet losses; the shared map is rebuilt for every decoder
+    Error_Concealment(decs->decY_best[k], decs->status_map, decs->decref[k]); // for the moment error concealment is just a "copy"
+    // Move decoded frames to reference buffers: (at the decoders this is done
+    // without interpolation (upsampling) - upsampling is done while decoding
+    DecOneForthPix(decs->decY_best[k], decs->decref[k]);
+  }
+}
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copies one (reconstructed) image to the respective reference frame buffer
+ *
+ * \note
+ *    This is used at the "many decoders in the encoder"
+ * \param dY
+ *    The reconstructed image
+ * \param dref
+ *    The reference buffers; the slot IMG_NUMBER%img->buf_cycle is overwritten
+ *************************************************************************************
+ */
+void DecOneForthPix(imgpel **dY, imgpel ***dref)
+{
+  int j, ref=IMG_NUMBER%img->buf_cycle;   // NOTE(review): readers in this file index with img->num_ref_frames; confirm both agree
+
+  for (j=0; j<img->height; j++)   // row-by-row copy of img->width samples, no upsampling
+    memcpy(dref[ref][j], dY[j], img->width*sizeof(imgpel));
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Stores in decs->resY the residue (reconstruction minus prediction) of an 8x8 block
+ *************************************************************************************
+ */
+void compute_residue_b8block (int b8block, int i16mode) // if not INTRA16x16 it has to be -1
+{
+  int i,j;
+  int i0 = (b8block%2)<<3,   i1 = i0+8;   // horizontal pixel range [i0,i1) of the 8x8 block inside the macroblock
+  int j0 = (b8block/2)<<3,   j1 = j0+8;   // vertical pixel range [j0,j1)
+
+  if (i16mode>=0)   // non-negative mode: prediction is taken from img->mprr_2[i16mode]
+  {
+    for (i=i0; i<i1; i++)
+    for (j=j0; j<j1; j++)
+    {
+      decs->resY[j][i] = enc_picture->imgY[img->pix_y+j][img->pix_x+i] - img->mprr_2[i16mode][j][i];
+    }
+  }
+  else   // negative mode: prediction is taken from img->mpr
+  {
+    for (i=i0; i<i1; i++)
+    for (j=j0; j<j1; j++)
+    {
+      decs->resY[j][i] = enc_picture->imgY[img->pix_y+j][img->pix_x+i] - img->mpr[j][i];
+    }
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Gives the prediction residue for a macroblock (all four 8x8 blocks, same i16mode)
+ *************************************************************************************
+ */
+void compute_residue_mb (int i16mode)
+{
+  compute_residue_b8block (0, i16mode);
+  compute_residue_b8block (1, i16mode);
+  compute_residue_b8block (2, i16mode);
+  compute_residue_b8block (3, i16mode);
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Builds a random status map showing whether each MB is received or lost, based
+ *    on the packet loss rate and the slice structure.
+ *    Stored values: 0 = received, 1 = A lost, 2 = B lost, 3 = C lost, 5 = B and C lost.
+ * \param s_map
+ *    The status map to be filled
+ *************************************************************************************
+ */
+void Build_Status_Map(byte **s_map)
+{
+  int i,j,slice=-1,mb=0,jj,ii,packet_lost=0;
+
+  jj = img->height/MB_BLOCK_SIZE;   // picture height in macroblocks
+  ii = img->width/MB_BLOCK_SIZE;    // picture width in macroblocks
+
+  for (j=0 ; j<jj; j++)
+  for (i=0 ; i<ii; i++)
+  {
+    if (!input->slice_mode || img->mb_data[mb].slice_nr != slice) /* new slice (every MB when slice_mode is 0) */
+    {
+      packet_lost=0;
+      if ((double)rand()/(double)RAND_MAX*100 < input->LossRateC)   packet_lost += 3;   // loss rates are compared against a 0..100 draw
+      if ((double)rand()/(double)RAND_MAX*100 < input->LossRateB)   packet_lost += 2;
+      if ((double)rand()/(double)RAND_MAX*100 < input->LossRateA)   packet_lost  = 1;   // assignment: overrides any B/C result
+      slice++;   // NOTE(review): incremented rather than set to slice_nr; presumably slice numbers rise by one in MB order - confirm
+    }
+    if (!packet_lost)
+    {
+      s_map[j][i]=0;  //! Packet OK
+    }
+    else
+    {
+      s_map[j][i]=packet_lost;
+      if(input->partition_mode == 0)  s_map[j][i]=1;   // with partition_mode 0 every loss is stored as 1
+    }
+    mb++;
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Performs some sort of error concealment for the areas that are lost according
+ *    to the status_map
+ *
+ * \param inY
+ *    Error concealment is performed on this frame inY[][]
+ * \param s_map
+ *    The status map shows which areas are lost (non-zero entry = macroblock affected).
+ * \param refY
+ *    The set of reference frames - may be used for the error concealment.
+ *************************************************************************************
+ */
+void Error_Concealment(imgpel **inY, byte **s_map, imgpel ***refY)
+{
+  int mb_y, mb_x, mb_h, mb_w;
+  mb_h = img->height/MB_BLOCK_SIZE;   // picture size in macroblocks
+  mb_w = img->width/MB_BLOCK_SIZE;
+
+  for (mb_y=0; mb_y < mb_h; mb_y++)
+  for (mb_x=0; mb_x < mb_w; mb_x++)
+  {
+    if (s_map[mb_y][mb_x])   Conceal_Error(inY, mb_y, mb_x, refY, s_map);
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Conceals the MB (mb_y,mb_x) of the frame inY[][] according to s_map[mb_y][mb_x]:
+ *    copy from the reference frame, motion-compensated copy, or mid-grey (127) fill.
+ *************************************************************************************
+ */
+void Conceal_Error(imgpel **inY, int mb_y, int mb_x, imgpel ***refY, byte **s_map)
+{
+  int i,j,block_x, block_y;
+  int ref_inx = (IMG_NUMBER-1)%img->num_ref_frames;
+  int pos_y = mb_y*MB_BLOCK_SIZE, pos_x = mb_x*MB_BLOCK_SIZE;   // top-left pixel of the macroblock
+  int mv[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE];
+  int resY[MB_BLOCK_SIZE][MB_BLOCK_SIZE];   // NOTE(review): zeroed below but never read in this function
+  int copy  = (decs->dec_mb_mode[mb_y][mb_x]==0 && (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0)));
+  int inter = (((decs->dec_mb_mode[mb_y][mb_x]>=1 && decs->dec_mb_mode[mb_y][mb_x]<=3) || decs->dec_mb_mode[mb_y][mb_x]==P8x8) && (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0)));
+  short ***tmp_mv = enc_picture->mv[LIST_0];
+
+  switch(s_map[mb_y][mb_x])
+  {
+  case 1: //! whole slice lost (at least partition A lost)
+    if (img->type!=I_SLICE)
+    {
+      for (j=0;j<MB_BLOCK_SIZE;j++)
+        for (i=0;i<MB_BLOCK_SIZE;i++)
+          inY[pos_y+j][pos_x+i] = refY[ref_inx][pos_y+j][pos_x+i];
+    }
+    else
+    {
+      for (j=0;j<MB_BLOCK_SIZE;j++)
+        for (i=0;i<MB_BLOCK_SIZE;i++)
+          inY[pos_y+j][pos_x+i] = 127;
+    }
+    break;
+  case 5: //! partition B and partition C lost
+
+    //! Copy motion vectors
+    for (block_y=0; block_y<BLOCK_MULTIPLE; block_y++)
+      for (block_x=0; block_x<BLOCK_MULTIPLE; block_x++)
+        for (i=0;i<2;i++)
+          mv[i][block_y][block_x]=tmp_mv[mb_y*BLOCK_SIZE+block_y][mb_x*BLOCK_SIZE+block_x+4][i];   // NOTE(review): confirm the "+4" column offset matches the layout of enc_picture->mv
+
+    //! Residue is set to zero
+    for(i=0;i<MB_BLOCK_SIZE;i++)
+      for(j=0;j<MB_BLOCK_SIZE;j++)
+        resY[j][i]=0;
+
+    //! not first frame
+    if (img->type!=I_SLICE)
+    {
+      //! if copy mb
+      if (copy)
+      {
+        for (j=0;j<MB_BLOCK_SIZE;j++)
+          for (i=0;i<MB_BLOCK_SIZE;i++)
+            inY[pos_y+j][pos_x+i] = refY[ref_inx][pos_y+j][pos_x+i];
+      }
+      //! if inter mb
+      else if (inter)
+      {
+        for (block_y = mb_y*BLOCK_SIZE ; block_y < (mb_y*BLOCK_SIZE + BLOCK_MULTIPLE) ; block_y++)
+          for (block_x = mb_x*BLOCK_SIZE ; block_x < (mb_x*BLOCK_SIZE + BLOCK_MULTIPLE) ; block_x++)
+          {
+            Get_Reference_Block(refY[ref_inx],
+                                block_y, block_x,
+                                mv[0][block_y - mb_y*BLOCK_SIZE][block_x - mb_x*BLOCK_SIZE],
+                                mv[1][block_y - mb_y*BLOCK_SIZE][block_x - mb_x*BLOCK_SIZE],
+                                decs->RefBlock);
+            for (j=0;j<BLOCK_SIZE;j++)
+              for (i=0;i<BLOCK_SIZE;i++)
+              {
+                inY[block_y*BLOCK_SIZE + j][block_x*BLOCK_SIZE + i] = decs->RefBlock[j][i];   // prediction only, no residue added
+              }
+          }
+      }
+      else //intra; up to now only copy mb, may integrate nokia EC
+      {
+        for (j=0;j<MB_BLOCK_SIZE;j++)
+          for (i=0;i<MB_BLOCK_SIZE;i++)
+            inY[pos_y+j][pos_x+i] = refY[ref_inx][pos_y+j][pos_x+i];
+      }
+    }
+    else //! first frame; up to now set value to grey, may integrate nokia EC
+    {
+      for (j=0;j<MB_BLOCK_SIZE;j++)
+        for (i=0;i<MB_BLOCK_SIZE;i++)
+          inY[pos_y+j][pos_x+i] = 127;
+    }
+    break;
+  case 3: //! Partition C lost (I slices and MBs that are neither copy nor inter are left unchanged)
+    if(img->type!=I_SLICE)
+    {
+      //! Copy motion vectors
+      for (block_y=0; block_y<BLOCK_MULTIPLE; block_y++)
+        for (block_x=0; block_x<BLOCK_MULTIPLE; block_x++)
+          for (i=0;i<2;i++)
+            mv[i][block_y][block_x]=tmp_mv[mb_y*BLOCK_SIZE+block_y][mb_x*BLOCK_SIZE+block_x+4][i];   // NOTE(review): same "+4" offset as in case 5
+
+      //! Residue is set to zero
+      for(i=0;i<MB_BLOCK_SIZE;i++)
+        for(j=0;j<MB_BLOCK_SIZE;j++)
+          resY[j][i]=0;
+
+      //! if copy mb
+      if (copy)
+      {
+        for (j=0;j<MB_BLOCK_SIZE;j++)
+          for (i=0;i<MB_BLOCK_SIZE;i++)
+            inY[pos_y+j][pos_x+i] = refY[ref_inx][pos_y+j][pos_x+i];
+      }
+      //! if inter mb
+      else if (inter)
+      {
+        for (block_y = mb_y*BLOCK_SIZE ; block_y < (mb_y*BLOCK_SIZE + BLOCK_MULTIPLE) ; block_y++)
+          for (block_x = mb_x*BLOCK_SIZE ; block_x < (mb_x*BLOCK_SIZE + BLOCK_MULTIPLE) ; block_x++)
+            {
+              Get_Reference_Block(refY[ref_inx],
+                                  block_y, block_x,
+                                  mv[0][block_y - mb_y*BLOCK_SIZE][block_x - mb_x*BLOCK_SIZE],
+                                  mv[1][block_y - mb_y*BLOCK_SIZE][block_x - mb_x*BLOCK_SIZE],
+                                  decs->RefBlock);
+              for (j=0;j<BLOCK_SIZE;j++)
+                for (i=0;i<BLOCK_SIZE;i++)
+                {
+                  inY[block_y*BLOCK_SIZE + j][block_x*BLOCK_SIZE + i] = decs->RefBlock[j][i];
+                }
+            }
+      }
+    }
+    break;
+  case 2: //! Partition B lost (inter MBs in non-I slices are left unchanged)
+    if(img->type!=I_SLICE)
+    {
+      if(!inter)
+      {
+        for (j=0;j<MB_BLOCK_SIZE;j++)
+          for (i=0;i<MB_BLOCK_SIZE;i++)
+            inY[pos_y+j][pos_x+i] = refY[ref_inx][pos_y+j][pos_x+i];
+      }
+    }
+    else //! first frame; up to now set value to grey, may integrate nokia EC
+    {
+      for (j=0;j<MB_BLOCK_SIZE;j++)
+        for (i=0;i<MB_BLOCK_SIZE;i++)
+          inY[pos_y+j][pos_x+i] = 127;
+    }
+    break;
+  } //! End Switch (no default: any other status value leaves the MB unchanged)
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/defines.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/defines.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/defines.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,201 @@
+
+/*!
+ **************************************************************************
+ * \file defines.h
+ *
+ * \brief
+ *    Header file containing some useful global definitions
+ *
+ * \author
+ *    Detlev Marpe
+ *    Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+ *
+ * \date
+ *    21. March 2001
+ **************************************************************************
+ */
+
+
+#ifndef _DEFINES_H_
+#define _DEFINES_H_
+
+#if defined _DEBUG   // NOTE: both branches currently define TRACE as 0, so tracing is off in every build
+#define TRACE           0                   //!< 0:Trace off 1:Trace on 2:detailed CABAC context information
+#else
+#define TRACE           0                   //!< 0:Trace off 1:Trace on 2:detailed CABAC context information
+#endif
+
+#define GET_METIME      1       //!< Enables or disables ME computation time
+#define DUMP_DPB        0       //!< Dump dbp for debug purposes
+typedef unsigned char byte;    //!< byte type definition
+
+#define RC_MAX_TEMPORAL_LEVELS   5
+
+//#define BEST_NZ_COEFF 1   // yuwen 2005.11.03 => for high complexity mode decision (CAVLC, #TotalCoeff)
+
+//FREXT Profile IDC definitions
+#define FREXT_HP        100      //!< YUV 4:2:0/8 "High"
+#define FREXT_Hi10P     110      //!< YUV 4:2:0/10 "High 10"
+#define FREXT_Hi422     122      //!< YUV 4:2:2/10 "High 4:2:2"
+#define FREXT_Hi444     144      //!< YUV 4:4:4/12 "High 4:4:4"
+
+#define ZEROSNR 1
+
+// CAVLC
+#define LUMA              0
+#define LUMA_INTRA16x16DC 1
+#define LUMA_INTRA16x16AC 2
+
+#define LEVEL_NUM      6
+#define TOTRUN_NUM    15
+#define RUNBEFORE_NUM  7
+
+#define CAVLC_LEVEL_LIMIT 2063
+
+//--- block types for CABAC
+#define LUMA_16DC       0
+#define LUMA_16AC       1
+#define LUMA_8x8        2
+#define LUMA_8x4        3
+#define LUMA_4x8        4
+#define LUMA_4x4        5
+#define CHROMA_DC       6
+#define CHROMA_AC       7
+#define CHROMA_DC_2x4   8
+#define CHROMA_DC_4x4   9
+#define NUM_BLOCK_TYPES 10
+
+#define _FULL_SEARCH_RANGE_
+#define _ADAPT_LAST_GROUP_
+#define _CHANGE_QP_
+#define _LEAKYBUCKET_
+
+// ---------------------------------------------------------------------------------
+// FLAGS and DEFINES for new chroma intra prediction, Dzung Hoang
+// Threshold values to zero out quantized transform coefficients.
+// Recommend that _CHROMA_COEFF_COST_ be low to improve chroma quality
+#define _LUMA_COEFF_COST_       4 //!< threshold for luma coeffs
+#define _CHROMA_COEFF_COST_     4 //!< threshold for chroma coeffs, used to be 7
+#define _LUMA_MB_COEFF_COST_    5 //!< threshold for luma coeffs of inter Macroblocks
+#define _LUMA_8x8_COEFF_COST_   5 //!< threshold for luma coeffs of 8x8 Inter Partition
+
+#define IMG_PAD_SIZE           20 //!< Number of pixels padded around the reference frame (>=4)
+#define IMG_PAD_SIZE_TIMES4    80 //!< Number of pixels padded around the reference frame in subpel units(>=16)
+
+#define MAX_VALUE       999999   //!< used for start value for some variables
+#define INVALIDINDEX  (-135792468)
+
+#define P8x8    8
+#define I4MB    9
+#define I16MB   10
+#define IBLOCK  11
+#define SI4MB   12
+#define I8MB    13
+#define IPCM    14
+#define MAXMODE 15
+
+
+#define  LAMBDA_ACCURACY_BITS         16   //!< number of fractional bits of the fixed-point lambda factor
+#define  LAMBDA_FACTOR(lambda)        ((int)((double)(1<<LAMBDA_ACCURACY_BITS)*(lambda)+0.5))   //!< lambda scaled by 2^LAMBDA_ACCURACY_BITS and rounded; argument parenthesized so expressions expand correctly
+#define  WEIGHTED_COST(factor,bits)   (((factor)*(bits))>>LAMBDA_ACCURACY_BITS)   //!< factor*bits with the fixed-point scaling removed
+#define  MV_COST(f,s,cx,cy,px,py)     (WEIGHTED_COST(f,mvbits[((cx)<<(s))-(px)]+mvbits[((cy)<<(s))-(py)]))   //!< weighted mvbits cost of candidate (cx,cy)<<s relative to predictor (px,py)
+#define  MV_COST_SMP(f,cx,cy,px,py)     (WEIGHTED_COST(f,mvbits[(cx)-(px)]+mvbits[(cy)-(py)]))   //!< as MV_COST, without shifting the candidate
+#define  REF_COST(f,ref,list_offset) (WEIGHTED_COST(f,((listXsize[list_offset]<=1)? 0:refbits[(ref)])))   //!< weighted refbits cost; 0 when the list has at most one entry
+
+#define IS_INTRA(MB)    ((MB)->mb_type==I4MB  || (MB)->mb_type==I16MB || (MB)->mb_type==I8MB || (MB)->mb_type==IPCM)   //!< any of the intra types, including IPCM
+#define IS_NEWINTRA(MB) ((MB)->mb_type==I16MB)   //!< intra 16x16
+#define IS_OLDINTRA(MB) ((MB)->mb_type==I4MB)    //!< intra 4x4
+#define IS_IPCM(MB)     ((MB)->mb_type==IPCM)
+
+#define IS_INTER(MB)    ((MB)->mb_type!=I4MB  && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB)   // NOTE(review): IPCM is not excluded, so an IPCM MB satisfies both IS_INTRA and IS_INTER - confirm intended
+#define IS_INTERMV(MB)  ((MB)->mb_type!=I4MB  && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB  && (MB)->mb_type!=0)   //!< as IS_INTER, additionally excluding mb_type 0
+#define IS_DIRECT(MB)   ((MB)->mb_type==0     && (img->type==B_SLICE))   //!< mb_type 0 in a B slice (reads the global img)
+#define IS_COPY(MB)     ((MB)->mb_type==0     && (img->type==P_SLICE||img ->type==SP_SLICE))   //!< mb_type 0 in a P or SP slice (reads the global img)
+#define IS_P8x8(MB)     ((MB)->mb_type==P8x8)
+
+// Quantization parameter range
+
+#define MIN_QP          0
+#define MAX_QP          51
+#define SHIFT_QP        12
+
+// Direct Mode types
+#define DIR_TEMPORAL    0   //!< Temporal Direct Mode
+#define DIR_SPATIAL     1   //!< Spatial Direct Mode
+
+#define MAX_REFERENCE_PICTURES 32
+
+#define BLOCK_SHIFT     2
+#define BLOCK_SIZE      4
+#define BLOCK_SIZE8x8   8
+#define MB_BLOCK_SIZE   16
+#define MB_BLOCK_SHIFT  4
+
+// These variables relate to the subpel accuracy supported by the software (1/4)
+#define BLOCK_SIZE_SP      16  // BLOCK_SIZE << 2
+#define BLOCK_SIZE8x8_SP   32  // BLOCK_SIZE8x8 << 2
+
+// number of intra prediction modes
+#define NO_INTRA_PMODE  9
+
+// 4x4 intra prediction modes
+#define VERT_PRED             0
+#define HOR_PRED              1
+#define DC_PRED               2
+#define DIAG_DOWN_LEFT_PRED   3
+#define DIAG_DOWN_RIGHT_PRED  4
+#define VERT_RIGHT_PRED       5
+#define HOR_DOWN_PRED         6
+#define VERT_LEFT_PRED        7
+#define HOR_UP_PRED           8
+
+// 16x16 intra prediction modes
+#define VERT_PRED_16    0
+#define HOR_PRED_16     1
+#define DC_PRED_16      2
+#define PLANE_16        3
+
+// 8x8 chroma intra prediction modes
+#define DC_PRED_8       0
+#define HOR_PRED_8      1
+#define VERT_PRED_8     2
+#define PLANE_8         3
+
+#define INIT_FRAME_RATE 30
+#define EOS             1         //!< End Of Sequence
+
+
+#define MVPRED_MEDIAN   0
+#define MVPRED_L        1
+#define MVPRED_U        2
+#define MVPRED_UR       3
+
+#define BLOCK_MULTIPLE        4   //(MB_BLOCK_SIZE/BLOCK_SIZE)
+#define MB_BLOCK_PARTITIONS   16  //(BLOCK_MULTIPLE * BLOCK_MULTIPLE)
+#define MB_PIXELS             256 //(MB_BLOCK_SIZE * MB_BLOCK_SIZE)
+#define BLOCK_CONTEXT         64  //(4 * MB_BLOCK_PARTITIONS)
+
+#define MAX_SYMBOLS_PER_MB  1200  //!< Maximum number of different syntax elements for one MB
+                                  // CAVLC needs more symbols per MB
+
+
+#define MAX_PART_NR     3 /*!< Maximum number of different data partitions.
+                               Some reasonable number which should reflect
+                               what is currently defined in the SE2Partition map (elements.h) */
+
+//Start code and Emulation Prevention need this to be defined in identical manner at encoder and decoder
+#define ZEROBYTES_SHORTSTARTCODE 2 //indicates the number of zero bytes in the short start-code prefix
+
+#define Q_BITS          15
+#define DQ_BITS         6
+#define DQ_ROUND        (1<<(DQ_BITS-1))
+
+#define Q_BITS_8        16
+#define DQ_BITS_8       6
+#define DQ_ROUND_8      (1<<(DQ_BITS_8-1))
+
+// Context Adaptive Lagrange Multiplier (CALM)
+#define CALM_MF_FACTOR_THRESHOLD 512.0
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/elements.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/elements.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/elements.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,89 @@
+
+/*!
+ **************************************************************************
+ *  \file elements.h
+ *  \brief  Header file for elements in H.264 streams
+ *  \date 6.10.2000,
+ *  \version
+ *      1.1
+ *
+ * \note
+ *    Version 1.0 included three partition modes, no DP, 2 partitions per slice
+ *      and 4 partitions per slice.  As per document VCEG-N72 this is changed
+ *      in version 1.1 to only two partition modes, one without DP and one with
+ *      3 partition per slice
+ *
+ *  \author Sebastian Purreiter     <sebastian.purreiter at mch.siemens.de>
+ *  \author Stephan Wenger          <stewe at cs.tu-berlin.de>
+ *
+ **************************************************************************
+ */
+
+#ifndef _ELEMENTS_H_
+#define _ELEMENTS_H_
+
+/*!
+ *  definition of H.264 syntax elements
+ *  order of elements follow dependencies for picture reconstruction
+ */
+
+#define MAXPARTITIONMODES 2 //!< maximum possible partition modes as defined in assignSE2partition[][]
+
+/*!
+ *  \brief  lookup-table to assign different elements to partition
+ *
+ *  \note here we defined up to 6 different partitions similar to
+ *      document Q15-k-18 described in the PROGFRAMEMODE.
+ *      The Sliceheader contains the PSYNC information. \par
+ *
+ *      Elements inside a partition are not ordered. They are
+ *      ordered by occurrence in the stream.
+ *      Assumption: Only partitionlosses are considered. \par
+ *
+ *      The texture elements luminance and chrominance are
+ *      not ordered in the progressive form
+ *      This may be changed in image.c \par
+ *
+ *  -IMPORTANT:
+ *      Picture- or Sliceheaders must be assigned to partition 0. \par
+ *      Furthermore partitions must follow syntax dependencies as
+ *      outlined in document Q15-J-23.
+ */
+
+
+// A note on this table:
+//
+// While the assignment of values in enum data types is specified in C, it is not
+// very good style to have an "elementnumber", not even as a comment.
+//
+// Hence a copy of the relevant structure from global.h here
+/*
+typedef enum {
+ 0  SE_HEADER,
+ 1  SE_PTYPE,
+ 2  SE_MBTYPE,
+ 3  SE_REFFRAME,
+ 4  SE_INTRAPREDMODE,
+ 5  SE_MVD,
+ 6  SE_CBP
+ 7  SE_LUM_DC_INTRA,
+ 8  SE_CHR_DC_INTRA,
+ 9  SE_LUM_AC_INTRA,
+10  SE_CHR_AC_INTRA,
+12  SE_LUM_DC_INTER,
+13  SE_CHR_DC_INTER,
+14  SE_LUM_AC_INTER,
+15  SE_CHR_AC_INTER,
+16  SE_DELTA_QUANT,
+18  SE_BFRAME,
+19  SE_EOS,
+20  SE_MAX_ELEMENTS */ // number of maximum syntax elements
+//} SE_type;
+
+
+extern int * assignSE2partition[2];   // presumably one lookup table per partition mode (cf. MAXPARTITIONMODES) - confirm against the definition
+extern int assignSE2partition_NoDP[SE_MAX_ELEMENTS];   // one entry per syntax element type
+extern int assignSE2partition_DP[SE_MAX_ELEMENTS];     // one entry per syntax element type
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,475 @@
+
+/*!
+ *************************************************************************************
+ * \file explicit_gop.c
+ *
+ * \brief
+ *    Code for explicit gop support and hierarchical coding.
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Alexis Michael Tourapis                     <alexismt at ieee.org>
+ *************************************************************************************
+ */
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include "global.h"
+#include "contributors.h"
+#include "explicit_gop.h"
+#include "image.h"
+#include "nalucommon.h"
+#include "string.h"
+
+
+/*!
+************************************************************************
+* \brief
+*    Generation of hierarchical GOP
+*
+*    Fills gop_structure[0 .. input->successive_Bframe-1] and img->GopLevels.
+*    HierarchicalCoding == 1: the first successive_Bframe/2 entries are reference B slices (layer 1) at odd display positions, the rest are disposable B slices (layer 0) at even positions.
+*    Otherwise: display positions (1<<j)-1, 2*(1<<j)-1, ... become reference B slices of layer j, and the entries are then sorted so that higher layers come first.
+************************************************************************
+*/
+void create_hierarchy()
+{
+  int i, j;
+  int centerB=input->successive_Bframe/2;
+  GOP_DATA tmp;
+
+  if (input->HierarchicalCoding == 1)
+  {
+    for (i=0;i<input->successive_Bframe;i++)
+    {
+      if (i < centerB)
+      {
+        gop_structure[i].slice_type = B_SLICE;
+        gop_structure[i].display_no = i * 2 + 1;
+        gop_structure[i].hierarchy_layer = 1;
+        gop_structure[i].reference_idc = NALU_PRIORITY_HIGH;
+        gop_structure[i].slice_qp = imax(0, (input->qpB + (input->HierarchyLevelQPEnable ? -1: input->qpBRSOffset)));
+
+      }
+      else
+      {
+        gop_structure[i].slice_type = B_SLICE;
+        gop_structure[i].display_no = (i - centerB) * 2;
+        gop_structure[i].hierarchy_layer = 0;
+        gop_structure[i].reference_idc = NALU_PRIORITY_DISPOSABLE;
+        gop_structure[i].slice_qp = input->qpB;
+      }
+    }
+    img->GopLevels = 2;
+  }
+  else
+  {
+    int GOPlevels = 1;
+    int Bframes = input->successive_Bframe;
+
+    // Number of layers: the smallest GOPlevels for which ((Bframes + 1) >> GOPlevels) <= 1. (Two scratch arrays that were allocated here but never used or freed have been removed.)
+    while (((Bframes + 1 ) >> GOPlevels) > 1)
+    {
+      GOPlevels ++;
+    }
+
+    img->GopLevels = GOPlevels;
+
+    for (i=0; i <input->successive_Bframe; i++)
+    {
+      gop_structure[i].display_no = i;
+      gop_structure[i].slice_type = B_SLICE;
+      gop_structure[i].hierarchy_layer = 0;
+      gop_structure[i].reference_idc = NALU_PRIORITY_DISPOSABLE;
+      gop_structure[i].slice_qp = input->qpB;
+    }
+
+    for (j = 1; j < GOPlevels; j++)
+    {
+      for (i = (1 << j) - 1; i < Bframes + 1 - (1 << j); i += (1 << j)) {
+        gop_structure[i].hierarchy_layer  = j;
+        gop_structure[i].reference_idc  = NALU_PRIORITY_HIGH;
+        gop_structure[i].slice_qp = imax(0, input->qpB + (input->HierarchyLevelQPEnable ? -j: input->qpBRSOffset));
+      }
+    }
+
+    // Insertion sort by descending hierarchy_layer; the strict '>' keeps entries of equal layer in display order.
+    for (i = 1; i < Bframes; i++)
+    {
+      j = i;
+
+      while (j > 0 && gop_structure[j].hierarchy_layer > gop_structure[j-1].hierarchy_layer)
+      {
+        tmp = gop_structure[j-1];
+        gop_structure[j-1] = gop_structure[j];
+        gop_structure[j] = tmp;
+        j--;
+      }
+    }
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Initialization of GOP structure: allocates the zero-filled global gop_structure[] array.
+*    The element count is imax(10, n), where n is input->jumpd when HierarchicalCoding == 3 and input->successive_Bframe otherwise.
+************************************************************************
+*/
+void init_gop_structure()
+{
+  int max_gopsize = input->HierarchicalCoding != 3 ? input->successive_Bframe  : input->jumpd;
+
+  gop_structure = calloc(imax(10,max_gopsize), sizeof (GOP_DATA)); // NOTE(review): this comment used to read "+1 for reordering", but no +1 is added to the element count here
+  if (NULL==gop_structure)
+    no_mem_exit("init_gop_structure: gop_structure");
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Clear GOP structure: releases gop_structure[] and resets the pointer so that a repeated call is harmless
+************************************************************************
+*/
+void clear_gop_structure()
+{
+  free(gop_structure);    // free(NULL) is a no-op, so no guard is needed
+  gop_structure = NULL;   // avoids a dangling pointer and a double free on a second call
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Interpret GOP struct from input parameters: parses input->ExplicitHierarchyFormat into gop_structure[] and sets input->successive_Bframe to the number of frame records found
+************************************************************************
+*/
+void interpret_gop_structure()
+{
+
+  int nLength = strlen(input->ExplicitHierarchyFormat);
+  int i =0, k, dqp, display_no;
+  int slice_read =0, order_read = 0, stored_read = 0, qp_read =0; // parser state: which fields of the current frame record have been consumed
+  int coded_frame = 0;                                             // index of the gop_structure[] entry being filled
+
+  if (nLength > 0)
+  {
+
+    for (i = 0; i < nLength ; i++)
+    {
+      //! First lets read slice type
+      if (slice_read == 0)
+      {
+        switch (input->ExplicitHierarchyFormat[i])
+        {
+        case 'P':
+        case 'p':
+          gop_structure[coded_frame].slice_type=P_SLICE;
+          break;
+        case 'B':
+        case 'b':
+          gop_structure[coded_frame].slice_type=B_SLICE;
+          break;
+        case 'I':
+        case 'i':
+          gop_structure[coded_frame].slice_type=I_SLICE;
+          break;
+        default:
+          snprintf(errortext, ET_SIZE, "Slice Type invalid in ExplicitHierarchyFormat param. Please check configuration file.");
+          error (errortext, 400);
+          break;
+        }
+        slice_read = 1;
+      }
+      else
+      {
+        //! Next is Display Order
+        if (order_read == 0)
+        {
+          if (isdigit((int)(*(input->ExplicitHierarchyFormat+i))))
+          {
+            sscanf(input->ExplicitHierarchyFormat+i,"%d",&display_no); // reads the whole decimal number starting at i; its remaining digits match no branch below and are skipped by later iterations
+            gop_structure[coded_frame].display_no = display_no;
+            order_read = 1;
+            if (display_no<0 || display_no>=input->jumpd) // range check happens after the value has been stored
+            {
+              snprintf(errortext, ET_SIZE, "Invalid Frame Order value. Frame position needs to be in [0,%d] range.",input->jumpd-1);
+              error (errortext, 400);
+            }
+            for (k=0;k<coded_frame;k++) // reject a display position already used by an earlier record
+            {
+              if (gop_structure[k].display_no == display_no)
+              {
+                snprintf(errortext, ET_SIZE, "Frame Order value %d in frame %d already used for enhancement frame %d.",display_no,coded_frame,k);
+                error (errortext, 400);
+              }
+            }
+          }
+          else
+          {
+            snprintf(errortext, ET_SIZE, "Slice Type needs to be followed by Display Order. Please check configuration file.");
+            error (errortext, 400);
+          }
+        }
+        else if (order_read == 1)
+        {
+          if (stored_read == 0 && !(isdigit((int)(*(input->ExplicitHierarchyFormat+i))))) // first non-digit after the display order: the reference flag
+          {
+            switch (input->ExplicitHierarchyFormat[i])
+            {
+            case 'E':
+            case 'e':
+              gop_structure[coded_frame].reference_idc = NALU_PRIORITY_DISPOSABLE;
+              gop_structure[coded_frame].hierarchy_layer = 0;
+              break;
+            case 'R':
+            case 'r':
+              gop_structure[coded_frame].reference_idc= NALU_PRIORITY_HIGH;
+              gop_structure[coded_frame].hierarchy_layer = 1;
+              img->GopLevels = 2;
+              break;
+            default:
+              snprintf(errortext, ET_SIZE, "Reference_IDC invalid in ExplicitHierarchyFormat param. Please check configuration file.");
+              error (errortext, 400);
+              break;
+            }
+            stored_read = 1;
+          }
+          else if (stored_read == 1 && qp_read == 0)
+          {
+            if (isdigit((int)(*(input->ExplicitHierarchyFormat+i))))
+            {
+              sscanf(input->ExplicitHierarchyFormat+i,"%d",&dqp); // QP offset relative to the base QP of the slice type chosen below
+
+              if (gop_structure[coded_frame].slice_type == I_SLICE)
+                gop_structure[coded_frame].slice_qp = input->qp0;
+              else if (gop_structure[coded_frame].slice_type == P_SLICE)
+                gop_structure[coded_frame].slice_qp = input->qpN;
+              else
+                gop_structure[coded_frame].slice_qp = input->qpB;
+
+              gop_structure[coded_frame].slice_qp = iClip3(-img->bitdepth_luma_qp_scale, 51,gop_structure[coded_frame].slice_qp + dqp); // iClip3 presumably clamps to [-bitdepth_luma_qp_scale, 51] -- confirm against its definition
+                qp_read = 1;
+            }
+            else
+            {
+              snprintf(errortext, ET_SIZE, "Reference_IDC needs to be followed by QP. Please check configuration file.");
+              error (errortext, 400);
+            }
+          }
+          else if (stored_read == 1 && qp_read == 1 && !(isdigit((int)(*(input->ExplicitHierarchyFormat+i)))) && (i < nLength - 2)) // a non-digit after the QP offset (with at least two more characters following) starts the next record
+          {
+            stored_read =0;
+            qp_read=0;
+            order_read=0;
+            slice_read=0;
+            i--;            // step back so the next iteration re-reads this character as a slice type
+            coded_frame ++;
+            if (coded_frame >= input->jumpd )
+            {
+              snprintf(errortext, ET_SIZE, "Total number of frames in Enhancement GOP need to be fewer or equal to FrameSkip parameter.");
+              error (errortext, 400);
+            }
+          }
+        }
+      }
+    }
+  }
+  else
+  {
+    snprintf(errortext, ET_SIZE, "ExplicitHierarchyFormat is empty. Please check configuration file.");
+    error (errortext, 400);
+  }
+
+  input->successive_Bframe = coded_frame + 1; // coded_frame is a zero-based index, so this is the record count
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Encode Enhancement Layer: codes the input->successive_Bframe frames of the current GOP via encode_one_frame(), setting slice type, frame_num and picture order counts in img for each one.
+************************************************************************
+*/
+void encode_enhancement_layer()
+{
+  int previous_ref_idc = 1; // 1 while the previously coded picture was a reference; NOTE(review): presumably starts at 1 because an anchor frame precedes this call -- confirm
+
+  if ((input->successive_Bframe != 0) && (IMG_NUMBER > 0)) // B-frame(s) to encode
+  {
+    if (input->PReplaceBSlice)
+      img->type = P_SLICE;            // set image type to P-frame
+    else
+      img->type = B_SLICE;            // set image type to B-frame
+
+    if (input->NumFramesInELSubSeq == 0)
+      img->layer = 0;
+    else
+      img->layer = 1;
+
+    if (input->BRefPictures != 1 && input->HierarchicalCoding==0)
+    {
+      img->frame_num++; //increment frame_num once for B-frames
+      img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4)); // wrap frame_num at 2^(log2_max_frame_num_minus4 + 4)
+    }
+    img->nal_reference_idc = 0;
+
+    //if (input->HierarchicalCoding == 3 || input->HierarchicalCoding == 1)
+    if (input->HierarchicalCoding) // hierarchical / explicit GOP: per-frame settings come from gop_structure[]
+    {
+      for(img->b_frame_to_code=1; img->b_frame_to_code<=input->successive_Bframe; img->b_frame_to_code++) // b_frame_to_code is 1-based; gop_structure[] is indexed with b_frame_to_code - 1
+      {
+
+        img->nal_reference_idc = 0;
+
+        img->type = gop_structure[img->b_frame_to_code - 1].slice_type; // overrides the type chosen above
+
+        if (previous_ref_idc == 1)
+        {
+          img->frame_num++;                 //increment frame_num for each stored B slice
+          img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+        }
+
+        if (gop_structure[img->b_frame_to_code - 1].reference_idc== NALU_PRIORITY_HIGH )
+        {
+          img->nal_reference_idc = 1;
+          previous_ref_idc = 1;
+        }
+        else
+          previous_ref_idc = 0;
+
+        img->b_interval =
+          ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+
+        if (input->HierarchicalCoding == 3)
+          img->b_interval = 1.0;
+
+        if(input->intra_period && input->idr_enable)
+          img->toppoc = 2*(((IMG_NUMBER%input->intra_period)-1)*(input->jumpd+1) + (int)(img->b_interval * (double)(1 + gop_structure[img->b_frame_to_code - 1].display_no)));
+        else
+          img->toppoc = 2*((IMG_NUMBER-1)*(input->jumpd + 1) + (int)(img->b_interval * (double)(1 + gop_structure[img->b_frame_to_code -1].display_no)));
+
+        if (img->b_frame_to_code == 1)
+          img->delta_pic_order_cnt[0] = img->toppoc - 2*(start_tr_in_this_IGOP  + (IMG_NUMBER)*((input->jumpd+1)));
+        else
+          img->delta_pic_order_cnt[0] = img->toppoc - 2*(start_tr_in_this_IGOP  + (IMG_NUMBER-1)*((input->jumpd+1)) + (int) (2.0 *img->b_interval * (double) (1+ gop_structure[img->b_frame_to_code - 2].display_no)));
+
+        if ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING))
+          img->bottompoc = img->toppoc;     //progressive
+        else
+          img->bottompoc = img->toppoc+1;
+
+        img->framepoc = imin (img->toppoc, img->bottompoc);
+
+        img->delta_pic_order_cnt[1]= 0;   // POC200301
+
+        encode_one_frame();  // encode one B-frame
+        if (input->ReportFrameStats)
+          report_frame_statistic();
+
+        if (gop_structure[img->b_frame_to_code - 1].reference_idc== NALU_PRIORITY_HIGH && img->b_frame_to_code==input->successive_Bframe) // last frame of the GOP was a reference
+        {
+          img->frame_num++;                 //increment frame_num for each stored B slice
+          img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+        }
+      }
+      img->b_frame_to_code = 0;
+    }
+    else // no hierarchy: frames are coded in display order
+    {
+      for(img->b_frame_to_code=1; img->b_frame_to_code<=input->successive_Bframe; img->b_frame_to_code++)
+      {
+
+        img->nal_reference_idc = 0;
+        if (input->BRefPictures == 1 )
+        {
+          img->nal_reference_idc = 1;
+          img->frame_num++;                 //increment frame_num once for B-frames
+          img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+        }
+
+        img->b_interval =
+          ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+
+        if (input->HierarchicalCoding == 3) // NOTE(review): never true here, this branch is only reached when HierarchicalCoding == 0
+          img->b_interval = 1.0;
+
+        if(input->intra_period && input->idr_enable)
+          img->toppoc = 2*(((IMG_NUMBER% input->intra_period)-1)*(input->jumpd+1) + (int) (img->b_interval * (double)img->b_frame_to_code));
+        else
+          img->toppoc = 2*((IMG_NUMBER-1)*(input->jumpd+1) + (int) (img->b_interval * (double)img->b_frame_to_code));
+
+        if ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING))
+          img->bottompoc = img->toppoc;     //progressive
+        else
+          img->bottompoc = img->toppoc+1;
+
+        img->framepoc = imin (img->toppoc, img->bottompoc);
+
+        //the following is sent in the slice header
+        if (input->BRefPictures != 1)
+        {
+          img->delta_pic_order_cnt[0]= 2*(img->b_frame_to_code-1);
+        }
+        else
+        {
+          img->delta_pic_order_cnt[0]= -2;
+        }
+
+        img->delta_pic_order_cnt[1]= 0;   // POC200301
+
+        encode_one_frame();  // encode one B-frame
+
+        if (input->BRefPictures == 1 && img->b_frame_to_code==input->successive_Bframe)
+        {
+          img->frame_num++;                 //increment frame_num for each stored B slice
+          img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+        }
+
+        if (input->ReportFrameStats)
+          report_frame_statistic();
+      }
+    }
+  }
+  img->b_frame_to_code = 0; // reset on every path, including when nothing was encoded
+}
+
+
+void poc_based_ref_management(int current_pic_num) // queues a two-entry marking list in img->dec_ref_pic_marking_buffer that targets the short-term reference frame with the lowest POC in dpb
+{
+  unsigned i, pic_num = 0;
+
+  int min_poc=INT_MAX;
+  DecRefPicMarking_t *tmp_drpm,*tmp_drpm2;
+
+  if (img->dec_ref_pic_marking_buffer!=NULL) // a marking list is already pending: leave it untouched
+    return;
+
+  if ((dpb.ref_frames_in_buffer+dpb.ltref_frames_in_buffer)==0) // no reference frames stored: nothing to do
+    return;
+
+  for (i=0; i<dpb.used_size;i++)
+  {
+    if (dpb.fs[i]->is_reference  && (!(dpb.fs[i]->is_long_term)) && dpb.fs[i]->poc < min_poc) // NOTE(review): compares fs[i]->poc but stores fs[i]->frame->poc below -- confirm the two always agree
+    {
+      min_poc = dpb.fs[i]->frame->poc ;
+      pic_num =  dpb.fs[i]->frame->pic_num;
+    }
+  }
+
+  if (NULL==(tmp_drpm=(DecRefPicMarking_t*)calloc (1,sizeof (DecRefPicMarking_t)))) no_mem_exit("poc_based_ref_management: tmp_drpm");
+  tmp_drpm->Next=NULL;
+
+  tmp_drpm->memory_management_control_operation = 0; // second (last) list entry; operation 0 presumably ends the list -- confirm against the slice-header writer
+
+  if (NULL==(tmp_drpm2=(DecRefPicMarking_t*)calloc (1,sizeof (DecRefPicMarking_t)))) no_mem_exit("poc_based_ref_management: tmp_drpm2");
+  tmp_drpm2->Next=tmp_drpm;
+
+  tmp_drpm2->memory_management_control_operation = 1; // first list entry; carries the pic_num difference computed below
+  tmp_drpm2->difference_of_pic_nums_minus1 = current_pic_num - pic_num - 1; // pic_num is unsigned, so this subtraction is carried out in unsigned arithmetic
+  img->dec_ref_pic_marking_buffer = tmp_drpm2; // the list is handed over to img; it is not freed in this function
+
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,25 @@
+
+/*!
+ *************************************************************************************
+ * \file explicit_gop.h
+ *
+ * \brief
+ *    Functions for explicit gop and hierarchy support
+ *
+ * \author
+ *     Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Alexis Michael Tourapis          <alexismt at ieee.org>
+ *************************************************************************************
+ */
+
+#ifndef _EXPLICIT_GOP_H_
+#define _EXPLICIT_GOP_H_
+
+// GOP Hierarchy
+void init_gop_structure(void);
+void interpret_gop_structure(void);
+void create_hierarchy(void);
+void clear_gop_structure(void);
+void encode_enhancement_layer(void);
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/filehandle.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/filehandle.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/filehandle.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,190 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    filehandle.c
+ * \brief
+ *    Start and terminate sequences
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Thomas Stockhammer            <stockhammer at ei.tum.de>
+ *      - Detlev Marpe                  <marpe at hhi.de>
+ ***************************************************************************************
+ */
+
+#include "contributors.h"
+
+#include <stdlib.h>
+
+#include "global.h"
+
+#include "rtp.h"
+#include "annexb.h"
+#include "parset.h"
+#include "mbuffer.h"
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Error handling procedure. Print error message to stderr, call
+ *    flush_dpb() and exit with supplied code. Does not return.
+ * \param text
+ *    Error message
+ * \param code
+ *    Exit code
+ ************************************************************************
+ */
+void error(char *text, int code)
+{
+  fprintf(stderr, "%s\n", text);
+  flush_dpb(); // defined elsewhere; presumably flushes the decoded picture buffer before the process ends -- confirm
+  exit(code);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *     Builds the picture parameter set NALU for PPS_id, writes it through WriteNALU and releases it.
+ *     Returns len increased by the value WriteNALU reported.
+ ************************************************************************
+ */
+int write_PPS(int len, int PPS_id)
+{
+  NALU_t *pps_nalu = GeneratePic_parameter_set_NALU (PPS_id);
+
+  // WriteNALU is the writer selected for the current output mode
+  len += WriteNALU (pps_nalu);
+
+  FreeNALU (pps_nalu);
+
+  return len;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    This function opens the output file, selects the matching WriteNALU writer and
+ *    writes the sequence header (SPS, PPS set(s), optional SEI). Returns 0 on success.
+ ************************************************************************
+ */
+int start_sequence()
+{
+  int i,len=0, total_pps = (input->GenerateMultiplePPS) ? 3 : 1; // three PPSs are written when GenerateMultiplePPS is set, otherwise one
+  NALU_t *nalu;
+
+
+  switch(input->of_mode)
+  {
+    case PAR_OF_ANNEXB:
+      OpenAnnexbFile (input->outfile);
+      WriteNALU = WriteAnnexbNALU;
+      break;
+    case PAR_OF_RTP:
+      OpenRTPFile (input->outfile);
+      WriteNALU = WriteRTPNALU;
+      break;
+    default:
+      snprintf(errortext, ET_SIZE, "Output File Mode %d not supported", input->of_mode);
+      error(errortext,1);
+      return 1; // only reached if error() returns
+  }
+
+  //! As a sequence header, here we write the both sequence and picture
+  //! parameter sets.  As soon as IDR is implemented, this should go to the
+  //! IDR part, as both parsets have to be transmitted as part of an IDR.
+  //! An alternative may be to consider this function the IDR start function.
+
+  nalu = NULL;
+  nalu = GenerateSeq_parameter_set_NALU ();
+  len += WriteNALU (nalu);
+  FreeNALU (nalu);
+
+  //! Lets write now the Picture Parameter sets. Output will be equal to the total number of bits spend here.
+  for (i=0;i<total_pps;i++)
+  {
+     len = write_PPS(len, i); // write_PPS returns the running total
+  }
+
+  if (input->Generate_SEIVUI)
+  {
+    nalu = NULL;
+    nalu = GenerateSEImessage_NALU();
+    len += WriteNALU (nalu);
+    FreeNALU (nalu);
+  }
+
+  stats->bit_ctr_parametersets_n = len; // accumulated WriteNALU results for all parameter sets written above
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    This function writes the sequence header again (SPS, PPS set(s), optional SEI) through the
+ *    current WriteNALU writer. Unlike start_sequence() it does not open the output file. Returns 0.
+ ************************************************************************
+ */
+int rewrite_paramsets()
+{
+  int i,len=0, total_pps = (input->GenerateMultiplePPS) ? 3 : 1; // three PPSs are written when GenerateMultiplePPS is set, otherwise one
+  NALU_t *nalu;
+
+
+  //! As a sequence header, here we write the both sequence and picture
+  //! parameter sets.  As soon as IDR is implemented, this should go to the
+  //! IDR part, as both parsets have to be transmitted as part of an IDR.
+  //! An alternative may be to consider this function the IDR start function.
+
+  nalu = NULL;
+  nalu = GenerateSeq_parameter_set_NALU ();
+  len += WriteNALU (nalu);
+  FreeNALU (nalu);
+
+  //! Lets write now the Picture Parameter sets. Output will be equal to the total number of bits spend here.
+  for (i=0;i<total_pps;i++)
+  {
+     len = write_PPS(len, i); // write_PPS returns the running total
+  }
+
+  if (input->Generate_SEIVUI)
+  {
+    nalu = NULL;
+    nalu = GenerateSEImessage_NALU();
+    len += WriteNALU (nalu);
+    FreeNALU (nalu);
+  }
+
+  stats->bit_ctr_parametersets_n = len; // overwrites the value stored by the previous write of the parameter sets
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *     This function terminates the sequence and closes the output file for the configured output mode.
+ *     Returns 0 on success; 1 is returned only if error() comes back for an unsupported mode.
+ ************************************************************************
+ */
+int terminate_sequence()
+{
+  // Annex B and RTP are the only supported output modes, and both report
+  // success with 0. Previously the Annex B case left the switch with
+  // "break" and ran into the trailing "return 1", so the two successful
+  // paths returned different values.
+
+  switch(input->of_mode)
+  {
+    case PAR_OF_ANNEXB:
+      CloseAnnexbFile();
+      return 0;
+    case PAR_OF_RTP:
+      CloseRTPFile();
+      return 0;
+    default:
+      snprintf(errortext, ET_SIZE, "Output File Mode %d not supported", input->of_mode);
+      error(errortext,1);
+      return 1;
+  }
+  return 1;   // make lint happy
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/fmo.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/fmo.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/fmo.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,730 @@
+/*!
+ *****************************************************************************
+ *
+ * \file fmo.c
+ *
+ * \brief
+ *    Support for Flexible Macroblock Ordering for different Slice Group Modes: MBAmap handling
+ *
+ * \date
+ *    16 June, 2002  Modified April 25, 2004
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *    Dong Wang        Dong.Wang at bristol.ac.uk
+ *
+ *****************************************************************************/
+
+/*!
+ ****************************************************************************
+ *   Notes by Dong Wang (April 25 2004)
+ *
+ *  Source codes are modified to support 7 slice group types (fmo modes).
+ *  The functions for generating map are very similar to that in decoder, but have
+ *  a little difference.
+ *
+ *  The MB map is calculated at the beginning of coding of each picture (frame or field).
+ *
+ *  'slice_group_change_cycle' in structure 'ImageParameters' is the syntax in the slice
+ *  header. It's set to be 1 before the initialization of FMO in function code_a_picture().
+ *  It can be changed every time if needed.
+ *
+ ****************************************************************************
+ */
+
+/*!
+ *****************************************************************************
+ *  What does an MBAmap look like?
+ *
+ *  An MBAmap is a one-dimensional array of ints.  Each int
+ *  represents an MB in scan order.  A zero or positive value represents
+ *  a slice group ID.  Negative values are reserved for future extensions.
+ *  The numbering range for the SliceGroupIDs is 0..7 as per JVT-C167.
+ *
+ *  This module contains a static variable MBAmap.  This is the MBAmap of the
+ *  picture currently coded.  It can be accessed only through the access
+ *  functions.
+ *****************************************************************************
+*/
+
+//#define PRINT_FMO_MAPS  1
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <memory.h>
+
+#include "global.h"
+
+#include "fmo.h"
+
+
+static int FirstMBInSlice[MAXSLICEGROUPIDS]; // MAXSLICEGROUPIDS entries; FmoInit() resets every entry to -1
+
+byte *MBAmap = NULL;                          // img->PicSizeInMbs entries, (re)allocated and filled by FmoGenerateMBAmap()
+byte *MapUnitToSliceGroupMap = NULL;          // PicSizeInMapUnits entries, (re)allocated and filled by FmoGenerateMapUnitToSliceGroupMap()
+unsigned int PicSizeInMapUnits;               // img->PicHeightInMapUnits * img->PicWidthInMbs, set by FmoGenerateMapUnitToSliceGroupMap()
+
+
+static void FmoGenerateType0MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static void FmoGenerateType1MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static void FmoGenerateType2MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static void FmoGenerateType3MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static void FmoGenerateType4MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static void FmoGenerateType5MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static void FmoGenerateType6MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+
+
+static int FmoGenerateMapUnitToSliceGroupMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+static int FmoGenerateMBAmap (ImageParameters * img, seq_parameter_set_rbsp_t* sps);
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generates MapUnitToSliceGroupMap: recomputes PicSizeInMapUnits, reallocates the map and fills it
+ *    according to pps->slice_group_map_type (all zero when there is only one slice group). Returns 0.
+ * \param img
+ *    Image Parameter to be used for map generation
+ * \param pps
+ *    Picture Parameter set to be used for map generation
+ *
+ ************************************************************************
+ */
+static int FmoGenerateMapUnitToSliceGroupMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps)
+{
+  PicSizeInMapUnits = img->PicHeightInMapUnits * img->PicWidthInMbs;
+
+
+  if (pps->slice_group_map_type == 6) // type 6 carries its own size field in the PPS, which must match the picture size
+  {
+    if ((pps->pic_size_in_map_units_minus1+1) != PicSizeInMapUnits)
+    {
+      error ("wrong pps->pic_size_in_map_units_minus1 for used SPS and FMO type 6", 500);
+    }
+  }
+
+  // allocate memory for MapUnitToSliceGroupMap
+  if (MapUnitToSliceGroupMap)
+    free (MapUnitToSliceGroupMap); // drop the map of the previous picture
+
+  if ((MapUnitToSliceGroupMap = malloc ((PicSizeInMapUnits) * sizeof (byte))) == NULL)
+  {
+    printf ("cannot allocated %d bytes for MapUnitToSliceGroupMap, exit\n", (int) ( PicSizeInMapUnits * sizeof (byte)));
+    exit (-1);
+  }
+
+  if (pps->num_slice_groups_minus1 == 0)    // only one slice group
+  {
+    memset (MapUnitToSliceGroupMap, 0,  PicSizeInMapUnits * sizeof (byte));
+    return 0;
+  }
+
+  switch (pps->slice_group_map_type) // from here on num_slice_groups_minus1 >= 1
+  {
+  case 0:
+    FmoGenerateType0MapUnitMap (img, pps);
+    break;
+  case 1:
+    FmoGenerateType1MapUnitMap (img, pps);
+    break;
+  case 2:
+    FmoGenerateType2MapUnitMap (img, pps);
+    break;
+  case 3:
+    FmoGenerateType3MapUnitMap (img, pps);
+    break;
+  case 4:
+    FmoGenerateType4MapUnitMap (img, pps);
+    break;
+  case 5:
+    FmoGenerateType5MapUnitMap (img, pps);
+    break;
+  case 6:
+    FmoGenerateType6MapUnitMap (img, pps);
+    break;
+  default:
+    printf ("Illegal slice_group_map_type %d , exit \n", pps->slice_group_map_type);
+    exit (-1);
+  }
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generates MBAmap from MapUnitToSliceGroupMap: reallocates MBAmap with img->PicSizeInMbs entries
+ *    and copies slice group ids using one of three index mappings (see the branches below). Returns 0.
+ * \param img
+ *    Image Parameter to be used for map generation
+ * \param sps
+ *    Sequence Parameter set to be used for map generation
+ *
+ ************************************************************************
+ */
+static int FmoGenerateMBAmap (ImageParameters * img, seq_parameter_set_rbsp_t* sps)
+{
+  unsigned i;
+
+  // allocate memory for MBAmap
+  if (MBAmap)
+    free (MBAmap); // drop the map of the previous picture
+
+
+  if ((MBAmap = malloc ((img->PicSizeInMbs) * sizeof (byte))) == NULL)
+  {
+    printf ("cannot allocated %d bytes for MBAmap, exit\n", (int) ((img->PicSizeInMbs) * sizeof (byte)));
+    exit (-1);
+  }
+
+  if ((sps->frame_mbs_only_flag) || img->field_picture) // macroblocks map one-to-one onto map units
+  {
+    for (i=0; i<img->PicSizeInMbs; i++)
+    {
+      MBAmap[i] = MapUnitToSliceGroupMap[i];
+    }
+  }
+  else
+    if (sps->mb_adaptive_frame_field_flag  &&  (! img->field_picture)) // field_picture is already known to be 0 in this else branch
+    {
+      for (i=0; i<img->PicSizeInMbs; i++)
+      {
+        MBAmap[i] = MapUnitToSliceGroupMap[i/2]; // two consecutive macroblock addresses share one map unit
+      }
+    }
+    else
+    {
+      for (i=0; i<img->PicSizeInMbs; i++)
+      {
+        MBAmap[i] = MapUnitToSliceGroupMap[(i/(2*img->PicWidthInMbs))*img->PicWidthInMbs+(i%img->PicWidthInMbs)]; // two macroblock rows share one map-unit row
+      }
+    }
+    return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FMO initialization: Resets FirstMBInSlice[] to -1, then generates MapUnitToSliceGroupMap and MBAmap. Returns 0.
+ *
+ * \param img
+ *    Image Parameter to be used for map generation
+ * \param pps
+ *    Picture Parameter set to be used for map generation
+ * \param sps
+ *    Sequence Parameter set to be used for map generation
+ ************************************************************************
+ */
+int FmoInit(ImageParameters * img, pic_parameter_set_rbsp_t * pps, seq_parameter_set_rbsp_t * sps)
+{
+
+#ifdef PRINT_FMO_MAPS
+  unsigned i,j;
+  int bottom;
+#endif
+
+  int k;
+  for (k=0;k<MAXSLICEGROUPIDS;k++)
+    FirstMBInSlice[k] = -1; // -1 marks the entry as not yet set for this picture
+
+
+
+  FmoGenerateMapUnitToSliceGroupMap(img, pps); // must run first: FmoGenerateMBAmap reads the map it builds
+  FmoGenerateMBAmap(img, sps);
+
+#ifdef PRINT_FMO_MAPS
+  printf("\n");
+  printf("FMO Map (Units):\n");
+
+  for (j=0; j<img->PicHeightInMapUnits; j++)
+  {
+    for (i=0; i<img->PicWidthInMbs; i++)
+    {
+      printf("%d ",MapUnitToSliceGroupMap[i+j*img->PicWidthInMbs]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+
+  if(sps->mb_adaptive_frame_field_flag==0)
+  {
+    printf("FMO Map (Mb):\n");
+    for (j=0; j<(img->PicSizeInMbs/img->PicWidthInMbs); j++)
+    {
+      for (i=0; i<img->PicWidthInMbs; i++)
+      {
+        printf("%d ",MBAmap[i+j*img->PicWidthInMbs]);
+      }
+      printf("\n");
+    }
+    printf("\n");
+  }
+  else
+  {
+    printf("FMO Map (Mb in scan order for MBAFF):\n");
+    for (j=0; j<(img->PicSizeInMbs/img->PicWidthInMbs); j++)
+    {
+      for (i=0; i<img->PicWidthInMbs; i++)
+      {
+        bottom=(j%2); // 1 on odd rows
+        printf("%d ",MBAmap[(j-bottom)*img->PicWidthInMbs+i*2+bottom]);
+      }
+      printf("\n");
+
+    }
+    printf("\n");
+
+  }
+
+#endif
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Releases MBAmap and MapUnitToSliceGroupMap and resets both pointers to NULL
+ ************************************************************************
+ */
+void FmoUninit()
+{
+  // free(NULL) is a no-op, so the maps can be released without testing
+  // the pointers first; both pointers end up NULL whether or not a map
+  // had been allocated.
+
+  free (MBAmap);
+  MBAmap = NULL;
+
+  free (MapUnitToSliceGroupMap);
+  MapUnitToSliceGroupMap = NULL;
+
+  // both maps are released at this point
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate interleaved slice group map type MapUnit map (type 0): consecutive runs of
+ *    pps->run_length_minus1[g]+1 map units are assigned to group g = 0..num_slice_groups_minus1, repeating until the map is full
+ * \param img
+ *    Image Parameter to be used for map generation
+ * \param pps
+ *    Picture Parameter set to be used for map generation
+ ************************************************************************
+ */
+static void FmoGenerateType0MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  unsigned iGroup, j;
+  unsigned i = 0; // first map unit of the current run
+  do
+  {
+    for( iGroup = 0;
+    (iGroup <= pps->num_slice_groups_minus1) && (i < PicSizeInMapUnits);
+    i += pps->run_length_minus1[iGroup++] + 1) // advance by the run just written, then move to the next group
+    {
+      for( j = 0; j <= pps->run_length_minus1[ iGroup ] && i + j < PicSizeInMapUnits; j++ ) // the last run is truncated at the end of the map
+        MapUnitToSliceGroupMap[i+j] = iGroup;
+    }
+  }
+  while( i < PicSizeInMapUnits ); // start again with group 0 until every map unit is assigned
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate dispersed slice group map type MapUnit map (type 1)
+ *
+ *    Unit i sits at column i%W, row i/W (W = PicWidthInMbs) and gets
+ *    group (column + (row * numGroups) / 2) % numGroups
+ * \param img
+ *    Image parameters; supplies PicWidthInMbs
+ * \param pps
+ *    Picture parameter set; supplies num_slice_groups_minus1
+ ************************************************************************
+ */
+static void FmoGenerateType1MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  const unsigned numGroups = pps->num_slice_groups_minus1 + 1;
+  unsigned unit;
+  for( unit = 0; unit < PicSizeInMapUnits; unit++ )
+    MapUnitToSliceGroupMap[unit] = ( (unit % img->PicWidthInMbs) + ( (unit / img->PicWidthInMbs) * numGroups ) / 2 ) % numGroups;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate foreground with left-over slice group map type MapUnit map (type 2)
+ *    Each group below the last one is a rectangle given by top_left / bottom_right
+ * \param img
+ *    Image parameters; supplies PicWidthInMbs
+ * \param pps
+ *    Picture parameter set; supplies group count and rectangle corners
+ ************************************************************************
+ */
+static void FmoGenerateType2MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  int group;
+  unsigned unit, row, col;
+  unsigned firstRow, firstCol, lastRow, lastCol;
+  // Everything starts out in the highest-numbered ("left-over") group
+  for( unit = 0; unit < PicSizeInMapUnits; unit++ )
+    MapUnitToSliceGroupMap[ unit ] = pps->num_slice_groups_minus1;
+  // Paint rectangles from high to low ID so lower IDs win where they overlap
+  for( group = pps->num_slice_groups_minus1 - 1 ; group >= 0; group-- )
+  {
+    firstRow = pps->top_left[ group ] / img->PicWidthInMbs;
+    firstCol = pps->top_left[ group ] % img->PicWidthInMbs;
+    lastRow  = pps->bottom_right[ group ] / img->PicWidthInMbs;
+    lastCol  = pps->bottom_right[ group ] % img->PicWidthInMbs;
+    for( row = firstRow; row <= lastRow; row++ )
+      for( col = firstCol; col <= lastCol; col++ )
+        MapUnitToSliceGroupMap[ row * img->PicWidthInMbs + col ] = group;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate box-out slice group map type MapUnit map (type 3)
+ *    Walks outward from the picture centre; units visited first go to group 0
+ * \param img
+ *    Image parameters; supplies picture size and slice_group_change_cycle
+ * \param pps
+ *    Picture parameter set; supplies change rate and direction flag
+ ************************************************************************
+ */
+static void FmoGenerateType3MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  unsigned i, k;
+  int leftBound, topBound, rightBound, bottomBound;
+  int x, y, xDir, yDir;
+  int mapUnitVacant;
+
+  unsigned mapUnitsInSliceGroup0 = imin((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits);
+  // 2 is used as the "not yet visited" marker; final values are only 0 or 1
+  for( i = 0; i < PicSizeInMapUnits; i++ )
+    MapUnitToSliceGroupMap[ i ] = 2;
+  // start position: picture centre, shifted by the direction flag
+  x = ( img->PicWidthInMbs - pps->slice_group_change_direction_flag ) / 2;
+  y = ( img->PicHeightInMapUnits - pps->slice_group_change_direction_flag ) / 2;
+
+  leftBound   = x;
+  topBound    = y;
+  rightBound  = x;
+  bottomBound = y;
+  // initial travel direction also depends on the direction flag
+  xDir =  pps->slice_group_change_direction_flag - 1;
+  yDir =  pps->slice_group_change_direction_flag;
+  // k counts assigned units: it only advances when a vacant unit was filled
+  for( k = 0; k < PicSizeInMapUnits; k += mapUnitVacant )
+  {
+    mapUnitVacant = ( MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ]  ==  2 );
+    if( mapUnitVacant )
+      MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ] = ( k >= mapUnitsInSliceGroup0 );  // first mapUnitsInSliceGroup0 units -> 0, rest -> 1
+    // on reaching the current bound in the travel direction: grow that bound by one (clamped to the picture) and turn
+    if( xDir  ==  -1  &&  x  ==  leftBound )
+    {
+      leftBound = imax( leftBound - 1, 0 );
+      x = leftBound;
+      xDir = 0;
+      yDir = 2 * pps->slice_group_change_direction_flag - 1;
+    }
+    else
+      if( xDir  ==  1  &&  x  ==  rightBound )
+      {
+        rightBound = imin( rightBound + 1, (int)img->PicWidthInMbs - 1 );
+        x = rightBound;
+        xDir = 0;
+        yDir = 1 - 2 * pps->slice_group_change_direction_flag;
+      }
+      else
+        if( yDir  ==  -1  &&  y  ==  topBound )
+        {
+          topBound = imax( topBound - 1, 0 );
+          y = topBound;
+          xDir = 1 - 2 * pps->slice_group_change_direction_flag;
+          yDir = 0;
+        }
+        else
+          if( yDir  ==  1  &&  y  ==  bottomBound )
+          {
+            bottomBound = imin( bottomBound + 1, (int)img->PicHeightInMapUnits - 1 );
+            y = bottomBound;
+            xDir = 2 * pps->slice_group_change_direction_flag - 1;
+            yDir = 0;
+          }
+          else
+          {
+            x = x + xDir;   // not at a bound: keep moving in the current direction
+            y = y + yDir;
+          }
+  }
+
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate raster scan slice group map type MapUnit map (type 4)
+ *    Splits the raster scan into a leading part and a trailing part
+ * \param img
+ *    Image parameters; supplies slice_group_change_cycle
+ * \param pps
+ *    Picture parameter set; supplies change rate and direction flag
+ ************************************************************************
+ */
+static void FmoGenerateType4MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  unsigned unit;
+  unsigned group0Units = imin((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits);
+  unsigned upperLeftUnits = group0Units;
+
+  // With the direction flag set, group 0 occupies the tail of the
+  // raster scan, so the leading (upper-left) part is the remainder
+  if( pps->slice_group_change_direction_flag )
+    upperLeftUnits = PicSizeInMapUnits - group0Units;
+
+  for( unit = 0; unit < PicSizeInMapUnits; unit++ )
+    MapUnitToSliceGroupMap[ unit ] = ( unit < upperLeftUnits ) ? pps->slice_group_change_direction_flag
+                                                               : 1 - pps->slice_group_change_direction_flag;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate wipe slice group map type MapUnit map (type 5)
+ *    Same split as type 4, but units are counted column by column
+ * \param img
+ *    Image parameters; supplies picture size and slice_group_change_cycle
+ * \param pps
+ *    Picture parameter set; supplies change rate and direction flag
+ ************************************************************************
+ */
+static void FmoGenerateType5MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  unsigned group0Units = imin((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits);
+  unsigned upperLeftUnits = pps->slice_group_change_direction_flag ? ( PicSizeInMapUnits - group0Units ) : group0Units;
+  unsigned col, row;
+  unsigned visited = 0;   // units handled so far, in column-major order
+
+  for( col = 0; col < img->PicWidthInMbs; col++ )
+  {
+    for( row = 0; row < img->PicHeightInMapUnits; row++, visited++ )
+    {
+      const unsigned idx = row * img->PicWidthInMbs + col;
+      MapUnitToSliceGroupMap[ idx ] = ( visited < upperLeftUnits ) ? pps->slice_group_change_direction_flag : 1 - pps->slice_group_change_direction_flag;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate explicit slice group map type MapUnit map (type 6)
+ *
+ * \param img
+ *    Image parameters (not referenced by this generator)
+ * \param pps
+ *    Picture parameter set; slice_group_id[] is read once per map unit
+ ************************************************************************
+ */
+static void FmoGenerateType6MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+{
+  unsigned unit = 0;
+
+  // Straight copy: the PPS already carries one group ID per map unit
+  for( ; unit < PicSizeInMapUnits; unit++ )
+    MapUnitToSliceGroupMap[ unit ] = pps->slice_group_id[ unit ];
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoStartPicture: per-picture FMO setup. Records, for every slice
+ *    group ID, the first MB belonging to it (-1 if the group is empty)
+ * \return 0 (always)
+ ************************************************************************
+ */
+int FmoStartPicture ()
+{
+  int group = 0;
+  assert (MBAmap != NULL);
+  while (group < MAXSLICEGROUPIDS)
+  {
+    FirstMBInSlice[group] = FmoGetFirstMBOfSliceGroup (group);
+    group++;
+  }
+  return 0;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoEndPicture: Ends the Scattered Slices Module (called once
+ *    per picture). Currently a no-op that always returns 0.
+ *
+ * \par Input:
+ *    None
+ ************************************************************************
+ */
+int FmoEndPicture ()
+{
+  // Do nothing
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoMB2SliceGroup: Returns the slice group ID for a given MB
+ *
+ * \par Input:
+ *    Macroblock Nr (in scan order), must be in [0, PicSizeInMbs)
+ ************************************************************************
+ */
+int FmoMB2SliceGroup ( int mb)
+{
+  assert (mb >= 0 && mb < (int)img->PicSizeInMbs);   // a negative mb would index before MBAmap
+  assert (MBAmap != NULL);
+  return MBAmap[mb];
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoGetNextMBNr: Returns the MB-Nr (in scan order) of the next
+ *    MB in the same slice group, -1 if the slice group is finished
+ *
+ * \par Input:
+ *    CurrentMbNr
+ ************************************************************************
+ */
+int FmoGetNextMBNr (int CurrentMbNr)
+{
+  const int group   = FmoMB2SliceGroup (CurrentMbNr);
+  const int picSize = (int)img->PicSizeInMbs;
+  int next;
+
+  // Scan forward for the nearest MB mapped to the same slice group
+  for (next = CurrentMbNr + 1; next < picSize; next++)
+    if (MBAmap[next] == group)
+      return next;
+
+  return -1;    // No further MB in this slice (could be end of picture)
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoGetPreviousMBNr: Returns the MB-Nr (in scan order) of the
+ *    previous MB in the same slice group, -1 if there is none
+ *
+ * \par Input:
+ *    CurrentMbNr
+ ************************************************************************
+ */
+int FmoGetPreviousMBNr (int CurrentMbNr)
+{
+  const int group = FmoMB2SliceGroup (CurrentMbNr);
+  int prev;
+
+  // Scan backward from the MB just before CurrentMbNr for the nearest
+  // MB mapped to the same slice group
+  for (prev = CurrentMbNr - 1; prev >= 0; prev--)
+    if (MBAmap[prev] == group)
+      return prev;
+
+  return -1;    // No previous MB in this slice
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoGetFirstMBOfSliceGroup: Returns the MB-Nr (in scan order) of the
+ *    first MB of the slice group, -1 if no such MB exists
+ *
+ * \par Input:
+ *    SliceGroupID: Id of SliceGroup
+ ************************************************************************
+ */
+int FmoGetFirstMBOfSliceGroup (int SliceGroupID)
+{
+  int mb;
+  const int picSize = (int)img->PicSizeInMbs;
+
+  for (mb = 0; mb < picSize; mb++)
+    if (FmoMB2SliceGroup (mb) == SliceGroupID)
+      return mb;
+
+  return -1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    FmoGetLastCodedMBOfSliceGroup: Returns the MB-Nr (in scan order)
+ *    of the last MB of the slice group
+ *
+ * \par Input:
+ *    SliceGroupID
+ * \par Return
+ *    MB Nr in case of success (is always >= 0)
+ *    -1 if the SliceGroup doesn't exist
+ ************************************************************************
+ */
+int FmoGetLastCodedMBOfSliceGroup (int SliceGroupID)
+{
+  int mb;
+
+  // Walk backwards so the first hit is the last MB of the group
+  for (mb = (int)img->PicSizeInMbs - 1; mb >= 0; mb--)
+    if (FmoMB2SliceGroup (mb) == SliceGroupID)
+      return mb;
+  return -1;
+}
+
+
+void FmoSetLastMacroblockInSlice ( int mb)
+{
+  // called by terminate_slice() with the last processed MB; stores the NEXT
+  // (still uncoded) MB of that MB's slice group in FirstMBInSlice[], which
+  // FmoGetFirstMacroblockInSlice() later reports as the first uncoded MB
+  int currSliceGroup;
+  assert (mb >= 0);           // must hold BEFORE mb is used as an index below
+  currSliceGroup = FmoMB2SliceGroup (mb);
+  mb = FmoGetNextMBNr (mb);   // The next (still uncoded) MB, or -1 if SG is finished
+  FirstMBInSlice[currSliceGroup] = mb;
+}
+
+int FmoGetFirstMacroblockInSlice ( int SliceGroup)
+{
+  // returns the first uncoded MB in the given slice group, -1 if there is no
+  // more to do in this slice group (value is read from FirstMBInSlice[])
+  return FirstMBInSlice[SliceGroup];
+}
+
+
+int FmoSliceGroupCompletelyCoded( int SliceGroupID)
+{
+  // A negative "first uncoded MB" means the slice group is completely
+  // coded or not present at all
+  const int firstUncoded = FmoGetFirstMacroblockInSlice (SliceGroupID);
+  return (firstUncoded < 0) ? TRUE : FALSE;
+}
+
+
+


Index: llvm-test/MultiSource/Applications/JM/lencod/fmo.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/fmo.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/fmo.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,39 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file fmo.h
+ *
+ * \brief
+ *    Support for Flexible Macroblock Ordering
+ *
+ * \date
+ *    16 June 2002
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ **************************************************************************/
+
+#ifndef _FMO_H_
+#define _FMO_H_
+// Number of slice group IDs tracked per picture (size of the FirstMBInSlice scan in FmoStartPicture)
+#define MAXSLICEGROUPIDS 8
+
+int FmoInit(ImageParameters * img, pic_parameter_set_rbsp_t * pps, seq_parameter_set_rbsp_t * sps);
+void FmoUninit (void);                                  // frees MBAmap and MapUnitToSliceGroupMap
+int FmoFinit (seq_parameter_set_rbsp_t * sps);
+int FmoMB2SliceGroup (int mb);                          // slice group ID of MB mb (MBAmap[mb])
+int FmoGetFirstMBOfSliceGroup (int SliceGroupID);       // lowest MB nr in the group, -1 if none
+int FmoGetFirstMacroblockInSlice (int SliceGroup);      // first uncoded MB of the group, -1 if none left
+int FmoGetNextMBNr (int CurrentMbNr);                   // next MB in the same group, -1 if none
+int FmoGetLastCodedMBOfSliceGroup (int SliceGroupID);   // highest MB nr in the group, -1 if none
+int FmoStartPicture (void);                             // fills the per-group first-MB table; returns 0
+int FmoEndPicture(void);                                // no-op; returns 0
+int FmoSliceGroupCompletelyCoded(int SliceGroupID);     // TRUE when the group has no uncoded MB left
+void FmoSetLastMacroblockInSlice (int mb);              // records the MB following mb as the group's next uncoded MB
+
+int FmoGetPreviousMBNr (int CurrentMbNr);               // previous MB in the same group, -1 if none
+
+extern byte *MBAmap;                                    // indexed by MB number; yields the slice group ID
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/global.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/global.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/global.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1483 @@
+
+/*!
+ ************************************************************************
+ *  \file
+ *     global.h
+ *
+ *  \brief
+ *     global definitions for H.264 encoder.
+ *
+ *  \author
+ *     Copyright (C) 1999  Telenor Satellite Services,Norway
+ *                         Ericsson Radio Systems, Sweden
+ *
+ *     Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *
+ *     Telenor Satellite Services
+ *     Keysers gt.13                       tel.:   +47 23 13 86 98
+ *     N-0130 Oslo,Norway                  fax.:   +47 22 77 79 80
+ *
+ *     Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+ *
+ *     Ericsson Radio Systems
+ *     KI/ERA/T/VV
+ *     164 80 Stockholm, Sweden
+ *
+ ************************************************************************
+ */
+#ifndef _GLOBAL_H_
+#define _GLOBAL_H_
+
+#include <stdio.h>
+
+#include "win32.h"
+#include "defines.h"
+#include "parsetcommon.h"
+#include "q_matrix.h"
+#include "q_offsets.h"
+#include "minmax.h"
+#include "ifunctions.h"
+
+/***********************************************************************
+ * T y p e    d e f i n i t i o n s    f o r    T M L
+ ***********************************************************************
+ */
+
+
+//typedef byte imgpel;
+//typedef unsigned short distpel;
+typedef unsigned short imgpel;
+typedef int distpel;
+
+enum {
+  YUV400 = 0,   //!< 4:0:0
+  YUV420 = 1,   //!< 4:2:0
+  YUV422 = 2,   //!< 4:2:2
+  YUV444 = 3    //!< 4:4:4
+} color_formats;   // NOTE(review): this declares a variable named color_formats in every includer, not a type - confirm intent
+
+enum {
+  LIST_0 = 0,
+  LIST_1 = 1,
+  BI_PRED = 2,
+  BI_PRED_L0 = 3,
+  BI_PRED_L1 = 4
+};
+
+enum {
+  ERROR_SAD = 0,
+  ERROR_SSE = 1,
+  ERROR_SATD = 2,
+  ERROR_PSATD = 3
+};
+
+enum {
+  ME_Y_ONLY = 0,
+  ME_YUV_FP = 1,
+  ME_YUV_FP_SP = 2
+};
+
+enum {
+  DISTORTION_MSE = 0,
+  DISTORTION_WEIGHTED_MSE = 1,
+  DISTORTION_OVERLAP_TRANSFORM = 2,
+  DISTORTION_EDGE_MSE = 3
+};
+
+//! Data Partitioning Modes
+typedef enum
+{
+  PAR_DP_1,   //!< no data partitioning is supported
+  PAR_DP_3    //!< data partitioning with 3 partitions
+} PAR_DP_TYPE;
+
+
+//! Output File Types
+typedef enum
+{
+  PAR_OF_ANNEXB,    //!< Annex B byte stream format
+  PAR_OF_RTP       //!< RTP packets in outfile
+} PAR_OF_TYPE;
+
+//! Field Coding Types
+typedef enum
+{
+  FRAME_CODING,
+  FIELD_CODING,
+  ADAPTIVE_CODING,
+  FRAME_MB_PAIR_CODING
+} CodingType;
+
+//! definition of H.264 syntax elements
+typedef enum
+{
+  SE_HEADER,
+  SE_PTYPE,
+  SE_MBTYPE,
+  SE_REFFRAME,
+  SE_INTRAPREDMODE,
+  SE_MVD,
+  SE_CBP,
+  SE_LUM_DC_INTRA,
+  SE_CHR_DC_INTRA,
+  SE_LUM_AC_INTRA,
+  SE_CHR_AC_INTRA,
+  SE_LUM_DC_INTER,
+  SE_CHR_DC_INTER,
+  SE_LUM_AC_INTER,
+  SE_CHR_AC_INTER,
+  SE_DELTA_QUANT,
+  SE_BFRAME,
+  SE_EOS,
+  SE_MAX_ELEMENTS  //!< number of maximum syntax elements
+} SE_type;         // substituting the definitions in elements.h
+
+
+typedef enum
+{
+  INTER_MB,
+  INTRA_MB_4x4,
+  INTRA_MB_16x16
+} IntraInterDecision;
+
+
+typedef enum
+{
+  BITS_HEADER,
+  BITS_TOTAL_MB,
+  BITS_MB_MODE,
+  BITS_INTER_MB,
+  BITS_CBP_MB,
+  BITS_COEFF_Y_MB,
+  BITS_COEFF_UV_MB,
+  BITS_DELTA_QUANT_MB,
+  MAX_BITCOUNTER_MB
+} BitCountType;
+
+
+typedef enum
+{
+  NO_SLICES,
+  FIXED_MB,
+  FIXED_RATE,
+  CALLBACK,
+  FMO
+} SliceMode;
+
+
+typedef enum
+{
+  UVLC,
+  CABAC
+} SymbolMode;
+
+typedef enum
+{
+  FULL_SEARCH      = -1,
+  FAST_FULL_SEARCH =  0,
+  UM_HEX           =  1,
+  UM_HEX_SIMPLE    =  2,
+  EPZS             =  3
+} SearchType;
+
+
+typedef enum
+{
+  FRAME,
+  TOP_FIELD,
+  BOTTOM_FIELD
+} PictureStructure;           //!< New enum for field processing
+
+typedef enum
+{
+  P_SLICE = 0,
+  B_SLICE = 1,
+  I_SLICE = 2,
+  SP_SLICE = 3,
+  SI_SLICE = 4
+} SliceType;
+
+//Motion Estimation levels
+typedef enum
+{
+  F_PEL,   //!< Full Pel refinement
+  H_PEL,   //!< Half Pel refinement
+  Q_PEL    //!< Quarter Pel refinement
+} MELevel;
+
+typedef enum
+{
+  FAST_ACCESS = 0,    //!< Fast/safe reference access
+  UMV_ACCESS = 1      //!< unconstrained reference access
+} REF_ACCESS_TYPE;
+
+typedef enum
+{
+  IS_LUMA = 0,
+  IS_CHROMA = 1
+} Component_Type;
+
+typedef enum
+{
+  RC_MODE_0 = 0,
+  RC_MODE_1 = 1,
+  RC_MODE_2 = 2,
+  RC_MODE_3 = 3
+} RCModeType;
+
+/***********************************************************************
+ * D a t a    t y p e s   f o r  C A B A C
+ ***********************************************************************
+ */
+
+//! struct to characterize the state of the arithmetic coding engine
+typedef struct
+{
+  unsigned int  Elow, Erange;
+  unsigned int  Ebuffer;
+  unsigned int  Ebits_to_go;
+  unsigned int  Ebits_to_follow;
+  byte          *Ecodestrm;
+  int           *Ecodestrm_len;
+  int           C;
+  int           E;
+
+} EncodingEnvironment;
+
+typedef EncodingEnvironment *EncodingEnvironmentPtr;
+
+//! struct for context management
+typedef struct
+{
+  unsigned short state;         // index into state-table CP
+  unsigned char  MPS;           // Most Probable Symbol 0/1 CP
+  unsigned long  count;
+} BiContextType;
+
+typedef BiContextType *BiContextTypePtr;
+
+
+/**********************************************************************
+ * C O N T E X T S   F O R   T M L   S Y N T A X   E L E M E N T S
+ **********************************************************************
+ */
+
+
+#define NUM_MB_TYPE_CTX  11
+#define NUM_B8_TYPE_CTX  9
+#define NUM_MV_RES_CTX   10
+#define NUM_REF_NO_CTX   6
+#define NUM_DELTA_QP_CTX 4
+#define NUM_MB_AFF_CTX 4
+
+#define NUM_TRANSFORM_SIZE_CTX 3
+
+typedef struct
+{
+  BiContextType mb_type_contexts [3][NUM_MB_TYPE_CTX];
+  BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX];
+  BiContextType mv_res_contexts  [2][NUM_MV_RES_CTX];
+  BiContextType ref_no_contexts  [2][NUM_REF_NO_CTX];
+  BiContextType delta_qp_contexts   [NUM_DELTA_QP_CTX];
+  BiContextType mb_aff_contexts     [NUM_MB_AFF_CTX];
+  BiContextType transform_size_contexts   [NUM_TRANSFORM_SIZE_CTX];
+} MotionInfoContexts;
+
+
+#define NUM_IPR_CTX    2
+#define NUM_CIPR_CTX   4
+#define NUM_CBP_CTX    4
+#define NUM_BCBP_CTX   4
+#define NUM_MAP_CTX   15
+#define NUM_LAST_CTX  15
+#define NUM_ONE_CTX    5
+#define NUM_ABS_CTX    5
+
+
+typedef struct
+{
+  BiContextType  ipr_contexts [NUM_IPR_CTX];
+  BiContextType  cipr_contexts[NUM_CIPR_CTX];
+  BiContextType  cbp_contexts [3][NUM_CBP_CTX];
+  BiContextType  bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX];
+  BiContextType  map_contexts [NUM_BLOCK_TYPES][NUM_MAP_CTX];
+  BiContextType  last_contexts[NUM_BLOCK_TYPES][NUM_LAST_CTX];
+  BiContextType  one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX];
+  BiContextType  abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX];
+  BiContextType  fld_map_contexts [NUM_BLOCK_TYPES][NUM_MAP_CTX];
+  BiContextType  fld_last_contexts[NUM_BLOCK_TYPES][NUM_LAST_CTX];
+} TextureInfoContexts;
+
+//*********************** end of data type definition for CABAC *******************
+
+//! Pixel position for checking neighbors
+typedef struct pix_pos
+{
+  int available;
+  int mb_addr;
+  int x;
+  int y;
+  int pos_x;
+  int pos_y;
+} PixelPos;
+
+//! Buffer structure for decoded reference picture marking commands
+typedef struct DecRefPicMarking_s
+{
+  int memory_management_control_operation;
+  int difference_of_pic_nums_minus1;
+  int long_term_pic_num;
+  int long_term_frame_idx;
+  int max_long_term_frame_idx_plus1;
+  struct DecRefPicMarking_s *Next;
+} DecRefPicMarking_t;
+
+//! Syntax Element
+typedef struct syntaxelement
+{
+  int                 type;           //!< type of syntax element for data part.
+  int                 value1;         //!< numerical value of syntax element
+  int                 value2;         //!< for blocked symbols, e.g. run/level
+  int                 len;            //!< length of code
+  int                 inf;            //!< info part of UVLC code
+  unsigned int        bitpattern;     //!< UVLC bitpattern
+  int                 context;        //!< CABAC context
+
+#if TRACE
+  #define             TRACESTRING_SIZE 100            //!< size of trace string
+  char                tracestring[TRACESTRING_SIZE];  //!< trace string
+#endif
+
+  //!< for mapping of syntaxElement to UVLC
+  void    (*mapping)(int value1, int value2, int* len_ptr, int* info_ptr);
+
+} SyntaxElement;
+
+//! Macroblock
+typedef struct macroblock
+{
+  int                 slice_nr;
+  int                 delta_qp;
+  int                 qp;                         //!< QP luma
+  int                 qpc[2];                     //!< QP chroma
+  int                 qpsp ;
+  int                 bitcounter[MAX_BITCOUNTER_MB];
+
+  struct macroblock   *mb_available_up;   //!< pointer to neighboring MB (CABAC)
+  struct macroblock   *mb_available_left; //!< pointer to neighboring MB (CABAC)
+
+  int                 mb_type;
+  int                 mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2];          //!< indices correspond to [list][block_y][block_x][x,y]
+  char                intra_pred_modes[MB_BLOCK_PARTITIONS];
+  char                intra_pred_modes8x8[MB_BLOCK_PARTITIONS];           //!< four 8x8 blocks in a macroblock
+  int                 cbp ;
+  int64               cbp_blk ;    //!< 1 bit set for every 4x4 block with coefs (not implemented for INTRA)
+  int                 b8mode[4];
+  int                 b8pdir[4];
+  int64               cbp_bits;
+
+  int                 c_ipred_mode;      //!< chroma intra prediction mode
+  int                 IntraChromaPredModeFlag;
+
+  int                 mb_field;
+  int                 is_field_mode;
+  int                 list_offset;
+
+  int                 mbAddrA, mbAddrB, mbAddrC, mbAddrD;
+  int                 mbAvailA, mbAvailB, mbAvailC, mbAvailD;
+
+  int                 all_blk_8x8;
+  int                 luma_transform_size_8x8_flag;
+  int                 NoMbPartLessThan8x8Flag;
+
+  short               bi_pred_me;
+
+  // rate control
+  double              actj;               // macroblock activity measure for macroblock j
+  int                 prev_qp;
+  int                 prev_delta_qp;
+  int                 prev_cbp;
+  int                 predict_qp;
+  int                 predict_error;
+
+  int                 LFDisableIdc;
+  int                 LFAlphaC0Offset;
+  int                 LFBetaOffset;
+
+  int                 skip_flag;
+} Macroblock;
+
+
+
+//! Bitstream
+typedef struct
+{
+  int             byte_pos;           //!< current position in bitstream;
+  int             bits_to_go;         //!< current bitcounter
+  byte            byte_buf;           //!< current buffer for last written byte
+  int             stored_byte_pos;    //!< storage for position in bitstream;
+  int             stored_bits_to_go;  //!< storage for bitcounter
+  byte            stored_byte_buf;    //!< storage for buffer of last written byte
+
+  byte            byte_buf_skip;      //!< current buffer for last written byte
+  int             byte_pos_skip;      //!< storage for position in bitstream;
+  int             bits_to_go_skip;    //!< storage for bitcounter
+
+  byte            *streamBuffer;      //!< actual buffer for written bytes
+  int             write_flag;         //!< Bitstream contains data and needs to be written
+
+#if TRACE
+  Boolean             trace_enabled;
+#endif
+
+} Bitstream;
+
+//! DataPartition
+typedef struct datapartition
+{
+
+  Bitstream           *bitstream;
+  EncodingEnvironment ee_cabac;
+  EncodingEnvironment ee_recode;
+} DataPartition;
+
+//! Slice
+typedef struct
+{
+  int                 picture_id;
+  int                 qp;
+  int                 picture_type; //!< picture type
+  int                 start_mb_nr;
+  int                 max_part_nr;  //!< number of different partitions
+  int                 num_mb;       //!< number of MBs in the slice
+  DataPartition       *partArr;     //!< array of partitions
+  MotionInfoContexts  *mot_ctx;     //!< pointer to struct of context models for use in CABAC
+  TextureInfoContexts *tex_ctx;     //!< pointer to struct of context models for use in CABAC
+
+  int                 ref_pic_list_reordering_flag_l0;
+  int                 *reordering_of_pic_nums_idc_l0;
+  int                 *abs_diff_pic_num_minus1_l0;
+  int                 *long_term_pic_idx_l0;
+  int                 ref_pic_list_reordering_flag_l1;
+  int                 *reordering_of_pic_nums_idc_l1;
+  int                 *abs_diff_pic_num_minus1_l1;
+  int                 *long_term_pic_idx_l1;
+
+  Boolean             (*slice_too_big)(int bits_slice); //!< for use of callback functions
+
+  int                 field_ctx[3][2]; //GB
+
+} Slice;
+
+
+
+#define MAXSLICEPERPICTURE 100
+typedef struct
+{
+  int   no_slices;
+  int   idr_flag;
+  Slice *slices[MAXSLICEPERPICTURE];
+  int bits_per_picture;
+  float distortion_y;
+  float distortion_u;
+  float distortion_v;
+} Picture;
+
+Picture *top_pic;
+Picture *bottom_pic;
+Picture *frame_pic;
+Picture *frame_pic_1;
+Picture *frame_pic_2;
+Picture *frame_pic_3;
+Picture *frame_pic_si;
+
+#ifdef _LEAKYBUCKET_
+long *Bit_Buffer;
+#endif
+
+// global picture format dependent buffers, mem allocation in image.c
+imgpel **imgY_org;           //!< Reference luma image
+imgpel ***imgUV_org;         //!< Reference chroma image
+int    **imgY_sub_tmp;       //!< Y picture temporary component (Quarter pel)
+
+int **PicPos;
+unsigned int log2_max_frame_num_minus4;
+unsigned int log2_max_pic_order_cnt_lsb_minus4;
+
+time_t  me_tot_time,me_time;
+pic_parameter_set_rbsp_t *active_pps;
+seq_parameter_set_rbsp_t *active_sps;
+
+
+int dsr_new_search_range; //!<Dynamic Search Range.
+//////////////////////////////////////////////////////////////////////////
+// B pictures
+// motion vector : forward, backward, direct
+int  mb_adaptive;       //!< For MB level field/frame coding tools
+int  MBPairIsField;     //!< For MB level field/frame coding tools
+
+
+//Weighted prediction
+int ***wp_weight;  // weight in [list][index][component] order
+int ***wp_offset;  // offset in [list][index][component] order
+int ****wbp_weight;  // weight in [list][fwd_index][bwd_idx][component] order
+int luma_log_weight_denom;
+int chroma_log_weight_denom;
+int wp_luma_round;
+int wp_chroma_round;
+
+// global picture format dependent buffers, mem allocation in image.c (field picture)
+imgpel   **imgY_org_top;
+imgpel   **imgY_org_bot;
+
+imgpel  ***imgUV_org_top;
+imgpel  ***imgUV_org_bot;
+
+imgpel   **imgY_org_frm;
+imgpel  ***imgUV_org_frm;
+
+imgpel   **imgY_com;               //!< Encoded luma images
+imgpel  ***imgUV_com;              //!< Encoded chroma images
+
+char    ***direct_ref_idx;           //!< direct mode reference index buffer
+char    **direct_pdir;              //!< presumably direct mode prediction direction buffer (original comment duplicated direct_ref_idx's) - TODO confirm
+
+// Buffers for rd optimization with packet losses, Dim. Kontopodis
+byte **pixel_map;   //!< Shows the latest reference frame that is reliable for each pixel
+byte **refresh_map; //!< Stores the new values for pixel_map
+int intras;         //!< Counts the intra updates in each frame.
+
+int  frame_ctr[5];
+int  frame_no, nextP_tr_fld, nextP_tr_frm;
+
+time_t  tot_time;
+
+#define ET_SIZE 300      //!< size of error text buffer
+char errortext[ET_SIZE]; //!< buffer for error message for exit with error()
+
+// Residue Color Transform
+char b8_ipredmode8x8[4][4], b8_intra_pred_modes8x8[16];
+
+//! Info for the "decoders-in-the-encoder" used for rdoptimization with packet losses
+typedef struct
+{
+  int  **resY;               //!< Residue of Luminance
+  imgpel ***decY;            //!< Decoded values at the simulated decoders
+  imgpel ****decref;         //!< Reference frames of the simulated decoders
+  imgpel ***decY_best;       //!< Decoded frames for the best mode for all decoders
+  imgpel **RefBlock;
+  byte **status_map;
+  byte **dec_mb_mode;
+} Decoders;
+extern Decoders *decs;
+
+//! SNRParameters
+typedef struct
+{
+  float snr_y;               //!< current Y SNR
+  float snr_u;               //!< current U SNR
+  float snr_v;               //!< current V SNR
+  float snr_y1;              //!< SNR Y(dB) first frame
+  float snr_u1;              //!< SNR U(dB) first frame
+  float snr_v1;              //!< SNR V(dB) first frame
+  float snr_yt[5];           //!< SNR Y(dB) based on frame type
+  float snr_ut[5];           //!< SNR U(dB) based on frame type
+  float snr_vt[5];           //!< SNR V(dB) based on frame type
+  float snr_ya;              //!< Average SNR Y(dB) remaining frames
+  float snr_ua;              //!< Average SNR U(dB) remaining frames
+  float snr_va;              //!< Average SNR V(dB) remaining frames
+  float sse_y;               //!< SSE Y
+  float sse_u;               //!< SSE U
+  float sse_v;               //!< SSE V
+  float msse_y;              //!< Average SSE Y
+  float msse_u;              //!< Average SSE U
+  float msse_v;              //!< Average SSE V
+  int   frame_ctr;           //!< number of coded frames
+} SNRParameters;
+
+#define FILE_NAME_SIZE 200
+                             //! all input parameters
+typedef struct
+{
+  int ProfileIDC;                    //!< value of syntax element profile_idc
+  int LevelIDC;                      //!< value of syntax element level_idc
+
+  int no_frames;                     //!< number of frames to be encoded
+  int qp0;                           //!< QP of first frame
+  int qpN;                           //!< QP of remaining frames
+  int jumpd;                         //!< number of frames to skip in input sequence (e.g. 2 takes frames 0,3,6,9...)
+  int DisableSubpelME;               //!< Disable sub-pixel motion estimation
+  int search_range;                  /*!< search range - integer pel search and 16x16 blocks.  The search window is
+                                          generally around the predicted vector. Max vector is 2xmcrange.  For 8x8
+                                          and 4x4 block sizes the search range is 1/2 of that for 16x16 blocks.       */
+  int num_ref_frames;                //!< number of reference frames to be used
+  int P_List0_refs;                  //!< number of reference pictures in list 0 in P pictures
+  int B_List0_refs;                  //!< number of reference pictures in list 0 in B pictures
+  int B_List1_refs;                  //!< number of reference pictures in list 1 in B pictures
+  int Log2MaxFNumMinus4;             //!< value of syntax element log2_max_frame_num_minus4
+  int Log2MaxPOCLsbMinus4;           //!< value of syntax element log2_max_pic_order_cnt_lsb_minus4
+
+  int img_width;                     //!< image width  (must be a multiple of 16 pels)
+  int img_height;                    //!< image height (must be a multiple of 16 pels)
+  int yuv_format;                    //!< YUV format (0=4:0:0, 1=4:2:0, 2=4:2:2, 3=4:4:4)
+  int intra_upd;                     /*!< For error robustness. 0: no special action. 1: One GOB/frame is intra coded
+                                          as regular 'update'. 2: One GOB every 2 frames is intra coded etc.
+                                          In connection with this intra update, restrictions are put on motion vectors
+                                          to prevent errors from propagating from the past                            */
+  int blc_size[8][2];                //!< array for different block sizes
+  int part_size[8][2];               //!< array for different partition sizes
+  int blocktype_lut[4][4];           //!< 4x4 block type lookup table; NOTE(review): original comment duplicated part_size's text - confirm semantics
+  int slice_mode;                    //!< Indicate what algorithm to use for setting slices
+  int slice_argument;                //!< Argument to the specified slice algorithm
+  int UseConstrainedIntraPred;       //!< 0: Inter MB pixels are allowed for intra prediction 1: Not allowed
+  int  infile_header;                //!< If input file has a header set this to the length of the header
+  char infile[FILE_NAME_SIZE];       //!< YUV 4:2:0 input format
+  char outfile[FILE_NAME_SIZE];      //!< H.264 compressed output bitstream
+  char ReconFile[FILE_NAME_SIZE];    //!< Reconstructed Pictures
+  char TraceFile[FILE_NAME_SIZE];    //!< Trace Outputs
+  char QmatrixFile[FILE_NAME_SIZE];  //!< Q matrix cfg file
+  int intra_period;                  //!< Random Access period through intra
+  int EnableOpenGOP;                 //!< support for open gops.
+
+  int idr_enable;                    //!< Encode intra slices as IDR
+  int start_frame;                   //!< Encode sequence starting from Frame start_frame
+
+  int GenerateMultiplePPS;
+  int Generate_SEIVUI;
+  char SEIMessageText[500];
+  int VUISupport;
+
+  int ResendSPS;
+  int ResendPPS;
+
+  // B pictures
+  int successive_Bframe;             //!< number of B frames that will be used
+  int PReplaceBSlice;
+  int qpB;                           //!< QP for non-reference B slice coded pictures
+  int qpBRSOffset;                   //!< QP for reference B slice coded pictures
+  int direct_spatial_mv_pred_flag;   //!< Direct Mode type to be used (0: Temporal, 1: Spatial)
+  int directInferenceFlag;           //!< Direct Mode Inference Flag
+
+  int BiPredMotionEstimation;
+  int BiPredMERefinements;
+  int BiPredMESearchRange;
+  int BiPredMESubPel;
+
+
+  // SP Pictures
+  int sp_periodicity;                //!< The periodicity of SP-pictures
+  int qpsp;                          //!< SP Picture QP for prediction error
+  int qpsp_pred;                     //!< SP Picture QP for predicted block
+
+  int si_frame_indicator;            //!< Flag indicating whether SI frames should be encoded rather than SP frames (0: not used, 1: used)
+  int sp2_frame_indicator;           //!< Flag indicating whether switching SP frames should be encoded rather than SP frames (0: not used, 1: used)
+  int sp_output_indicator;           //!< Flag indicating whether coefficients are output to allow future encoding of switching SP frames (0: not used, 1: used)
+  char sp_output_filename[FILE_NAME_SIZE];    //!<Filename where SP coefficients are output
+  char sp2_input_filename1[FILE_NAME_SIZE];   //!<Filename of coefficients of the first bitstream when encoding SP frames to switch bitstreams
+  char sp2_input_filename2[FILE_NAME_SIZE];   //!<Filenames of coefficients of the second bitstream when encoding SP frames to switch bitstreams
+
+  int WeightedPrediction;            //!< Weighted prediction for P frames (0: not used, 1: explicit)
+  int WeightedBiprediction;          //!< Weighted prediction for B frames (0: not used, 1: explicit, 2: implicit)
+  int UseWeightedReferenceME;        //!< Use Weighted Reference for ME.
+  int RDPictureDecision;             //!< Perform RD optimal decision between various coded versions of same picture
+  int RDPictureIntra;                //!< Enabled RD pic decision for intra as well.
+  int RDPSliceWeightOnly;            //!< If enabled, does not check QP variations for P slices.
+  int RDPSliceBTest;                 //!< Tests B slice replacement for P.
+  int RDBSliceWeightOnly;            //!< If enabled, does not check QP variations for B slices.
+  int SkipIntraInInterSlices;        //!< Skip intra type checking in inter slices if best_mode is skip/direct
+  int BRefPictures;                  //!< B coded reference pictures replace P pictures (0: not used, 1: used)
+  int HierarchicalCoding;
+  int HierarchyLevelQPEnable;
+  char ExplicitHierarchyFormat[1024];//!< Explicit GOP format (HierarchicalCoding==3).
+  int ReferenceReorder;              //!< Reordering based on Poc distances
+  int PocMemoryManagement;           //!< Memory management based on Poc distances for hierarchical coding
+
+  int symbol_mode;                   //!< Specifies the mode the symbols are mapped on bits
+  int of_mode;                       //!< Specifies the mode of the output file
+  int partition_mode;                //!< Specifies the mode of data partitioning
+
+  int InterSearch16x16;
+  int InterSearch16x8;
+  int InterSearch8x16;
+  int InterSearch8x8;
+  int InterSearch8x4;
+  int InterSearch4x8;
+  int InterSearch4x4;
+
+  int IntraDisableInterOnly;
+  int Intra4x4ParDisable;
+  int Intra4x4DiagDisable;
+  int Intra4x4DirDisable;
+  int Intra16x16ParDisable;
+  int Intra16x16PlaneDisable;
+  int ChromaIntraDisable;
+
+  int EnableIPCM;
+
+  double FrameRate;
+
+  int EPZSPattern;
+  int EPZSDual;
+  int EPZSFixed;
+  int EPZSTemporal;
+  int EPZSSpatialMem;
+  int EPZSMinThresScale;
+  int EPZSMaxThresScale;
+  int EPZSMedThresScale;
+  int EPZSSubPelGrid;
+  int EPZSSubPelME;
+  int EPZSSubPelMEBiPred;
+  int EPZSSubPelThresScale;
+
+  int chroma_qp_index_offset;
+#ifdef _FULL_SEARCH_RANGE_
+  int full_search;
+#endif
+#ifdef _ADAPT_LAST_GROUP_
+  int last_frame;
+#endif
+#ifdef _CHANGE_QP_
+  int qpN2, qpB2, qp2start;
+  int qp02, qpBRS2Offset;
+#endif
+  int rdopt;
+  int CtxAdptLagrangeMult;    //!< context adaptive lagrangian multiplier
+  int FastCrIntraDecision;
+  int disthres;
+  int nobskip;
+
+#ifdef _LEAKYBUCKET_
+  int NumberLeakyBuckets;
+  char LeakyBucketRateFile[FILE_NAME_SIZE];
+  char LeakyBucketParamFile[FILE_NAME_SIZE];
+#endif
+
+  int PicInterlace;           //!< picture adaptive frame/field
+  int MbInterlace;            //!< macroblock adaptive frame/field
+
+  int IntraBottom;            //!< Force Intra Bottom at GOP periods.
+
+  int LossRateA;              //!< assumed loss probability of partition A (or full slice), in per cent, used for loss-aware R/D optimization
+  int LossRateB;              //!< assumed loss probability of partition B, in per cent, used for loss-aware R/D
+  int LossRateC;              //!< assumed loss probability of partition C, in per cent, used for loss-aware R/D
+  int NoOfDecoders;
+  int RestrictRef;
+  int NumFramesInELSubSeq;
+  int NumFrameIn2ndIGOP;
+
+  int RandomIntraMBRefresh;     //!< Number of pseudo-random intra-MBs per picture
+
+  int LFSendParameters;
+  int LFDisableIdc;
+  int LFAlphaC0Offset;
+  int LFBetaOffset;
+
+  int SparePictureOption;
+  int SPDetectionThreshold;
+  int SPPercentageThreshold;
+
+  // FMO
+  char SliceGroupConfigFileName[FILE_NAME_SIZE];    //!< Filename for config info for type 0, 2, 6
+  int num_slice_groups_minus1;           //!< "FmoNumSliceGroups" in encoder.cfg, same as FmoNumSliceGroups, which should be erased later
+  int slice_group_map_type;
+
+  int *top_left;                         //!< top_left and bottom_right store values indicating foregrounds
+  int *bottom_right;
+  byte *slice_group_id;                   //!< slice_group_id is for slice group type being 6
+  int *run_length_minus1;                //!< run_length_minus1 is for slice group type being 0
+
+  int slice_group_change_direction_flag;
+  int slice_group_change_rate_minus1;
+  int slice_group_change_cycle;
+
+  int redundant_pic_flag;   //!< encoding of redundant pictures
+  int pic_order_cnt_type;   //!< POC type
+
+  int context_init_method;
+  int model_number;
+  int Transform8x8Mode;
+  int ReportFrameStats;
+  int DisplayEncParams;
+  int Verbose;
+
+  //! Rate Control on JVT standard
+  int RCEnable;
+  int bit_rate;
+  int SeinitialQP;
+  unsigned int basicunit;
+  int channel_type;
+  int RCUpdateMode;
+  double RCIoverPRatio;
+  double RCBoverPRatio;
+  double RCISliceBitRatio;
+  double RCBSliceBitRatio[RC_MAX_TEMPORAL_LEVELS];
+
+  int ScalingMatrixPresentFlag;
+  int ScalingListPresentFlag[8];
+
+  // Search Algorithm
+  SearchType SearchMode;
+
+  int UMHexDSR;//!< Dynamic Search Range
+  int UMHexScale;
+//////////////////////////////////////////////////////////////////////////
+  // Fidelity Range Extensions
+  int BitDepthLuma;
+  int BitDepthChroma;
+  int img_height_cr;
+  int img_width_cr;
+  int rgb_input_flag;
+  int cb_qp_index_offset;
+  int cr_qp_index_offset;
+
+  // Lossless Coding
+  int lossless_qpprime_y_zero_flag;
+
+  // Lambda Params
+  int UseExplicitLambdaParams;
+  double LambdaWeight[6];
+  double FixedLambda[6];
+
+  char QOffsetMatrixFile[FILE_NAME_SIZE];        //!< Quantization Offset matrix cfg file
+  int  OffsetMatrixPresentFlag;                  //!< Enable Explicit Quantization Offset Matrices
+
+  int AdaptiveRounding;                          //!< Adaptive Rounding parameter based on JVT-N011
+  int AdaptRndPeriod;                            //!< Set period for adaptive rounding of JVT-N011 in MBs
+  int AdaptRndChroma;
+  int AdaptRndWFactor[2][5];                     //!< Weighting factors for luma component based on reference indicator and slice type
+  int AdaptRndCrWFactor[2][5];                   //!< Weighting factors for chroma components based on reference indicator and slice type
+  // Fast Mode Decision
+  int EarlySkipEnable;
+  int SelectiveIntraEnable;
+  int DisposableP;
+  int DispPQPOffset;
+
+  //Redundant picture
+  int NumRedundantHierarchy;   //!< number of entries to allocate redundant pictures in a GOP
+  int PrimaryGOPLength;        //!< GOP length of primary pictures
+  int NumRefPrimary;           //!< number of reference frames for primary picture
+
+  // Chroma interpolation and buffering
+  int ChromaMCBuffer;
+  int ChromaMEEnable;
+  int MEErrorMetric[3];
+  int ModeDecisionMetric;
+
+} InputParameters;
+
+//! ImageParameters
+typedef struct
+{
+  int number;                  //!< current image number to be encoded
+  int pn;                      //!< picture number
+  int LevelIndex;              //!< mapped level idc
+  int current_mb_nr;
+  int current_slice_nr;
+  int type;
+  int structure;               //!< picture structure
+  int num_ref_frames;          //!< number of reference frames to be used
+  int max_num_references;      //!< maximum number of reference pictures that may occur
+  int qp;                      //!< quant for the current frame
+  int qpsp;                    //!< quant for the prediction frame of SP-frame
+  int qp_scaled;
+  float framerate;
+  int width;                   //!< Number of pels
+  int width_padded;            //!< Width in pels of padded picture
+  int width_blk;               //!< Number of columns in blocks
+  int width_cr;                //!< Number of pels chroma
+  int height;                  //!< Number of lines
+  int height_padded;           //!< Number in lines of padded picture
+  int height_blk;              //!< Number of lines in blocks
+  int height_cr;               //!< Number of lines  chroma
+  int height_cr_frame;         //!< Number of lines  chroma frame
+  int size;                    //!< Luma Picture size in pels
+  int size_cr;                 //!< Chroma Picture size in pels
+  int subblock_x;              //!< current subblock horizontal
+  int subblock_y;              //!< current subblock vertical
+  int is_intra_block;
+  int is_v_block;
+  int mb_y_upd;
+  int mb_y_intra;              //!< which GOB to intra code
+  int block_c_x;               //!< current block chroma x position; NOTE(review): original comment said "vertical" - confirm
+  char **ipredmode;             //!< intra prediction mode
+  char **ipredmode8x8;          //!< help storage for 8x8 modes, inserted by YV
+
+  int cod_counter;             //!< Current count of number of skipped macroblocks in a row
+  int ***nz_coeff;             //!< number of coefficients per block (CAVLC)
+
+  int mb_x;                    //!< current MB horizontal
+  int mb_y;                    //!< current MB vertical
+  int block_x;                 //!< current block horizontal
+  int block_y;                 //!< current block vertical
+  int pix_x;                   //!< current pixel horizontal
+  int pix_y;                   //!< current pixel vertical
+  int pix_c_x;                 //!< current pixel chroma horizontal
+  int pix_c_y;                 //!< current pixel chroma vertical
+
+  int opix_x;                   //!< current original picture pixel horizontal
+  int opix_y;                   //!< current original picture pixel vertical
+  int opix_c_x;                 //!< current original picture pixel chroma horizontal
+  int opix_c_y;                 //!< current original picture pixel chroma vertical
+
+
+  // some temporal buffers
+  imgpel mprr[9][16][16];      //!< all 9 prediction modes? // enlarged from 4 to 16 for ABT (is that necessary?)
+
+  imgpel mprr_2[5][16][16];    //!< new intra prediction modes; NOTE(review): original comment said "all 4" but the array has 5 slots - confirm
+  imgpel mprr_3[9][8][8];      //!< all 9 prediction modes for 8x8 transformation
+  imgpel mprr_c[2][4][16][16]; //!< chroma intra prediction modes
+  imgpel mpr[16][16];          //!< current best prediction mode
+  int m7[16][16];              //!< the diff pixel values between original image and prediction
+
+  int ****cofAC;               //!< AC coefficients [8x8block][4x4block][level/run][scan_pos]
+  int ***cofDC;                //!< DC coefficients [yuv][level/run][scan_pos]
+
+  int ***fadjust4x4;           //!< Transform coefficients for 4x4 luma. Excludes DC for I16x16
+  int ***fadjust8x8;           //!< Transform coefficients for 8x8 luma
+  int ****fadjust4x4Cr;        //!< Transform coefficients for 4x4 chroma. Excludes DC chroma.
+  int ****fadjust8x8Cr;        //!< Transform coefficients for 4x4 chroma within 8x8 inter blocks.
+
+
+  Picture       *currentPicture; //!< The coded picture currently in the works (typically frame_pic, top_pic, or bottom_pic)
+  Slice         *currentSlice;                                //!< pointer to current Slice data struct
+  Macroblock    *mb_data;                                   //!< array containing all MBs of a whole frame
+
+  int *quad;               //!< Array containing square values, used for snr computation. Values are limited to 5000 for pixel differences over 70 (about sqrt(5000)).
+  int *intra_block;
+
+  int tr;
+  int fld_type;                        //!< top or bottom field
+  unsigned int fld_flag;
+  unsigned int rd_pass;
+  int direct_intraP_ref[4][4];
+  int pstruct_next_P;
+  int imgtr_next_P_frm;
+  int imgtr_last_P_frm;
+  int imgtr_next_P_fld;
+  int imgtr_last_P_fld;
+
+  // B pictures
+  double b_interval;
+  int p_interval;
+  int b_frame_to_code;
+  int fw_mb_mode;
+  int bw_mb_mode;
+
+  short****** pred_mv;                 //!< motion vector predictors for all block types and all reference frames
+  short****** all_mv;                  //!< replaces local all_mv
+
+  short****** bipred_mv1;              //!< Biprediction MVs
+  short****** bipred_mv2;              //!< Biprediction MVs
+  short bi_pred_me[MAXMODE];
+
+  int LFDisableIdc;
+  int LFAlphaC0Offset;
+  int LFBetaOffset;
+
+  int direct_spatial_mv_pred_flag;              //!< Direct Mode type to be used (0: Temporal, 1: Spatial)
+
+  int num_ref_idx_l0_active;
+  int num_ref_idx_l1_active;
+
+  int field_mode;     //!< For MB level field/frame -- field mode on flag
+  int top_field;      //!< For MB level field/frame -- top field flag
+  int mvscale[6][MAX_REFERENCE_PICTURES];
+  int buf_cycle;
+  int i16offset;
+
+  int layer;             //!< which layer this picture belonged to
+  int old_layer;         //!< old layer number
+  int NoResidueDirect;
+  int AdaptiveRounding;                          //!< Adaptive Rounding parameter based on JVT-N011
+
+  int redundant_pic_cnt; // JVT-D101
+
+  int MbaffFrameFlag;    //!< indicates frame with mb aff coding
+
+  //the following should probably go in sequence parameters
+  unsigned int pic_order_cnt_type;
+
+  // for poc mode 1
+  Boolean      delta_pic_order_always_zero_flag;
+  int          offset_for_non_ref_pic;
+  int          offset_for_top_to_bottom_field;
+  unsigned int num_ref_frames_in_pic_order_cnt_cycle;
+  int          offset_for_ref_frame[1];   // NOTE(review): storage for a single entry only, whatever num_ref_frames_in_pic_order_cnt_cycle holds
+
+  //the following is for slice header syntax elements of poc
+  // for poc mode 0.
+  unsigned int pic_order_cnt_lsb;
+  int          delta_pic_order_cnt_bottom;
+  // for poc mode 1.
+  int          delta_pic_order_cnt[2];
+
+
+  unsigned int field_picture;
+    signed int toppoc;      //!< poc for this frame or field
+    signed int bottompoc;   //!< for completeness - poc of bottom field of a frame (always = poc+1)
+    signed int framepoc;    //!< min (toppoc, bottompoc)
+    signed int ThisPOC;     //!< current picture POC
+  unsigned int frame_num;   //!< frame_num for this frame
+
+  unsigned int PicWidthInMbs;
+  unsigned int PicHeightInMapUnits;
+  unsigned int FrameHeightInMbs;
+  unsigned int PicSizeInMbs;
+  unsigned int FrameSizeInMbs;
+
+  //the following should probably go in picture parameters
+  Boolean pic_order_present_flag; // ????????
+
+  //the following are sent in the slice header
+//  int delta_pic_order_cnt[2];
+  int nal_reference_idc;
+
+  int adaptive_ref_pic_buffering_flag;
+  int no_output_of_prior_pics_flag;
+  int long_term_reference_flag;
+
+  DecRefPicMarking_t *dec_ref_pic_marking_buffer;
+
+  int model_number;
+
+  // rate control variables
+  int NumberofCodedMacroBlocks;
+  int BasicUnitQP;
+  int NumberofMBTextureBits;
+  int NumberofMBHeaderBits;
+  unsigned int BasicUnit;
+  int write_macroblock;
+  int bot_MB;
+  int write_mbaff_frame;
+
+  int DeblockCall;
+
+  int last_pic_bottom_field;
+  int last_has_mmco_5;
+  int pre_frame_num;
+
+  int slice_group_change_cycle;
+
+  int pic_unit_size_on_disk;
+  int bitdepth_luma;
+  int bitdepth_chroma;
+  int bitdepth_luma_qp_scale;
+  int bitdepth_chroma_qp_scale;
+  int bitdepth_lambda_scale;
+  int max_bitCount;
+  int max_qp_delta;
+  int min_qp_delta;
+  // Lagrangian Parameters
+  double  **lambda_md;     //!< Mode decision Lambda
+  double ***lambda_me;     //!< Motion Estimation Lambda
+  int    ***lambda_mf;     //!< Integer formatted Motion Estimation Lambda
+
+  double **lambda_mf_factor; //!< Motion Estimation Lambda Scale Factor
+
+  unsigned int dc_pred_value_luma;   //!< luma value for DC prediction (depends on luma pel bit depth)
+  unsigned int dc_pred_value_chroma; //!< chroma value for DC prediction (depends on chroma pel bit depth)
+  int max_imgpel_value;              //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth)
+  int max_imgpel_value_uv;
+
+  int num_blk8x8_uv;
+  int num_cdc_coeff;
+  int yuv_format;
+  int lossless_qpprime_flag;
+  int mb_cr_size_x;
+  int mb_cr_size_y;
+  int mb_size[3][2];
+
+  int chroma_qp_offset[2];      //!< offset for qp for chroma [0-Cb, 1-Cr]
+
+  int auto_crop_right;
+  int auto_crop_bottom;
+
+  short checkref;
+  int last_valid_reference;
+  int bytes_in_picture;
+
+  int AverageFrameQP;
+  int SumFrameQP;
+  int GopLevels;
+} ImageParameters;
+
+#define NUM_PIC_TYPE 5
+                                //! statistics
+typedef struct
+{
+  int   quant0;                      //!< quant for the first frame
+  int   quant1;                      //!< average quant for the remaining frames
+  float bitr;                        //!< bit rate for current frame, used only for output to terminal
+  float bitrate;                     //!< average bit rate for the sequence except first frame
+  int64 bit_ctr;                     //!< counter for bit usage
+  int64 bit_ctr_n;                   //!< bit usage for the current frame
+  int   bit_slice;                   //!< number of bits in current slice
+  int   stored_bit_slice;            //!< keep number of bits in current slice (to restore status in case of MB re-encoding)
+  int   bit_ctr_emulationprevention; //!< stored bits needed to prevent start code emulation
+  int   b8_mode_0_use[NUM_PIC_TYPE][2];
+  int   mode_use_transform_8x8[NUM_PIC_TYPE][MAXMODE];
+  int   mode_use_transform_4x4[NUM_PIC_TYPE][MAXMODE];
+  int   intra_chroma_mode[4];
+
+  // B pictures
+  int   successive_Bframe;
+  int   *mode_use_Bframe;
+  int   *bit_use_mode_Bframe;
+  int64   bit_ctr_I;
+  int64   bit_ctr_P;
+  int64   bit_ctr_B;
+  float bitrate_I;
+  float bitrate_P;
+  float bitrate_B;
+
+  int64   mode_use            [NUM_PIC_TYPE][MAXMODE]; //!< Macroblock mode usage, indexed [picture type][mode] (original comment said "for Intra frames")
+  int64   bit_use_mode        [NUM_PIC_TYPE][MAXMODE]; //!< statistics of bit usage, indexed [picture type][mode]
+  int64   bit_use_stuffingBits[NUM_PIC_TYPE];
+  int64   bit_use_mb_type     [NUM_PIC_TYPE];
+  int64   bit_use_header      [NUM_PIC_TYPE];
+  int64   tmp_bit_use_cbp     [NUM_PIC_TYPE];
+  int64   bit_use_coeffY      [NUM_PIC_TYPE];
+  int64   bit_use_coeffC      [NUM_PIC_TYPE];
+  int64   bit_use_delta_quant [NUM_PIC_TYPE];
+
+  int   em_prev_bits_frm;
+  int   em_prev_bits_fld;
+  int  *em_prev_bits;
+  int   bit_ctr_parametersets;
+  int   bit_ctr_parametersets_n;
+  } StatParameters;
+
+//! For MB level field/frame coding tools
+//! temporary structure to store MB data for field/frame coding
+typedef struct
+{
+  double min_rdcost;
+
+  imgpel rec_mbY[16][16];       //!< holds the Y component of the reconstructed MB
+  imgpel rec_mbU[16][16], rec_mbV[16][16];  //!< presumably the U and V components of the reconstructed MB - confirm
+  int    ****cofAC;             //!< same pointer depth as ImageParameters::cofAC
+  int    ***cofDC;              //!< same pointer depth as ImageParameters::cofDC
+  int    mb_type;
+  short  bi_pred_me;
+
+  int    b8mode[4], b8pdir[4];
+  char   **ipredmode;
+  char   intra_pred_modes[16];
+  char   intra_pred_modes8x8[16];
+  int    cbp;
+  int64  cbp_blk;
+  int    mode;
+  short  ******pred_mv;        //!< predicted motion vectors
+  short  ******all_mv;         //!< all modes motion vectors
+  char   refar[2][4][4];       //!< reference frame array [list][y][x]
+  int    i16offset;
+  int    c_ipred_mode;
+
+  int    luma_transform_size_8x8_flag;
+  int    NoMbPartLessThan8x8Flag;
+
+  int    qp;
+  int    prev_qp;
+  int    prev_delta_qp;
+  int    delta_qp;
+  int    prev_cbp;
+} RD_DATA;
+
+
+//! Set Explicit GOP Parameters.
+//! Currently only supports Enhancement GOP but could be easily extended
+typedef struct
+{
+  int slice_type;       //!< Slice type
+  int display_no;       //!< GOP Display order
+  int reference_idc;    //!< Is reference?
+  int slice_qp;         //!< Assigned QP
+  int hierarchy_layer;    //!< Hierarchy layer (used with GOP Hierarchy option 2)
+  int hierarchyPocDelta;  //!< Currently unused
+} GOP_DATA;
+
+
+typedef struct
+{
+  int cost8x8;
+  int rec_resG_8x8[16][16];
+  int resTrans_R_8x8[16][16];
+  int resTrans_B_8x8[16][16];
+  int mprRGB_8x8[3][16][16];
+  short part8x8mode[4];
+  char  part8x8pdir[4];
+  char  part8x8fwref[4];
+  char  part8x8bwref[4];
+  imgpel rec_mbY8x8[16][16];
+  imgpel mpr8x8[16][16];
+  int lrec[16][16]; //!< transformed and quantized coefficients will be stored here for SP frames
+} RD_8x8DATA;
+
+typedef struct
+{
+  double lambda_md;        //!< Mode decision Lambda
+  double lambda_me[3];     //!< Motion Estimation Lambda (3 entries)
+  int    lambda_mf[3];     //!< Integer formatted Motion Estimation Lambda (3 entries)
+
+  short  valid[MAXMODE];   //!< one entry per mode; presumably a per-mode enable flag - confirm
+  short  list_offset[2];
+  short  curr_mb_field;
+  short  best_ref[2];
+  int    best_mcost[2];
+} RD_PARAMS;
+
+GOP_DATA *gop_structure;  // NOTE(review): object defined without 'extern' in what appears to be a shared header; each includer gets a tentative definition (link error under -fno-common)
+RD_DATA *rdopt;           // NOTE(review): same tentative-definition concern as gop_structure
+RD_DATA rddata_top_frame_mb, rddata_bot_frame_mb; //!< For MB level field/frame coding tools
+RD_DATA rddata_top_field_mb, rddata_bot_field_mb; //!< For MB level field/frame coding tools
+
+extern InputParameters *input;
+extern ImageParameters *img;
+extern StatParameters  *stats;
+
+extern SNRParameters *snr;
+
+// files (NOTE(review): unlike the pointers above, these are not declared 'extern')
+FILE *p_stat;                    //!< status file for the last encoding session
+FILE *p_log;                     //!< SNR file
+FILE *p_trace;                   //!< Trace file
+int  p_in;                       //!< original YUV file handle
+int  p_dec;                      //!< decoded image file handle
+
+
+/***********************************************************************
+ * P r o t o t y p e s   f o r    T M L
+ ***********************************************************************
+ */
+
+void intrapred_luma(int CurrPixX,int CurrPixY, int *left_available, int *up_available, int *all_available);
+int  dct_luma(int pos_mb1,int pos_mb2,int *cnt_nonz, int intra);
+int  dct_luma_sp(int pos_mb1,int pos_mb2,int *cnt_nonz);
+void copyblock_sp(int pos_mb1,int pos_mb2);
+int  dct_chroma(int uv,int i11);
+int  dct_chroma_sp(int uv,int i11);
+
+void intrapred_luma_16x16(void);
+
+int dct_luma_16x16(int);
+
+void init_poc(void);
+
+void init_img(void);
+void report(void);
+int  get_picture_type(void);
+void DeblockFrame(ImageParameters *img, imgpel **, imgpel ***) ;
+
+int  distortion4x4(int*);
+int  distortion8x8(int*);
+
+extern int*   refbits;
+extern int**** motion_cost;
+double *mb16x16_cost_frame;  // NOTE(review): not 'extern', unlike the two declarations above (tentative definition in each includer)
+
+void  Get_Direct_Motion_Vectors (void);
+void  PartitionMotionSearch     (int, int, int*);
+int   BIDPartitionCost          (int, int, short, short, int);
+int   writeAbpCoeffIndex        (int, int, int, int);
+
+void estimate_weighting_factor_B_slice(void);
+void estimate_weighting_factor_P_slice(int offset);
+int  test_wp_P_slice(int offset);
+int  test_wp_B_slice(int method);
+void poc_based_ref_management(int current_pic_num);
+int  picture_coding_decision (Picture *picture1, Picture *picture2, int qp);
+
+unsigned CeilLog2( unsigned uiVal);
+
+int  GetDirectCost8x8 (int, int*);
+
+int   BPredPartitionCost  (int, int, short, short, int, int);
+
+int  GetDirectCostMB  (void);
+
+int  GetSkipCostMB (void);
+void FindSkipModeMotionVector (void);
+
+
+// dynamic mem allocation
+int  init_global_buffers(void);
+void free_global_buffers(void);
+void no_mem_exit  (char *where);
+
+int  get_mem_mv  (short*******);  // takes one more level of indirection than free_mem_mv, i.e. the address of a short****** array pointer
+void free_mem_mv (short******);   // same pointer type as the pred_mv / all_mv members
+void free_img    (void);
+
+int  get_mem_ACcoeff  (int*****);  // address of an int**** (the type of cofAC)
+int  get_mem_DCcoeff  (int****);   // address of an int*** (the type of cofDC)
+void free_mem_ACcoeff (int****);
+void free_mem_DCcoeff (int***);
+
+int  decide_fld_frame(float snr_frame_Y, float snr_field_Y, int bit_field, int bit_frame, double lambda_picture);
+void combine_field(void);
+
+Picture *malloc_picture(void);
+void     free_picture (Picture *pic);
+
+int   encode_one_slice(int SLiceGroupId, Picture *pic, int TotalCodedMBs);   //! returns the number of MBs in the slice
+
+void free_slice_list(Picture *currPic);
+
+void report_stats_on_error(void);
+
+#if TRACE
+void  trace2out(SyntaxElement *se);
+void  trace2out_cabac(SyntaxElement *se);
+#endif
+
+
+void error(char *text, int code);
+int  start_sequence(void);
+int  rewrite_paramsets(void);
+int  terminate_sequence(void);
+int  start_slice(void);
+int  terminate_slice(int);
+int  write_PPS(int, int);
+
+// B pictures
+int  get_fwMV(int *min_fw_sad, int tot_intra_sad);
+void get_bwMV(int *min_bw_sad);
+void get_bid(int *bid_sad, int fw_predframe_no);
+void get_dir(int *dir_sad);
+void compare_sad(int tot_intra_sad, int fw_sad, int bw_sad, int bid_sad, int dir_sad, int);
+int  BlkSize2CodeNumber(int blc_size_h, int blc_size_v);
+
+void InitMotionVectorSearchModule(void);
+
+int  field_flag_inference(void);
+
+void set_mbaff_parameters(void);  // For MB AFF
+void writeVlcByteAlign(Bitstream* currStream);
+
+
+int   writeMB_bits_for_4x4_luma   (int, int, int);
+int   writeMB_bits_for_16x16_luma (void);
+int   writeMB_bits_for_luma       (int);
+int   writeMB_bits_for_DC_chroma  (int);
+int   writeMB_bits_for_AC_chroma  (int);
+int   writeMB_bits_for_CBP        (void);
+
+int   SingleUnifiedMotionSearch   (int, int, int**, int***, int*****, int, int*****, double);
+
+//============= rate-distortion optimization ===================
+void  clear_rdopt      (void);
+void  init_rdopt       (void);
+void  RD_Mode_Decision (void);
+//============= rate-distortion opt with packet losses ===========
+void decode_one_macroblock(void);
+void decode_one_mb (int, Macroblock*);
+void decode_one_b8block (int, int, int, int, int);
+void Get_Reference_Block(imgpel **imY, int block_y, int block_x, int mvhor, int mvver, imgpel **out);
+byte Get_Reference_Pixel(imgpel **imY, int y, int x);
+int  Half_Upsample(imgpel **imY, int j, int i);
+void DecOneForthPix(imgpel **dY, imgpel ***dref);
+void compute_residue(int mode);
+void compute_residue_b8block (int, int);
+void compute_residue_mb (int);
+void UpdateDecoders(void);
+void Build_Status_Map(byte **s_map);
+void Error_Concealment(imgpel **inY, byte **s_map, imgpel ***refY);
+void Conceal_Error(imgpel **inY, int mb_y, int mb_x, imgpel ***refY, byte **s_map);
+//============= restriction of reference frames based on the latest intra-refreshes==========
+void UpdatePixelMap(void);
+
+//============= fast full integer search =======================
+void  ClearFastFullIntegerSearch    (void);
+void  ResetFastFullIntegerSearch    (void);
+
+void process_2nd_IGOP(void);
+void SetImgType(void);
+
+// Tian Dong: for IGOPs
+extern Boolean In2ndIGOP;
+extern int start_frame_no_in_this_IGOP;
+extern int start_tr_in_this_IGOP;
+extern int FirstFrameIn2ndIGOP;
+#define IMG_NUMBER (img->number - start_frame_no_in_this_IGOP)  // img->number expressed relative to the first frame of the current IGOP
+
+void AllocNalPayloadBuffer(void);
+void FreeNalPayloadBuffer(void);
+void SODBtoRBSP(Bitstream *currStream);
+int RBSPtoEBSP(byte *streamBuffer, int begin_bytepos, int end_bytepos, int min_num_bytes);
+int Bytes_After_Header;  // NOTE(review): object defined without 'extern' in a header (tentative definition in each includer)
+
+// Fast ME enable
+int BlockMotionSearch (short,int,int,int,int,int, int*);
+void low_complexity_encode_md (void);
+void encode_one_macroblock_low (void);
+void encode_one_macroblock_high (void);
+void encode_one_macroblock_highfast (void);
+void encode_one_macroblock_highloss (void);
+void (*encode_one_macroblock) (void);  // function pointer with the same signature as the encode_one_macroblock_* variants above; presumably set to one of them - confirm. NOTE(review): not 'extern'
+
+
+void set_chroma_qp(Macroblock *currMB);
+
+
+#include "context_ini.h"
+
+void store_coding_state_cs_cm(void);
+void reset_coding_state_cs_cm(void);
+
+int writeIPCMBytes(Bitstream *currStream);
+int writePCMByteAlign(Bitstream *currStream);
+
+
+int  dct_luma_sp2(int pos_mb1,int pos_mb2,int *cnt_nonz);
+int  dct_chroma_sp2(int ,int);
+
+int check_for_SI16(void);
+int **lrec ;              // NOTE(review): this and the objects below are defined without 'extern' in a header
+int ***lrec_uv;
+int si_frame_indicator;   // global with the same name as the InputParameters member si_frame_indicator
+
+int sp2_frame_indicator;  // global with the same name as the InputParameters member sp2_frame_indicator
+int number_sp2_frames;
+//#define sp_output_indicator 0 //will be in the config file
+//#define sp_output_filename "sp_stored.txt" // will be in the config file
+void output_SP_coefficients(void);
+void read_SP_coefficients(void);
+
+int giRDOpt_B8OnlyFlag;
+
+#ifdef BEST_NZ_COEFF
+int gaaiMBAFF_NZCoeff[4][12];
+#endif
+
+// Redundant picture. NOTE(review): every object in this run is defined without 'extern' in a header (tentative definition in each includer)
+imgpel **imgY_tmp;
+imgpel **imgUV_tmp[2];
+int frameNuminGOP;
+int redundant_coding;
+int key_frame;
+int redundant_ref_idx;
+void Init_redundant_frame(void);
+void Set_redundant_frame(void);
+void encode_one_redundant_frame(void);
+
+int img_pad_size_uv_x;
+int img_pad_size_uv_y;
+
+unsigned char chroma_mask_mv_y;
+unsigned char chroma_mask_mv_x;
+int chroma_shift_y, chroma_shift_x;
+int shift_cr_x, shift_cr_y;
+int img_padded_size_x;
+int img_cr_padded_size_x;
+
+// struct with pointers to the sub-images
+typedef struct {
+  imgpel ****luma; // component 0 (usually Y, X, or R)
+  imgpel ****crcb[2]; // components 1 and 2 (usually U/V, Y/Z, or G/B)
+} SubImageContainer;
+
+int start_me_refinement_hp; // if set then recheck the center position when doing half-pel motion refinement (NOTE(review): not 'extern', defined in a header)
+int start_me_refinement_qp; // if set then recheck the center position when doing quarter-pel motion refinement (NOTE(review): not 'extern', defined in a header)
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/header.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/header.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/header.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,559 @@
+
+/*!
+ *************************************************************************************
+ * \file header.c
+ *
+ * \brief
+ *    H.264 Slice and Sequence headers
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ *      - Karsten Suehring                <suehring at hhi.de>
+ *************************************************************************************
+ */
+
+#include <math.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "global.h"
+
+#include "elements.h"
+#include "header.h"
+#include "rtp.h"
+#include "mbuffer.h"
+#include "defines.h"
+#include "vlc.h"
+#include "parset.h"
+
+// A little trick to avoid those horrible #if TRACE all over the source code
+#if TRACE
+#define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE)
+#else
+#define SYMTRACESTRING(s) // do nothing
+#endif
+
+// Lookup used as assignSE2partition[partition_mode][syntax element type] to
+// obtain the index of the data partition a syntax element is written to
+// (see SliceHeader()). The two row pointers are not initialised here;
+// presumably they are pointed at the two tables below during start-up.
+int * assignSE2partition[2] ;
+// Without data partitioning every syntax element type goes to partition 0.
+int assignSE2partition_NoDP[SE_MAX_ELEMENTS] =
+  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+// With data partitioning the element types are spread over partitions 0, 1 and 2;
+// the comment row gives the syntax element type index of each column.
+int assignSE2partition_DP[SE_MAX_ELEMENTS] =
+  // 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17
+  {  0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0 } ;
+
+static int ref_pic_list_reordering(Bitstream *bitstream);
+static int dec_ref_pic_marking    (Bitstream *bitstream);
+static int pred_weight_table      (Bitstream *bitstream);
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Write a slice header
+ *
+ *    The syntax elements are appended, in the order given below, to the
+ *    bitstream of the data partition that assignSE2partition[] selects for
+ *    SE_HEADER in the current partition mode. As a side effect
+ *    img->pic_order_cnt_lsb is recomputed when img->pic_order_cnt_type is 0.
+ *
+ * \return
+ *    number of bits used
+ ********************************************************************************************
+*/
+int SliceHeader()
+{
+  int dP_nr = assignSE2partition[input->partition_mode][SE_HEADER];
+  Bitstream *bitstream = img->currentSlice->partArr[dP_nr].bitstream;
+  Slice* currSlice = img->currentSlice;
+  int len = 0;
+  unsigned int field_pic_flag = 0, bottom_field_flag = 0;
+
+  int num_bits_slice_group_change_cycle;
+  float numtmp;
+
+  // with MBAFF the macroblock number is halved before it is written
+  if (img->MbaffFrameFlag)
+    len  = ue_v("SH: first_mb_in_slice", img->current_mb_nr >> 1,   bitstream);
+  else
+    len  = ue_v("SH: first_mb_in_slice", img->current_mb_nr,   bitstream);
+
+  len += ue_v("SH: slice_type",        get_picture_type (),   bitstream);
+
+  len += ue_v("SH: pic_parameter_set_id" , active_pps->pic_parameter_set_id ,bitstream);
+
+  len += u_v (log2_max_frame_num_minus4 + 4,"SH: frame_num", img->frame_num, bitstream);
+
+  if (!active_sps->frame_mbs_only_flag)
+  {
+    // field_pic_flag    u(1)
+    field_pic_flag = (img->structure ==TOP_FIELD || img->structure ==BOTTOM_FIELD)?1:0;
+    assert( field_pic_flag == img->fld_flag );
+    len += u_1("SH: field_pic_flag", field_pic_flag, bitstream);
+
+    if (field_pic_flag)
+    {
+      //bottom_field_flag     u(1)
+      bottom_field_flag = (img->structure == BOTTOM_FIELD)?1:0;
+      len += u_1("SH: bottom_field_flag" , bottom_field_flag ,bitstream);
+    }
+  }
+
+  if (img->currentPicture->idr_flag)
+  {
+    // idr_pic_id
+    // (alternates between 0 and 1 with img->number)
+    len += ue_v ("SH: idr_pic_id", (img->number % 2), bitstream);
+  }
+
+  if (img->pic_order_cnt_type == 0)
+  {
+    // pic_order_cnt_lsb keeps the low (log2_max_pic_order_cnt_lsb_minus4+4) bits
+    // of the POC: the top POC for frames and top fields, the bottom POC for
+    // bottom fields
+    if (active_sps->frame_mbs_only_flag)
+    {
+      img->pic_order_cnt_lsb = (img->toppoc & ~((((unsigned int)(-1)) << (log2_max_pic_order_cnt_lsb_minus4+4))) );
+    }
+    else
+    {
+      if (!field_pic_flag || img->structure == TOP_FIELD)
+        img->pic_order_cnt_lsb = (img->toppoc & ~((((unsigned int)(-1)) << (log2_max_pic_order_cnt_lsb_minus4+4))) );
+      else if ( img->structure == BOTTOM_FIELD )
+        img->pic_order_cnt_lsb = (img->bottompoc & ~((((unsigned int)(-1)) << (log2_max_pic_order_cnt_lsb_minus4+4))) );
+    }
+
+    len += u_v (log2_max_pic_order_cnt_lsb_minus4+4, "SH: pic_order_cnt_lsb", img->pic_order_cnt_lsb, bitstream);
+
+    if (img->pic_order_present_flag && !field_pic_flag)
+    {
+      len += se_v ("SH: delta_pic_order_cnt_bottom", img->delta_pic_order_cnt_bottom, bitstream);
+    }
+  }
+  if (img->pic_order_cnt_type == 1 && !img->delta_pic_order_always_zero_flag)
+  {
+    len += se_v ("SH: delta_pic_order_cnt[0]", img->delta_pic_order_cnt[0], bitstream);
+
+    if (img->pic_order_present_flag && !field_pic_flag)
+    {
+      len += se_v ("SH: delta_pic_order_cnt[1]", img->delta_pic_order_cnt[1], bitstream);
+    }
+  }
+
+  if (active_pps->redundant_pic_cnt_present_flag)
+  {
+    len += ue_v ("SH: redundant_pic_cnt", img->redundant_pic_cnt, bitstream);
+  }
+
+  // Direct Mode Type selection for B pictures
+  if (img->type==B_SLICE)
+  {
+    len +=  u_1 ("SH: direct_spatial_mv_pred_flag", img->direct_spatial_mv_pred_flag, bitstream);
+  }
+
+  if ((img->type == P_SLICE) || (img->type == B_SLICE) || (img->type==SP_SLICE))
+  {
+    // the override flag is set when the active reference counts of this slice
+    // differ from the defaults stored in the picture parameter set
+    int override_flag;
+    if ((img->type == P_SLICE) || (img->type==SP_SLICE))
+    {
+      override_flag = (img->num_ref_idx_l0_active != (active_pps->num_ref_idx_l0_active_minus1 +1)) ? 1 : 0;
+    }
+    else
+    {
+      override_flag = ((img->num_ref_idx_l0_active != (active_pps->num_ref_idx_l0_active_minus1 +1))
+                      || (img->num_ref_idx_l1_active != (active_pps->num_ref_idx_l1_active_minus1 +1))) ? 1 : 0;
+    }
+
+    len +=  u_1 ("SH: num_ref_idx_active_override_flag", override_flag, bitstream);
+
+    if (override_flag)
+    {
+      len += ue_v ("SH: num_ref_idx_l0_active_minus1", img->num_ref_idx_l0_active-1, bitstream);
+      if (img->type==B_SLICE)
+      {
+        len += ue_v ("SH: num_ref_idx_l1_active_minus1", img->num_ref_idx_l1_active-1, bitstream);
+      }
+    }
+
+  }
+  len += ref_pic_list_reordering(bitstream);
+
+  if (((img->type == P_SLICE || img->type == SP_SLICE) && active_pps->weighted_pred_flag) ||
+     ((img->type == B_SLICE) && active_pps->weighted_bipred_idc == 1))
+  {
+    len += pred_weight_table(bitstream);
+  }
+
+  if (img->nal_reference_idc)
+    len += dec_ref_pic_marking(bitstream);
+
+  if(input->symbol_mode==CABAC && img->type!=I_SLICE /*&& img->type!=SI_IMG*/)
+  {
+    len += ue_v("SH: cabac_init_idc", img->model_number, bitstream);
+  }
+
+  // slice QP is coded relative to the picture-level initial QP (26 + pic_init_qp_minus26)
+  len += se_v("SH: slice_qp_delta", (currSlice->qp - 26 - active_pps->pic_init_qp_minus26), bitstream);
+
+  if (img->type==SP_SLICE /*|| img->type==SI_SLICE*/)
+  {
+    if (img->type==SP_SLICE) // Switch Flag only for SP pictures
+    {
+      len += u_1 ("SH: sp_for_switch_flag", (si_frame_indicator || sp2_frame_indicator), bitstream);   // 1 for switching SP, 0 for normal SP
+    }
+    len += se_v ("SH: slice_qs_delta", (img->qpsp - 26), bitstream );
+  }
+
+  if (active_pps->deblocking_filter_control_present_flag)
+  {
+    len += ue_v("SH: disable_deblocking_filter_idc",img->LFDisableIdc, bitstream);  // Turn loop filter on/off on slice basis
+
+    if (img->LFDisableIdc!=1)
+    {
+      len += se_v ("SH: slice_alpha_c0_offset_div2", img->LFAlphaC0Offset / 2, bitstream);
+
+      len += se_v ("SH: slice_beta_offset_div2", img->LFBetaOffset / 2, bitstream);
+    }
+  }
+
+
+  if ( active_pps->num_slice_groups_minus1>0 &&
+    active_pps->slice_group_map_type>=3 && active_pps->slice_group_map_type<=5)
+  {
+    // field width = ceil(log2(PicHeightInMapUnits*PicWidthInMbs / (slice_group_change_rate_minus1+1) + 1))
+    // NOTE(review): evaluated in floating point; verify the rounding when the
+    // argument of the logarithm is an exact power of two.
+    numtmp=img->PicHeightInMapUnits*img->PicWidthInMbs/(float)(active_pps->slice_group_change_rate_minus1+1)+1;
+    num_bits_slice_group_change_cycle = (int)ceil(log(numtmp)/log(2));
+
+    //! img->slice_group_change_cycle can be changed before calling FmoInit()
+    len += u_v (num_bits_slice_group_change_cycle, "SH: slice_group_change_cycle", img->slice_group_change_cycle, bitstream);
+  }
+
+  // NOTE: The following syntax element is not part of the slice header itself;
+  //       it belongs to the slice data partition A RBSP syntax
+
+  if(input->partition_mode&&!img->currentPicture->idr_flag)
+  {
+    len += ue_v("DPA: slice_id", img->current_slice_nr, bitstream);
+  }
+
+  return len;
+}
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Emits the ref_pic_list_reordering syntax of the slice header from the
+ *    reordering fields stored in the current slice.
+ *
+ *    When redundant pictures are coded (input->redundant_pic_flag and
+ *    redundant_coding both set), list 0 is first forced to a single
+ *    reordering command derived from redundant_ref_idx and listX[LIST_0]
+ *    is reordered accordingly before anything is written.
+ *
+ * \param bitstream
+ *    bitstream the syntax elements are appended to
+ *
+ * \return
+ *    number of bits used
+ ********************************************************************************************
+*/
+static int ref_pic_list_reordering(Bitstream *bitstream)
+{
+  Slice *slice = img->currentSlice;
+  int n;
+  int bits = 0;
+
+  // RPLR for redundant pictures: one command (idc 0) followed by the end marker (idc 3)
+  if (input->redundant_pic_flag && redundant_coding)
+  {
+    slice->ref_pic_list_reordering_flag_l0 = 1;
+    slice->reordering_of_pic_nums_idc_l0[0] = 0;
+    slice->reordering_of_pic_nums_idc_l0[1] = 3;
+    slice->abs_diff_pic_num_minus1_l0[0] = redundant_ref_idx - 1;
+    slice->long_term_pic_idx_l0[0] = 0;
+    reorder_ref_pic_list( listX[LIST_0], &listXsize[LIST_0],
+                          img->num_ref_idx_l0_active-1,
+                          slice->reordering_of_pic_nums_idc_l0,
+                          slice->abs_diff_pic_num_minus1_l0,
+                          slice->long_term_pic_idx_l0);
+  }
+
+  // list 0: present for every slice type except I
+  if (img->type != I_SLICE /*&&(img->type!=SI_IMG)*/ )
+  {
+    bits += u_1 ("SH: ref_pic_list_reordering_flag_l0", slice->ref_pic_list_reordering_flag_l0, bitstream);
+    if (slice->ref_pic_list_reordering_flag_l0)
+    {
+      // write commands up to and including the terminating idc 3
+      for (n = 0; ; n++)
+      {
+        bits += ue_v ("SH: reordering_of_pic_nums_idc", slice->reordering_of_pic_nums_idc_l0[n], bitstream);
+
+        if (slice->reordering_of_pic_nums_idc_l0[n] == 0 ||
+            slice->reordering_of_pic_nums_idc_l0[n] == 1)
+        {
+          bits += ue_v ("SH: abs_diff_pic_num_minus1_l0", slice->abs_diff_pic_num_minus1_l0[n], bitstream);
+        }
+        else if (slice->reordering_of_pic_nums_idc_l0[n] == 2)
+        {
+          bits += ue_v ("SH: long_term_pic_idx_l0", slice->long_term_pic_idx_l0[n], bitstream);
+        }
+
+        if (slice->reordering_of_pic_nums_idc_l0[n] == 3)
+          break;
+      }
+    }
+  }
+
+  // list 1: only for B slices
+  if (img->type == B_SLICE)
+  {
+    bits += u_1 ("SH: ref_pic_list_reordering_flag_l1", slice->ref_pic_list_reordering_flag_l1, bitstream);
+    if (slice->ref_pic_list_reordering_flag_l1)
+    {
+      for (n = 0; ; n++)
+      {
+        bits += ue_v ("SH: remapping_of_pic_num_idc", slice->reordering_of_pic_nums_idc_l1[n], bitstream);
+
+        if (slice->reordering_of_pic_nums_idc_l1[n] == 0 ||
+            slice->reordering_of_pic_nums_idc_l1[n] == 1)
+        {
+          bits += ue_v ("SH: abs_diff_pic_num_minus1_l1", slice->abs_diff_pic_num_minus1_l1[n], bitstream);
+        }
+        else if (slice->reordering_of_pic_nums_idc_l1[n] == 2)
+        {
+          bits += ue_v ("SH: long_term_pic_idx_l1", slice->long_term_pic_idx_l1[n], bitstream);
+        }
+
+        if (slice->reordering_of_pic_nums_idc_l1[n] == 3)
+          break;
+      }
+    }
+  }
+
+  return bits;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the decoded reference picture marking syntax: the two IDR
+ *    flags for IDR pictures, otherwise the adaptive buffering flag
+ *    followed by the list of memory management control operations
+ *    queued in img->dec_ref_pic_marking_buffer.
+ *
+ * \param bitstream
+ *    bitstream the syntax elements are appended to
+ *
+ * \return
+ *    number of bits used
+ ************************************************************************
+ */
+static int dec_ref_pic_marking(Bitstream *bitstream)
+{
+  DecRefPicMarking_t *cmd;
+  int op;
+  int bits = 0;
+
+  if (img->currentPicture->idr_flag)
+  {
+    bits += u_1("SH: no_output_of_prior_pics_flag", img->no_output_of_prior_pics_flag, bitstream);
+    bits += u_1("SH: long_term_reference_flag", img->long_term_reference_flag, bitstream);
+    return bits;
+  }
+
+  // adaptive marking is signalled exactly when commands have been queued
+  img->adaptive_ref_pic_buffering_flag = (img->dec_ref_pic_marking_buffer!=NULL);
+  bits += u_1("SH: adaptive_ref_pic_buffering_flag", img->adaptive_ref_pic_buffering_flag, bitstream);
+
+  if (!img->adaptive_ref_pic_buffering_flag)
+    return bits;
+
+  // write Memory Management Control Operations up to and including the
+  // terminating operation 0; running off the list before that is an error
+  cmd = img->dec_ref_pic_marking_buffer;
+  for (;;)
+  {
+    if (cmd == NULL)
+      error ("Error encoding MMCO commands", 500);
+
+    op = cmd->memory_management_control_operation;
+    bits += ue_v("SH: memory_management_control_operation", op, bitstream);
+
+    if (op == 1 || op == 3)
+    {
+      // NOTE(review): one extra bit is counted here on top of what ue_v reports
+      bits += 1 + ue_v("SH: difference_of_pic_nums_minus1", cmd->difference_of_pic_nums_minus1, bitstream);
+    }
+    if (op == 2)
+    {
+      bits += ue_v("SH: long_term_pic_num", cmd->long_term_pic_num, bitstream);
+    }
+    if (op == 3 || op == 6)
+    {
+      bits += ue_v("SH: long_term_frame_idx", cmd->long_term_frame_idx, bitstream);
+    }
+    if (op == 4)
+    {
+      bits += ue_v("SH: max_long_term_pic_idx_plus1", cmd->max_long_term_frame_idx_plus1, bitstream);
+    }
+
+    cmd = cmd->Next;
+
+    if (op == 0)
+      break;
+  }
+
+  return bits;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the prediction weight table: the log weight denominators,
+ *    then for every active list 0 reference (and, for B slices, every
+ *    active list 1 reference) a flag telling whether explicit weights
+ *    follow, and the weight/offset pairs when they do.
+ *
+ *    A flag is set when the stored weight differs from
+ *    1 << log_weight_denom or the stored offset is non-zero. Chroma
+ *    entries are written only when chroma_format_idc is non-zero.
+ *
+ * \param bitstream
+ *    bitstream the syntax elements are appended to
+ *
+ * \return
+ *    number of bits used
+ ************************************************************************
+ */
+static int pred_weight_table(Bitstream *bitstream)
+{
+  int bits = 0;
+  int ref, comp;
+  int luma_flag, chroma_flag;
+  int has_chroma = (0 != active_sps->chroma_format_idc);
+
+  bits += ue_v("SH: luma_log_weight_denom", luma_log_weight_denom, bitstream);
+
+  if (has_chroma)
+  {
+    bits += ue_v("SH: chroma_log_weight_denom", chroma_log_weight_denom, bitstream);
+  }
+
+  // list 0
+  for (ref = 0; ref < img->num_ref_idx_l0_active; ref++)
+  {
+    luma_flag = ( (wp_weight[0][ref][0] != 1<<luma_log_weight_denom) || (wp_offset[0][ref][0] != 0) ) ? 1 : 0;
+
+    bits += u_1 ("SH: luma_weight_flag_l0", luma_flag, bitstream);
+    if (luma_flag)
+    {
+      bits += se_v ("SH: luma_weight_l0", wp_weight[0][ref][0], bitstream);
+      bits += se_v ("SH: luma_offset_l0", wp_offset[0][ref][0], bitstream);
+    }
+
+    if (!has_chroma)
+      continue;
+
+    chroma_flag = ( (wp_weight[0][ref][1] != 1<<chroma_log_weight_denom) || (wp_offset[0][ref][1] != 0) ||
+                    (wp_weight[0][ref][2] != 1<<chroma_log_weight_denom) || (wp_offset[0][ref][2] != 0) ) ? 1 : 0;
+
+    bits += u_1 ("chroma_weight_flag_l0", chroma_flag, bitstream);
+    if (chroma_flag)
+    {
+      for (comp = 1; comp < 3; comp++)
+      {
+        bits += se_v ("chroma_weight_l0", wp_weight[0][ref][comp] ,bitstream);
+        bits += se_v ("chroma_offset_l0", wp_offset[0][ref][comp] ,bitstream);
+      }
+    }
+  }
+
+  if (img->type != B_SLICE)
+    return bits;
+
+  // list 1 (B slices only)
+  for (ref = 0; ref < img->num_ref_idx_l1_active; ref++)
+  {
+    luma_flag = ( (wp_weight[1][ref][0] != 1<<luma_log_weight_denom) || (wp_offset[1][ref][0] != 0) ) ? 1 : 0;
+
+    bits += u_1 ("SH: luma_weight_flag_l1", luma_flag, bitstream);
+    if (luma_flag)
+    {
+      bits += se_v ("SH: luma_weight_l1", wp_weight[1][ref][0], bitstream);
+      bits += se_v ("SH: luma_offset_l1", wp_offset[1][ref][0], bitstream);
+    }
+
+    if (!has_chroma)
+      continue;
+
+    chroma_flag = ( (wp_weight[1][ref][1] != 1<<chroma_log_weight_denom) || (wp_offset[1][ref][1] != 0) ||
+                    (wp_weight[1][ref][2] != 1<<chroma_log_weight_denom) || (wp_offset[1][ref][2] != 0) ) ? 1 : 0;
+
+    bits += u_1 ("chroma_weight_flag_l1", chroma_flag, bitstream);
+    if (chroma_flag)
+    {
+      for (comp = 1; comp < 3; comp++)
+      {
+        bits += se_v ("chroma_weight_l1", wp_weight[1][ref][comp] ,bitstream);
+        bits += se_v ("chroma_offset_l1", wp_offset[1][ref][comp] ,bitstream);
+      }
+    }
+  }
+
+  return bits;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Maps img->type to the slice_type code written in the slice header.
+ *
+ *    P, B, I and SP slices map to 0, 1, 2 and 3; the constant 5 is added
+ *    to signal that the whole picture uses the same slice type. Any
+ *    other type is reported through error().
+ *
+ * \return
+ *    symbol value for picture type (0 if error() returns)
+ ************************************************************************
+ */
+int get_picture_type()
+{
+  // set this value to zero for transmission without signaling
+  // that the whole picture has the same slice type
+  int whole_frame_offset = 5;
+  int base_code;
+
+  switch (img->type)
+  {
+  case P_SLICE:
+    base_code = 0;
+    break;
+  case B_SLICE:
+    base_code = 1;
+    break;
+  case I_SLICE:
+    base_code = 2;
+    break;
+  case SP_SLICE:
+    base_code = 3;
+    break;
+  default:
+    error("Picture Type not supported!",1);
+    return 0;
+  }
+
+  return base_code + whole_frame_offset;
+}
+
+
+
+/*!
+ *****************************************************************************
+ *
+ * \brief
+ *    Write the header of a type B or type C data partition: the current
+ *    slice number, coded as a UVLC header symbol.
+ *
+ * \return
+ *    Number of bits used by the partition header
+ *
+ * \par Parameters
+ *    PartNo: Partition Number to which the header should be written;
+ *            must be greater than 0 and below the slice's max_part_nr
+ *
+ * \par Side effects
+ *    Partition header as per VCEG-N72r2 is written into the appropriate
+ *    partition bit buffer
+ *
+ * \par Limitations/Shortcomings/Tweaks
+ *    The current code does not support the change of picture parameters within
+ *    one coded sequence, hence there is only one parameter set necessary.  This
+ *    is hard coded to zero.
+ *    NOTE(review): the code below writes only the slice id; the remark above
+ *    may describe an earlier revision -- confirm.
+ *
+ * \date
+ *    October 24, 2001
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *****************************************************************************/
+int Partition_BC_Header(int PartNo)
+{
+  Slice *slice = img->currentSlice;
+  DataPartition *target = &(slice->partArr[PartNo]);
+  SyntaxElement sym;          // must be called "sym": SYMTRACESTRING refers to it by name
+
+  assert (PartNo > 0 && PartNo < img->currentSlice->max_part_nr);
+
+  // the slice id is the only symbol of the header; it is coded as a header element
+  sym.type   = SE_HEADER;
+  sym.value1 = img->current_slice_nr;
+  sym.value2 = 0;
+  SYMTRACESTRING("RTP-PH: Slice ID");
+
+  writeSE_UVLC (&sym, target);
+
+  return sym.len;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/header.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/header.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/header.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,21 @@
+
+/*!
+ *************************************************************************************
+ * \file header.h
+ *
+ * \brief
+ *    Prototypes for header.c
+ *************************************************************************************
+ */
+
+#ifndef _HEADER_H_
+#define _HEADER_H_
+
+// Full prototypes, so that the compiler checks every call against the
+// definitions in header.c (an empty "()" list leaves the arguments unchecked).
+
+// Writes the slice header of the current slice; returns the number of bits used.
+int SliceHeader(void);
+// Writes the header of data partition PartNo (type B or C); returns the number of bits used.
+int Partition_BC_Header(int PartNo);
+
+int  writeERPS(SyntaxElement *sym, DataPartition *partition);
+// int  SequenceHeader(FILE *outf);
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/ifunctions.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/ifunctions.h:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/ifunctions.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,125 ----
+ 
+ /*!
+  ************************************************************************
+  *  \file
+  *     ifunctions.h
+  *
+  *  \brief
+  *     define some inline functions that are used within the encoder.
+  *
+  *  \author
+  *      Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Karsten Sühring                  <suehring at hhi.de>
+  *      - Alexis Tourapis                 <alexismt at ieee.org>
+  *
+  ************************************************************************
+  */
+ #ifndef _IFUNCTIONS_H_
+ #define _IFUNCTIONS_H_
+ 
+ // Smaller of two ints.
+ static inline int imin(int a, int b)
+ {
+   if (a < b)
+     return a;
+   return b;
+ }
+ 
+ // Larger of two ints.
+ static inline int imax(int a, int b)
+ {
+   if (a > b)
+     return a;
+   return b;
+ }
+ 
+ // Smaller of two doubles; yields b whenever the comparison a < b is false.
+ static inline double dmin(double a, double b)
+ {
+   if (a < b)
+     return a;
+   return b;
+ }
+ 
+ // Larger of two doubles; yields b whenever the comparison a > b is false.
+ static inline double dmax(double a, double b)
+ {
+   if (a > b)
+     return a;
+   return b;
+ }
+ 
+ // Smaller of two 64-bit integers.
+ static inline int64 i64min(int64 a, int64 b)
+ {
+   if (a < b)
+     return a;
+   return b;
+ }
+ 
+ // Larger of two 64-bit integers.
+ static inline int64 i64max(int64 a, int64 b)
+ {
+   if (a > b)
+     return a;
+   return b;
+ }
+ 
+ // Absolute value of an int.
+ static inline int iabs(int x)
+ {
+   if (x < 0)
+     return -x;
+   return x;
+ }
+ 
+ // Absolute value of a double.
+ static inline double dabs(double x)
+ {
+   if (x < 0)
+     return -x;
+   return x;
+ }
+ 
+ // Sign of x as -1 or 1; zero counts as positive.
+ static inline int isign(int x)
+ {
+   if (x < 0)
+     return -1;
+   return 1;
+ }
+ 
+ // Magnitude of a combined with the sign of b (b == 0 counts as positive).
+ static inline int isignab(int a, int b)
+ {
+   int magnitude = iabs(a);
+ 
+   if (b < 0)
+     return -magnitude;
+   return magnitude;
+ }
+ 
+ // Rounding right shift by a bits; a non-positive a shifts left by -a instead.
+ static inline int rshift_rnd(int x, int a)
+ {
+   if (a > 0)
+     return (x + (1 << (a-1) )) >> a;
+   return x << (-a);
+ }
+ 
+ // Unsigned rounding right shift by a bits; a == 0 returns x unchanged.
+ static inline unsigned int rshift_rnd_us(unsigned int x, unsigned int a)
+ {
+   if (a > 0)
+     return (x + (1 << (a-1))) >> a;
+   return x;
+ }
+ 
+ // Rounding right shift without the a > 0 check; the caller must pass a >= 1.
+ static inline int rshift_rnd_sf(int x, int a)
+ {
+   int rounding = 1 << (a-1);
+ 
+   return (x + rounding) >> a;
+ }
+ 
+ // Unsigned rounding right shift without the a > 0 check; the caller must pass a >= 1.
+ static inline unsigned int rshift_rnd_us_sf(unsigned int x, unsigned int a)
+ {
+   return (x + (1 << (a-1))) >> a;
+ }
+ 
+ // Clips x to the range 0..high: the lower bound is applied first, then the upper.
+ static inline int iClip1(int high, int x)
+ {
+   if (!(x > 0))
+     x = 0;
+   if (!(x < high))
+     x = high;
+ 
+   return x;
+ }
+ 
+ // Clips x to the range low..high: the lower bound is applied first, then the upper.
+ static inline int iClip3(int low, int high, int x)
+ {
+   if (!(x > low))
+     x = low;
+   if (!(x < high))
+     x = high;
+ 
+   return x;
+ }
+ 
+ // Clips x to the range low..high: the lower bound is applied first, then the upper.
+ static inline double dClip3(double low, double high, double x)
+ {
+   if (!(x > low))
+     x = low;
+   if (!(x < high))
+     x = high;
+ 
+   return x;
+ }
+ 
+ // Product of factor and bits, shifted right by LAMBDA_ACCURACY_BITS.
+ static inline int weighted_cost(int factor, int bits)
+ {
+   int product = factor * bits;
+ 
+   return product >> LAMBDA_ACCURACY_BITS;
+ }
+ 
+ // Makes bit 0 of x equal to bit 1: sets it when bit 1 is set, clears it otherwise.
+ static inline int RSD(int x)
+ {
+   if (x & 2)
+     return x | 1;
+   return x & (~1);
+ }
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/image.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/image.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/image.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,2875 @@
+
+/*!
+ *************************************************************************************
+ * \file image.c
+ *
+ * \brief
+ *    Code one image/slice
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *     - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+ *     - Jani Lainema                    <jani.lainema at nokia.com>
+ *     - Sebastian Purreiter             <sebastian.purreiter at mch.siemens.de>
+ *     - Byeong-Moon Jeon                <jeonbm at lge.com>
+ *     - Yoon-Seong Soh                  <yunsung at lge.com>
+ *     - Thomas Stockhammer              <stockhammer at ei.tum.de>
+ *     - Detlev Marpe                    <marpe at hhi.de>
+ *     - Guido Heising                   <heising at hhi.de>
+ *     - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+ *     - Ragip Kurceren                  <ragip.kurceren at nokia.com>
+ *     - Antti Hallapuro                 <antti.hallapuro at nokia.com>
+ *     - Alexis Michael Tourapis         <alexismt at ieee.org>
+ *     - Athanasios Leontaris            <aleon at dolby.com>
+ *************************************************************************************
+ */
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <sys/timeb.h>
+#include <string.h>
+#include <memory.h>
+#include <assert.h>
+
+#include "global.h"
+
+#include "refbuf.h"
+#include "mbuffer.h"
+#include "img_luma.h"
+#include "img_chroma.h"
+#include "intrarefresh.h"
+#include "fmo.h"
+#include "sei.h"
+#include "memalloc.h"
+#include "nalu.h"
+#include "ratectl.h"
+#include "rc_quadratic.h"
+#include "mb_access.h"
+#include "output.h"
+#include "cabac.h"
+
+extern pic_parameter_set_rbsp_t *PicParSet[MAXPPS];
+
+void code_a_picture(Picture *pic);
+void frame_picture (Picture *frame, int method);
+void field_picture(Picture *top, Picture *bottom);
+
+static int  writeout_picture(Picture *pic);
+
+static int  picture_structure_decision(Picture *frame, Picture *top, Picture *bot);
+static void distortion_fld (float *dis_fld_y, float *dis_fld_u, float *dis_fld_v);
+static void find_snr(void);
+static void find_distortion(void);
+
+static void field_mode_buffer(int bit_field, float snr_field_y, float snr_field_u, float snr_field_v);
+static void frame_mode_buffer (int bit_frame, float snr_frame_y, float snr_frame_u, float snr_frame_v);
+
+static void init_frame(void);
+static void init_field(void);
+
+static void put_buffer_frame(void);
+static void put_buffer_top(void);
+static void put_buffer_bot(void);
+
+static void copy_motion_vectors_MB(void);
+
+static void PaddAutoCropBorders (int org_size_x, int org_size_y, int img_size_x, int img_size_y,
+                                 int org_size_x_cr, int org_size_y_cr, int img_size_x_cr, int img_size_y_cr);
+
+static void ReadOneFrame (int FrameNoInFile, int HeaderSize, int xs, int ys, int xs_cr, int ys_cr);
+
+static void writeUnit(Bitstream* currStream ,int partition);
+static void rdPictureCoding(void);
+
+#ifdef _ADAPT_LAST_GROUP_
+int *last_P_no;
+int *last_P_no_frm;
+int *last_P_no_fld;
+#endif
+
+static void ReportFirstframe(time_t tmp_time, time_t me_time);
+static void ReportIntra(time_t tmp_time, time_t me_time);
+static void ReportSP(time_t tmp_time, time_t me_time);
+static void ReportP(time_t tmp_time, time_t me_time);
+static void ReportB(time_t tmp_time, time_t me_time);
+static void ReportNALNonVLCBits(time_t tmp_time, time_t me_time);
+
+static int CalculateFrameNumber(void);  // Calculates the next frame number
+
+// Picture currently being reconstructed: code_a_picture() deblocks its
+// imgY/imgUV planes and MbAffPostProc() rearranges them.
+StorablePicture *enc_picture;
+// The three frame picture pointers are reset to NULL at the start of
+// encode_one_frame(); presumably one per coding pass -- TODO confirm.
+StorablePicture *enc_frame_picture;
+StorablePicture *enc_frame_picture2;
+StorablePicture *enc_frame_picture3;
+StorablePicture *enc_top_picture;
+StorablePicture *enc_bottom_picture;
+//Rate control
+int    QP;
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Rearranges the rows of field-coded macroblock pairs in enc_picture.
+ *
+ *    For every even macroblock index whose mb_field entry is set, the
+ *    2*MB_BLOCK_SIZE luma rows of the pair are copied out and written
+ *    back interleaved: row y of the first half goes to row 2*y, row y of
+ *    the second half to row 2*y+1. Unless the format is YUV400 the same
+ *    is done for both chroma planes with the chroma block dimensions.
+ ************************************************************************
+ */
+void MbAffPostProc(void)
+{
+  imgpel rows[32][16];          // scratch copy of one macroblock pair
+
+  imgpel ** luma    = enc_picture->imgY;
+  imgpel ***chroma  = enc_picture->imgUV;
+  int has_chroma    = (img->yuv_format != YUV400);
+  int mb, row, px, py, plane;
+
+  for (mb = 0; mb < (int)img->PicSizeInMbs; mb += 2)
+  {
+    if (!enc_picture->mb_field[mb])
+      continue;
+
+    // luma: save the pair, then write the two halves back row-interleaved
+    get_mb_pos(mb, &px, &py, IS_LUMA);
+
+    for (row = 0; row < (2*MB_BLOCK_SIZE); row++)
+      memcpy(rows[row], &luma[py+row][px], MB_BLOCK_SIZE * sizeof(imgpel));
+
+    for (row = 0; row < MB_BLOCK_SIZE; row++)
+    {
+      memcpy(&luma[py+(2*row)][px],     rows[row],                 MB_BLOCK_SIZE * sizeof(imgpel));
+      memcpy(&luma[py+(2*row + 1)][px], rows[row + MB_BLOCK_SIZE], MB_BLOCK_SIZE * sizeof(imgpel));
+    }
+
+    if (!has_chroma)
+      continue;
+
+    // chroma: scale the luma position down to chroma coordinates
+    px = px / (16/img->mb_cr_size_x);
+    py = py / (16/img->mb_cr_size_y);
+
+    for (plane = 0; plane < 2; plane++)
+    {
+      for (row = 0; row < (2*img->mb_cr_size_y); row++)
+        memcpy(rows[row], &chroma[plane][py+row][px], img->mb_cr_size_x * sizeof(imgpel));
+
+      for (row = 0; row < img->mb_cr_size_y; row++)
+      {
+        memcpy(&chroma[plane][py+(2*row)][px],     rows[row],                       img->mb_cr_size_x * sizeof(imgpel));
+        memcpy(&chroma[plane][py+(2*row + 1)][px], rows[row + img->mb_cr_size_y],   img->mb_cr_size_x * sizeof(imgpel));
+      }
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Encodes a picture
+ *
+ *    This is the main picture coding loop. It is called by the frame and
+ *    field coding routines after the img-> elements have been set up.
+ *    Not sure whether it is useful for MB-adaptive frame/field coding.
+ *
+ * \param pic
+ *    picture to encode; it becomes img->currentPicture and has its slice
+ *    count and distortion values reset before coding starts
+ ************************************************************************
+ */
+void code_a_picture(Picture *pic)
+{
+  unsigned int NumberOfCodedMBs = 0;
+  int SliceGroup = 0;
+  int j;
+
+  // true for picture 0 when no intra period is configured, otherwise for
+  // every picture whose number is a multiple of the intra period
+  int intra_refresh = input->intra_period == 0 ? (IMG_NUMBER == 0) : ((IMG_NUMBER%input->intra_period) == 0);
+
+  img->currentPicture = pic;
+
+  // IDR: the very first picture, or (with idr_enable) an I/SI picture on an
+  // intra refresh position; never a bottom field
+  img->currentPicture->idr_flag = ((!IMG_NUMBER) && (!(img->structure==BOTTOM_FIELD)))
+    || (input->idr_enable && intra_refresh && (img->type == I_SLICE || img->type==SI_SLICE)&& (!(img->structure==BOTTOM_FIELD)));
+
+  pic->no_slices = 0;
+  pic->distortion_u = pic->distortion_v = pic->distortion_y = 0.0;
+
+  RandomIntraNewPicture ();     //! Allocates forced INTRA MBs (even for fields!)
+
+  // The slice_group_change_cycle can be changed here.
+  // FmoInit() is called before coding each picture, frame or field
+  img->slice_group_change_cycle=1;
+  FmoInit(img, active_pps, active_sps);
+  FmoStartPicture ();           //! picture level initialization of FMO
+
+  CalculateQuantParam();
+  CalculateOffsetParam();
+
+  if(input->Transform8x8Mode)
+  {
+    CalculateQuant8Param();
+    CalculateOffset8Param();
+  }
+
+  reset_pic_bin_count();
+  img->bytes_in_picture = 0;
+
+  while (NumberOfCodedMBs < img->PicSizeInMbs)       // loop over slices
+  {
+    // Encode one Slice Group
+    while (!FmoSliceGroupCompletelyCoded (SliceGroup))
+    {
+      // Encode the current slice
+      NumberOfCodedMBs += encode_one_slice (SliceGroup, pic, NumberOfCodedMBs);
+      FmoSetLastMacroblockInSlice (img->current_mb_nr);
+      // Proceed to next slice
+      img->current_slice_nr++;
+      stats->bit_slice = 0;
+    }
+    // Proceed to next SliceGroup
+    SliceGroup++;
+  }
+  FmoEndPicture ();
+
+  // Modified for Fast Mode Decision. Inchoon Choi, SungKyunKwan Univ.
+  // (with rdopt == 3 the luma plane of every simulated decoder is deblocked
+  // as well, except for B slices)
+  if (input->rdopt == 3 && (img->type != B_SLICE))
+    for (j = 0; j < input->NoOfDecoders; j++)
+      DeblockFrame (img, decs->decY_best[j], NULL);
+
+  DeblockFrame (img, enc_picture->imgY, enc_picture->imgUV); //comment out to disable loop filter
+
+  // re-interleave the rows of field-coded macroblock pairs after filtering
+  if (img->MbaffFrameFlag)
+    MbAffPostProc();
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Encodes one frame
+ *
+ * \par Flow (as implemented below)
+ *    - optionally re-sends the PPS, then reads and pads the input frame
+ *    - FIELD_CODING: codes the picture as a top/bottom field pair only
+ *    - otherwise: codes a frame picture (plus the optional RD picture
+ *      passes and SI picture); with ADAPTIVE_CODING the field pair is
+ *      coded as well and picture_structure_decision() chooses between
+ *      the frame and the field representation
+ *    - writes out the chosen picture(s), frees all slice lists, hands the
+ *      chosen reconstruction to store_picture_in_dpb() or
+ *      replace_top_pic_with_frame() and frees the unused alternatives
+ *    - updates bit statistics and reporting and, if RCEnable is set,
+ *      the rate control model
+ *
+ * \return
+ *    0 if IMG_NUMBER evaluates to 0 at the end of the call,            \n
+ *    1 otherwise
+ ************************************************************************
+ */
+int encode_one_frame (void)
+{
+  static int prev_frame_no = 0; // POC200301
+  static int consecutive_non_reference_pictures = 0; // POC200301
+  int        FrameNumberInFile;
+  int        i, j;
+
+#ifdef _LEAKYBUCKET_
+  //extern long Bit_Buffer[20000];
+  extern unsigned long total_frame_buffer;
+#endif
+
+  time_t ltime1;
+  time_t ltime2;
+
+  struct TIMEB tstruct1;
+  struct TIMEB tstruct2;
+
+  time_t tmp_time;
+  int bits_frm = 0, bits_fld = 0; // never modified below; passed unchanged to the *_mode_buffer helpers
+  float dis_frm = 0.0, dis_frm_y = 0.0, dis_frm_u = 0.0, dis_frm_v = 0.0; // the _y/_u/_v values stay 0.0 in this function
+  float dis_fld = 0.0, dis_fld_y = 0.0, dis_fld_u = 0.0, dis_fld_v = 0.0; // the _y/_u/_v values stay 0.0 in this function
+
+  //Rate control
+  int pic_type, bits = 0;
+
+  me_time=0;
+  img->rd_pass = 0;
+  enc_frame_picture  = NULL;
+  enc_frame_picture2 = NULL;
+  enc_frame_picture3 = NULL;
+
+  ftime (&tstruct1);            // start time ms
+  time (&ltime1);               // start time s
+
+  //Rate control
+  img->write_macroblock = 0;
+/*
+  //Shankar Regunathan (Oct 2002)
+  //Prepare Panscanrect SEI payload
+  UpdatePanScanRectInfo ();
+  //Prepare Arbitrarydata SEI Payload
+  UpdateUser_data_unregistered ();
+  //Prepare Registered data SEI Payload
+  UpdateUser_data_registered_itu_t_t35 ();
+  //Prepare RandomAccess SEI Payload
+  UpdateRandomAccess ();
+*/
+
+  if (input->ResendPPS && img->number !=0)
+  {
+    stats->bit_ctr_parametersets_n=write_PPS(0, 0);
+    stats->bit_ctr_parametersets += stats->bit_ctr_parametersets_n;
+  }
+
+  put_buffer_frame ();      // sets the pointers to the frame structures
+                            // (and not to one of the field structures)
+  init_frame ();
+  FrameNumberInFile = CalculateFrameNumber();
+
+  ReadOneFrame (FrameNumberInFile, input->infile_header,
+                input->img_width, input->img_height, input->img_width_cr, input->img_height_cr);
+
+  PaddAutoCropBorders (input->img_width, input->img_height, img->width, img->height,
+                       input->img_width_cr, input->img_height_cr, img->width_cr, img->height_cr);
+
+  // set parameters for direct mode and deblocking filter
+  img->direct_spatial_mv_pred_flag     = input->direct_spatial_mv_pred_flag;
+  img->LFDisableIdc    = input->LFDisableIdc;
+  img->LFAlphaC0Offset = input->LFAlphaC0Offset;
+  img->LFBetaOffset    = input->LFBetaOffset;
+  img->AdaptiveRounding = input->AdaptiveRounding;
+  // Following code should consider optimal coding mode. Currently also does not support
+  // multiple slices per frame.
+  frame_ctr[img->type]++;
+  snr->frame_ctr++;
+
+  if(img->type == SP_SLICE)
+  {
+    if(input->sp2_frame_indicator)
+    { // switching SP frame encoding
+      sp2_frame_indicator=1;
+      read_SP_coefficients();
+    }
+  }
+  else
+  {
+    sp2_frame_indicator=0;
+  }
+
+  if (input->PicInterlace == FIELD_CODING)
+  {
+    //Rate control
+    if ( input->RCEnable )
+      generic_RC->FieldControl=1;
+
+    img->field_picture = 1;  // we encode fields
+    field_picture (top_pic, bottom_pic);
+    img->fld_flag = 1;
+  }
+  else
+  {
+    int tmpFrameQP;
+    //Rate control
+    if ( input->RCEnable )
+      generic_RC->FieldControl=0;
+
+    // For frame coding, turn MB level field/frame coding flag on
+    if (input->MbInterlace)
+      mb_adaptive = 1;
+
+    img->field_picture = 0; // we encode a frame
+
+    //Rate control
+    if(input->RCEnable)
+    {
+      /*update the number of MBs in the basic unit for MB adaptive
+      f/f coding*/
+      if( (input->MbInterlace) && (input->basicunit < img->FrameSizeInMbs) && (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+        img->BasicUnit = input->basicunit << 1;
+      else
+        img->BasicUnit = input->basicunit;
+
+      if ( input->RDPictureDecision )
+      {
+        // store rate allocation quadratic...
+        copy_rc_jvt( quadratic_RC_init, quadratic_RC );
+        // ...and generic model
+        copy_rc_generic( generic_RC_init, generic_RC );
+      }
+
+      rc_init_pict(quadratic_RC, 1,0,1, 1.0F);
+      img->qp  = updateQP(quadratic_RC, 0);
+
+      pic_type = img->type; // NOTE(review): pic_type is not read again in this function
+      QP =0;
+
+      if( active_sps->frame_mbs_only_flag)
+        generic_RC->TopFieldFlag=0;
+    }
+
+    if (input->GenerateMultiplePPS)
+      active_pps = PicParSet[0];
+
+    frame_picture (frame_pic_1, 0);
+
+    if ((input->RDPictureIntra || img->type!=I_SLICE) && input->RDPictureDecision)
+    {
+      rdPictureCoding();
+    }
+    tmpFrameQP = img->SumFrameQP; // call it here since rdPictureCoding buffers it and may modify it
+
+    if(img->type==SP_SLICE && si_frame_indicator==0 && input->si_frame_indicator)
+    {
+      // once the picture has been encoded as a primary SP frame encode as an SI frame
+      si_frame_indicator=1;
+      frame_picture (frame_pic_si, 0);
+    }
+    if(img->type==SP_SLICE && input->sp_output_indicator)
+    {
+      // output the transformed and quantized coefficients (useful for switching SP frames)
+      output_SP_coefficients();
+    }
+    // For field coding, turn MB level field/frame coding flag off
+    if (input->MbInterlace)
+      mb_adaptive = 0;
+
+    if (input->PicInterlace == ADAPTIVE_CODING)
+    {
+      //Rate control
+      if ( input->RCEnable )
+        generic_RC->FieldControl=1;
+      img->write_macroblock = 0;
+      img->bot_MB = 0;
+
+      img->field_picture = 1;  // we encode fields
+      field_picture (top_pic, bottom_pic);
+
+      //! Note: the distortion for a field coded picture is stored in the top field
+      //! the distortion values in the bottom field are dummies
+      dis_fld = top_pic->distortion_y + top_pic->distortion_u + top_pic->distortion_v;
+      dis_frm = frame_pic_1->distortion_y + frame_pic_1->distortion_u + frame_pic_1->distortion_v;
+
+      // compare the field pair against the frame picture of the RD pass currently selected
+      if(img->rd_pass==0)
+        img->fld_flag = picture_structure_decision (frame_pic_1, top_pic, bottom_pic);
+      else if(img->rd_pass==1)
+        img->fld_flag = picture_structure_decision (frame_pic_2, top_pic, bottom_pic);
+      else
+        img->fld_flag = picture_structure_decision (frame_pic_3, top_pic, bottom_pic);
+      if ( img->fld_flag )
+        tmpFrameQP = img->SumFrameQP; // field coding chosen: keep the QP sum left by field_picture()
+
+      update_field_frame_contexts (img->fld_flag);
+
+      //Rate control
+      if ( input->RCEnable )
+      {
+        generic_RC->FieldFrame = !(img->fld_flag) ? 1 : 0;
+      }
+    }
+    else
+      img->fld_flag = 0;
+    img->SumFrameQP = tmpFrameQP;
+  }
+
+  if (img->fld_flag)
+    stats->bit_ctr_emulationprevention += stats->em_prev_bits_fld;
+  else
+    stats->bit_ctr_emulationprevention += stats->em_prev_bits_frm;
+
+  if (img->type != B_SLICE)
+  {
+    img->pstruct_next_P = img->fld_flag;
+  }
+
+  // Here, img->structure may be either FRAME or BOTTOM FIELD depending on whether AFF coding is used
+  // The picture structure decision changes really only the fld_flag
+
+  if (img->fld_flag)            // field mode (use field when fld_flag=1 only)
+  {
+    field_mode_buffer (bits_fld, dis_fld_y, dis_fld_u, dis_fld_v);
+    writeout_picture (top_pic);
+    writeout_picture (bottom_pic);
+  }
+  else                          //frame mode
+  {
+    frame_mode_buffer (bits_frm, dis_frm_y, dis_frm_u, dis_frm_v);
+
+    if (input->RDPictureDecision && img->rd_pass == 2)
+      writeout_picture (frame_pic_3);
+    else if (input->RDPictureDecision && img->rd_pass == 1)
+      writeout_picture (frame_pic_2);
+    else
+    if(img->type==SP_SLICE && si_frame_indicator==1)
+    {
+      writeout_picture (frame_pic_si);
+      si_frame_indicator=0;
+    }
+    else
+      writeout_picture (frame_pic_1);
+  }
+
+  if (frame_pic_si)
+    free_slice_list(frame_pic_si);
+  if (frame_pic_3)
+    free_slice_list(frame_pic_3);
+  if (frame_pic_2)
+    free_slice_list(frame_pic_2);
+  if (frame_pic_1)
+    free_slice_list(frame_pic_1);
+  if (top_pic)
+    free_slice_list(top_pic);
+  if (bottom_pic)
+    free_slice_list(bottom_pic);
+
+  /*
+  // Tian Dong (Sept 2002)
+  // in frame mode, the newly reconstructed frame has been inserted to the mem buffer
+  // and it is time to prepare the spare picture SEI payload.
+  if (input->InterlaceCodingOption == FRAME_CODING
+      && input->SparePictureOption && img->type != B_SLICE)
+    CalculateSparePicture ();
+*/
+
+  //Rate control
+  if(input->RCEnable)
+  {
+    bits = (int) (stats->bit_ctr - stats->bit_ctr_n); // bits added to the counter since the previous call
+    rc_update_pict_frame(quadratic_RC, bits);
+  }
+
+  if (input->PicInterlace == FRAME_CODING)
+  {
+    if (input->rdopt == 3 && img->type != B_SLICE)
+      UpdateDecoders ();      // simulate packet losses and move decoded image to reference buffers
+
+    if (input->RestrictRef)
+      UpdatePixelMap ();
+  }
+
+  if (input->Verbose != 0)
+    find_snr ();
+  else
+  {
+    snr->snr_y = 0.0;
+    snr->snr_u = 0.0;
+    snr->snr_v = 0.0;
+    snr->sse_y = 0.0;
+    snr->sse_u = 0.0;
+    snr->sse_v = 0.0;
+  }
+
+  // redundant pictures: save reconstruction to calculate SNR and replace reference picture
+  if(input->redundant_pic_flag && key_frame)
+  {
+    for(i=0; i<img->width; i++)
+    {
+      for(j=0; j<img->height; j++)
+      {
+        imgY_tmp[j][i] = enc_frame_picture->imgY[j][i];
+      }
+    }
+    for(i=0; i<img->width_cr; i++)
+    {
+      for(j=0; j<img->height_cr; j++)
+      {
+        imgUV_tmp[0][j][i] = enc_frame_picture->imgUV[0][j][i];
+        imgUV_tmp[1][j][i] = enc_frame_picture->imgUV[1][j][i];
+      }
+    }
+  }
+
+  // redundant coding pass: copy the saved reconstruction back into enc_frame_picture
+  if(input->redundant_pic_flag && redundant_coding)
+  {
+    for(i=0; i<img->width; i++)
+    {
+      for(j=0; j<img->height; j++)
+      {
+        enc_frame_picture->imgY[j][i] = imgY_tmp[j][i];
+      }
+    }
+    for(i=0; i<img->width_cr; i++)
+    {
+      for(j=0; j<img->height_cr; j++)
+      {
+        enc_frame_picture->imgUV[0][j][i] = imgUV_tmp[0][j][i];
+        enc_frame_picture->imgUV[1][j][i] = imgUV_tmp[1][j][i];
+      }
+    }
+  }
+
+  time (&ltime2);               // end time sec
+  ftime (&tstruct2);            // end time ms
+
+  tmp_time = (ltime2 * 1000 + tstruct2.millitm) - (ltime1 * 1000 + tstruct1.millitm); // elapsed time in ms
+  tot_time = tot_time + tmp_time;
+
+  if (input->PicInterlace == ADAPTIVE_CODING)
+  {
+    if (img->fld_flag)
+    {
+      // store bottom field
+      store_picture_in_dpb(enc_bottom_picture);
+      free_storable_picture(enc_frame_picture);
+    }
+    else
+    {
+      // replace top with frame
+      replace_top_pic_with_frame(enc_frame_picture);
+      free_storable_picture(enc_bottom_picture);
+    }
+  }
+  else
+  {
+    if (img->fld_flag)
+    {
+      store_picture_in_dpb(enc_bottom_picture);
+    }
+    else
+    {
+      // keep the reconstruction of the selected RD pass, free the other two
+      if (img->rd_pass==2)
+      {
+        store_picture_in_dpb(enc_frame_picture3);
+        free_storable_picture(enc_frame_picture);
+        free_storable_picture(enc_frame_picture2);
+      }
+      else if (img->rd_pass==1)
+      {
+        store_picture_in_dpb(enc_frame_picture2);
+        free_storable_picture(enc_frame_picture);
+        free_storable_picture(enc_frame_picture3);
+      }
+      else
+      {
+        if(input->redundant_pic_flag==0)
+        {
+          store_picture_in_dpb(enc_frame_picture);
+          free_storable_picture(enc_frame_picture2);
+          free_storable_picture(enc_frame_picture3);
+        }
+        else
+        {
+          // key picture will be stored in dpb after redundant picture is coded
+          if(key_frame==0)
+          {
+            store_picture_in_dpb(enc_frame_picture);
+            free_storable_picture(enc_frame_picture2);
+            free_storable_picture(enc_frame_picture3);
+          }
+        }
+      }
+    }
+  }
+
+  img->AverageFrameQP = (img->SumFrameQP + (img->FrameSizeInMbs >> 1))/img->FrameSizeInMbs; // adds half the divisor before dividing
+  if ( input->RCEnable && img->type != B_SLICE && input->basicunit < img->FrameSizeInMbs )
+    quadratic_RC->CurrLastQP = img->AverageFrameQP;
+
+#ifdef _LEAKYBUCKET_
+  // Store bits used for this frame and increment counter of no. of coded frames
+  Bit_Buffer[total_frame_buffer] = (int) (stats->bit_ctr - stats->bit_ctr_n);
+  total_frame_buffer++;
+#endif
+
+  // POC200301: Verify that POC coding type 2 is not used if more than one consecutive
+  // non-reference frame is requested or if decoding order is different from output order
+  if (img->pic_order_cnt_type == 2)
+  {
+    if (!img->nal_reference_idc) consecutive_non_reference_pictures++;
+    else consecutive_non_reference_pictures = 0;
+
+    if (frame_no < prev_frame_no || consecutive_non_reference_pictures>1)
+      error("POC type 2 cannot be applied for the coding pattern where the encoding /decoding order of pictures are different from the output order.\n", -1);
+    prev_frame_no = frame_no;
+  }
+
+  if (stats->bit_ctr_parametersets_n!=0)
+    ReportNALNonVLCBits(tmp_time, me_time);
+
+  if (IMG_NUMBER == 0)
+    ReportFirstframe(tmp_time,me_time);
+  else
+  {
+    //Rate control
+    if(input->RCEnable)
+    {
+      if((!input->PicInterlace)&&(!input->MbInterlace))
+        bits=(int) (stats->bit_ctr - stats->bit_ctr_n);
+      else
+      {
+        bits = (int)(stats->bit_ctr - (quadratic_RC->Pprev_bits)); // used for rate control update
+        quadratic_RC->Pprev_bits = stats->bit_ctr;
+      }
+    }
+
+    switch (img->type)
+    {
+    case I_SLICE:
+      stats->bit_ctr_I += stats->bit_ctr - stats->bit_ctr_n;
+      ReportIntra(tmp_time,me_time);
+      break;
+    case SP_SLICE:
+      stats->bit_ctr_P += stats->bit_ctr - stats->bit_ctr_n; // SP bits are accumulated in the P counter
+      ReportSP(tmp_time,me_time);
+      break;
+    case B_SLICE:
+      stats->bit_ctr_B += stats->bit_ctr - stats->bit_ctr_n;
+      ReportB(tmp_time,me_time);
+      break;
+    default:      // P
+      stats->bit_ctr_P += stats->bit_ctr - stats->bit_ctr_n;
+      ReportP(tmp_time,me_time);
+    }
+  }
+
+  if (input->Verbose == 0)
+  {
+    //for (i = 0; i <= (img->number & 0x0F); i++)
+    //printf(".");
+    //printf("                              \r");
+    printf("Completed Encoding Frame %05d.\r",frame_no);
+  }
+  // Flush output statistics
+  fflush(stdout);
+
+  stats->bit_ctr_n = stats->bit_ctr;
+
+  //Rate control
+  if(input->RCEnable)
+  {
+    rc_update_pict(quadratic_RC, bits);
+
+    // update the parameters of quadratic R-D model
+    if( (img->type==P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+    {
+      updateRCModel(quadratic_RC);
+
+      if ( input->RCUpdateMode == RC_MODE_3 )
+        quadratic_RC->PreviousWholeFrameMAD = ComputeFrameMAD();
+    }
+  }
+
+  stats->bit_ctr_parametersets_n=0;
+
+  if (IMG_NUMBER == 0)
+    return 0;
+  else
+    return 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes every partition of every slice of a coded picture that has
+ *    its bitstream write_flag set, and records the picture as
+ *    img->currentPicture.
+ * \param pic
+ *    coded picture whose slices are to be written
+ * \return
+ *    always 0 in the current implementation
+ *
+ ************************************************************************
+ */
+static int writeout_picture(Picture *pic)
+{
+  int slice_idx, part_idx;
+
+  img->currentPicture = pic;
+
+  for (slice_idx = 0; slice_idx < pic->no_slices; slice_idx++)
+  {
+    Slice *slc = pic->slices[slice_idx];
+
+    img->current_mb_nr = slc->start_mb_nr;
+
+    for (part_idx = 0; part_idx < slc->max_part_nr; part_idx++)
+    {
+      Bitstream *bs = slc->partArr[part_idx].bitstream;
+
+      //! the stream is expected to be byte aligned at this point; the
+      //! alignment is done in terminate_slice
+      assert (bs->bits_to_go == 8);
+
+      // partitions without content are skipped
+      if (!bs->write_flag)
+        continue;
+
+      writeUnit (bs, part_idx);
+    }
+  }
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Copies frame_mbs_only_flag, frame_cropping_flag, chroma_format_idc
+ *    and the cropping rectangle from active_sps into enc_picture.
+ *    The four cropping offsets are 0 when frame_cropping_flag is not set.
+ ************************************************************************
+ */
+void copy_params(void)
+{
+  enc_picture->frame_mbs_only_flag = active_sps->frame_mbs_only_flag;
+  enc_picture->frame_cropping_flag = active_sps->frame_cropping_flag;
+  enc_picture->chroma_format_idc   = active_sps->chroma_format_idc;
+
+  // start from "no cropping" ...
+  enc_picture->frame_cropping_rect_left_offset   = 0;
+  enc_picture->frame_cropping_rect_right_offset  = 0;
+  enc_picture->frame_cropping_rect_top_offset    = 0;
+  enc_picture->frame_cropping_rect_bottom_offset = 0;
+
+  if (!active_sps->frame_cropping_flag)
+    return;
+
+  // ... and take over the rectangle only when cropping is enabled
+  enc_picture->frame_cropping_rect_left_offset   = active_sps->frame_cropping_rect_left_offset;
+  enc_picture->frame_cropping_rect_right_offset  = active_sps->frame_cropping_rect_right_offset;
+  enc_picture->frame_cropping_rect_top_offset    = active_sps->frame_cropping_rect_top_offset;
+  enc_picture->frame_cropping_rect_bottom_offset = active_sps->frame_cropping_rect_bottom_offset;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Encodes a frame picture
+ * \param frame
+ *    picture that is coded; receives bits_per_picture and, when
+ *    img->structure is still FRAME afterwards, the distortion values
+ * \param rd_pass
+ *    selects which global receives the newly allocated reconstruction:
+ *    2 -> enc_frame_picture3, 1 -> enc_frame_picture2,
+ *    any other value -> enc_frame_picture
+ ************************************************************************
+ */
+void frame_picture (Picture *frame, int rd_pass)
+{
+  img->SumFrameQP   = 0;
+  img->structure    = FRAME;
+  img->PicSizeInMbs = img->FrameSizeInMbs;
+
+  // allocate the reconstruction and make it the current picture
+  enc_picture = alloc_storable_picture ((PictureStructure) img->structure, img->width, img->height, img->width_cr, img->height_cr);
+
+  // remember it in the slot that belongs to this RD pass
+  switch (rd_pass)
+  {
+  case 2:
+    enc_frame_picture3 = enc_picture;
+    break;
+  case 1:
+    enc_frame_picture2 = enc_picture;
+    break;
+  default:
+    enc_frame_picture  = enc_picture;
+    break;
+  }
+
+  // picture order counts
+  img->ThisPOC = enc_picture->poc = img->framepoc;
+  enc_picture->top_poc    = img->toppoc;
+  enc_picture->bottom_poc = img->bottompoc;
+  enc_picture->frame_poc  = img->framepoc;
+
+  // numbering
+  enc_picture->pic_num     = img->frame_num;
+  enc_picture->frame_num   = img->frame_num;
+  enc_picture->coded_frame = 1;
+
+  // choose the macroblock addressing helpers that match the MBAFF setting
+  enc_picture->MbaffFrameFlag = img->MbaffFrameFlag = (input->MbInterlace != FRAME_CODING);
+  if (img->MbaffFrameFlag)
+  {
+    get_mb_block_pos = get_mb_block_pos_mbaff;
+    getNeighbour     = getAffNeighbour;
+  }
+  else
+  {
+    get_mb_block_pos = get_mb_block_pos_normal;
+    getNeighbour     = getNonAffNeighbour;
+  }
+
+  copy_params();
+
+  // emulation prevention bits of this picture are counted in the frame counter
+  stats->em_prev_bits_frm = 0;
+  stats->em_prev_bits     = &stats->em_prev_bits_frm;
+
+  img->fld_flag = 0;
+  code_a_picture(frame);
+
+  frame->bits_per_picture = 8 * ((((img->currentSlice)->partArr[0]).bitstream)->byte_pos);
+
+  if (img->structure != FRAME)
+    return;
+
+  find_distortion ();
+  frame->distortion_y = snr->snr_y;
+  frame->distortion_u = snr->snr_u;
+  frame->distortion_v = snr->snr_v;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Encodes a field picture, consisting of top and bottom field
+ * \param top
+ *    picture coded as top field; receives bits_per_picture and the
+ *    distortion of the complete field pair
+ * \param bottom
+ *    picture coded as bottom field; receives bits_per_picture only
+ * \note
+ *    The pictures that are encoded are the ones passed in as \a top and
+ *    \a bottom, the same objects that receive the bit counts.
+ * \note
+ *    img->number, img->buf_cycle, img->height and img->height_cr are
+ *    changed while the fields are coded; distortion_fld(), called at the
+ *    end, sets the two heights back to their frame values.
+ ************************************************************************
+ */
+void field_picture (Picture *top, Picture *bottom)
+{
+  //Rate control
+  int old_pic_type;              // picture type of top field used for rate control
+  int TopFieldBits;
+  img->SumFrameQP = 0;
+
+  //Rate control
+  old_pic_type = img->type;      // NOTE(review): stored but not read again in this function
+
+  // emulation prevention bits of both fields are counted in the field counter
+  stats->em_prev_bits_fld = 0;
+  stats->em_prev_bits = &stats->em_prev_bits_fld;
+  img->number *= 2;
+  img->buf_cycle *= 2;
+  img->height = (input->img_height+img->auto_crop_bottom) / 2;
+  img->height_cr = img->height_cr_frame / 2;
+  img->fld_flag = 1;
+  img->PicSizeInMbs = img->FrameSizeInMbs/2;
+  // Top field
+
+  // NOTE(review): img->structure still holds the value from before this call when
+  // the top field is allocated; it is set to TOP_FIELD a few lines below
+  enc_top_picture  = alloc_storable_picture ((PictureStructure) img->structure, img->width, img->height, img->width_cr, img->height_cr);
+  enc_top_picture->poc=img->toppoc;
+  enc_top_picture->frame_poc = img->toppoc;
+  enc_top_picture->pic_num = img->frame_num;
+  enc_top_picture->frame_num = img->frame_num;
+  enc_top_picture->coded_frame = 0;
+  enc_top_picture->MbaffFrameFlag = img->MbaffFrameFlag = FALSE;
+  get_mb_block_pos = get_mb_block_pos_normal;
+  getNeighbour = getNonAffNeighbour;
+  img->ThisPOC = img->toppoc;
+
+  img->structure = TOP_FIELD;
+  enc_picture = enc_top_picture;
+  copy_params();
+
+  put_buffer_top ();
+  init_field ();
+  if (img->type == B_SLICE)       // B slices only: undone after the bottom field is initialised
+    nextP_tr_fld--;
+
+
+  img->fld_flag = 1;
+
+  //Rate control
+  if(input->RCEnable)
+  {
+    img->BasicUnit=input->basicunit;
+
+    if(input->PicInterlace==FIELD_CODING)
+      rc_init_pict(quadratic_RC, 0,1,1, 1.0F);
+    else
+      rc_init_pict(quadratic_RC, 0,1,0, 1.0F);
+
+    img->qp  = updateQP(quadratic_RC, 1);
+
+    generic_RC->TopFieldFlag=1;
+  }
+
+  // encode the picture handed in by the caller
+  code_a_picture(top);
+  enc_picture->structure = (PictureStructure) 1;
+
+  store_picture_in_dpb(enc_top_picture);
+
+  top->bits_per_picture = 8 * ((((img->currentSlice)->partArr[0]).bitstream)->byte_pos);
+
+  //Rate control
+  TopFieldBits=top->bits_per_picture;
+
+  //  Bottom field
+  enc_bottom_picture  = alloc_storable_picture ((PictureStructure) img->structure, img->width, img->height, img->width_cr, img->height_cr);
+  enc_bottom_picture->poc=img->bottompoc;
+  enc_bottom_picture->frame_poc = img->bottompoc;
+  enc_bottom_picture->pic_num = img->frame_num;
+  enc_bottom_picture->frame_num = img->frame_num;
+  enc_bottom_picture->coded_frame = 0;
+  enc_bottom_picture->MbaffFrameFlag = img->MbaffFrameFlag = FALSE;
+  get_mb_block_pos = get_mb_block_pos_normal;
+  getNeighbour = getNonAffNeighbour;
+
+  img->ThisPOC = img->bottompoc;
+  img->structure = BOTTOM_FIELD;
+  enc_picture = enc_bottom_picture;
+  copy_params();
+  put_buffer_bot ();
+  img->number++;
+
+  init_field ();
+
+  if (img->type == B_SLICE)       // B slices only: restores the value decremented for the top field
+    nextP_tr_fld++;             //check once coding B field
+
+  // unless IntraBottom is 1, the bottom field of an I picture is coded as P (or B if BRefPictures == 2)
+  if (img->type == I_SLICE && input->IntraBottom!=1)
+    img->type = (input->BRefPictures == 2) ? B_SLICE : P_SLICE;
+
+  img->fld_flag = 1;
+
+  //Rate control
+  if(input->RCEnable)
+  {
+    quadratic_RC->bits_topfield = TopFieldBits;
+    rc_init_pict(quadratic_RC, 0,0,0, 1.0F);
+    img->qp  = updateQP(quadratic_RC, 0);
+    generic_RC->TopFieldFlag = 0;
+  }
+
+  enc_picture->structure = (PictureStructure) 2;
+  // encode the picture handed in by the caller
+  code_a_picture(bottom);
+
+  bottom->bits_per_picture = 8 * ((((img->currentSlice)->partArr[0]).bitstream)->byte_pos);
+
+  // the distortion for a field coded frame (consisting of top and bottom field)
+  // lives in the top->distortion variables, the bottom-> are dummies
+  distortion_fld (&top->distortion_y, &top->distortion_u, &top->distortion_v);
+
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Distortion Field: combines the two coded fields and measures the
+ *    distortion against the original frame picture.
+ * \param dis_fld_y
+ *    receives snr->snr_y
+ * \param dis_fld_u
+ *    receives snr->snr_u
+ * \param dis_fld_v
+ *    receives snr->snr_v
+ ************************************************************************
+ */
+static void distortion_fld (float *dis_fld_y, float *dis_fld_u, float *dis_fld_v)
+{
+  // halve the counters and switch the picture heights to frame size
+  img->number    = img->number / 2;
+  img->buf_cycle = img->buf_cycle / 2;
+  img->height    = input->img_height + img->auto_crop_bottom;
+  img->height_cr = img->height_cr_frame;
+
+  combine_field ();
+
+  // compare against the frame version of the original picture
+  imgUV_org = imgUV_org_frm;
+  imgY_org  = imgY_org_frm;
+
+  find_distortion ();
+
+  *dis_fld_y = snr->snr_y;
+  *dis_fld_u = snr->snr_u;
+  *dis_fld_v = snr->snr_v;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Picture Structure Decision: hands the summed distortion and the bit
+ *    counts of the frame picture and of the field pair, together with a
+ *    QP dependent lambda, to decide_fld_frame().
+ * \param frame
+ *    coded frame picture
+ * \param top
+ *    coded top field; its distortion values cover the whole field pair
+ * \param bot
+ *    coded bottom field; only its bits_per_picture is used
+ * \return
+ *    the result of decide_fld_frame()
+ ************************************************************************
+ */
+static int picture_structure_decision (Picture *frame, Picture *top, Picture *bot)
+{
+  double lambda_picture;
+  float snr_frame, snr_field;
+  int bit_frame, bit_field;
+
+  // the same lambda is used for every slice type
+  lambda_picture = 0.68 * pow (2, img->bitdepth_lambda_scale + ((img->qp - SHIFT_QP) / 3.0));
+
+  snr_frame = frame->distortion_y + frame->distortion_u + frame->distortion_v;
+  //! all distortions of a field picture are accumulated in the top field
+  snr_field = top->distortion_y + top->distortion_u + top->distortion_v;
+  bit_field = top->bits_per_picture + bot->bits_per_picture;
+  bit_frame = frame->bits_per_picture;
+  return decide_fld_frame (snr_frame, snr_field, bit_field, bit_frame, lambda_picture);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Field Mode Buffer: switches back to the frame buffers and stores
+ *    the given per-component values in snr.
+ * \param bit_field
+ *    not used by this function
+ * \param snr_field_y
+ *    value stored in snr->snr_y
+ * \param snr_field_u
+ *    value stored in snr->snr_u
+ * \param snr_field_v
+ *    value stored in snr->snr_v
+ ************************************************************************
+ */
+static void field_mode_buffer (int bit_field, float snr_field_y, float snr_field_u, float snr_field_v)
+{
+  put_buffer_frame ();
+
+  snr->snr_v = snr_field_v;
+  snr->snr_u = snr_field_u;
+  snr->snr_y = snr_field_y;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Frame Mode Buffer: switches to the frame buffers. When picture or
+ *    macroblock level interlace coding is enabled it additionally runs
+ *    put_buffer_top() and put_buffer_bot() with halved picture heights
+ *    and stores the given per-component values in snr.
+ * \param bit_frame
+ *    not used by this function
+ * \param snr_frame_y
+ *    value stored in snr->snr_y (interlace case only)
+ * \param snr_frame_u
+ *    value stored in snr->snr_u (interlace case only)
+ * \param snr_frame_v
+ *    value stored in snr->snr_v (interlace case only)
+ ************************************************************************
+ */
+static void frame_mode_buffer (int bit_frame, float snr_frame_y, float snr_frame_u, float snr_frame_v)
+{
+  put_buffer_frame ();
+
+  // pure frame coding: nothing else to do
+  if (input->PicInterlace == FRAME_CODING && input->MbInterlace == FRAME_CODING)
+    return;
+
+  // field view: half height, doubled picture number
+  img->height    /= 2;
+  img->height_cr /= 2;
+  img->number     = img->number * 2;
+
+  put_buffer_top ();
+
+  img->number = img->number + 1;
+  put_buffer_bot ();
+
+  // back to the frame view
+  img->number    = img->number / 2;
+  img->height    = input->img_height + img->auto_crop_bottom;
+  img->height_cr = img->height_cr_frame;
+
+  snr->snr_y = snr_frame_y;
+  snr->snr_u = snr_frame_u;
+  snr->snr_v = snr_frame_v;
+
+  put_buffer_frame ();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mmco initializations should go here; at present this only clears
+ *    img->dec_ref_pic_marking_buffer.
+ ************************************************************************
+ */
+static void init_dec_ref_pic_marking_buffer(void)
+{
+  img->dec_ref_pic_marking_buffer = NULL;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initializes the parameters for a new frame
+ *
+ * \par Details (as implemented below)
+ *    - resets the macroblock/slice position state in img and marks every
+ *      macroblock as belonging to no slice (slice_nr = -1)
+ *    - img->b_frame_to_code == 0: derives img->tr from
+ *      start_tr_in_this_IGOP, IMG_NUMBER and input->jumpd, and, when rate
+ *      control is disabled, selects img->qp (and img->qpsp for SP slices)
+ *      from the configured QP values
+ *    - otherwise (B picture): places img->tr between the surrounding
+ *      P pictures, clamped below nextP_no, and, when rate control is
+ *      disabled, selects img->qp from qpB/qpB2 or from gop_structure
+ *    - finally derives img->qp_scaled, updates the SEI related state and
+ *      clears the reference picture marking state
+ ************************************************************************
+ */
+static void init_frame (void)
+{
+  int i;
+  int prevP_no, nextP_no;
+
+  last_P_no = last_P_no_frm;
+
+  img->current_mb_nr = 0;
+  img->current_slice_nr = 0;
+  stats->bit_slice = 0;
+
+  img->mb_y = img->mb_x = 0;
+  img->block_y = img->pix_y = img->pix_c_y = 0;
+  img->block_x = img->pix_x = img->block_c_x = img->pix_c_x = 0;
+
+  // The 'slice_nr' of each macroblock is set to -1 here, to guarantee the correct encoding
+  // with FMO (if no FMO, encoding is correct without following assignment),
+  // for which MBs may not be encoded with scan order
+  for(i=0;i< ((int) (img->FrameSizeInMbs));i++)
+    img->mb_data[i].slice_nr=-1;
+
+  if (img->b_frame_to_code == 0)
+  {
+    img->tr = start_tr_in_this_IGOP + IMG_NUMBER * (input->jumpd + 1);
+
+    img->imgtr_last_P_frm = img->imgtr_next_P_frm;
+    img->imgtr_next_P_frm = img->tr;
+
+#ifdef _ADAPT_LAST_GROUP_
+    if (input->last_frame && img->number + 1 == input->no_frames)
+      img->tr = input->last_frame;
+#endif
+
+    if (IMG_NUMBER != 0 && input->successive_Bframe != 0)     // B pictures to encode
+      nextP_tr_frm = img->tr;
+
+    //Rate control
+    if(!input->RCEnable)                  // without using rate control
+    {
+      if (img->type == I_SLICE)
+      {
+#ifdef _CHANGE_QP_
+        //QP oscillation for secondary SP frames
+        if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+          ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+          img->qp = input->qp02;
+        else
+#endif
+          img->qp = input->qp0;   // set quant. parameter for I-frame
+      }
+      else
+      {
+#ifdef _CHANGE_QP_
+        //QP oscillation for secondary SP frames
+        if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+          ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+          img->qp = input->qpN2 + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+        else
+#endif
+          img->qp = input->qpN + (img->nal_reference_idc ? 0 : input->DispPQPOffset); // DispPQPOffset is added for non-reference pictures only
+
+        if (img->type == SP_SLICE)
+        {
+          if ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ))
+          {
+            img->qp = input->qpN2-(input->qpN-input->qpsp);
+            img->qpsp = input->qpN2-(input->qpN-input->qpsp_pred);
+          }
+          else
+          {
+            img->qp = input->qpsp;
+            img->qpsp = input->qpsp_pred;
+          }
+        }
+      }
+    }
+
+    img->mb_y_intra = img->mb_y_upd;  //  img->mb_y_intra indicates which GOB to intra code for this frame
+
+    if (input->intra_upd > 0) // if error robustness, find next GOB to update
+    {
+      img->mb_y_upd = (IMG_NUMBER / input->intra_upd) % (img->height / MB_BLOCK_SIZE);
+    }
+  }
+  else
+  {
+    img->p_interval = input->jumpd + 1;
+    prevP_no = start_tr_in_this_IGOP + (IMG_NUMBER - 1) * img->p_interval;
+    nextP_no = start_tr_in_this_IGOP + (IMG_NUMBER) * img->p_interval;
+
+#ifdef _ADAPT_LAST_GROUP_
+    last_P_no[0] = prevP_no;
+    for (i = 1; i < img->buf_cycle; i++)
+      last_P_no[i] = last_P_no[i - 1] - img->p_interval;
+
+    if (input->last_frame && img->number + 1 == input->no_frames)
+    {
+      nextP_no = input->last_frame;
+      img->p_interval = nextP_no - prevP_no;
+    }
+#endif
+
+    img->b_interval =
+      ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+
+    if (input->HierarchicalCoding == 3)
+      img->b_interval = 1.0;
+
+    if (input->HierarchicalCoding)
+      img->tr = prevP_no + (int) (img->b_interval  * (double) (1 + gop_structure[img->b_frame_to_code - 1].display_no));      // from prev_P
+    else
+      img->tr = prevP_no + (int) (img->b_interval * (double) img->b_frame_to_code);      // from prev_P
+
+
+    if (img->tr >= nextP_no)
+      img->tr = nextP_no - 1; // keep the B picture strictly before the next P picture
+    //Rate control
+    if(!input->RCEnable && input->HierarchicalCoding == 0)                  // without using rate control
+    {
+#ifdef _CHANGE_QP_
+      //QP oscillation for secondary SP frames
+      if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+        ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+      {
+        img->qp = input->qpB2;
+      }
+      else
+#endif
+      {
+        img->qp = input->qpB;
+      }
+
+      if (img->nal_reference_idc) // reference B picture: QP is replaced by the offset and clipped value
+      {
+#ifdef _CHANGE_QP_
+        //QP oscillation for secondary SP frames
+        if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+          ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+        {
+          img->qp = iClip3(-img->bitdepth_luma_qp_scale,51,input->qpB2 + input->qpBRS2Offset);
+        }
+        else
+#endif
+        {
+          img->qp = iClip3(-img->bitdepth_luma_qp_scale,51,input->qpB + input->qpBRSOffset);
+        }
+      }
+    }
+    else if (!input->RCEnable && input->HierarchicalCoding !=0)
+    {
+      // Note that _CHANGE_QP_ does not anymore work for gop_structure. Needs to be fixed
+      img->qp =  gop_structure[img->b_frame_to_code - 1].slice_qp;
+    }
+  }
+  img->qp_scaled = img->qp + img->bitdepth_luma_qp_scale;
+
+  UpdateSubseqInfo (img->layer);        // Tian Dong (Sept 2002)
+  UpdateSceneInformation (FALSE, 0, 0, -1); // JVT-D099, scene information SEI, nothing included by default
+
+  //! Commented out by StW, needs fixing in SEI.h to keep the trace file clean
+  //  PrepareAggregationSEIMessage ();
+
+  img->no_output_of_prior_pics_flag = 0;
+  img->long_term_reference_flag = 0;
+
+  init_dec_ref_pic_marking_buffer();
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initializes the parameters for a new field
+ *
+ *    Resets the macroblock / slice position state, derives img->tr for
+ *    the field and, when rate control is off (input->RCEnable == 0),
+ *    selects img->qp. With rate control on, img->qp is not assigned here.
+ *    img->qp_scaled is always recomputed from img->qp at the end.
+ *
+ * \par Side effects
+ *    input->jumpd, input->successive_Bframe, img->number and
+ *    img->buf_cycle are rescaled on entry and scaled back before
+ *    returning. Because of the integer divisions, img->buf_cycle leaves
+ *    as 2 * (buf_cycle / 2) and img->number leaves as
+ *    2 * (number / 2) + img->fld_type.
+ ************************************************************************
+ */
+static void init_field (void)
+{
+  int i;
+  int prevP_no, nextP_no;
+
+  last_P_no = last_P_no_fld;
+
+  img->current_mb_nr = 0;
+  img->current_slice_nr = 0;
+  stats->bit_slice = 0;
+
+  input->jumpd *= 2;              // temporary rescaling, undone at the end of this function
+  input->successive_Bframe *= 2;  // temporary rescaling, undone at the end of this function
+  img->number /= 2;               // integer division; rebuilt from fld_type at the end
+  img->buf_cycle /= 2;            // integer division; an odd value does not survive the later *= 2
+
+  img->mb_y = img->mb_x = 0;
+  img->block_y = img->pix_y = img->pix_c_y = 0; // define vertical positions
+  img->block_x = img->pix_x = img->block_c_x = img->pix_c_x = 0;        // define horizontal positions
+
+  if (!img->b_frame_to_code)
+  {
+    img->tr = img->number * (input->jumpd + 2) + img->fld_type;
+
+    if (!img->fld_type)
+    {
+      img->imgtr_last_P_fld = img->imgtr_next_P_fld;
+      img->imgtr_next_P_fld = img->tr;
+    }
+
+#ifdef _ADAPT_LAST_GROUP_
+    if (input->last_frame && img->number + 1 == input->no_frames)
+      img->tr = input->last_frame;
+#endif
+    if (img->number != 0 && input->successive_Bframe != 0)    // B pictures to encode
+      nextP_tr_fld = img->tr;
+
+      //Rate control
+    if(!input->RCEnable)                  // without using rate control
+    {
+      if (img->type == I_SLICE)
+      {
+#ifdef _CHANGE_QP_
+        //QP oscillation for secondary SP frames
+        if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+          ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+          img->qp = input->qp02;
+        else
+#endif
+          img->qp = input->qp0;   // set quant. parameter for I-frame
+      }
+      else
+      {
+#ifdef _CHANGE_QP_
+        //QP oscillation for secondary SP frames
+        if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+          ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+          img->qp = input->qpN2 + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+        else
+#endif
+          img->qp = input->qpN + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+        if (img->type == SP_SLICE)  // SP slices override the QP chosen just above (not guarded by _CHANGE_QP_)
+        {
+          if ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ))
+          {
+            img->qp = input->qpN2-(input->qpN-input->qpsp);
+            img->qpsp = input->qpN2-(input->qpN-input->qpsp_pred);
+          }
+          else
+          {
+            img->qp = input->qpsp;
+            img->qpsp = input->qpsp_pred;
+          }
+        }
+      }
+    }
+    img->mb_y_intra = img->mb_y_upd;  //  img->mb_y_intra indicates which GOB to intra code for this frame
+
+    if (input->intra_upd > 0) // if error robustness, find next GOB to update
+    {
+      img->mb_y_upd =
+        (img->number / input->intra_upd) % (img->width / MB_BLOCK_SIZE);  // NOTE(review): a "y" index wrapped by width / MB_BLOCK_SIZE looks odd - confirm height was not intended
+    }
+  }
+  else
+  {
+    img->p_interval = input->jumpd + 2;
+    prevP_no = (img->number - 1) * img->p_interval + img->fld_type;
+    nextP_no = img->number * img->p_interval + img->fld_type;
+#ifdef _ADAPT_LAST_GROUP_
+    if (!img->fld_type)       // top field
+    {
+      last_P_no[0] = prevP_no + 1;
+      last_P_no[1] = prevP_no;
+      for (i = 1; i <= img->buf_cycle; i++)  // writes entries 2 .. 2 * buf_cycle + 1, each p_interval before the entry two slots earlier
+      {
+        last_P_no[2 * i] = last_P_no[2 * i - 2] - img->p_interval;
+        last_P_no[2 * i + 1] = last_P_no[2 * i - 1] - img->p_interval;
+      }
+    }
+    else                      // bottom field
+    {
+      last_P_no[0] = nextP_no - 1;
+      last_P_no[1] = prevP_no;
+      for (i = 1; i <= img->buf_cycle; i++)  // same fill as in the top-field case
+      {
+        last_P_no[2 * i] = last_P_no[2 * i - 2] - img->p_interval;
+        last_P_no[2 * i + 1] = last_P_no[2 * i - 1] - img->p_interval;
+      }
+    }
+
+    if (input->last_frame && img->number + 1 == input->no_frames)
+    {
+      nextP_no = input->last_frame;
+      img->p_interval = nextP_no - prevP_no;
+    }
+#endif
+    img->b_interval =
+      ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+
+    if (input->HierarchicalCoding == 3)
+      img->b_interval = 1.0;
+
+    if (input->HierarchicalCoding)
+      img->tr = prevP_no + (int) ((img->b_interval + 1.0) * (double) (1 + gop_structure[img->b_frame_to_code - 1].display_no));      // from prev_P
+    else
+      img->tr = prevP_no + (int) ((img->b_interval + 1.0) * (double) img->b_frame_to_code);      // from prev_P
+
+
+    if (img->tr >= nextP_no)
+      img->tr = nextP_no - 1; // clamp: tr is kept strictly below nextP_no
+    //Rate control
+    if(!input->RCEnable && input->HierarchicalCoding == 0)                  // without using rate control
+    {
+#ifdef _CHANGE_QP_
+      //QP oscillation for secondary SP frames
+      if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+        ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+      {
+        img->qp = input->qpB2;
+      }
+      else
+#endif
+        img->qp = input->qpB;
+      if (img->nal_reference_idc)  // nonzero nal_reference_idc: replace the QP with the offset, clipped variant
+      {
+#ifdef _CHANGE_QP_
+        //QP oscillation for secondary SP frames
+        if ((input->qp2start > 0 && img->tr >= input->qp2start && input->sp2_frame_indicator==0)||
+          ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ) && (input->sp2_frame_indicator==1)))
+        {
+          img->qp = iClip3(-img->bitdepth_luma_qp_scale,51,input->qpB2 + input->qpBRS2Offset);
+        }
+        else
+#endif
+          img->qp = iClip3(-img->bitdepth_luma_qp_scale,51,input->qpB + input->qpBRSOffset);
+
+      }
+    }
+    else if (!input->RCEnable && input->HierarchicalCoding != 0)
+    {
+      img->qp =  gop_structure[img->b_frame_to_code - 1].slice_qp;  // QP comes from the gop_structure entry for this B picture
+    }
+  }
+  img->qp_scaled = img->qp + img->bitdepth_luma_qp_scale;
+  input->jumpd /= 2;              // undo the rescaling applied on entry
+  input->successive_Bframe /= 2;  // undo the rescaling applied on entry
+  img->buf_cycle *= 2;
+  img->number = 2 * img->number + img->fld_type;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Builds the sub-pel sample planes of a stored picture.
+ *
+ *    Allocates s->imgY_sub (4 x 4 planes of the padded luma size) and,
+ *    when input->ChromaMCBuffer is set and the picture is not YUV400,
+ *    s->imgUV_sub, then fills them through getSubImagesLuma() and
+ *    getSubImagesChroma().
+ *
+ * \param s
+ *    picture to process; nothing is done if s->imgY_sub is already set
+ ************************************************************************/
+void UnifiedOneForthPix (StorablePicture *s)
+{
+  int ypadded_size = s->size_y + 2 * IMG_PAD_SIZE;
+  int xpadded_size = s->size_x + 2 * IMG_PAD_SIZE;
+  int need_chroma;
+
+  // don't upsample twice
+  if (s->imgY_sub)
+    return;
+
+  // Y component
+  get_mem4Dpel (&(s->imgY_sub), 4, 4, ypadded_size, xpadded_size);
+  if (NULL == s->imgY_sub)
+    no_mem_exit("UnifiedOneForthPix: s->imgY_sub");   // report the function that actually failed
+
+  // chroma sub-images are built only when buffering is requested and chroma exists
+  need_chroma = (input->ChromaMCBuffer && img->yuv_format != YUV400);
+
+  if (need_chroma)
+  {
+    // UV components: the plane counts and sizes depend on the chroma format
+    if (img->yuv_format == YUV420)
+    {
+      get_mem5Dpel (&(s->imgUV_sub), 2, 8, 8, ypadded_size/2, xpadded_size/2);
+    }
+    else if (img->yuv_format == YUV422)
+    {
+      get_mem5Dpel (&(s->imgUV_sub), 2, 4, 8, ypadded_size, xpadded_size/2);
+    }
+    else
+    { // YUV444
+      get_mem5Dpel (&(s->imgUV_sub), 2, 4, 4, ypadded_size, xpadded_size);
+    }
+  }
+
+  // derive the subpixel images for first component
+  getSubImagesLuma ( s );
+  // and the sub-images for U and V
+  if (need_chroma)
+    getSubImagesChroma( s );
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Measures the squared error and PSNR of the coded picture against
+ *    the original for Y, U and V and folds the results into the running
+ *    statistics kept in snr.
+ ************************************************************************
+ */
+static void find_snr (void)
+{
+  int x, y, k;
+  int64 sse_luma = 0, sse_cb = 0, sse_cr = 0;
+  int luma_samples;
+  int chroma_samples;
+  unsigned int peak_sqd    = img->max_imgpel_value * img->max_imgpel_value;
+  unsigned int peak_sqd_uv = img->max_imgpel_value_uv * img->max_imgpel_value_uv;
+  const int has_chroma = (img->yuv_format != YUV400);
+  const int slice_type = img->type;
+
+  // sample counts used to normalise the squared error
+  luma_samples   = input->img_height * input->img_width;
+  chroma_samples = input->img_height_cr * input->img_width_cr;
+
+  if (img->fld_flag == 0)
+  {
+    // frame picture: compare the frame original with enc_picture
+    imgY_org  = imgY_org_frm;
+    imgUV_org = imgUV_org_frm;
+
+    if (input->PicInterlace == ADAPTIVE_CODING)
+      enc_picture = enc_frame_picture;
+
+    for (y = 0; y < input->img_height; y++)
+      for (x = 0; x < input->img_width; x++)
+        sse_luma += img->quad[imgY_org[y][x] - enc_picture->imgY[y][x]];
+
+    if (has_chroma)
+    {
+      for (y = 0; y < input->img_height_cr; y++)
+        for (x = 0; x < input->img_width_cr; x++)
+        {
+          sse_cb += img->quad[imgUV_org[0][y][x] - enc_picture->imgUV[0][y][x]];
+          sse_cr += img->quad[imgUV_org[1][y][x] - enc_picture->imgUV[1][y][x]];
+        }
+    }
+  }
+  else
+  {
+    // field picture: the reconstruction is read from imgY_com / imgUV_com
+    for (y = 0; y < input->img_height; y++)
+      for (x = 0; x < input->img_width; x++)
+        sse_luma += img->quad[imgY_org[y][x] - imgY_com[y][x]];
+
+    if (has_chroma)
+    {
+      for (y = 0; y < input->img_height_cr; y++)
+        for (x = 0; x < input->img_width_cr; x++)
+        {
+          sse_cb += img->quad[imgUV_org[0][y][x] - imgUV_com[0][y][x]];
+          sse_cr += img->quad[imgUV_org[1][y][x] - imgUV_com[1][y][x]];
+        }
+    }
+  }
+
+  // raw sums of squared errors for this picture
+  snr->sse_y = (float)sse_luma;
+  snr->sse_u = (float)sse_cb;
+  snr->sse_v = (float)sse_cr;
+
+#if ZEROSNR
+  // replace an exact match by an error of 1 so that a PSNR is still produced
+  if (sse_luma == 0)
+    sse_luma = 1;
+  if (sse_cb == 0)
+    sse_cb = 1;
+  if (sse_cr == 0)
+    sse_cr = 1;
+#endif
+
+  // PSNR of the current picture; left untouched when the luma error is zero
+  if (sse_luma != 0)
+  {
+    snr->snr_y = (float) (10 * log10 (peak_sqd * (double)((double) luma_samples / sse_luma)));
+    if (has_chroma)
+    {
+      snr->snr_u = (float) (10 * log10 (peak_sqd_uv * (double)((double) chroma_samples / sse_cb)));
+      snr->snr_v = (float) (10 * log10 (peak_sqd_uv * (double)((double) chroma_samples / sse_cr)));
+    }
+    else
+    {
+      snr->snr_u = 0.0;
+      snr->snr_v = 0.0;
+    }
+  }
+
+  if (img->number != 0)
+  {
+    // later pictures: update the running means over all pictures so far
+    int total_frames = snr->frame_ctr - 1;
+
+    snr->snr_ya = (float) (snr->snr_ya * total_frames + snr->snr_y) / (total_frames + 1);
+    snr->snr_ua = (float) (snr->snr_ua * total_frames + snr->snr_u) / (total_frames + 1);
+    snr->snr_va = (float) (snr->snr_va * total_frames + snr->snr_v) / (total_frames + 1);
+    snr->msse_y = (float) (snr->msse_y * total_frames + snr->sse_y) / (total_frames + 1);
+    snr->msse_u = (float) (snr->msse_u * total_frames + snr->sse_u) / (total_frames + 1);
+    snr->msse_v = (float) (snr->msse_v * total_frames + snr->sse_v) / (total_frames + 1);
+  }
+  else
+  {
+    // first picture: seed the first-frame values, the means and the per-type tables
+    snr->snr_y1 = snr->snr_y;
+    snr->snr_u1 = snr->snr_u;
+    snr->snr_v1 = snr->snr_v;
+    snr->snr_ya = snr->snr_y1;
+    snr->snr_ua = snr->snr_u1;
+    snr->snr_va = snr->snr_v1;
+    snr->msse_y = snr->sse_y;
+    snr->msse_u = snr->sse_u;
+    snr->msse_v = snr->sse_v;
+    for (k = 0; k < 5; k++)
+    {
+      snr->snr_yt[k] = 0.0;
+      snr->snr_ut[k] = 0.0;
+      snr->snr_vt[k] = 0.0;
+    }
+  }
+
+  // running means per slice type, weighted by frame_ctr of that type
+  snr->snr_yt[slice_type] = (float) (snr->snr_yt[slice_type] * (frame_ctr[slice_type] - 1) + snr->snr_y) / ( frame_ctr[slice_type] );
+  snr->snr_ut[slice_type] = (float) (snr->snr_ut[slice_type] * (frame_ctr[slice_type] - 1) + snr->snr_u) / ( frame_ctr[slice_type] );
+  snr->snr_vt[slice_type] = (float) (snr->snr_vt[slice_type] * (frame_ctr[slice_type] - 1) + snr->snr_v) / ( frame_ctr[slice_type] );
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Accumulates the sum of squared errors of Y, U and V between the
+ *    original and the coded picture and stores the raw sums in
+ *    snr->snr_y / snr_u / snr_v.
+ ************************************************************************
+ */
+static void find_distortion (void)
+{
+  int x, y;
+  int64 sse_luma = 0, sse_cb = 0, sse_cr = 0;
+  const int has_chroma = (img->yuv_format != YUV400);
+
+  if (img->structure == FRAME)
+  {
+    // frame picture: compare the frame original with enc_picture
+    imgY_org  = imgY_org_frm;
+    imgUV_org = imgUV_org_frm;
+
+    for (y = 0; y < input->img_height; y++)
+      for (x = 0; x < input->img_width; x++)
+        sse_luma += img->quad[imgY_org[y][x] - enc_picture->imgY[y][x]];
+
+    if (has_chroma)
+    {
+      for (y = 0; y < input->img_height_cr; y++)
+        for (x = 0; x < input->img_width_cr; x++)
+        {
+          sse_cb += img->quad[imgUV_org[0][y][x] - enc_picture->imgUV[0][y][x]];
+          sse_cr += img->quad[imgUV_org[1][y][x] - enc_picture->imgUV[1][y][x]];
+        }
+    }
+  }
+  else
+  {
+    // field picture: the reconstruction is read from imgY_com / imgUV_com
+    for (y = 0; y < input->img_height; y++)
+      for (x = 0; x < input->img_width; x++)
+        sse_luma += img->quad[imgY_org[y][x] - imgY_com[y][x]];
+
+    if (has_chroma)
+    {
+      for (y = 0; y < input->img_height_cr; y++)
+        for (x = 0; x < input->img_width_cr; x++)
+        {
+          sse_cb += img->quad[imgUV_org[0][y][x] - imgUV_com[0][y][x]];
+          sse_cr += img->quad[imgUV_org[1][y][x] - imgUV_com[1][y][x]];
+        }
+    }
+  }
+
+  // only the raw sums are stored here (the real PSNR is calculated at find_snr_avg())
+  snr->snr_y = (float) sse_luma;
+  snr->snr_u = (float) sse_cb;
+  snr->snr_v = (float) sse_cr;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Placeholder slice-size check: ignores bits_slice and always reports
+ *    that the slice is not too big.
+ * \return
+ *    FALSE, unconditionally
+ ************************************************************************
+ */
+Boolean dummy_slice_too_big (int bits_slice)
+{
+  const Boolean too_big = FALSE;
+
+  return too_big;
+}
+
+
+/*!
+***************************************************************************
+* \brief
+*    For MB level field/frame coding: copies the results stored in rdopt
+*    (macroblock header fields, coefficients, reference indices,
+*    reconstructed samples, prediction modes and, when
+*    img->MbaffFrameFlag is set, motion vectors) into the current
+*    macroblock, img and enc_picture.
+*
+* \param bot_block
+*    not referenced anywhere in the body
+***************************************************************************
+*/
+void copy_rdopt_data (int bot_block)
+{
+  int mb_nr = img->current_mb_nr;
+  Macroblock *currMB = &img->mb_data[mb_nr];
+  int i, j, k;
+
+  int bframe = (img->type == B_SLICE);
+  int mode;
+  int b8mode, b8pdir;
+  int block_y;
+
+  int list_offset = currMB->list_offset;
+
+  mode                = rdopt->mode;
+  currMB->mb_type     = rdopt->mb_type;    // copy mb_type
+  currMB->cbp         = rdopt->cbp;        // copy cbp
+  currMB->cbp_blk     = rdopt->cbp_blk;    // copy cbp_blk
+  currMB->bi_pred_me  = rdopt->bi_pred_me; // copy biprediction
+  img->i16offset      = rdopt->i16offset;
+
+  currMB->prev_qp=rdopt->prev_qp;
+  currMB->prev_delta_qp=rdopt->prev_delta_qp;
+  currMB->prev_cbp = rdopt->prev_cbp;
+  currMB->delta_qp = rdopt->delta_qp;
+
+  currMB->qp = rdopt->qp;
+  set_chroma_qp(currMB);  // called after currMB->qp has been restored
+
+  currMB->c_ipred_mode = rdopt->c_ipred_mode;
+
+  for (i = 0; i < 4+img->num_blk8x8_uv; i++)
+  {
+    for (j = 0; j < 4; j++)
+      for (k = 0; k < 2; k++)
+        memcpy(img->cofAC[i][j][k],rdopt->cofAC[i][j][k], 65 * sizeof(int));  // 65 ints per entry - presumably the innermost cofAC dimension, TODO confirm
+  }
+
+  for (i = 0; i < 3; i++)
+  {
+    for (k = 0; k < 2; k++)
+      memcpy(img->cofDC[i][k],rdopt->cofDC[i][k], 18 * sizeof(int));  // 18 ints per entry - presumably the innermost cofDC dimension, TODO confirm
+  }
+
+  for (j = 0; j < BLOCK_MULTIPLE; j++)
+  {
+    block_y = img->block_y + j;
+    memcpy(&enc_picture->ref_idx[LIST_0][block_y][img->block_x], rdopt->refar[LIST_0][j], BLOCK_MULTIPLE * sizeof(char));
+    for (i = 0; i < BLOCK_MULTIPLE; i++)  // derive ref_pic_id from the reference index that was just copied
+      enc_picture->ref_pic_id [LIST_0][block_y][img->block_x + i] =
+      enc_picture->ref_pic_num[LIST_0 + list_offset][(short)enc_picture->ref_idx[LIST_0][block_y][img->block_x+i]];
+  }
+  if (bframe)  // LIST_1 is only handled for B slices
+  {
+    for (j = 0; j < BLOCK_MULTIPLE; j++)
+    {
+      block_y = img->block_y + j;
+      memcpy(&enc_picture->ref_idx[LIST_1][block_y][img->block_x], rdopt->refar[LIST_1][j], BLOCK_MULTIPLE * sizeof(char));
+      for (i = 0; i < BLOCK_MULTIPLE; i++)
+        enc_picture->ref_pic_id [LIST_1][block_y][img->block_x + i] =
+        enc_picture->ref_pic_num[LIST_1 + list_offset][(short)enc_picture->ref_idx[LIST_1][block_y][img->block_x+i]];
+    }
+  }
+
+  //===== reconstruction values =====
+  for (j = 0; j < MB_BLOCK_SIZE; j++)
+    memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x],rdopt->rec_mbY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+
+  if (img->yuv_format != YUV400)
+  {
+    for (j = 0; j < img->mb_cr_size_y; j++)
+    {
+      memcpy(&enc_picture->imgUV[0][img->pix_c_y + j][img->pix_c_x],rdopt->rec_mbU[j], img->mb_cr_size_x * sizeof(imgpel));
+      memcpy(&enc_picture->imgUV[1][img->pix_c_y + j][img->pix_c_x],rdopt->rec_mbV[j], img->mb_cr_size_x * sizeof(imgpel));
+    }
+  }
+
+  memcpy(currMB->b8mode,rdopt->b8mode, 4 * sizeof(int));
+  memcpy(currMB->b8pdir,rdopt->b8pdir, 4 * sizeof(int));
+
+  currMB->luma_transform_size_8x8_flag = rdopt->luma_transform_size_8x8_flag;
+
+  //==== intra prediction modes ====
+  if (mode == P8x8)
+  {
+    memcpy(currMB->intra_pred_modes,rdopt->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+    for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+      memcpy(&img->ipredmode[j][img->block_x],&rdopt->ipredmode[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+  }
+  else if (mode != I4MB && mode != I8MB)
+  {
+    memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));  // every other non-I4MB/I8MB mode: reset all entries to DC_PRED
+    for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+      memset(&img->ipredmode[j][img->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+  }
+  else if (mode == I4MB || mode == I8MB)  // always true here: the previous branch already took every other mode
+  {
+    memcpy(currMB->intra_pred_modes,rdopt->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+    memcpy(currMB->intra_pred_modes8x8,rdopt->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+    for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++) {
+      memcpy(&img->ipredmode[j][img->block_x],&rdopt->ipredmode[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+    }
+  }
+
+  if (img->MbaffFrameFlag)
+  {
+    // motion vectors
+    copy_motion_vectors_MB ();
+
+    if (!IS_INTRA(currMB))
+    {
+      for (j = 0; j < 4; j++)
+        for (i = 0; i < 4; i++)
+        {
+          b8mode = currMB->b8mode[i/2+2*(j/2)];  // i/2 + 2*(j/2) selects one of the four b8mode / b8pdir entries
+          b8pdir = currMB->b8pdir[i/2+2*(j/2)];
+
+          if (b8pdir!=1)  // b8pdir == 1 leaves the LIST_0 vector at zero
+          {
+            enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][0] = rdopt->all_mv[j][i][LIST_0][(short)rdopt->refar[LIST_0][j][i]][b8mode][0];
+            enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][1] = rdopt->all_mv[j][i][LIST_0][(short)rdopt->refar[LIST_0][j][i]][b8mode][1];
+          }
+          else
+          {
+            enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][0] = 0;
+            enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][1] = 0;
+          }
+          if (bframe)
+          {
+            if (b8pdir!=0)  // b8pdir == 0 leaves the LIST_1 vector at zero
+            {
+              enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][0] = rdopt->all_mv[j][i][LIST_1][(short)rdopt->refar[LIST_1][j][i]][b8mode][0];
+              enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][1] = rdopt->all_mv[j][i][LIST_1][(short)rdopt->refar[LIST_1][j][i]][b8mode][1];
+            }
+            else
+            {
+              enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][0] = 0;
+              enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][1] = 0;
+            }
+          }
+        }
+    }
+    else
+    {
+      for (j = 0; j < 4; j++)  // intra macroblock: zero the stored vectors (2 shorts per block position)
+        memset(enc_picture->mv[LIST_0][j+img->block_y][img->block_x], 0, 2 * BLOCK_MULTIPLE * sizeof(short));
+      if (bframe)
+      {
+        for (j = 0; j < 4; j++)
+          memset(enc_picture->mv[LIST_1][j+img->block_y][img->block_x], 0, 2 * BLOCK_MULTIPLE * sizeof(short));
+      }
+    }
+  }
+}                             // end of copy_rdopt_data
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Copies the motion vectors (all_mv) and motion vector predictors
+ *    (pred_mv) stored in rdopt back into img, for every [0..3][0..3]
+ *    position, both lists, every reference index below
+ *    img->max_num_references and 9 mode slots.
+ ************************************************************************
+ */
+static void copy_motion_vectors_MB (void)
+{
+  const int lists[2] = { LIST_0, LIST_1 };
+  int col, row, ref, mode, n, comp;
+
+  for (col = 0; col < 4; col++)
+    for (row = 0; row < 4; row++)
+      for (ref = 0; ref < img->max_num_references; ref++)
+        for (mode = 0; mode < 9; mode++)
+          for (n = 0; n < 2; n++)
+          {
+            const int list = lists[n];
+
+            // both vector components of the vector and of its predictor
+            for (comp = 0; comp < 2; comp++)
+            {
+              img->all_mv [row][col][list][ref][mode][comp] = rdopt->all_mv [row][col][list][ref][mode][comp];
+              img->pred_mv[row][col][list][ref][mode][comp] = rdopt->pred_mv[row][col][list][ref][mode][comp];
+            }
+          }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the (NVB) report line for the current frame_no unless
+ *    input->Verbose is 0. Both time arguments are ignored.
+ ************************************************************************
+ */
+static void ReportNALNonVLCBits(time_t tmp_time, time_t me_time)
+{
+  //! Need to add type (i.e. SPS, PPS, SEI etc).
+  if (input->Verbose == 0)
+    return;
+
+  printf ("%04d(NVB)%8d \n", frame_no, 0);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the (IDR) report line for the first frame according to
+ *    input->Verbose (1: short form, 2: extended form, otherwise nothing),
+ *    then moves the bit counter into stats->bit_ctr_I and clears
+ *    stats->bit_ctr. Both time arguments are ignored.
+ *
+ *    With rate control enabled and picture- or MB-level interlace in
+ *    use, quadratic_RC->Iprev_bits is set to the bit counter first.
+ ************************************************************************
+ */
+static void ReportFirstframe(time_t tmp_time, time_t me_time)
+{
+  if (input->Verbose == 1)
+  {
+    printf ("%04d(IDR)%8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+      frame_no, 0,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+  }
+  else if (input->Verbose == 2)
+  {
+    printf ("%04d(IDR)%8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+      frame_no,0,0,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM", intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active,img->rd_pass, img->nal_reference_idc);
+  }
+
+  // Rate control: the per-frame bit count that used to be computed here was
+  // never read, so only the Iprev_bits bookkeeping of the interlace case remains.
+  if (input->RCEnable && (input->PicInterlace || input->MbInterlace))
+    quadratic_RC->Iprev_bits = stats->bit_ctr;
+
+  stats->bit_ctr_I = stats->bit_ctr;
+  stats->bit_ctr = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the report line of an intra picture, tagged (IDR) when
+ *    img->currentPicture->idr_flag is 1 and (I) otherwise. The layout is
+ *    chosen by input->Verbose (1: short form, 2: extended form, otherwise
+ *    nothing is printed). Both time arguments are ignored.
+ ************************************************************************
+ */
+static void ReportIntra(time_t tmp_time, time_t me_time)
+{
+  switch (input->Verbose)
+  {
+  case 1:
+    if (img->currentPicture->idr_flag != 1)
+    {
+      printf ("%04d(I)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+        frame_no, 0,
+        img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+        img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+    }
+    else
+    {
+      printf ("%04d(IDR)%8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+        frame_no, 0,
+        img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+        img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+    }
+    break;
+
+  case 2:
+    if (img->currentPicture->idr_flag != 1)
+    {
+      printf ("%04d(I)  %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+        frame_no, 0, 0,
+        img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+        img->fld_flag ? "FLD" : "FRM", intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active,img->rd_pass, img->nal_reference_idc);
+    }
+    else
+    {
+      printf ("%04d(IDR)%8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+        frame_no, 0, 0,
+        img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+        img->fld_flag ? "FLD" : "FRM", intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active,img->rd_pass, img->nal_reference_idc);
+    }
+    break;
+
+  default:
+    // any other verbosity level prints nothing
+    break;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the (SP) report line in the layout chosen by input->Verbose
+ *    (1: short form, 2: extended form, otherwise nothing is printed).
+ *    Both time arguments are ignored.
+ ************************************************************************
+ */
+static void ReportSP(time_t tmp_time, time_t me_time)
+{
+  switch (input->Verbose)
+  {
+  case 1:
+    printf ("%04d(SP) %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+      frame_no, 0,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+    break;
+
+  case 2:
+    // the extended form additionally reports active_pps->weighted_pred_flag
+    printf ("%04d(SP) %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+      frame_no, 0, active_pps->weighted_pred_flag,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM", intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active,img->rd_pass, img->nal_reference_idc);
+    break;
+
+  default:
+    break;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the (B) report line in the layout chosen by input->Verbose
+ *    (1: short form, 2: extended form, otherwise nothing is printed).
+ *    Both time arguments are ignored.
+ ************************************************************************
+ */
+static void ReportB(time_t tmp_time, time_t me_time)
+{
+  switch (input->Verbose)
+  {
+  case 1:
+    printf ("%04d(B)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+      frame_no, 0,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+    break;
+
+  case 2:
+    // the extended form adds weighted_bipred_idc and direct_spatial_mv_pred_flag
+    printf ("%04d(B)  %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d %1d %2d %2d  %d   %d\n",
+      frame_no, 0, active_pps->weighted_bipred_idc,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM",intras,img->direct_spatial_mv_pred_flag, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active,img->rd_pass, img->nal_reference_idc);
+    break;
+
+  default:
+    break;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the report line of a P picture in the layout chosen by
+ *    input->Verbose (1: short form, 2: extended form, otherwise nothing
+ *    is printed). In the short form a redundant picture
+ *    (input->redundant_pic_flag set and redundant_coding nonzero) is
+ *    tagged (R) and printed without a frame number.
+ *    Both time arguments are ignored.
+ ************************************************************************
+ */
+static void ReportP(time_t tmp_time, time_t me_time)
+{
+  switch (input->Verbose)
+  {
+  case 1:
+    if (input->redundant_pic_flag != 0 && redundant_coding)
+    { // report a redundant picture.
+      printf ("    (R)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+        0,
+        img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+        img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+    }
+    else
+    { // primary picture
+      printf ("%04d(P)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+        frame_no, 0,
+        img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+        img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc);
+    }
+    break;
+
+  case 2:
+    printf ("%04d(P)  %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+      frame_no, 0, active_pps->weighted_pred_flag,
+      img->AverageFrameQP, snr->snr_y, snr->snr_u, snr->snr_v, 0, 0,
+      img->fld_flag ? "FLD" : "FRM", intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active, img->rd_pass, img->nal_reference_idc);
+    break;
+
+  default:
+    break;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fills the automatically added border of imgY_org_frm / imgUV_org_frm
+ *    for picture sizes that are not multiples of the macroblock /
+ *    macroblock pair size, by repeating the last original column to the
+ *    right and then the last original row downwards.
+ *
+ * \param org_size_x
+ *    original image horizontal size (luma)
+ * \param org_size_y
+ *    original image vertical size (luma)
+ * \param img_size_x
+ *    coded image horizontal size (luma)
+ * \param img_size_y
+ *    coded image vertical size (luma)
+ * \param org_size_x_cr
+ *    original image horizontal size (chroma)
+ * \param org_size_y_cr
+ *    original image vertical size (chroma)
+ * \param img_size_x_cr
+ *    coded image horizontal size (chroma)
+ * \param img_size_y_cr
+ *    coded image vertical size (chroma)
+ ************************************************************************
+ */
+static void PaddAutoCropBorders (int org_size_x, int org_size_y, int img_size_x, int img_size_y,
+                                 int org_size_x_cr, int org_size_y_cr, int img_size_x_cr, int img_size_y_cr)
+{
+  int row, col, uv;
+
+  // luma, right border: each new sample repeats its left neighbour
+  for (row = 0; row < org_size_y; row++)
+  {
+    for (col = org_size_x; col < img_size_x; col++)
+      imgY_org_frm[row][col] = imgY_org_frm[row][col - 1];
+  }
+
+  // luma, bottom border: each new row repeats the row above over the full
+  // coded width, so it must run after the right border has been filled
+  for (row = org_size_y; row < img_size_y; row++)
+  {
+    for (col = 0; col < img_size_x; col++)
+      imgY_org_frm[row][col] = imgY_org_frm[row - 1][col];
+  }
+
+  // no chroma planes to pad for YUV400
+  if (img->yuv_format == YUV400)
+    return;
+
+  // the same two passes for each of the two chroma planes
+  for (uv = 0; uv < 2; uv++)
+  {
+    for (row = 0; row < org_size_y_cr; row++)
+    {
+      for (col = org_size_x_cr; col < img_size_x_cr; col++)
+        imgUV_org_frm[uv][row][col] = imgUV_org_frm[uv][row][col - 1];
+    }
+
+    for (row = org_size_y_cr; row < img_size_y_cr; row++)
+    {
+      for (col = 0; col < img_size_x_cr; col++)
+        imgUV_org_frm[uv][row][col] = imgUV_org_frm[uv][row - 1][col];
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Derives the absolute frame number in the source file from
+ *    start_tr_in_this_IGOP, IMG_NUMBER, input->jumpd and, for B pictures,
+ *    img->b_interval.
+ * \return
+ *    frame number in the file to be read
+ * \par side effects
+ *    the global variable frame_no is overwritten with the returned value
+ ************************************************************************
+ */
+static int CalculateFrameNumber(void)
+{
+  if (!img->b_frame_to_code)
+  {
+    // not a B picture: a whole number of (jumpd + 1) steps from the IGOP start
+    frame_no = start_tr_in_this_IGOP + IMG_NUMBER * (input->jumpd + 1);
+#ifdef _ADAPT_LAST_GROUP_
+    if (input->last_frame && img->number + 1 == input->no_frames)
+      frame_no = input->last_frame;
+#endif
+  }
+  else
+  {
+    // B picture: offset from the previous step by a multiple of b_interval
+    double b_steps;
+
+    if (input->HierarchicalCoding)
+      b_steps = (double) (1 + gop_structure[img->b_frame_to_code - 1].display_no);
+    else
+      b_steps = (double) img->b_frame_to_code;
+
+    frame_no = start_tr_in_this_IGOP + (IMG_NUMBER - 1) * (input->jumpd + 1) + (int) (img->b_interval * b_steps);
+  }
+
+  return frame_no;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Convert file read buffer to source picture structure
+ *
+ *    Samples are stored in buf row by row, symbol_size_in_bytes bytes
+ *    each. When testEndian() reports a big endian host, 2- and 4-byte
+ *    samples are byte-swapped while being copied; on a little endian
+ *    host the bytes are copied into the zeroed sample as they are.
+ *    error() is called when a file sample is wider than imgpel, or when
+ *    a big endian host is given a sample size other than 1, 2 or 4.
+ ************************************************************************
+ */
+void buf2img ( imgpel** imgX,           //!< Pointer to image plane (destination)
+               unsigned char* buf,      //!< Buffer holding the samples read from the file (source)
+               int size_x,              //!< horizontal size of picture
+               int size_y,              //!< vertical size of picture
+               int symbol_size_in_bytes //!< number of bytes in file used for one pixel
+               )
+{
+  int i,j;
+
+  unsigned short tmp16, ui16;
+  // Exactly 4 bytes are memcpy'd into tmp32. With a wider type (unsigned long
+  // on LP64) a big endian host would place them in the high half and the
+  // masks below would read bytes that were never written.
+  // Assumes unsigned int is 32 bits wide.
+  unsigned int   tmp32, ui32;
+
+  if (symbol_size_in_bytes> sizeof(imgpel))
+  {
+    error ("Source picture has higher bit depth than imgpel data type. Please recompile with larger data type for imgpel.", 500);
+  }
+
+  if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes))
+  {
+    // imgpel == pixel_in_file == 1 byte -> simple copy
+    for(j=0;j<size_y;j++)
+      memcpy(imgX[j], buf+j*size_x, size_x);
+  }
+  else
+  {
+    // sizeof (imgpel) > sizeof(char)
+    if (testEndian())
+    {
+      // big endian
+      switch (symbol_size_in_bytes)
+      {
+      case 1:
+        {
+          for(j=0;j<size_y;j++)
+            for(i=0;i<size_x;i++)
+            {
+              imgX[j][i]= buf[i+j*size_x];
+            }
+          break;
+        }
+      case 2:
+        {
+          for(j=0;j<size_y;j++)
+            for(i=0;i<size_x;i++)
+            {
+              memcpy(&tmp16, buf+((i+j*size_x)*2), 2);
+              ui16  = (tmp16 >> 8) | ((tmp16&0xFF)<<8);
+              imgX[j][i] = (imgpel) ui16;
+            }
+          break;
+        }
+      case 4:
+        {
+          for(j=0;j<size_y;j++)
+            for(i=0;i<size_x;i++)
+            {
+              memcpy(&tmp32, buf+((i+j*size_x)*4), 4);
+              ui32  = ((tmp32&0xFF00)<<8) | ((tmp32&0xFF)<<24) | ((tmp32&0xFF0000)>>8) | ((tmp32&0xFF000000)>>24);
+              imgX[j][i] = (imgpel) ui32;
+            }
+          // without this break a valid 32 bit read fell through into the error case
+          break;
+        }
+      default:
+        {
+           error ("reading only from formats of 8, 16 or 32 bit allowed on big endian architecture", 500);
+           break;
+        }
+      }
+    }
+    else
+    {
+      // little endian
+      for (j=0; j < size_y; j++)
+        for (i=0; i < size_x; i++)
+        {
+          imgX[j][i]=0;   // clear first: fewer bytes than sizeof(imgpel) may be copied in
+          memcpy(&(imgX[j][i]), buf +((i+j*size_x)*symbol_size_in_bytes), symbol_size_in_bytes);
+        }
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reads one new frame from file
+ *
+ * \par
+ *    Seeks p_in to HeaderSize + (input->start_frame + FrameNoInFile) frames,
+ *    then reads the luma plane into imgY_org_frm and, unless the format is
+ *    YUV400, the two chroma planes into imgUV_org_frm[0] and [1].
+ *    Any seek or read failure terminates the program.
+ *
+ * \param FrameNoInFile
+ *    Frame number in the source file
+ * \param HeaderSize
+ *    Number of bytes in the source file to be skipped
+ * \param xs
+ *    horizontal size of frame in pixels
+ * \param ys
+ *    vertical size of frame in pixels
+ * \param xs_cr
+ *    horizontal chroma size of frame in pixels
+ * \param ys_cr
+ *    vertical chroma size of frame in pixels
+ ************************************************************************
+ */
+static void ReadOneFrame (int FrameNoInFile, int HeaderSize, int xs, int ys, int xs_cr, int ys_cr)
+{
+  unsigned int symbol_size_in_bytes = img->pic_unit_size_on_disk/8;
+
+  const int imgsize_y = xs*ys;
+  const int imgsize_uv = xs_cr*ys_cr;
+
+  const int bytes_y = imgsize_y * symbol_size_in_bytes;
+  const int bytes_uv = imgsize_uv * symbol_size_in_bytes;
+
+  const int64 framesize_in_bytes = bytes_y + 2*bytes_uv;
+  unsigned char *buf;
+
+  Boolean rgb_input = (Boolean) (input->rgb_input_flag==1 && input->yuv_format==3);
+
+  assert (p_in != -1);
+
+  // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version
+  // NOTE: sized for one luma plane and reused for the chroma planes
+  if (NULL==(buf = malloc (xs*ys * symbol_size_in_bytes))) no_mem_exit("ReadOneFrame: buf");
+
+  // skip Header
+  if (lseek (p_in, HeaderSize, SEEK_SET) != HeaderSize)
+  {
+    error ("ReadOneFrame: cannot fseek to (Header size) in p_in", -1);
+  }
+
+  // skip starting frames
+  if (lseek (p_in, framesize_in_bytes * input->start_frame, SEEK_CUR) == -1)
+  {
+    snprintf(errortext, ET_SIZE, "ReadOneFrame: cannot advance file pointer in p_in beyond frame %d\n", input->start_frame);
+    error (errortext,-1);
+  }
+
+  // seek to current frame
+  if (lseek (p_in, framesize_in_bytes * FrameNoInFile, SEEK_CUR) == -1)
+  {
+    snprintf(errortext, ET_SIZE, "ReadOneFrame: cannot advance file pointer in p_in beyond frame %d\n", input->start_frame + FrameNoInFile);
+    error (errortext,-1);
+  }
+
+  // Here we are at the correct position for the source frame in the file.  Now
+  // read it.
+  if (img->pic_unit_size_on_disk%8 == 0)
+  {
+    // for RGB input the plane read first is the one stored one third into the frame
+    if(rgb_input)
+      lseek (p_in, framesize_in_bytes/3, SEEK_CUR);
+
+    if (read(p_in, buf, bytes_y) != bytes_y)
+    {
+      printf ("ReadOneFrame: cannot read %d bytes from input file, unexpected EOF?, exiting", bytes_y);
+      report_stats_on_error();
+      exit (-1);
+    }
+
+    buf2img(imgY_org_frm, buf, xs, ys, symbol_size_in_bytes);
+//#define _DEBUG_BITDEPTH_
+#ifdef _DEBUG_BITDEPTH_
+    {
+      int i,j;
+      for (j=0; j < ys; j++)
+      {
+        for (i=0; i < xs; i++)
+        {
+          imgY_org_frm[j][i]= (imgpel) (imgY_org_frm[j][i] & ((1<<img->bitdepth_luma ) - 1));
+        }
+      }
+    }
+#endif
+    if (img->yuv_format != YUV400)
+    {
+      // first chroma plane; the message reports the chroma byte count that was requested
+      if (read(p_in, buf, bytes_uv) != bytes_uv)
+      {
+        printf ("ReadOneFrame: cannot read %d bytes from input file, unexpected EOF?, exiting", bytes_uv);
+        report_stats_on_error();
+        exit (-1);
+      }
+      buf2img(imgUV_org_frm[0], buf, xs_cr, ys_cr, symbol_size_in_bytes);
+
+      // for RGB input rewind one whole frame to reach the remaining plane
+      if(rgb_input)
+        lseek (p_in, -framesize_in_bytes, SEEK_CUR);
+
+      // second chroma plane
+      if (read(p_in, buf, bytes_uv) != bytes_uv)
+      {
+        printf ("ReadOneFrame: cannot read %d bytes from input file, unexpected EOF?, exiting", bytes_uv);
+        report_stats_on_error();
+        exit (-1);
+      }
+      buf2img(imgUV_org_frm[1], buf, xs_cr, ys_cr, symbol_size_in_bytes);
+
+#ifdef _DEBUG_BITDEPTH_
+      {
+        int i,j;
+
+        for (j=0; j < ys_cr; j++)
+        {
+          for (i=0; i < xs_cr; i++)
+          {
+            imgUV_org_frm[0][j][i]=(imgpel) (imgUV_org_frm[0][j][i] & ((1<<img->bitdepth_chroma ) - 1));
+            imgUV_org_frm[1][j][i]=(imgpel) (imgUV_org_frm[1][j][i] & ((1<<img->bitdepth_chroma ) - 1));
+          }
+        }
+      }
+#endif
+
+      // for RGB input move the file position back to the end of this frame
+      if(rgb_input)
+        lseek (p_in, framesize_in_bytes*2/3, SEEK_CUR);
+    }
+  }
+  else
+  {
+    printf ("ReadOneFrame (NOT IMPLEMENTED): pic unit size on disk must be divided by 8");
+    exit (-1);
+  }
+  free (buf);
+
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    make the frame buffers (imgY_org_frm / imgUV_org_frm) the current
+ *    original-picture planes
+ ************************************************************************
+ */
+static void put_buffer_frame(void)
+{
+  imgUV_org = imgUV_org_frm;
+  imgY_org  = imgY_org_frm;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    make the top field buffers the current original-picture planes and
+ *    mark the field type as 0
+ ************************************************************************
+ */
+static void put_buffer_top(void)
+{
+  imgUV_org = imgUV_org_top;
+  imgY_org  = imgY_org_top;
+
+  img->fld_type = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    make the bottom field buffers the current original-picture planes
+ *    and mark the field type as 1
+ ************************************************************************
+ */
+static void put_buffer_bot(void)
+{
+  imgUV_org = imgUV_org_bot;
+  imgY_org  = imgY_org_bot;
+
+  img->fld_type = 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Wraps the bytes of one bitstream (a slice or one data partition)
+ *    into a NAL unit, writes it and adds the written bits to the
+ *    statistics
+ *
+ * \param currStream
+ *    byte aligned bitstream holding the payload
+ * \param partition
+ *    partition index; added to NALU_TYPE_DPA when data partitioning is on
+ ************************************************************************
+ */
+
+static void writeUnit(Bitstream* currStream, int partition)
+{
+  // KS: this is approx. max. allowed code picture size
+  const int buffer_size = 500 + img->FrameSizeInMbs * (128 + 256 * img->bitdepth_luma + 512 * img->bitdepth_chroma);
+  // the unit for macroblock 0 / partition 0 gets one more zero byte in its start code
+  const int zero_bytes = (img->current_mb_nr == 0 && partition == 0 ?ZEROBYTES_SHORTSTARTCODE+1:ZEROBYTES_SHORTSTARTCODE);
+  NALU_t *nalu;
+
+  assert (currStream->bits_to_go == 8);
+
+  nalu = AllocNALU(buffer_size);
+  nalu->startcodeprefix_len = 1+ zero_bytes;
+
+  // payload goes behind the first byte of the NALU, which is counted in len
+  nalu->len = currStream->byte_pos +1;
+  memcpy (&nalu->buf[1], currStream->streamBuffer, nalu->len-1);
+
+  if (img->currentPicture->idr_flag)
+  {
+    nalu->nal_unit_type = NALU_TYPE_IDR;
+    nalu->nal_reference_idc = NALU_PRIORITY_HIGHEST;
+  }
+  else
+  {
+    // B slices and all other non IDR slices are labelled the same way:
+    // different nal header for different partitions
+    if (input->partition_mode != 0)
+      nalu->nal_unit_type = NALU_TYPE_DPA +  partition;
+    else
+      nalu->nal_unit_type = NALU_TYPE_SLICE;
+
+    if (img->nal_reference_idc == 0)
+      nalu->nal_reference_idc = NALU_PRIORITY_DISPOSABLE;
+    else
+      nalu->nal_reference_idc = NALU_PRIORITY_HIGH;
+  }
+
+  nalu->forbidden_bit = 0;
+  stats->bit_ctr += WriteNALU (nalu);
+
+  FreeNALU(nalu);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    performs multi-pass encoding of same picture using different
+ *    coding conditions
+ *
+ * \par
+ *    Re-encodes the current frame up to two more times (frame_pic_2 and
+ *    frame_pic_3) with a changed QP, picture parameter set and/or slice
+ *    type. After each extra encode picture_coding_decision() compares it
+ *    with the best result so far and img->rd_pass records the winner.
+ *    When the first or second encode wins, enc_picture, img->type,
+ *    active_pps, img->qp and intras are set back to that encode's values.
+ *    With rate control enabled the model of the winning encode is kept in
+ *    quadratic_RC_best / generic_RC_best and copied back at the end.
+ *
+ * \note
+ *    img->rd_pass ends as 0, 1 or 2 (first, second or third encode),
+ *    assuming picture_coding_decision() returns 0 or 1 -- TODO confirm.
+ ************************************************************************
+ */
+
+static void rdPictureCoding(void)
+{
+  int second_qp = img->qp, rd_qp = img->qp;  // rd_qp: QP on entry; second_qp: QP picked for the second encode (set below)
+  int previntras = intras;                   // intras value that belongs to the best encode so far
+  int prevtype = img->type;                  // slice type on entry, restored when encode 1 or 2 wins
+  int skip_encode = 0;                       // non-zero: the next extra encode is skipped
+  pic_parameter_set_rbsp_t *sec_pps;         // PPS that was active for the second encode
+  int tmpFrameQP = img->SumFrameQP;          // SumFrameQP of the best encode so far, written back at the end
+
+  if ( input->RCEnable )
+  {
+    // save the current RC model as preliminary best
+    copy_rc_jvt( quadratic_RC_best, quadratic_RC );
+    copy_rc_generic( generic_RC_best, generic_RC );
+  }
+
+  if (img->type!=I_SLICE && input->GenerateMultiplePPS)  // choose PPS and QP for the second encode
+  {
+    if (img->type==P_SLICE)
+    {
+      if (test_wp_P_slice(0) == 1)
+      {
+        active_pps = PicParSet[1];
+      }
+      else
+      {
+        skip_encode = input->RDPSliceWeightOnly;
+        active_pps = PicParSet[0];
+        if (!img->AdaptiveRounding)
+          img->qp-=1;
+      }
+    }
+    else
+    {
+      active_pps = PicParSet[2];
+    }
+  }
+  else
+  {
+    if (!img->AdaptiveRounding)
+      img->qp-=1;
+  }
+
+  sec_pps = active_pps;
+  second_qp = img->qp;
+
+  img->write_macroblock = 0;
+
+  if (skip_encode)  // no second encode: the first one stays the best
+  {
+    img->rd_pass = 0;
+    enc_frame_picture2 = NULL;
+  }
+  else
+  {
+    if(input->RCEnable)
+    {
+      // re-store the initial RC model
+      copy_rc_jvt( quadratic_RC, quadratic_RC_init );
+      copy_rc_generic( generic_RC, generic_RC_init );
+
+      rc_init_pict(quadratic_RC, 1,0,1, input->GenerateMultiplePPS ? 1.0F : 0.85F );
+
+      img->qp  = updateQP(quadratic_RC, 0);
+    }
+
+    frame_picture (frame_pic_2,1);
+    img->rd_pass=picture_coding_decision(frame_pic_1, frame_pic_2, rd_qp);  // first vs. second encode
+  }
+  //      update_rd_picture_contexts (img->rd_pass);
+  if (img->rd_pass==0)  // first encode is still the best
+  {
+    enc_picture=enc_frame_picture;
+    if (img->type!=I_SLICE && input->GenerateMultiplePPS)
+    {
+      img->qp=rd_qp;
+      active_pps = PicParSet[0];
+    }
+    else
+    {
+      img->qp=rd_qp;
+    }
+    intras = previntras;
+    frame_pic = frame_pic_1;
+  }
+  else  // second encode won: its values become the new "best so far"
+  {
+    previntras = intras;
+    frame_pic = frame_pic_2;
+    tmpFrameQP = img->SumFrameQP;
+
+    if(input->RCEnable)
+    {
+      copy_rc_jvt( quadratic_RC_best, quadratic_RC );
+      copy_rc_generic( generic_RC_best, generic_RC );
+    }
+  }
+  // Final Encoding pass - note that we should
+  // make this more flexible in a later version.
+
+  if (img->type!=I_SLICE )  // choose slice type, PPS and QP for the third encode
+  {
+    skip_encode = 0;
+    img->qp    = rd_qp;
+
+    if (img->type == P_SLICE && (intras * 100 )/img->FrameSizeInMbs >=75)  // intras is at least 75% of FrameSizeInMbs: retry as I slice
+    {
+      img->type=I_SLICE;
+      active_pps = PicParSet[0];
+    }
+    else if (img->type==P_SLICE)
+    {
+      if (input->GenerateMultiplePPS)
+      {
+        if (test_wp_P_slice(1) == 1)
+        {
+          active_pps = PicParSet[1];
+        }
+        else if (input->RDPSliceBTest && active_sps->profile_idc != 66)  // retry the P picture as a B slice unless profile_idc is 66
+        {
+          img->type = B_SLICE;
+          active_pps = PicParSet[0];
+        }
+        else
+        {
+          skip_encode = input->RDPSliceWeightOnly;
+          active_pps = PicParSet[0];
+          if (!img->AdaptiveRounding)
+            img->qp+=1;
+        }
+      }
+    }
+    else
+    {
+      if (input->GenerateMultiplePPS && test_wp_B_slice(0) == 1)
+      {
+        active_pps = PicParSet[1];
+      }
+      else
+      {
+        skip_encode = input->RDBSliceWeightOnly;
+        img->qp = rd_qp + (img->nal_reference_idc ? - 1 : 1);
+      }
+    }
+  }
+  else
+  {
+    active_pps = PicParSet[0];
+    if (!img->AdaptiveRounding)
+      img->qp    = (rd_qp + 1);
+  }
+
+
+  img->write_macroblock = 0;
+
+  if (skip_encode)  // no third encode: img->rd_pass keeps the result of the second pass
+  {
+    enc_frame_picture3 = NULL;
+    img->qp = rd_qp;
+  }
+  else
+  {
+    if(input->RCEnable)
+    {
+      // re-store the initial RC model
+      copy_rc_jvt( quadratic_RC, quadratic_RC_init );
+      copy_rc_generic( generic_RC, generic_RC_init );
+
+      rc_init_pict(quadratic_RC, 1,0,1, input->GenerateMultiplePPS ? 1.0F : 1.15F );
+
+      img->qp = updateQP(quadratic_RC, 0);
+    }
+
+    frame_picture (frame_pic_3,2);
+
+    if (img->rd_pass==0)  // compare the third encode with whichever encode is best so far
+      img->rd_pass  = 2*picture_coding_decision(frame_pic_1, frame_pic_3, rd_qp);
+    else
+      img->rd_pass +=   picture_coding_decision(frame_pic_2, frame_pic_3, rd_qp);
+
+    if ( input->RCEnable && img->rd_pass == 2 )
+    {
+      copy_rc_jvt( quadratic_RC_best, quadratic_RC );
+      copy_rc_generic( generic_RC_best, generic_RC );
+    }
+    if ( img->rd_pass == 2 )
+      tmpFrameQP = img->SumFrameQP;
+  }
+
+  //update_rd_picture_contexts (img->rd_pass);
+  if (img->rd_pass==0)  // first encode wins overall: restore its state
+  {
+    enc_picture = enc_frame_picture;
+    img->type   = prevtype;
+    active_pps  = PicParSet[0];
+    img->qp     = rd_qp;
+    intras      = previntras;
+  }
+  else if (img->rd_pass==1)  // second encode wins overall: restore its state
+  {
+    enc_picture = enc_frame_picture2;
+    img->type   = prevtype;
+    active_pps  = sec_pps;
+    img->qp     = second_qp;
+    intras      = previntras;
+  }
+  if ( input->RCEnable )  // continue rate control from the model of the winning encode
+  {
+    copy_rc_jvt( quadratic_RC, quadratic_RC_best );
+    copy_rc_generic( generic_RC, generic_RC_best );
+  }
+  img->SumFrameQP = tmpFrameQP;
+}
+
+/*!
+*************************************************************************************
+* \brief
+*     Output SP frames coefficients
+*
+* \par
+*     Writes img->height rows of lrec followed by img->height_cr rows of each of
+*     the two lrec_uv planes, as raw ints, to input->sp_output_filename. The first
+*     call (number_sp2_frames == 0) creates the file and bumps the counter, later
+*     calls append. Any open, write or close failure terminates the program.
+*************************************************************************************
+*/
+void output_SP_coefficients()
+{
+  int i,k;
+  FILE *SP_coeff_file;
+  const int first_frame = (number_sp2_frames==0);
+
+  // create the file for the first frame, append for all following ones
+  if ((SP_coeff_file = fopen(input->sp_output_filename, first_frame ? "wb" : "ab")) == NULL)
+  {
+    printf ("Fatal: cannot open SP output file '%s', exit (-1)\n", input->sp_output_filename);
+    exit (-1);
+  }
+  if (first_frame)
+    number_sp2_frames++;
+
+  for(i=0;i<img->height;i++)
+  {
+    // a short write would leave a file that read_SP_coefficients cannot use
+    if(img->width!=(int)fwrite(lrec[i],sizeof(int),img->width,SP_coeff_file))
+    {
+      printf ("Fatal: cannot write to SP output file, exit (-1)\n");
+      exit (-1);
+    }
+  }
+  for(k=0;k<2;k++)
+  {
+    for(i=0;i<img->height_cr;i++)
+    {
+      if(img->width_cr!=(int)fwrite(lrec_uv[k][i],sizeof(int),img->width_cr,SP_coeff_file))
+      {
+        printf ("Fatal: cannot write to SP output file, exit (-1)\n");
+        exit (-1);
+      }
+    }
+  }
+  // fclose flushes buffered data, so a failure here is a write failure too
+  if (fclose(SP_coeff_file) != 0)
+  {
+    printf ("Fatal: cannot write to SP output file, exit (-1)\n");
+    exit (-1);
+  }
+}
+
+/*!
+*************************************************************************************
+* \brief
+*     Read SP frames coefficients
+*
+* \par
+*     Opens input->sp2_input_filename1 when input->qp2start > 0 and img->tr lies in
+*     the second half of a 2*qp2start period, otherwise input->sp2_input_filename2.
+*     Seeks to record number_sp2_frames (img->size * 3/2 ints per record), increments
+*     the counter and fills lrec and both lrec_uv planes. Any failure terminates the
+*     program.
+*************************************************************************************
+*/
+void read_SP_coefficients()
+{
+  int i,k;
+  FILE *SP_coeff_file;
+  const char *sp_input_filename;
+
+  if ( (input->qp2start > 0) && ( ( (img->tr ) % (2*input->qp2start) ) >=input->qp2start ))
+    sp_input_filename = input->sp2_input_filename1;
+  else
+    sp_input_filename = input->sp2_input_filename2;
+
+  // report the file that was actually opened (the two names used to be swapped)
+  if ((SP_coeff_file = fopen(sp_input_filename,"rb")) == NULL)
+  {
+    printf ("Fatal: cannot open SP input file '%s', exit (-1)\n", sp_input_filename);
+    exit (-1);
+  }
+
+  if (0 != fseek (SP_coeff_file, img->size * 3/2*number_sp2_frames*sizeof(int), SEEK_SET))
+  {
+    printf ("Fatal: cannot seek in SP input file, exit (-1)\n");
+    exit (-1);
+  }
+  number_sp2_frames++;
+
+  for(i=0;i<img->height;i++)
+  {
+    if(img->width!=(int)fread(lrec[i],sizeof(int),img->width,SP_coeff_file))
+    {
+      printf ("Fatal: cannot read in SP input file, exit (-1)\n");
+      exit (-1);
+    }
+  }
+
+  for(k=0;k<2;k++)
+  {
+    for(i=0;i<img->height_cr;i++)
+    {
+      if(img->width_cr!=(int)fread(lrec_uv[k][i],sizeof(int),img->width_cr,SP_coeff_file))
+      {
+        printf ("Fatal: cannot read in SP input file, exit (-1)\n");
+        exit (-1);
+      }
+    }
+  }
+  fclose(SP_coeff_file);
+}
+
+


Index: llvm-test/MultiSource/Applications/JM/lencod/image.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/image.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/image.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,34 @@
+
+/*!
+ ************************************************************************
+ * \file image.h
+ *
+ * \brief
+ *    headers for image processing
+ *
+ * \author
+ *  Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *  Copyright (C) 1999  Telenor Satellite Services, Norway
+ ************************************************************************
+ */
+#ifndef _IMAGE_H_
+#define _IMAGE_H_
+
+#include "mbuffer.h"
+
+extern StorablePicture *enc_picture;         // picture currently selected; rdPictureCoding() in image.c points it at enc_frame_picture or enc_frame_picture2
+extern StorablePicture *enc_frame_picture;   // candidate selected when img->rd_pass == 0
+extern StorablePicture *enc_frame_picture2;  // candidate selected when img->rd_pass == 1; set to NULL when the second encode is skipped
+extern StorablePicture *enc_frame_picture3;  // set to NULL when the third encode is skipped; presumably the third pass candidate -- TODO confirm
+extern StorablePicture *enc_top_picture;     // presumably the top field picture -- TODO confirm
+extern StorablePicture *enc_bottom_picture;  // presumably the bottom field picture -- TODO confirm
+
+int encode_one_frame (void);
+void report_frame_statistic(void);
+Boolean dummy_slice_too_big(int bits_slice);
+void copy_rdopt_data (int field_type);       // For MB level field/frame coding tools
+
+void UnifiedOneForthPix (StorablePicture *s);
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/img_chroma.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/img_chroma.c:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/img_chroma.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,127 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file img_chroma.c
+ *
+ * \brief
+ *    Chroma interpolation functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Athanasios Leontaris    <aleon at dolby.com>
+ *      - Alexis Michael Tourapis <alexis.tourapis at dolby.com>
+ *
+ *************************************************************************************
+ */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <limits.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "img_luma.h"
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Builds the chroma sub-images of a picture: 16 (YUV444), 32 (YUV422)
+  *    or 64 (YUV420) planes per chroma component, one for every
+  *    fractional sample offset. Each output sample is a weighted sum of
+  *    the four surrounding chroma samples (weights in 1/64 units); rows
+  *    and columns outside the picture repeat the nearest border sample.
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  ************************************************************************
+  */
+ void getSubImagesChroma( StorablePicture *s )
+ {
+   const int last_x   = s->size_x_cr - 1;
+   const int last_y   = s->size_y_cr - 1;
+   const int padded_w = s->size_x_cr + 2*img_pad_size_uv_x;
+   const int padded_h = s->size_y_cr + 2*img_pad_size_uv_y;
+ 
+   int count_x, count_y;   // number of sub-images per direction
+   int step_x, step_y;     // offset increment (in 1/8 units) between sub-images
+   int plane, sub_y, sub_x, frac_y, frac_x;
+   int row, col, x, y;
+ 
+   imgpel *upper, *lower, *dst;
+ 
+   if ( img->yuv_format == YUV420 ) {
+     count_x = count_y = 8;
+     step_x  = step_y  = 1;
+   }
+   else if ( img->yuv_format == YUV422 ) {
+     count_x = 8;
+     count_y = 4;
+     step_x  = 1;
+     step_y  = 2;
+   }
+   else { // YUV444
+     count_x = count_y = 4;
+     step_x  = step_y  = 2;
+   }
+ 
+   // U or V
+   for ( plane = 0; plane < 2; plane++ )
+   {
+     for ( sub_y = 0, frac_y = 0; sub_y < count_y; sub_y++, frac_y += step_y )
+     {
+       // vertical weights; the border columns use them scaled by 8
+       const int inv_y  = 8 - frac_y;
+       const int edge_u = inv_y * 8;
+       const int edge_l = frac_y * 8;
+ 
+       for ( sub_x = 0, frac_x = 0; sub_x < count_x; sub_x++, frac_x += step_x )
+       {
+         const int w_ul = inv_y  * (8-frac_x);
+         const int w_ur = inv_y  * frac_x;
+         const int w_ll = frac_y * (8-frac_x);
+         const int w_lr = frac_y * frac_x;
+ 
+         for ( row = 0; row < padded_h; row++ )
+         {
+           // source rows are clamped to the picture
+           y = row - img_pad_size_uv_y;
+           upper = s->imgUV[plane][iClip3(0,last_y, y)];
+           lower = s->imgUV[plane][iClip3(0,last_y, y + 1)];
+           dst   = &( s->imgUV_sub[plane][sub_y][sub_x][row][0] );
+ 
+           // left padding: repeat column 0
+           for ( x = -img_pad_size_uv_x, col = 0; x < 0; x++, col++ )
+           {
+             dst[col] = (imgpel) rshift_rnd_sf(
+               edge_u * upper[0] + edge_l * lower[0], 6 );
+           }
+ 
+           // picture columns that have a right neighbour
+           for ( x = 0; x < last_x; x++, col++ )
+           {
+             dst[col] = (imgpel) rshift_rnd_sf(
+               w_ul * upper[x] + w_ur * upper[x + 1] +
+               w_ll * lower[x] + w_lr * lower[x + 1], 6 );
+           }
+ 
+           // last picture column and right padding: repeat the last column
+           for ( x = last_x; x < padded_w - img_pad_size_uv_x; x++, col++ )
+           {
+             dst[col] = (imgpel) rshift_rnd_sf(
+               edge_u * upper[last_x] + edge_l * lower[last_x], 6 );
+           }
+         }
+       }
+     }
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/img_chroma.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/img_chroma.h:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/img_chroma.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,23 ----
+ /*!
+  ***************************************************************************
+  * \file
+  *    img_chroma.h
+  *
+  * \author
+  *    Athanasios Leontaris           <aleon at dolby.com>
+  *    Alexis Michael Tourapis        <alexis.tourapis at dolby.com>
+  *
+  * \date
+  *    4. October 2006
+  *
+  * \brief
+  *    Headerfile for chroma interpolation functions
+  **************************************************************************
+  */
+ 
+ #ifndef _IMG_CHROMA_H_
+ #define _IMG_CHROMA_H_
+ 
+ void getSubImagesChroma( StorablePicture *s );  // fills s->imgUV_sub from s->imgUV (defined in img_chroma.c)
+ 
+ #endif // _IMG_CHROMA_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/img_luma.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/img_luma.c:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/img_luma.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,557 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file img_luma.c
+ *
+ * \brief
+ *    Luma interpolation functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Athanasios Leontaris    <aleon at dolby.com>
+ *      - Alexis Michael Tourapis <alexis.tourapis at dolby.com>
+ *
+ *************************************************************************************
+ */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <memory.h>
+ #include <limits.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "img_luma.h"
+ 
+ const int ONE_FOURTH_TAP[2][3] =   // [filter set][tap]; getHorSubImageSixTap reads set 0 and applies tap i to the sample pair at offsets -i and i+1
+ {
+   {20, -5, 1},  // AVC Interpolation taps
+   {20,-4, 0},   // Experimental - not valid
+ };
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Creates the 4x4 = 16 images that contain quarter-pel samples
+  *    sub-sampled at different spatial orientations;
+  *    enables more efficient implementation
+  *
+  * \par
+  *    Sub-image [0][0] is a padded copy of s->imgY. The other sub-images
+  *    are derived from it by the helper calls below, which run in order:
+  *    later calls take sub-images produced by earlier ones as sources.
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  ************************************************************************
+  */
+ void getSubImagesLuma( StorablePicture *s )
+ {
+   int i, j;
+   int jj, jpad;
+ 
+   imgpel  **imgY = s->imgY;
+   int size_x_minus1 = s->size_x - 1;
+   int size_y_minus1 = s->size_y - 1;
+ 
+   imgpel *wBufDst, *wBufSrc;
+ 
+   //  0  1  2  3
+   //  4  5  6  7
+   //  8  9 10 11
+   // 12 13 14 15
+ 
+   //// INTEGER PEL POSITIONS ////
+ 
+   // sub-image 0 [0][0]
+   // simply copy the integer pels
+   for (j = -IMG_PAD_SIZE; j < s->size_y + IMG_PAD_SIZE; j++)
+   {
+     jj = iClip3(0, size_y_minus1, j);  // rows outside the picture repeat the nearest picture row
+     jpad = j + IMG_PAD_SIZE;
+     wBufDst = &( s->imgY_sub[0][0][jpad][IMG_PAD_SIZE] );  // points at picture column 0, so negative indices reach the left padding
+     wBufSrc = imgY[jj];
+     // left IMG_PAD_SIZE
+     for (i = -IMG_PAD_SIZE; i < 0; i++)
+     {
+       wBufDst[i] = wBufSrc[0];
+     }
+     // right IMG_PAD_SIZE
+     for (i = s->size_x; i < s->size_x + IMG_PAD_SIZE; i++)
+     {
+       wBufDst[i] = wBufSrc[size_x_minus1];
+     }
+     // center 0-(s->size_x)
+     memcpy(wBufDst, wBufSrc, s->size_x * sizeof(imgpel));
+   }
+ 
+   //// HALF-PEL POSITIONS: SIX-TAP FILTER ////
+ 
+   // sub-image 2 [0][2]
+   // HOR interpolate (six-tap) sub-image [0][0]
+   getHorSubImageSixTap( s, 0, 2, 0, 0 );
+ 
+   // sub-image 8 [2][0]
+   // VER interpolate (six-tap) sub-image [0][0]
+   getVerSubImageSixTap( s, 2, 0, 0, 0, 0 );
+ 
+   // sub-image 10 [2][2]
+   // VER interpolate (six-tap) sub-image [0][2]
+   getVerSubImageSixTap( s, 2, 2, 0, 2, 1 );
+ 
+   //// QUARTER-PEL POSITIONS: BI-LINEAR INTERPOLATION ////
+ 
+   // sub-image 1 [0][1]
+   getHorSubImageBiLinear( s, 0, 1, 0, 0, 0, 2,  0 );
+   // sub-image 3 [0][3]
+   getHorSubImageBiLinear( s, 0, 3, 0, 2, 0, 0,  1 );
+   // sub-image 9 [2][1]
+   getHorSubImageBiLinear( s, 2, 1, 2, 0, 2, 2,  0 );
+   // sub-image 11 [2][3]
+   getHorSubImageBiLinear( s, 2, 3, 2, 2, 2, 0,  1 );
+ 
+   // sub-image 4 [1][0]
+   getVerSubImageBiLinear( s, 1, 0, 0, 0, 2, 0,  0 );
+   // sub-image 6 [1][2]
+   getVerSubImageBiLinear( s, 1, 2, 0, 2, 2, 2,  0 );
+ 
+   // sub-image 12 [3][0]
+   getVerSubImageBiLinear( s, 3, 0, 2, 0, 0, 0,  1 );
+   // sub-image 14 [3][2]
+   getVerSubImageBiLinear( s, 3, 2, 2, 2, 0, 2,  1 );
+ 
+   // sub-image 5 [1][1]
+   getDiagSubImageBiLinear( s, 1, 1, 0, 2, 2, 0,  0, 0, 0, 0 );
+   // sub-image 7 [1][3]
+   getDiagSubImageBiLinear( s, 1, 3, 0, 2, 2, 0,  0, 0, 0, 1 );
+   // sub-image 13 [3][1]
+   getDiagSubImageBiLinear( s, 3, 1, 2, 0, 0, 2,  0, 0, 1, 0 );
+   // sub-image 15 [3][3]
+   getDiagSubImageBiLinear( s, 3, 3, 0, 2, 2, 0,  1, 0, 0, 1 );
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Filters every row of one padded luma sub-image horizontally with
+  *    the six-tap kernel ONE_FOURTH_TAP[0]. The rounded and clipped
+  *    result goes to the destination sub-image; the raw filter sum of
+  *    every sample is also stored in imgY_sub_tmp.
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  * \param dst_y
+  *    vertical index to sub-image being generated
+  * \param dst_x
+  *    horizontal index to sub-image being generated
+  * \param src_y
+  *    vertical index to source sub-image
+  * \param src_x
+  *    horizontal index to source sub-image
+  ************************************************************************
+  */
+ void getHorSubImageSixTap( StorablePicture *s, int dst_y, int dst_x, int src_y, int src_x )
+ {
+   const int c0 = ONE_FOURTH_TAP[0][0];
+   const int c1 = ONE_FOURTH_TAP[0][1];
+   const int c2 = ONE_FOURTH_TAP[0][2];
+ 
+   const int padded_h = s->size_y + 2 * IMG_PAD_SIZE;
+   const int padded_w = s->size_x + 2 * IMG_PAD_SIZE;
+   const int last_col = padded_w - 1;
+ 
+   int row, col, sum;
+   imgpel *src, *dst;
+   int *raw;
+ 
+   for (row = 0; row < padded_h; row++)
+   {
+     src = s->imgY_sub[src_y][src_x][row];
+     dst = s->imgY_sub[dst_y][dst_x][row];
+     raw = imgY_sub_tmp[row];
+ 
+     // first two columns: taps reaching left of the row are clamped to column 0
+     for (col = 0; col < 2; col++)
+     {
+       sum = c0 * (src[col]               + src[col + 1])
+           + c1 * (src[imax (0, col - 1)] + src[col + 2])
+           + c2 * (src[imax (0, col - 2)] + src[col + 3]);
+ 
+       raw[col] = sum;
+       dst[col] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( sum, 5 ) );
+     }
+ 
+     // interior columns: all six taps lie inside the row
+     for (col = 2; col < padded_w - 3; col++)
+     {
+       sum = c0 * (src[col]     + src[col + 1])
+           + c1 * (src[col - 1] + src[col + 2])
+           + c2 * (src[col - 2] + src[col + 3]);
+ 
+       raw[col] = sum;
+       dst[col] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( sum, 5 ) );
+     }
+ 
+     // last three columns: taps reaching right of the row are clamped to the last column
+     for (col = padded_w - 3; col < padded_w; col++)
+     {
+       sum = c0 * (src[col]     + src[imin (last_col, col + 1)])
+           + c1 * (src[col - 1] + src[imin (last_col, col + 2)])
+           + c2 * (src[col - 2] + src[imin (last_col, col + 3)]);
+ 
+       raw[col] = sum;
+       dst[col] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( sum, 5 ) );
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Does _vertical_ interpolation using the SIX TAP filters
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  * \param dst_x
+  *    horizontal index to sub-image being generated
+  * \param dst_y
+  *    vertical index to sub-image being generated
+  * \param src_x
+  *    horizontal index to source sub-image
+  * \param src_y
+  *    vertical index to source sub-image
+  * \param use_stored_int
+  *    use stored shifted integer version of picture to temporary array for
+  *    increased fidelity during application of the six tap filter
+  ************************************************************************
+  */
+ void getVerSubImageSixTap( StorablePicture *s, int dst_y, int dst_x, int src_y, int src_x, int use_stored_int )
+ {
+   // Vertical six-tap pass: fills sub-image [dst_y][dst_x] of s, one padded row at a time.
+   //   use_stored_int == 0 : samples are read from sub-image [src_y][src_x]; the sum is scaled with rshift_rnd_sf(.., 5)
+   //   use_stored_int != 0 : samples are read from the global imgY_sub_tmp;  the sum is scaled with rshift_rnd_sf(.., 10)
+   //                         NOTE(review): presumably imgY_sub_tmp holds the unscaled result of an earlier
+   //                         horizontal pass, which would explain the larger shift -- confirm with the caller
+   // Every result is clipped to [0, img->max_imgpel_value] before it is stored.
+   int is;                                   // filter accumulator for the current sample
+   int jpad;                                 // current row in the padded picture
+   int jlow1, jlow2, jhigh1, jhigh2, jhigh3; // neighbouring rows: jpad-1, jpad-2, jpad+1, jpad+2, jpad+3 (clamped at the borders)
+   int ipad;                                 // current column in the padded picture
+   int ypadded_size = s->size_y + 2 * IMG_PAD_SIZE;
+   int xpadded_size = s->size_x + 2 * IMG_PAD_SIZE;
+   int maxy = ypadded_size - 1;              // last valid row index
+ 
+   imgpel **wxBufSrc, **wxBufDst, *wxLineDst;
+   // the three filter coefficients; each one weights a symmetric pair of rows around jpad + 1/2
+   int tap0 = ONE_FOURTH_TAP[0][0];
+   int tap1 = ONE_FOURTH_TAP[0][1];
+   int tap2 = ONE_FOURTH_TAP[0][2];
+ 
+   wxBufSrc = s->imgY_sub[src_y][src_x];
+   wxBufDst = s->imgY_sub[dst_y][dst_x];
+ 
+   if ( !use_stored_int ) { // causes code expansion but is better since we avoid too many
+     // branches within the j loop
+     // top: rows 0 and 1, where jpad-1 / jpad-2 would be negative and are clamped to row 0
+     for (jpad = 0; jpad < 2; jpad++)
+     {
+       wxLineDst = wxBufDst[jpad];
+       jlow1  = imax (0, jpad - 1);
+       jlow2  = imax (0, jpad - 2);
+       jhigh1 = jpad + 1;
+       jhigh2 = jpad + 2;
+       jhigh3 = jpad + 3;
+       for (ipad = 0; ipad < xpadded_size; ipad++)
+       {
+         is =
+           (tap0 * (wxBufSrc[jpad ][ipad] + wxBufSrc[jhigh1][ipad]) +
+           tap1 *  (wxBufSrc[jlow1][ipad] + wxBufSrc[jhigh2][ipad]) +
+           tap2 *  (wxBufSrc[jlow2][ipad] + wxBufSrc[jhigh3][ipad]));
+ 
+         wxLineDst[ipad] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( is, 5 ) );
+       }
+     }
+     // center: all six rows jpad-2 .. jpad+3 are inside the picture, no clamping needed
+     for (jpad = 2; jpad < ypadded_size - 3; jpad++)
+     {
+       wxLineDst = wxBufDst[jpad];
+       jlow1  = jpad - 1;
+       jlow2  = jpad - 2;
+       jhigh1 = jpad + 1;
+       jhigh2 = jpad + 2;
+       jhigh3 = jpad + 3;
+       for (ipad = 0; ipad < xpadded_size; ipad++)
+       {
+         is =
+           (tap0 * (wxBufSrc[jpad ][ipad] + wxBufSrc[jhigh1][ipad]) +
+           tap1 *  (wxBufSrc[jlow1][ipad] + wxBufSrc[jhigh2][ipad]) +
+           tap2 *  (wxBufSrc[jlow2][ipad] + wxBufSrc[jhigh3][ipad]));
+ 
+         wxLineDst[ipad] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( is, 5 ) );
+       }
+     }
+ 
+     // bottom: last three rows, where jpad+1 .. jpad+3 may pass the end and are clamped to maxy
+     for (jpad = ypadded_size - 3; jpad < ypadded_size; jpad++)
+     {
+       wxLineDst = wxBufDst[jpad];
+       jlow1  = jpad - 1;
+       jlow2  = jpad - 2;
+       jhigh1 = imin (maxy, jpad + 1);
+       jhigh2 = imin (maxy, jpad + 2);
+       jhigh3 = imin (maxy, jpad + 3);
+       for (ipad = 0; ipad < xpadded_size; ipad++)
+       {
+         is =
+           (tap0 * (wxBufSrc[jpad ][ipad] + wxBufSrc[jhigh1][ipad]) +
+           tap1 *  (wxBufSrc[jlow1][ipad] + wxBufSrc[jhigh2][ipad]) +
+           tap2 *  (wxBufSrc[jlow2][ipad] + wxBufSrc[jhigh3][ipad]));
+ 
+         wxLineDst[ipad] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( is, 5 ) );
+       }
+     }
+   }
+   else
+   {
+     // same three row ranges as above, but the input comes from imgY_sub_tmp and the shift is 10
+     // top
+     for (jpad = 0; jpad < 2; jpad++)
+     {
+       wxLineDst = wxBufDst[jpad];
+       jlow1  = imax (0, jpad - 1);
+       jlow2  = imax (0, jpad - 2);
+       jhigh1 = jpad + 1;
+       jhigh2 = jpad + 2;
+       jhigh3 = jpad + 3;
+ 
+       for (ipad = 0; ipad < xpadded_size; ipad++)
+       {
+         is =
+           (tap0 * (imgY_sub_tmp[jpad ][ipad] + imgY_sub_tmp[jhigh1][ipad]) +
+           tap1 *  (imgY_sub_tmp[jlow1][ipad] + imgY_sub_tmp[jhigh2][ipad]) +
+           tap2 *  (imgY_sub_tmp[jlow2][ipad] + imgY_sub_tmp[jhigh3][ipad]));
+ 
+         wxLineDst[ipad] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( is, 10 ) );
+       }
+     }
+ 
+     // center
+     for (jpad = 2; jpad < ypadded_size - 3; jpad++)
+     {
+       wxLineDst = wxBufDst[jpad];
+       jlow1  = jpad - 1;
+       jlow2  = jpad - 2;
+       jhigh1 = jpad + 1;
+       jhigh2 = jpad + 2;
+       jhigh3 = jpad + 3;
+       for (ipad = 0; ipad < xpadded_size; ipad++)
+       {
+         is =
+           (tap0 * (imgY_sub_tmp[jpad ][ipad] + imgY_sub_tmp[jhigh1][ipad]) +
+           tap1 *  (imgY_sub_tmp[jlow1][ipad] + imgY_sub_tmp[jhigh2][ipad]) +
+           tap2 *  (imgY_sub_tmp[jlow2][ipad] + imgY_sub_tmp[jhigh3][ipad]));
+ 
+         wxLineDst[ipad] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( is, 10 ) );
+       }
+     }
+ 
+     // bottom
+     for (jpad = ypadded_size - 3; jpad < ypadded_size; jpad++)
+     {
+       wxLineDst = wxBufDst[jpad];
+       jlow1  = jpad - 1;
+       jlow2  = jpad - 2;
+       jhigh1 = imin (maxy, jpad + 1);
+       jhigh2 = imin (maxy, jpad + 2);
+       jhigh3 = imin (maxy, jpad + 3);
+       for (ipad = 0; ipad < xpadded_size; ipad++)
+       {
+         is =
+           (tap0 * (imgY_sub_tmp[jpad ][ipad] + imgY_sub_tmp[jhigh1][ipad]) +
+           tap1 *  (imgY_sub_tmp[jlow1][ipad] + imgY_sub_tmp[jhigh2][ipad]) +
+           tap2 *  (imgY_sub_tmp[jlow2][ipad] + imgY_sub_tmp[jhigh3][ipad]));
+ 
+         wxLineDst[ipad] = (imgpel) iClip3 (0, img->max_imgpel_value, rshift_rnd_sf( is, 10 ) );
+       }
+     }
+ 
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Does _horizontal_ interpolation using the BiLinear filter
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  * \param dst_x
+  *    horizontal index to sub-image being generated
+  * \param dst_y
+  *    vertical index to sub-image being generated
+  * \param src_x_l
+  *    horizontal index to "LEFT" source sub-image
+  * \param src_y_l
+  *    vertical index to "LEFT" source sub-image
+  * \param src_x_r
+  *    horizontal index to "RIGHT" source sub-image
+  * \param src_y_r
+  *    vertical index to "RIGHT" source sub-image
+  * \param offset
+  *    offset (either +0 or +1) for RIGHT sub-image HOR coordinate
+  ************************************************************************
+  */
+ void getHorSubImageBiLinear( StorablePicture *s, int dst_y, int dst_x, int src_y_l, int src_x_l, int src_y_r, int src_x_r, int offset )
+ {
+   const int height    = s->size_y + 2 * IMG_PAD_SIZE;  // padded picture height
+   const int width     = s->size_x + 2 * IMG_PAD_SIZE;  // padded picture width
+   const int last_col  = width - 1;
+   const int split_col = last_col - offset;             // first column served by the pinned loop
+   int row, col;
+ 
+   for (row = 0; row < height; row++)
+   {
+     imgpel *left  = s->imgY_sub[src_y_l][src_x_l][row];
+     imgpel *right = s->imgY_sub[src_y_r][src_x_r][row];
+     imgpel *out   = s->imgY_sub[dst_y][dst_x][row];
+ 
+     // columns where the RIGHT sample is read at col + offset
+     for (col = 0; col < split_col; col++)
+       out[col] = (imgpel) rshift_rnd_sf( left[col] + right[col + offset], 1 );
+ 
+     // remaining columns: the RIGHT sample is pinned to the last column
+     for (col = split_col; col < width; col++)
+       out[col] = (imgpel) rshift_rnd_sf( left[col] + right[last_col], 1 );
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Does _vertical_ interpolation using the BiLinear filter
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  * \param dst_x
+  *    horizontal index to sub-image being generated
+  * \param dst_y
+  *    vertical index to sub-image being generated
+  * \param src_x_l
+  *    horizontal index to "TOP" source sub-image
+  * \param src_y_l
+  *    vertical index to "TOP" source sub-image
+  * \param src_x_r
+  *    horizontal index to "BOTTOM" source sub-image
+  * \param src_y_r
+  *    vertical index to "BOTTOM" source sub-image
+  * \param offset
+  *    offset (either +0 or +1) for BOTTOM sub-image VER coordinate
+  ************************************************************************
+  */
+ void getVerSubImageBiLinear( StorablePicture *s, int dst_y, int dst_x, int src_y_l, int src_x_l, int src_y_r, int src_x_r, int offset )
+ {
+   const int height    = s->size_y + 2 * IMG_PAD_SIZE;  // padded picture height
+   const int width     = s->size_x + 2 * IMG_PAD_SIZE;  // padded picture width
+   const int last_row  = height - 1;
+   const int split_row = last_row - offset;             // first row served by the pinned loop
+   int row, col;
+ 
+   // rows where the BOTTOM source is read at row + offset
+   for (row = 0; row < split_row; row++)
+   {
+     imgpel *upper = s->imgY_sub[src_y_l][src_x_l][row];
+     imgpel *lower = s->imgY_sub[src_y_r][src_x_r][row + offset];
+     imgpel *out   = s->imgY_sub[dst_y][dst_x][row];
+ 
+     for (col = 0; col < width; col++)
+       out[col] = (imgpel) rshift_rnd_sf(upper[col] + lower[col], 1);
+   }
+ 
+   // remaining rows: the BOTTOM source is pinned to the last padded row
+   for (row = split_row; row < height; row++)
+   {
+     imgpel *upper = s->imgY_sub[src_y_l][src_x_l][row];
+     imgpel *lower = s->imgY_sub[src_y_r][src_x_r][last_row];
+     imgpel *out   = s->imgY_sub[dst_y][dst_x][row];
+ 
+     for (col = 0; col < width; col++)
+       out[col] = (imgpel) rshift_rnd_sf(upper[col] + lower[col], 1);
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Does _diagonal_ interpolation using the BiLinear filter
+  *
+  * \param s
+  *    pointer to StorablePicture structure
+  * \param dst_x
+  *    horizontal index to sub-image being generated
+  * \param dst_y
+  *    vertical index to sub-image being generated
+  * \param src_x_l
+  *    horizontal index to "TOP" source sub-image
+  * \param src_y_l
+  *    vertical index to "TOP" source sub-image
+  * \param src_x_r
+  *    horizontal index to "BOTTOM" source sub-image
+  * \param src_y_r
+  *    vertical index to "BOTTOM" source sub-image
+  * \param offset_y_l
+  *    Y offset (either +0 or +1) for TOP sub-image coordinate
+  * \param offset_x_l
+  *    X offset (either +0 or +1) for TOP sub-image coordinate
+  * \param offset_y_r
+  *    Y offset (either +0 or +1) for BOTTOM sub-image coordinate
+  * \param offset_x_r
+  *    X offset (either +0 or +1) for BOTTOM sub-image coordinate
+  ************************************************************************
+  */
+ void getDiagSubImageBiLinear( StorablePicture *s, int dst_y, int dst_x, int src_y_l, int src_x_l, int src_y_r, int src_x_r, int offset_y_l, int offset_x_l,
+                              int offset_y_r, int offset_x_r )
+ {
+   const int height   = s->size_y + 2 * IMG_PAD_SIZE;  // padded picture height
+   const int width    = s->size_x + 2 * IMG_PAD_SIZE;  // padded picture width
+   const int last_col = width - 1;
+   const int last_row = height - 1;
+   // Rows below this bound read their sources at row + offset without a clamp;
+   // the extra -1 covers the largest expected offset (one) without per-row checks.
+   const int unclamped_rows = height - IMG_PAD_SIZE - 1;
+   int row, col;
+ 
+   // upper part: source rows taken directly at row + offset
+   for (row = 0; row < unclamped_rows; row++)
+   {
+     imgpel *first  = s->imgY_sub[src_y_l][src_x_l][row + offset_y_l];
+     imgpel *second = s->imgY_sub[src_y_r][src_x_r][row + offset_y_r];
+     imgpel *out    = s->imgY_sub[dst_y][dst_x][row];
+ 
+     for (col = 0; col < width; col++)
+     {
+       int col_first  = imin (last_col, col + offset_x_l);
+       int col_second = imin (last_col, col + offset_x_r);
+ 
+       out[col] = (imgpel) rshift_rnd_sf(first[col_first] + second[col_second], 1);
+     }
+   }
+ 
+   // lower part: source rows clamped to the last padded row
+   for (row = unclamped_rows; row < height; row++)
+   {
+     imgpel *first  = s->imgY_sub[src_y_l][src_x_l][imin (last_row, row + offset_y_l)];
+     imgpel *second = s->imgY_sub[src_y_r][src_x_r][imin (last_row, row + offset_y_r)];
+     imgpel *out    = s->imgY_sub[dst_y][dst_x][row];
+ 
+     for (col = 0; col < width; col++)
+     {
+       int col_first  = imin (last_col, col + offset_x_l);
+       int col_second = imin (last_col, col + offset_x_r);
+ 
+       out[col] = (imgpel) rshift_rnd_sf(first[col_first] + second[col_second], 1);
+     }
+   }
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/img_luma.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/img_luma.h:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/img_luma.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,29 ----
+ /*!
+  ***************************************************************************
+  * \file
+  *    img_luma.h
+  *
+  * \author
+  *    Athanasios Leontaris           <aleon at dolby.com>
+  *    Alexis Michael Tourapis        <alexis.tourapis at dolby.com>
+  *
+  * \date
+  *    4. October 2006
+  *
+  * \brief
+  *    Headerfile for luma interpolation functions
+  **************************************************************************
+  */
+ 
+ #ifndef _IMG_LUMA_H_
+ #define _IMG_LUMA_H_
+ 
+ // NOTE(review): body not visible from this header -- presumably the driver that builds all luma sub-images of s
+ void getSubImagesLuma       ( StorablePicture *s );
+ // NOTE(review): body not visible from this header -- presumably the horizontal counterpart of getVerSubImageSixTap
+ void getHorSubImageSixTap   ( StorablePicture *s, int dst_y, int dst_x, int src_y, int src_x );
+ // vertical six-tap filter into sub-image [dst_y][dst_x]; use_stored_int != 0 reads imgY_sub_tmp instead of sub-image [src_y][src_x]
+ void getVerSubImageSixTap   ( StorablePicture *s, int dst_y, int dst_x, int src_y, int src_x, int use_stored_int );
+ // rounded average of a LEFT and a RIGHT sub-image; the RIGHT sample is read at column + offset, pinned to the last column
+ void getHorSubImageBiLinear ( StorablePicture *s, int dst_y, int dst_x, int src_y_l, int src_x_l, int src_y_r, int src_x_r, int offset );
+ // rounded average of a TOP and a BOTTOM sub-image; the BOTTOM sample is read at row + offset, pinned to the last row
+ void getVerSubImageBiLinear ( StorablePicture *s, int dst_y, int dst_x, int src_y_l, int src_x_l, int src_y_r, int src_x_r, int offset );
+ // rounded average of two sub-images, each read with its own (y, x) offset, clamped to the padded picture
+ void getDiagSubImageBiLinear( StorablePicture *s, int dst_y, int dst_x, int src_y_l, int src_x_l, int src_y_r, int src_x_r, int offset_y_l, int offset_x_l,
+                               int offset_y_r, int offset_x_r );
+ 
+ #endif // _IMG_LUMA_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,136 @@
+
+/*!
+ *****************************************************************************
+ *
+ * \file intrarefresh.c
+ *
+ * \brief
+ *    Encoder support for pseudo-random intra macroblock refresh
+ *
+ * \date
+ *    16 June 2002
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *****************************************************************************/
+
+#include <stdlib.h>
+#include <assert.h>
+
+#include "global.h"
+
+static int *RefreshPattern;          // NumberOfMBs entries: the values 0..NumberOfMBs-1 in random order (NULL when refresh is 0)
+static int *IntraMBs;                // the NumberIntraPerPicture pattern entries selected for the current picture (NULL when refresh is 0)
+static int WalkAround = 0;           // running start position in RefreshPattern, advanced once per picture
+static int NumberOfMBs = 0;          // xsize * ysize as passed to RandomIntraInit
+static int NumberIntraPerPicture;    // refresh value passed to RandomIntraInit
+
+/*!
+ ************************************************************************
+ * \brief
+ *    RandomIntraInit: Initializes Random Intra module.  Should be called
+ *    only after initialization (or changes) of the picture size or the
+ *    random intra refresh value.  In version jm2.1 it is impossible to
+ *    change those values on-the-fly, hence RandomIntraInit should be
+ *    called immediately after the parsing of the config file
+ *
+ * \par Input:
+ *    xsize, ysize: size of the picture (in MBs)
+ *    refresh     : refresh rate in MBs per picture
+ ************************************************************************
+ */
+
+void RandomIntraInit(int xsize, int ysize, int refresh)
+{
+  int mb, slot;
+
+  // fixed seed, so the generated pattern is the same on every run
+  srand (1);
+  NumberOfMBs = xsize * ysize;
+  NumberIntraPerPicture = refresh;
+
+  if (refresh == 0)
+  {
+    // refresh disabled: no buffers are needed
+    RefreshPattern = NULL;
+    IntraMBs = NULL;
+    return;
+  }
+
+  RefreshPattern = malloc (sizeof (int) * NumberOfMBs);
+  if (RefreshPattern == NULL)
+    no_mem_exit("RandomIntraInit: RefreshPattern");
+
+  IntraMBs = malloc (sizeof (int) * refresh);
+  if (IntraMBs == NULL)
+    no_mem_exit("RandomIntraInit: IntraMBs");
+
+  // mark every slot of the pattern as free
+  for (mb = 0; mb < NumberOfMBs; mb++)
+    RefreshPattern[mb] = -1;
+
+  // drop the values 0..NumberOfMBs-1 into randomly drawn free slots,
+  // redrawing whenever the drawn slot is already taken
+  for (mb = 0; mb < NumberOfMBs; mb++)
+  {
+    slot = rand() % NumberOfMBs;
+    while (RefreshPattern[slot] != -1)
+      slot = rand() % NumberOfMBs;
+    RefreshPattern[slot] = mb;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    RandomIntra: Code an MB as Intra?
+ *
+ * \par Input
+ *    MacroblockNumberInScanOrder
+ * \par Output
+ *    1 if an MB should be forced to Intra, according to the
+ *      RefreshPattern
+ *    0 otherwise
+ *
+ ************************************************************************
+ */
+
+int RandomIntra (int mb)
+{
+  int k = 0;
+  int forced = 0;
+
+  // scan the entries selected for the current picture until mb is found
+  while (k < NumberIntraPerPicture && !forced)
+  {
+    forced = (IntraMBs[k] == mb);
+    k++;
+  }
+  return forced;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    RandomIntraNewPicture: Selects new set of MBs for forced Intra
+ *
+ * \par
+ *    This function should be called exactly once per picture, and
+ *    requires a finished initialization
+ *
+ ************************************************************************
+ */
+
+void RandomIntraNewPicture (void)
+{
+  int j;
+
+  // refresh disabled (no buffers allocated): nothing to select
+  if (NumberIntraPerPicture <= 0)
+    return;
+
+  // Advance the start position by one picture's worth of entries and fold it
+  // back into [0, NumberOfMBs).  The selected entries are the same as with an
+  // ever-growing counter, but the counter can no longer run into signed
+  // overflow (and then a negative array index) on very long sequences.
+  WalkAround = (WalkAround + NumberIntraPerPicture) % NumberOfMBs;
+
+  // copy the next NumberIntraPerPicture pattern entries, wrapping at the end
+  for (j = 0; j < NumberIntraPerPicture; j++)
+    IntraMBs[j] = RefreshPattern [(WalkAround + j) % NumberOfMBs];
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    RandomIntraUninit: Releases the buffers of the Random Intra module
+ *
+ * \par
+ *    Safe to call more than once and without a preceding
+ *    RandomIntraInit; afterwards RandomIntra() reports 0 for every MB
+ *
+ ************************************************************************
+ */
+
+void RandomIntraUninit(void)
+{
+  // free(NULL) is a no-op, so no guard on the refresh value is required
+  free(RefreshPattern);
+  free(IntraMBs);
+
+  // drop the stale pointers and the entry count so that a repeated uninit
+  // cannot free twice and RandomIntra() cannot read released memory
+  RefreshPattern = NULL;
+  IntraMBs = NULL;
+  NumberIntraPerPicture = 0;
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,26 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file intrarefresh.h
+ *
+ * \brief
+ *    Pseudo-Random Intra macroblock refresh support
+ *
+ * \date
+ *    16 June 2002
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ **************************************************************************/
+
+#ifndef _INTRAREFRESH_H_
+#define _INTRAREFRESH_H_
+
+void RandomIntraInit(int xsize, int ysize, int refresh);  //! xsize, ysize: picture size in MBs; refresh: intra MBs per picture (0 disables)
+void RandomIntraUninit(void);  //! frees the buffers allocated by RandomIntraInit
+int RandomIntra (int mb);   //! returns 1 for MBs that need forced Intra
+void RandomIntraNewPicture (void);  //! to be called once per picture
+
+
+#endif //_INTRAREFRESH_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,292 @@
+
+/*!
+ ***************************************************************************
+ * \file leaky_bucket.c
+ *
+ * \brief
+ *    calculate Leaky Buffer parameters
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Shankar Regunathan                   <shanre at microsoft.com>
+ ***************************************************************************
+ */
+#include <stdlib.h>
+
+#include "contributors.h"
+#include "global.h"
+
+#ifdef _LEAKYBUCKET_
+
+//long Bit_Buffer[20000];
+unsigned long total_frame_buffer = 0;
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *   Function to get Leaky Bucket rates from rate file
+ * \param NumberLeakyBuckets
+ *    Number of Leaky Bucket Parameters
+ * \param Rmin
+ *    Rate values for each Bucket.
+ * \return
+ *    returns 1 if successful; else returns zero.
+ * \par SideEffects
+ *     None.
+ * \par Notes
+ *     Failure if LeakyBucketRate is missing or if it does not have
+ *     the correct number of entries.
+ * \author
+ *    Shankar Regunathan                   shanre at microsoft.com
+ *  \date
+ *      December 06, 2001.
+ ***********************************************************************
+ */
+
+int get_LeakyBucketRate(unsigned long NumberLeakyBuckets, unsigned long *Rmin)
+{
+  unsigned long idx = 0;
+  unsigned long value;
+  int ok = 1;
+  FILE *rate_file = fopen(input->LeakyBucketRateFile, "r");
+
+  if (rate_file == NULL)
+  {
+    printf(" LeakyBucketRate File does not exist. Using rate calculated from avg. rate \n");
+    return 0;
+  }
+
+  // read one rate per bucket; stop at the first entry that does not parse
+  while (ok && idx < NumberLeakyBuckets)
+  {
+    if (fscanf(rate_file, "%lu", &value) == 1)
+    {
+      Rmin[idx] = value;
+      idx++;
+    }
+    else
+    {
+      printf(" Leaky BucketRateFile does not have valid entries.\n Using rate calculated from avg. rate \n");
+      ok = 0;
+    }
+  }
+
+  fclose (rate_file);
+  return ok;
+}
+/*!
+ ***********************************************************************
+ * \brief
+ *   Writes one unsigned long word in big endian order to a file.
+ * \param dw
+ *    Value to be written
+ * \param fp
+ *    File pointer
+ * \return
+ *    None.
+ * \par SideEffects
+ *     None.
+ * \author
+ *    Shankar Regunathan                   shanre at microsoft.com
+ *  \date
+ *      December 06, 2001.
+ ***********************************************************************
+ */
+
+void PutBigDoubleWord(unsigned long dw, FILE *fp)
+{
+  int shift;
+
+  // emit bits 31..0 of dw as four bytes, most significant byte first
+  for (shift = 24; shift >= 0; shift -= 8)
+    fputc((int) ((dw >> shift) & 0xFF), fp);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *   Stores the Leaky BucketParameters in file input->LeakyBucketParamFile.
+ * \param NumberLeakyBuckets
+ *    Number of LeakyBuckets.
+ * \param Rmin
+ *    Rate values of the buckets.
+ * \param Bmin
+ *    Minimum buffer values of the buckets.
+ *  \param Fmin
+ *     Minimum initial buffer fullness of the buckets
+ * \return
+ *    None.
+ * \par
+ *    Returns error if LeakyBucketParamFile cannot be opened.
+ * \par SideEffects
+ *     Prints the LeakyBucket Parameters in standard output.
+ * \author
+ *    Shankar Regunathan                   shanre at microsoft.com
+ *  \date
+ *      December 06, 2001.
+ ***********************************************************************
+ */
+
+
+void write_buffer(unsigned long NumberLeakyBuckets, unsigned long Rmin[], unsigned long Bmin[], unsigned long Fmin[])
+{
+  unsigned long bucket = 0;
+  FILE *param_file = fopen(input->LeakyBucketParamFile,"wb");
+
+  if (param_file == NULL)
+  {
+    snprintf(errortext, ET_SIZE, "Error open file lk %s  \n",input->LeakyBucketParamFile);
+    error(errortext,1);
+  }
+
+  // layout: bucket count, then rate / buffer size / initial fullness per
+  // bucket, each value written by PutBigDoubleWord
+  PutBigDoubleWord(NumberLeakyBuckets, param_file);
+  while (bucket < NumberLeakyBuckets)
+  {
+    PutBigDoubleWord(Rmin[bucket], param_file);
+    PutBigDoubleWord(Bmin[bucket], param_file);
+    PutBigDoubleWord(Fmin[bucket], param_file);
+    bucket++;
+  }
+
+  fclose(param_file);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Sorts the rate array in ascending order.
+ * \param NumberLeakyBuckets
+ *    Number of LeakyBuckets.
+ * \param Rmin
+ *    Rate values of the buckets.
+ * \return
+ *    None.
+ * \author
+ *    Shankar Regunathan                   shanre at microsoft.com
+ *  \date
+ *      December 06, 2001.
+ ***********************************************************************
+ */
+
+
+void Sort(unsigned long NumberLeakyBuckets, unsigned long *Rmin)
+{
+  unsigned long i, j;
+  unsigned long temp;
+
+  // The bound is written as "i+1 < n" rather than "i < n-1": n is unsigned,
+  // so n-1 wraps to ULONG_MAX for n == 0 and the outer loop would keep
+  // spinning instead of returning at once for an empty array.
+  for(i=0; i+1 < NumberLeakyBuckets; i++)
+  {
+    // move the smallest remaining value into position i
+    for(j=i+1; j<NumberLeakyBuckets; j++)
+    {
+      if(Rmin[i] > Rmin[j]) {
+        temp = Rmin[i];
+        Rmin[i] = Rmin[j];
+        Rmin[j] = temp;
+      }
+    }
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Main Routine to calculate Leaky Buffer parameters
+ * \par Parameters
+ *    None.
+ * \return
+ *    None.
+ * \author
+ *    Shankar Regunathan                   shanre at microsoft.com
+ *  \date
+ *      December 06, 2001.
+ ***********************************************************************
+ */
+
+void calc_buffer(void)
+{
+  unsigned long AvgRate, TotalRate, NumberLeakyBuckets;
+  long *buffer_frame, minB;
+  unsigned long iBucket, iFrame,  FrameIndex = 0;
+  long maxBuffer, actualBuffer, InitFullness, iChannelRate;
+  unsigned long *Rmin, *Bmin, *Fmin;
+
+  fprintf(stdout,"-------------------------------------------------------------------------------\n");
+  // total_frame_buffer is an unsigned long and therefore printed with %lu
+  printf(" Total Frames:  %lu (%d) \n", total_frame_buffer, input->no_frames);
+
+  // Without any recorded frame the average rate below would divide by zero
+  // and the fullness pass would write buffer_frame[1], which is not allocated.
+  if (total_frame_buffer == 0)
+    return;
+
+  NumberLeakyBuckets = (unsigned long) input->NumberLeakyBuckets;
+  // one entry more than frames: the simulation writes buffer_frame[iFrame+1]
+  buffer_frame = calloc(total_frame_buffer+1, sizeof(long));
+  if(!buffer_frame)
+    no_mem_exit("init_buffer: buffer_frame");
+  Rmin = calloc(NumberLeakyBuckets, sizeof(unsigned long));
+  if(!Rmin)
+    no_mem_exit("init_buffer: Rmin");
+  Bmin = calloc(NumberLeakyBuckets, sizeof(unsigned long));
+  if(!Bmin)
+    no_mem_exit("init_buffer: Bmin");
+  Fmin = calloc(NumberLeakyBuckets, sizeof(unsigned long));
+  if(!Fmin)
+    no_mem_exit("init_buffer: Fmin");
+
+  // average number of bits per frame over the whole sequence
+  TotalRate = 0;
+  for(iFrame=0; iFrame < total_frame_buffer; iFrame++)
+  {
+    TotalRate += (unsigned long) Bit_Buffer[iFrame];
+  }
+  AvgRate = (unsigned long) ((float) TotalRate/ total_frame_buffer);
+
+  if(1 != get_LeakyBucketRate(NumberLeakyBuckets, Rmin))
+  { /* if rate file is not present, use default calculated from avg.rate */
+    for(iBucket=0; iBucket < NumberLeakyBuckets; iBucket++)
+    {
+      if(iBucket == 0)
+        Rmin[iBucket] = (unsigned long)((float) AvgRate * img->framerate)/(input->jumpd+1); /* convert bits/frame to bits/second */
+      else
+        Rmin[iBucket] = (unsigned long) ((float) Rmin[iBucket-1] + (AvgRate/4) * (img->framerate) / (input->jumpd+1));
+    }
+  }
+  Sort(NumberLeakyBuckets, Rmin);
+
+  maxBuffer = AvgRate * 20; /* any initialization is good. */
+  for(iBucket=0; iBucket< NumberLeakyBuckets; iBucket++)
+  {
+    iChannelRate = (long) (Rmin[iBucket] * (input->jumpd+1)/(img->framerate)); /* converts bits/second to bits/frame */
+    /* To calculate initial buffer size */
+    InitFullness = maxBuffer; /* set Initial Fullness to be buffer size */
+    buffer_frame[0] = InitFullness;
+    minB = maxBuffer;
+
+    // pass 1: drain each frame's bits, refill at the channel rate (capped at
+    // maxBuffer) and remember the lowest level reached and where it occurred
+    for(iFrame=0; iFrame<total_frame_buffer ; iFrame++)
+    {
+      buffer_frame[iFrame] = buffer_frame[iFrame] - Bit_Buffer[iFrame];
+      if(buffer_frame[iFrame] < minB)
+      {
+        minB = buffer_frame[iFrame];
+        FrameIndex = iFrame;
+      }
+
+      buffer_frame[iFrame+1] = buffer_frame[iFrame] + iChannelRate;
+      if(buffer_frame[iFrame+1] > maxBuffer)
+        buffer_frame[iFrame+1] = maxBuffer;
+    }
+    actualBuffer = (maxBuffer - minB);
+
+    /* To calculate initial buffer Fullness */
+    InitFullness = Bit_Buffer[0];
+    buffer_frame[0] = InitFullness;
+    // pass 2: replay up to the frame of the minimum; every deficit is added
+    // to the initial fullness, stopping once the level exceeds actualBuffer
+    for(iFrame=0; iFrame < FrameIndex+1; iFrame++)
+    {
+      buffer_frame[iFrame] = buffer_frame[iFrame] - Bit_Buffer[iFrame];
+      if(buffer_frame[iFrame] < 0) {
+        InitFullness -= buffer_frame[iFrame];
+        buffer_frame[iFrame] = 0;
+      }
+      buffer_frame[iFrame+1] = buffer_frame[iFrame] + iChannelRate;
+      if(buffer_frame[iFrame+1] > actualBuffer)
+        break;
+    }
+    Bmin[iBucket] = (unsigned long) actualBuffer;
+    Fmin[iBucket] = (unsigned long) InitFullness;
+  }
+
+  write_buffer(NumberLeakyBuckets, Rmin, Bmin, Fmin);
+
+  free(buffer_frame);
+  free(Rmin);
+  free(Bmin);
+  free(Fmin);
+  return;
+}
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,29 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file leaky_bucket.h
+ *
+ * \brief
+ *    Header for Leaky Buffer parameters
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Shankar Regunathan                   <shanre at microsoft.com>
+ **************************************************************************/
+
+#ifndef _LEAKY_BUCKET_H_
+#define _LEAKY_BUCKET_H_
+
+
+/* Leaky Bucket Parameter Optimization */
+#ifdef _LEAKYBUCKET_
+//! reads NumberLeakyBuckets rates from input->LeakyBucketRateFile into Rmin; returns 1 on success, 0 otherwise
+int get_LeakyBucketRate(unsigned long NumberLeakyBuckets, unsigned long *Rmin);
+//! writes the low 32 bits of dw to fp, most significant byte first
+void PutBigDoubleWord(unsigned long dw, FILE *fp);
+//! stores bucket count and per-bucket rate, buffer size and fullness in input->LeakyBucketParamFile
+void write_buffer(unsigned long NumberLeakyBuckets, unsigned long Rmin[], unsigned long Bmin[], unsigned long Fmin[]);
+//! sorts the NumberLeakyBuckets entries of Rmin in ascending order
+void Sort(unsigned long NumberLeakyBuckets, unsigned long *Rmin);
+//! calculates the leaky bucket parameters and writes them out; declared with (void) so calls are argument-checked
+void calc_buffer(void);
+#endif
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/lencod.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/lencod.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/lencod.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,2751 @@
+
+/*!
+ ***********************************************************************
+ *  \mainpage
+ *     This is the H.264/AVC encoder reference software. For detailed documentation
+ *     see the comments in each file.
+ *
+ *  \author
+ *     The main contributors are listed in contributors.h
+ *
+ *  \version
+ *     JM 12.1 (FRExt)
+ *
+ *  \note
+ *     tags are used for document system "doxygen"
+ *     available at http://www.doxygen.org
+ */
+/*!
+ *  \file
+ *     lencod.c
+ *  \brief
+ *     H.264/AVC reference encoder project main()
+ *  \author
+ *   Main contributors (see contributors.h for copyright, address and affiliation details)
+ *   - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *   - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+ *   - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ *   - Jani Lainema                    <jani.lainema at nokia.com>
+ *   - Byeong-Moon Jeon                <jeonbm at lge.com>
+ *   - Yoon-Seong Soh                  <yunsung at lge.com>
+ *   - Thomas Stockhammer              <stockhammer at ei.tum.de>
+ *   - Detlev Marpe                    <marpe at hhi.de>
+ *   - Guido Heising                   <heising at hhi.de>
+ *   - Valeri George                   <george at hhi.de>
+ *   - Karsten Suehring                <suehring at hhi.de>
+ *   - Alexis Michael Tourapis         <alexismt at ieee.org>
+ ***********************************************************************
+ */
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <sys/timeb.h>
+#include "global.h"
+
+#include "configfile.h"
+#include "leaky_bucket.h"
+#include "memalloc.h"
+#include "intrarefresh.h"
+#include "fmo.h"
+#include "sei.h"
+#include "parset.h"
+#include "image.h"
+#include "output.h"
+
+#include "me_epzs.h"
+#include "me_umhex.h"
+#include "me_umhexsmp.h"
+
+#include "ratectl.h"
+#include "rc_quadratic.h"
+#include "explicit_gop.h"
+
+#define JM      "12 (FRExt)"
+#define VERSION "12.1"
+#define EXT_VERSION "(FRExt)"
+
+InputParameters inputs,      *input = &inputs;
+ImageParameters images,      *img   = &images;
+StatParameters  statistics,  *stats = &statistics;
+SNRParameters   snrs,        *snr   = &snrs;
+Decoders decoders, *decs=&decoders;
+
+static void information_init(void);
+
+#ifdef _ADAPT_LAST_GROUP_
+int initial_Bframes = 0;
+#endif
+
+Boolean In2ndIGOP = FALSE;
+int    start_frame_no_in_this_IGOP = 0;
+int    start_tr_in_this_IGOP = 0;
+int    FirstFrameIn2ndIGOP=0;
+int    cabac_encoding = 0;
+int    frame_statistic_start;
+extern ColocatedParams *Co_located;
+extern double *mb16x16_cost_frame;
+static char DistortionType[3][20] = {"SAD","SSE","Hadamard SAD"};
+
+void Init_Motion_Search_Module (void);
+void Clear_Motion_Search_Module (void);
+void report_frame_statistic(void);
+void SetLevelIndices(void);
+void chroma_mc_setup(void);
+
+void init_stats(void)
+{
+  // take over the configured number of successive B frames
+  stats->successive_Bframe = input->successive_Bframe;
+
+  // bit counters per slice type
+  stats->bit_ctr_I = stats->bit_ctr_P = stats->bit_ctr_B = 0;
+
+  // SNR values: snr_*, snr_*1 and snr_*a for Y, U and V
+  snr->snr_y  = snr->snr_u  = snr->snr_v  = 0.0;
+  snr->snr_y1 = snr->snr_u1 = snr->snr_v1 = 0.0;
+  snr->snr_ya = snr->snr_ua = snr->snr_va = 0.0;
+
+  // squared-error values: sse_* and msse_* for Y, U and V
+  snr->sse_y  = snr->sse_u  = snr->sse_v  = 0.0;
+  snr->msse_y = snr->msse_u = snr->msse_v = 0.0;
+
+  snr->frame_ctr = 0;
+}
+/*!
+ ***********************************************************************
+ * \brief
+ *    Main function for encoder.
+ * \param argc
+ *    number of command line arguments
+ * \param argv
+ *    command line arguments
+ * \return
+ *    exit code
+ ***********************************************************************
+ */
+int main(int argc,char **argv)
+{
+  int M,N,n,np,nb;           //Rate control: N = intra_period*(successive_Bframe+1), M = successive_Bframe+1, n = frames in the current GOP, np/nb = P and B counts handed to rc_init_GOP()
+  int primary_disp = 0;      // number of frames coded with nal_reference_idc == 0 since the last reset; subtracted from IMG_NUMBER when frame_num is derived
+  giRDOpt_B8OnlyFlag = 0;
+  // -1 marks a file descriptor as "not opened"; p_dec is tested against -1 before close() during shutdown
+  p_dec = p_in = -1;
+
+  p_stat = p_log = p_trace = NULL;
+
+  frame_statistic_start = 1; // report_frame_statistic() prints its column header while this is set and clears it afterwards
+
+  Configure (argc, argv);
+
+  Init_QMatrix();
+
+  Init_QOffsetMatrix();
+
+  AllocNalPayloadBuffer();
+
+  init_poc();
+  GenerateParameterSets();
+  SetLevelIndices();
+
+  init_img();
+  frame_pic_1= malloc_picture();
+  // the two extra frame pictures are only allocated when RDPictureDecision is enabled
+  if (input->RDPictureDecision)
+  {
+    frame_pic_2 = malloc_picture();
+    frame_pic_3 = malloc_picture();
+  }
+
+  if (input->si_frame_indicator)
+  {
+    si_frame_indicator=0; //indicates whether the frame is SP or SI
+    number_sp2_frames=0;
+
+    frame_pic_si = malloc_picture();//picture buffer for the encoded SI picture
+    //allocation of lrec and lrec_uv for SI picture
+    get_mem2Dint (&lrec, img->height, img->width);
+    get_mem3Dint (&lrec_uv, 2, img->height, img->width);
+  }
+  // separate field pictures are only needed when PicInterlace is not plain frame coding
+  if (input->PicInterlace != FRAME_CODING)
+  {
+    top_pic = malloc_picture();
+    bottom_pic = malloc_picture();
+  }
+  init_rdopt ();
+#ifdef _LEAKYBUCKET_
+  Bit_Buffer = malloc((input->no_frames * (input->successive_Bframe + 1) + 1) * sizeof(long)); // NOTE(review): result is not checked for NULL here
+#endif
+
+  if (input->HierarchicalCoding )
+  {
+    init_gop_structure();
+    if (input->successive_Bframe && input->HierarchicalCoding == 3) // mode 3 with B frames: the GOP structure is interpreted instead of created
+    {
+      interpret_gop_structure();
+    }
+    else
+    {
+      create_hierarchy();
+    }
+  }
+
+  dpb.init_done = 0;
+  init_dpb();
+  init_out_buffer();
+  init_stats();
+
+  enc_picture = enc_frame_picture = enc_top_picture = enc_bottom_picture = NULL;
+
+  init_global_buffers();
+
+  create_context_memory ();
+
+  Init_Motion_Search_Module ();
+
+  information_init();
+
+  //Rate control
+  if (input->RCEnable)
+     rc_init_seq(quadratic_RC);
+
+  if(input->SearchMode == UM_HEX)
+    UMHEX_DefineThreshold();
+
+  // Init frame type counter. Only supports single slice per frame.
+  memset(frame_ctr, 0, 5 * sizeof(int));
+
+  img->last_valid_reference = 0;
+  tot_time=0;                 // time for total encoding session
+
+#ifdef _ADAPT_LAST_GROUP_
+  if (input->last_frame > 0) // derive the number of coded frames from the requested last frame and the frame skip (jumpd)
+    input->no_frames = 1 + (input->last_frame + input->jumpd) / (input->jumpd + 1);
+  initial_Bframes = input->successive_Bframe; // remembered because successive_Bframe may be reduced for the last group below
+#endif
+
+  PatchInputNoFrames();
+
+  // Write sequence header (with parameter sets)
+  stats->bit_ctr_parametersets = 0;
+  stats->bit_slice = start_sequence();
+  stats->bit_ctr_parametersets += stats->bit_ctr_parametersets_n;
+  start_frame_no_in_this_IGOP = 0;
+
+  if ( input->ChromaMCBuffer )
+    chroma_mc_setup();
+  // main encoding loop: one iteration per frame counted by img->number
+  for (img->number=0; img->number < input->no_frames; img->number++)
+  {
+    //img->nal_reference_idc = 1;
+    // With DisposableP set, every frame that is not the first of an intra period (or, without an
+    // intra period, not frame 0) gets (img->number + 1) % 2, i.e. 0 for odd img->number; otherwise 1.
+    if (input->intra_period)
+      img->nal_reference_idc = ((IMG_NUMBER % input->intra_period) && input->DisposableP) ? (img->number + 1)% 2 : 1;
+    else
+      img->nal_reference_idc = (img->number && input->DisposableP) ? (img->number + 1)% 2 : 1;
+
+    //much of this can go in init_frame() or init_field()?
+    //poc for this frame or field
+    // the frame index (taken modulo intra_period when IDR is enabled) is scaled by 2*(jumpd+1)
+    img->toppoc = (input->intra_period && input->idr_enable ? IMG_NUMBER % input->intra_period : IMG_NUMBER) * (2*(input->jumpd+1));
+
+    if ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING))
+      img->bottompoc = img->toppoc;     //progressive
+    else
+      img->bottompoc = img->toppoc+1;   //hard coded
+
+    img->framepoc = imin (img->toppoc, img->bottompoc);
+
+    //frame_num for this frame
+    //if (input->BRefPictures== 0 || input->successive_Bframe == 0 || img-> number < 2)
+    if ((input->BRefPictures != 1 &&  input->HierarchicalCoding == 0) || input->successive_Bframe == 0 || img-> number < 2)// ||  input->HierarchicalCoding == 0)
+    {
+      // frame_num is recomputed from the frame index minus the frames counted in primary_disp
+      if (input->intra_period && input->idr_enable)
+      {
+        img->frame_num =  ((IMG_NUMBER - primary_disp)  % input->intra_period ) % (1 << (log2_max_frame_num_minus4 + 4));
+        if (IMG_NUMBER % input->intra_period  == 0)
+        {
+          img->frame_num = 0;
+          primary_disp   = 0;
+        }
+      }
+      else
+      img->frame_num = (IMG_NUMBER - primary_disp) % (1 << (log2_max_frame_num_minus4 + 4));
+
+    }
+    else
+    {
+      // frame_num is not recomputed here: it is only reset at the start of an intra period and wrapped
+      //img->frame_num ++;
+      if (input->intra_period && input->idr_enable)
+      {
+        if (0== (img->number % input->intra_period))
+        {
+          img->frame_num=0;
+          primary_disp   = 0;
+        }
+      }
+      img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+    }
+
+    //the following is sent in the slice header
+    img->delta_pic_order_cnt[0]=0;
+
+    if (input->BRefPictures == 1)
+    {
+      if (img->number)
+      {
+        img->delta_pic_order_cnt[0]=+2 * input->successive_Bframe;
+      }
+    }
+
+    SetImgType();
+    // optionally re-send the parameter sets in front of every I slice except the very first frame
+    if (input->ResendSPS == 1 && img->type == I_SLICE && img->number != 0)
+    {
+      stats->bit_slice = rewrite_paramsets();
+      stats->bit_ctr_parametersets += stats->bit_ctr_parametersets_n;
+    }
+
+
+#ifdef _ADAPT_LAST_GROUP_
+    if (input->successive_Bframe && input->last_frame && IMG_NUMBER+1 == input->no_frames)
+    {
+      int bi = (int)((float)(input->jumpd+1)/(input->successive_Bframe+1.0)+0.499999); // NOTE(review): used as a divisor below; presumably never 0 for valid configurations - confirm
+
+      input->successive_Bframe = (input->last_frame-(img->number-1)*(input->jumpd+1))/bi-1;
+
+      //about to code the last ref frame, adjust delta poc
+      img->delta_pic_order_cnt[0]= -2*(initial_Bframes - input->successive_Bframe);
+      img->toppoc += img->delta_pic_order_cnt[0];
+      img->bottompoc += img->delta_pic_order_cnt[0];
+      img->framepoc = imin (img->toppoc, img->bottompoc);
+    }
+#endif
+
+     //Rate control: (re)initialise the GOP on an I slice; for RC_MODE_1 / RC_MODE_3 only on the very first frame
+    if (img->type == I_SLICE && ((input->RCUpdateMode != RC_MODE_1 && input->RCUpdateMode != RC_MODE_3) || (!IMG_NUMBER) ) )
+    {
+      if(input->RCEnable)
+      {
+        if (input->intra_period == 0) // no intra period: the whole sequence is treated as one GOP
+        {
+          n = input->no_frames + (input->no_frames - 1) * input->successive_Bframe;
+
+          /* number of P frames */
+          np = input->no_frames-1;
+
+          /* number of B frames */
+          nb = (input->no_frames - 1) * input->successive_Bframe;
+        }
+        else if ( input->RCUpdateMode != RC_MODE_1 && input->RCUpdateMode != RC_MODE_3 )
+        {
+          N = input->intra_period*(input->successive_Bframe+1);
+          M = input->successive_Bframe+1;
+          n = (img->number==0) ? N - ( M - 1) : N; // the first GOP is shortened by M - 1 frames
+
+          /* last GOP may contain less frames */
+          if(img->number/input->intra_period >= input->no_frames / input->intra_period)
+          {
+            if (img->number != 0)
+              n = (input->no_frames - img->number) + (input->no_frames - img->number - 1) * input->successive_Bframe + input->successive_Bframe;
+            else
+              n = input->no_frames  + (input->no_frames - 1) * input->successive_Bframe;
+          }
+
+          /* number of P frames */
+          if (img->number == 0)
+            np = (n + 2 * (M - 1)) / M - 1; /* first GOP */
+          else
+            np = (n + (M - 1)) / M - 1;
+
+          /* number of B frames */
+          nb = n - np - 1;
+        }
+        else // applies RC to I and B slices
+        {
+          np = input->no_frames - 1; // includes I and P slices/frames except the very first IDR I_SLICE
+          nb = (input->no_frames - 1) * input->successive_Bframe;
+        }
+        rc_init_GOP(quadratic_RC,np,nb);
+      }
+    }
+
+
+    // which layer the image belonged to?
+    if ( IMG_NUMBER % (input->NumFramesInELSubSeq+1) == 0 )
+      img->layer = 0;
+    else
+      img->layer = 1;
+
+    // redundant frame initialization and allocation
+    if(input->redundant_pic_flag)
+    {
+      Init_redundant_frame();
+      Set_redundant_frame();
+    }
+
+    encode_one_frame(); // encode one I- or P-frame
+
+    // if key frame is encoded, encode one redundant frame
+    if(input->redundant_pic_flag && key_frame)
+    {
+      encode_one_redundant_frame();
+    }
+
+    if (img->type == I_SLICE && input->EnableOpenGOP)
+      img->last_valid_reference = img->ThisPOC;
+
+    if (input->ReportFrameStats)
+      report_frame_statistic();
+    // a frame coded with nal_reference_idc == 0 is counted in primary_disp and frame_num is stepped back (then wrapped)
+    if (img->nal_reference_idc == 0)
+    {
+      primary_disp ++;
+      img->frame_num -= 1;
+      img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+    }
+    encode_enhancement_layer();
+
+    process_2nd_IGOP();
+  }
+  // terminate sequence
+  terminate_sequence();
+
+  flush_dpb();
+  // close the input, the optional second file (p_dec) and the trace file if it was opened
+  close(p_in);
+  if (-1!=p_dec)
+    close(p_dec);
+  if (p_trace)
+    fclose(p_trace);
+
+  Clear_Motion_Search_Module ();
+
+  RandomIntraUninit();
+  FmoUninit();
+
+  if (input->HierarchicalCoding)
+    clear_gop_structure ();
+
+  // free structure for rd-opt. mode decision
+  clear_rdopt ();
+
+#ifdef _LEAKYBUCKET_
+  calc_buffer();
+#endif
+
+  // report everything
+  report();
+  // release everything allocated above; each conditional mirrors the condition used at allocation time
+#ifdef _LEAKYBUCKET_
+  free(Bit_Buffer);
+#endif
+  free_picture (frame_pic_1);
+
+  if (input->RDPictureDecision)
+  {
+    free_picture (frame_pic_2);
+    free_picture (frame_pic_3);
+  }
+
+  // Deallocation of SI picture related memory
+  if (input->si_frame_indicator)
+  {
+    free_picture (frame_pic_si);
+    //deallocation of lrec and lrec_uv for SI frames
+    free_mem2Dint (lrec);
+    free_mem3Dint (lrec_uv,2);
+  }
+
+  if (top_pic)
+    free_picture (top_pic);
+  if (bottom_pic)
+    free_picture (bottom_pic);
+
+  free_dpb();
+  free_colocated(Co_located);
+  uninit_out_buffer();
+
+  free_global_buffers();
+
+  // free image mem
+  free_img ();
+  free_context_memory ();
+  FreeNalPayloadBuffer();
+  FreeParameterSets();
+  return 0;
+}
+/*!
+ ***********************************************************************
+ * \brief
+ *    Terminates and reports statistics on error.
+ *
+ *    Limits the sequence length to the number of frames handled so far
+ *    (input->no_frames = img->number), terminates the sequence, closes
+ *    the open files, prints the reports and then releases the encoder
+ *    resources in the same way as the regular shutdown path at the end
+ *    of main().
+ *
+ * \note
+ *    The function itself does not exit; that is left to the caller.
+ ***********************************************************************
+ */
+void report_stats_on_error(void)
+{
+  // only the frames coded so far take part in the final statistics
+  input->no_frames=img->number;
+  terminate_sequence();
+
+  flush_dpb();
+
+  close(p_in);
+  if (-1!=p_dec)
+    close(p_dec);
+
+  if (p_trace)
+    fclose(p_trace);
+
+  Clear_Motion_Search_Module ();
+
+  RandomIntraUninit();
+  FmoUninit();
+
+  if (input->HierarchicalCoding)
+    clear_gop_structure ();
+
+  // free structure for rd-opt. mode decision
+  clear_rdopt ();
+
+#ifdef _LEAKYBUCKET_
+  calc_buffer();
+#endif
+
+  if (input->ReportFrameStats)
+    report_frame_statistic();
+
+  // report everything
+  report();
+
+  // From here on the release sequence matches main(): the buffers below
+  // used to be released only on the regular path and leaked on this one.
+#ifdef _LEAKYBUCKET_
+  free(Bit_Buffer);
+#endif
+  free_picture (frame_pic_1);
+
+  // free_picture() ignores NULL, so this is safe even if allocation never happened
+  if (input->RDPictureDecision)
+  {
+    free_picture (frame_pic_2);
+    free_picture (frame_pic_3);
+  }
+
+  // Deallocation of SI picture related memory
+  if (input->si_frame_indicator)
+  {
+    free_picture (frame_pic_si);
+    // guarded: the behaviour of the free_mem* helpers on NULL is not visible from here
+    if (lrec)
+      free_mem2Dint (lrec);
+    if (lrec_uv)
+      free_mem3Dint (lrec_uv,2);
+  }
+
+  if (top_pic)
+    free_picture (top_pic);
+  if (bottom_pic)
+    free_picture (bottom_pic);
+
+  free_dpb();
+  free_colocated(Co_located);
+  uninit_out_buffer();
+
+  free_global_buffers();
+
+  // free image mem
+  free_img ();
+  free_context_memory ();
+  FreeNalPayloadBuffer();
+  FreeParameterSets();
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Sets up the picture order count (POC) fields of img from the
+ *    encoder configuration in input.
+ *
+ *    Frame POCs advance in steps of 2, field POCs in steps of 1.
+ ***********************************************************************
+ */
+void init_poc()
+{
+  // non-zero when both picture level and macroblock level coding are plain frame coding
+  const int frame_only = (input->PicInterlace==FRAME_CODING) && (input->MbInterlace==FRAME_CODING);
+
+  //the following should probably go in sequence parameters
+  img->pic_order_cnt_type                    = input->pic_order_cnt_type;
+  img->delta_pic_order_always_zero_flag      = FALSE;
+  img->num_ref_frames_in_pic_order_cnt_cycle = 1;
+
+  if (input->BRefPictures != 1)
+  {
+    // offsets depend on the number of successive B frames
+    img->offset_for_non_ref_pic  =  -2*(input->successive_Bframe);
+    img->offset_for_ref_frame[0] =   2*(input->successive_Bframe+1);
+  }
+  else
+  {
+    img->offset_for_non_ref_pic  =   0;
+    img->offset_for_ref_frame[0] =   2;
+  }
+
+  // the bottom field is offset by one from the top field unless everything is frame coded
+  img->offset_for_top_to_bottom_field = frame_only ? 0 : 1;
+  img->pic_order_present_flag         = frame_only ? FALSE : TRUE;
+  img->delta_pic_order_cnt_bottom     = frame_only ? 0 : 1;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Clears the img->nz_coeff table.
+ * \par Input:
+ *    none
+ * \par  Output:
+ *    none
+ * \par Side effects:
+ *    sets img->nz_coeff[mb][0..3][0..3+img->num_blk8x8_uv] to 0 for
+ *    every macroblock index below img->PicSizeInMbs
+ ***********************************************************************
+ */
+void CAVLC_init(void)
+{
+  // 4 entries plus one per 8x8 chroma block, as in the original loop bound
+  const unsigned int entries_per_row = 4 + (unsigned int)img->num_blk8x8_uv;
+  unsigned int mb, row, col;
+
+  for (mb = 0; mb < img->PicSizeInMbs; mb++)
+  {
+    for (row = 0; row < 4; row++)
+    {
+      int *coeff_row = img->nz_coeff[mb][row];
+
+      for (col = 0; col < entries_per_row; col++)
+        coeff_row[col] = 0;
+    }
+  }
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Initializes the Image structure with appropriate parameters.
+ *
+ *    Derives bit depth, chroma format and picture size values from the
+ *    configuration and allocates the per-image work buffers (motion
+ *    vectors, coefficients, the img->quad table, macroblock data,
+ *    intra prediction mode maps, nz_coeff and the lambda tables).
+ * \par Input:
+ *    No arguments; reads the globals input, active_sps and active_pps
+ * \par  Output:
+ *    No return value; fills the global img. Also writes
+ *    input->img_width_cr / img_height_cr and the loop filter fields of
+ *    input.
+ ***********************************************************************
+ */
+void init_img()
+{
+  int i;
+  int byte_abs_range;        // size of the img->quad table (twice the larger sample range)
+  // chroma macroblock width/height indexed by yuv_format; used below to derive the chroma plane size
+  static int mb_width_cr[4] = {0,8, 8,16};
+  static int mb_height_cr[4]= {0,8,16,16};
+
+  img->yuv_format = input->yuv_format;
+
+  //pel bitdepth init
+  img->bitdepth_luma   = input->BitDepthLuma;
+
+  img->bitdepth_luma_qp_scale   = 6*(img->bitdepth_luma   - 8);
+  img->bitdepth_lambda_scale    = 2*(img->bitdepth_luma   - 8);
+
+  img->dc_pred_value_luma   = 1<<(img->bitdepth_luma - 1); // half of the luma sample range
+  img->max_imgpel_value = (1<<img->bitdepth_luma) - 1;     // largest luma sample value
+  img->mb_size[0][0] = img->mb_size[0][1] = MB_BLOCK_SIZE;
+
+  if (img->yuv_format != YUV400)
+  {
+    img->bitdepth_chroma      = input->BitDepthChroma;
+    img->dc_pred_value_chroma = 1<<(img->bitdepth_chroma - 1);
+    img->max_imgpel_value_uv  = (1<<img->bitdepth_chroma) - 1;
+    img->num_blk8x8_uv        = (1<<img->yuv_format)&(~(0x1)); // 2^yuv_format with bit 0 cleared
+    img->num_cdc_coeff        = img->num_blk8x8_uv<<1;
+    img->mb_size[1][0] = img->mb_size[2][0] = img->mb_cr_size_x = (img->yuv_format==YUV420 || img->yuv_format==YUV422)? 8:16;
+    img->mb_size[1][1] = img->mb_size[2][1] = img->mb_cr_size_y = (img->yuv_format==YUV444 || img->yuv_format==YUV422)? 16:8;
+
+    img->bitdepth_chroma_qp_scale = 6*(img->bitdepth_chroma - 8);
+
+    img->chroma_qp_offset[0] = active_pps->cb_qp_index_offset;
+    img->chroma_qp_offset[1] = active_pps->cr_qp_index_offset;
+  }
+  else
+  {
+    // YUV400: all chroma related values are zeroed
+    img->bitdepth_chroma     = 0;
+    img->max_imgpel_value_uv = 0;
+    img->num_blk8x8_uv       = 0;
+    img->num_cdc_coeff       = 0;
+    img->mb_size[1][0] = img->mb_size[2][0] = img->mb_cr_size_x        = 0;
+    img->mb_size[1][1] = img->mb_size[2][1] = img->mb_cr_size_y        = 0;
+
+    img->bitdepth_chroma_qp_scale = 0;
+    img->bitdepth_chroma_qp_scale = 0; // NOTE(review): duplicate of the previous line; dc_pred_value_chroma is not assigned in this branch - confirm intent
+
+    img->chroma_qp_offset[0] = 0;
+    img->chroma_qp_offset[1] = 0;
+  }
+  // 16 when the deeper of the two bit depths exceeds 8, otherwise 8
+  if((img->bitdepth_luma > img->bitdepth_chroma) || img->yuv_format == YUV400)
+    img->pic_unit_size_on_disk = (img->bitdepth_luma > 8)? 16:8;
+  else
+    img->pic_unit_size_on_disk = (img->bitdepth_chroma > 8)? 16:8;
+
+  img->max_bitCount =  128 + 256 * img->bitdepth_luma + 2 * img->mb_cr_size_y * img->mb_cr_size_x * img->bitdepth_chroma;
+
+  img->max_qp_delta = (25 + (img->bitdepth_luma_qp_scale>>1));
+  img->min_qp_delta = img->max_qp_delta + 1;
+
+  img->num_ref_frames = active_sps->num_ref_frames;
+  img->max_num_references   = active_sps->frame_mbs_only_flag ? active_sps->num_ref_frames : 2 * active_sps->num_ref_frames; // doubled when field coding is possible
+
+  img->buf_cycle = input->num_ref_frames;
+
+  img->DeblockCall = 0;
+
+  img->framerate=(float) input->FrameRate;   // The basic frame rate (of the original sequence)
+
+  get_mem_mv (&(img->pred_mv));
+  get_mem_mv (&(img->all_mv));
+
+  if (input->BiPredMotionEstimation)
+  {
+    get_mem_mv (&(img->bipred_mv1));
+    get_mem_mv (&(img->bipred_mv2));
+  }
+
+  get_mem_ACcoeff (&(img->cofAC));
+  get_mem_DCcoeff (&(img->cofDC));
+
+  if (input->AdaptiveRounding)
+  {
+    get_mem3Dint(&(img->fadjust4x4), 4, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+    get_mem3Dint(&(img->fadjust8x8), 3, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+    get_mem4Dint(&(img->fadjust4x4Cr), 4, 2, img->mb_cr_size_y, img->mb_cr_size_x);
+    get_mem4Dint(&(img->fadjust8x8Cr), 1, 2, img->mb_cr_size_y, img->mb_cr_size_x);
+  }
+  // with MbInterlace enabled, separate storage is allocated for the top/bottom frame macroblock data
+  if(input->MbInterlace)
+  {
+    get_mem_mv (&(rddata_top_frame_mb.pred_mv));
+    get_mem_mv (&(rddata_top_frame_mb.all_mv));
+
+    get_mem_mv (&(rddata_bot_frame_mb.pred_mv));
+    get_mem_mv (&(rddata_bot_frame_mb.all_mv));
+
+    get_mem_ACcoeff (&(rddata_top_frame_mb.cofAC));
+    get_mem_DCcoeff (&(rddata_top_frame_mb.cofDC));
+
+    get_mem_ACcoeff (&(rddata_bot_frame_mb.cofAC));
+    get_mem_DCcoeff (&(rddata_bot_frame_mb.cofDC));
+
+    if ( input->MbInterlace != FRAME_MB_PAIR_CODING ) // field macroblock storage is skipped for FRAME_MB_PAIR_CODING
+    {
+      get_mem_mv (&(rddata_top_field_mb.pred_mv));
+      get_mem_mv (&(rddata_top_field_mb.all_mv));
+
+      get_mem_mv (&(rddata_bot_field_mb.pred_mv));
+      get_mem_mv (&(rddata_bot_field_mb.all_mv));
+
+      get_mem_ACcoeff (&(rddata_top_field_mb.cofAC));
+      get_mem_DCcoeff (&(rddata_top_field_mb.cofDC));
+
+      get_mem_ACcoeff (&(rddata_bot_field_mb.cofAC));
+      get_mem_DCcoeff (&(rddata_bot_field_mb.cofDC));
+    }
+  }
+  // table size: twice (largest sample value + 1), using the larger of the luma and chroma ranges
+  if(img->max_imgpel_value > img->max_imgpel_value_uv)
+    byte_abs_range = (img->max_imgpel_value + 1) * 2;
+  else
+    byte_abs_range = (img->max_imgpel_value_uv + 1) * 2;
+
+  if ((img->quad = (int*)calloc (byte_abs_range, sizeof(int))) == NULL)
+    no_mem_exit ("init_img: img->quad");
+  img->quad+=byte_abs_range/2; // point at the middle so negative indices are valid; free_img() undoes this offset before free()
+  for (i=0; i < byte_abs_range/2; ++i)
+  {
+    img->quad[i]=img->quad[-i]=i*i; // quad[d] == d*d for positive and negative d
+  }
+
+  img->width    = (input->img_width+img->auto_crop_right);
+  img->height   = (input->img_height+img->auto_crop_bottom);
+  img->width_blk  = img->width / BLOCK_SIZE;
+  img->height_blk = img->height / BLOCK_SIZE;
+
+  img->width_padded =  img->width + 2 * IMG_PAD_SIZE;
+  img->height_padded = img->height + 2 * IMG_PAD_SIZE;
+
+  if (img->yuv_format != YUV400)
+  {
+    // chroma size = luma size divided by (16 / chroma macroblock size) for the current format
+    img->width_cr = img->width/(16/mb_width_cr[img->yuv_format]);
+    img->height_cr= img->height/(16/mb_height_cr[img->yuv_format]);
+
+    input->img_width_cr  = input->img_width/(16/mb_width_cr[img->yuv_format]);
+    input->img_height_cr = input->img_height/(16/mb_height_cr[img->yuv_format]);
+  }
+  else
+  {
+    img->width_cr = 0;
+    img->height_cr= 0;
+
+    input->img_width_cr  = 0;
+    input->img_height_cr = 0;
+  }
+  img->height_cr_frame = img->height_cr;
+
+  img->size = img->width * img->height;
+  img->size_cr = img->width_cr * img->height_cr;
+
+  img->PicWidthInMbs    = img->width/MB_BLOCK_SIZE;
+  img->FrameHeightInMbs = img->height/MB_BLOCK_SIZE;
+  img->FrameSizeInMbs   = img->PicWidthInMbs * img->FrameHeightInMbs;
+
+  img->PicHeightInMapUnits = ( active_sps->frame_mbs_only_flag ? img->FrameHeightInMbs : img->FrameHeightInMbs/2 );
+
+  if(((img->mb_data) = (Macroblock *) calloc(img->FrameSizeInMbs,sizeof(Macroblock))) == NULL)
+    no_mem_exit("init_img: img->mb_data");
+
+  if(input->UseConstrainedIntraPred)
+  {
+    if(((img->intra_block) = (int*)calloc(img->FrameSizeInMbs,sizeof(int))) == NULL)
+      no_mem_exit("init_img: img->intra_block");
+  }
+
+  if (input->CtxAdptLagrangeMult == 1)
+  {
+    if ((mb16x16_cost_frame = (double*)calloc(img->FrameSizeInMbs, sizeof(double))) == NULL)
+    {
+      no_mem_exit("init mb16x16_cost_frame");
+    }
+  }
+  get_mem2D((byte***)&(img->ipredmode), img->height_blk, img->width_blk);        //need two extra rows at right and bottom
+  get_mem2D((byte***)&(img->ipredmode8x8), img->height_blk, img->width_blk);     // help storage for ipredmode 8x8, inserted by YV
+  memset(&img->ipredmode[0][0],-1, img->height_blk * img->width_blk *sizeof(char)); // fills the map with 0xFF bytes; presumably relies on get_mem2D() returning one contiguous buffer - verify
+  memset(&img->ipredmode8x8[0][0],-1, img->height_blk * img->width_blk *sizeof(char));
+
+  get_mem2D((byte***)&(rddata_top_frame_mb.ipredmode), img->height_blk, img->width_blk); // allocated regardless of MbInterlace; not released by free_img()
+
+  if(input->MbInterlace)
+  {
+    get_mem2D((byte***)&(rddata_bot_frame_mb.ipredmode), img->height_blk, img->width_blk);
+    get_mem2D((byte***)&(rddata_top_field_mb.ipredmode), img->height_blk, img->width_blk);
+    get_mem2D((byte***)&(rddata_bot_field_mb.ipredmode), img->height_blk, img->width_blk);
+  }
+  // CAVLC mem
+  get_mem3Dint(&(img->nz_coeff), img->FrameSizeInMbs, 4, 4+img->num_blk8x8_uv);
+
+  // lambda tables: 10 rows of 52 + bitdepth_luma_qp_scale entries each; the last argument is the offset passed to the *_offset allocators
+  get_mem2Ddb_offset(&(img->lambda_md), 10, 52 + img->bitdepth_luma_qp_scale,img->bitdepth_luma_qp_scale);
+  get_mem3Ddb_offset (&(img->lambda_me), 10, 52 + img->bitdepth_luma_qp_scale, 3, img->bitdepth_luma_qp_scale);
+  get_mem3Dint_offset(&(img->lambda_mf), 10, 52 + img->bitdepth_luma_qp_scale, 3, img->bitdepth_luma_qp_scale);
+
+  if (input->CtxAdptLagrangeMult == 1)
+  {
+    get_mem2Ddb_offset(&(img->lambda_mf_factor), 10, 52 + img->bitdepth_luma_qp_scale,img->bitdepth_luma_qp_scale);
+  }
+
+  //get_mem2Ddouble(&(img->lambda_md), 10, 52 + img->bitdepth_luma_qp_scale);
+
+  CAVLC_init();
+
+
+  img->GopLevels = (input->successive_Bframe) ? 1 : 0;
+  img->mb_y_upd=0;
+
+  RandomIntraInit (img->PicWidthInMbs, img->FrameHeightInMbs, input->RandomIntraMBRefresh);
+
+  InitSEIMessages();  // Tian Dong (Sept 2002)
+
+  // Initialize filtering parameters. If sending parameters, the offsets are
+  // multiplied by 2 since inputs are taken in "div 2" format.
+  // If not sending parameters, all fields are cleared
+  if (input->LFSendParameters)
+  {
+    input->LFAlphaC0Offset <<= 1;
+    input->LFBetaOffset <<= 1;
+  }
+  else
+  {
+    input->LFDisableIdc = 0;
+    input->LFAlphaC0Offset = 0;
+    input->LFBetaOffset = 0;
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Releases the motion vector, coefficient, adaptive rounding and
+ *    img->quad buffers of the global img, plus the macroblock
+ *    interlace work buffers when MbInterlace is enabled.
+ * \par Input:
+ *    none (operates on the globals img and input)
+ ***********************************************************************
+ */
+void free_img ()
+{
+  CloseSEIMessages(); // Tian Dong (Sept 2002)
+
+  // motion vector storage
+  free_mem_mv (img->pred_mv);
+  free_mem_mv (img->all_mv);
+  if (input->BiPredMotionEstimation)
+  {
+    free_mem_mv (img->bipred_mv1);
+    free_mem_mv (img->bipred_mv2);
+  }
+
+  // coefficient storage
+  free_mem_ACcoeff (img->cofAC);
+  free_mem_DCcoeff (img->cofDC);
+
+  // adaptive rounding tables
+  if (input->AdaptiveRounding)
+  {
+    free_mem3Dint(img->fadjust4x4, 4);
+    free_mem3Dint(img->fadjust8x8, 3);
+    free_mem4Dint(img->fadjust4x4Cr, 4, 2);
+    free_mem4Dint(img->fadjust8x8Cr, 1, 2);
+  }
+
+  // macroblock interlace work buffers
+  if (input->MbInterlace)
+  {
+    free_mem_mv (rddata_top_frame_mb.pred_mv);
+    free_mem_mv (rddata_top_frame_mb.all_mv);
+    free_mem_mv (rddata_bot_frame_mb.pred_mv);
+    free_mem_mv (rddata_bot_frame_mb.all_mv);
+
+    free_mem_ACcoeff (rddata_top_frame_mb.cofAC);
+    free_mem_DCcoeff (rddata_top_frame_mb.cofDC);
+    free_mem_ACcoeff (rddata_bot_frame_mb.cofAC);
+    free_mem_DCcoeff (rddata_bot_frame_mb.cofDC);
+
+    if (input->MbInterlace != FRAME_MB_PAIR_CODING)
+    {
+      free_mem_mv (rddata_top_field_mb.pred_mv);
+      free_mem_mv (rddata_top_field_mb.all_mv);
+      free_mem_mv (rddata_bot_field_mb.pred_mv);
+      free_mem_mv (rddata_bot_field_mb.all_mv);
+
+      free_mem_ACcoeff (rddata_top_field_mb.cofAC);
+      free_mem_DCcoeff (rddata_top_field_mb.cofDC);
+      free_mem_ACcoeff (rddata_bot_field_mb.cofAC);
+      free_mem_DCcoeff (rddata_bot_field_mb.cofDC);
+    }
+
+    free_mem2D((byte**)rddata_bot_frame_mb.ipredmode);
+    free_mem2D((byte**)rddata_top_field_mb.ipredmode);
+    free_mem2D((byte**)rddata_bot_field_mb.ipredmode);
+  }
+
+  // img->quad points into the middle of its allocation; step back by
+  // (larger maximum sample value + 1) to reach the address to free
+  free (img->quad - ((img->max_imgpel_value > img->max_imgpel_value_uv
+                      ? img->max_imgpel_value
+                      : img->max_imgpel_value_uv) + 1));
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocates a zero-initialized Picture structure.
+ *    Calls no_mem_exit() when the allocation fails.
+ * \return
+ *    Pointer to the new Picture
+ ************************************************************************
+ */
+
+Picture *malloc_picture()
+{
+  Picture *new_pic = calloc (1, sizeof (Picture));
+
+  if (new_pic == NULL)
+  {
+    no_mem_exit ("malloc_picture: Picture structure");
+  }
+
+  //! Note: slice structures are allocated as needed in code_a_picture
+  return new_pic;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Frees a picture together with its slice list
+ * \param
+ *    pic: Pointer to the Picture to be freed; NULL is accepted and
+ *         ignored
+ ************************************************************************
+ */
+
+
+void free_picture(Picture *pic)
+{
+  // nothing to release
+  if (pic == NULL)
+    return;
+
+  free_slice_list(pic);
+  free (pic);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reports frame statistical data to a stats file
+ ************************************************************************
+ */
+void report_frame_statistic()
+{
+  FILE *p_stat_frm = NULL;
+  static int64   last_mode_use[NUM_PIC_TYPE][MAXMODE];
+  static int   last_b8_mode_0[NUM_PIC_TYPE][2];
+  static int   last_mode_chroma_use[4];
+  static int64   last_bit_ctr_n = 0;
+  int i;
+  char name[20];
+  int bitcounter;
+
+#ifndef WIN32
+  time_t now;
+  struct tm *l_time;
+  char string[1000];
+#else
+  char timebuf[128];
+#endif
+
+
+  // write to log file
+  if ((p_stat_frm=fopen("stat_frame.dat","r"))==0)                      // check if file exist
+  {
+    if ((p_stat_frm=fopen("stat_frame.dat","a"))==NULL)            // append new statistic at the end
+    {
+      snprintf(errortext, ET_SIZE, "Error open file %s  \n","stat_frame.dat.dat");
+      error(errortext, 500);
+    }
+    else                                            // Create header for new log file
+    {
+      fprintf(p_stat_frm," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+      fprintf(p_stat_frm,"|            Encoder statistics. This file is generated during first encoding session, new sessions will be appended                                                                                                                                                                                                                                                                                                                                                              |\n");
+      fprintf(p_stat_frm," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+    }
+  }
+  else
+  {
+    fclose (p_stat_frm);
+    if ((p_stat_frm=fopen("stat_frame.dat","a"))==NULL)            // File exist,just open for appending
+    {
+      snprintf(errortext, ET_SIZE, "Error open file %s  \n","stat_frame.dat.dat");
+      error(errortext, 500);
+    }
+  }
+
+  if (frame_statistic_start)
+  {
+    fprintf(p_stat_frm,"|  ver       | Date  | Time  |    Sequence        |Frm | QP |P/MbInt|   Bits   |  SNRY  |  SNRU  |  SNRV  |  I4  |  I8  | I16  | IC0  | IC1  | IC2  | IC3  | PI4  | PI8  | PI16 |  P0  |  P1  |  P2  |  P3  | P1*8*| P1*4*| P2*8*| P2*4*| P3*8*| P3*4*|  P8  | P8:4 | P4*8*| P4*4*| P8:5 | P8:6 | P8:7 | BI4  | BI8  | BI16 |  B0  |  B1  |  B2  |  B3  | B0*8*| B0*4*| B1*8*| B1*4*| B2*8*| B2*4*| B3*8*| B3*4*|  B8  | B8:0 |B80*8*|B80*4*| B8:4 | B4*8*| B4*4*| B8:5 | B8:6 | B8:7 |\n");
+    fprintf(p_stat_frm," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+  }
+
+  //report
+  fprintf(p_stat_frm,"|%4s/%s", VERSION, EXT_VERSION);
+
+#ifdef WIN32
+  _strdate( timebuf );
+  fprintf(p_stat_frm,"| %1.5s |",timebuf );
+
+  _strtime( timebuf);
+  fprintf(p_stat_frm," % 1.5s |",timebuf);
+#else
+  now = time ((time_t *) NULL); // Get the system time and put it into 'now' as 'calender time'
+  time (&now);
+  l_time = localtime (&now);
+  strftime (string, sizeof string, "%d-%b-%Y", l_time);
+  fprintf(p_stat_frm,"| %1.5s |",string );
+
+  strftime (string, sizeof string, "%H:%M:%S", l_time);
+  fprintf(p_stat_frm," %1.5s |",string );
+#endif
+
+  for (i=0;i<20;i++)
+    name[i]=input->infile[i + imax(0,(int) (strlen(input->infile)-20))]; // write last part of path, max 20 chars
+  fprintf(p_stat_frm,"%20.20s|",name);
+
+  fprintf(p_stat_frm,"%3d |",frame_no);
+
+  fprintf(p_stat_frm,"%3d |",img->qp);
+
+  fprintf(p_stat_frm,"  %d/%d  |",input->PicInterlace, input->MbInterlace);
+
+
+  if (img->number == 0 && img->frame_num == 0)
+  {
+    bitcounter = (int) stats->bit_ctr_I;
+  }
+  else
+  {
+    bitcounter = (int) (stats->bit_ctr_n - last_bit_ctr_n);
+    last_bit_ctr_n = stats->bit_ctr_n;
+  }
+
+  //report bitrate
+  fprintf(p_stat_frm, " %9d|", bitcounter);
+
+  //report snr's
+  fprintf(p_stat_frm, " %2.4f| %2.4f| %2.4f|", snr->snr_y, snr->snr_u, snr->snr_v);
+
+  //report modes
+  //I-Modes
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[I_SLICE][I4MB] - last_mode_use[I_SLICE][I4MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[I_SLICE][I8MB] - last_mode_use[I_SLICE][I8MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[I_SLICE][I16MB] - last_mode_use[I_SLICE][I16MB]);
+
+  //chroma intra mode
+  fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[0] - last_mode_chroma_use[0]);
+  fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[1] - last_mode_chroma_use[1]);
+  fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[2] - last_mode_chroma_use[2]);
+  fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[3] - last_mode_chroma_use[3]);
+
+  //P-Modes
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][I4MB] - last_mode_use[P_SLICE][I4MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][I8MB] - last_mode_use[P_SLICE][I8MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][I16MB] - last_mode_use[P_SLICE][I16MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][0   ] - last_mode_use[P_SLICE][0   ]);
+
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][1   ] - last_mode_use[P_SLICE][1   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][2   ] - last_mode_use[P_SLICE][2   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][3   ] - last_mode_use[P_SLICE][3   ]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][1]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][1]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][2]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][2]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][3]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][3]);
+
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][P8x8] - last_mode_use[P_SLICE][P8x8]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][4   ] - last_mode_use[P_SLICE][4   ]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][4]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][4]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][5   ] - last_mode_use[P_SLICE][5   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][6   ] - last_mode_use[P_SLICE][6   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[P_SLICE][7   ] - last_mode_use[P_SLICE][7   ]);
+
+  //B-Modes
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][I4MB] - last_mode_use[B_SLICE][I4MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][I8MB] - last_mode_use[B_SLICE][I8MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][I16MB] - last_mode_use[B_SLICE][I16MB]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][0   ] - last_mode_use[B_SLICE][0   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][1   ] - last_mode_use[B_SLICE][1   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][2   ] - last_mode_use[B_SLICE][2   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][3   ] - last_mode_use[B_SLICE][3   ]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][0]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][0]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][1]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][1]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][2]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][2]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][3]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][3]);
+
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][P8x8] - last_mode_use[B_SLICE][P8x8]);
+  fprintf(p_stat_frm, " %d|",(stats->b8_mode_0_use [B_SLICE][0]+stats->b8_mode_0_use [B_SLICE][1]) - (last_b8_mode_0[B_SLICE][0]+last_b8_mode_0[B_SLICE][1]));
+  fprintf(p_stat_frm, " %5d|",stats->b8_mode_0_use [B_SLICE][1] - last_b8_mode_0[B_SLICE][1]);
+  fprintf(p_stat_frm, " %5d|",stats->b8_mode_0_use [B_SLICE][0] - last_b8_mode_0[B_SLICE][0]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][4   ] - last_mode_use[B_SLICE][4   ]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][4]);
+  fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][4]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][5   ] - last_mode_use[B_SLICE][5   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][6   ] - last_mode_use[B_SLICE][6   ]);
+  fprintf(p_stat_frm, " %5" FORMAT_OFF_T  "|",stats->mode_use[B_SLICE][7   ] - last_mode_use[B_SLICE][7   ]);
+
+  fprintf(p_stat_frm, "\n");
+
+  //save the last results
+  memcpy(last_mode_use[I_SLICE],stats->mode_use[I_SLICE], MAXMODE *  sizeof(int64));
+  memcpy(last_mode_use[P_SLICE],stats->mode_use[P_SLICE], MAXMODE *  sizeof(int64));
+  memcpy(last_mode_use[B_SLICE],stats->mode_use[B_SLICE], MAXMODE *  sizeof(int64));
+  memset(stats->mode_use_transform_8x8,0, 2 * MAXMODE *  sizeof(int));
+  memset(stats->mode_use_transform_4x4,0, 2 * MAXMODE *  sizeof(int));
+  memcpy(last_b8_mode_0[B_SLICE],stats->b8_mode_0_use[B_SLICE], 2 *  sizeof(int));
+  memcpy(last_mode_chroma_use,stats->intra_chroma_mode, 4 *  sizeof(int));
+
+  frame_statistic_start = 0;
+  fclose(p_stat_frm);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reports the gathered information to appropriate outputs
+ * \par Input:
+ *    struct inp_par *inp,                                            \n
+ *    struct img_par *img,                                            \n
+ *    struct stat_par *stats,                                          \n
+ *    struct snr_par *snr                                              \n
+ *
+ * \par Output:
+ *    None
+ ************************************************************************
+ */
+void report()
+{
+  int64 bit_use[NUM_PIC_TYPE][2] ;
+  int i,j;
+  char name[30];
+  int64 total_bits;
+  float frame_rate;
+  double mean_motion_info_bit_use[2] = {0.0};
+
+#ifndef WIN32
+  time_t now;
+  struct tm *l_time;
+  char string[1000];
+#else
+  char timebuf[128];
+#endif
+  bit_use[I_SLICE][0] = frame_ctr[I_SLICE];
+  bit_use[P_SLICE][0] = imax(frame_ctr[P_SLICE] + frame_ctr[SP_SLICE],1);
+  bit_use[B_SLICE][0] = frame_ctr[B_SLICE];
+
+  //  Accumulate bit usage for inter and intra frames
+  for (j=0;j<NUM_PIC_TYPE;j++)
+  {
+    bit_use[j][1] = 0;
+  }
+
+  for (j=0;j<NUM_PIC_TYPE;j++)
+  {
+    for(i=0; i<MAXMODE; i++)
+      bit_use[j][1] += stats->bit_use_mode[j][i];
+
+    bit_use[j][1]+=stats->bit_use_header[j];
+    bit_use[j][1]+=stats->bit_use_mb_type[j];
+    bit_use[j][1]+=stats->tmp_bit_use_cbp[j];
+    bit_use[j][1]+=stats->bit_use_coeffY[j];
+    bit_use[j][1]+=stats->bit_use_coeffC[j];
+    bit_use[j][1]+=stats->bit_use_delta_quant[j];
+    bit_use[j][1]+=stats->bit_use_stuffingBits[j];
+  }
+
+  frame_rate = (img->framerate *(float)(stats->successive_Bframe + 1)) / (float) (input->jumpd+1);
+
+//! Currently adding NVB bits on P rate. Maybe additional stats info should be created instead and added in log file
+  stats->bitrate_I = (stats->bit_ctr_I)*(frame_rate)/(float) (input->no_frames + frame_ctr[B_SLICE]);
+  stats->bitrate_P = (stats->bit_ctr_P)*(frame_rate)/(float) (input->no_frames + frame_ctr[B_SLICE]);
+  stats->bitrate_B = (stats->bit_ctr_B)*(frame_rate)/(float) (input->no_frames + frame_ctr[B_SLICE]);
+
+  fprintf(stdout,"-------------------------------------------------------------------------------\n");
+  if (input->Verbose != 0)
+  {
+    fprintf(stdout,  " Freq. for encoded bitstream       : %1.0f\n",img->framerate/(float)(input->jumpd+1));
+    for (i=0; i<3; i++)
+    {
+      fprintf(stdout," ME Metric for Refinement Level %1d  : %s\n",i,DistortionType[input->MEErrorMetric[i]]);
+    }
+    fprintf(stdout," Mode Decision Metric              : %s\n",DistortionType[input->ModeDecisionMetric]);
+
+    switch ( input->ChromaMEEnable )
+    {
+    case 1:
+      fprintf(stdout," Motion Estimation for components  : YCbCr\n");
+      break;
+    default:
+      fprintf(stdout," Motion Estimation for components  : Y\n");
+      break;
+    }
+
+    fprintf(stdout,  " Image format                      : %dx%d\n",input->img_width,input->img_height);
+
+    if(input->intra_upd)
+      fprintf(stdout," Error robustness                  : On\n");
+    else
+      fprintf(stdout," Error robustness                  : Off\n");
+    fprintf(stdout,  " Search range                      : %d\n",input->search_range);
+
+    fprintf(stdout,  " Total number of references        : %d\n",input->num_ref_frames);
+    fprintf(stdout,  " References for P slices           : %d\n",input->P_List0_refs? input->P_List0_refs:input->num_ref_frames);
+    if(stats->successive_Bframe != 0)
+    {
+      fprintf(stdout," List0 references for B slices     : %d\n",input->B_List0_refs? input->B_List0_refs:input->num_ref_frames);
+      fprintf(stdout," List1 references for B slices     : %d\n",input->B_List1_refs? input->B_List1_refs:input->num_ref_frames);
+    }
+
+
+    // B pictures
+    fprintf(stdout,  " Sequence type                     :" );
+
+    if(stats->successive_Bframe>0 && input->HierarchicalCoding)
+    {
+      fprintf(stdout, " Hierarchy (QP: I %d, P %d, B %d) \n",
+        input->qp0, input->qpN, input->qpB);
+    }
+    else if(stats->successive_Bframe>0)
+    {
+      char seqtype[80];
+      int i,j;
+
+      strcpy (seqtype,"I");
+
+      for (j=0;j<2;j++)
+      {
+        for (i=0;i<stats->successive_Bframe;i++)
+        {
+          if (input->BRefPictures)
+            strncat(seqtype,"-RB", imax(0, (int) (79-strlen(seqtype))));
+          else
+            strncat(seqtype,"-B", imax(0, (int) (79-strlen(seqtype))));
+        }
+        strncat(seqtype,"-P", imax(0, (int) (79-strlen(seqtype))));
+      }
+      if (input->BRefPictures)
+        fprintf(stdout, " %s (QP: I %d, P %d, RB %d) \n", seqtype,input->qp0, input->qpN, iClip3(0,51,input->qpB+input->qpBRSOffset));
+      else
+        fprintf(stdout, " %s (QP: I %d, P %d, B %d) \n", seqtype,input->qp0, input->qpN, input->qpB);
+    }
+    else if(stats->successive_Bframe==0 && input->sp_periodicity==0) fprintf(stdout, " IPPP (QP: I %d, P %d) \n",   input->qp0, input->qpN);
+
+    else fprintf(stdout, " I-P-P-SP-P (QP: I %d, P %d, SP (%d, %d)) \n",  input->qp0, input->qpN, input->qpsp, input->qpsp_pred);
+
+    // report on entropy coding  method
+    if (input->symbol_mode == UVLC)
+      fprintf(stdout," Entropy coding method             : CAVLC\n");
+    else
+      fprintf(stdout," Entropy coding method             : CABAC\n");
+
+    fprintf(stdout,  " Profile/Level IDC                 : (%d,%d)\n",input->ProfileIDC,input->LevelIDC);
+
+  if (input->SearchMode==UM_HEX)
+    fprintf(stdout,  " Motion Estimation Scheme          : HEX\n");
+  else if (input->SearchMode==UM_HEX_SIMPLE)
+    fprintf(stdout,  " Motion Estimation Scheme          : SHEX\n");
+   else if (input->SearchMode == EPZS)
+   {
+     fprintf(stdout,  " Motion Estimation Scheme          : EPZS\n");
+     EPZSOutputStats(stdout, 0);
+   }
+  else if (input->SearchMode == FAST_FULL_SEARCH)
+    fprintf(stdout,  " Motion Estimation Scheme          : Fast Full Search\n");
+  else
+    fprintf(stdout,  " Motion Estimation Scheme          : Full Search\n");
+
+
+
+#ifdef _FULL_SEARCH_RANGE_
+    if (input->full_search == 2)
+      fprintf(stdout," Search range restrictions         : none\n");
+    else if (input->full_search == 1)
+      fprintf(stdout," Search range restrictions         : older reference frames\n");
+    else
+      fprintf(stdout," Search range restrictions         : smaller blocks and older reference frames\n");
+#endif
+
+    if (input->rdopt)
+      fprintf(stdout," RD-optimized mode decision        : used\n");
+    else
+      fprintf(stdout," RD-optimized mode decision        : not used\n");
+
+    switch(input->partition_mode)
+    {
+    case PAR_DP_1:
+      fprintf(stdout," Data Partitioning Mode            : 1 partition \n");
+      break;
+    case PAR_DP_3:
+      fprintf(stdout," Data Partitioning Mode            : 3 partitions \n");
+      break;
+    default:
+      fprintf(stdout," Data Partitioning Mode            : not supported\n");
+      break;
+    }
+
+    switch(input->of_mode)
+    {
+    case PAR_OF_ANNEXB:
+      fprintf(stdout," Output File Format                : H.264 Bit Stream File Format \n");
+      break;
+    case PAR_OF_RTP:
+      fprintf(stdout," Output File Format                : RTP Packet File Format \n");
+      break;
+    default:
+      fprintf(stdout," Output File Format                : not supported\n");
+      break;
+    }
+}
+
+  fprintf(stdout,"------------------ Average data all frames  -----------------------------------\n");
+  if (input->Verbose != 0)
+  {
+    int  impix = input->img_height * input->img_width;
+    int  impix_cr = input->img_height_cr * input->img_width_cr;
+    unsigned int max_pix_value_sqd = img->max_imgpel_value * img->max_imgpel_value;
+    unsigned int max_pix_value_sqd_uv = img->max_imgpel_value_uv * img->max_imgpel_value_uv;
+    float csnr_y = (float) (10 * log10 (max_pix_value_sqd *
+      (double)((double) impix / (snr->msse_y == 0.0? 1.0 : snr->msse_y))));
+    float csnr_u = (float) (10 * log10 (max_pix_value_sqd_uv *
+      (double)((double) impix_cr / (snr->msse_u == 0.0? 1.0 : snr->msse_u))));
+    float csnr_v = (float) (10 * log10 (max_pix_value_sqd_uv *
+      (double)((double) impix_cr / (snr->msse_v == 0.0? 1.0 : snr->msse_v))));
+
+    fprintf(stdout," PSNR Y(dB)                        : %5.2f\n",snr->snr_ya);
+    fprintf(stdout," PSNR U(dB)                        : %5.2f\n",snr->snr_ua);
+    fprintf(stdout," PSNR V(dB)                        : %5.2f\n",snr->snr_va);
+    fprintf(stdout," cSNR Y(dB)                        : %5.2f (%5.2f)\n",csnr_y,snr->msse_y/impix);
+    fprintf(stdout," cSNR U(dB)                        : %5.2f (%5.2f)\n",csnr_u,snr->msse_u/impix_cr);
+    fprintf(stdout," cSNR V(dB)                        : %5.2f (%5.2f)\n",csnr_v,snr->msse_v/impix_cr);
+  }
+
+  if(frame_ctr[B_SLICE]!=0)
+  {
+    fprintf(stdout, " Total bits                        : %" FORMAT_OFF_T  " (I %" FORMAT_OFF_T  ", P %" FORMAT_OFF_T  ", B %" FORMAT_OFF_T  " NVB %d) \n",
+      total_bits=stats->bit_ctr_P + stats->bit_ctr_I + stats->bit_ctr_B + stats->bit_ctr_parametersets,
+      stats->bit_ctr_I, stats->bit_ctr_P, stats->bit_ctr_B, stats->bit_ctr_parametersets);
+
+    frame_rate = (img->framerate *(float)(stats->successive_Bframe + 1)) / (float) (input->jumpd+1);
+//    stats->bitrate= ((float) total_bits * frame_rate)/((float) (input->no_frames + frame_ctr[B_SLICE]));
+    stats->bitrate= ((float) total_bits * frame_rate)/((float)(frame_ctr[I_SLICE] + frame_ctr[P_SLICE] + frame_ctr[B_SLICE]));
+
+    fprintf(stdout, " Bit rate (kbit/s)  @ %2.2f Hz     : %5.2f\n", frame_rate, stats->bitrate/1000);
+
+  }
+  else if (input->sp_periodicity==0)
+  {
+    fprintf(stdout, " Total bits                        : %" FORMAT_OFF_T  " (I %" FORMAT_OFF_T  ", P %" FORMAT_OFF_T  ", NVB %d) \n",
+      total_bits=stats->bit_ctr_P + stats->bit_ctr_I + stats->bit_ctr_parametersets, stats->bit_ctr_I, stats->bit_ctr_P, stats->bit_ctr_parametersets);
+
+
+    frame_rate = img->framerate / ( (float) (input->jumpd + 1) );
+    stats->bitrate= ((float) total_bits * frame_rate)/((float) input->no_frames );
+
+    fprintf(stdout, " Bit rate (kbit/s)  @ %2.2f Hz     : %5.2f\n", frame_rate, stats->bitrate/1000);
+  }
+  else
+  {
+    fprintf(stdout, " Total bits                        : %" FORMAT_OFF_T  " (I %" FORMAT_OFF_T  ", P %" FORMAT_OFF_T  ", NVB %d) \n",
+      total_bits=stats->bit_ctr_P + stats->bit_ctr_I + stats->bit_ctr_parametersets, stats->bit_ctr_I, stats->bit_ctr_P, stats->bit_ctr_parametersets);
+
+
+    frame_rate = img->framerate / ( (float) (input->jumpd + 1) );
+    stats->bitrate= ((float) total_bits * frame_rate)/((float) input->no_frames );
+
+    fprintf(stdout, " Bit rate (kbit/s)  @ %2.2f Hz     : %5.2f\n", frame_rate, stats->bitrate/1000);
+  }
+
+  fprintf(stdout, " Bits to avoid Startcode Emulation : %d \n", stats->bit_ctr_emulationprevention);
+  fprintf(stdout, " Bits for parameter sets           : %d \n", stats->bit_ctr_parametersets);
+
+  fprintf(stdout,"-------------------------------------------------------------------------------\n");
+  fprintf(stdout,"Exit JM %s encoder ver %s ", JM, VERSION);
+  fprintf(stdout,"\n");
+
+  // status file
+  if ((p_stat=fopen("stats.dat","wt"))==0)
+  {
+    snprintf(errortext, ET_SIZE, "Error open file %s", "stats.dat");
+    error(errortext, 500);
+  }
+  fprintf(p_stat," -------------------------------------------------------------- \n");
+  fprintf(p_stat,"  This file contains statistics for the last encoded sequence   \n");
+  fprintf(p_stat," -------------------------------------------------------------- \n");
+  fprintf(p_stat,   " Sequence                     : %s\n",input->infile);
+  fprintf(p_stat,   " No.of coded pictures         : %4d\n",input->no_frames+frame_ctr[B_SLICE]);
+  fprintf(p_stat,   " Freq. for encoded bitstream  : %4.0f\n",frame_rate);
+
+  fprintf(p_stat,   " I Slice Bitrate(kb/s)        : %6.2f\n", stats->bitrate_I/1000);
+  fprintf(p_stat,   " P Slice Bitrate(kb/s)        : %6.2f\n", stats->bitrate_P/1000);
+  // B pictures
+  if(stats->successive_Bframe != 0)
+    fprintf(p_stat,   " B Slice Bitrate(kb/s)        : %6.2f\n", stats->bitrate_B/1000);
+  fprintf(p_stat,   " Total Bitrate(kb/s)          : %6.2f\n", stats->bitrate/1000);
+
+  for (i=0; i<3; i++)
+  {
+    fprintf(p_stat," ME Metric for Refinement Level %1d : %s\n",i,DistortionType[input->MEErrorMetric[i]]);
+  }
+  fprintf(p_stat," Mode Decision Metric              : %s\n",DistortionType[input->ModeDecisionMetric]);
+
+  switch ( input->ChromaMEEnable )
+  {
+  case 1:
+    fprintf(p_stat," Motion Estimation for components  : YCbCr\n");
+    break;
+  default:
+    fprintf(p_stat," Motion Estimation for components  : Y\n");
+    break;
+  }
+
+  fprintf(p_stat,  " Image format                 : %dx%d\n",input->img_width,input->img_height);
+
+  if(input->intra_upd)
+    fprintf(p_stat," Error robustness             : On\n");
+  else
+    fprintf(p_stat," Error robustness             : Off\n");
+
+  fprintf(p_stat,  " Search range                 : %d\n",input->search_range);
+
+  fprintf(p_stat,   " Total number of references   : %d\n",input->num_ref_frames);
+  fprintf(p_stat,   " References for P slices      : %d\n",input->P_List0_refs? input->P_List0_refs:input->num_ref_frames);
+  if(stats->successive_Bframe != 0)
+  {
+    fprintf(p_stat, " List0 refs for B slices      : %d\n",input->B_List0_refs? input->B_List0_refs:input->num_ref_frames);
+    fprintf(p_stat, " List1 refs for B slices      : %d\n",input->B_List1_refs? input->B_List1_refs:input->num_ref_frames);
+  }
+
+  if (input->symbol_mode == UVLC)
+    fprintf(p_stat,   " Entropy coding method        : CAVLC\n");
+  else
+    fprintf(p_stat,   " Entropy coding method        : CABAC\n");
+
+    fprintf(p_stat,   " Profile/Level IDC            : (%d,%d)\n",input->ProfileIDC,input->LevelIDC);
+  if(input->MbInterlace)
+    fprintf(p_stat, " MB Field Coding : On \n");
+
+  if (input->SearchMode == EPZS)
+    EPZSOutputStats(p_stat, 1);
+
+#ifdef _FULL_SEARCH_RANGE_
+  if (input->full_search == 2)
+    fprintf(p_stat," Search range restrictions    : none\n");
+  else if (input->full_search == 1)
+    fprintf(p_stat," Search range restrictions    : older reference frames\n");
+  else
+    fprintf(p_stat," Search range restrictions    : smaller blocks and older reference frames\n");
+#endif
+  if (input->rdopt)
+    fprintf(p_stat," RD-optimized mode decision   : used\n");
+  else
+    fprintf(p_stat," RD-optimized mode decision   : not used\n");
+
+  fprintf(p_stat," ---------------------|----------------|---------------|\n");
+  fprintf(p_stat,"     Item             |     Intra      |   All frames  |\n");
+  fprintf(p_stat," ---------------------|----------------|---------------|\n");
+  fprintf(p_stat," SNR Y(dB)            |");
+  fprintf(p_stat," %5.2f          |",snr->snr_y1);
+  fprintf(p_stat," %5.2f         |\n",snr->snr_ya);
+  fprintf(p_stat," SNR U/V (dB)         |");
+  fprintf(p_stat," %5.2f/%5.2f    |",snr->snr_u1,snr->snr_v1);
+  fprintf(p_stat," %5.2f/%5.2f   |\n",snr->snr_ua,snr->snr_va);
+
+  // QUANT.
+  fprintf(p_stat," Average quant        |");
+  fprintf(p_stat," %5d          |",iabs(input->qp0));
+  fprintf(p_stat," %5.2f         |\n",(float)stats->quant1/dmax(1.0,(float)stats->quant0));
+
+  fprintf(p_stat,"\n ---------------------|----------------|---------------|---------------|\n");
+  fprintf(p_stat,"     SNR              |        I       |       P       |       B       |\n");
+  fprintf(p_stat," ---------------------|----------------|---------------|---------------|\n");
+  fprintf(p_stat," SNR Y(dB)            |      %5.3f    |     %5.3f    |     %5.3f    |\n",
+                snr->snr_yt[I_SLICE],snr->snr_yt[P_SLICE],snr->snr_yt[B_SLICE]);
+  fprintf(p_stat," SNR U(dB)            |      %5.3f    |     %5.3f    |     %5.3f    |\n",
+                snr->snr_ut[I_SLICE],snr->snr_ut[P_SLICE],snr->snr_ut[B_SLICE]);
+  fprintf(p_stat," SNR V(dB)            |      %5.3f    |     %5.3f    |     %5.3f    |\n",
+                snr->snr_vt[I_SLICE],snr->snr_vt[P_SLICE],snr->snr_vt[B_SLICE]);
+
+
+  // MODE
+  fprintf(p_stat,"\n ---------------------|----------------|\n");
+  fprintf(p_stat,"   Intra              |   Mode used    |\n");
+  fprintf(p_stat," ---------------------|----------------|\n");
+
+  fprintf(p_stat," Mode 0  intra 4x4    |  %5" FORMAT_OFF_T  "         |\n",stats->mode_use[I_SLICE][I4MB ]);
+  fprintf(p_stat," Mode 1  intra 8x8    |  %5" FORMAT_OFF_T  "         |\n",stats->mode_use[I_SLICE][I8MB ]);
+  fprintf(p_stat," Mode 2+ intra 16x16  |  %5" FORMAT_OFF_T  "         |\n",stats->mode_use[I_SLICE][I16MB]);
+  fprintf(p_stat," Mode    intra IPCM   |  %5" FORMAT_OFF_T  "         |\n",stats->mode_use[I_SLICE][IPCM ]);
+
+  fprintf(p_stat,"\n ---------------------|----------------|-----------------|\n");
+  fprintf(p_stat,"   Inter              |   Mode used    | MotionInfo bits |\n");
+  fprintf(p_stat," ---------------------|----------------|-----------------|");
+  fprintf(p_stat,"\n Mode  0  (copy)      |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[P_SLICE][0   ],(double)stats->bit_use_mode[P_SLICE][0   ]/(double)bit_use[P_SLICE][0]);
+  fprintf(p_stat,"\n Mode  1  (16x16)     |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[P_SLICE][1   ],(double)stats->bit_use_mode[P_SLICE][1   ]/(double)bit_use[P_SLICE][0]);
+  fprintf(p_stat,"\n Mode  2  (16x8)      |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[P_SLICE][2   ],(double)stats->bit_use_mode[P_SLICE][2   ]/(double)bit_use[P_SLICE][0]);
+  fprintf(p_stat,"\n Mode  3  (8x16)      |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[P_SLICE][3   ],(double)stats->bit_use_mode[P_SLICE][3   ]/(double)bit_use[P_SLICE][0]);
+  fprintf(p_stat,"\n Mode  4  (8x8)       |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[P_SLICE][P8x8],(double)stats->bit_use_mode[P_SLICE][P8x8]/(double)bit_use[P_SLICE][0]);
+  fprintf(p_stat,"\n Mode  5  intra 4x4   |  %5" FORMAT_OFF_T  "         |-----------------|",stats->mode_use[P_SLICE][I4MB]);
+  fprintf(p_stat,"\n Mode  6  intra 8x8   |  %5" FORMAT_OFF_T  "         |",stats->mode_use[P_SLICE][I8MB]);
+  fprintf(p_stat,"\n Mode  7+ intra 16x16 |  %5" FORMAT_OFF_T  "         |",stats->mode_use[P_SLICE][I16MB]);
+  fprintf(p_stat,"\n Mode     intra IPCM  |  %5" FORMAT_OFF_T  "         |",stats->mode_use[P_SLICE][IPCM ]);
+  mean_motion_info_bit_use[0] = (double)(stats->bit_use_mode[P_SLICE][0] + stats->bit_use_mode[P_SLICE][1] + stats->bit_use_mode[P_SLICE][2]
+                                      + stats->bit_use_mode[P_SLICE][3] + stats->bit_use_mode[P_SLICE][P8x8])/(double) bit_use[P_SLICE][0];
+
+  // B pictures
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+  {
+
+    fprintf(p_stat,"\n\n ---------------------|----------------|-----------------|\n");
+    fprintf(p_stat,"   B frame            |   Mode used    | MotionInfo bits |\n");
+    fprintf(p_stat," ---------------------|----------------|-----------------|");
+    fprintf(p_stat,"\n Mode  0  (copy)      |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[B_SLICE][0   ],(double)stats->bit_use_mode[B_SLICE][0   ]/(double)frame_ctr[B_SLICE]);
+    fprintf(p_stat,"\n Mode  1  (16x16)     |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[B_SLICE][1   ],(double)stats->bit_use_mode[B_SLICE][1   ]/(double)frame_ctr[B_SLICE]);
+    fprintf(p_stat,"\n Mode  2  (16x8)      |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[B_SLICE][2   ],(double)stats->bit_use_mode[B_SLICE][2   ]/(double)frame_ctr[B_SLICE]);
+    fprintf(p_stat,"\n Mode  3  (8x16)      |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[B_SLICE][3   ],(double)stats->bit_use_mode[B_SLICE][3   ]/(double)frame_ctr[B_SLICE]);
+    fprintf(p_stat,"\n Mode  4  (8x8)       |  %5" FORMAT_OFF_T  "         |    %8.2f     |",stats->mode_use[B_SLICE][P8x8],(double)stats->bit_use_mode[B_SLICE][P8x8]/(double)frame_ctr[B_SLICE]);
+    fprintf(p_stat,"\n Mode  5  intra 4x4   |  %5" FORMAT_OFF_T  "         |-----------------|",stats->mode_use[B_SLICE][I4MB]);
+    fprintf(p_stat,"\n Mode  6  intra 8x8   |  %5" FORMAT_OFF_T  "         |",stats->mode_use[B_SLICE][I8MB]);
+    fprintf(p_stat,"\n Mode  7+ intra 16x16 |  %5" FORMAT_OFF_T  "         |",stats->mode_use[B_SLICE][I16MB]);
+    fprintf(p_stat,"\n Mode     intra IPCM  |  %5" FORMAT_OFF_T  "         |",stats->mode_use[B_SLICE][IPCM ]);
+    mean_motion_info_bit_use[1] = (double)(stats->bit_use_mode[B_SLICE][0] + stats->bit_use_mode[B_SLICE][1] + stats->bit_use_mode[B_SLICE][2]
+                                      + stats->bit_use_mode[B_SLICE][3] + stats->bit_use_mode[B_SLICE][P8x8])/(double) frame_ctr[B_SLICE];
+  }
+
+  fprintf(p_stat,"\n\n ---------------------|----------------|----------------|----------------|\n");
+  fprintf(p_stat,"  Bit usage:          |      Intra     |      Inter     |    B frame     |\n");
+  fprintf(p_stat," ---------------------|----------------|----------------|----------------|\n");
+
+  fprintf(p_stat," Header               |");
+  fprintf(p_stat," %10.2f     |",(float) stats->bit_use_header[I_SLICE]/bit_use[I_SLICE][0]);
+  fprintf(p_stat," %10.2f     |",(float) stats->bit_use_header[P_SLICE]/bit_use[P_SLICE][0]);
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," %10.2f     |",(float) stats->bit_use_header[B_SLICE]/frame_ctr[B_SLICE]);
+  else fprintf(p_stat," %10.2f     |", 0.);
+  fprintf(p_stat,"\n");
+
+  fprintf(p_stat," Mode                 |");
+  fprintf(p_stat," %10.2f     |",(float)stats->bit_use_mb_type[I_SLICE]/bit_use[I_SLICE][0]);
+  fprintf(p_stat," %10.2f     |",(float)stats->bit_use_mb_type[P_SLICE]/bit_use[P_SLICE][0]);
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," %10.2f     |",(float)stats->bit_use_mb_type[B_SLICE]/frame_ctr[B_SLICE]);
+  else fprintf(p_stat," %10.2f     |", 0.);
+  fprintf(p_stat,"\n");
+
+  fprintf(p_stat," Motion Info          |");
+  fprintf(p_stat,"        ./.     |");
+  fprintf(p_stat," %10.2f     |",mean_motion_info_bit_use[0]);
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," %10.2f     |",mean_motion_info_bit_use[1]);
+  else fprintf(p_stat," %10.2f     |", 0.);
+  fprintf(p_stat,"\n");
+
+  fprintf(p_stat," CBP Y/C              |");
+  fprintf(p_stat," %10.2f     |", (float)stats->tmp_bit_use_cbp[I_SLICE]/bit_use[I_SLICE][0]);
+  fprintf(p_stat," %10.2f     |", (float)stats->tmp_bit_use_cbp[P_SLICE]/bit_use[P_SLICE][0]);
+  if(stats->successive_Bframe!=0 && bit_use[B_SLICE][0]!=0)
+    fprintf(p_stat," %10.2f     |", (float)stats->tmp_bit_use_cbp[B_SLICE]/bit_use[B_SLICE][0]);
+  else fprintf(p_stat," %10.2f     |", 0.);
+  fprintf(p_stat,"\n");
+
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," Coeffs. Y            | %10.2f     | %10.2f     | %10.2f     |\n",
+    (float)stats->bit_use_coeffY[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffY[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_coeffY[B_SLICE]/frame_ctr[B_SLICE]);
+  else
+    fprintf(p_stat," Coeffs. Y            | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_coeffY[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffY[P_SLICE]/(float)bit_use[P_SLICE][0], 0.);
+
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," Coeffs. C            | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_coeffC[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffC[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_coeffC[B_SLICE]/frame_ctr[B_SLICE]);
+  else
+    fprintf(p_stat," Coeffs. C            | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_coeffC[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffC[P_SLICE]/bit_use[P_SLICE][0], 0.);
+
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," Delta quant          | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_delta_quant[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_delta_quant[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_delta_quant[B_SLICE]/frame_ctr[B_SLICE]);
+  else
+    fprintf(p_stat," Delta quant          | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_delta_quant[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_delta_quant[P_SLICE]/bit_use[P_SLICE][0], 0.);
+
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," Stuffing Bits        | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_stuffingBits[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_stuffingBits[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_stuffingBits[B_SLICE]/frame_ctr[B_SLICE]);
+  else
+    fprintf(p_stat," Stuffing Bits        | %10.2f     | %10.2f     | %10.2f     |\n",
+      (float)stats->bit_use_stuffingBits[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_stuffingBits[P_SLICE]/bit_use[P_SLICE][0], 0.);
+
+
+
+  fprintf(p_stat," ---------------------|----------------|----------------|----------------|\n");
+
+  fprintf(p_stat," average bits/frame   |");
+
+  fprintf(p_stat," %10.2f     |", (float) bit_use[I_SLICE][1]/(float) bit_use[I_SLICE][0] );
+  fprintf(p_stat," %10.2f     |", (float) bit_use[P_SLICE][1]/(float) bit_use[P_SLICE][0] );
+
+  if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+    fprintf(p_stat," %10.2f     |", (float) bit_use[B_SLICE][1]/ (float) frame_ctr[B_SLICE] );
+  else fprintf(p_stat," %10.2f     |", 0.);
+
+  fprintf(p_stat,"\n");
+  fprintf(p_stat," ---------------------|----------------|----------------|----------------|\n");
+
+  fclose(p_stat);
+
+  // write to log file
+  if ((p_log=fopen("log.dat","r"))==0)                      // check if file exist
+  {
+    if ((p_log=fopen("log.dat","a"))==NULL)            // append new statistic at the end
+    {
+      snprintf(errortext, ET_SIZE, "Error open file %s  \n","log.dat");
+      error(errortext, 500);
+    }
+    else                                            // Create header for new log file
+    {
+      fprintf(p_log," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ \n");
+      fprintf(p_log,"|                   Encoder statistics. This file is generated during first encoding session, new sessions will be appended                                                                                                                                                                            |\n");
+      fprintf(p_log," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ \n");
+      fprintf(p_log,"|    ver    | Date  | Time  |         Sequence             | #Img |P/MbInt| QPI| QPP| QPB| Format  |Iperiod| #B | FMES | Hdmd | S.R |#Ref | Freq |Coding|RD-opt|Intra upd|8x8Tr| SNRY 1| SNRU 1| SNRV 1| SNRY N| SNRU N| SNRV N|#Bitr I|#Bitr P|#Bitr B|#Bitr IPB|     Total Time   |      Me Time     |\n");
+      fprintf(p_log," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ \n");
+    }
+  }
+  else
+  {
+    fclose (p_log);
+    if ((p_log=fopen("log.dat","a"))==NULL)            // File exist,just open for appending
+    {
+      snprintf(errortext, ET_SIZE, "Error open file %s  \n","log.dat");
+      error(errortext, 500);
+    }
+  }
+  fprintf(p_log,"|%s/%-4s", VERSION, EXT_VERSION);
+
+#ifdef WIN32
+  _strdate( timebuf );
+  fprintf(p_log,"| %1.5s |",timebuf );
+
+  _strtime( timebuf);
+  fprintf(p_log," % 1.5s |",timebuf);
+#else
+  now = time ((time_t *) NULL); // Get the system time and put it into 'now' as 'calender time'
+  time (&now);
+  l_time = localtime (&now);
+  strftime (string, sizeof string, "%d-%b-%Y", l_time);
+  fprintf(p_log,"| %1.5s |",string );
+
+  strftime (string, sizeof string, "%H:%M:%S", l_time);
+  fprintf(p_log," %1.5s |",string );
+#endif
+
+  for (i=0;i<30;i++)
+    name[i]=input->infile[i + imax(0,((int) strlen(input->infile))-30)]; // write last part of path, max 20 chars
+  fprintf(p_log,"%30.30s|",name);
+
+  fprintf(p_log,"%5d |",input->no_frames);
+  fprintf(p_log,"  %d/%d  |",input->PicInterlace, input->MbInterlace);
+  fprintf(p_log," %-3d|",input->qp0);
+  fprintf(p_log," %-3d|",input->qpN);
+  fprintf(p_log," %-3d|",input->qpB);
+
+  fprintf(p_log,"%4dx%-4d|",input->img_width,input->img_height);
+
+  fprintf(p_log,"  %3d  |",input->intra_period);
+  fprintf(p_log,"%3d |",stats->successive_Bframe);
+
+  if (input->SearchMode == UM_HEX)
+    fprintf(p_log,"  HEX |");
+  else if (input->SearchMode == UM_HEX_SIMPLE)
+    fprintf(p_log," SHEX |");
+  else if (input->SearchMode == EPZS)
+    fprintf(p_log," EPZS |");
+  else if (input->SearchMode == FAST_FULL_SEARCH)
+    fprintf(p_log,"  FFS |");
+  else
+    fprintf(p_log,"  FS  |");
+
+  fprintf(p_log,"  %1d%1d%1d |", input->MEErrorMetric[F_PEL], input->MEErrorMetric[H_PEL], input->MEErrorMetric[Q_PEL]);
+
+  fprintf(p_log," %3d |",input->search_range );
+
+  fprintf(p_log," %2d  |",input->num_ref_frames);
+
+  fprintf(p_log," %5.2f|",(img->framerate *(float) (stats->successive_Bframe + 1)) / (float)(input->jumpd+1));
+
+  if (input->symbol_mode == UVLC)
+    fprintf(p_log," CAVLC|");
+  else
+    fprintf(p_log," CABAC|");
+
+  fprintf(p_log,"   %d  |",input->rdopt);
+
+  if (input->intra_upd==1)
+    fprintf(p_log,"   ON    |");
+  else
+    fprintf(p_log,"   OFF   |");
+
+  fprintf(p_log,"  %d  |",input->Transform8x8Mode);
+
+  fprintf(p_log,"%7.3f|",snr->snr_y1);
+  fprintf(p_log,"%7.3f|",snr->snr_u1);
+  fprintf(p_log,"%7.3f|",snr->snr_v1);
+  fprintf(p_log,"%7.3f|",snr->snr_ya);
+  fprintf(p_log,"%7.3f|",snr->snr_ua);
+  fprintf(p_log,"%7.3f|",snr->snr_va);
+/*
+  fprintf(p_log,"%-5.3f|",snr->snr_yt[I_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_ut[I_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_vt[I_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_yt[P_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_ut[P_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_vt[P_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_yt[B_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_ut[B_SLICE]);
+  fprintf(p_log,"%-5.3f|",snr->snr_vt[B_SLICE]);
+*/
+  fprintf(p_log,"%7.0f|",stats->bitrate_I);
+  fprintf(p_log,"%7.0f|",stats->bitrate_P);
+  fprintf(p_log,"%7.0f|",stats->bitrate_B);
+  fprintf(p_log,"%9.0f|",stats->bitrate);
+
+  fprintf(p_log,"   %12d   |", (int)tot_time);
+  fprintf(p_log,"   %12d   |", (int)me_tot_time);
+  fprintf(p_log,"\n");
+
+  fclose(p_log);
+
+  p_log=fopen("data.txt","a");
+
+  if(stats->successive_Bframe != 0 && frame_ctr[B_SLICE] != 0) // B picture used
+  {
+    fprintf(p_log, "%3d %2d %2d %2.2f %2.2f %2.2f %5" FORMAT_OFF_T  " "
+          "%2.2f %2.2f %2.2f %5d "
+        "%2.2f %2.2f %2.2f %5" FORMAT_OFF_T  " %5" FORMAT_OFF_T  " %.3f\n",
+        input->no_frames, input->qp0, input->qpN,
+        snr->snr_y1,
+        snr->snr_u1,
+        snr->snr_v1,
+        stats->bit_ctr_I,
+        0.0,
+        0.0,
+        0.0,
+        0,
+        snr->snr_ya,
+        snr->snr_ua,
+        snr->snr_va,
+        (stats->bit_ctr_I+stats->bit_ctr)/(input->no_frames+frame_ctr[B_SLICE]),
+        stats->bit_ctr_B/frame_ctr[B_SLICE],
+        (double)0.001*tot_time/(input->no_frames+frame_ctr[B_SLICE]));
+  }
+  else
+  {
+    if (input->no_frames!=0)
+    fprintf(p_log, "%3d %2d %2d %2.2f %2.2f %2.2f %5" FORMAT_OFF_T  " "
+          "%2.2f %2.2f %2.2f %5d "
+        "%2.2f %2.2f %2.2f %5" FORMAT_OFF_T  " %5d %.3f\n",
+        input->no_frames, input->qp0, input->qpN,
+        snr->snr_y1,
+        snr->snr_u1,
+        snr->snr_v1,
+        stats->bit_ctr_I,
+        0.0,
+        0.0,
+        0.0,
+        0,
+        snr->snr_ya,
+        snr->snr_ua,
+        snr->snr_va,
+        (stats->bit_ctr_I+stats->bit_ctr)/input->no_frames,
+        0,
+        (double)0.001*tot_time/input->no_frames);
+  }
+
+  fclose(p_log);
+
+  if (input->ReportFrameStats)
+  {
+    if ((p_log=fopen("stat_frame.dat","a"))==NULL)            // append new statistic at the end
+    {
+      snprintf(errortext, ET_SIZE, "Error open file %s  \n","stat_frame.dat.dat");
+      //    error(errortext, 500);
+    }
+    else
+    {
+      fprintf(p_log," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+      fclose(p_log);
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Prints the start-up banner to stdout: JM version, input/output file
+ *    names, YUV format, frame counts, interlace settings and
+ *    Transform8x8Mode, followed by the per-frame table header that
+ *    matches input->Verbose.
+ * \par Input:
+ *    none (reads the globals input, img and p_dec)
+ * \par Output:
+ *    none
+ ************************************************************************
+ */
+void information_init(void)
+{
+  static const char *const format_name[4] = {"YUV 4:0:0","YUV 4:2:0","YUV 4:2:2","YUV 4:4:4"};
+  const int verbosity = input->Verbose;
+
+  // verbosity levels other than 0 and 1 get the wider banner
+  if (verbosity != 0 && verbosity != 1)
+    printf("------------------------------- JM %s %s ------------------------------------------\n",VERSION, EXT_VERSION);
+  else
+    printf("------------------------------- JM %s %s --------------------------------\n",VERSION, EXT_VERSION);
+
+  printf(" Input YUV file                    : %s \n",input->infile);
+  printf(" Output H.264 bitstream            : %s \n",input->outfile);
+  // the reconstruction file is only reported when p_dec is not -1
+  if (p_dec != -1)
+    printf(" Output YUV file                   : %s \n",input->ReconFile);
+  printf(" YUV Format                        : %s \n", format_name[img->yuv_format]);
+  printf(" Frames to be encoded I-P/B        : %d/%d\n", input->no_frames, (input->successive_Bframe*(input->no_frames-1)));
+  printf(" PicInterlace / MbInterlace        : %d/%d\n", input->PicInterlace, input->MbInterlace);
+  printf(" Transform8x8Mode                  : %d\n", input->Transform8x8Mode);
+
+  if (verbosity == 1)
+  {
+    // compact per-frame table
+    printf("-------------------------------------------------------------------------------\n");
+    printf("  Frame  Bit/pic    QP   SnrY    SnrU    SnrV    Time(ms) MET(ms) Frm/Fld Ref  \n");
+    printf("-------------------------------------------------------------------------------\n");
+  }
+  else if (verbosity == 2)
+  {
+    // extended per-frame table
+    printf("---------------------------------------------------------------------------------------------\n");
+    printf("  Frame  Bit/pic WP QP   SnrY    SnrU    SnrV    Time(ms) MET(ms) Frm/Fld   I D L0 L1 RDP Ref\n");
+    printf("---------------------------------------------------------------------------------------------\n");
+  }
+  else
+  {
+    // level 0 and every other value: no table, just a progress notice
+    printf("-------------------------------------------------------------------------------\n");
+    printf("\nEncoding. Please Wait.\n\n");
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocates the original picture buffers: the luma frame plane
+ *    imgY_org_frm and, unless the format is YUV400, the two chroma frame
+ *    planes imgUV_org_frm. When active_sps->frame_mbs_only_flag is 0 the
+ *    top/bottom plane pointers are set up as well through
+ *    init_top_bot_planes().
+ * \return accumulated size reported by the allocation helpers; the value
+ *    returned by init_top_bot_planes() for the luma planes is not added.
+ ************************************************************************
+ */
+int init_orig_buffers(void)
+{
+  const int has_chroma = (img->yuv_format != YUV400);
+  int total = 0;
+  int uv;
+
+  // frame buffers: imgY_org_frm, imgUV_org_frm
+  total += get_mem2Dpel(&imgY_org_frm, img->height, img->width);
+  if (has_chroma)
+    total += get_mem3Dpel(&imgUV_org_frm, 2, img->height_cr, img->width_cr);
+
+  // nothing more to set up when frame_mbs_only_flag is set
+  if (active_sps->frame_mbs_only_flag)
+    return total;
+
+  // top/bottom planes: imgY_org_top/bot, imgUV_org_top/bot
+  init_top_bot_planes(imgY_org_frm, img->height, img->width, &imgY_org_top, &imgY_org_bot);
+
+  if (!has_chroma)
+    return total;
+
+  imgUV_org_top = (imgpel***)calloc(2,sizeof(imgpel**));
+  if (imgUV_org_top == NULL)
+    no_mem_exit("init_global_buffers: imgUV_org_top");
+
+  imgUV_org_bot = (imgpel***)calloc(2,sizeof(imgpel**));
+  if (imgUV_org_bot == NULL)
+    no_mem_exit("init_global_buffers: imgUV_org_bot");
+
+  // the two pointer arrays above hold 2 entries each
+  total += 4*(sizeof(imgpel**));
+
+  for (uv = 0; uv < 2; uv++)
+    total += init_top_bot_planes(imgUV_org_frm[uv], img->height_cr, img->width_cr, &(imgUV_org_top[uv]), &(imgUV_org_bot[uv]));
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Dynamic memory allocation of frame size related global buffers.
+ *    Buffers are defined in global.h; allocated memory must be freed in
+ *    void free_global_buffers()
+ *
+ *    Allocations performed, in order:
+ *    - (only if _ADAPT_LAST_GROUP_ is defined) last_P_no_frm, and
+ *      last_P_no_fld when active_sps->frame_mbs_only_flag is 0
+ *    - original picture buffers via init_orig_buffers()
+ *    - PicPos: FrameSizeInMbs + 1 rows of 2 ints, filled with
+ *      (index % PicWidthInMbs, index / PicWidthInMbs)
+ *    - wp_weight, wp_offset, wbp_weight if WeightedPrediction,
+ *      WeightedBiprediction or GenerateMultiplePPS is set
+ *    - direct_ref_idx, direct_pdir if successive_Bframe != 0 or
+ *      BRefPictures > 0
+ *    - the decs-> buffers if rdopt == 3
+ *    - pixel_map, refresh_map if RestrictRef is set
+ *    - imgY_com (and imgUV_com unless YUV400) if frame_mbs_only_flag is 0
+ *    - motion estimation memory for UM_HEX, UM_HEX_SIMPLE or EPZS
+ *    - rate control structures if RCEnable is set
+ *    - imgY_tmp, imgUV_tmp[0..1] if redundant_pic_flag is set
+ *    - imgY_sub_tmp (always)
+ *    Finally img_padded_size_x and img_cr_padded_size_x are set.
+ *
+ * \par Input:
+ *    none (reads the globals input, img and active_sps)
+ * \return Number of allocated bytes as reported by the allocation
+ *    helpers. The decs->decref pointer array, the _ADAPT_LAST_GROUP_
+ *    arrays and the rate control structures are not included.
+ *
+ * \note NOTE(review): last_P_no_fld is only allocated when
+ *    frame_mbs_only_flag is 0, but free_global_buffers() passes it to
+ *    free() unconditionally; this presumably relies on the pointer being
+ *    NULL otherwise - confirm.
+ ************************************************************************
+ */
+int init_global_buffers(void)
+{
+  int j,memory_size=0;
+#ifdef _ADAPT_LAST_GROUP_
+  extern int *last_P_no_frm;
+  extern int *last_P_no_fld;
+
+  if ((last_P_no_frm = (int*)malloc(2*img->max_num_references*sizeof(int))) == NULL)
+    no_mem_exit("init_global_buffers: last_P_no");
+  if(!active_sps->frame_mbs_only_flag)
+    if ((last_P_no_fld = (int*)malloc(4*img->max_num_references*sizeof(int))) == NULL)
+      no_mem_exit("init_global_buffers: last_P_no");
+#endif
+
+  memory_size += init_orig_buffers();
+
+  memory_size += get_mem2Dint(&PicPos,img->FrameSizeInMbs + 1,2); // one int pair per index 0..FrameSizeInMbs
+
+  for (j=0;j< (int) img->FrameSizeInMbs + 1;j++)
+  {
+    PicPos[j][0] = (j % img->PicWidthInMbs); // position within a row of PicWidthInMbs entries
+    PicPos[j][1] = (j / img->PicWidthInMbs); // row number
+  }
+
+  if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+  {
+    // Currently only use up to 20 references. Need to use different indicator such as maximum num of references in list
+    memory_size += get_mem3Dint(&wp_weight,6,MAX_REFERENCE_PICTURES,3);
+    memory_size += get_mem3Dint(&wp_offset,6,MAX_REFERENCE_PICTURES,3);
+
+    memory_size += get_mem4Dint(&wbp_weight, 6, MAX_REFERENCE_PICTURES, MAX_REFERENCE_PICTURES, 3);
+  }
+
+  // allocate memory for the direct-mode tables: direct_ref_idx, direct_pdir
+
+  if(input->successive_Bframe!=0 || input->BRefPictures> 0)
+  {
+    memory_size += get_mem3D((byte ****)(void*)(&direct_ref_idx), 2, img->height_blk, img->width_blk);
+    memory_size += get_mem2D((byte ***)(void*)&direct_pdir, img->height_blk, img->width_blk);
+  }
+
+  if (input->rdopt==3)
+  {
+    memory_size += get_mem2Dint(&decs->resY, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+    if ((decs->decref = (imgpel****) calloc(input->NoOfDecoders,sizeof(imgpel***))) == NULL) // pointer array itself is not added to memory_size
+      no_mem_exit("init_global_buffers: decref");
+    for (j=0 ; j<input->NoOfDecoders; j++)
+    {
+      memory_size += get_mem3Dpel(&decs->decref[j], img->max_num_references+1, img->height, img->width);
+    }
+    memory_size += get_mem2Dpel(&decs->RefBlock, BLOCK_SIZE,BLOCK_SIZE);
+    memory_size += get_mem3Dpel(&decs->decY, input->NoOfDecoders, img->height, img->width);
+    memory_size += get_mem3Dpel(&decs->decY_best, input->NoOfDecoders, img->height, img->width);
+    memory_size += get_mem2D(&decs->status_map, img->FrameHeightInMbs, img->PicWidthInMbs);
+    memory_size += get_mem2D(&decs->dec_mb_mode,img->FrameHeightInMbs, img->PicWidthInMbs);
+  }
+  if (input->RestrictRef)
+  {
+    memory_size += get_mem2D(&pixel_map, img->height,img->width);
+    memory_size += get_mem2D(&refresh_map, img->height/8,img->width/8);
+  }
+
+  if(!active_sps->frame_mbs_only_flag)
+  {
+    memory_size += get_mem2Dpel(&imgY_com, img->height, img->width);
+
+    if (img->yuv_format != YUV400)
+    {
+      memory_size += get_mem3Dpel(&imgUV_com, 2, img->height_cr, img->width_cr);
+    }
+  }
+
+  // allocate and set memory relating to motion estimation
+  if (input->SearchMode == UM_HEX)
+  {
+    memory_size += UMHEX_get_mem();
+  }
+  else if (input->SearchMode == UM_HEX_SIMPLE)
+  {
+    smpUMHEX_init(); // called before smpUMHEX_get_mem(); keep this order
+    memory_size += smpUMHEX_get_mem();
+  }
+  else if (input->SearchMode == EPZS)
+    memory_size += EPZSInit();
+
+
+  if (input->RCEnable)
+  {
+    generic_alloc( &generic_RC ); // rate control allocations are not added to memory_size
+    rc_alloc( &quadratic_RC );
+    // RDPictureDecision
+    if ( input->RDPictureDecision || input->MbInterlace == ADAPTIVE_CODING )
+    {
+      // INIT
+      generic_alloc( &generic_RC_init );
+      rc_alloc( &quadratic_RC_init );
+      // BEST
+      generic_alloc( &generic_RC_best );
+      rc_alloc( &quadratic_RC_best );
+    }
+  }
+
+  if(input->redundant_pic_flag)
+  {
+    memory_size += get_mem2Dpel(&imgY_tmp, img->height, input->img_width);
+    memory_size += get_mem2Dpel(&imgUV_tmp[0], input->img_height/2, input->img_width/2); // NOTE(review): half size in both directions regardless of img->yuv_format - confirm
+    memory_size += get_mem2Dpel(&imgUV_tmp[1], input->img_height/2, input->img_width/2);
+  }
+
+  memory_size += get_mem2Dint (&imgY_sub_tmp, img->height_padded, img->width_padded);
+  img_padded_size_x = (img->width + 2 * IMG_PAD_SIZE);
+  img_cr_padded_size_x = (img->width_cr + 2 * img_pad_size_uv_x);
+
+  return (memory_size);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Releases the original picture buffers: the luma and (unless YUV400)
+ *    chroma frame planes, plus the top/bottom plane pointers when
+ *    active_sps->frame_mbs_only_flag is 0.
+ ************************************************************************
+ */
+void free_orig_planes(void)
+{
+  const int has_chroma = (img->yuv_format != YUV400);
+  int uv;
+
+  // frame buffers
+  free_mem2Dpel(imgY_org_frm);
+  if (has_chroma)
+    free_mem3Dpel(imgUV_org_frm, 2);
+
+  // top/bottom planes only exist when frame_mbs_only_flag is 0
+  if (active_sps->frame_mbs_only_flag)
+    return;
+
+  free_top_bot_planes(imgY_org_top, imgY_org_bot);
+
+  if (!has_chroma)
+    return;
+
+  for (uv = 0; uv < 2; uv++)
+    free_top_bot_planes(imgUV_org_top[uv], imgUV_org_bot[uv]);
+
+  // finally the two pointer arrays themselves
+  free (imgUV_org_top);
+  free (imgUV_org_bot);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free allocated memory of frame size related global buffers.
+ *    Buffers are defined in global.h; this is the counterpart of
+ *    int init_global_buffers(). In addition it calls free_QMatrix() and
+ *    free_QOffsets() and releases several img-> members that are not
+ *    allocated in init_global_buffers() (ipredmode, ipredmode8x8,
+ *    mb_data, intra_block, nz_coeff, the lambda tables) as well as
+ *    rddata_top_frame_mb.ipredmode and mb16x16_cost_frame.
+ * \par Input:
+ *    none (reads the globals input, img, stats, decs and active_sps)
+ * \par Output:
+ *    none
+ *
+ * \note NOTE(review): the B-frame buffers are freed when
+ *    stats->successive_Bframe != 0, whereas init_global_buffers()
+ *    allocates them based on input->successive_Bframe; this assumes both
+ *    fields hold the same value - confirm.
+ * \note NOTE(review): the rdopt==3 and RestrictRef buffers are released
+ *    with plain free() on row 0 and on the pointer array, which
+ *    presumably mirrors the layout produced by the get_mem* helpers.
+ ************************************************************************
+ */
+void free_global_buffers(void)
+{
+  int  i,j;
+
+#ifdef _ADAPT_LAST_GROUP_
+  extern int *last_P_no_frm;
+  extern int *last_P_no_fld;
+  free (last_P_no_frm);
+  free (last_P_no_fld);
+#endif
+
+  free_orig_planes();
+  // free lookup memory which helps avoid divides with PicWidthInMbs
+  free_mem2Dint(PicPos);
+  // Free Qmatrices and offsets
+  free_QMatrix();
+  free_QOffsets();
+
+  if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+  {
+    free_mem3Dint(wp_weight,6);
+    free_mem3Dint(wp_offset,6);
+    free_mem4Dint(wbp_weight,6,MAX_REFERENCE_PICTURES);
+  }
+
+  if(stats->successive_Bframe!=0 || input->BRefPictures> 0)
+  {
+    free_mem3D((byte ***)direct_ref_idx,2);
+    free_mem2D((byte **) direct_pdir);
+  } // end if B frame
+
+  if (imgY_sub_tmp) // free temp quarter pel frame buffers
+  {
+    free_mem2Dint (imgY_sub_tmp);
+    imgY_sub_tmp=NULL; // cleared so a second call skips this branch
+  }
+
+  // free mem, allocated in init_img()
+  // free intra pred mode buffer for blocks
+  free_mem2D((byte**)img->ipredmode);
+  free_mem2D((byte**)img->ipredmode8x8);
+  free(img->mb_data);
+
+  free_mem2D((byte**)rddata_top_frame_mb.ipredmode);
+
+  if(input->UseConstrainedIntraPred)
+  {
+    free (img->intra_block);
+  }
+
+  if (input->CtxAdptLagrangeMult == 1)
+  {
+    free(mb16x16_cost_frame);
+  }
+
+  if (input->rdopt==3)
+  {
+    free(decs->resY[0]);
+    free(decs->resY);
+    free(decs->RefBlock[0]);
+    free(decs->RefBlock);
+    for (j=0; j<input->NoOfDecoders; j++)
+    {
+      free(decs->decY[j][0]);
+      free(decs->decY[j]);
+      free(decs->decY_best[j][0]);
+      free(decs->decY_best[j]);
+      for (i=0; i<img->max_num_references+1; i++) // same count as used for decref in init_global_buffers()
+      {
+        free(decs->decref[j][i][0]);
+        free(decs->decref[j][i]);
+      }
+      free(decs->decref[j]);
+    }
+    free(decs->decY);
+    free(decs->decY_best);
+    free(decs->decref);
+    free(decs->status_map[0]);
+    free(decs->status_map);
+    free(decs->dec_mb_mode[0]);
+    free(decs->dec_mb_mode);
+  }
+  if (input->RestrictRef)
+  {
+    free(pixel_map[0]);
+    free(pixel_map);
+    free(refresh_map[0]);
+    free(refresh_map);
+  }
+
+  if(!active_sps->frame_mbs_only_flag)
+  {
+    free_mem2Dpel(imgY_com);
+    if (img->yuv_format != YUV400)
+    {
+      free_mem3Dpel(imgUV_com,2);
+    }
+  }
+
+  free_mem3Dint(img->nz_coeff, img->FrameSizeInMbs);
+
+  free_mem2Ddb_offset (img->lambda_md, img->bitdepth_luma_qp_scale);
+  free_mem3Ddb_offset (img->lambda_me, 10, 52 + img->bitdepth_luma_qp_scale, img->bitdepth_luma_qp_scale);
+  free_mem3Dint_offset(img->lambda_mf, 10, 52 + img->bitdepth_luma_qp_scale, img->bitdepth_luma_qp_scale);
+
+  if (input->CtxAdptLagrangeMult == 1)
+  {
+    free_mem2Ddb_offset(img->lambda_mf_factor,img->bitdepth_luma_qp_scale);
+  }
+
+  // release the motion estimation memory of the selected search mode
+  if(input->SearchMode == UM_HEX)
+  {
+    UMHEX_free_mem();
+  }
+  else if (input->SearchMode == UM_HEX_SIMPLE)
+  {
+    smpUMHEX_free_mem();
+  }
+  else if (input->SearchMode == EPZS)
+  {
+    EPZSDelete();
+  }
+
+
+  if (input->RCEnable)
+  {
+    generic_free( &generic_RC );
+    rc_free( &quadratic_RC );
+    // RDPictureDecision
+    if ( input->RDPictureDecision || input->MbInterlace == ADAPTIVE_CODING )
+    {
+      // INIT
+      generic_free( &generic_RC_init );
+      rc_free( &quadratic_RC_init );
+      // BEST
+      generic_free( &generic_RC_best );
+      rc_free( &quadratic_RC_best );
+    }
+  }
+
+  if(input->redundant_pic_flag)
+  {
+    free_mem2Dpel(imgY_tmp);
+    free_mem2Dpel(imgUV_tmp[0]);
+    free_mem2Dpel(imgUV_tmp[1]);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for mv
+ *
+ *    Builds a six-level table that is indexed as
+ *    (*mv)[4][4][2][img->max_num_references][9][2]. The 9x2 shorts at
+ *    the innermost two levels form one contiguous, zero-initialised
+ *    block per table. no_mem_exit() is called if an allocation fails.
+ * \par Input:
+ *    Image Parameters struct img_par *img  (max_num_references)       \n
+ *    short******* mv  (receives the table)
+ * \return memory size in bytes of the short payload; the intermediate
+ *    pointer tables are not counted
+ ************************************************************************
+ */
+int get_mem_mv (short ******* mv)
+{
+  int i, j, k, l, m;
+
+  if ((*mv = (short******)calloc(4,sizeof(short*****))) == NULL)
+    no_mem_exit ("get_mem_mv: mv");
+  for (i=0; i<4; i++)
+  {
+    if (((*mv)[i] = (short*****)calloc(4,sizeof(short****))) == NULL)
+      no_mem_exit ("get_mem_mv: mv");
+    for (j=0; j<4; j++)
+    {
+      if (((*mv)[i][j] = (short****)calloc(2,sizeof(short***))) == NULL)
+        no_mem_exit ("get_mem_mv: mv");
+      for (k=0; k<2; k++)
+      {
+        if (((*mv)[i][j][k] = (short***)calloc(img->max_num_references,sizeof(short**))) == NULL)
+          no_mem_exit ("get_mem_mv: mv");
+        for (l=0; l<img->max_num_references; l++)
+        {
+          if (((*mv)[i][j][k][l] = (short**)calloc(9,sizeof(short*))) == NULL)
+            no_mem_exit ("get_mem_mv: mv");
+          // one contiguous block of 9 rows x 2 shorts; row pointers are set up below
+          if (((*mv)[i][j][k][l][0] = (short*)calloc(2*9,sizeof(short))) == NULL)
+            no_mem_exit ("get_mem_mv: mv");
+          for (m=1; m<9; m++)
+            (*mv)[i][j][k][l][m] = (*mv)[i][j][k][l][m - 1] + 2;
+        }
+      }
+    }
+  }
+  // 4 x 4 x 2 x max_num_references tables of 9 x 2 shorts each; the factor
+  // 2 of the third index was missing from the reported size
+  return 4*4*2*img->max_num_references*9*2*sizeof(short);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory from mv: walks the [4][4][2][img->max_num_references]
+ *    pointer levels and releases, for every table, the data block
+ *    reached through row 0 and then the row pointer array, followed by
+ *    the enclosing pointer arrays from the inside out.
+ * \par Input:
+ *    short****** mv
+ ************************************************************************
+ */
+void free_mem_mv (short****** mv)
+{
+  const int num_refs = img->max_num_references;
+  int a, b, list, ref;
+
+  for (a=0; a<4; a++)
+  {
+    short *****level1 = mv[a];
+
+    for (b=0; b<4; b++)
+    {
+      short ****level2 = level1[b];
+
+      for (list=0; list<2; list++)
+      {
+        short ***tables = level2[list];
+
+        for (ref=0; ref<num_refs; ref++)
+        {
+          free (tables[ref][0]);   // data block
+          free (tables[ref]);      // row pointers
+        }
+        free (tables);
+      }
+      free (level2);
+    }
+    free (level1);
+  }
+  free (mv);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for AC coefficients. The table is indexed as
+ *    (*cofAC)[4 + img->num_blk8x8_uv][4][2][65]; every array is
+ *    zero-initialised by calloc. no_mem_exit() is called if an
+ *    allocation fails.
+ * \return size in bytes of the int payload (pointer arrays not counted)
+ ************************************************************************
+ */
+int get_mem_ACcoeff (int***** cofAC)
+{
+  const int blocks = 4 + img->num_blk8x8_uv;
+  int ****coef;
+  int blk, sub, lvl;
+
+  coef = (int****)calloc (blocks, sizeof(int***));
+  *cofAC = coef;
+  if (coef == NULL)
+    no_mem_exit ("get_mem_ACcoeff: cofAC");
+
+  for (blk=0; blk<blocks; blk++)
+  {
+    coef[blk] = (int***)calloc (4, sizeof(int**));
+    if (coef[blk] == NULL)
+      no_mem_exit ("get_mem_ACcoeff: cofAC");
+
+    for (sub=0; sub<4; sub++)
+    {
+      coef[blk][sub] = (int**)calloc (2, sizeof(int*));
+      if (coef[blk][sub] == NULL)
+        no_mem_exit ("get_mem_ACcoeff: cofAC");
+
+      for (lvl=0; lvl<2; lvl++)
+      {
+        // 18->65 for ABT
+        coef[blk][sub][lvl] = (int*)calloc (65, sizeof(int));
+        if (coef[blk][sub][lvl] == NULL)
+          no_mem_exit ("get_mem_ACcoeff: cofAC");
+      }
+    }
+  }
+  return blocks*4*2*65*sizeof(int);// 18->65 for ABT
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for DC coefficients. The table is indexed as
+ *    (*cofDC)[3][2][65]; every array is zero-initialised by calloc.
+ *    no_mem_exit() is called if an allocation fails.
+ * \return size in bytes of the int payload (pointer arrays not counted)
+ ************************************************************************
+ */
+int get_mem_DCcoeff (int**** cofDC)
+{
+  int ***coef;
+  int plane, lvl;
+
+  coef = (int***)calloc (3, sizeof(int**));
+  *cofDC = coef;
+  if (coef == NULL)
+    no_mem_exit ("get_mem_DCcoeff: cofDC");
+
+  for (plane=0; plane<3; plane++)
+  {
+    coef[plane] = (int**)calloc (2, sizeof(int*));
+    if (coef[plane] == NULL)
+      no_mem_exit ("get_mem_DCcoeff: cofDC");
+
+    for (lvl=0; lvl<2; lvl++)
+    {
+      // 18->65 for ABT
+      coef[plane][lvl] = (int*)calloc (65, sizeof(int));
+      if (coef[plane][lvl] == NULL)
+        no_mem_exit ("get_mem_DCcoeff: cofDC");
+    }
+  }
+  return 3*2*65*sizeof(int); // 18->65 for ABT
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory of AC coefficients: releases the
+ *    [4 + img->num_blk8x8_uv][4][2] arrays from the inside out and then
+ *    the top-level pointer array.
+ ************************************************************************
+ */
+void free_mem_ACcoeff (int**** cofAC)
+{
+  const int blocks = 4+img->num_blk8x8_uv;
+  int blk, sub, lvl;
+
+  for (blk=0; blk<blocks; blk++)
+  {
+    int ***per_block = cofAC[blk];
+
+    for (sub=0; sub<4; sub++)
+    {
+      int **per_sub = per_block[sub];
+
+      for (lvl=0; lvl<2; lvl++)
+        free (per_sub[lvl]);
+      free (per_sub);
+    }
+    free (per_block);
+  }
+  free (cofAC);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory of DC coefficients: releases the [3][2] arrays from the
+ *    inside out and then the top-level pointer array.
+ ************************************************************************
+ */
+void free_mem_DCcoeff (int*** cofDC)
+{
+  int plane, lvl;
+
+  for (plane=0; plane<3; plane++)
+  {
+    int **per_plane = cofDC[plane];
+
+    for (lvl=0; lvl<2; lvl++)
+      free (per_plane[lvl]);
+    free (per_plane);
+  }
+  free (cofDC);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Forms a frame picture from two field pictures: row i of
+ *    enc_top_picture is copied to row 2*i and row i of
+ *    enc_bottom_picture to row 2*i+1 of imgY_com, and likewise for both
+ *    chroma planes into imgUV_com unless the format is YUV400.
+ ************************************************************************
+ */
+void combine_field(void)
+{
+  int row, uv;
+
+  for (row=0; row<img->height / 2; row++)
+  {
+    const int even = row*2;
+
+    memcpy(imgY_com[even],     enc_top_picture->imgY[row],    img->width*sizeof(imgpel)); // top field
+    memcpy(imgY_com[even + 1], enc_bottom_picture->imgY[row], img->width*sizeof(imgpel)); // bottom field
+  }
+
+  if (img->yuv_format == YUV400)
+    return;   // no chroma planes to interleave
+
+  for (row=0; row<img->height_cr / 2; row++)
+  {
+    const int even = row*2;
+
+    for (uv=0; uv<2; uv++)
+    {
+      memcpy(imgUV_com[uv][even],     enc_top_picture->imgUV[uv][row],    img->width_cr*sizeof(imgpel));
+      memcpy(imgUV_com[uv][even + 1], enc_bottom_picture->imgUV[uv][row], img->width_cr*sizeof(imgpel));
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    RD decision of frame and field coding. Each cost is
+ *    bits * lambda_picture + the corresponding Y value passed in.
+ * \return 0 if the field cost is strictly larger than the frame cost,
+ *    1 otherwise (including equal costs)
+ ************************************************************************
+ */
+int decide_fld_frame(float snr_frame_Y, float snr_field_Y, int bit_field, int bit_frame, double lambda_picture)
+{
+  const double frame_cost = bit_frame * lambda_picture + snr_frame_Y;
+  const double field_cost = bit_field * lambda_picture + snr_field_Y;
+
+  return (field_cost > frame_cost) ? 0 : 1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Do some initialization work for encoding the 2nd IGOP. Acts only
+ *    once, when img->number equals input->no_frames-1, In2ndIGOP is not
+ *    yet set and input->NumFrameIn2ndIGOP is non-zero; it then records
+ *    the start frame number and start tr of the new IGOP and extends
+ *    input->no_frames by NumFrameIn2ndIGOP.
+ ************************************************************************
+ */
+void process_2nd_IGOP(void)
+{
+  // no second IGOP requested
+  if (input->NumFrameIn2ndIGOP == 0)
+    return;
+
+  // first IGOP not finished yet, or the switch has already been made
+  if (img->number != input->no_frames-1 || In2ndIGOP)
+    return;
+
+  In2ndIGOP = TRUE;
+
+  start_frame_no_in_this_IGOP = input->no_frames;
+  start_tr_in_this_IGOP = (input->no_frames-1)*(input->jumpd+1) +1;
+  input->no_frames += input->NumFrameIn2ndIGOP;
+
+  // A reset of img->number (to -1) and of the frm->picbuf_short[0]
+  // bookkeeping (reset_buffers(), used, picID, lt_picID, short_used)
+  // is commented out in this version of the code and therefore not done.
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set the image type for I,P and SP pictures (not B!)
+ *
+ *    I_SLICE is chosen when IMG_NUMBER is a multiple of
+ *    input->intra_period (or, for intra_period == 0, when IMG_NUMBER is
+ *    0). Otherwise SP_SLICE is chosen when sp_periodicity is non-zero
+ *    and divides IMG_NUMBER; otherwise B_SLICE if BRefPictures == 2,
+ *    else P_SLICE.
+ ************************************************************************
+ */
+void SetImgType(void)
+{
+  int is_intra;
+
+  if (input->intra_period == 0)
+    is_intra = (IMG_NUMBER == 0);
+  else
+    is_intra = ((IMG_NUMBER%input->intra_period) == 0);
+
+  if (is_intra)
+  {
+    img->type = I_SLICE;
+    return;
+  }
+
+  if (input->sp_periodicity && ((IMG_NUMBER % input->sp_periodicity) ==0))
+    img->type = SP_SLICE;
+  else if (input->BRefPictures == 2)
+    img->type = B_SLICE;
+  else
+    img->type = P_SLICE;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Maps active_sps->level_idc to img->LevelIndex. level_idc 11 and 42
+ *    additionally depend on the profile (and, for 11, on
+ *    constrained_set3_flag). An unknown level_idc prints a warning to
+ *    stderr and selects index 16, the same index as level_idc 51.
+ ************************************************************************
+ */
+void SetLevelIndices(void)
+{
+  // level_idc values whose index does not depend on the profile
+  static const struct
+  {
+    int level_idc;
+    int index;
+  } plain_levels[] =
+  {
+    { 9,  1}, {10,  0}, {12,  3}, {13,  4},
+    {20,  5}, {21,  6}, {22,  7},
+    {30,  8}, {31,  9}, {32, 10},
+    {40, 11}, {41, 12},
+    {50, 15}, {51, 16}
+  };
+  const int num_plain = (int) (sizeof(plain_levels) / sizeof(plain_levels[0]));
+  const int level = active_sps->level_idc;
+  int n;
+
+  if (level == 11)
+  {
+    if ((active_sps->profile_idc < FREXT_HP)&&(active_sps->constrained_set3_flag == 0))
+      img->LevelIndex=2;
+    else
+      img->LevelIndex=1;   // same index as level_idc 9
+    return;
+  }
+
+  if (level == 42)
+  {
+    if (active_sps->profile_idc <= 88)
+      img->LevelIndex=13;
+    else
+      img->LevelIndex=14;
+    return;
+  }
+
+  for (n=0; n<num_plain; n++)
+  {
+    if (plain_levels[n].level_idc == level)
+    {
+      img->LevelIndex=plain_levels[n].index;
+      return;
+    }
+  }
+
+  fprintf ( stderr, "Warning: unknown LevelIDC, using maximum level 5.1 \n" );
+  img->LevelIndex=16;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    initialize key frames and corresponding redundant frames.
+ *
+ *    When input->redundant_pic_flag is set, the configuration is
+ *    validated first and error() is called with code 100 for each
+ *    violated constraint: no B frames, no PicInterlace,
+ *    num_ref_frames >= PrimaryGOPLength,
+ *    2^NumRedundantHierarchy <= PrimaryGOPLength, and Verbose == 1.
+ *    Afterwards key_frame, redundant_coding and img->redundant_pic_cnt
+ *    are reset to 0 and frameNuminGOP is set to
+ *    img->number % PrimaryGOPLength, or to -1 for img->number == 0.
+ *
+ * \note NOTE(review): the modulo is evaluated even when
+ *    redundant_pic_flag is 0, so PrimaryGOPLength must never be 0 here -
+ *    confirm that the configuration parser guarantees this.
+ ************************************************************************
+ */
+void Init_redundant_frame()
+{
+  if(input->redundant_pic_flag)
+  {
+    if(input->successive_Bframe)
+    {
+      error("B frame not supported when redundant picture used!",100);
+    }
+
+    if(input->PicInterlace)
+    {
+      error("Interlace not supported when redundant picture used!",100);
+    }
+
+    if(input->num_ref_frames<input->PrimaryGOPLength)
+    {
+      error("NumberReferenceFrames must be no less than PrimaryGOPLength",100);
+    }
+
+    // equality is accepted by this check, so the message says "no less than"
+    if((1<<input->NumRedundantHierarchy)>input->PrimaryGOPLength)
+    {
+      error("PrimaryGOPLength must be no less than 2^NumRedundantHierarchy",100);
+    }
+
+    if(input->Verbose!=1)
+    {
+      error("Redundant slices not supported when Verbose!=1",100);
+    }
+  }
+
+  key_frame = 0;
+  redundant_coding = 0;
+  img->redundant_pic_cnt = 0;
+  frameNuminGOP = img->number % input->PrimaryGOPLength;
+  if(img->number == 0)
+  {
+    // picture 0 is marked with -1 instead of position 0
+    frameNuminGOP = -1;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    allocate redundant frames in a primary GOP.
+ *
+ *    If frameNuminGOP matches one of the tested positions, key_frame is
+ *    set to 1, redundant_coding to 0 and redundant_ref_idx to the
+ *    spacing of the matching hierarchy level: GOPlength for position 0,
+ *    GOPlength/2^n for level n (n = 1..4, enabled when
+ *    input->NumRedundantHierarchy >= n). When several levels match, the
+ *    deepest one determines redundant_ref_idx. Nothing is changed when
+ *    no position matches.
+ ************************************************************************
+ */
+void Set_redundant_frame()
+{
+  // largest odd numerator tested for the denominators 2, 4, 8 and 16.
+  // NOTE(review): for 16 the list stops at 13, i.e. 15/16 is not tested,
+  // exactly as in the code this was derived from - confirm intent.
+  static const int last_numerator[4] = { 1, 3, 7, 13 };
+  const int gop_len = input->PrimaryGOPLength;
+  int is_key = 0;
+  int ref_idx = 0;
+  int level, denom, num;
+
+  // start frame of GOP
+  if(frameNuminGOP == 0)
+  {
+    is_key = 1;
+    ref_idx = gop_len;
+  }
+
+  // level 1: 1/2; level 2: 1/4, 3/4; level 3: odd/8; level 4: odd/16
+  for(level = 1; level <= 4; level++)
+  {
+    if(input->NumRedundantHierarchy < level)
+      continue;
+
+    denom = 1 << level;
+    for(num = 1; num <= last_numerator[level-1]; num += 2)
+    {
+      if(frameNuminGOP == gop_len*num/denom)
+      {
+        is_key = 1;
+        ref_idx = gop_len/denom;
+        break;
+      }
+    }
+  }
+
+  if(is_key)
+  {
+    redundant_coding = 0;
+    key_frame = 1;
+    redundant_ref_idx = ref_idx;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Encodes one redundant frame: sets redundant_coding and
+ *    img->redundant_pic_cnt to 1, clears key_frame, turns an I_SLICE
+ *    picture type into P_SLICE and then calls encode_one_frame().
+ ************************************************************************
+ */
+void encode_one_redundant_frame()
+{
+  // switch the state from key picture to redundant picture
+  img->redundant_pic_cnt = 1;
+  redundant_coding = 1;
+  key_frame = 0;
+
+  // an intra picture type is replaced by P for the redundant copy
+  if(img->type == I_SLICE)
+    img->type = P_SLICE;
+
+  encode_one_frame();
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Setup Chroma MC Variables: chroma padding sizes, motion vector
+ *    masks and shifts, per direction, from img->yuv_format.
+ *    YUV420 uses the (IMG_PAD_SIZE >> 1, 7, 3) settings in both
+ *    directions, YUV422 uses them horizontally only, and every other
+ *    format gets the YUV444 settings (IMG_PAD_SIZE, 3, 2).
+ ************************************************************************
+ */
+void chroma_mc_setup(void)
+{
+  // which directions take the halved-padding settings
+  const int halved_x = (img->yuv_format == YUV420) || (img->yuv_format == YUV422);
+  const int halved_y = (img->yuv_format == YUV420);
+
+  // initialize global variables used for chroma interpolation and buffering
+  img_pad_size_uv_x = halved_x ? (IMG_PAD_SIZE >> 1) : IMG_PAD_SIZE;
+  img_pad_size_uv_y = halved_y ? (IMG_PAD_SIZE >> 1) : IMG_PAD_SIZE;
+
+  chroma_mask_mv_x = halved_x ? 7 : 3;
+  chroma_mask_mv_y = halved_y ? 7 : 3;
+
+  chroma_shift_x = halved_x ? 3 : 2;
+  chroma_shift_y = halved_y ? 3 : 2;
+
+  // NOTE(review): y subtracts 2 while x subtracts 1; kept exactly as is -
+  // confirm the asymmetry is intended.
+  shift_cr_y = chroma_shift_y - 2;
+  shift_cr_x = chroma_shift_x - 1;
+
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/loopFilter.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/loopFilter.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/loopFilter.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,480 @@
+
+/*!
+ *************************************************************************************
+ * \file loopFilter.c
+ *
+ * \brief
+ *    Filter to reduce blocking artifacts on a macroblock level.
+ *    The filter strength is QP dependent.
+ *
+ * \author
+ *    Contributors:
+ *    - Peter List       Peter.List at t-systems.de:  Original code                                 (13-Aug-2001)
+ *    - Jani Lainema     Jani.Lainema at nokia.com:   Some bug fixing, removal of recursiveness     (16-Aug-2001)
+ *    - Peter List       Peter.List at t-systems.de:  inplace filtering and various simplifications (10-Jan-2002)
+ *    - Anthony Joch     anthony at ubvideo.com:      Simplified switching between filters and
+ *                                                 non-recursive default filter.                 (08-Jul-2002)
+ *    - Cristina Gomila  cristina.gomila at thomson.net: Simplification of the chroma deblocking
+ *                                                    from JVT-E089                              (21-Nov-2002)
+ *************************************************************************************
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "global.h"
+#include "image.h"
+#include "mb_access.h"
+
+byte mixedModeEdgeFlag, fieldModeFilteringFlag; // file globals written as side effects by GetStrength() and EdgeLoop() respectively
+
+/*********************************************************************************************************/
+
+// NOTE: In principle, the alpha and beta tables are calculated with the formulas below
+//       Alpha( qp ) = 0.8 * (2^(qp/6)  -  1)
+//       Beta ( qp ) = 0.5 * qp  -  7
+
+// The tables actually used have been "hand optimized" though (by Anthony Joch). So, the
+// table values might be a little different from formula-generated values. Also, the first
+// few values of both tables are set to zero to force the filter off at low QPs.
+
+static const byte ALPHA_TABLE[52]  = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6,  7,8,9,10,12,13,15,17,  20,22,25,28,32,36,40,45,  50,56,63,71,80,90,101,113,  127,144,162,182,203,226,255,255} ;
+static const byte  BETA_TABLE[52]  = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3,  3,3,3, 4, 4, 4, 6, 6,   7, 7, 8, 8, 9, 9,10,10,  11,11,12,12,13,13, 14, 14,   15, 15, 16, 16, 17, 17, 18, 18} ;
+static const byte CLIP_TAB[52][5]  =
+{
+  { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+  { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+  { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1},
+  { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3},
+  { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6},
+  { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16},
+  { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25}
+} ;
+
+static const signed char chroma_edge[2][4][4] = //[dir][edge][yuv_format] -> chroma edge index, or -1 = no chroma edge; must be signed
+{ { {-1, 0, 0, 0},
+    {-1,-1,-1, 1},
+    {-1, 1, 1, 2},
+    {-1,-1,-1, 3}},
+  // second plane: dir 1; rows are luma edges 0..3 exactly as for dir 0 above
+  { {-1, 0, 0, 0},
+    {-1,-1, 1, 1},
+    {-1, 1, 2, 2},
+    {-1,-1, 3, 3}}};
+
+void GetStrength(byte Strength[16],ImageParameters *img,int MbQAddr,int dir,int edge, int mvlimit);
+void EdgeLoop(imgpel** Img, byte Strength[16],ImageParameters *img, int MbQAddr, int AlphaC0Offset, int BetaOffset, int dir, int edge, int width, int yuv, int uv);
+void DeblockMb(ImageParameters *img, imgpel **imgY, imgpel ***imgUV, int MbQAddr) ;
+
+/*!
+ *****************************************************************************************
+ * \brief
+ *    Filter all macroblocks in order of increasing macroblock address.
+ *****************************************************************************************
+ */
+void DeblockFrame(ImageParameters *img, imgpel **imgY, imgpel ***imgUV)
+{
+  unsigned mbAddr;
+
+  // First pass: every IPCM macroblock gets its qp forced to 0 before any filtering starts.
+  for (mbAddr = 0; mbAddr < img->PicSizeInMbs; ++mbAddr)
+  {
+    Macroblock *mb = &img->mb_data[mbAddr];
+
+    if (mb->mb_type == IPCM)
+      mb->qp = 0;
+  }
+
+  // Second pass: deblock the macroblocks one at a time, lowest address first.
+  for (mbAddr = 0; mbAddr < img->PicSizeInMbs; ++mbAddr)
+    DeblockMb(img, imgY, imgUV, mbAddr);
+}
+
+
+/*!
+ *****************************************************************************************
+ * \brief
+ *    Deblocking filter for one macroblock.
+ *****************************************************************************************
+ */
+
+void DeblockMb(ImageParameters *img, imgpel **imgY, imgpel ***imgUV, int MbQAddr)
+{ // Deblocks macroblock MbQAddr: luma in imgY and, when imgUV is non-NULL, both chroma planes imgUV[0] and imgUV[1].
+  int           EdgeCondition;
+  int           dir,edge;
+  byte          Strength[16];
+  int           mb_x, mb_y;
+  // per-edge luma enable; entries 1 and 3 are cleared below when luma_transform_size_8x8_flag is set
+  int           filterNon8x8LumaEdgesFlag[4] = {1,1,1,1};
+  int           filterLeftMbEdgeFlag;
+  int           filterTopMbEdgeFlag;
+  int           fieldModeMbFlag;
+  int           mvlimit=4;                 // handed to GetStrength as the vertical MV-difference threshold; lowered to 2 for field MBs
+  int           i, StrengthSum;
+  Macroblock    *MbQ;
+
+  int           edge_cr;                   // chroma edge index from chroma_edge[]; negative means no chroma filtering for this edge
+
+  img->DeblockCall = 1;                    // set back to 0 on both exit paths; 2 while the extra mixed-mode edge is processed
+  get_mb_pos (MbQAddr, &mb_x, &mb_y, IS_LUMA);
+
+  filterLeftMbEdgeFlag  = (mb_x != 0);
+  filterTopMbEdgeFlag   = (mb_y != 0);
+
+  MbQ  = &(img->mb_data[MbQAddr]) ; // current Mb
+
+  if (MbQ->mb_type == I8MB)
+    assert(MbQ->luma_transform_size_8x8_flag);
+
+  filterNon8x8LumaEdgesFlag[1] =
+  filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag);
+  // NOTE(review): mb_y==16 presumably identifies the bottom MB of a pair in the first MB row - confirm against get_mb_pos
+  if (img->MbaffFrameFlag && mb_y==16 && MbQ->mb_field)
+    filterTopMbEdgeFlag = 0;
+
+  fieldModeMbFlag = (img->structure!=FRAME) || (img->MbaffFrameFlag && MbQ->mb_field);
+  if (fieldModeMbFlag)
+    mvlimit = 2;
+
+  // return, if filter is disabled
+  if (MbQ->LFDisableIdc==1) {
+    img->DeblockCall = 0;
+    return;
+  }
+
+  if (MbQ->LFDisableIdc==2)
+  {
+    // don't filter at slice boundaries
+    filterLeftMbEdgeFlag = MbQ->mbAvailA;
+    // if this the bottom of a frame macroblock pair then always filter the top edge
+    if (img->MbaffFrameFlag && !MbQ->mb_field && (MbQAddr & 0x01))
+      filterTopMbEdgeFlag  = 1;
+    else
+      filterTopMbEdgeFlag  = MbQ->mbAvailB;
+  }
+
+  img->current_mb_nr = MbQAddr;
+  CheckAvailabilityOfNeighbors();
+
+  for( dir=0 ; dir<2 ; dir++ )                                             // vertical edges, then horizontal edges
+  {
+    EdgeCondition = (dir && filterTopMbEdgeFlag) || (!dir && filterLeftMbEdgeFlag); // can not filter beyond picture boundaries
+    for( edge=0 ; edge<4 ; edge++ )                                            // first 4 vertical strips of 16 pel
+    {                                                                                         // then  4 horizontal
+      if( edge || EdgeCondition )
+      {
+        edge_cr = chroma_edge[dir][edge][img->yuv_format];
+
+        GetStrength(Strength,img,MbQAddr,dir,edge, mvlimit); // Strength for 4 blks in 1 stripe
+        StrengthSum = Strength[0];
+        for (i = 1; i < 16; i++)
+        {
+          if (StrengthSum) break;            // a single non-zero strength is enough; stop summing
+          StrengthSum += Strength[i];
+        }
+        if( StrengthSum )                      // only if one of the 16 Strength bytes is != 0
+        {
+          if (filterNon8x8LumaEdgesFlag[edge])
+            EdgeLoop( imgY, Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge, img->width, 0, 0) ;
+          if( (imgUV != NULL) && (edge_cr >= 0))
+          {
+            EdgeLoop( imgUV[0], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge_cr, img->width_cr, 1 , 0) ;
+            EdgeLoop( imgUV[1], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge_cr, img->width_cr, 1 , 1) ;
+          }
+        }
+        // mixedModeEdgeFlag tested below is the value left behind by the GetStrength() call above (its last idx iteration)
+        if (dir && !edge && !MbQ->mb_field && mixedModeEdgeFlag) {
+          // this is the extra horizontal edge between a frame macroblock pair and a field above it
+          img->DeblockCall = 2;
+          GetStrength(Strength,img,MbQAddr,dir,4, mvlimit); // Strength for 4 blks in 1 stripe
+          //if( *((int*)Strength) )                      // only if one of the 4 Strength bytes is != 0
+          {
+            if (filterNon8x8LumaEdgesFlag[edge])
+              EdgeLoop( imgY, Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, img->width, 0, 0) ;
+            if( (imgUV != NULL) && (edge_cr >= 0))
+            {
+              EdgeLoop( imgUV[0], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, img->width_cr, 1, 0) ;
+              EdgeLoop( imgUV[1], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, img->width_cr, 1, 1) ;
+            }
+          }
+          img->DeblockCall = 1;
+        }
+      }
+    }//end edge
+  }//end loop dir
+  img->DeblockCall = 0;
+
+}
+
+  /*!
+ *********************************************************************************************
+ * \brief
+ *    returns a buffer of 16 Strength values for one stripe in a mb (for different Frame types)
+ *********************************************************************************************
+ */
+
+#define ANY_INTRA (MbP->mb_type==I4MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM)
+
+void GetStrength(byte Strength[16],ImageParameters *img,int MbQAddr,int dir,int edge, int mvlimit)
+{ // Fills Strength[0..15], one value per luma sample position along the given edge of macroblock MbQAddr (edge 4 = extra edge, see yQ).
+  int    blkP, blkQ, idx;
+  int    blk_x, blk_x2, blk_y, blk_y2 ;
+  short  ***list0_mv = enc_picture->mv[LIST_0];
+  short  ***list1_mv = enc_picture->mv[LIST_1];
+  char   **list0_refIdxArr = enc_picture->ref_idx[LIST_0];   // NOTE(review): plain char; the "<0" tests below rely on char being signed
+  char   **list1_refIdxArr = enc_picture->ref_idx[LIST_1];
+  int64  **list0_refPicIdArr = enc_picture->ref_pic_id[LIST_0];
+  int64  **list1_refPicIdArr = enc_picture->ref_pic_id[LIST_1];
+  int64    ref_p0,ref_p1,ref_q0,ref_q1;
+  int    xQ, xP, yQ, yP;
+  int    mb_x, mb_y;
+  Macroblock    *MbQ, *MbP;
+  PixelPos pixP;
+  int dir_m1 = (1 - dir);                  // 1 when dir==0, 0 when dir==1: x offset from the Q sample to its P neighbour
+
+  MbQ = &(img->mb_data[MbQAddr]);
+
+  for( idx=0 ; idx<16 ; idx++ )
+  {
+    xQ = dir ? idx : edge << 2;
+    yQ = dir ? (edge < 4 ? edge << 2 : 1) : idx;           // edge 4 (extra mixed-mode edge) maps to row 1
+    getNeighbour(MbQAddr, xQ - dir_m1, yQ - dir, IS_LUMA, &pixP);   // P = sample one step left (dir==0) or up (dir==1) of Q
+    xP = pixP.x;
+    yP = pixP.y;
+    MbP = &(img->mb_data[pixP.mb_addr]);
+    mixedModeEdgeFlag = MbQ->mb_field != MbP->mb_field;   // global; DeblockMb reads it after this call returns
+    // 4x4 block index = 4*(row/4) + (column/4)
+    blkQ = ((yQ>>2)<<2) + (xQ>>2);
+    blkP = ((yP>>2)<<2) + (xP>>2);
+
+    if ((img->type==SP_SLICE)||(img->type==SI_SLICE) )
+    { // SP/SI slices keep the 4-or-3 start value; no refinement is applied
+      Strength[idx] = (edge == 0 && (((!img->MbaffFrameFlag && (img->structure==FRAME)) ||
+      (img->MbaffFrameFlag && !MbP->mb_field && !MbQ->mb_field)) ||
+      ((img->MbaffFrameFlag || (img->structure != FRAME)) && !dir))) ? 4 : 3;
+    }
+    else
+    {
+      // Start with Strength=3. or Strength=4 for Mb-edge
+      Strength[idx] = (edge == 0 && (((!img->MbaffFrameFlag && (img->structure==FRAME)) ||
+        (img->MbaffFrameFlag && !MbP->mb_field && !MbQ->mb_field)) ||
+        ((img->MbaffFrameFlag || (img->structure!=FRAME)) && !dir))) ? 4 : 3;
+      // the value is lowered only when neither macroblock has one of the intra types listed here
+      if(  !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM)
+        && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM) )
+      {
+        if( ((MbQ->cbp_blk &  ((int64)1 << blkQ )) != 0) || ((MbP->cbp_blk &  ((int64)1 << blkP)) != 0) )
+          Strength[idx] = 2 ;                // cbp_blk bit set for the 4x4 block on either side of the edge
+        else
+        {
+          // if no coefs, but vector difference >= 1 set Strength=1
+          // if this is a mixed mode edge then one set of reference pictures will be frame and the
+          // other will be field
+          if (mixedModeEdgeFlag)
+          {
+            (Strength[idx] = 1);
+          }
+          else
+          {
+            get_mb_block_pos (MbQAddr, &mb_x, &mb_y);
+            blk_y  = (mb_y<<2) + (blkQ >> 2) ;             // (blk_y, blk_x): picture-level 4x4 position of the Q block
+            blk_x  = (mb_x<<2) + (blkQ  & 3) ;
+            blk_y2 = pixP.pos_y >> 2;                      // (blk_y2, blk_x2): 4x4 position of the P neighbour
+            blk_x2 = pixP.pos_x >> 2;
+            { // NOTE: despite their names, ref_p* are read at the Q block and ref_q* at the P neighbour; INT64_MIN = ref index < 0
+              ref_p0 = list0_refIdxArr[blk_y] [blk_x] <0 ? INT64_MIN : list0_refPicIdArr[blk_y ][blk_x];
+              ref_q0 = list0_refIdxArr[blk_y2][blk_x2]<0 ? INT64_MIN : list0_refPicIdArr[blk_y2][blk_x2];
+              ref_p1 = list1_refIdxArr[blk_y ][blk_x] <0 ? INT64_MIN : list1_refPicIdArr[blk_y ][blk_x];
+              ref_q1 = list1_refIdxArr[blk_y2][blk_x2]<0 ? INT64_MIN : list1_refPicIdArr[blk_y2][blk_x2];
+              if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) ||
+                ((ref_p0==ref_q1) && (ref_p1==ref_q0)))
+              { // both sides use the same pair of reference ids (in either list order): decide on MV differences
+                Strength[idx]=0;
+                // L0 and L1 reference pictures of p0 are different; q0 as well
+                if (ref_p0 != ref_p1)
+                {
+                  // compare MV for the same reference picture
+                  if (ref_p0==ref_q0)
+                  {
+                    Strength[idx] =
+                      ( (iabs( list0_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4)
+                      | (iabs( list0_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit)
+                      | (iabs( list1_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4)
+                      | (iabs( list1_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit));
+                  }
+                  else
+                  { // lists are swapped between the two sides: compare list0 against list1 and vice versa
+                    Strength[idx] =
+                      ( (iabs( list0_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4)
+                      | (iabs( list0_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit)
+                      | (iabs( list1_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4)
+                      | (iabs( list1_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit));
+                  }
+                }
+                else
+                { // L0 and L1 reference pictures of p0 are the same; q0 as well
+                  // strength 1 only if both the straight (L0/L0, L1/L1) and the crossed (L0/L1, L1/L0) pairing exceed a limit
+                  Strength[idx] =
+                    ( (iabs( list0_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4)
+                    | (iabs( list0_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit )
+                    | (iabs( list1_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4)
+                    | (iabs( list1_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit))
+                    &&
+                    ( (iabs( list0_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4)
+                    | (iabs( list0_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit)
+                    | (iabs( list1_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4)
+                    | (iabs( list1_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit));
+                }
+              }
+              else
+              { // the two sides reference different pictures
+                Strength[idx] = 1;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/*!
+ *****************************************************************************************
+ * \brief
+ *    Filters one edge of 16 (luma) or 8 (chroma) pel
+ *****************************************************************************************
+ */
+void EdgeLoop(imgpel** Img, byte Strength[16],ImageParameters *img, int MbQAddr, int AlphaC0Offset, int BetaOffset,
+              int dir, int edge, int width, int yuv, int uv)
+{ // Filters, in place, the samples of plane Img on both sides of one edge of macroblock MbQAddr; yuv!=0 selects chroma, uv indexes qpc[].
+  int      pel, ap = 0, aq = 0, Strng ;
+  int      incP, incQ;
+  int      C0, c0, Delta, dif, AbsDelta ;
+  int      L2 = 0, L1, L0, R0, R1, R2 = 0, RL0, L3, R3 ;   // L* = P side, R* = Q side, index = distance from the edge
+  int      Alpha = 0, Beta = 0 ;
+  const byte* ClipTab = NULL;
+  int      small_gap;
+  int      indexA, indexB;
+  int      PelNum;
+  int      StrengthIdx;
+  imgpel   *SrcPtrP, *SrcPtrQ;
+  int      QP;
+  int      xP, xQ, yP, yQ;
+  Macroblock *MbQ, *MbP;
+  PixelPos pixP, pixQ;
+  int      bitdepth_scale;
+  static int      pelnum_cr[2][4] =  {{0,8,16,16}, {0,8, 8,16}};  //[dir:0=vert, 1=hor.][yuv_format]
+  // bitdepth_scale multiplies the table-derived Alpha, Beta and C0 values below
+  if (!yuv)
+    bitdepth_scale = 1<<(img->bitdepth_luma - 8);
+  else
+    bitdepth_scale = 1<<(img->bitdepth_chroma - 8);
+
+  PelNum = yuv ? pelnum_cr[dir][img->yuv_format] : 16 ;
+
+  for( pel=0 ; pel<PelNum ; pel++ )
+  {
+    xQ = dir ? pel : edge << 2;
+    yQ = dir ? (edge < 4 ? edge << 2 : 1) : pel;
+    getNeighbour(MbQAddr, xQ, yQ, yuv, &pixQ);
+    getNeighbour(MbQAddr, xQ - (1 - dir), yQ - dir, yuv, &pixP);
+    xP = pixP.x;
+    yP = pixP.y;
+    MbQ = &(img->mb_data[MbQAddr]);
+    MbP = &(img->mb_data[pixP.mb_addr]);
+    fieldModeFilteringFlag = MbQ->mb_field || MbP->mb_field;
+    StrengthIdx = (yuv&&(PelNum==8)) ? ((MbQ->mb_field && !MbP->mb_field) ? pel<<1 :((pel>>1)<<2)+(pel&0x01)) : pel;
+    // 8-pel chroma edges pick 8 of the 16 strengths above; every other case indexes Strength[] by pel directly
+    if (pixP.available || (MbQ->LFDisableIdc== 0))
+    {
+      incQ = dir ? ((fieldModeFilteringFlag && !MbQ->mb_field) ? 2 * width : width) : 1;
+      incP = dir ? ((fieldModeFilteringFlag && !MbP->mb_field) ? 2 * width : width) : 1;
+      SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+      SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+      // NOTE(review): stepping by +-width from a row pointer assumes the rows of Img are contiguous with that stride - confirm
+      // Average QP of the two blocks
+      QP = yuv ? (MbP->qpc[uv] + MbQ->qpc[uv] + 1) >> 1 : (MbP->qp + MbQ->qp + 1) >> 1;
+
+      indexA = iClip3(0, MAX_QP, QP + AlphaC0Offset);
+      indexB = iClip3(0, MAX_QP, QP + BetaOffset);
+
+      Alpha  =ALPHA_TABLE[indexA] * bitdepth_scale;
+      Beta   =BETA_TABLE[indexB]  * bitdepth_scale;
+      ClipTab=CLIP_TAB[indexA];
+      // NOTE(review): all eight samples are loaded before the strength test, for chroma too - confirm the plane padding covers this
+      L0  = SrcPtrP[0] ;
+      R0  = SrcPtrQ[0] ;
+      L1  = SrcPtrP[-incP] ;
+      R1  = SrcPtrQ[ incQ] ;
+      L2  = SrcPtrP[-incP*2] ;
+      R2  = SrcPtrQ[ incQ*2] ;
+      L3  = SrcPtrP[-incP*3] ;
+      R3  = SrcPtrQ[ incQ*3] ;
+
+      if( (Strng = Strength[StrengthIdx]) )       // strength 0 leaves this sample pair untouched
+      {
+        AbsDelta  = iabs( Delta = R0 - L0 )  ;
+
+        if( AbsDelta < Alpha )
+        {
+          C0  = ClipTab[ Strng ] * bitdepth_scale;
+          if( ((iabs( R0 - R1) - Beta )  & (iabs(L0 - L1) - Beta )) < 0  )   // the AND is negative only when both differences are below Beta
+          {
+            if( !yuv)
+            {
+              aq  = (iabs( R0 - R2) - Beta ) < 0  ;
+              ap  = (iabs( L0 - L2) - Beta ) < 0  ;
+            }
+            // for chroma, ap and aq keep their initial value 0
+            RL0             = L0 + R0 ;
+
+            if(Strng == 4 )    // INTRA strong filtering
+            {
+              if( yuv)  // Chroma
+              {
+                SrcPtrQ[0] = (imgpel)(((R1 << 1) + R0 + L1 + 2) >> 2);
+                SrcPtrP[0] = (imgpel)(((L1 << 1) + L0 + R1 + 2) >> 2);
+              }
+              else  // Luma
+              {
+                small_gap = (AbsDelta < ((Alpha >> 2) + 2));
+                // the wider filter (three samples per side) is used only when the step across the edge is small as well
+                aq &= small_gap;
+                ap &= small_gap;
+
+                SrcPtrQ[0]   = (imgpel) (aq ? ( L1 + ((R1 + RL0) << 1) +  R2 + 4) >> 3 : ((R1 << 1) + R0 + L1 + 2) >> 2);
+                SrcPtrP[0]   = (imgpel) (ap ? ( R1 + ((L1 + RL0) << 1) +  L2 + 4) >> 3 : ((L1 << 1) + L0 + R1 + 2) >> 2);
+
+                SrcPtrQ[ incQ] =   (imgpel) (aq  ? ( R2 + R0 + R1 + L0 + 2) >> 2 : R1);
+                SrcPtrP[-incP] =   (imgpel) (ap  ? ( L2 + L1 + L0 + R0 + 2) >> 2 : L1);
+
+                SrcPtrQ[ incQ*2] = (imgpel) (aq ? (((R3 + R2) <<1) + R2 + R1 + RL0 + 4) >> 3 : R2);
+                SrcPtrP[-incP*2] = (imgpel) (ap ? (((L3 + L2) <<1) + L2 + L1 + RL0 + 4) >> 3 : L2);
+              }
+            }
+            else                                                     // normal filtering
+            {
+              c0  = yuv? (C0+1):(C0 + ap + aq) ;                      // clip bound: C0+1 for chroma, C0 plus one per passed ap/aq test for luma
+              dif = iClip3( -c0, c0, ( (Delta << 2) + (L1 - R1) + 4) >> 3 ) ;
+              if(!yuv)
+              {
+                SrcPtrP[0]  = iClip3(0, img->max_imgpel_value, L0 + dif) ;
+                SrcPtrQ[0]  = iClip3(0, img->max_imgpel_value, R0 - dif) ;
+                if( ap )
+                  SrcPtrP[-incP] += iClip3( -C0,  C0, ( L2 + ((RL0 + 1) >> 1) - (L1<<1)) >> 1 ) ;
+                if( aq  )
+                  SrcPtrQ[ incQ] += iClip3( -C0,  C0, ( R2 + ((RL0 + 1) >> 1) - (R1<<1)) >> 1 ) ;
+              }
+              else
+              {
+                SrcPtrP[0]  = iClip3(0, img->max_imgpel_value_uv, L0 + dif) ;
+                SrcPtrQ[0]  = iClip3(0, img->max_imgpel_value_uv, R0 - dif) ;
+              }
+            } ;
+          } ;
+        } ;
+      } ;
+    } ;
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/macroblock.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/macroblock.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/macroblock.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,4262 @@
+
+/*!
+ *************************************************************************************
+ * \file macroblock.c
+ *
+ * \brief
+ *    Process one macroblock
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *    - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+ *    - Jani Lainema                    <jani.lainema at nokia.com>
+ *    - Sebastian Purreiter             <sebastian.purreiter at mch.siemens.de>
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+ *    - Ragip Kurceren                  <ragip.kurceren at nokia.com>
+ *    - Alexis Michael Tourapis         <alexismt at ieee.org>
+ *************************************************************************************
+ */
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include <memory.h>
+
+#include "global.h"
+
+#include "elements.h"
+#include "macroblock.h"
+#include "refbuf.h"
+#include "fmo.h"
+#include "vlc.h"
+#include "image.h"
+#include "mb_access.h"
+#include "ratectl.h"              // header file for rate control
+#include "rc_quadratic.h"
+#include "cabac.h"
+#include "transform8x8.h"
+#include "me_fullsearch.h"
+#include "symbol.h"
+
+#if TRACE
+#define TRACE_SE(trace,str)  snprintf(trace,TRACESTRING_SIZE,str)
+#else
+#define TRACE_SE(trace,str)
+#endif
+
+extern const byte QP_SCALE_CR[52] ;
+
+//Rate control
+int        predict_error,dq;
+extern int delta_qp_mbaff[2][2];   // indexed [mb_field][bot_MB]; start_macroblock() stores delta_qp here (declarator was listed twice)
+extern int qp_mbaff[2][2];         // indexed [mb_field][bot_MB]; start_macroblock() stores qp here (declarator was listed twice)
+
+// function pointer for different ways of obtaining chroma interpolation
+static void (*OneComponentChromaPrediction4x4) (imgpel* , int , int , short****** , int , short , int , int );
+static void OneComponentChromaPrediction4x4_regenerate (imgpel* , int , int , short****** , int , short , int , int );
+static void OneComponentChromaPrediction4x4_retrieve (imgpel* , int , int , short****** , int , short , int , int );
+
+static int  slice_too_big(int rlc_bits);
+
+static int  writeChromaIntraPredMode (void);
+static int  writeMotionInfo2NAL      (void);
+static int  writeChromaCoeff         (void);
+static int  writeCBPandLumaCoeff     (void);
+
+extern int *mvbits;
+
+extern int QP2QUANT[40];
+extern int ver_offset[4][8][4];
+extern int hor_offset[4][8][4];
+
+static int diff  [16];
+static int diff64[64];
+static const unsigned char subblk_offset_x[3][8][4] =
+{
+  { {0, 4, 0, 4},
+    {0, 4, 0, 4},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0}, },
+
+  { {0, 4, 0, 4},
+    {0, 4, 0, 4},
+    {0, 4, 0, 4},
+    {0, 4, 0, 4},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0}, },
+
+  { {0, 4, 0, 4},
+    {8,12, 8,12},
+    {0, 4, 0, 4},
+    {8,12, 8,12},
+    {0, 4, 0, 4},
+    {8,12, 8,12},
+    {0, 4, 0, 4},
+    {8,12, 8,12}  }
+};
+
+static const unsigned char subblk_offset_y[3][8][4] =
+{
+  { {0, 0, 4, 4},
+    {0, 0, 4, 4},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0}, },
+
+  { {0, 0, 4, 4},
+    {8, 8,12,12},
+    {0, 0, 4, 4},
+    {8, 8,12,12},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0} },
+
+ { {0, 0, 4, 4},
+   {0, 0, 4, 4},
+   {8, 8,12,12},
+   {8, 8,12,12},
+   {0, 0, 4, 4},
+   {0, 0, 4, 4},
+   {8, 8,12,12},
+   {8, 8,12,12} }
+};
+
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    updates the coordinates for the next macroblock to be processed
+ *
+ * \param mb_addr
+ *    macroblock address in scan order
+ ************************************************************************
+ */
+void set_MB_parameters (int mb_addr)
+{
+  Macroblock *mb;
+  int         odd_addr = mb_addr % 2;   // non-zero for odd macroblock addresses
+
+  // macroblock position first, then the 4x4-block and pixel coordinates derived from it
+  img->current_mb_nr = mb_addr;
+  get_mb_block_pos(mb_addr, &img->mb_x, &img->mb_y);
+  mb = &img->mb_data[mb_addr];
+
+  img->block_x = img->mb_x << 2;
+  img->block_y = img->mb_y << 2;
+  img->pix_x   = img->block_x << 2;
+  img->pix_y   = img->block_y << 2;
+  img->opix_x  = img->pix_x;
+
+  if (img->MbaffFrameFlag && mb->mb_field)
+  {
+    // field macroblock of an MBAFF frame: even addresses use the *_top originals
+    // (list offset 2), odd addresses the *_bot originals (list offset 4)
+    imgY_org  = odd_addr ? imgY_org_bot  : imgY_org_top;
+    imgUV_org = odd_addr ? imgUV_org_bot : imgUV_org_top;
+    img->opix_y     = (img->mb_y >> 1 ) << 4;
+    mb->list_offset = odd_addr ? 4 : 2;
+  }
+  else
+  {
+    if (img->MbaffFrameFlag)
+    {
+      // frame macroblock of an MBAFF frame: point back at the frame originals
+      imgY_org  = imgY_org_frm;
+      imgUV_org = imgUV_org_frm;
+    }
+
+    img->opix_y     = img->block_y << 2;
+    mb->list_offset = 0;
+  }
+
+  if (img->yuv_format == YUV400)
+    return;
+
+  // chroma coordinates: luma coordinates times the chroma macroblock size, divided by 16
+  img->pix_c_x  = (img->mb_cr_size_x * img->pix_x) >> 4;
+  img->pix_c_y  = (img->mb_cr_size_y * img->pix_y) >> 4;
+  img->opix_c_x = (img->mb_cr_size_x * img->opix_x) >> 4;
+  img->opix_c_y = (img->mb_cr_size_y * img->opix_y) >> 4;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    updates the coordinates and statistics parameter for the
+ *    next macroblock
+ ************************************************************************
+ */
+void proceed2nextMacroblock(void)
+{
+#if TRACE
+  int use_bitstream_backing = (input->slice_mode == FIXED_RATE || input->slice_mode == CALLBACK);
+#endif
+  Macroblock *currMB    = &img->mb_data[img->current_mb_nr];
+  int        *bitCount  = currMB->bitcounter;
+  int         lumaCoded = ((currMB->cbp&15) != 0);   // any of the four low cbp bits set
+  int         blk;
+
+  // warn, without aborting, when this macroblock used more bits than img->max_bitCount
+  if (bitCount[BITS_TOTAL_MB] > img->max_bitCount)
+    printf("Warning!!! Number of bits (%d) of macroblock_layer() data seems to exceed defined limit (%d).\n", bitCount[BITS_TOTAL_MB],img->max_bitCount);
+
+  // bits spent per syntax category, accumulated per slice type
+  stats->bit_use_delta_quant[img->type]  += bitCount[BITS_DELTA_QUANT_MB];
+  stats->bit_use_coeffC[img->type]       += bitCount[BITS_COEFF_UV_MB];
+  stats->tmp_bit_use_cbp[img->type]      += bitCount[BITS_CBP_MB];
+  stats->bit_use_coeffY[img->type]       += bitCount[BITS_COEFF_Y_MB];
+  stats->bit_use_mb_type[img->type]      += bitCount[BITS_MB_MODE];
+
+  // intra macroblocks: chroma prediction mode histogram plus transform-size usage
+  if (IS_INTRA(currMB))
+  {
+    stats->intra_chroma_mode[currMB->c_ipred_mode] += 1;
+    if (lumaCoded && currMB->luma_transform_size_8x8_flag)
+      stats->mode_use_transform_8x8[img->type][currMB->mb_type] += 1;
+    else if (lumaCoded)
+      stats->mode_use_transform_4x4[img->type][currMB->mb_type] += 1;
+  }
+
+  // every macroblock: mode histogram and the bits charged to that mode
+  stats->mode_use[img->type][currMB->mb_type] += 1;
+  stats->bit_use_mode[img->type][currMB->mb_type] += bitCount[BITS_INTER_MB];
+
+  if (img->type != I_SLICE && currMB->mb_type == P8x8)
+  {
+    // P8x8 macroblock outside I slices: count each of the four sub-block modes
+    for (blk = 0; blk < 4; ++blk)
+    {
+      int subMode = currMB->b8mode[blk];
+
+      if (subMode > 0)
+        stats->mode_use[img->type][subMode] += 1;
+      else
+        stats->b8_mode_0_use[img->type][currMB->luma_transform_size_8x8_flag] += 1;
+
+      if (subMode != 4)
+        continue;
+
+      if ((currMB->luma_transform_size_8x8_flag && lumaCoded) || input->Transform8x8Mode == 2)
+        stats->mode_use_transform_8x8[img->type][4] += 1;
+      else
+        stats->mode_use_transform_4x4[img->type][4] += 1;
+    }
+  }
+  else if (img->type != I_SLICE && currMB->mb_type >= 0 && currMB->mb_type <= 3 && lumaCoded)
+  {
+    if (currMB->luma_transform_size_8x8_flag)
+      stats->mode_use_transform_8x8[img->type][currMB->mb_type] += 1;
+    else
+      stats->mode_use_transform_4x4[img->type][currMB->mb_type] += 1;
+  }
+
+  // qp sum and macroblock count for P/SP slices (to find the average quant for inter frames)
+  if (img->type == P_SLICE || img->type == SP_SLICE)
+  {
+    stats->quant1 += currMB->qp;
+    ++stats->quant0;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    updates chroma QP according to luma QP and bit depth
+************************************************************************
+*/
+void set_chroma_qp(Macroblock *currMB)
+{
+  int comp;   // chroma component, 0 or 1
+  for (comp = 0; comp < 2; ++comp)
+  {
+    currMB->qpc[comp] = iClip3(-img->bitdepth_chroma_qp_scale, 51, currMB->qp + img->chroma_qp_offset[comp]);
+    if (currMB->qpc[comp] >= 0) currMB->qpc[comp] = QP_SCALE_CR[currMB->qpc[comp]];   // negative values bypass the table
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    initializes the current macroblock
+ *
+ * \param mb_addr
+ *    macroblock address in scan order
+ * \param mb_field
+ *    true for field macroblock coding
+ ************************************************************************
+ */
+void start_macroblock(int mb_addr, int mb_field)
+{
+  int i,j,l;
+  int use_bitstream_backing = (input->slice_mode == FIXED_RATE || input->slice_mode == CALLBACK);
+  Macroblock *currMB = &img->mb_data[mb_addr];
+  Slice *curr_slice = img->currentSlice;
+  DataPartition *dataPart;
+  Bitstream *currStream;
+  int prev_mb;
+
+  currMB->mb_field = mb_field;
+
+  enc_picture->mb_field[mb_addr] = mb_field;
+  currMB->is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+
+  set_MB_parameters (mb_addr);
+
+  prev_mb = FmoGetPreviousMBNr(img->current_mb_nr);
+
+  if(use_bitstream_backing)
+  {
+    if ((!input->MbInterlace)||((mb_addr&1)==0)) // KS: MB AFF -> store stream positions for
+                                               //       first macroblock only
+    {
+      // Keep the current state of the bitstreams
+      if(!img->cod_counter)
+      {
+        for (i=0; i<curr_slice->max_part_nr; i++)
+        {
+          dataPart = &(curr_slice->partArr[i]);
+          currStream = dataPart->bitstream;
+          currStream->stored_bits_to_go = currStream->bits_to_go;
+          currStream->stored_byte_pos   = currStream->byte_pos;
+          currStream->stored_byte_buf   = currStream->byte_buf;
+          stats->stored_bit_slice       = stats->bit_slice;
+
+          if (input->symbol_mode ==CABAC)
+          {
+            dataPart->ee_recode = dataPart->ee_cabac;
+          }
+        }
+      }
+    }
+  }
+
+  // Save the slice number of this macroblock. When the macroblock below
+  // is coded it will use this to decide if prediction for above is possible
+  currMB->slice_nr = img->current_slice_nr;
+
+  // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+  // Rate control
+  currMB->qpsp       = img->qpsp;
+  if(input->RCEnable)
+  {
+    if (prev_mb > -1)
+    {
+      if ( input->MbInterlace == ADAPTIVE_CODING && !img->bot_MB && currMB->mb_field )
+      {
+        currMB->qp = img->qp = img->mb_data[prev_mb].qp;
+      }
+      currMB->prev_qp = img->mb_data[prev_mb].qp;
+      if (img->mb_data[prev_mb].slice_nr == img->current_slice_nr)
+      {
+        currMB->prev_delta_qp = img->mb_data[prev_mb].delta_qp;
+      }
+      else
+      {
+        currMB->prev_delta_qp = 0;
+      }
+    }
+    else
+    {
+      currMB->prev_qp = curr_slice->qp;
+      currMB->prev_delta_qp = 0;
+    }
+    // frame layer rate control
+    if(input->basicunit==img->FrameSizeInMbs)
+    {
+      currMB->delta_qp = 0;
+      currMB->qp       = img->qp;
+    }
+    // basic unit layer rate control
+    else
+    {
+      // each I or B frame has only one QP
+      if( ((img->type == I_SLICE || img->type == B_SLICE) && input->RCUpdateMode != RC_MODE_1 ) || (!IMG_NUMBER) )
+      {
+        currMB->delta_qp = 0;
+        currMB->qp       = img->qp;
+      }
+      else if( img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1 )
+      {
+        if (!img->write_macroblock) //write macroblock
+        {
+          if (prev_mb < 0) //first macroblock (of slice)
+          {
+            // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+            currMB->delta_qp = 0;
+            currMB->qp       = img->qp;
+            delta_qp_mbaff[currMB->mb_field][img->bot_MB] = currMB->delta_qp;
+            qp_mbaff      [currMB->mb_field][img->bot_MB] = currMB->qp;
+          }
+          else
+          {
+            if (!((input->MbInterlace) && img->bot_MB)) //top macroblock
+            {
+              if (img->mb_data[prev_mb].prev_cbp == 1)
+              {
+                currMB->delta_qp = 0;
+                currMB->qp       = img->qp;
+              }
+              else
+              {
+                currMB->qp = img->mb_data[prev_mb].prev_qp;
+                currMB->delta_qp = currMB->qp - img->mb_data[prev_mb].qp;
+                img->qp = currMB->qp;
+              }
+              delta_qp_mbaff[currMB->mb_field][img->bot_MB] = currMB->delta_qp;
+              qp_mbaff      [currMB->mb_field][img->bot_MB] = currMB->qp;
+            }
+            else //bottom macroblock
+            {
+              // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+              currMB->delta_qp = 0;
+              currMB->qp       = img->qp;       // needed in loop filter (even if constant QP is used)
+            }
+          }
+        }
+        else
+        {
+          if (!img->bot_MB) //write top macroblock
+          {
+            if (img->write_mbaff_frame)
+            {
+              currMB->delta_qp = delta_qp_mbaff[0][img->bot_MB];
+              img->qp = currMB->qp =   qp_mbaff[0][img->bot_MB];
+              //set_chroma_qp(currMB);
+            }
+            else
+            {
+              if (prev_mb < 0) //first macroblock (of slice)
+              {
+                // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+                currMB->delta_qp = 0;
+                currMB->qp       = img->qp;
+                delta_qp_mbaff[currMB->mb_field][img->bot_MB] = currMB->delta_qp;
+                qp_mbaff      [currMB->mb_field][img->bot_MB] = currMB->qp;
+              }
+              else
+              {
+                currMB->delta_qp = delta_qp_mbaff[1][img->bot_MB];
+                img->qp = currMB->qp =   qp_mbaff[1][img->bot_MB];
+                //set_chroma_qp(currMB);
+              }
+            }
+          }
+          else //write bottom macroblock
+          {
+            currMB->delta_qp = 0;
+            currMB->qp = img->qp;
+            set_chroma_qp(currMB);
+          }
+        }
+
+        // compute the quantization parameter for each basic unit of P frame
+        if (!img->write_macroblock)
+        {
+          if(!((input->MbInterlace) && img->bot_MB))
+          {
+            if((img->NumberofCodedMacroBlocks>0) && (img->NumberofCodedMacroBlocks%img->BasicUnit==0))
+            {
+              // frame coding
+              if(active_sps->frame_mbs_only_flag)
+              {
+                updateRCModel(quadratic_RC);
+                img->BasicUnitQP=updateQP(quadratic_RC, generic_RC->TopFieldFlag);
+              }
+              // picture adaptive field/frame coding
+              else if((input->PicInterlace!=FRAME_CODING)&&(!input->MbInterlace)&&(generic_RC->NoGranularFieldRC==0))
+              {
+                updateRCModel(quadratic_RC);
+                img->BasicUnitQP=updateQP(quadratic_RC, generic_RC->TopFieldFlag);
+              }
+              // mb adaptive f/f coding, field coding
+              else if((input->MbInterlace))
+              {
+                updateRCModel(quadratic_RC);
+                img->BasicUnitQP=updateQP(quadratic_RC, generic_RC->TopFieldFlag);
+              }
+            }
+
+            if(img->current_mb_nr==0)
+              img->BasicUnitQP=img->qp;
+
+            currMB->predict_qp = iClip3(currMB->qp - img->min_qp_delta, currMB->qp + img->max_qp_delta, img->BasicUnitQP);
+
+            dq = currMB->delta_qp + currMB->predict_qp - currMB->qp;
+            if(dq < -img->min_qp_delta)
+            {
+              dq = -img->min_qp_delta;
+              predict_error = dq-currMB->delta_qp;
+              img->qp = img->qp+predict_error;
+              currMB->delta_qp = -img->min_qp_delta;
+            }
+            else if(dq > img->max_qp_delta)
+            {
+              dq = img->max_qp_delta;
+              predict_error = dq - currMB->delta_qp;
+              img->qp = img->qp + predict_error;
+              currMB->delta_qp = img->max_qp_delta;
+            }
+            else
+            {
+              currMB->delta_qp = dq;
+              predict_error=currMB->predict_qp-currMB->qp;
+              img->qp = currMB->predict_qp;
+            }
+            currMB->qp =  img->qp;
+            if (input->MbInterlace)
+            {
+              delta_qp_mbaff[currMB->mb_field][img->bot_MB] = currMB->delta_qp;
+              qp_mbaff      [currMB->mb_field][img->bot_MB] = currMB->qp;
+            }
+            currMB->predict_error=predict_error;
+          }
+          else
+            currMB->prev_qp=img->qp;
+        }
+      }
+    }
+  }
+  else
+  {
+    Slice* currSlice = img->currentSlice;
+
+    if (prev_mb>-1)
+    {
+      currMB->prev_qp = img->mb_data[prev_mb].qp;
+      currMB->prev_delta_qp = (img->mb_data[prev_mb].slice_nr == img->current_slice_nr) ? img->mb_data[prev_mb].delta_qp : 0;
+    }
+    else
+    {
+      currMB->prev_qp = currSlice->qp;
+      currMB->prev_delta_qp = 0;
+    }
+
+    currMB->qp = currSlice->qp ;
+
+    currMB->delta_qp = currMB->qp - currMB->prev_qp;
+    delta_qp_mbaff[currMB->mb_field][img->bot_MB] = currMB->delta_qp;
+    qp_mbaff      [currMB->mb_field][img->bot_MB] = currMB->qp;
+  }
+  img->qp_scaled = img->qp + img->bitdepth_luma_qp_scale;
+
+  set_chroma_qp (currMB);
+
+  // loop filter parameter
+  if (active_pps->deblocking_filter_control_present_flag)
+  {
+    currMB->LFDisableIdc    = img->LFDisableIdc;
+    currMB->LFAlphaC0Offset = img->LFAlphaC0Offset;
+    currMB->LFBetaOffset    = img->LFBetaOffset;
+  }
+  else
+  {
+    currMB->LFDisableIdc    = 0;
+    currMB->LFAlphaC0Offset = 0;
+    currMB->LFBetaOffset    = 0;
+  }
+
+  // If MB is next to a slice boundary, mark neighboring blocks unavailable for prediction
+  CheckAvailabilityOfNeighbors();
+
+  if (input->symbol_mode == CABAC)
+    CheckAvailabilityOfNeighborsCABAC();
+
+  // Reset vectors and reference indices
+  for (l=0; l<2; l++)
+  {
+    for (j=img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+    {
+      memset(&enc_picture->ref_idx[l][j][img->block_x], -1, BLOCK_MULTIPLE * sizeof(char));
+      memset(enc_picture->mv [l][j][img->block_x], 0, 2 * BLOCK_MULTIPLE * sizeof(short));
+      for (i=img->block_x; i < img->block_x + BLOCK_MULTIPLE; i++)
+        enc_picture->ref_pic_id[l][j][i]= -1;
+    }
+  }
+
+  // Reset syntax element entries in MB struct
+  currMB->mb_type      = 0;
+  currMB->cbp_blk      = 0;
+  currMB->cbp          = 0;
+  currMB->cbp_bits     = 0;
+  currMB->c_ipred_mode = DC_PRED_8;
+
+  memset (currMB->mvd, 0, BLOCK_CONTEXT * sizeof(int));
+  memset (currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char)); // changing this to char would allow us to use memset
+  memset (currMB->intra_pred_modes8x8, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+
+  //initialize the whole MB as INTRA coded
+  //Blocks are set to notINTRA in write_one_macroblock
+  if (input->UseConstrainedIntraPred)
+  {
+    img->intra_block[img->current_mb_nr] = 1;
+  }
+
+  // Initialize bitcounters for this macroblock
+  if(prev_mb < 0) // No slice header to account for
+  {
+    currMB->bitcounter[BITS_HEADER] = 0;
+  }
+  else if (currMB->slice_nr == img->mb_data[prev_mb].slice_nr) // current MB belongs to the
+  // same slice as the last MB
+  {
+    currMB->bitcounter[BITS_HEADER] = 0;
+  }
+
+  currMB->bitcounter[BITS_MB_MODE       ] = 0;
+  currMB->bitcounter[BITS_COEFF_Y_MB    ] = 0;
+  currMB->bitcounter[BITS_INTER_MB      ] = 0;
+  currMB->bitcounter[BITS_CBP_MB        ] = 0;
+  currMB->bitcounter[BITS_DELTA_QUANT_MB] = 0;
+  currMB->bitcounter[BITS_COEFF_UV_MB   ] = 0;
+
+  if(input->SearchMode == FAST_FULL_SEARCH)
+    ResetFastFullIntegerSearch ();
+
+  // disable writing of trace file
+#if TRACE
+  curr_slice->partArr[0].bitstream->trace_enabled = FALSE;
+  if (input->partition_mode)
+  {
+    curr_slice->partArr[1].bitstream->trace_enabled = FALSE;
+    curr_slice->partArr[2].bitstream->trace_enabled = FALSE;
+  }
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    terminates processing of the current macroblock depending
+ *    on the chosen slice mode
+ *
+ *    Decides whether the slice ends with the current macroblock and
+ *    whether the macroblock has to be coded again in the next slice.
+ *    When recoding is requested, the bitstream positions, the slice bit
+ *    statistics and (for CABAC) the arithmetic coder state of every
+ *    partition are rolled back to the values kept in the stored_* /
+ *    ee_recode fields. For UVLC, a pending run of skipped macroblocks
+ *    (img->cod_counter) is flushed or dropped at the slice end.
+ *
+ * \note
+ *    The function keeps a static flag ("skip") across calls to remember
+ *    that a skip run was tentatively written in FIXED_RATE mode.
+ *    *end_of_slice is only ever set to TRUE (or OR-ed) in several paths,
+ *    so the caller presumably initialises it -- NOTE(review): confirm.
+ ************************************************************************
+ */
+void terminate_macroblock( Boolean *end_of_slice,      //!< returns true for last macroblock of a slice, otherwise false
+                           Boolean *recode_macroblock  //!< returns true if max. slice size is exceeded and the macroblock must be recoded in the next slice
+                           )
+{
+  int i;
+  Slice *currSlice = img->currentSlice;
+  Macroblock    *currMB    = &img->mb_data[img->current_mb_nr];
+  SyntaxElement se;
+  // mapping from syntax element type to data partition index for the active partition mode
+  int *partMap = assignSE2partition[input->partition_mode];
+  DataPartition *dataPart;
+  Bitstream *currStream;
+  // bits of the tentatively written skip run; stays 0 unless set in the FIXED_RATE branch
+  int rlc_bits=0;
+  // slice modes that may need to roll the bitstream back and recode a macroblock
+  int use_bitstream_backing = (input->slice_mode == FIXED_RATE || input->slice_mode == CALLBACK);
+  int new_slice;
+  // persists between calls: TRUE while a tentatively written skip run is pending
+  static int skip = FALSE;
+
+
+  // if previous mb in the same slice group has different slice number as the current, it's the
+  // the start of new slice
+  new_slice=0;
+  if ( (img->current_mb_nr==0) || (FmoGetPreviousMBNr(img->current_mb_nr)<0) )
+    new_slice=1;
+  else if( img->mb_data[FmoGetPreviousMBNr(img->current_mb_nr)].slice_nr != img->current_slice_nr )
+    new_slice=1;
+
+  *recode_macroblock=FALSE;
+
+  switch(input->slice_mode)
+  {
+  case NO_SLICES:
+    // the slice ends once all macroblocks of the picture are counted ...
+    currSlice->num_mb++;
+    *recode_macroblock = FALSE;
+    if ((currSlice->num_mb) == (int)img->PicSizeInMbs) // maximum number of MBs reached
+      *end_of_slice = TRUE;
+
+    // if it's end of current slice group, slice ends too
+    // (the incoming value of *end_of_slice is OR-ed in here)
+    *end_of_slice = (Boolean) (*end_of_slice | (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr))));
+
+    break;
+  case FIXED_MB:
+    // For slice mode one, check if a new slice boundary follows
+    currSlice->num_mb++;
+    *recode_macroblock = FALSE;
+    //! Check end-of-slice group condition first
+    *end_of_slice = (Boolean) (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr)));
+    //! Now check maximum # of MBs in slice
+    *end_of_slice = (Boolean) (*end_of_slice | (currSlice->num_mb >= input->slice_argument));
+
+    break;
+
+    // For slice modes two and three, check if coding of this macroblock
+    // resulted in too many bits for this slice. If so, indicate slice
+    // boundary before this macroblock and code the macroblock again
+  case FIXED_RATE:
+    // in case of skip MBs check if there is a slice boundary
+    // only for UVLC (img->cod_counter is always 0 in case of CABAC)
+    if(img->cod_counter)
+    {
+      // write out the skip MBs to know how many bits we need for the RLC
+      se.value1 = img->cod_counter;
+      se.value2 = 0;
+      se.type = SE_MBTYPE;
+      dataPart = &(currSlice->partArr[partMap[se.type]]);
+
+      TRACE_SE (se.tracestring, "mb_skip_run");
+      writeSE_UVLC(&se, dataPart);
+      rlc_bits=se.len;
+
+      currStream = dataPart->bitstream;
+      // save the bitstream as it would be if we write the skip MBs
+      currStream->bits_to_go_skip  = currStream->bits_to_go;
+      currStream->byte_pos_skip    = currStream->byte_pos;
+      currStream->byte_buf_skip    = currStream->byte_buf;
+      // restore the bitstream
+      currStream->bits_to_go = currStream->stored_bits_to_go;
+      currStream->byte_pos = currStream->stored_byte_pos;
+      currStream->byte_buf = currStream->stored_byte_buf;
+      skip = TRUE;
+    }
+    //! Check if the last coded macroblock fits into the size of the slice
+    //! But only if this is not the first macroblock of this slice
+    if (!new_slice)
+    {
+      if(slice_too_big(rlc_bits))
+      {
+        *recode_macroblock = TRUE;
+        *end_of_slice = TRUE;
+      }
+      else if(!img->cod_counter)
+        skip = FALSE;
+    }
+    // maximum number of MBs
+
+    // check if current slice group is finished
+    if ((*recode_macroblock == FALSE) && (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr))))
+    {
+      *end_of_slice = TRUE;
+      if(!img->cod_counter)
+        skip = FALSE;
+    }
+
+    //! (first MB OR first MB in a slice) AND bigger that maximum size of slice
+    // the slice is closed after this single macroblock, without requesting a recode
+    if (new_slice && slice_too_big(rlc_bits))
+    {
+      *end_of_slice = TRUE;
+      if(!img->cod_counter)
+        skip = FALSE;
+    }
+    // a macroblock that will be recoded is not counted for this slice
+    if (!*recode_macroblock)
+      currSlice->num_mb++;
+    break;
+
+  case  CALLBACK:
+    // size check is delegated to the slice's own slice_too_big callback;
+    // rlc_bits is still 0 here because no skip run is written in this branch
+    if (img->current_mb_nr > 0 && !new_slice)
+    {
+      if (currSlice->slice_too_big(rlc_bits))
+      {
+        *recode_macroblock = TRUE;
+        *end_of_slice = TRUE;
+      }
+    }
+
+    // the end of the slice group also ends the slice
+    if ( (*recode_macroblock == FALSE) && (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr))))
+      *end_of_slice = TRUE;
+    break;
+
+  default:
+    // unknown slice mode: report through the error() handler with code 600
+    snprintf(errortext, ET_SIZE, "Slice Mode %d not supported", input->slice_mode);
+    error(errortext, 600);
+  }
+
+  if (*recode_macroblock == TRUE)
+  {
+    // Restore everything
+    // (bitstream position of every partition, slice bit statistics and,
+    //  for CABAC, the encoding environment kept in ee_recode)
+    for (i=0; i<currSlice->max_part_nr; i++)
+    {
+      dataPart = &(currSlice->partArr[i]);
+      currStream = dataPart->bitstream;
+      currStream->bits_to_go = currStream->stored_bits_to_go;
+      currStream->byte_pos  = currStream->stored_byte_pos;
+      currStream->byte_buf  = currStream->stored_byte_buf;
+      stats->bit_slice      = stats->stored_bit_slice;
+
+      if (input->symbol_mode == CABAC)
+      {
+        dataPart->ee_cabac = dataPart->ee_recode;
+      }
+    }
+  }
+
+  if (input->symbol_mode == UVLC)
+  {
+    // Skip MBs at the end of this slice
+    dataPart = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+    if(*end_of_slice == TRUE  && skip == TRUE)
+    {
+      // only for Slice Mode 2 or 3
+      // If we still have to write the skip, let's do it!
+      if(img->cod_counter && *recode_macroblock == TRUE) // MB that did not fit in this slice
+      {
+        // If recoding is true and we have had skip,
+        // we have to reduce the counter in case of recoding
+        img->cod_counter--;
+        if(img->cod_counter)
+        {
+          // write the shortened skip run and charge its bits to the MB mode counter
+          se.value1 = img->cod_counter;
+          se.value2 = 0;
+          se.type = SE_MBTYPE;
+#if TRACE
+          snprintf(se.tracestring, TRACESTRING_SIZE, "Final MB runlength = %3d",img->cod_counter);
+#endif
+          writeSE_UVLC(&se, dataPart);
+          rlc_bits=se.len;
+          currMB->bitcounter[BITS_MB_MODE]+=rlc_bits;
+          img->cod_counter = 0;
+        }
+      }
+      else //! MB that did not fit in this slice anymore is not a Skip MB
+      {
+        currStream = dataPart->bitstream;
+        // update the bitstream
+        // (adopt the position saved after the tentative skip-run write)
+        currStream->bits_to_go = currStream->bits_to_go_skip;
+        currStream->byte_pos  = currStream->byte_pos_skip;
+        currStream->byte_buf  = currStream->byte_buf_skip;
+
+        // update the statistics
+        img->cod_counter = 0;
+        skip = FALSE;
+      }
+    }
+
+    // Skip MBs at the end of this slice for Slice Mode 0 or 1
+    // (modes without bitstream backing: the pending run is simply written out)
+    if(*end_of_slice == TRUE && img->cod_counter && !use_bitstream_backing)
+    {
+      se.value1 = img->cod_counter;
+      se.value2 = 0;
+      se.type = SE_MBTYPE;
+
+      TRACE_SE (se.tracestring, "mb_skip_run");
+      writeSE_UVLC(&se, dataPart);
+
+      rlc_bits=se.len;
+      currMB->bitcounter[BITS_MB_MODE]+=rlc_bits;
+      img->cod_counter = 0;
+    }
+  }
+}
+
+/*!
+ *****************************************************************************
+ *
+ * \brief
+ *    For Slice Mode 2: tests whether any data partition of the current
+ *    slice has grown beyond the configured slice size
+ *    (input->slice_argument, compared in bytes).
+ *
+ * \param rlc_bits
+ *    Bits needed for a pending skip-run code. In UVLC mode a partition
+ *    whose current byte has fewer free bits than this is charged one
+ *    additional byte.
+ *
+ * \return
+ *    FALSE if all partitions of this slice are within the allowed size
+ *    TRUE if at least one partition exceeds the limit
+ *
+ * \date
+ *    4 November 2001
+ *
+ * \author
+ *    Tobias Oelbaum      drehvial at gmx.net
+ *****************************************************************************/
+
+int slice_too_big(int rlc_bits)
+{
+  Slice *currSlice = img->currentSlice;
+  int part;
+
+  //! UVLC: count the bytes already produced, plus partially filled ones
+  if (input->symbol_mode == UVLC)
+  {
+    for (part = 0; part < currSlice->max_part_nr; ++part)
+    {
+      Bitstream *bs = currSlice->partArr[part].bitstream;
+      int bytes_used = bs->byte_pos;
+
+      // a started byte counts as a full byte
+      bytes_used += (bs->bits_to_go < 8) ? 1 : 0;
+      // the skip-run code would spill into one more byte
+      bytes_used += (bs->bits_to_go < rlc_bits) ? 1 : 0;
+
+      if (bytes_used > input->slice_argument)
+        return TRUE;
+    }
+  }
+
+  //! CABAC: compare the bits written by the arithmetic coder
+  if (input->symbol_mode == CABAC)
+  {
+    for (part = 0; part < currSlice->max_part_nr; ++part)
+    {
+      EncodingEnvironmentPtr coder = &(currSlice->partArr[part].ee_cabac);
+
+      if (arienco_bits_written(coder) > (input->slice_argument*8))
+        return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fetch the motion-compensated samples of one 4x4 luma block from a
+ *    single reference picture.
+ *
+ *    BLOCK_SIZE rows of BLOCK_SIZE samples are copied into mpred, row
+ *    after row. The globals width_pad and height_pad are set from the
+ *    selected reference picture before the source line is looked up.
+ ************************************************************************
+ */
+void OneComponentLumaPrediction4x4 ( imgpel*   mpred,          //!< output: prediction samples, stored row by row
+                                     int    pic_pix_x,      //!< horizontal position of the 4x4 block, added to mv[0]
+                                     int    pic_pix_y,      //!< vertical   position of the 4x4 block, added to mv[1]
+                                     short* mv,             //!< motion vector (mv[0] horizontal, mv[1] vertical)
+                                     short  ref,            //!< index of the reference picture inside list
+                                     StorablePicture **list //!< reference picture list
+                                     )
+{
+  StorablePicture *ref_pic = list[ref];
+  imgpel *src_row;
+  imgpel *dst_end;
+
+  // publish the padded dimensions of the chosen reference picture
+  width_pad  = ref_pic->size_x_pad;
+  height_pad = ref_pic->size_y_pad;
+
+  src_row = UMVLine4X (ref_pic->imgY_sub, pic_pix_y + mv[1], pic_pix_x + mv[0]);
+  dst_end = mpred + BLOCK_SIZE * BLOCK_SIZE;
+
+  // copy one row per iteration; source rows are img_padded_size_x apart
+  while (mpred != dst_end)
+  {
+    memcpy(mpred, src_row, BLOCK_SIZE * sizeof(imgpel));
+    mpred   += BLOCK_SIZE;
+    src_row += img_padded_size_x;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Build the inter prediction of one 4x4 luma block in img->mpr.
+ *
+ *    The samples of the requested list(s) are fetched with
+ *    OneComponentLumaPrediction4x4() and then copied (single list),
+ *    averaged (both lists) or, when weighted prediction is active for
+ *    the current slice type, scaled, rounded, offset and clipped.
+ *    Any p_dir other than 0 or 2 is combined as a list1 prediction.
+ ************************************************************************
+ */
+void LumaPrediction4x4 ( int   block_x,    //!< relative horizontal block coordinate of 4x4 block
+                         int   block_y,    //!< relative vertical   block coordinate of 4x4 block
+                         int   p_dir,      //!< prediction direction (0=list0, 1=list1, 2=bipred)
+                         int   l0_mode,    //!< list0 prediction mode (1-7, 0=DIRECT if l1_mode=0)
+                         int   l1_mode,    //!< list1 prediction mode (1-7, 0=DIRECT if l0_mode=0)
+                         short l0_ref_idx, //!< reference frame for list0 prediction (-1: Intra4x4 pred. with l0_mode)
+                         short l1_ref_idx  //!< reference frame for list1 prediction
+                         )
+{
+  static imgpel l0_pred[16];
+  static imgpel l1_pred[16];
+
+  Macroblock *currMB  = &img->mb_data[img->current_mb_nr];
+  const int  x_end    = block_x + 4;
+  const int  y_end    = block_y + 4;
+  const int  ref_x    = ((img->opix_x + block_x) << 2) + IMG_PAD_SIZE_TIMES4;
+  const int  ref_y    = ((img->opix_y + block_y) << 2) + IMG_PAD_SIZE_TIMES4;
+  const int  blk_col  = block_x >> 2;
+  const int  blk_row  = block_y >> 2;
+  const int  weighted = ( (active_pps->weighted_pred_flag && (img->type== P_SLICE || img->type == SP_SLICE)) ||
+                          (active_pps->weighted_bipred_idc && (img->type== B_SLICE)));
+  short ****mvs = img->all_mv[blk_row][blk_col];
+  int   row, col;
+  int   k = 0;       // running sample index into l0_pred / l1_pred
+
+  // bi-predictive motion estimation result overrides the regular vectors
+  if (p_dir == 2 && currMB->bi_pred_me && l0_ref_idx == 0 && l1_ref_idx == 0 && l0_mode==1 && l1_mode==1)
+  {
+    if (currMB->bi_pred_me == 1)
+      mvs = img->bipred_mv1[blk_row][blk_col];
+    else
+      mvs = img->bipred_mv2[blk_row][blk_col];
+  }
+
+  //===== fetch the reference samples (list0 first, then list1) =====
+  if (p_dir == 0 || p_dir == 2)
+    OneComponentLumaPrediction4x4 (l0_pred, ref_x, ref_y, mvs[LIST_0][l0_ref_idx][l0_mode], l0_ref_idx, listX[0+currMB->list_offset]);
+  if (p_dir == 1 || p_dir == 2)
+    OneComponentLumaPrediction4x4 (l1_pred, ref_x, ref_y, mvs[LIST_1][l1_ref_idx][l1_mode], l1_ref_idx, listX[1+currMB->list_offset]);
+
+  //===== combine into img->mpr =====
+  if (p_dir == 2)
+  {
+    if (weighted)
+    {
+      const int w0       = wbp_weight[0][l0_ref_idx][l1_ref_idx][0];
+      const int w1       = wbp_weight[1][l0_ref_idx][l1_ref_idx][0];
+      const int offset   = (wp_offset[0][l0_ref_idx][0] + wp_offset[1][l1_ref_idx][0] + 1)>>1;
+      const int rounding = 2*wp_luma_round;
+      const int shift    = luma_log_weight_denom + 1;
+
+      for (row = block_y; row < y_end; row++)
+      {
+        for (col = block_x; col < x_end; col++, k++)
+        {
+          img->mpr[row][col] = iClip1( img->max_imgpel_value,
+            ((w0 * l0_pred[k] + w1 * l1_pred[k] + rounding) >> shift) + offset);
+        }
+      }
+    }
+    else
+    {
+      // plain rounded average of both predictions
+      for (row = block_y; row < y_end; row++)
+      {
+        for (col = block_x; col < x_end; col++, k++)
+        {
+          img->mpr[row][col] = (l0_pred[k] + l1_pred[k] + 1) >> 1;
+        }
+      }
+    }
+  }
+  else
+  {
+    // single-list prediction: everything except p_dir==0 uses list1
+    const int lst = (p_dir == 0) ? 0 : 1;
+    imgpel   *src = lst ? l1_pred : l0_pred;
+
+    if (weighted)
+    {
+      const short ref    = lst ? l1_ref_idx : l0_ref_idx;
+      const int   wp     = wp_weight[lst][ref][0];
+      const int   offset = wp_offset[lst][ref][0];
+
+      for (row = block_y; row < y_end; row++)
+      {
+        for (col = block_x; col < x_end; col++, k++)
+        {
+          img->mpr[row][col] = iClip1( img->max_imgpel_value,
+            ((wp * src[k] + wp_luma_round) >> luma_log_weight_denom) + offset);
+        }
+      }
+    }
+    else
+    {
+      // unweighted: copy the fetched rows straight into the prediction
+      for (row = block_y; row < y_end; row++, src += BLOCK_SIZE)
+      {
+        memcpy(&(img->mpr[row][block_x]), src, BLOCK_SIZE * sizeof(imgpel));
+      }
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Build the bi-predictive prediction of one 4x4 luma block in
+ *    img->mpr, using the motion vectors of the bi-predictive search.
+ *
+ *    Both reference blocks are always fetched. The vectors come from
+ *    img->bipred_mv1 when list is non-zero and from img->bipred_mv2
+ *    otherwise. The two predictions are averaged, or weighted, offset
+ *    and clipped when weighted prediction is active for the slice type.
+ ************************************************************************
+ */
+void LumaPrediction4x4Bi ( int   block_x,    //!< relative horizontal block coordinate of 4x4 block
+                           int   block_y,    //!< relative vertical   block coordinate of 4x4 block
+                           int   l0_mode,    //!< list0 prediction mode (1-7, 0=DIRECT if l1_mode=0)
+                           int   l1_mode,    //!< list1 prediction mode (1-7, 0=DIRECT if l0_mode=0)
+                           short l0_ref_idx, //!< reference frame for list0 prediction (-1: Intra4x4 pred. with l0_mode)
+                           short l1_ref_idx, //!< reference frame for list1 prediction
+                           int   list        //!< selects the vector set: non-zero = bipred_mv1, zero = bipred_mv2
+                           )
+{
+  static imgpel l0_pred[16];
+  static imgpel l1_pred[16];
+
+  Macroblock *currMB  = &img->mb_data[img->current_mb_nr];
+  const int  x_end    = block_x + 4;
+  const int  y_end    = block_y + 4;
+  const int  ref_x    = ((img->opix_x + block_x) << 2) + IMG_PAD_SIZE_TIMES4;
+  const int  ref_y    = ((img->opix_y + block_y) << 2) + IMG_PAD_SIZE_TIMES4;
+  const int  blk_col  = block_x >> 2;
+  const int  blk_row  = block_y >> 2;
+  const int  weighted = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                          (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));
+  short ****mvs;
+  int   row, col;
+  int   k = 0;       // running sample index into l0_pred / l1_pred
+
+  if (list)
+    mvs = img->bipred_mv1[blk_row][blk_col];
+  else
+    mvs = img->bipred_mv2[blk_row][blk_col];
+
+  //===== fetch both reference blocks (list0 first, then list1) =====
+  OneComponentLumaPrediction4x4 (l0_pred, ref_x, ref_y, mvs[LIST_0][l0_ref_idx][l0_mode], l0_ref_idx, listX[0+currMB->list_offset]);
+  OneComponentLumaPrediction4x4 (l1_pred, ref_x, ref_y, mvs[LIST_1][l1_ref_idx][l1_mode], l1_ref_idx, listX[1+currMB->list_offset]);
+
+  if (!weighted)
+  {
+    // plain rounded average of both predictions
+    for (row = block_y; row < y_end; row++)
+    {
+      for (col = block_x; col < x_end; col++, k++)
+      {
+        img->mpr[row][col] = (l0_pred[k] + l1_pred[k] + 1) >> 1;
+      }
+    }
+  }
+  else
+  {
+    const int w0       = wbp_weight[0][l0_ref_idx][l1_ref_idx][0];
+    const int w1       = wbp_weight[1][l0_ref_idx][l1_ref_idx][0];
+    const int offset   = (wp_offset[0][l0_ref_idx][0] + wp_offset[1][l1_ref_idx][0] + 1)>>1;
+    const int rounding = 2*wp_luma_round;
+    const int shift    = luma_log_weight_denom + 1;
+
+    for (row = block_y; row < y_end; row++)
+    {
+      for (col = block_x; col < x_end; col++, k++)
+      {
+        img->mpr[row][col] = iClip1( img->max_imgpel_value,
+          ((w0 * l0_pred[k] + w1 * l1_pred[k] + rounding) >> shift) + offset);
+      }
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Residual Coding of an 8x8 Luma block (not for intra)
+ *
+ * \return
+ *    coefficient cost
+ ************************************************************************
+ */
+int LumaResidualCoding8x8 ( int   *cbp,        //!< Output: cbp (updated according to processed 8x8 luminance block)
+                            int64 *cbp_blk,    //!< Output: block cbp (updated according to processed 8x8 luminance block)
+                            int   block8x8,    //!< block number of 8x8 block
+                            short p_dir,       //!< prediction direction
+                            int   l0_mode,     //!< list0 prediction mode (1-7, 0=DIRECT)
+                            int   l1_mode,     //!< list1 prediction mode (1-7, 0=DIRECT)
+                            short l0_ref_idx, //!< reference picture for list0 prediction
+                            short l1_ref_idx  //!< reference picture for list0 prediction
+                           )
+{
+  int    block_y, block_x, pic_pix_y, pic_pix_x, i, j, nonzero = 0, cbp_blk_mask;
+  int    coeff_cost = 0;
+  int    mb_y       = (block8x8 >> 1) << 3;
+  int    mb_x       = (block8x8 & 0x01) << 3;
+  int    pix_y;
+  int    cbp_mask   = 1 << block8x8;
+  int    bxx, byy;                   // indexing curr_blk
+  int    skipped    = (l0_mode == 0 && l1_mode == 0 && (img->type != B_SLICE));
+  Macroblock* currMB = &img->mb_data[img->current_mb_nr];
+  //set transform size
+  int    need_8x8_transform = currMB->luma_transform_size_8x8_flag;
+
+  if ( input->ChromaMCBuffer )
+    OneComponentChromaPrediction4x4 = OneComponentChromaPrediction4x4_retrieve;
+  else
+    OneComponentChromaPrediction4x4 = OneComponentChromaPrediction4x4_regenerate;
+
+  //===== loop over 4x4 blocks =====
+  if(!need_8x8_transform)
+  {
+    for (byy=0, block_y=mb_y; block_y<mb_y+8; byy+=4, block_y+=4)
+    {
+      pic_pix_y = img->opix_y + block_y;
+    
+      for (bxx=0, block_x=mb_x; block_x<mb_x+8; bxx+=4, block_x+=4)
+      {
+        pic_pix_x = img->opix_x + block_x;
+
+        cbp_blk_mask = (block_x>>2) + block_y;
+
+        //===== prediction of 4x4 block =====
+        LumaPrediction4x4 (block_x, block_y, p_dir, l0_mode, l1_mode, l0_ref_idx, l1_ref_idx);
+
+        //===== get displaced frame difference ======
+        for (j=0; j<4; j++)
+        {
+          pix_y = pic_pix_y + j;
+          for (i=0; i<4; i++)
+          {
+            img->m7[j][i] = imgY_org[pix_y][pic_pix_x + i] - img->mpr[j+block_y][i+block_x];
+          }
+        }
+
+        //===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====
+        if ( (img->NoResidueDirect != 1 && !skipped  ) ||
+          ((img->qp_scaled)==0 && img->lossless_qpprime_flag==1) )
+        {
+          //===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====
+          if (img->type!=SP_SLICE)
+            nonzero = dct_luma   (block_x, block_y, &coeff_cost, 0);
+          else if(!si_frame_indicator && !sp2_frame_indicator)
+            nonzero = dct_luma_sp(block_x, block_y, &coeff_cost);// SP frame encoding
+          else
+            nonzero = dct_luma_sp2(block_x, block_y, &coeff_cost);//switching SP/SI encoding
+
+          if (nonzero)
+          {
+            (*cbp_blk) |= (int64)1 << cbp_blk_mask;  // one bit for every 4x4 block
+            (*cbp)     |= cbp_mask;           // one bit for the 4x4 blocks of an 8x8 block
+          }
+        }
+      }
+    }
+  }
+  else
+  {
+    for (byy=0, block_y=mb_y; block_y<mb_y+8; byy+=4, block_y+=4)
+    {
+      pic_pix_y = img->opix_y + block_y;
+
+      for (bxx=0, block_x=mb_x; block_x<mb_x+8; bxx+=4, block_x+=4)
+      {
+        pic_pix_x = img->opix_x + block_x;
+
+        cbp_blk_mask = (block_x>>2) + block_y;
+
+        //===== prediction of 4x4 block =====
+        LumaPrediction4x4 (block_x, block_y, p_dir, l0_mode, l1_mode, l0_ref_idx, l1_ref_idx);
+
+        //===== get displaced frame difference ======
+        for (j=0; j<4; j++)
+        {
+          pix_y = pic_pix_y + j;
+          for (i=0; i<4; i++)
+          {
+            img->m7[j+byy][i+bxx] = imgY_org[pix_y][pic_pix_x+i] - img->mpr[j+block_y][i+block_x];
+          }
+        }
+      }
+    }
+    if (img->NoResidueDirect != 1 && !skipped)
+    {
+      if (img->type!=SP_SLICE)
+        nonzero = dct_luma8x8   (block8x8, &coeff_cost, 0);
+
+      if (nonzero)
+      {
+        (*cbp_blk) |= 51 << (4*block8x8-2*(block8x8 & 0x01)); // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position
+        (*cbp)     |= cbp_mask;           // one bit for the 4x4 blocks of an 8x8 block
+      }
+    }
+  }
+
+  /*
+  The purpose of the action below is to prevent that single or 'expensive' coefficients are coded.
+  With 4x4 transform there is larger chance that a single coefficient in a 8x8 or 16x16 block may be nonzero.
+  A single small (level=1) coefficient in a 8x8 block will cost: 3 or more bits for the coefficient,
+  4 bits for EOBs for the 4x4 blocks,possibly also more bits for CBP.  Hence the total 'cost' of that single
+  coefficient will typically be 10-12 bits which in a RD consideration is too much to justify the distortion improvement.
+  The action below is to watch such 'single' coefficients and set the reconstructed block equal to the prediction according
+  to a given criterion.  The action is taken only for inter luma blocks.
+
+  Notice that this is a pure encoder issue and hence does not have any implication on the standard.
+  coeff_cost is a parameter set in dct_luma() and accumulated for each 8x8 block.  If level=1 for a coefficient,
+  coeff_cost is increased by a number depending on RUN for that coefficient. The numbers are (see also dct_luma()): 3,2,2,1,1,1,0,0,...
+  when RUN equals 0,1,2,3,4,5,6, etc.
+  If level >1 coeff_cost is increased by 9 (or any number above 3). The threshold is set to 3. This means for example:
+  1: If there is one coefficient with (RUN,level)=(0,1) in a 8x8 block this coefficient is discarded.
+  2: If there are two coefficients with (RUN,level)=(1,1) and (4,1) the coefficients are also discarded
+  sum_cnt_nonz is the accumulation of coeff_cost over a whole macro block.  If sum_cnt_nonz is 5 or less for the whole MB,
+  all nonzero coefficients are discarded for the MB and the reconstructed block is set equal to the prediction.
+  */
+
+  if (img->NoResidueDirect != 1 && !skipped && coeff_cost <= _LUMA_COEFF_COST_ &&
+    ((img->qp_scaled)!=0 || img->lossless_qpprime_flag==0)&&
+    !(img->type==SP_SLICE && (si_frame_indicator==1 || sp2_frame_indicator==1 )))// last set of conditions
+    // cannot skip when perfect reconstruction is as in switching pictures or SI pictures
+  {
+    coeff_cost  = 0;
+    (*cbp)     &=  (63 - cbp_mask);
+    (*cbp_blk) &= ~(51 << (4*block8x8-2*(block8x8 & 0x01)));
+
+    for (j=mb_y; j<mb_y+8; j++)
+      memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x + mb_x], &img->mpr[j][mb_x], 2 * BLOCK_SIZE * sizeof(imgpel));
+
+    if (img->type==SP_SLICE)
+    {
+      for (i=mb_x; i < mb_x+BLOCK_SIZE*2; i+=BLOCK_SIZE)
+        for (j=mb_y; j < mb_y+BLOCK_SIZE*2; j+=BLOCK_SIZE)
+          copyblock_sp(i,j);
+    }
+  }
+
+  return coeff_cost;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Derive the prediction direction, the list 0 / list 1 modes and the
+ *    list 0 / list 1 reference indices of one 8x8 block of the current
+ *    macroblock
+ *
+ * \param b8       index of the 8x8 block inside the macroblock
+ * \param p_dir    receives currMB->b8pdir[b8]
+ * \param l0_mode  receives the list 0 mode (0 when list 0 is not used)
+ * \param l1_mode  receives the list 1 mode (0 when list 1 is not used)
+ * \param fw_ref   receives the list 0 reference index
+ * \param bw_ref   receives the list 1 reference index
+ ************************************************************************
+ */
+void SetModesAndRefframe (int b8, short* p_dir, int* l0_mode, int* l1_mode, short* fw_ref, short* bw_ref)
+{
+  Macroblock* currMB = &img->mb_data[img->current_mb_nr];
+  // coordinates of the block's top-left entry in the ref_idx arrays
+  const int   row    = img->block_y + 2*(b8>>1);
+  const int   col    = img->block_x + 2*(b8 & 0x01);
+
+  // defaults; every path below overwrites all four outputs
+  *bw_ref  = -1;
+  *fw_ref  = -1;
+  *l1_mode = -1;
+  *l0_mode = -1;
+
+  *p_dir = currMB->b8pdir[b8];
+
+  if (img->type != B_SLICE)
+  {
+    // outside B slices only list 0 carries information
+    *l0_mode = currMB->b8mode[b8];
+    *l1_mode = 0;
+    *fw_ref  = enc_picture->ref_idx[LIST_0][row][col];
+    *bw_ref  = 0;
+    return;
+  }
+
+  switch (currMB->b8pdir[b8])
+  {
+  case -1:    // no prediction direction set for this block
+    *l0_mode = 0;
+    *l1_mode = 0;
+    *fw_ref  = -1;
+    *bw_ref  = -1;
+    break;
+
+  case 0:     // list 0 only
+    *l0_mode = currMB->b8mode[b8];
+    *l1_mode = 0;
+    *fw_ref  = enc_picture->ref_idx[LIST_0][row][col];
+    *bw_ref  = 0;
+    break;
+
+  case 1:     // list 1 only
+    *l0_mode = 0;
+    *l1_mode = currMB->b8mode[b8];
+    *fw_ref  = 0;
+    *bw_ref  = enc_picture->ref_idx[LIST_1][row][col];
+    break;
+
+  default:    // both lists
+    *l0_mode = currMB->b8mode[b8];
+    *l1_mode = currMB->b8mode[b8];
+    *fw_ref  = enc_picture->ref_idx[LIST_0][row][col];
+    *bw_ref  = enc_picture->ref_idx[LIST_1][row][col];
+    break;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Residual Coding of a Luma macroblock (not for intra)
+ *
+ *    Codes the four 8x8 luma blocks through LumaResidualCoding8x8() and
+ *    sums the coefficient costs they return. If the sum does not exceed
+ *    _LUMA_MB_COEFF_COST_ (and discarding is permitted) the luma bits of
+ *    cbp / cbp_blk are cleared and the reconstruction is replaced by the
+ *    prediction held in img->mpr.
+ ************************************************************************
+ */
+void LumaResidualCoding (void)
+{
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  int   mb_cost = 0;
+  int   b8, row, x, y;
+  int   l0_mode, l1_mode;
+  short p_dir, l0_ref, l1_ref;
+
+  currMB->cbp     = 0;
+  currMB->cbp_blk = 0;
+
+  for (b8 = 0; b8 < 4; b8++)
+  {
+    SetModesAndRefframe (b8, &p_dir, &l0_mode, &l1_mode, &l0_ref, &l1_ref);
+
+    mb_cost += LumaResidualCoding8x8 (&(currMB->cbp), &(currMB->cbp_blk), b8,
+                                      p_dir, l0_mode, l1_mode, l0_ref, l1_ref);
+  }
+
+  // coefficients are kept when they are worth their cost ...
+  if (mb_cost > _LUMA_MB_COEFF_COST_)
+    return;
+  // ... or when qp_scaled is 0 while lossless_qpprime_flag is set ...
+  if (img->qp_scaled == 0 && img->lossless_qpprime_flag != 0)
+    return;
+  // ... or when SI / switching SP frames need perfect reconstruction
+  if (img->type == SP_SLICE && (si_frame_indicator == 1 || sp2_frame_indicator == 1))
+    return;
+
+  // discard all luma coefficients of the macroblock
+  currMB->cbp     &= 0xfffff0;
+  currMB->cbp_blk &= 0xff0000;
+
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+    memcpy(&enc_picture->imgY[img->pix_y + row][img->pix_x], img->mpr[row], MB_BLOCK_SIZE * sizeof (imgpel));
+
+  if (img->type == SP_SLICE)
+  {
+    for (b8 = 0; b8 < 4; b8++)
+    {
+      const int x0 = (b8 & 1) << 3;
+      const int y0 = (b8 & 2) << 2;
+
+      for (x = x0; x < x0 + 8; x += 4)
+        for (y = y0; y < y0 + 8; y += 4)
+          copyblock_sp(x, y);
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Makes the decision if 8x8 transform will be used (for RD-off)
+ *
+ * \param block_check
+ *    8x8 block to examine, or -1 to examine all four blocks
+ * \param cost
+ *    in/out: when 0 is returned, *cost has cost8x8 subtracted and
+ *    cost4x4 added; left untouched when 1 is returned
+ * \return
+ *    1 if the 8x8 transform is to be used, 0 otherwise
+ ************************************************************************
+ */
+int TransformDecision (int block_check, int *cost)
+{
+  const int first_b8 = (block_check == -1) ? 0 : block_check;
+  const int end_b8   = (block_check == -1) ? 4 : block_check + 1;
+  int    b8, x0, y0, bx, by, org_x, org_y, row, col;
+  int    l0_mode, l1_mode;
+  short  p_dir, fw_ref, bw_ref;
+  int    sum4x4 = 0, sum8x8 = 0;
+
+  for (b8 = first_b8; b8 < end_b8; b8++)
+  {
+    // residuals of the four 4x4 blocks are written back to back into diff64
+    int *wr = diff64;
+
+    SetModesAndRefframe (b8, &p_dir, &l0_mode, &l1_mode, &fw_ref, &bw_ref);
+
+    y0 = (b8 >> 1) << 3;
+    x0 = (b8 & 0x01) << 3;
+
+    //===== loop over 4x4 blocks =====
+    for (by = y0; by < y0 + 8; by += 4)
+    {
+      org_y = img->opix_y + by;
+
+      for (bx = x0; bx < x0 + 8; bx += 4)
+      {
+        int *blk_start = wr;
+
+        org_x = img->opix_x + bx;
+
+        //===== prediction of 4x4 block =====
+        LumaPrediction4x4 (bx, by, p_dir, l0_mode, l1_mode, fw_ref, bw_ref);
+
+        //===== get displaced frame difference ======
+        for (row = 0; row < 4; row++)
+          for (col = 0; col < 4; col++)
+            *wr++ = imgY_org[org_y + row][org_x + col] - img->mpr[row + by][col + bx];
+
+        sum4x4 += distortion4x4 (blk_start);
+      }
+    }
+    sum8x8 += distortion8x8 (diff64);
+  }
+
+  // mode 2 always allows the 8x8 transform; otherwise it must be strictly cheaper
+  if (input->Transform8x8Mode == 2 || sum8x8 < sum4x4)
+    return 1;
+
+  *cost = (*cost - sum8x8 + sum4x4);
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Predict (on-the-fly) one component of a chroma 4x4 block
+ *
+ *    For every sample of the BLOCK_SIZE x BLOCK_SIZE block the motion
+ *    vector of the covering mv[][] entry is added to the sample position
+ *    (scaled by f1_x / f1_y). The displaced position is split into an
+ *    integer part, clipped to [0, max_x_cr] x [0, max_y_cr], and a
+ *    fractional part; the prediction is the rounded bilinear blend of the
+ *    four surrounding reference samples. Results are written
+ *    consecutively to mpred, row by row.
+ ************************************************************************
+ */
+void OneComponentChromaPrediction4x4_regenerate (
+                                 imgpel*     mpred,      //!< array to store prediction values
+                                 int         block_c_x,  //!< horizontal pixel coordinate of 4x4 block
+                                 int         block_c_y,  //!< vertical   pixel coordinate of 4x4 block
+                                 short****** mv,         //!< motion vector array
+                                 int         list_idx,   //!< reference picture list
+                                 short       ref,        //!< reference index
+                                 int         blocktype,  //!< block type
+                                 int         uv)         //!< chroma component
+{
+  int     i, j, ii, jj, ii0, jj0, ii1, jj1, if0, if1, jf0, jf1;
+  short*  mvb;
+
+  int     f1_x = 64/img->mb_cr_size_x; // horizontal positions are tracked in units of 1/f1_x sample
+  int     f2_x=f1_x-1; // used as mask for the fractional part (assumes f1_x is a power of two -- TODO confirm)
+
+  int     f1_y = 64/img->mb_cr_size_y; // vertical positions are tracked in units of 1/f1_y sample
+  int     f2_y=f1_y-1; // vertical counterpart of f2_x
+
+  int     f3=f1_x*f1_y, f4=f3>>1; // f3: sum of the four blend weights, f4: rounding term
+  int     list_offset = img->mb_data[img->current_mb_nr].list_offset;
+  int     max_y_cr = (int) (list_offset ? (img->height_cr >> 1) - 1 : img->height_cr - 1); // half height when list_offset != 0 (presumably field lists -- verify)
+  int     max_x_cr = (int) (img->width_cr - 1);
+  int     jjx, iix;
+  int     mb_cr_y_div4 = img->mb_cr_size_y>>2;
+  int     mb_cr_x_div4 = img->mb_cr_size_x>>2;
+  int     jpos;
+
+  StorablePicture **list = listX[list_idx + list_offset];
+
+  imgpel** refimage = list[ref]->imgUV[uv];
+
+  for (j=block_c_y; j < block_c_y + BLOCK_SIZE; j++)
+  {
+    jjx = j/mb_cr_y_div4; // row index into mv[][]
+    jpos = (j + img->opix_c_y)*f1_y;
+
+    for (i=block_c_x; i < block_c_x + BLOCK_SIZE; i++)
+    {
+      iix = i/mb_cr_x_div4; // column index into mv[][]
+      mvb  = mv [jjx][iix][list_idx][ref][blocktype];
+
+      ii   = (i + img->opix_c_x)*f1_x + mvb[0];
+      jj   = jpos + mvb[1];
+
+      if (active_sps->chroma_format_idc == 1)
+        jj  += list[ref]->chroma_vector_adjustment;
+
+      ii0  = iClip3 (0, max_x_cr, ii/f1_x); // integer position of the left / top sample
+      jj0  = iClip3 (0, max_y_cr, jj/f1_y);
+      ii1  = iClip3 (0, max_x_cr, (ii+f2_x)/f1_x); // integer position of the right / bottom sample
+      jj1  = iClip3 (0, max_y_cr, (jj+f2_y)/f1_y);
+
+      if1  = (ii&f2_x);  if0 = f1_x-if1; // horizontal weights, if0 + if1 == f1_x
+      jf1  = (jj&f2_y);  jf0 = f1_y-jf1; // vertical weights,   jf0 + jf1 == f1_y
+
+      *mpred++ = (if0 * jf0 * refimage[jj0][ii0] +
+                  if1 * jf0 * refimage[jj0][ii1] +
+                  if0 * jf1 * refimage[jj1][ii0] +
+                  if1 * jf1 * refimage[jj1][ii1] + f4) / f3;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Retrieve one component of a chroma 4x4 block from the buffer
+ *
+ *    Each of the BLOCK_SIZE rows is assembled from two 2-sample pieces
+ *    fetched through UMVLine8X_chroma() from list[ref]->imgUV_sub[uv];
+ *    the left piece uses the motion vector at mv column pos_x1, the right
+ *    piece the one at mv column pos_x2. Results are written consecutively
+ *    to mpred. As a side effect the non-local width_pad_cr and
+ *    height_pad_cr are overwritten with the padded chroma size of
+ *    list[ref].
+ ************************************************************************
+ */
+void OneComponentChromaPrediction4x4_retrieve (imgpel*        mpred,      //!< array to store prediction values
+                                 int         block_c_x,  //!< horizontal pixel coordinate of 4x4 block
+                                 int         block_c_y,  //!< vertical   pixel coordinate of 4x4 block
+                                 short****** mv,         //!< motion vector array
+                                 int         list_idx,   //!< reference picture list
+                                 short       ref,        //!< reference index
+                                 int         blocktype,  //!< block type
+                                 int         uv)         //!< chroma component
+{
+  int     j, ii, jj;
+  short*  mvb;
+
+  int     list_offset = img->mb_data[img->current_mb_nr].list_offset;
+
+  int     jjx;
+  int     right_shift_x = 4 - chroma_shift_x;
+  int     right_shift_y = 4 - chroma_shift_y;
+  int     jpos;
+
+  int     pos_x1 = block_c_x >> right_shift_x; // mv column for samples 0..1 of each row
+  int     pos_x2 = (block_c_x + 2) >> right_shift_x; // mv column for samples 2..3 of each row
+  int     ipos1 = ((block_c_x + img->opix_c_x) << chroma_shift_x) + IMG_PAD_SIZE_TIMES4;
+  int     ipos2 = ((block_c_x + 2 + img->opix_c_x) << chroma_shift_x) + IMG_PAD_SIZE_TIMES4;
+
+
+  StorablePicture **list = listX[list_idx + list_offset];
+
+  imgpel**** refsubimage = list[ref]->imgUV_sub[uv];
+  imgpel *line_ptr;
+
+  int jj_chroma = ((active_sps->chroma_format_idc == 1) ? list[ref]->chroma_vector_adjustment : 0) + IMG_PAD_SIZE_TIMES4; // constant vertical offset added to every row position
+
+  width_pad_cr  = list[ref]->size_x_cr_pad; // non-local; presumably read by UMVLine8X_chroma -- verify
+  height_pad_cr = list[ref]->size_y_cr_pad;
+
+
+  for (j=block_c_y; j < block_c_y + BLOCK_SIZE; j++)
+  {
+    jjx = j >> right_shift_y; // translate into absolute block (luma) coordinates
+
+    jpos = ( (j + img->opix_c_y) << chroma_shift_y ) + jj_chroma;
+
+    mvb  = mv [jjx][pos_x1][list_idx][ref][blocktype];
+
+    ii   = ipos1 + mvb[0];
+    jj   = jpos  + mvb[1];
+
+    line_ptr = UMVLine8X_chroma ( refsubimage, jj, ii);
+    *mpred++ = *line_ptr++; // left pair of the row
+    *mpred++ = *line_ptr;
+
+    mvb  = mv [jjx][pos_x2][list_idx][ref][blocktype];
+
+    ii   = ipos2 + mvb[0];
+    jj   = jpos  + mvb[1];
+
+    line_ptr = UMVLine8X_chroma ( refsubimage, jj, ii);
+    *mpred++ = *line_ptr++; // right pair of the row
+    *mpred++ = *line_ptr;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Predict an intra chroma 4x4 block
+ *
+ *    Copies four rows of the stored prediction for the macroblock's
+ *    c_ipred_mode from img->mprr_c[uv] into img->mpr.
+ ************************************************************************
+ */
+void IntraChromaPrediction4x4 (int  uv,       // <-- colour component
+                               int  block_x,  // <-- relative horizontal block coordinate of 4x4 block
+                               int  block_y)  // <-- relative vertical   block coordinate of 4x4 block
+{
+  const int mode    = img->mb_data[img->current_mb_nr].c_ipred_mode;
+  const int row_end = block_y + 4;
+  int       row     = block_y;
+
+  //===== prediction =====
+  while (row < row_end)
+  {
+    memcpy(&img->mpr[row][block_x], &img->mprr_c[uv][mode][row][block_x], BLOCK_MULTIPLE * sizeof(imgpel));
+    row++;
+  }
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Predict one chroma 4x4 block
+ *
+ *    Writes the prediction for the 4x4 block at (block_x, block_y) into
+ *    img->mpr. With p_dir == -1 the stored intra prediction is copied.
+ *    Otherwise the list 0 and/or list 1 predictions are produced through
+ *    the OneComponentChromaPrediction4x4 function pointer and then copied
+ *    (single list), averaged (both lists) or combined with explicit
+ *    weights when weighted prediction is enabled for the slice type.
+ *
+ * \note
+ *    The weighted bi-predictive branch shifts by
+ *    chroma_log_weight_denom + 1, so that the shift matches its chroma
+ *    rounding term (2 * wp_chroma_round) and the single-list branches.
+ ************************************************************************
+ */
+void ChromaPrediction4x4 ( int   uv,           // <-- colour component
+                           int   block_x,      // <-- relative horizontal block coordinate of 4x4 block
+                           int   block_y,      // <-- relative vertical   block coordinate of 4x4 block
+                           int   p_dir,        // <-- prediction direction
+                           int   l0_mode,      // <-- list0  prediction mode (1-7, 0=DIRECT if l1_mode=0)
+                           int   l1_mode,      // <-- list1 prediction mode (1-7, 0=DIRECT if l0_mode=0)
+                           short l0_ref_idx,   // <-- reference frame for list0 prediction (if (<0) -> intra prediction)
+                           short l1_ref_idx)   // <-- reference frame for list1 prediction
+{
+  // scratch buffers holding one 4x4 prediction per list, row by row
+  static imgpel l0_pred[MB_BLOCK_SIZE];
+  static imgpel l1_pred[MB_BLOCK_SIZE];
+
+  int  i, j;
+  int  block_x4  = block_x+4;
+  int  block_y4  = block_y+4;
+  imgpel* l0pred     = l0_pred;
+  imgpel* l1pred     = l1_pred;
+  short****** mv_array = img->all_mv;
+
+  Macroblock*    currMB     = &img->mb_data[img->current_mb_nr];
+
+  int  apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                         (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));
+
+  // 16x16 bi-prediction with both reference indices 0 may use the dedicated bipred motion vectors
+  if (currMB->bi_pred_me && l0_ref_idx == 0 && l1_ref_idx == 0 && p_dir == 2 && l0_mode==1 && l1_mode==1)
+    mv_array = currMB->bi_pred_me == 1? img->bipred_mv1 : img->bipred_mv2 ;
+
+  //===== INTRA PREDICTION =====
+  if (p_dir==-1)
+  {
+    IntraChromaPrediction4x4 (uv, block_x, block_y);
+    return;
+  }
+
+  //===== INTER PREDICTION =====
+  if ((p_dir==0) || (p_dir==2))
+  {
+    (*OneComponentChromaPrediction4x4) (l0_pred, block_x, block_y, mv_array, LIST_0, l0_ref_idx, l0_mode, uv);
+  }
+  if ((p_dir==1) || (p_dir==2))
+  {
+    (*OneComponentChromaPrediction4x4) (l1_pred, block_x, block_y, mv_array, LIST_1, l1_ref_idx, l1_mode, uv);
+  }
+
+  if (apply_weights)
+  {
+    if (p_dir==2)
+    {
+      int wbp0 = wbp_weight[0][l0_ref_idx][l1_ref_idx][uv+1];
+      int wbp1 = wbp_weight[1][l0_ref_idx][l1_ref_idx][uv+1];
+      int offset = (wp_offset[0][l0_ref_idx][uv+1] + wp_offset[1][l1_ref_idx][uv+1] + 1)>>1;
+      int wp_round = 2*wp_chroma_round;
+      // chroma samples must be normalised with the chroma denominator, not the luma one
+      int weight_denom = chroma_log_weight_denom + 1;
+
+
+      for (j=block_y; j<block_y4; j++)
+        for (i=block_x; i<block_x4; i++)
+            img->mpr[j][i] =  iClip1( img->max_imgpel_value_uv,
+            ((wbp0 * *l0pred++ + wbp1 * *l1pred++ + wp_round) >> (weight_denom)) + (offset) );
+    }
+    else if (p_dir==0)
+    {
+      int wp = wp_weight[0][l0_ref_idx][uv+1];
+      int offset = wp_offset[0][l0_ref_idx][uv+1];
+      for (j=block_y; j<block_y4; j++)
+        for (i=block_x; i<block_x4; i++)
+           img->mpr[j][i] = iClip1( img->max_imgpel_value_uv, (( wp * *l0pred++ + wp_chroma_round) >> chroma_log_weight_denom) +  offset);
+    }
+    else // (p_dir==1)
+    {
+      int wp = wp_weight[1][l1_ref_idx][uv+1];
+      int offset = wp_offset[1][l1_ref_idx][uv+1];
+
+      for (j=block_y; j<block_y4; j++)
+        for (i=block_x; i<block_x4; i++)
+          img->mpr[j][i] = iClip1( img->max_imgpel_value_uv, ((wp * *l1pred++ + wp_chroma_round) >> chroma_log_weight_denom) + offset);
+    }
+  }
+  else
+  {
+    if (p_dir==2)
+    {
+      // unweighted bi-prediction: rounded average of both lists
+      for (j=block_y; j<block_y4; j++)
+        for (i=block_x; i<block_x4; i++)
+          img->mpr[j][i] = (*l0pred++ + *l1pred++ + 1) >> 1;
+    }
+    else if (p_dir==0)
+    {
+      for (j=block_y; j<block_y4; j++)
+      {
+        memcpy(&(img->mpr[j][block_x]), l0pred, BLOCK_SIZE * sizeof(imgpel));
+        l0pred += BLOCK_SIZE;
+      }
+    }
+    else // (p_dir==1)
+    {
+      for (j=block_y; j<block_y4; j++)
+      {
+        memcpy(&(img->mpr[j][block_x]), l1pred, BLOCK_SIZE * sizeof(imgpel));
+        l1pred += BLOCK_SIZE;
+      }
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Chroma residual coding for a macroblock
+ *
+ *    For both chroma components the prediction is built in img->mpr.
+ *    Depending on NoResidueDirect, the skip state and the slice type the
+ *    prediction is then copied to the reconstructed picture, img->m7 is
+ *    zeroed, or the prediction residual is stored in img->m7; where
+ *    applicable the matching dct_chroma* routine is called afterwards.
+ *
+ * \param cr_cbp
+ *    set to 0 on entry, then replaced by the return value of each
+ *    dct_chroma* call; (*cr_cbp)<<4 is finally added to the cbp of the
+ *    current macroblock
+ ************************************************************************
+ */
+void ChromaResidualCoding (int* cr_cbp)
+{
+  static const int block8x8_idx[3][4][4] =     //ADD-VG-15052004
+  {
+    { {0, 1, 0, 0},
+      {2, 3, 0, 0},
+      {0, 0, 0, 0},
+      {0, 0, 0, 0},
+    },
+    { {0, 1, 0, 0},
+      {0, 1, 0, 0},
+      {2, 3, 0, 0},
+      {2, 3, 0, 0},
+    },
+    { {0, 0, 1, 1},
+      {0, 0, 1, 1},
+      {2, 2, 3, 3},
+      {2, 2, 3, 3}
+    }
+  };
+  int   uv, block8, block_y, block_x, j, i;
+  int   l0_mode, l1_mode;
+  short p_dir, refframe, bw_ref;
+  int   skipped = (img->mb_data[img->current_mb_nr].mb_type == 0 && (img->type == P_SLICE || img->type == SP_SLICE));
+  int   yuv = img->yuv_format - 1; //ADD-VG-15052004
+
+  if ( input->ChromaMCBuffer ) // choose the inter prediction routine used by ChromaPrediction4x4
+    OneComponentChromaPrediction4x4 = OneComponentChromaPrediction4x4_retrieve;
+  else
+    OneComponentChromaPrediction4x4 = OneComponentChromaPrediction4x4_regenerate;
+
+  for (*cr_cbp=0, uv=0; uv<2; uv++)
+  {
+    //===== prediction of chrominance blocks =====
+    block8 = 0;
+    for (block_y=0; block_y < img->mb_cr_size_y; block_y+=4)
+    for (block_x=0; block_x < img->mb_cr_size_x; block_x+=4)
+    {
+      block8 = block8x8_idx[yuv][block_y>>2][block_x>>2]; // 8x8 block whose modes and references apply to this chroma 4x4 block
+      SetModesAndRefframe (block8, &p_dir, &l0_mode, &l1_mode, &refframe, &bw_ref);
+
+      ChromaPrediction4x4 (uv, block_x, block_y, p_dir, l0_mode, l1_mode, refframe, bw_ref);
+    }
+
+    // ==== set chroma residue to zero for skip Mode in SP frames
+    if (img->NoResidueDirect)
+    {
+      for (j=0; j<img->mb_cr_size_y; j++)
+        memcpy(&enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x], img->mpr[j], img->mb_cr_size_x * sizeof(imgpel));
+    }
+    else if (skipped && img->type==SP_SLICE)
+    {
+      for (j=0; j<8; j++) // NOTE(review): fixed 8x8 area, not mb_cr_size_x/y -- confirm SP slices only occur with 8x8 chroma
+        memset(img->m7[j], 0 , 8 * sizeof(int));
+    }
+    else
+    if (skipped)
+    {
+      for (j=0; j<img->mb_cr_size_y; j++)
+        memcpy(&enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x], img->mpr[j], img->mb_cr_size_x * sizeof(imgpel));
+    }
+    else
+    {
+      for (j=0; j<img->mb_cr_size_y; j++)
+        for (i=0; i<img->mb_cr_size_x; i++)
+        {
+          img->m7[j][i] = imgUV_org[uv][img->opix_c_y+j][img->opix_c_x+i] - img->mpr[j][i];
+        }
+    }
+
+    //===== DCT, Quantization, inverse Quantization, IDCT, and Reconstruction =====
+    //===== Call function for skip mode in SP frames to properly process frame ====
+
+    if (skipped && img->type==SP_SLICE)
+    {
+      if(si_frame_indicator || sp2_frame_indicator)
+        *cr_cbp=dct_chroma_sp2(uv,*cr_cbp);//modif ES added, calls the SI/switching SP encoding function
+      else
+        *cr_cbp=dct_chroma_sp(uv,*cr_cbp);
+    }
+    else
+    {
+      if (!img->NoResidueDirect && !skipped)
+      {
+        if (img->type!=SP_SLICE || (img->mb_data[img->current_mb_nr].mb_type==I16MB ))
+        {
+          //even if the block is intra it should still be treated as SP
+          *cr_cbp=dct_chroma   (uv,*cr_cbp);
+        }
+        else
+        {
+          if(si_frame_indicator||sp2_frame_indicator)
+            *cr_cbp=dct_chroma_sp2(uv,*cr_cbp);// SI frames or switching SP frames
+          else
+            *cr_cbp=dct_chroma_sp(uv,*cr_cbp);
+        }
+      }
+    }
+  }
+
+  //===== update currMB->cbp =====
+  img->mb_data[img->current_mb_nr].cbp += ((*cr_cbp)<<4);
+
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Intra prediction of the chrominance layers of one macroblock
+ *
+ *    Fills img->mprr_c[uv][mode] for DC prediction and, when the
+ *    required neighbours are available, for vertical, horizontal and
+ *    plane prediction. If input->rdopt is 0, the mode with the lowest
+ *    distortion4x4() cost against imgUV_org is also stored in
+ *    currMB->c_ipred_mode.
+ *
+ * \param mb_up
+ *    if non-NULL, receives the availability of the upper neighbour
+ * \param mb_left
+ *    if non-NULL, receives 1 only when both halves of the left
+ *    neighbour are available
+ * \param mb_up_left
+ *    if non-NULL, receives the availability of the upper-left neighbour
+ ************************************************************************
+ */
+void IntraChromaPrediction (int *mb_up, int *mb_left, int*mb_up_left)
+{
+
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  int      s, s0, s1, s2, s3, i, j, k;
+  imgpel**  image;
+  int      block_x, block_y;
+  int      mb_nr = img->current_mb_nr;
+  int      mb_available_up;
+  int      mb_available_left[2];
+  int      mb_available_up_left;
+  int      ih,iv;
+  int      ib,ic,iaa;
+  int      uv;
+  imgpel   hline[16], vline[16];
+  int      mode;
+  int      best_mode = DC_PRED_8;  //just an initialization here, should always be overwritten
+  int      cost;
+  int      min_cost;
+  PixelPos up;        //!< pixel position  p(0,-1)
+  PixelPos left[17];  //!< pixel positions p(-1, -1..15)
+  int      cr_MB_x = img->mb_cr_size_x;
+  int      cr_MB_y = img->mb_cr_size_y;
+
+  int      blk_x;
+  int      blk_y;
+  int      b8,b4;
+  int      yuv = img->yuv_format - 1;
+
+  static const int block_pos[3][4][4]= //[yuv][b8][b4]
+  {
+    { {0, 1, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0},{0, 0, 0, 0}},
+    { {0, 1, 2, 3},{2, 3, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0}},
+    { {0, 1, 2, 3},{1, 1, 3, 3},{2, 3, 2, 3},{3, 3, 3, 3}}
+  };
+
+  for (i=0;i<cr_MB_y+1;i++) // left[0] is the corner p(-1,-1), left[1..cr_MB_y] the left column
+  {
+    getNeighbour(mb_nr, -1 , i-1 , IS_CHROMA, &left[i]);
+  }
+  getNeighbour(mb_nr, 0 , -1 , IS_CHROMA, &up);
+
+
+  mb_available_up                             = up.available;
+  mb_available_up_left                        = left[0].available;
+  mb_available_left[0] = mb_available_left[1] = left[1].available;
+
+  if(input->UseConstrainedIntraPred)
+  {
+    mb_available_up = up.available ? img->intra_block[up.mb_addr] : 0;
+    for (i=0, mb_available_left[0]=1; i<(cr_MB_y>>1);i++) // upper half of the left column: every position must be available and intra
+      mb_available_left[0]  &= left[i+1].available ? img->intra_block[left[i+1].mb_addr]: 0;
+    for (i=(cr_MB_y>>1), mb_available_left[1]=1; i<cr_MB_y;i++) // same test for the lower half
+      mb_available_left[1] &= left[i+1].available ? img->intra_block[left[i+1].mb_addr]: 0;
+    mb_available_up_left = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+  }
+
+  if (mb_up)
+    *mb_up = mb_available_up;
+  if (mb_left)
+    *mb_left = mb_available_left[0] && mb_available_left[1];
+  if (mb_up_left)
+    *mb_up_left = mb_available_up_left;
+
+
+  // compute all chroma intra prediction modes for both U and V
+  for (uv=0; uv<2; uv++)
+  {
+    image = enc_picture->imgUV[uv];
+
+    // DC prediction
+    for(b8=0; b8<img->num_blk8x8_uv >> 1;b8++)
+    {
+      for (b4=0; b4<4; b4++)
+      {
+        block_y = subblk_offset_y[yuv][b8][b4];
+        block_x = subblk_offset_x[yuv][b8][b4];
+        blk_x = block_x;
+        blk_y = block_y + 1; // +1 because left[] is shifted by one (left[0] is the corner)
+
+        s=img->dc_pred_value_chroma; // value kept when no usable neighbour exists
+        s0=s1=s2=s3=0;
+
+        //===== get prediction value =====
+        switch (block_pos[yuv][b8][b4])
+        {
+        case 0:  //===== TOP LEFT =====
+          if      (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s0 += image[up.pos_y][up.pos_x + i];
+          if      (mb_available_left[0])  for (i=blk_y;i<(blk_y+4);i++)  s2 += image[left[i].pos_y][left[i].pos_x];
+          if      (mb_available_up && mb_available_left[0])  s  = (s0+s2+4) >> 3;
+          else if (mb_available_up)                          s  = (s0   +2) >> 2;
+          else if (mb_available_left[0])                     s  = (s2   +2) >> 2;
+          break;
+        case 1: //===== TOP RIGHT =====
+          if      (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s1 += image[up.pos_y][up.pos_x + i];
+          else if (mb_available_left[0])  for (i=blk_y;i<(blk_y+4);i++)  s2 += image[left[i].pos_y][left[i].pos_x];
+          if      (mb_available_up)                          s  = (s1   +2) >> 2;
+          else if (mb_available_left[0])                     s  = (s2   +2) >> 2;
+          break;
+        case 2: //===== BOTTOM LEFT =====
+          if      (mb_available_left[1])  for (i=blk_y;i<(blk_y+4);i++)  s3 += image[left[i].pos_y][left[i].pos_x];
+          else if (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s0 += image[up.pos_y][up.pos_x + i];
+          if      (mb_available_left[1])                     s  = (s3   +2) >> 2;
+          else if (mb_available_up)                          s  = (s0   +2) >> 2;
+          break;
+        case 3: //===== BOTTOM RIGHT =====
+          if      (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s1 += image[up.pos_y][up.pos_x + i];
+          if      (mb_available_left[1])  for (i=blk_y;i<(blk_y+4);i++)  s3 += image[left[i].pos_y][left[i].pos_x];
+          if      (mb_available_up && mb_available_left[1])  s  = (s1+s3+4) >> 3;
+          else if (mb_available_up)                          s  = (s1   +2) >> 2;
+          else if (mb_available_left[1])                     s  = (s3   +2) >> 2;
+          break;
+        }
+
+        //===== prediction =====
+        for (j=block_y; j<block_y+4; j++)
+          for (i=block_x; i<block_x+4; i++)
+          {
+            img->mprr_c[uv][DC_PRED_8][j][i] = s;
+          }
+      }
+    }
+
+    // vertical prediction
+    if (mb_available_up)
+    {
+      memcpy(hline,&image[up.pos_y][up.pos_x], cr_MB_x * sizeof(imgpel)); // hline: reconstructed row above the macroblock
+      for (j=0; j<cr_MB_y; j++)
+        memcpy(img->mprr_c[uv][VERT_PRED_8][j], hline, cr_MB_x * sizeof(imgpel));
+    }
+
+    // horizontal prediction
+    if (mb_available_left[0] && mb_available_left[1])
+    {
+      for (i=0; i<cr_MB_y; i++)
+        vline[i] = image[left[i+1].pos_y][left[i+1].pos_x]; // vline: reconstructed column left of the macroblock
+      for (i=0; i<cr_MB_x; i++)
+        for (j=0; j<cr_MB_y; j++)
+          img->mprr_c[uv][HOR_PRED_8][j][i] = vline[j];
+    }
+
+    // plane prediction
+    if (mb_available_left[0] && mb_available_left[1] && mb_available_up && mb_available_up_left)
+    {
+      ih = (cr_MB_x>>1)*(hline[cr_MB_x-1] - image[left[0].pos_y][left[0].pos_x]); // horizontal gradient, outermost pair uses the corner sample
+      for (i=0;i<(cr_MB_x>>1)-1;i++)
+        ih += (i+1)*(hline[(cr_MB_x>>1)+i] - hline[(cr_MB_x>>1)-2-i]);
+
+      iv = (cr_MB_y>>1)*(vline[cr_MB_y-1] - image[left[0].pos_y][left[0].pos_x]); // vertical gradient, same construction
+      for (i=0;i<(cr_MB_y>>1)-1;i++)
+        iv += (i+1)*(vline[(cr_MB_y>>1)+i] - vline[(cr_MB_y>>1)-2-i]);
+
+      ib= ((cr_MB_x == 8?17:5)*ih+2*cr_MB_x)>>(cr_MB_x == 8?5:6);
+      ic= ((cr_MB_y == 8?17:5)*iv+2*cr_MB_y)>>(cr_MB_y == 8?5:6);
+
+      iaa=16*(hline[cr_MB_x-1]+vline[cr_MB_y-1]);
+      for (j=0; j<cr_MB_y; j++)
+        for (i=0; i<cr_MB_x; i++)
+          img->mprr_c[uv][PLANE_8][j][i]= iClip3(0, img->max_imgpel_value_uv,
+                                                   (iaa+(i-(cr_MB_x>>1)+1)*ib+(j-(cr_MB_y>>1)+1)*ic+16)>>5);
+    }
+  }
+
+  if (!input->rdopt)      // the rd-opt part does not work correctly (see encode_one_macroblock)
+  {                       // since ipredmodes could be overwritten => encoder-decoder-mismatches
+    // pick lowest cost prediction mode
+    min_cost = INT_MAX;
+    for (i=0;i<cr_MB_y;i++)
+    {
+      getNeighbour(mb_nr, 0 , i, IS_CHROMA, &left[i]); // left[] is reused: now position (0,i) inside the current macroblock
+    } // NOTE(review): IntraChromaRDDecision halves pos_y at this point for MBAFF field macroblocks; this path does not -- confirm intended
+    for (mode=DC_PRED_8; mode<=PLANE_8; mode++)
+    {
+      if ((img->type != I_SLICE || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && mode!=DC_PRED_8)
+        continue;
+
+      if ((mode==VERT_PRED_8 && !mb_available_up) ||
+        (mode==HOR_PRED_8 && (!mb_available_left[0] || !mb_available_left[1])) ||
+        (mode==PLANE_8 && (!mb_available_left[0] || !mb_available_left[1] || !mb_available_up || !mb_available_up_left)))
+        continue;
+
+      cost = 0;
+      for (uv=0; uv<2; uv++)
+      {
+        image = imgUV_org[uv];
+        for (block_y=0; block_y<cr_MB_y; block_y+=4)
+          for (block_x=0; block_x<cr_MB_x; block_x+=4)
+          {
+            for (k=0,j=block_y; j<block_y+4; j++)
+            {
+              for (i=block_x; i<block_x+4; i++,k++)
+                diff[k] = image[left[j].pos_y][left[j].pos_x+i] - img->mprr_c[uv][mode][j][i];
+            }
+            cost += distortion4x4(diff);
+          }
+      }
+      if (cost < min_cost)
+      {
+        best_mode = mode;
+        min_cost = cost;
+      }
+    }
+    currMB->c_ipred_mode = best_mode;
+  }
+}
+
+/*!
+ **************************************************************************************
+ * \brief
+ *    RD Decision for Intra prediction mode of the chrominance layers of one macroblock
+ *
+ *    Compares the predictions stored in img->mprr_c against imgUV_org for
+ *    every mode whose neighbours are available, adds
+ *    enc_mb.lambda_me[Q_PEL] * mvbits[mode] as mode signalling cost and
+ *    stores the cheapest mode in currMB->c_ipred_mode. The predictions
+ *    themselves are not computed here.
+ *
+ * \param enc_mb
+ *    RD parameters; only lambda_me[Q_PEL] is read
+ **************************************************************************************
+ */
+void IntraChromaRDDecision (RD_PARAMS enc_mb)
+{
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  int      i, j, k;
+  imgpel** image;
+  int      block_x, block_y;
+  int      mb_nr = img->current_mb_nr;
+  int      mb_available_up;
+  int      mb_available_left[2];
+  int      mb_available_up_left;
+  int      uv;
+  int      mode;
+  int      best_mode = DC_PRED_8;  //just an initialization here, should always be overwritten
+  int      cost;
+  int      min_cost;
+  PixelPos up;        //!< pixel position  p(0,-1)
+  PixelPos left[17];  //!< pixel positions p(-1, -1..15)
+  int      cr_MB_x = img->mb_cr_size_x;
+  int      cr_MB_y = img->mb_cr_size_y;
+
+  for (i=0;i<cr_MB_y+1;i++) // left[0] is the corner p(-1,-1), left[1..cr_MB_y] the left column
+  {
+    getNeighbour(mb_nr, -1 , i-1 , IS_CHROMA, &left[i]);
+  }
+  getNeighbour(mb_nr, 0 , -1 , IS_CHROMA, &up);
+
+  mb_available_up                             = up.available;
+  mb_available_up_left                        = left[0].available;
+  mb_available_left[0] = mb_available_left[1] = left[1].available;
+
+  if(input->UseConstrainedIntraPred)
+  {
+    mb_available_up = up.available ? img->intra_block[up.mb_addr] : 0;
+    for (i=0, mb_available_left[0]=1; i<(cr_MB_y>>1);i++) // upper half of the left column: every position must be available and intra
+      mb_available_left[0]  &= left[i+1].available ? img->intra_block[left[i+1].mb_addr]: 0;
+    for (i=(cr_MB_y>>1), mb_available_left[1]=1; i<cr_MB_y;i++) // same test for the lower half
+      mb_available_left[1] &= left[i+1].available ? img->intra_block[left[i+1].mb_addr]: 0;
+    mb_available_up_left = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+  }
+
+  // pick lowest cost prediction mode
+  min_cost = INT_MAX;
+  for (i=0;i<cr_MB_y;i++)
+  {
+    getNeighbour(mb_nr, 0 , i, IS_CHROMA, &left[i]); // left[] is reused: now position (0,i) inside the current macroblock
+  }
+  if ( img->MbaffFrameFlag && img->field_mode )
+  {
+    for (i=0;i<cr_MB_y;i++)
+    {
+      left[i].pos_y = left[i].pos_y >> 1; // halve the row for field macroblocks (presumably imgUV_org is field-indexed here -- verify)
+    }
+  }
+
+  for (mode=DC_PRED_8; mode<=PLANE_8; mode++)
+  {
+    if ((mode==VERT_PRED_8 && !mb_available_up) ||
+      (mode==HOR_PRED_8 && (!mb_available_left[0] || !mb_available_left[1])) ||
+      (mode==PLANE_8 && (!mb_available_left[0] || !mb_available_left[1] || !mb_available_up || !mb_available_up_left)))
+      continue;
+
+    cost = 0;
+    for (uv=0; uv<2; uv++)
+    {
+      image = imgUV_org[uv];
+      for (block_y=0; block_y<cr_MB_y; block_y+=4)
+      {
+        for (block_x=0; block_x<cr_MB_x; block_x+=4)
+        {
+          for (k=0,j=block_y; j<block_y+4; j++)
+          {
+            for (i=block_x; i<block_x+4; i++,k++)
+              diff[k] = image[left[j].pos_y][left[j].pos_x+i] - img->mprr_c[uv][mode][j][i];
+          }
+          cost += distortion4x4(diff);
+        }
+        if (cost > min_cost) break; // early termination: this mode can no longer win
+      }
+      if (cost > min_cost) break;
+    }
+
+    cost += (int) (enc_mb.lambda_me[Q_PEL] * mvbits[ mode ]); // exp golomb coding cost for mode signaling
+
+    if (cost < min_cost)
+    {
+      best_mode = mode;
+      min_cost = cost;
+    }
+  }
+  currMB->c_ipred_mode = best_mode;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Check if all LIST_0 reference indices of the current macroblock
+ *    are zero
+ * \param currMB
+ *    not read here; the scanned area is taken from img->block_x /
+ *    img->block_y and spans BLOCK_MULTIPLE entries in each direction
+ * \return
+ *    1 if every scanned enc_picture->ref_idx[LIST_0] entry is 0,
+ *    0 as soon as a non-zero entry is found
+ ************************************************************************
+ */
+int ZeroRef (Macroblock* currMB)
+{
+  const int row_end = img->block_y + BLOCK_MULTIPLE;
+  const int col_end = img->block_x + BLOCK_MULTIPLE;
+  int row, col;
+
+  for (row = img->block_y; row < row_end; row++)
+  {
+    for (col = img->block_x; col < col_end; col++)
+    {
+      if (enc_picture->ref_idx[LIST_0][row][col] != 0)
+        return 0;
+    }
+  }
+
+  return 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Converts macroblock type to coding value
+ * \param currMB
+ *    macroblock whose mb_type (and, in B slices, b8pdir[0] / b8pdir[3])
+ *    is mapped
+ * \return
+ *    value to be coded for mb_type; the mapping depends on img->type
+ *    and, for P8x8 outside B slices, on the symbol mode and ZeroRef()
+ ************************************************************************
+ */
+int MBType2Value (Macroblock* currMB)
+{
+  static const int dir1offset[3]    =  { 1,  2, 3};
+  static const int dir2offset[3][3] = {{ 0,  4,  8},   // 1. block forward
+                                       { 6,  2, 10},   // 1. block backward
+                                       {12, 14, 16}};  // 1. block bi-directional
+
+  const int mbtype = currMB->mb_type;
+
+  if (img->type != B_SLICE)
+  {
+    // intra types are offset by 6 outside I slices
+    const int intra_base = (img->type == I_SLICE) ? 0 : 6;
+
+    if (mbtype == I8MB || mbtype == I4MB)
+      return intra_base;
+    if (mbtype == I16MB)
+      return intra_base + img->i16offset;
+    if (mbtype == IPCM)
+      return (img->type == I_SLICE) ? 25 : 31;
+    if (mbtype == P8x8)
+      return (input->symbol_mode == UVLC && ZeroRef (currMB)) ? 5 : 4;
+
+    return mbtype;
+  }
+
+  //----- B slice -----
+  if (mbtype == 0)
+    return 0;
+  if (mbtype == I4MB || mbtype == I8MB)
+    return 23;
+  if (mbtype == I16MB)
+    return 23 + img->i16offset;
+  if (mbtype == IPCM)
+    return 48;
+  if (mbtype == P8x8)
+    return 22;
+
+  {
+    // prediction directions of the first and the last 8x8 block
+    const int pdir0 = currMB->b8pdir[0];
+    const int pdir1 = currMB->b8pdir[3];
+
+    if (mbtype == 1)
+      return dir1offset[pdir0];
+
+    return (mbtype == 2 ? 4 : 5) + dir2offset[pdir0][pdir1];
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes 4x4 intra prediction modes for a macroblock
+ *
+ *    Sets IntraChromaPredModeFlag of the current macroblock and codes
+ *    its 16 intra_pred_modes entries through writeIntraPredMode().
+ * \return
+ *    sum of se.len over the 16 written elements (also added to
+ *    bitcounter[BITS_COEFF_Y_MB])
+ ************************************************************************
+ */
+int writeIntra4x4Modes(void)
+{
+  Macroblock    *mb         = &img->mb_data[img->current_mb_nr];
+  const int     *part_of    = assignSE2partition[input->partition_mode];
+  DataPartition *partition  = &(img->currentSlice->partArr[part_of[SE_INTRAPREDMODE]]);
+  int           *bits       = mb->bitcounter;
+  SyntaxElement  se;
+  int            total_bits = 0;
+  int            blk;
+
+  mb->IntraChromaPredModeFlag = 1;
+
+  for (blk = 0; blk < 16; blk++)
+  {
+    // the block index doubles as the context of the element
+    se.type    = SE_INTRAPREDMODE;
+    se.context = blk;
+    se.value1  = mb->intra_pred_modes[blk];
+    se.value2  = 0;
+
+#if TRACE
+    if (se.value1 < 0 )
+      snprintf(se.tracestring, TRACESTRING_SIZE, "Intra 4x4 mode  = predicted (context: %d)",se.context);
+    else
+      snprintf(se.tracestring, TRACESTRING_SIZE, "Intra 4x4 mode  = %3d (context: %d)",se.value1,se.context);
+#endif
+
+    // encode and account for the produced bits
+    writeIntraPredMode (&se, partition);
+
+    bits[BITS_COEFF_Y_MB] += se.len;
+    total_bits            += se.len;
+  }
+
+  return total_bits;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes 8x8 intra prediction modes for a macroblock
+ *
+ *    Sets IntraChromaPredModeFlag of the current macroblock and codes
+ *    intra_pred_modes8x8[0], [4], [8] and [12] through
+ *    writeIntraPredMode().
+ * \return
+ *    sum of se.len over the four written elements (also added to
+ *    bitcounter[BITS_COEFF_Y_MB])
+ ************************************************************************
+ */
+int writeIntra8x8Modes(void)
+{
+  Macroblock    *mb         = &img->mb_data[img->current_mb_nr];
+  const int     *part_of    = assignSE2partition[input->partition_mode];
+  DataPartition *partition  = &(img->currentSlice->partArr[part_of[SE_INTRAPREDMODE]]);
+  int           *bits       = mb->bitcounter;
+  SyntaxElement  se;
+  int            total_bits = 0;
+  int            first4x4;
+
+  mb->IntraChromaPredModeFlag = 1;
+
+  // step through the 8x8 blocks by the index of their first 4x4 block
+  for (first4x4 = 0; first4x4 < 16; first4x4 += 4)
+  {
+    se.type    = SE_INTRAPREDMODE;
+    se.context = first4x4;
+    se.value1  = mb->intra_pred_modes8x8[first4x4];
+    se.value2  = 0;
+
+#if TRACE
+    if (se.value1 < 0 )
+      snprintf(se.tracestring, TRACESTRING_SIZE, "Intra 8x8 mode  = predicted (context: %d)",se.context);
+    else
+      snprintf(se.tracestring, TRACESTRING_SIZE, "Intra 8x8 mode  = %3d (context: %d)",se.value1,se.context);
+#endif
+
+    // encode and account for the produced bits
+    writeIntraPredMode (&se, partition);
+
+    bits[BITS_COEFF_Y_MB] += se.len;
+    total_bits            += se.len;
+  }
+
+  return total_bits;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the luma intra prediction modes of the current macroblock
+ * \return
+ *    result of writeIntra4x4Modes() for I4MB, of writeIntra8x8Modes()
+ *    for I8MB, 0 for every other macroblock type
+ ************************************************************************
+ */
+int writeIntraModes(void)
+{
+  const int mb_type = img->mb_data[img->current_mb_nr].mb_type;
+
+  if (mb_type == I4MB)
+    return writeIntra4x4Modes();
+
+  if (mb_type == I8MB)
+    return writeIntra8x8Modes();
+
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Converts 8x8 block type to coding value
+ * \param b8mode
+ *    8x8 block mode; used as index into the 8-entry lookup tables in
+ *    B slices
+ * \param b8pdir
+ *    prediction direction of the 8x8 block; only used in B slices
+ * \return
+ *    b8mode-4 outside B slices, otherwise
+ *    b8start[b8mode] + b8inc[b8mode] * b8pdir
+ ************************************************************************
+ */
+int B8Mode2Value (int b8mode, int b8pdir)
+{
+  static const int b8start[8] = {0,0,0,0, 1, 4, 5, 10};
+  static const int b8inc  [8] = {0,0,0,0, 1, 2, 2, 1};
+
+  if (img->type == B_SLICE)
+    return b8start[b8mode] + b8inc[b8mode] * b8pdir;
+
+  return (b8mode-4);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Codes macroblock header
+*
+*    Writes, in order: the skip / mb_field / mb_type signalling (separate
+*    paths for I slices, for CABAC and for the run-length coded VLC
+*    case), then either the raw IPCM samples (early return) or the 8x8
+*    sub-partition modes, the transform size flag, the intra prediction
+*    modes, the motion information and the coefficients.
+* \param rdopt
+*    true for calls during RD-optimization
+*    (only read to decide whether c_ipred_mode is reset to DC_PRED_8)
+* \param coeff_rate
+*    bitrate of Luma and Chroma coeff
+*    (only assigned when mb_type != 0, or in a B slice with cbp != 0;
+*    left untouched otherwise, including on the IPCM early return)
+* \return
+*    number of bits accumulated in no_bits for this macroblock
+************************************************************************
+*/
+int writeMBLayer (int rdopt, int *coeff_rate)
+{
+  int             i,j;
+  int             mb_nr      = img->current_mb_nr;
+  int             prev_mb_nr = FmoGetPreviousMBNr(img->current_mb_nr);
+  Macroblock*     currMB     = &img->mb_data[mb_nr];
+  Macroblock*     prevMB     = mb_nr ? (&img->mb_data[prev_mb_nr]) : NULL; // NULL only when mb_nr == 0
+  SyntaxElement   se;
+  int*            bitCount   = currMB->bitcounter;
+  Slice*          currSlice  = img->currentSlice;
+  DataPartition*  dataPart;
+  const int*      partMap    = assignSE2partition[input->partition_mode];
+  int             no_bits    = 0;
+  int             skip       = currMB->mb_type ? 0:((img->type == B_SLICE) ? !currMB->cbp:1); // mb_type 0 counts as skipped, except in B slices when cbp != 0
+  int             mb_type;
+  int             prevMbSkipped = 0;
+  int             mb_field_tmp;
+  Macroblock      *topMB = NULL;
+
+  int             WriteFrameFieldMBInHeader = 0;
+
+  if (img->MbaffFrameFlag)
+  {
+    if (0==(mb_nr & 0x01))
+    {
+      WriteFrameFieldMBInHeader = 1; // top field
+
+      prevMbSkipped = 0;
+    }
+    else
+    {
+      if (prevMB->mb_type ? 0:((img->type == B_SLICE) ? !prevMB->cbp:1)) // same test as 'skip' above, applied to prevMB
+      {
+        WriteFrameFieldMBInHeader = 1; // bottom, if top was skipped
+      }
+
+      topMB= &img->mb_data[prev_mb_nr];
+      prevMbSkipped = topMB->skip_flag;
+    }
+  }
+  currMB->IntraChromaPredModeFlag = IS_INTRA(currMB);
+
+  // choose the appropriate data partition
+  dataPart = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+  if(img->type == I_SLICE)
+  {
+    //========= write mb_aff (I_SLICE) =========
+    if(WriteFrameFieldMBInHeader)
+    {
+      se.value1 = currMB->mb_field;
+      se.value2 = 0;
+      se.type   = SE_MBTYPE;
+
+      TRACE_SE (se.tracestring, "mb_field_decoding_flag");
+      writeFieldModeInfo(&se, dataPart);
+
+      bitCount[BITS_MB_MODE] += se.len;
+      no_bits                += se.len;
+    }
+
+    //========= write mb_type (I_SLICE) =========
+    se.value1  = MBType2Value (currMB);
+    se.value2  = 0;
+    se.type    = SE_MBTYPE;
+
+#if TRACE
+    snprintf(se.tracestring, TRACESTRING_SIZE,   "mb_type (I_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+#endif
+    writeMB_typeInfo (&se, dataPart);
+
+    bitCount[BITS_MB_MODE] += se.len;
+    no_bits                += se.len;
+  }
+  // not I_SLICE, CABAC
+  else if (input->symbol_mode == CABAC)
+  {
+    if (img->MbaffFrameFlag && ((img->current_mb_nr & 0x01) == 0||prevMbSkipped))
+    {
+      mb_field_tmp = currMB->mb_field; // mb_field is overridden only for the availability check and restored below
+      currMB->mb_field = field_flag_inference();
+      CheckAvailabilityOfNeighborsCABAC();
+      currMB->mb_field = mb_field_tmp;
+    }
+
+    //========= write mb_skip_flag (CABAC) =========
+    mb_type    = MBType2Value (currMB); // reused further down for the mb_type element
+    se.value1  = mb_type;
+    se.value2  = currMB->cbp;
+    se.type    = SE_MBTYPE;
+
+    TRACE_SE (se.tracestring, "mb_skip_flag");
+    writeMB_skip_flagInfo_CABAC(&se, dataPart);
+
+    bitCount[BITS_MB_MODE] += se.len;
+    no_bits                += se.len;
+
+    CheckAvailabilityOfNeighborsCABAC();
+
+    //========= write mb_aff (CABAC) =========
+    if(img->MbaffFrameFlag && !skip) // check for copy mode
+    {
+      if(WriteFrameFieldMBInHeader)
+      {
+        se.value1 = currMB->mb_field;
+        se.value2 = 0;
+        se.type   =  SE_MBTYPE;
+
+        TRACE_SE(se.tracestring, "mb_field_decoding_flag");
+        writeFieldModeInfo(&se, dataPart);
+
+        bitCount[BITS_MB_MODE] += se.len;
+        no_bits                += se.len;
+      }
+    }
+
+    //========= write mb_type (CABAC) =========
+    if (currMB->mb_type != 0 || ((img->type == B_SLICE) && currMB->cbp != 0))
+    {
+      se.value1  = mb_type;
+      se.value2  = 0;
+      se.type    = SE_MBTYPE;
+
+#if TRACE
+      if (img->type == B_SLICE)
+        snprintf(se.tracestring, TRACESTRING_SIZE, "mb_type (B_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+      else
+        snprintf(se.tracestring, TRACESTRING_SIZE, "mb_type (P_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+#endif
+      writeMB_typeInfo( &se, dataPart);
+
+      bitCount[BITS_MB_MODE] += se.len;
+      no_bits                += se.len;
+    }
+  }
+  // VLC not intra
+  else if (currMB->mb_type != 0 || ((img->type == B_SLICE) && currMB->cbp != 0))
+  {
+    //===== Run Length Coding: Non-Skipped macroblock =====
+    se.value1  = img->cod_counter;
+    se.value2  = 0;
+    se.type    = SE_MBTYPE;
+
+    TRACE_SE (se.tracestring, "mb_skip_run");
+    writeSE_UVLC(&se, dataPart);
+
+    bitCount[BITS_MB_MODE] += se.len;
+    no_bits                += se.len;
+
+    // Reset cod counter
+    img->cod_counter = 0;
+
+    // write mb_aff
+    if(img->MbaffFrameFlag && !skip) // check for copy mode
+    {
+      if(WriteFrameFieldMBInHeader)
+      {
+        se.value1 = currMB->mb_field;
+        se.type   =  SE_MBTYPE;
+
+        TRACE_SE(se.tracestring, "mb_field_decoding_flag");
+        writeSE_Flag (&se, dataPart);
+
+        bitCount[BITS_MB_MODE] += se.len;
+        no_bits                += se.len;
+      }
+    }
+    // Put out mb mode
+    se.value1  = MBType2Value (currMB);
+
+    if (img->type != B_SLICE)
+    {
+      se.value1--; // outside B slices this path codes MBType2Value() - 1
+    }
+    se.type    = SE_MBTYPE;
+    se.value2  = 0;
+
+#if TRACE
+    if (img->type == B_SLICE)
+      snprintf(se.tracestring, TRACESTRING_SIZE, "mb_type (B_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+    else
+      snprintf(se.tracestring, TRACESTRING_SIZE, "mb_type (P_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+#endif
+    writeMB_typeInfo(&se, dataPart);
+
+    bitCount[BITS_MB_MODE] += se.len;
+    no_bits                += se.len;
+  }
+  else
+  {
+    //Run Length Coding: Skipped macroblock
+    img->cod_counter++;
+
+    currMB->skip_flag = 1;
+    // CAVLC
+    for (j=0; j < (4 + img->num_blk8x8_uv); j++)
+      for (i=0; i < 4; i++)
+        img->nz_coeff [img->current_mb_nr][i][j]=0;
+
+
+    if(FmoGetNextMBNr(img->current_mb_nr) == -1 && img->cod_counter>0) // no next MB: flush the pending skip run now
+    {
+      // Put out run
+      se.value1  = img->cod_counter;
+      se.value2  = 0;
+      se.type    = SE_MBTYPE;
+
+      TRACE_SE(se.tracestring, "mb_skip_run");
+      writeSE_UVLC(&se, dataPart);
+
+      bitCount[BITS_MB_MODE] += se.len;
+      no_bits                += se.len;
+
+      // Reset cod counter
+      img->cod_counter = 0;
+    }
+  }
+
+  //init NoMbPartLessThan8x8Flag
+  currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(active_sps->direct_8x8_inference_flag))? 0: 1;
+
+  if (currMB->mb_type == IPCM)
+  {
+    int jj, uv;
+    if (input->symbol_mode == CABAC)
+    {
+      int len;
+      EncodingEnvironmentPtr eep = &dataPart->ee_cabac;
+      len = arienco_bits_written(eep);
+      arienco_done_encoding(eep); // This pads to byte
+      len = arienco_bits_written(eep) - len; // bits added by terminating the arithmetic coder
+      no_bits += len;
+      // Now restart the encoder
+      arienco_start_encoding(eep, dataPart->bitstream->streamBuffer, &(dataPart->bitstream->byte_pos));
+      reset_pic_bin_count();
+    }
+    if (dataPart->bitstream->bits_to_go < 8)
+    {
+      // This will only happen in the CAVLC case, CABAC is already padded
+      se.type  = SE_MBTYPE;
+      se.len   = dataPart->bitstream->bits_to_go;
+      no_bits += se.len;
+      bitCount[BITS_COEFF_Y_MB]+= se.len;
+      se.bitpattern = 0;
+#if TRACE
+      snprintf(se.tracestring, TRACESTRING_SIZE, "pcm_alignment_zero_bits = %d", se.len);
+#endif
+      writeSE_Fix(&se, dataPart);
+    }
+    for (j=0;j<MB_BLOCK_SIZE;j++)
+    {
+      jj = img->pix_y+j;
+      for (i=0;i<MB_BLOCK_SIZE;i++)
+      {
+        se.len   = img->bitdepth_luma;
+        se.type  = SE_MBTYPE;
+        no_bits += se.len;
+        se.bitpattern = enc_picture->imgY[jj][img->pix_x+i];
+        se.value1 = se.bitpattern;
+        bitCount[BITS_COEFF_Y_MB]+=se.len;
+#if TRACE
+        snprintf(se.tracestring, TRACESTRING_SIZE, "pcm_sample_luma (%d %d) = %d", j,i,se.bitpattern);
+#endif
+        writeSE_Fix(&se, dataPart);
+      }
+    }
+    if (img->yuv_format != YUV400)
+    {
+      for (uv = 0; uv < 2; uv ++)
+      {
+        for (j=0;j<img->mb_cr_size_y;j++)
+        {
+          jj = img->pix_c_y+j;
+          for (i=0;i<img->mb_cr_size_x;i++)
+          {
+            se.len   = img->bitdepth_chroma;
+            se.type  = SE_MBTYPE;
+            no_bits += se.len;
+            se.bitpattern = enc_picture->imgUV[uv][jj][img->pix_c_x+i];
+            se.value1 = se.bitpattern;
+            bitCount[BITS_COEFF_UV_MB]+=se.len;
+#if TRACE
+            snprintf(se.tracestring, TRACESTRING_SIZE, "pcm_sample_chroma (%s) (%d %d) = %d", uv?"v":"u", j,i,se.bitpattern);
+#endif
+            writeSE_Fix(&se, dataPart);
+          }
+        }
+      }
+    }
+    return no_bits; // IPCM: nothing else is written after the raw samples
+  }
+
+  //===== BITS FOR 8x8 SUB-PARTITION MODES =====
+  if (IS_P8x8 (currMB))
+  {
+    dataPart = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+
+    for (i=0; i<4; i++)
+    {
+      se.value1  = B8Mode2Value (currMB->b8mode[i], currMB->b8pdir[i]);
+      se.value2  = 0;
+      se.type    = SE_MBTYPE;
+#if TRACE
+      snprintf(se.tracestring, TRACESTRING_SIZE, "8x8 mode/pdir(%2d) = %3d/%d", i, currMB->b8mode[i], currMB->b8pdir[i]);
+#endif
+      writeB8_typeInfo (&se, dataPart);
+      bitCount[BITS_MB_MODE]+= se.len;
+      no_bits               += se.len;
+
+      //set NoMbPartLessThan8x8Flag for P8x8 mode
+      currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && active_sps->direct_8x8_inference_flag) ||
+                                         (currMB->b8mode[i]==4);
+    }
+    no_bits += writeMotionInfo2NAL  (); // P8x8: motion info is written here; the block further down excludes P8x8
+  }
+
+  //============= Transform size flag for INTRA MBs =============
+  //-------------------------------------------------------------
+  //transform size flag for INTRA_4x4 and INTRA_8x8 modes
+  if ((currMB->mb_type == I8MB || currMB->mb_type == I4MB) && input->Transform8x8Mode)
+  {
+    se.value1 = currMB->luma_transform_size_8x8_flag;
+    se.type   = SE_MBTYPE;
+
+#if TRACE
+    snprintf(se.tracestring, TRACESTRING_SIZE, "transform_size_8x8_flag = %3d", currMB->luma_transform_size_8x8_flag);
+#endif
+    writeMB_transform_size(&se, dataPart);
+
+    bitCount[BITS_MB_MODE] += se.len;
+    no_bits                += se.len;
+  }
+
+
+ //===== BITS FOR INTRA PREDICTION MODES ====
+  no_bits += writeIntraModes();
+  //===== BITS FOR CHROMA INTRA PREDICTION MODE ====
+  if (currMB->IntraChromaPredModeFlag && img->yuv_format != YUV400)
+    no_bits += writeChromaIntraPredMode();
+  else if(!rdopt) //GB CHROMA !!!!!
+    currMB->c_ipred_mode = DC_PRED_8; //setting c_ipred_mode to default is not the right place here
+                                      //resetting in rdopt.c (but where ??)
+                                      //with cabac and bframes maybe it could crash without this default
+                                      //since cabac needs the right neighborhood for the later MBs
+
+  //----- motion information -----
+  if (currMB->mb_type !=0 && currMB->mb_type !=P8x8)
+  {
+    no_bits  += writeMotionInfo2NAL  ();
+  }
+
+  if ((currMB->mb_type!=0) || (img->type==B_SLICE && (currMB->cbp!=0)))
+  {
+    *coeff_rate = writeCBPandLumaCoeff (); // the only place where *coeff_rate is assigned
+    if (img->yuv_format != YUV400)
+      *coeff_rate  += writeChromaCoeff ();
+
+    no_bits  += *coeff_rate;
+  }
+
+  return no_bits;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Encodes a terminating bit with biari_encode_symbol_final() on the
+ *    CABAC engine of the data partition assigned to SE_MBTYPE
+ *
+ *    The write_flag of that partition's bitstream is set to 1 first.
+ * \param bit
+ *    value handed to biari_encode_symbol_final()
+ ************************************************************************
+ */
+void write_terminating_bit (short bit)
+{
+  const int*     part_of   = assignSE2partition[input->partition_mode];
+  DataPartition* partition = &(img->currentSlice->partArr[part_of[SE_MBTYPE]]);
+
+  partition->bitstream->write_flag = 1;
+
+  biari_encode_symbol_final(&(partition->ee_cabac), bit);
+#if TRACE
+  fprintf (p_trace, "      CABAC terminating bit = %d\n",bit);
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Write chroma intra prediction mode.
+ *
+ *    Codes c_ipred_mode of the current macroblock through
+ *    writeCIPredMode() on the partition assigned to SE_INTRAPREDMODE.
+ * \return
+ *    se.len of the written element (also added to
+ *    bitcounter[BITS_COEFF_UV_MB])
+ ************************************************************************
+ */
+int writeChromaIntraPredMode(void)
+{
+  Macroblock*    mb        = &img->mb_data[img->current_mb_nr];
+  const int*     part_of   = assignSE2partition[input->partition_mode];
+  DataPartition* partition = &(img->currentSlice->partArr[part_of[SE_INTRAPREDMODE]]);
+  SyntaxElement  se;
+
+  se.type   = SE_INTRAPREDMODE;
+  se.value1 = mb->c_ipred_mode;
+  se.value2 = 0;
+
+  TRACE_SE(se.tracestring, "intra_chroma_pred_mode");
+  writeCIPredMode(&se, partition);
+
+  mb->bitcounter[BITS_COEFF_UV_MB] += se.len;
+
+  return se.len;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set global last_dquant according to macroblock delta qp
+ *
+ *    last_dquant becomes delta_qp of the current macroblock when that
+ *    macroblock is IS_INTERMV or IS_INTRA, or when it lies in a B slice
+ *    and has cbp != 0; in every other case it is set to 0.
+ ************************************************************************
+ */
+
+extern int last_dquant;
+
+void set_last_dquant(void)
+{
+  Macroblock *mb = &img->mb_data[img->current_mb_nr];
+  const int   non_skip = (IS_INTERMV (mb) || IS_INTRA (mb))
+                      || ((img->type==B_SLICE)  && mb->cbp != 0);
+
+  if (!non_skip)
+  {
+    last_dquant = 0;
+    return;
+  }
+
+  last_dquant = mb->delta_qp;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Passes the chosen syntax elements to the NAL
+ *
+ *    Updates the per-picture bookkeeping (SumFrameQP, intra_block,
+ *    intras), optionally writes a CABAC terminating bit of 0, calls
+ *    writeMBLayer() and afterwards fills in the bit counters, the rate
+ *    control totals and stats->bit_slice.
+ * \param eos_bit
+ *    when non-zero (and CABAC is used and the macroblock is not the
+ *    first one of its slice) a terminating bit of 0 is written first
+ ************************************************************************
+ */
+void write_one_macroblock (int eos_bit)
+{
+  Macroblock* mb   = &img->mb_data[img->current_mb_nr];
+  int*        bits = mb->bitcounter;
+  int         coeff_bits;  // receives coeff_rate of writeMBLayer; not used afterwards
+  int         row;
+
+  extern int cabac_encoding;
+
+  // enable writing of trace file
+#if TRACE
+  Slice *curr_slice = img->currentSlice;
+  curr_slice->partArr[0].bitstream->trace_enabled = TRUE;
+  if (input->partition_mode)
+  {
+    curr_slice->partArr[1].bitstream->trace_enabled = TRUE;
+    curr_slice->partArr[2].bitstream->trace_enabled = TRUE;
+  }
+#endif
+
+  img->SumFrameQP += mb->qp;
+
+  //--- constrain intra prediction ---
+  if (input->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
+    img->intra_block[img->current_mb_nr] = IS_INTRA(mb);
+
+  //===== init and update number of intra macroblocks =====
+  if (img->current_mb_nr==0)
+    intras=0;
+
+  if (IS_INTRA(mb))
+    intras++;
+
+  //--- terminating bit 0 ahead of every macroblock but the first one of the slice ---
+  if (input->symbol_mode==CABAC && img->current_mb_nr!=img->currentSlice->start_mb_nr && eos_bit)
+    write_terminating_bit (0);
+
+#if TRACE
+  // trace: write macroblock header
+  if (p_trace)
+  {
+    fprintf(p_trace, "\n*********** Pic: %i (I/P) MB: %i Slice: %i **********\n\n", frame_no, img->current_mb_nr, img->current_slice_nr);
+  }
+#endif
+
+  cabac_encoding = 1;
+
+  //--- write macroblock ---
+  writeMBLayer (0, &coeff_bits);
+
+  // skipped macroblock: clear its non-zero coefficient counters (CAVLC)
+  if (mb->mb_type == 0 && (img->type != B_SLICE || mb->cbp == 0))
+  {
+    for (row = 0; row < 4; row++)
+      memset(img->nz_coeff [img->current_mb_nr][row], 0, (4 + img->num_blk8x8_uv) * sizeof(int));
+  }
+
+  set_last_dquant();
+
+  //--- set total bit-counter ---
+  bits[BITS_TOTAL_MB] = bits[BITS_MB_MODE]  + bits[BITS_COEFF_Y_MB]
+                      + bits[BITS_INTER_MB] + bits[BITS_CBP_MB]
+                      + bits[BITS_DELTA_QUANT_MB] + bits[BITS_COEFF_UV_MB];
+
+  //--- rate control ---
+  if ( input->RCEnable )
+  {
+    img->NumberofMBHeaderBits  = bits[BITS_MB_MODE] + bits[BITS_INTER_MB]
+                               + bits[BITS_CBP_MB]  + bits[BITS_DELTA_QUANT_MB];
+    img->NumberofMBTextureBits = bits[BITS_COEFF_Y_MB]+ bits[BITS_COEFF_UV_MB];
+
+    generic_RC->NumberofTextureBits += img->NumberofMBTextureBits;
+    generic_RC->NumberofHeaderBits  += img->NumberofMBHeaderBits;
+
+    // basic unit layer rate control
+    if (img->BasicUnit < img->FrameSizeInMbs)
+    {
+      generic_RC->NumberofBasicUnitHeaderBits  += img->NumberofMBHeaderBits;
+      generic_RC->NumberofBasicUnitTextureBits += img->NumberofMBTextureBits;
+    }
+  }
+
+  // record the total number of MBs
+  img->NumberofCodedMacroBlocks++;
+
+  stats->bit_slice += bits[BITS_TOTAL_MB];
+
+  cabac_encoding = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Codes the reference frame
+ * \param mode
+ *    not read by this function
+ * \param i
+ *    stored in img->subblock_x (position used for context determination)
+ * \param j
+ *    stored in img->subblock_y (position used for context determination)
+ * \param fwd_flag
+ *    non-zero selects LIST_0, zero selects LIST_1
+ * \param ref
+ *    reference index to be coded
+ * \return
+ *    se.len of the written element (also added to
+ *    bitcounter[BITS_INTER_MB])
+ ************************************************************************
+ */
+int writeReferenceFrame (int mode, int i, int j, int fwd_flag, int  ref)
+{
+  Macroblock*    mb        = &img->mb_data[img->current_mb_nr];
+  const int*     part_of   = assignSE2partition[input->partition_mode];
+  DataPartition* partition = &(img->currentSlice->partArr[part_of[SE_REFFRAME]]);
+  const int      base_list = fwd_flag ? LIST_0 : LIST_1;
+  // the writer is selected per list, shifted by the macroblock's list offset
+  const int      list      = base_list + mb->list_offset;
+  SyntaxElement  se;
+
+  se.type   = SE_REFFRAME;
+  se.value1 = ref;
+  se.value2 = base_list;
+
+  img->subblock_x = i;
+  img->subblock_y = j;
+
+#if TRACE
+  if (fwd_flag)
+    snprintf(se.tracestring, TRACESTRING_SIZE, "ref_idx_l0 = %d", se.value1);
+  else
+    snprintf(se.tracestring, TRACESTRING_SIZE, "ref_idx_l1 = %d", se.value1);
+#endif
+
+  writeRefFrame[list](&se, partition);
+
+  mb->bitcounter[BITS_INTER_MB] += se.len;
+
+  return se.len;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes motion vectors of an 8x8 block
+ *
+ *    For every partition of size input->part_size[mv_mode] inside
+ *    [i0,i1) x [j0,j1) both components of the motion vector difference
+ *    (motion vector minus predictor) are stored in currMB->mvd for all
+ *    positions covered by the partition and coded with writeMVD().
+ * \param i0
+ *    first horizontal position
+ * \param j0
+ *    first vertical position
+ * \param i1
+ *    horizontal end position (exclusive)
+ * \param j1
+ *    vertical end position (exclusive)
+ * \param refframe
+ *    reference index used to address the motion vector arrays
+ * \param list_idx
+ *    list index used to address the motion vector arrays
+ * \param mv_mode
+ *    partition mode
+ * \return
+ *    sum of se.len over all written elements (also added to
+ *    bitcounter[BITS_INTER_MB])
+ ************************************************************************
+ */
+int writeMotionVector8x8 (int  i0,
+                          int  j0,
+                          int  i1,
+                          int  j1,
+                          int  refframe,
+                          int  list_idx,
+                          int  mv_mode)
+{
+  Macroblock*    mb         = &img->mb_data[img->current_mb_nr];
+  Slice*         slice      = img->currentSlice;
+  const int*     part_of    = assignSE2partition[input->partition_mode];
+  int*           bits       = mb->bitcounter;
+  const int      part_w     = input->part_size[mv_mode][0];
+  const int      part_h     = input->part_size[mv_mode][1];
+  short******    mv_src     = img->all_mv;
+  short******    mv_pred    = img->pred_mv;
+  DataPartition* partition;
+  SyntaxElement  se;
+  int            total_bits = 0;
+  int            bx, by, comp, dx, dy;
+  int            mv, pmv, mvd;
+
+  // bi-predictive ME result replaces the regular motion vectors
+  if (mb->bi_pred_me && mb->b8pdir[0]==2 && mv_mode == 1 && refframe == 0)
+  {
+    if (mb->bi_pred_me == 1)
+      mv_src = img->bipred_mv1;
+    else
+      mv_src = img->bipred_mv2;
+  }
+
+  for (by = j0; by < j1; by += part_h)
+  {
+    for (bx = i0; bx < i1; bx += part_w)
+    {
+      for (comp = 0; comp < 2; comp++)
+      {
+        mv  = mv_src [by][bx][list_idx][refframe][mv_mode][comp];
+        pmv = mv_pred[by][bx][list_idx][refframe][mv_mode][comp];
+        mvd = mv - pmv;
+
+        //--- store (oversampled) mvd ---
+        for (dy = 0; dy < part_h; dy++)
+        {
+          for (dx = 0; dx < part_w; dx++)
+            mb->mvd[list_idx][by+dy][bx+dx][comp] = mvd;
+        }
+
+        partition       = &(slice->partArr[part_of[SE_MVD]]);
+        img->subblock_x = bx; // position used for context determination
+        img->subblock_y = by; // position used for context determination
+        se.type         = SE_MVD;
+        se.value1       = mvd;
+        se.value2       = 2*comp+list_idx; // identifies the component and the direction; only used for context determination
+
+#if TRACE
+        if (!list_idx)
+          snprintf(se.tracestring, TRACESTRING_SIZE, "mvd_l0 (%d) = %3d  (org_mv %3d pred_mv %3d)",comp, mvd, mv, pmv);
+        else
+          snprintf(se.tracestring, TRACESTRING_SIZE, "mvd_l1 (%d) = %3d  (org_mv %3d pred_mv %3d)",comp, mvd, mv, pmv);
+#endif
+        writeMVD (&se, partition);
+
+        bits[BITS_INTER_MB] += se.len;
+        total_bits          += se.len;
+      }
+    }
+  }
+
+  return total_bits;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes motion info
+ *
+ *    Output order: reference indices of all forward predicted blocks,
+ *    reference indices of all backward predicted blocks, forward motion
+ *    vectors, and (B slices only) backward motion vectors.
+ * \return
+ *    sum of the values returned by writeReferenceFrame() and
+ *    writeMotionVector8x8()
+ ************************************************************************
+ */
+int writeMotionInfo2NAL (void)
+{
+  Macroblock* mb         = &img->mb_data[img->current_mb_nr];
+  const int   is_bslice  = (img->type==B_SLICE);
+  const int   blk_type   = IS_P8x8(mb) ? 4 : mb->mb_type;
+  const int   col_step   = (input->blc_size[blk_type][0] >> 2);
+  const int   row_step   = (input->blc_size[blk_type][1] >> 2);
+  int         total_bits = 0;
+  int         row, col, b8, pdir, pic_row, ref;
+
+  //=== If multiple ref. frames, write reference frame for the MB ===
+  // if UVLC is turned on, a 8x8 macroblock with all ref=0 in a P-frame is signalled in macroblock mode
+  if (IS_INTERMV (mb) &&
+      (!IS_P8x8 (mb) || !ZeroRef (mb) || input->symbol_mode==CABAC || is_bslice))
+  {
+    //----- blocks with a forward vector -----
+    for (row = 0; row < 4; row += row_step)
+    {
+      pic_row = img->block_y + row;
+      for (col = 0; col < 4; col += col_step)
+      {
+        b8   = row + (col >> 1);
+        pdir = mb->b8pdir[b8];
+
+        if (mb->b8mode[b8] != 0 && (pdir == 0 || pdir == 2))
+          total_bits += writeReferenceFrame (mb->b8mode[b8], col, row, 1, enc_picture->ref_idx[LIST_0][pic_row][img->block_x+col]);
+      }
+    }
+
+    //----- blocks with a backward vector -----
+    for (row = 0; row < 4; row += row_step)
+    {
+      pic_row = img->block_y + row;
+      for (col = 0; col < 4; col += col_step)
+      {
+        b8   = row + (col >> 1);
+        pdir = mb->b8pdir[b8];
+
+        if (mb->b8mode[b8] != 0 && (pdir == 1 || pdir == 2))
+          total_bits += writeReferenceFrame (mb->b8mode[b8], col, row, 0, enc_picture->ref_idx[LIST_1][pic_row][img->block_x+col]);
+      }
+    }
+  }
+
+  //===== write forward motion vectors =====
+  if (IS_INTERMV (mb))
+  {
+    for (row = 0; row < 4; row += row_step)
+    {
+      pic_row = img->block_y + row;
+      for (col = 0; col < 4; col += col_step)
+      {
+        b8   = row + (col >> 1);
+        pdir = mb->b8pdir[b8];
+
+        if (mb->b8mode[b8] != 0 && (pdir == 0 || pdir == 2))
+        {
+          ref         = enc_picture->ref_idx[LIST_0][pic_row][img->block_x+col];
+          total_bits += writeMotionVector8x8 (col, row, col+col_step, row+row_step, ref, LIST_0, mb->b8mode[b8]);
+        }
+      }
+    }
+  }
+
+  //===== write backward motion vectors =====
+  if (IS_INTERMV (mb) && is_bslice)
+  {
+    for (row = 0; row < 4; row += row_step)
+    {
+      pic_row = img->block_y + row;
+      for (col = 0; col < 4; col += col_step)
+      {
+        b8   = row + (col >> 1);
+        pdir = mb->b8pdir[b8];
+
+        if (mb->b8mode[b8] != 0 && (pdir == 1 || pdir == 2))
+        {
+          ref         = enc_picture->ref_idx[LIST_1][pic_row][img->block_x+col];
+          total_bits += writeMotionVector8x8 (col, row, col+col_step, row+row_step, ref, LIST_1, mb->b8mode[b8]);
+        }
+      }
+    }
+  }
+
+  return total_bits;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes chrominance coefficients
+ ************************************************************************
+ */
+int writeChromaCoeff (void)
+{ // writes chroma DC, then chroma AC, of the current macroblock; returns the accumulated bit count
+  int             rate      = 0;
+  Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+  SyntaxElement   se;
+  int*            bitCount  = currMB->bitcounter;
+  Slice*          currSlice = img->currentSlice;
+  const int*      partMap   = assignSE2partition[input->partition_mode];
+  int             cbp       = currMB->cbp;
+  DataPartition*  dataPart;
+
+  int   level, run;
+  int   k, uv;
+  int   b8, b4, param;
+  int*  ACLevel;
+  int*  ACRun;
+  int*  DCLevel;
+  int*  DCRun;
+
+  static const int   chroma_dc_context[3]={CHROMA_DC, CHROMA_DC_2x4, CHROMA_DC_4x4}; // CABAC context per yuv index
+  int   yuv = img->yuv_format - 1; // row selector for the tables in this function
+  // chroma_ac_param[yuv][b8-4][b4] is the 'param' handed to writeCoeff4x4_CAVLC, which unpacks it as x = param >> 4, y = param & 15
+  static const unsigned char chroma_ac_param[3][8][4] =
+  {
+   {{ 4, 20,  5, 21},
+    {36, 52, 37, 53},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0}},
+   {{ 4, 20,  5, 21},
+    { 6, 22,  7, 23},
+    {36, 52, 37, 53},
+    {38, 54, 39, 55},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0}},
+   {{ 4, 20,  5, 21},
+    {36, 52, 37, 53},
+    { 6, 22,  7, 23},
+    {38, 54, 39, 55},
+    { 8, 24,  9, 25},
+    {40, 56, 41, 57},
+    {10, 26, 11, 27},
+    {42, 58, 43, 59}}
+  };
+
+  //=====
+  //=====   D C - C O E F F I C I E N T S
+  //=====
+  if (cbp > 15)  // check if any chroma bits in coded block pattern is set
+  {
+    for (uv=0; uv < 2; uv++) // one pass per chroma component (coefficients in img->cofDC[uv+1])
+    {
+      if (input->symbol_mode == UVLC)
+      {
+        param = uv; // for CHROMA_DC the callee uses param to pick img->cofDC[param+1]
+        rate += writeCoeff4x4_CAVLC (CHROMA_DC, 0, 0, param);
+        // CAVLC
+      }
+      else
+      {
+        // CABAC: write (level, run) pairs one by one
+        DCLevel = img->cofDC[uv+1][0];
+        DCRun   = img->cofDC[uv+1][1];
+
+        level=1; // forces the first iteration; the pair with level == 0 is written too and ends the loop
+        for (k=0; k <= img->num_cdc_coeff && level != 0; ++k)
+        {
+          level = se.value1 = DCLevel[k]; // level
+          run   = se.value2 = DCRun  [k]; // run
+
+          se.context         = chroma_dc_context[yuv];
+          se.type             = (IS_INTRA(currMB) ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER);
+          img->is_intra_block =  IS_INTRA(currMB);
+          img->is_v_block     = uv;
+
+          // choose the appropriate data partition
+          dataPart = &(currSlice->partArr[partMap[se.type]]);
+#if TRACE
+          snprintf(se.tracestring, TRACESTRING_SIZE, "DC Chroma %2d: level =%3d run =%2d",k, level, run);
+#endif
+          writeRunLevel_CABAC(&se, dataPart);
+
+          bitCount[BITS_COEFF_UV_MB] += se.len;
+          rate                       += se.len;
+        }
+      }
+    }
+  }
+
+  //=====
+  //=====   A C - C O E F F I C I E N T S
+  //=====
+  uv=-1; // reused below as a running count of 4x4 blocks on the CABAC path (incremented before use)
+  if (cbp >> 4 == 2) // check if chroma bits in coded block pattern = 10b
+  {
+    for (b8=4; b8 < (4+img->num_blk8x8_uv); b8++) // chroma 8x8 blocks follow the four luma ones in img->cofAC
+    for (b4=0; b4 < 4; b4++)
+    {
+      if (input->symbol_mode == UVLC)
+      {
+        param = chroma_ac_param[yuv][b8-4][b4];
+        rate += writeCoeff4x4_CAVLC (CHROMA_AC, b8, b4, param);
+        // CAVLC
+      }
+      else
+      {
+        // CABAC: write (level, run) pairs one by one
+        ACLevel = img->cofAC[b8][b4][0];
+        ACRun   = img->cofAC[b8][b4][1];
+
+        level=1; // forces the first iteration; the pair with level == 0 is written too and ends the loop
+        uv++;
+
+        img->subblock_y = subblk_offset_y[yuv][b8-4][b4]>>2;
+        img->subblock_x = subblk_offset_x[yuv][b8-4][b4]>>2;
+
+        for (k=0; k < 16 && level != 0; k++)
+        {
+          level = se.value1 = ACLevel[k]; // level
+          run   = se.value2 = ACRun  [k]; // run
+
+          se.context          = CHROMA_AC;
+          se.type             = (IS_INTRA(currMB) ? SE_CHR_AC_INTRA : SE_CHR_AC_INTER);
+          img->is_intra_block =  IS_INTRA(currMB);
+          img->is_v_block     = (uv>=(img->num_blk8x8_uv<<1)); // set once the 4x4 block count reaches 2*num_blk8x8_uv
+
+          // choose the appropriate data partition
+          dataPart = &(currSlice->partArr[partMap[se.type]]);
+#if TRACE
+          snprintf(se.tracestring, TRACESTRING_SIZE, "AC Chroma %2d: level =%3d run =%2d",k, level, run);
+#endif
+          writeRunLevel_CABAC(&se, dataPart);
+          bitCount[BITS_COEFF_UV_MB] += se.len;
+          rate                       += se.len;
+        }
+      }
+    }
+  }
+
+  return rate;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the luma coefficients of a 4x4 block (CABAC)
+ *
+ * \param b8
+ *    first index into img->cofAC (8x8 block)
+ * \param b4
+ *    second index into img->cofAC (4x4 block inside the 8x8 block)
+ * \param intra4x4mode
+ *    nonzero selects the intra syntax element types
+ *
+ * \return
+ *    sum of se.len over all (level, run) pairs written
+ ************************************************************************
+ */
+int writeLumaCoeff4x4_CABAC (int b8, int b4, int intra4x4mode)
+{
+  Macroblock     *mb       = &img->mb_data[img->current_mb_nr];
+  Slice          *slice    = img->currentSlice;
+  const int      *se2part  = assignSE2partition[input->partition_mode];
+  int            *mb_bits  = mb->bitcounter;
+  int            *levels   = img->cofAC[b8][b4][0];
+  int            *runs     = img->cofAC[b8][b4][1];
+  SyntaxElement   se;
+  DataPartition  *part;
+  int             total    = 0;
+  int             pos      = 0;
+  int             lev, rn;
+
+  // block position used for the coeff_count context
+  img->subblock_x = ((b8 & 0x1) << 1) + (b4 & 0x1);
+  img->subblock_y = ((b8 < 2) ? 0 : 2) + ((b4 < 2) ? 0 : 1);
+
+  // the pair whose level is 0 is written as well and ends the list
+  do
+  {
+    se.value1 = levels[pos];
+    se.value2 = runs[pos];
+    lev       = se.value1;
+    rn        = se.value2;
+
+    se.context = LUMA_4x4;
+    if (pos == 0)
+      se.type = intra4x4mode ? SE_LUM_DC_INTRA : SE_LUM_DC_INTER;
+    else
+      se.type = intra4x4mode ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER;
+    img->is_intra_block = intra4x4mode;
+
+    // data partition that carries this syntax element type
+    part = &(slice->partArr[se2part[se.type]]);
+#if TRACE
+    snprintf(se.tracestring, TRACESTRING_SIZE, "Luma sng(%2d) level =%3d run =%2d", pos, lev, rn);
+#endif
+    writeRunLevel_CABAC(&se, part);
+
+    mb_bits[BITS_COEFF_Y_MB] += se.len;
+    total                    += se.len;
+    pos++;
+  }
+  while (pos <= 16 && lev != 0);
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the luma coefficients of an 8x8 block (CABAC)
+ *
+ * \param b8
+ *    first index into img->cofAC (8x8 block)
+ * \param intra_mode
+ *    nonzero selects the intra syntax element types
+ *
+ * \return
+ *    sum of se.len over all (level, run) pairs written
+ ************************************************************************
+ */
+int writeLumaCoeff8x8_CABAC (int b8, int intra_mode)
+{
+  Macroblock     *mb       = &img->mb_data[img->current_mb_nr];
+  Slice          *slice    = img->currentSlice;
+  const int      *se2part  = assignSE2partition[input->partition_mode];
+  int            *mb_bits  = mb->bitcounter;
+  int            *levels   = img->cofAC[b8][0][0];
+  int            *runs     = img->cofAC[b8][0][1];
+  SyntaxElement   se;
+  DataPartition  *part;
+  int             total    = 0;
+  int             pos      = 0;
+  int             lev, rn;
+
+  // block position used for the coeff_count context
+  img->subblock_x = (b8 & 0x1) ? 2 : 0;
+  img->subblock_y = (b8 < 2)   ? 0 : 2;
+
+  // the pair whose level is 0 is written as well and ends the list
+  do
+  {
+    se.value1 = levels[pos];
+    se.value2 = runs[pos];
+    lev       = se.value1;
+    rn        = se.value2;
+
+    se.context = LUMA_8x8;
+    if (pos == 0)
+      se.type = intra_mode ? SE_LUM_DC_INTRA : SE_LUM_DC_INTER;
+    else
+      se.type = intra_mode ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER;
+    img->is_intra_block = intra_mode;
+
+    // B slices are routed through the SE_BFRAME partition entry
+    if (img->type != B_SLICE)
+      part = &(slice->partArr[se2part[se.type]]);
+    else
+      part = &(slice->partArr[se2part[SE_BFRAME]]);
+
+#if TRACE
+    snprintf(se.tracestring, TRACESTRING_SIZE, "Luma8x8 sng(%2d) level =%3d run =%2d", pos, lev, rn);
+#endif
+    writeRunLevel_CABAC(&se, part);
+
+    mb_bits[BITS_COEFF_Y_MB] += se.len;
+    total                    += se.len;
+    pos++;
+  }
+  while (pos <= 64 && lev != 0);
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the luma coefficients of one 8x8 block, dispatching on the
+ *    entropy coder and on the transform size
+ *
+ * \param block8x8
+ *    index of the 8x8 block
+ * \param block_mode
+ *    mode of the 8x8 block (compared against IBLOCK and I8MB)
+ * \param transform_size_flag
+ *    nonzero when the 8x8 transform is used
+ *
+ * \return
+ *    sum of the values returned by the coefficient writers
+ ************************************************************************
+ */
+int writeLumaCoeff8x8 (int block8x8, int block_mode, int transform_size_flag)
+{
+  const int is_intra4x4 = (block_mode == IBLOCK);
+  const int is_intra8x8 = (block_mode == I8MB);
+  int sub, bits = 0;
+
+  if (is_intra8x8)
+    assert(transform_size_flag == 1);
+
+  // CAVLC always codes four 4x4 blocks, whatever the transform size
+  if (input->symbol_mode == UVLC)
+  {
+    const int intra_param = transform_size_flag ? is_intra8x8 : is_intra4x4; // pass new intra
+
+    for (sub = 0; sub < 4; sub++)
+      bits += writeCoeff4x4_CAVLC (LUMA, block8x8, sub, intra_param);
+    return bits;
+  }
+
+  // CABAC with 8x8 transform: a single 8x8 coefficient list
+  if (transform_size_flag)
+  {
+    bits += writeLumaCoeff8x8_CABAC (block8x8, is_intra8x8);
+    return bits;
+  }
+
+  // CABAC with 4x4 transform
+  for (sub = 0; sub < 4; sub++)
+    bits += writeLumaCoeff4x4_CABAC (block8x8, sub, is_intra4x4);
+
+  return bits;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes CBP, DQUANT, and Luma Coefficients of a macroblock
+ *
+ *    Order of the syntax elements written for the current macroblock:
+ *    CBP and (conditionally) transform_size_8x8_flag when the macroblock
+ *    is not IS_NEWINTRA, then delta QP when cbp != 0 or IS_NEWINTRA,
+ *    then the luma coefficients.
+ *
+ * \return
+ *    sum of se.len / callee bit counts of everything written here
+ ************************************************************************
+ */
+int writeCBPandLumaCoeff (void)
+{
+  int             mb_x, mb_y, i, j, k;
+  int             level, run;
+  int             rate      = 0;
+  Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+  int*            bitCount  = currMB->bitcounter;
+  SyntaxElement   se;
+  Slice*          currSlice = img->currentSlice;
+  const int*      partMap   = assignSE2partition[input->partition_mode];
+  int             cbp       = currMB->cbp;
+  DataPartition*  dataPart;
+  int             need_transform_size_flag;   //ADD-VG-24062004
+
+  int   b8, b4;
+  int*  DCLevel = img->cofDC[0][0];
+  int*  DCRun   = img->cofDC[0][1];
+  int*  ACLevel;
+  int*  ACRun;
+
+  if (!IS_NEWINTRA (currMB))
+  {
+    //=====   C B P   =====
+    //---------------------
+    se.value1 = cbp;
+    se.type   = SE_CBP;
+
+    // choose the appropriate data partition
+    dataPart = &(currSlice->partArr[partMap[se.type]]);
+
+#if TRACE
+    snprintf(se.tracestring, TRACESTRING_SIZE, "CBP (%2d,%2d) = %3d",img->mb_x, img->mb_y, cbp);
+#endif
+    writeCBP (&se, dataPart);
+
+    bitCount[BITS_CBP_MB] += se.len;
+    rate                  += se.len;
+
+    //============= Transform Size Flag for INTER MBs =============
+    //-------------------------------------------------------------
+    // flag is sent only if: (mb_type in 1..3, or direct with direct_8x8_inference, or NoMbPartLessThan8x8Flag)
+    // and the MB is neither I8MB nor I4MB, has luma bits in cbp, and Transform8x8Mode is enabled
+    need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)||
+                                (IS_DIRECT(currMB) && active_sps->direct_8x8_inference_flag) ||
+                                (currMB->NoMbPartLessThan8x8Flag))
+                                && currMB->mb_type != I8MB && currMB->mb_type != I4MB
+                                && (currMB->cbp&15)
+                                && input->Transform8x8Mode);
+
+    if (need_transform_size_flag)
+    {
+      se.value1 = currMB->luma_transform_size_8x8_flag;
+      se.type   = SE_MBTYPE;
+
+#if TRACE
+      snprintf(se.tracestring, TRACESTRING_SIZE, "transform_size_8x8_flag = %3d", currMB->luma_transform_size_8x8_flag);
+#endif
+      writeMB_transform_size(&se, dataPart); // dataPart is still the partition selected for SE_CBP above
+
+      bitCount[BITS_MB_MODE] += se.len;
+      rate                   += se.len;
+    }
+  }
+
+  //=====   DQUANT   =====
+  //----------------------
+  if (cbp!=0 || IS_NEWINTRA (currMB))
+  {
+    se.value1 = currMB->delta_qp;
+    se.type = SE_DELTA_QUANT;
+
+    // choose the appropriate data partition
+    dataPart = &(img->currentSlice->partArr[partMap[se.type]]);
+#if TRACE
+    snprintf(se.tracestring, TRACESTRING_SIZE, "Delta QP (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->delta_qp);
+#endif
+    writeDquant (&se, dataPart);
+    bitCount[BITS_DELTA_QUANT_MB] += se.len;
+    rate                          += se.len;
+  }
+  // reset this macroblock's nonzero-coefficient counts before any coefficients are written
+  for (i=0; i < 4; i++)
+    memset(img->nz_coeff [img->current_mb_nr][i], 0, (4 + img->num_blk8x8_uv) * sizeof(int));
+
+  if (!IS_NEWINTRA (currMB))
+  {
+    //=====  L U M I N A N C E   =====
+    //--------------------------------
+    for (i=0; i<4; i++)  if (cbp & (1<<i)) // only 8x8 blocks whose cbp bit is set carry coefficients
+    {
+      rate += writeLumaCoeff8x8 (i, currMB->b8mode[i], currMB->luma_transform_size_8x8_flag);
+    }
+  }
+  else
+  {
+    //=====  L U M I N A N C E   f o r   1 6 x 1 6   =====
+    //----------------------------------------------------
+    // DC coeffs
+    if (input->symbol_mode == UVLC)
+    {
+      rate += writeCoeff4x4_CAVLC (LUMA_INTRA16x16DC, 0, 0, 0);  // CAVLC
+    }
+    else
+    {
+      level=1; // get inside loop
+      img->is_intra_block = TRUE;
+      for (k=0; k<=16 && level!=0; k++) // the pair with level == 0 is written too and ends the loop
+      {
+        level = se.value1 = DCLevel[k]; // level
+        run   = se.value2 = DCRun  [k]; // run
+
+        se.context = LUMA_16DC;
+        se.type    = SE_LUM_DC_INTRA;   // element is of type DC
+
+        // choose the appropriate data partition
+        dataPart = &(currSlice->partArr[partMap[se.type]]);
+
+#if TRACE
+        snprintf(se.tracestring, TRACESTRING_SIZE, "DC luma 16x16 sng(%2d) level =%3d run =%2d", k, level, run);
+#endif
+        writeRunLevel_CABAC(&se, dataPart);
+        bitCount[BITS_COEFF_Y_MB] += se.len;
+        rate                      += se.len;
+      }
+    }
+
+    // AC coeffs
+    if (cbp & 15)
+    {
+      // (mb_x, mb_y) steps over the 8x8 blocks, (i, j) over the 4x4 blocks inside each of them
+      for (mb_y=0; mb_y < 4; mb_y += 2)
+      for (mb_x=0; mb_x < 4; mb_x += 2)
+      for (j=mb_y; j < mb_y+2; j++)
+      for (i=mb_x; i < mb_x+2; i++)
+      {
+        b8      = 2*(j >> 1) + (i >> 1);
+        b4      = 2*(j & 0x01) + (i & 0x01);
+        if (input->symbol_mode == UVLC)
+        {
+          rate += writeCoeff4x4_CAVLC (LUMA_INTRA16x16AC, b8, b4, 0);  // CAVLC
+        }
+        else
+        {
+          ACLevel = img->cofAC[b8][b4][0];
+          ACRun   = img->cofAC[b8][b4][1];
+
+          img->subblock_y = j;
+          img->subblock_x = i;
+
+          level=1; // get inside loop
+          img->is_intra_block = TRUE;
+          for (k=0;k<16 && level !=0;k++) // the pair with level == 0 is written too and ends the loop
+          {
+            level = se.value1 = ACLevel[k]; // level
+            run   = se.value2 = ACRun  [k]; // run
+
+            se.context = LUMA_16AC;
+            se.type    = SE_LUM_AC_INTRA;   // element is of type AC
+
+            // choose the appropriate data partition
+            dataPart = &(currSlice->partArr[partMap[se.type]]);
+#if TRACE
+            snprintf(se.tracestring, TRACESTRING_SIZE, "AC luma 16x16 sng(%2d) level =%3d run =%2d", k, level, run);
+#endif
+            writeRunLevel_CABAC(&se, dataPart);
+            bitCount[BITS_COEFF_Y_MB] += se.len;
+            rate                      += se.len;
+          }
+        }
+      }
+    }
+  }
+
+  return rate;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Predicts the number of nonzero coefficients of a luma 4x4 block
+ *    from its left and top neighbours
+ *
+ * \param i
+ *    horizontal 4x4 block index inside the macroblock
+ * \param j
+ *    vertical 4x4 block index inside the macroblock
+ *
+ * \return
+ *    rounded average of both neighbour counts when both are counted,
+ *    otherwise the sum of the counts that were available
+ ************************************************************************
+ */
+int predict_nnz(int i,int j)
+{
+  const int   mb_nr   = img->current_mb_nr;
+  Macroblock *currMB  = &(img->mb_data[mb_nr]);
+  const int   luma_x  = i << 2;
+  const int   luma_y  = j << 2;
+  PixelPos    nb;
+  int         sum     = 0;
+  int         counted = 0;
+  int         side;
+
+  // side 0: left neighbour, side 1: top neighbour
+  for (side = 0; side < 2; side++)
+  {
+    if (side == 0)
+      getLuma4x4Neighbour(mb_nr, luma_x - 1, luma_y, &nb);
+    else
+      getLuma4x4Neighbour(mb_nr, luma_x, luma_y - 1, &nb);
+
+    if (IS_INTRA(currMB) && nb.available && active_pps->constrained_intra_pred_flag && ((input->partition_mode != 0) && !img->currentPicture->idr_flag))
+    {
+      // a neighbour masked out here still counts, contributing zero coefficients
+      nb.available &= img->intra_block[nb.mb_addr];
+      if (!nb.available)
+        counted++;
+    }
+
+    if (nb.available)
+    {
+      sum += img->nz_coeff [nb.mb_addr ][nb.x][nb.y];
+      counted++;
+    }
+  }
+
+  if (counted == 2)
+    sum = (sum + 1) >> 1;
+
+  return sum;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Get the Prediction from the Neighboring Blocks for Number of Nonzero Coefficients
+ *
+ *    Chroma Blocks
+ *
+ * \param i
+ *    horizontal chroma 4x4 block index (first index into img->nz_coeff)
+ * \param j
+ *    vertical chroma 4x4 block index (second index into img->nz_coeff;
+ *    used to index a 12-entry offset table, so 0..11)
+ *
+ * \return
+ *    rounded average of both neighbour counts when both are counted,
+ *    otherwise the sum of the counts that were available
+ *
+ * \note
+ *    The YUV444 path used to decrement cnt unconditionally inside the
+ *    constrained-intra check, so two available neighbours were summed
+ *    instead of averaged. All chroma formats now share the handling
+ *    already used by predict_nnz() and the former 4:2:0/4:2:2 branch:
+ *    a neighbour masked out by the check is counted with zero
+ *    coefficients.
+ ************************************************************************
+ */
+int predict_nnz_chroma(int i,int j)
+{
+  static const int j_off_tab [12] = {0,0,0,0,4,4,4,4,8,8,8,8};
+
+  PixelPos pix;
+
+  int pred_nnz = 0;
+  int cnt      = 0;
+  int mb_nr    = img->current_mb_nr;
+  Macroblock *currMB = &(img->mb_data[mb_nr]);
+
+  int is_444 = (img->yuv_format == YUV444);
+  // row offset of this block's group inside img->nz_coeff[..][x][*]
+  int y_off  = is_444 ? j_off_tab[j] : 4;
+  // column offset added to the neighbour position (only for YUV420 and YUV422)
+  int x_off  = is_444 ? 0 : 2 * (i >> 1);
+  // sample coordinates of the block handed to getChroma4x4Neighbour
+  int blk_x  = (is_444 ? i : (i & 0x01)) << 2;
+  int blk_y  = (j - y_off) << 2;
+  int side;
+
+  // side 0: left block, side 1: top block
+  for (side = 0; side < 2; side++)
+  {
+    if (side == 0)
+      getChroma4x4Neighbour(mb_nr, blk_x - 1, blk_y, &pix);
+    else
+      getChroma4x4Neighbour(mb_nr, blk_x, blk_y - 1, &pix);
+
+    if (IS_INTRA(currMB) && pix.available && active_pps->constrained_intra_pred_flag && ((input->partition_mode != 0) && !img->currentPicture->idr_flag))
+    {
+      pix.available &= img->intra_block[pix.mb_addr];
+      // neighbour masked out: still counted, with zero coefficients
+      if (!pix.available)
+        cnt++;
+    }
+
+    if (pix.available)
+    {
+      pred_nnz += img->nz_coeff [pix.mb_addr ][x_off + pix.x][y_off + pix.y];
+      cnt++;
+    }
+  }
+
+  if (cnt==2)
+  {
+    pred_nnz++;
+    pred_nnz>>=1;
+  }
+
+  return pred_nnz;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes coeff of a 4x4 block (CAVLC)
+ *
+ * \param block_type
+ *    one of LUMA, LUMA_INTRA16x16DC, LUMA_INTRA16x16AC, CHROMA_DC,
+ *    CHROMA_AC; any other value is reported through error()
+ * \param b8
+ *    first index into img->cofAC (not read for the DC block types)
+ * \param b4
+ *    second index into img->cofAC (not read for the DC block types)
+ * \param param
+ *    CHROMA_DC: selects img->cofDC[param+1];
+ *    CHROMA_AC: packed block position, x = param >> 4, y = param & 15;
+ *    not read for the luma block types
+ *
+ * \return
+ *    number of bits written (sum of se.len over all syntax elements)
+ *
+ * \author
+ *    Karl Lillevold <karll at real.com>
+ *    contributions by James Au <james at ubvideo.com>
+ ************************************************************************
+ */
+
+int writeCoeff4x4_CAVLC (int block_type, int b8, int b4, int param)
+{
+  int           no_bits    = 0;
+  Macroblock    *currMB    = &img->mb_data[img->current_mb_nr];
+  SyntaxElement se;
+  int           *bitCount  = currMB->bitcounter;
+  Slice         *currSlice = img->currentSlice;
+  DataPartition *dataPart;
+  int           *partMap   = assignSE2partition[input->partition_mode];
+
+  int k,level = 1,run,vlcnum;
+  int numcoeff = 0, lastcoeff = 0, numtrailingones = 0; 
+  int numones = 0, totzeros = 0, zerosleft, numcoef; // numones is counted below but never read in this function
+  int numcoeff_vlc;
+  int code, level_two_or_higher;
+  int dptype = 0, bitcounttype = 0;
+  int nnz, max_coeff_num = 0, cdc=0, cac=0; // cdc / cac flag the chroma DC / chroma AC block types
+  int subblock_x, subblock_y;
+  char type[15]; // short label, only consumed by the TRACE strings
+
+  static const int incVlc[] = {0,3,6,12,24,48,32768};  // maximum vlc = 6
+
+
+  int*  pLevel = NULL;
+  int*  pRun = NULL;
+  // per block type: coefficient limit, bit counter slot, (level, run) source arrays and syntax element type
+  switch (block_type)
+  {
+  case LUMA:
+    max_coeff_num = 16;
+    bitcounttype = BITS_COEFF_Y_MB;
+
+    pLevel = img->cofAC[b8][b4][0];
+    pRun   = img->cofAC[b8][b4][1];
+
+    sprintf(type, "%s", "Luma");
+    if (IS_INTRA (currMB))
+    {
+      dptype = SE_LUM_AC_INTRA;
+    }
+    else
+    {
+      dptype = SE_LUM_AC_INTER;
+    }
+    break;
+  case LUMA_INTRA16x16DC:
+    max_coeff_num = 16;
+    bitcounttype = BITS_COEFF_Y_MB;
+
+    pLevel = img->cofDC[0][0];
+    pRun   = img->cofDC[0][1];
+
+    sprintf(type, "%s", "Lum16DC");
+    dptype = SE_LUM_DC_INTRA;
+    break;
+  case LUMA_INTRA16x16AC:
+    max_coeff_num = 15;
+    bitcounttype = BITS_COEFF_Y_MB;
+
+    pLevel = img->cofAC[b8][b4][0];
+    pRun   = img->cofAC[b8][b4][1];
+
+    sprintf(type, "%s", "Lum16AC");
+    dptype = SE_LUM_AC_INTRA;
+    break;
+
+  case CHROMA_DC:
+    max_coeff_num = img->num_cdc_coeff;
+    bitcounttype = BITS_COEFF_UV_MB;
+    cdc = 1;
+
+    pLevel = img->cofDC[param+1][0];
+    pRun   = img->cofDC[param+1][1];
+
+    sprintf(type, "%s", "ChrDC");
+    if (IS_INTRA (currMB))
+    {
+      dptype = SE_CHR_DC_INTRA;
+    }
+    else
+    {
+      dptype = SE_CHR_DC_INTER;
+    }
+    break;
+  case CHROMA_AC:
+    max_coeff_num = 15;
+    bitcounttype = BITS_COEFF_UV_MB;
+    cac = 1;
+
+    pLevel = img->cofAC[b8][b4][0];
+    pRun   = img->cofAC[b8][b4][1];
+
+    sprintf(type, "%s", "ChrAC");
+    if (IS_INTRA (currMB))
+    {
+      dptype = SE_CHR_AC_INTRA;
+    }
+    else
+    {
+      dptype = SE_CHR_AC_INTER;
+    }
+    break;
+  default:
+    error("writeCoeff4x4_CAVLC: Invalid block type", 600); // NOTE(review): pLevel/pRun stay NULL here; presumably error() does not return - confirm
+    break;
+  }
+
+  dataPart = &(currSlice->partArr[partMap[dptype]]);
+  // pass 1: scan the (level, run) pairs up to the first zero level and gather the statistics
+  for(k = 0; (k <= ((cdc)?img->num_cdc_coeff:16))&& level !=0; k++)
+  {
+    level = pLevel[k]; // level
+    run   = pRun[k];   // run
+
+    if (level)
+    {
+      if (run)
+        totzeros += run;
+      if (iabs(level) == 1)
+      {
+        numtrailingones ++;
+        numones ++;
+        if (numtrailingones > 3)
+        {
+          numtrailingones = 3; /* clip to 3 */
+        }
+      }
+      else
+      {
+        numtrailingones = 0; // a larger magnitude restarts the count, so only the final run of +-1 levels remains
+      }
+      numcoeff ++;
+      lastcoeff = k;
+    }
+  }
+
+  if (!cdc)
+  {
+    if (!cac)
+    {
+      // luma
+      subblock_x = ((b8&0x1)==0)?(((b4&0x1)==0)?0:1):(((b4&0x1)==0)?2:3);
+        // horiz. position for coeff_count context
+      subblock_y = (b8<2)?((b4<2)?0:1):((b4<2)?2:3);
+        // vert.  position for coeff_count context
+      nnz = predict_nnz(subblock_x,subblock_y);
+    }
+    else
+    {
+      // chroma AC
+      subblock_x = param >> 4;
+      subblock_y = param & 15;
+      nnz = predict_nnz_chroma(subblock_x,subblock_y);
+    }
+    // record this block's count; predict_nnz / predict_nnz_chroma read it for later blocks
+    img->nz_coeff [img->current_mb_nr ][subblock_x][subblock_y] = numcoeff;
+
+    // the predicted count selects one of four coeff/trailing-ones tables
+    if (nnz < 2)
+    {
+      numcoeff_vlc = 0;
+    }
+    else if (nnz < 4)
+    {
+      numcoeff_vlc = 1;
+    }
+    else if (nnz < 8)
+    {
+      numcoeff_vlc = 2;
+    }
+    else
+    {
+      numcoeff_vlc = 3;
+    }
+
+  }
+  else
+  {
+    // chroma DC (has its own VLC)
+    // numcoeff_vlc not relevant
+    numcoeff_vlc = 0;
+
+    subblock_x = param;
+    subblock_y = param;
+  }
+
+  se.type  = dptype;
+
+  se.value1 = numcoeff;
+  se.value2 = numtrailingones;
+  se.len    = numcoeff_vlc; /* use len to pass vlcnum */
+
+#if TRACE
+  snprintf(se.tracestring,
+    TRACESTRING_SIZE, "%s # c & tr.1s(%d,%d) vlc=%d #c=%d #t1=%d",
+    type, subblock_x, subblock_y, numcoeff_vlc, numcoeff, numtrailingones);
+#endif
+
+  if (!cdc)
+    writeSyntaxElement_NumCoeffTrailingOnes(&se, dataPart);
+  else
+    writeSyntaxElement_NumCoeffTrailingOnesChromaDC(&se, dataPart);
+
+  bitCount[bitcounttype]+=se.len; // after the write call se.len is read back as the bit count
+  no_bits               +=se.len;
+
+  if (!numcoeff)
+    return no_bits;
+
+  if (numcoeff) // always true here because of the early return above
+  {
+    code = 0;
+    for (k = lastcoeff; k > lastcoeff-numtrailingones; k--) // sign bits of the trailing ones, last coefficient first
+    {
+      level = pLevel[k]; // level
+      if (iabs(level) > 1)
+      {
+        printf("ERROR: level > 1\n");
+        exit(-1);
+      }
+      code <<= 1;
+      if (level < 0)
+      {
+        code |= 0x1; // 1 marks a negative trailing one
+      }
+    }
+
+    if (numtrailingones)
+    {
+      se.type  = dptype;
+
+      se.value2 = numtrailingones;
+      se.value1 = code;
+
+#if TRACE
+      snprintf(se.tracestring,
+        TRACESTRING_SIZE, "%s trailing ones sign (%d,%d)",
+        type, subblock_x, subblock_y);
+#endif
+
+      writeSyntaxElement_VLC (&se, dataPart);
+      bitCount[bitcounttype]+=se.len;
+      no_bits               +=se.len;
+
+    }
+
+    // encode levels
+    level_two_or_higher = (numcoeff > 3 && numtrailingones == 3) ? 0 : 1; // when set, the first level written below has its magnitude reduced by one
+
+    vlcnum = (numcoeff > 10 && numtrailingones < 3) ? 1 : 0; // starting level table
+
+    for (k = lastcoeff - numtrailingones; k >= 0; k--) // remaining levels, in reverse order
+    {
+      level = pLevel[k]; // level
+
+      se.value1 = level;
+      se.type  = dptype;
+
+  #if TRACE
+        snprintf(se.tracestring,
+          TRACESTRING_SIZE, "%s lev (%d,%d) k=%d vlc=%d lev=%3d",
+            type, subblock_x, subblock_y, k, vlcnum, level);
+  #endif
+
+          if (level_two_or_higher)
+          {
+            if (se.value1 > 0)
+              se.value1 --;
+            else
+              se.value1 ++;
+            level_two_or_higher = 0; // applies to the first written level only
+          }
+
+      //    encode level
+      if (vlcnum == 0)
+        writeSyntaxElement_Level_VLC1(&se, dataPart, active_sps->profile_idc);
+      else
+        writeSyntaxElement_Level_VLCN(&se, vlcnum, dataPart, active_sps->profile_idc);
+
+      // update VLC table
+      if (iabs(level)>incVlc[vlcnum]) // compares the unmodified level, not the adjusted se.value1
+        vlcnum++;
+
+      if (k == lastcoeff - numtrailingones && iabs(level)>3) // first written level with magnitude > 3 forces table 2
+        vlcnum = 2;
+
+      bitCount[bitcounttype]+=se.len;
+      no_bits               +=se.len;
+    }
+
+    // encode total zeroes
+    if (numcoeff < max_coeff_num) // skipped when the block holds the maximum number of coefficients
+    {
+
+      se.type  = dptype;
+      se.value1 = totzeros;
+
+      vlcnum = numcoeff-1;
+
+      se.len = vlcnum; // se.len again carries the table number into the writer
+
+#if TRACE
+      snprintf(se.tracestring,
+        TRACESTRING_SIZE, "%s totalrun (%d,%d) vlc=%d totzeros=%3d",
+          type, subblock_x, subblock_y, vlcnum, totzeros);
+#endif
+      if (!cdc)
+        writeSyntaxElement_TotalZeros(&se, dataPart);
+      else
+        writeSyntaxElement_TotalZerosChromaDC(&se, dataPart);
+
+      bitCount[bitcounttype]+=se.len;
+      no_bits               +=se.len;
+    }
+
+    // encode run before each coefficient
+    zerosleft = totzeros;
+    numcoef = numcoeff; // coefficients whose run is still to be written
+    for (k = lastcoeff; k >= 0; k--)
+    {
+      run = pRun[k]; // run
+
+      se.value1 = run;
+      se.type  = dptype;
+
+      // for last coeff, run is remaining totzeros
+      // when zerosleft is zero, remaining coeffs have 0 run
+      if (numcoeff <= 1 || !zerosleft)
+        break;
+
+      if (numcoef > 1 && zerosleft)
+      {
+
+        vlcnum = zerosleft - 1; // table chosen by the zeros still unassigned, capped at RUNBEFORE_NUM-1
+        if (vlcnum > RUNBEFORE_NUM-1)
+          vlcnum = RUNBEFORE_NUM-1;
+
+        se.len = vlcnum;
+
+#if TRACE
+        snprintf(se.tracestring,
+          TRACESTRING_SIZE, "%s run (%d,%d) k=%d vlc=%d run=%2d",
+            type, subblock_x, subblock_y, k, vlcnum, run);
+#endif
+
+        writeSyntaxElement_Run(&se, dataPart);
+
+        bitCount[bitcounttype]+=se.len;
+        no_bits               +=se.len;
+
+        zerosleft -= run;
+        numcoef --;
+      }
+    }
+  }
+
+  return no_bits;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Find best 16x16 based intra mode
+ *
+ * \par Input:
+ *    Image parameters, pointer to best 16x16 intra mode
+ *
+ * \par Output:
+ *    best 16x16 based SAD
+ ************************************************************************/
+int find_sad_16x16(int *intra_mode)
+{ // tries the four 16x16 intra modes (k = 0..3), stores the cheapest in *intra_mode and returns its cost halved
+  int current_intra_sad_2,best_intra_sad2;
+  int M1[16][16],M0[4][4][4][4],M3[4],M4[4][4]; // M1: residual, M0: residual regrouped per 4x4 block, M3: butterfly scratch, M4: per-block [0][0] terms
+
+  int i,j,k;
+  int ii,jj;
+  int mb_nr = img->current_mb_nr;
+
+  PixelPos up;          //!< pixel position p(0,-1)
+  PixelPos left[17];    //!< pixel positions p(-1, -1..15)
+
+  int up_avail, left_avail, left_up_avail;
+
+  for (i=0;i<17;i++)
+  {
+    getNeighbour(mb_nr, -1 ,  i-1 , IS_LUMA, &left[i]); // left[0] is the upper-left corner, left[1..16] the left column
+  }
+
+  getNeighbour(mb_nr, 0     ,  -1 , IS_LUMA, &up);
+
+  if (!(input->UseConstrainedIntraPred))
+  {
+    up_avail   = up.available;
+    left_avail = left[1].available;
+    left_up_avail = left[0].available;
+  }
+  else
+  {
+    // constrained intra prediction: a neighbour only counts if its macroblock is flagged in img->intra_block
+    up_avail      = up.available ? img->intra_block[up.mb_addr] : 0;
+    for (i=1, left_avail=1; i<17;i++)
+      left_avail  &= left[i].available ? img->intra_block[left[i].mb_addr]: 0;
+    left_up_avail = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+  }
+
+  best_intra_sad2=MAX_VALUE;
+  *intra_mode = DC_PRED_16; // result if no tested mode costs less than MAX_VALUE
+
+  for (k=0;k<4;k++)
+  {
+    if (input->IntraDisableInterOnly == 0 || img->type != I_SLICE) // the disable switches below are honoured only under this condition
+    {
+      if (input->Intra16x16ParDisable && (k==VERT_PRED_16||k==HOR_PRED_16))
+        continue;
+
+      if (input->Intra16x16PlaneDisable && k==PLANE_16)
+        continue;
+    }
+    //check if there are neighbours to predict from
+    if ((k==0 && !up_avail) || (k==1 && !left_avail) || (k==3 && (!left_avail || !up_avail || !left_up_avail))) // NOTE(review): 0/1/3 presumably equal VERT_PRED_16/HOR_PRED_16/PLANE_16 - confirm
+    {
+      ; // edge, do nothing
+    }
+    else
+    {
+      for (j=0;j<16;j++)
+      {
+        for (i=0;i<16;i++)
+        {
+          M1[j][i]=imgY_org[img->opix_y+j][img->opix_x+i]-img->mprr_2[k][j][i]; // original minus prediction of mode k
+          M0[i & 0x03][i >> 2][j & 0x03][j >> 2]=M1[j][i]; // index order: [x in block][block x][y in block][block y]
+        }
+      }
+      current_intra_sad_2=0;              // no SAD start handicap here
+      for (jj=0;jj<4;jj++) // for every 4x4 block (ii, jj): in-place 4-point butterflies along both block dimensions
+      {
+        for (ii=0;ii<4;ii++)
+        {
+          for (j=0;j<4;j++) // butterfly over the first index, for each j
+          {
+            M3[0]=M0[0][ii][j][jj]+M0[3][ii][j][jj];
+            M3[1]=M0[1][ii][j][jj]+M0[2][ii][j][jj];
+            M3[2]=M0[1][ii][j][jj]-M0[2][ii][j][jj];
+            M3[3]=M0[0][ii][j][jj]-M0[3][ii][j][jj];
+
+            M0[0][ii][j][jj]=M3[0]+M3[1];
+            M0[2][ii][j][jj]=M3[0]-M3[1];
+            M0[1][ii][j][jj]=M3[2]+M3[3];
+            M0[3][ii][j][jj]=M3[3]-M3[2];
+          }
+
+          for (i=0;i<4;i++) // butterfly over the third index, for each i
+          {
+            M3[0]=M0[i][ii][0][jj]+M0[i][ii][3][jj];
+            M3[1]=M0[i][ii][1][jj]+M0[i][ii][2][jj];
+            M3[2]=M0[i][ii][1][jj]-M0[i][ii][2][jj];
+            M3[3]=M0[i][ii][0][jj]-M0[i][ii][3][jj];
+
+            M0[i][ii][0][jj]=M3[0]+M3[1];
+            M0[i][ii][2][jj]=M3[0]-M3[1];
+            M0[i][ii][1][jj]=M3[2]+M3[3];
+            M0[i][ii][3][jj]=M3[3]-M3[2];
+            for (j=0;j<4;j++)
+              if ((i+j)!=0) // the [0][0] term of each block is left out here and handled through M4 below
+                current_intra_sad_2 += iabs(M0[i][ii][j][jj]);
+          }
+        }
+      }
+
+      for (j=0;j<4;j++)
+        for (i=0;i<4;i++)
+          M4[j][i]=M0[0][i][0][j]/4; // [0][0] term of block (i, j), scaled down by 4
+
+        // Hadamard of DC coeff
+        for (j=0;j<4;j++)
+        {
+          M3[0]=M4[j][0]+M4[j][3];
+          M3[1]=M4[j][1]+M4[j][2];
+          M3[2]=M4[j][1]-M4[j][2];
+          M3[3]=M4[j][0]-M4[j][3];
+
+          M4[j][0]=M3[0]+M3[1];
+          M4[j][2]=M3[0]-M3[1];
+          M4[j][1]=M3[2]+M3[3];
+          M4[j][3]=M3[3]-M3[2];
+        }
+
+        for (i=0;i<4;i++)
+        {
+          M3[0]=M4[0][i]+M4[3][i];
+          M3[1]=M4[1][i]+M4[2][i];
+          M3[2]=M4[1][i]-M4[2][i];
+          M3[3]=M4[0][i]-M4[3][i];
+
+          M4[0][i]=M3[0]+M3[1];
+          M4[2][i]=M3[0]-M3[1];
+          M4[1][i]=M3[2]+M3[3];
+          M4[3][i]=M3[3]-M3[2];
+
+          for (j=0;j<4;j++)
+            current_intra_sad_2 += iabs(M4[j][i]); // all 16 transformed DC terms are added
+        }
+        if(current_intra_sad_2 < best_intra_sad2) // strict '<': on a tie the lower mode index is kept
+        {
+          best_intra_sad2=current_intra_sad_2;
+          *intra_mode = k; // update best intra mode
+
+        }
+    }
+  }
+  best_intra_sad2 = best_intra_sad2/2; // returned cost is half the accumulated sum
+
+  return best_intra_sad2;
+
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/macroblock.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/macroblock.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/macroblock.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,58 @@
+
+/*!
+ ************************************************************************
+ * \file
+ *    macroblock.h
+ *
+ * \brief
+ *    Arrays for macroblock processing
+ *
+ * \author
+ *    Inge Lille-Langoy               <inge.lille-langoy at telenor.com>     \n
+ *    Telenor Satellite Services                                          \n
+ *    P.O.Box 6914 St.Olavs plass                                         \n
+ *    N-0130 Oslo, Norway
+ *
+ ************************************************************************/
+
+#ifndef _MACROBLOCK_H_
+#define _MACROBLOCK_H_
+
+void proceed2nextMacroblock(void);
+
+void  start_macroblock(int mb_addr, int mb_field);
+void  terminate_macroblock(Boolean *end_of_slice, Boolean *recode_macroblock);
+
+void  write_one_macroblock(int eos_bit);
+
+void LumaPrediction4x4   (int, int, int, int, int,   short, short);
+void LumaPrediction4x4Bi (int, int, int, int, short, short, int  );
+
+int  LumaResidualCoding8x8 (int*, int64*, int, short, int, int, short, short);
+void LumaResidualCoding (void);
+
+void ChromaResidualCoding (int*);
+
+void IntraChromaPrediction (int*, int*, int*);
+void IntraChromaRDDecision (RD_PARAMS);
+
+int  TransformDecision(int, int*);
+
+int  B8Mode2Value (int b8mode, int b8pdir);
+
+int  writeMBLayer (int rdopt, int *coeff_rate);
+void write_terminating_bit (short bit);
+
+int  writeReferenceFrame  (int mode, int i, int j, int fwd_flag, int  ref);
+int  writeMotionVector8x8 (int  i0, int  j0, int  i1, int  j1, int  refframe, int  list_idx, int  mv_mode);
+
+int  writeLumaCoeff4x4_CABAC (int, int, int);   /* (b8, b4, intra4x4mode); returns the bits written */
+int  writeLumaCoeff8x8_CABAC (int, int);        /* (b8, intra_mode); returns the bits written */
+int  writeLumaCoeff8x8       (int, int, int);   /* (block8x8, block_mode, transform_size_flag); returns the bits written */
+
+int  writeCoeff4x4_CAVLC     (int block_type, int b8, int b4, int param);   /* returns the bits written */
+
+int  find_sad_16x16 (int *intra_mode);   /* stores the chosen mode in *intra_mode, returns its cost */
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/mb_access.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mb_access.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mb_access.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,653 @@
+
+/*!
+ *************************************************************************************
+ * \file mb_access.c
+ *
+ * \brief
+ *    Functions for macroblock neighborhoods
+ *
+ *  \author
+ *      Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Karsten Sühring          <suehring at hhi.de>
+ *************************************************************************************
+ */
+#include <assert.h>
+
+#include "global.h"
+#include "mb_access.h"
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Availability test for a macroblock address: returns 0 when mbAddr lies
+ *    outside [0, img->PicSizeInMbs - 1] or, unless img->DeblockCall is set,
+ *    when its slice_nr differs from that of currMbAddr; returns 1 otherwise
+ ************************************************************************
+ */
+int mb_is_available(int mbAddr, int currMbAddr)
+{
+  const int last_mb = (int)img->PicSizeInMbs - 1;
+  if (mbAddr < 0 || mbAddr > last_mb)
+    return 0;
+
+  // the slice comparison is skipped entirely while img->DeblockCall is non-zero
+  if (!img->DeblockCall && img->mb_data[mbAddr].slice_nr != img->mb_data[currMbAddr].slice_nr)
+    return 0;
+
+  return 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Computes the addresses and availability flags of the neighbours A, B, C and D
+ *    of the current macroblock (img->current_mb_nr) and stores them in its Macroblock
+ ************************************************************************
+ */
+void CheckAvailabilityOfNeighbors(void)
+{
+  const int mb_nr = img->current_mb_nr;
+  Macroblock *currMB = &img->mb_data[mb_nr];
+
+  // mark all neighbors as unavailable
+  currMB->mb_available_up   = NULL;
+  currMB->mb_available_left = NULL;
+
+  if (img->MbaffFrameFlag)
+  {
+    int cur_mb_pair = mb_nr >> 1;                                   // index of the macroblock pair; addresses below are 2 * pair index
+    currMB->mbAddrA = 2 * (cur_mb_pair - 1);                        // previous pair
+    currMB->mbAddrB = 2 * (cur_mb_pair - img->PicWidthInMbs);       // pair one row of PicWidthInMbs pairs earlier
+    currMB->mbAddrC = 2 * (cur_mb_pair - img->PicWidthInMbs + 1);   // the pair after B
+    currMB->mbAddrD = 2 * (cur_mb_pair - img->PicWidthInMbs - 1);   // the pair before B
+
+    currMB->mbAvailA = mb_is_available(currMB->mbAddrA, mb_nr) && ((PicPos[cur_mb_pair    ][0])!=0);  // PicPos[..][0] is the x coordinate: current pair must not be in column 0
+    currMB->mbAvailB = mb_is_available(currMB->mbAddrB, mb_nr);
+    currMB->mbAvailC = mb_is_available(currMB->mbAddrC, mb_nr) && ((PicPos[cur_mb_pair + 1][0])!=0);  // the following pair must not start a new row
+    currMB->mbAvailD = mb_is_available(currMB->mbAddrD, mb_nr) && ((PicPos[cur_mb_pair    ][0])!=0);
+  }
+  else
+  {
+    currMB->mbAddrA = mb_nr - 1;                        // previous macroblock
+    currMB->mbAddrB = mb_nr - img->PicWidthInMbs;       // one row of PicWidthInMbs macroblocks earlier
+    currMB->mbAddrC = mb_nr - img->PicWidthInMbs + 1;   // the macroblock after B
+    currMB->mbAddrD = mb_nr - img->PicWidthInMbs - 1;   // the macroblock before B
+
+    currMB->mbAvailA = mb_is_available(currMB->mbAddrA, mb_nr) && ((PicPos[mb_nr    ][0])!=0);  // current macroblock must not be in column 0
+    currMB->mbAvailB = mb_is_available(currMB->mbAddrB, mb_nr);
+    currMB->mbAvailC = mb_is_available(currMB->mbAddrC, mb_nr) && ((PicPos[mb_nr + 1][0])!=0);  // NOTE(review): reads PicPos[mb_nr + 1]; confirm PicPos has an entry past the last macroblock
+    currMB->mbAvailD = mb_is_available(currMB->mbAddrD, mb_nr) && ((PicPos[mb_nr    ][0])!=0);
+  }
+
+  if (currMB->mbAvailA) currMB->mb_available_left = &(img->mb_data[currMB->mbAddrA]);  // A doubles as the "left" neighbour pointer
+  if (currMB->mbAvailB) currMB->mb_available_up   = &(img->mb_data[currMB->mbAddrB]);  // B doubles as the "up" neighbour pointer
+}
+
+
+/*!
+ ************************************************************************
+ * \brief  Copies the PicPos entry of mb_addr (x in [0], y in [1], macroblock units) to *x and *y
+ ************************************************************************
+ */
+void get_mb_block_pos_normal (int mb_addr, int *x, int*y)
+{
+  int *mb_pos = PicPos[ mb_addr ];
+  *x = mb_pos[0];
+  *y = mb_pos[1];
+}
+
+/*!
+ ************************************************************************
+ * \brief  Macroblock coordinates of mb_addr for mbaff type slices: PicPos is indexed by the
+ *         pair (mb_addr >> 1); y is twice the pair's y plus the low bit of mb_addr
+ ************************************************************************
+ */
+void get_mb_block_pos_mbaff (int mb_addr, int *x, int*y)
+{
+  int *pair_pos = PicPos[mb_addr >> 1];
+  *x = pair_pos[0];
+  *y = (pair_pos[1] << 1) + (mb_addr & 0x01);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    x and y sample coordinates of mb_addr: get_mb_block_pos() result scaled by img->mb_size[is_chroma]
+ ************************************************************************
+ */
+void get_mb_pos (int mb_addr, int *x, int*y, int is_chroma)
+{
+  // macroblock-unit position first, then scale by the width [0] and height [1] of the selected plane
+  get_mb_block_pos(mb_addr, x, y);
+  *x = *x * img->mb_size[is_chroma][0];
+  *y = *y * img->mb_size[is_chroma][1];
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    get neighbouring positions for non-aff coding
+ * \param curr_mb_nr
+ *   current macroblock number (decoding order)
+ * \param xN
+ *    input x position
+ * \param yN
+ *    input y position
+ * \param is_chroma
+ *    index into img->mb_size[] that selects the block width and height used here
+ * \param pix
+ *    returns position informations
+ ************************************************************************
+ */
+void getNonAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int is_chroma, PixelPos *pix)
+{
+  Macroblock *currMb = &img->mb_data[curr_mb_nr];
+  int maxW = img->mb_size[is_chroma][0], maxH = img->mb_size[is_chroma][1];
+/*
+  if (!is_chroma)
+  {
+    maxW = 16;
+    maxH = 16;
+  }
+  else
+  {
+    assert(img->yuv_format != 0);
+    maxW = img->mb_cr_size_x;
+    maxH = img->mb_cr_size_y;
+  }
+*/
+
+  if ((xN<0)&&(yN<0))                                   // left of and above the block: neighbour D
+  {
+    pix->mb_addr   = currMb->mbAddrD;
+    pix->available = currMb->mbAvailD;
+  }
+  else if ((xN<0)&&((yN>=0)&&(yN<maxH)))                // left of the block: neighbour A
+  {
+    pix->mb_addr  = currMb->mbAddrA;
+    pix->available = currMb->mbAvailA;
+  }
+  else if (((xN>=0)&&(xN<maxW))&&(yN<0))                // above the block: neighbour B
+  {
+    pix->mb_addr  = currMb->mbAddrB;
+    pix->available = currMb->mbAvailB;
+  }
+  else if (((xN>=0)&&(xN<maxW))&&((yN>=0)&&(yN<maxH)))  // inside the current macroblock
+  {
+    pix->mb_addr  = curr_mb_nr;
+    pix->available = TRUE;
+  }
+  else if ((xN>=maxW)&&(yN<0))                          // right of and above the block: neighbour C
+  {
+    pix->mb_addr  = currMb->mbAddrC;
+    pix->available = currMb->mbAvailC;
+  }
+  else
+  {
+    pix->available = FALSE;                             // every other position; pix->mb_addr is not written here
+  }
+
+  if (pix->available || img->DeblockCall)               // NOTE(review): with DeblockCall set this also runs for unavailable neighbours
+  {
+    int *CurPos = PicPos[ pix->mb_addr ];
+
+    pix->x = xN & (maxW - 1);                           // position inside the selected macroblock; presumably maxW/maxH are powers of two - TODO confirm
+    pix->y = yN & (maxH - 1);
+    pix->pos_x = CurPos[0] * maxW + pix->x;             // position in the picture
+    pix->pos_y = CurPos[1] * maxH + pix->y;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    get neighbouring positions for aff coding
+ * \param curr_mb_nr
+ *   current macroblock number (decoding order)
+ * \param xN
+ *    input x position
+ * \param yN
+ *    input y position
+ * \param is_chroma
+ *    index into img->mb_size[] that selects the block width and height used here
+ * \param pix
+ *    returns position informations
+ ************************************************************************
+ */
+void getAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int is_chroma, PixelPos *pix)
+{
+  Macroblock *currMb = &img->mb_data[curr_mb_nr];
+  int maxW, maxH;
+  int yM = -1;   // row inside the selected neighbour; stays -1 when no branch below assigns it
+
+/*
+  if (!is_chroma)
+  {
+    maxW = 16;
+    maxH = 16;
+  }
+  else
+  {
+    assert(img->yuv_format != 0);
+    maxW = img->mb_cr_size_x;
+    maxH = img->mb_cr_size_y;
+  }
+*/
+  maxW = img->mb_size[is_chroma][0];
+  maxH = img->mb_size[is_chroma][1];
+
+  // initialize to "not available"
+  pix->available = FALSE;
+
+  if(yN > (maxH - 1))   // below the current macroblock: never available
+  {
+    return;
+  }
+  if (xN > (maxW - 1) && yN >= 0 && yN < maxH)   // to the right, in the same rows: never available
+  {
+    return;
+  }
+
+  if (xN < 0)
+  {
+    if (yN < 0)
+    {
+      if(!currMb->mb_field)
+      {
+        // frame
+        if ((curr_mb_nr & 0x01) == 0)
+        {
+          // top
+          pix->mb_addr   = currMb->mbAddrD  + 1;
+          pix->available = currMb->mbAvailD;
+          yM = yN;
+        }
+        else
+        {
+          // bottom
+          pix->mb_addr   = currMb->mbAddrA;
+          pix->available = currMb->mbAvailA;
+          if (currMb->mbAvailA)
+          {
+            if(!img->mb_data[currMb->mbAddrA].mb_field)
+            {
+               yM = yN;
+            }
+            else
+            {
+              (pix->mb_addr)++;
+               yM = (yN + maxH) >> 1;
+            }
+          }
+        }
+      }
+      else
+      {
+        // field
+        if ((curr_mb_nr & 0x01) == 0)
+        {
+          // top
+          pix->mb_addr   = currMb->mbAddrD;
+          pix->available = currMb->mbAvailD;
+          if (currMb->mbAvailD)
+          {
+            if(!img->mb_data[currMb->mbAddrD].mb_field)
+            {
+              (pix->mb_addr)++;
+               yM = 2 * yN;
+            }
+            else
+            {
+               yM = yN;
+            }
+          }
+        }
+        else
+        {
+          // bottom
+          pix->mb_addr   = currMb->mbAddrD+1;
+          pix->available = currMb->mbAvailD;
+          yM = yN;
+        }
+      }
+    }
+    else
+    { // xN < 0 && yN >= 0
+      if (yN >= 0 && yN <maxH)
+      {
+        if (!currMb->mb_field)
+        {
+          // frame
+          if ((curr_mb_nr & 0x01) == 0)
+          {
+            // top
+            pix->mb_addr   = currMb->mbAddrA;
+            pix->available = currMb->mbAvailA;
+            if (currMb->mbAvailA)
+            {
+              if(!img->mb_data[currMb->mbAddrA].mb_field)
+              {
+                 yM = yN;
+              }
+              else
+              {
+                (pix->mb_addr)+= ((yN & 0x01) != 0);
+                yM = yN >> 1;
+              }
+            }
+          }
+          else
+          {
+            // bottom
+            pix->mb_addr   = currMb->mbAddrA;
+            pix->available = currMb->mbAvailA;
+            if (currMb->mbAvailA)
+            {
+              if(!img->mb_data[currMb->mbAddrA].mb_field)
+              {
+                (pix->mb_addr)++;
+                 yM = yN;
+              }
+              else
+              {
+                (pix->mb_addr)+= ((yN & 0x01) != 0);
+                yM = (yN + maxH) >> 1;
+              }
+            }
+          }
+        }
+        else
+        {
+          // field
+          if ((curr_mb_nr & 0x01) == 0)
+          {
+            // top
+            pix->mb_addr  = currMb->mbAddrA;
+            pix->available = currMb->mbAvailA;
+            if (currMb->mbAvailA)
+            {
+              if(!img->mb_data[currMb->mbAddrA].mb_field)
+              {
+                if (yN < (maxH >> 1))
+                {
+                   yM = yN << 1;
+                }
+                else
+                {
+                  (pix->mb_addr)++;
+                   yM = (yN << 1 ) - maxH;
+                }
+              }
+              else
+              {
+                 yM = yN;
+              }
+            }
+          }
+          else
+          {
+            // bottom
+            pix->mb_addr  = currMb->mbAddrA;
+            pix->available = currMb->mbAvailA;
+            if (currMb->mbAvailA)
+            {
+              if(!img->mb_data[currMb->mbAddrA].mb_field)
+              {
+                if (yN < (maxH >> 1))
+                {
+                  yM = (yN << 1) + 1;
+                }
+                else
+                {
+                  (pix->mb_addr)++;
+                   yM = (yN << 1 ) + 1 - maxH;
+                }
+              }
+              else
+              {
+                (pix->mb_addr)++;
+                 yM = yN;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  else
+  { // xN >= 0
+    if (xN >= 0 && xN < maxW)
+    {
+      if (yN<0)
+      {
+        if (!currMb->mb_field)
+        {
+          //frame
+          if ((curr_mb_nr & 0x01) == 0)
+          {
+            //top
+            pix->mb_addr  = currMb->mbAddrB;
+            // for the deblocker if the current MB is a frame and the one above is a field
+            // then the neighbor is the top MB of the pair
+            if (currMb->mbAvailB)
+            {
+              if (!(img->DeblockCall == 1 && (img->mb_data[currMb->mbAddrB]).mb_field))
+                pix->mb_addr  += 1;
+            }
+
+            pix->available = currMb->mbAvailB;
+            yM = yN;
+          }
+          else
+          {
+            // bottom
+            pix->mb_addr   = curr_mb_nr - 1;   // the other macroblock of the current pair
+            pix->available = TRUE;
+            yM = yN;
+          }
+        }
+        else
+        {
+          // field
+          if ((curr_mb_nr & 0x01) == 0)
+          {
+            // top
+            pix->mb_addr   = currMb->mbAddrB;
+            pix->available = currMb->mbAvailB;
+            if (currMb->mbAvailB)
+            {
+              if(!img->mb_data[currMb->mbAddrB].mb_field)
+              {
+                (pix->mb_addr)++;
+                 yM = 2* yN;
+              }
+              else
+              {
+                 yM = yN;
+              }
+            }
+          }
+          else
+          {
+            // bottom
+            pix->mb_addr   = currMb->mbAddrB + 1;
+            pix->available = currMb->mbAvailB;
+            yM = yN;
+          }
+        }
+      }
+      else
+      {
+        // yN >=0
+        // for the deblocker if this is the extra edge then do this special stuff
+        if (yN == 0 && img->DeblockCall == 2)
+        {
+          pix->mb_addr  = currMb->mbAddrB + 1;
+          pix->available = TRUE;
+          yM = yN - 1;
+        }
+
+        else if ((yN >= 0) && (yN <maxH))
+        {
+          pix->mb_addr   = curr_mb_nr;
+          pix->available = TRUE;
+          yM = yN;
+        }
+      }
+    }
+    else
+    { // xN >= maxW
+      if(yN < 0)
+      {
+        if (!currMb->mb_field)
+        {
+          // frame
+          if ((curr_mb_nr & 0x01) == 0)
+          {
+            // top
+            pix->mb_addr  = currMb->mbAddrC + 1;
+            pix->available = currMb->mbAvailC;
+            yM = yN;
+          }
+          else
+          {
+            // bottom
+            pix->available = FALSE;   // pix->mb_addr is not written in this branch
+          }
+        }
+        else
+        {
+          // field
+          if ((curr_mb_nr & 0x01) == 0)
+          {
+            // top
+            pix->mb_addr   = currMb->mbAddrC;
+            pix->available = currMb->mbAvailC;
+            if (currMb->mbAvailC)
+            {
+              if(!img->mb_data[currMb->mbAddrC].mb_field)
+              {
+                (pix->mb_addr)++;
+                 yM = 2* yN;
+              }
+              else
+              {
+                yM = yN;
+              }
+            }
+          }
+          else
+          {
+            // bottom
+            pix->mb_addr   = currMb->mbAddrC + 1;
+            pix->available = currMb->mbAvailC;
+            yM = yN;
+          }
+        }
+      }
+    }
+  }
+  if (pix->available || img->DeblockCall)   // NOTE(review): with DeblockCall set this also runs for unavailable neighbours
+  {
+    pix->x = xN & (maxW - 1);               // position inside the selected macroblock
+    pix->y = yM & (maxH - 1);
+    get_mb_pos(pix->mb_addr, &(pix->pos_x), &(pix->pos_y), is_chroma);   // sample position of that macroblock in the picture
+    pix->pos_x += pix->x;
+    pix->pos_y += pix->y;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    get neighbouring positions. MB AFF is automatically used from img structure
+ * \param curr_mb_nr
+ *   current macroblock number (decoding order)
+ * \param xN
+ *    input x position
+ * \param yN
+ *    input y position
+ * \param luma
+ *    1 if luma coding, 0 for chroma
+ * \param pix
+ *    returns position informations
+ ************************************************************************
+ */
+/*
+void getNeighbour(int curr_mb_nr, int xN, int yN, int is_chroma, PixelPos *pix)
+{
+  if (curr_mb_nr<0)
+    error ("getNeighbour: invalid macroblock number", 100);
+
+  if (img->MbaffFrameFlag)
+    getAffNeighbour(curr_mb_nr, xN, yN, is_chroma, pix);
+  else
+    getNonAffNeighbour(curr_mb_nr, xN, yN, is_chroma, pix);
+}
+*/
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Locate a neighbouring position through getNeighbour() with IS_LUMA and,
+ *    when it is available, express the result in units of 4x4 blocks
+ * \param curr_mb_nr
+ *   current macroblock number, forwarded to getNeighbour()
+ * \param block_x_pos
+ *    x position, forwarded to getNeighbour() as xN
+ * \param block_y_pos
+ *    y position, forwarded to getNeighbour() as yN
+ * \param pix
+ *    receives the result; x, y, pos_x and pos_y are shifted right by 2
+ *    only when pix->available is set
+ * \note
+ *    an unavailable neighbour leaves pix exactly as getNeighbour() wrote it
+ ************************************************************************
+ */
+void getLuma4x4Neighbour (int curr_mb_nr, int block_x_pos, int block_y_pos, PixelPos *pix)
+{
+  getNeighbour(curr_mb_nr, block_x_pos, block_y_pos, IS_LUMA, pix);
+  if (!pix->available)
+    return;
+
+  // divide all four coordinates by 4 (4x4 block units)
+  pix->x     >>= 2;
+  pix->y     >>= 2;
+  pix->pos_x >>= 2;
+  pix->pos_y >>= 2;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Locate a neighbouring position through getNeighbour() with IS_CHROMA and,
+ *    when it is available, express the result in units of 4x4 blocks
+ * \param curr_mb_nr
+ *   current macroblock number, forwarded to getNeighbour()
+ * \param block_x
+ *    x position, forwarded to getNeighbour() as xN
+ * \param block_y
+ *    y position, forwarded to getNeighbour() as yN
+ * \param pix
+ *    receives the result; x, y, pos_x and pos_y are shifted right by 2
+ *    only when pix->available is set
+ * \note
+ *    an unavailable neighbour leaves pix exactly as getNeighbour() wrote it
+ ************************************************************************
+ */
+void getChroma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, PixelPos *pix)
+{
+  getNeighbour(curr_mb_nr, block_x, block_y, IS_CHROMA, pix);
+  if (!pix->available)
+    return;
+
+  // divide all four coordinates by 4 (4x4 block units)
+  pix->x     >>= 2;
+  pix->y     >>= 2;
+  pix->pos_x >>= 2;
+  pix->pos_y >>= 2;
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/mb_access.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mb_access.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mb_access.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,33 @@
+
+/*!
+ *************************************************************************************
+ * \file mb_access.h
+ *
+ * \brief
+ *    Functions for macroblock neighborhoods
+ *
+ * \author
+ *     Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Karsten Sühring          <suehring at hhi.de>
+ *************************************************************************************
+ */
+
+#ifndef _MB_ACCESS_H_
+#define _MB_ACCESS_H_
+
+void CheckAvailabilityOfNeighbors(void);
+
+void (*getNeighbour)(unsigned int curr_mb_nr, int xN, int yN, int is_chroma, PixelPos *pix);  // function-pointer object, not a prototype; NOTE(review): no `extern`, so every including file gets a tentative definition - confirm the build permits common symbols
+void getAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int is_chroma, PixelPos *pix);     // same signature as *getNeighbour
+void getNonAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int is_chroma, PixelPos *pix);  // same signature as *getNeighbour
+void getLuma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, PixelPos *pix);
+void getChroma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, PixelPos *pix);
+int  mb_is_available(int mbAddr, int currMbAddr);
+void get_mb_pos (int mb_addr, int *x, int*y, int is_chroma);
+void (*get_mb_block_pos) (int mb_addr, int *x, int*y);      // function-pointer object as well (same `extern` caveat as getNeighbour)
+void get_mb_block_pos_normal (int mb_addr, int *x, int*y);  // same signature as *get_mb_block_pos
+void get_mb_block_pos_mbaff (int mb_addr, int *x, int*y);   // same signature as *get_mb_block_pos
+
+
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/mbuffer.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mbuffer.c:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mbuffer.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,3848 @@
+
+/*!
+ ***********************************************************************
+ *  \file
+ *      mbuffer.c
+ *
+ *  \brief
+ *      Frame buffer functions
+ *
+ *  \author
+ *      Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Karsten Sühring                 <suehring at hhi.de>
+ *      - Alexis Tourapis                 <alexismt at ieee.org>
+ ***********************************************************************
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+#include "global.h"
+#include "mbuffer.h"
+#include "memalloc.h"
+#include "output.h"
+#include "image.h"
+
+static void insert_picture_in_dpb(FrameStore* fs, StorablePicture* p);
+static void output_one_frame_from_dpb(void);
+static int  is_used_for_reference(FrameStore* fs);
+static void get_smallest_poc(int *poc,int * pos);
+static int  remove_unused_frame_from_dpb(void);
+static int  is_short_term_reference(FrameStore* fs);
+static int  is_long_term_reference(FrameStore* fs);
+void gen_field_ref_ids(StorablePicture *p);   // the only prototype in this group without internal linkage
+
+DecodedPictureBuffer dpb;   // decoded picture buffer state; set up by init_dpb() and released by free_dpb()
+
+StorablePicture **listX[6];   // six arrays of picture pointers; init_dpb() allocates MAX_LIST_SIZE entries for each
+
+ColocatedParams *Co_located = NULL;
+
+
+int listXsize[6];   // one counter per listX[] array; init_dpb() resets them to 0
+
+#define MAX_LIST_SIZE 33
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Print one line per used frame store of the DPB (frame_num, POCs and flags); compiled to an empty body unless DUMP_DPB is set.
+ ************************************************************************
+ */
+void dump_dpb(void)
+{
+#if DUMP_DPB
+  unsigned i;
+
+  for (i=0; i<dpb.used_size;i++)
+  {
+    printf("(");
+    printf("fn=%d  ", dpb.fs[i]->frame_num);
+    if (dpb.fs[i]->is_used & 1)   // bit 0: top field data present
+    {
+      if (dpb.fs[i]->top_field)
+        printf("T: poc=%d  ", dpb.fs[i]->top_field->poc);
+      else
+        printf("T: poc=%d  ", dpb.fs[i]->frame->top_poc);
+    }
+    if (dpb.fs[i]->is_used & 2)   // bit 1: bottom field data present
+    {
+      if (dpb.fs[i]->bottom_field)
+        printf("B: poc=%d  ", dpb.fs[i]->bottom_field->poc);
+      else
+        printf("B: poc=%d  ", dpb.fs[i]->frame->bottom_poc);
+    }
+    if (dpb.fs[i]->is_used == 3)
+      printf("F: poc=%d  ", dpb.fs[i]->frame->poc);
+    printf("G: poc=%d)  ", dpb.fs[i]->poc);
+    if (dpb.fs[i]->is_reference) printf ("ref (%d) ", dpb.fs[i]->is_reference);
+    if (dpb.fs[i]->is_long_term) printf ("lt_ref (%d) ", dpb.fs[i]->is_long_term);  // report the long-term flag itself, not is_reference
+    if (dpb.fs[i]->is_output) printf ("out  ");
+    if (dpb.fs[i]->is_used == 3)
+    {
+      if (dpb.fs[i]->frame->non_existing) printf ("ne  ");
+    }
+    printf ("\n");
+  }
+#endif
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Returns the size of the dpb depending on level and picture size
+ * \return
+ *    the per-level byte budget divided by the picture size (MBs * 384), capped at 16 frames
+ ************************************************************************
+ */
+int getDpbSize(void)
+{
+  int pic_size = (active_sps->pic_width_in_mbs_minus1 + 1) * (active_sps->pic_height_in_map_units_minus1 + 1) * (active_sps->frame_mbs_only_flag?1:2) * 384;
+
+  int size = 0;
+
+  switch (active_sps->level_idc)
+  {
+  case 9:
+    size = 152064;
+    break;
+  case 10:
+    size = 152064;
+    break;
+  case 11:
+    if ((active_sps->profile_idc < FREXT_HP)&&(active_sps->constrained_set3_flag == 1))
+      size = 152064;   // below FREXT_HP, level_idc 11 with constraint_set3_flag set signals level 1b
+    else
+      size = 345600;   // genuine level 1.1, for every profile
+    break;
+  case 12:
+    size = 912384;
+    break;
+  case 13:
+    size = 912384;
+    break;
+  case 20:
+    size = 912384;
+    break;
+  case 21:
+    size = 1824768;
+    break;
+  case 22:
+    size = 3110400;
+    break;
+  case 30:
+    size = 3110400;
+    break;
+  case 31:
+    size = 6912000;
+    break;
+  case 32:
+    size = 7864320;
+    break;
+  case 40:
+    size = 12582912;
+    break;
+  case 41:
+    size = 12582912;
+    break;
+ case 42:
+   if(  (active_sps->profile_idc==FREXT_HP   ) || (active_sps->profile_idc==FREXT_Hi10P)
+     || (active_sps->profile_idc==FREXT_Hi422) || (active_sps->profile_idc==FREXT_Hi444))
+     size = 13369344;
+   else
+     size = 12582912;
+   break;
+  case 50:
+    size = 42393600;
+    break;
+  case 51:
+    size = 70778880;
+    break;
+  default:
+    error ("undefined level", 500);
+    break;
+  }
+
+  size /= pic_size;   // bytes -> number of pictures that fit
+  return imin( size, 16);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Compare the number of short-term plus long-term reference frames in the
+ *    DPB with max(1, img->num_ref_frames) and call error() when it is larger
+ *
+ ************************************************************************
+ */
+void check_num_ref(void)
+{
+  const int refs_in_dpb  = (int)(dpb.ltref_frames_in_buffer + dpb.ref_frames_in_buffer);
+  const int refs_allowed = imax(1, img->num_ref_frames);
+  if (refs_in_dpb > refs_allowed)
+    error ("Max. number of reference frames exceeded. Invalid stream.", 500);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    (Re)build the decoded picture buffer: size it with getDpbSize(), allocate the
+ *    frame stores, the reference tables and the six listX[] arrays, reset all counters.
+ ************************************************************************
+ */
+void init_dpb(void)
+{
+  unsigned idx, list;
+
+  // release the previous buffer when this is not the first call
+  if (dpb.init_done)
+    free_dpb();
+
+  dpb.size = getDpbSize();
+
+  if (dpb.size < (unsigned int)input->num_ref_frames)
+    error ("DPB size at specified level is smaller than the specified number of reference frames. This is not allowed.\n", 1000);
+
+  // counters and pointers that describe the (still empty) buffer
+  dpb.used_size              = 0;
+  dpb.last_picture           = NULL;
+  dpb.ref_frames_in_buffer   = 0;
+  dpb.ltref_frames_in_buffer = 0;
+
+  // three pointer tables with dpb.size entries each
+  dpb.fs = calloc(dpb.size, sizeof (FrameStore*));
+  if (!dpb.fs)
+    no_mem_exit("init_dpb: dpb->fs");
+
+  dpb.fs_ref = calloc(dpb.size, sizeof (FrameStore*));
+  if (!dpb.fs_ref)
+    no_mem_exit("init_dpb: dpb->fs_ref");
+
+  dpb.fs_ltref = calloc(dpb.size, sizeof (FrameStore*));
+  if (!dpb.fs_ltref)
+    no_mem_exit("init_dpb: dpb->fs_ltref");
+
+  // dpb.fs receives freshly allocated frame stores; the reference tables start out empty
+  for (idx = 0; idx < dpb.size; idx++)
+  {
+    dpb.fs[idx]       = alloc_frame_store();
+    dpb.fs_ref[idx]   = NULL;
+    dpb.fs_ltref[idx] = NULL;
+  }
+
+  // allocate all six lists first ...
+  for (list = 0; list < 6; list++)
+  {
+    listX[list] = calloc(MAX_LIST_SIZE, sizeof (StorablePicture*)); // +1 for reordering
+    if (!listX[list])
+      no_mem_exit("init_dpb: listX[i]");
+  }
+
+  // ... then mark every slot empty and reset the list lengths
+  for (list = 0; list < 6; list++)
+  {
+    for (idx = 0; idx < MAX_LIST_SIZE; idx++)
+      listX[list][idx] = NULL;
+
+    listXsize[list] = 0;
+  }
+
+  dpb.last_output_poc  = INT_MIN;
+  img->last_has_mmco_5 = 0;
+
+  // this flag is what the check at the top of the function looks at
+  dpb.init_done = 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free the frame stores, the pointer tables and listX[] of the decoded picture buffer; every freed pointer is reset so a repeated call is harmless.
+ ************************************************************************
+ */
+void free_dpb(void)
+{
+  unsigned i;
+
+  if (dpb.fs)
+  {
+    for (i=0; i<dpb.size; i++)
+    {
+      free_frame_store(dpb.fs[i]);
+    }
+    free (dpb.fs);
+    dpb.fs=NULL;
+  }
+
+  // free(NULL) is a no-op; the tables only hold borrowed pointers, so just
+  // release them and clear the pointers to rule out a double free
+  free (dpb.fs_ref);
+  dpb.fs_ref = NULL;
+  free (dpb.fs_ltref);
+  dpb.fs_ltref = NULL;
+
+  dpb.last_output_poc = INT_MIN;
+
+  for (i=0; i<6; i++)
+  {
+    free (listX[i]);
+    listX[i] = NULL;
+  }
+
+  dpb.init_done = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate one zero-filled FrameStore and explicitly reset its state flags and picture pointers.
+ *    Calls no_mem_exit() when the allocation fails.
+ * \return
+ *    the allocated FrameStore structure
+ ************************************************************************
+ */
+FrameStore* alloc_frame_store(void)
+{
+  FrameStore *store = calloc (1, sizeof(FrameStore));
+
+  if (!store)
+    no_mem_exit("alloc_frame_store: f");
+
+  // no picture attached yet
+  store->frame        = NULL;
+  store->top_field    = NULL;
+  store->bottom_field = NULL;
+
+  // usage, reference and output state all cleared
+  store->is_used           = 0;
+  store->is_reference      = 0;
+  store->is_long_term      = 0;
+  store->is_orig_reference = 0;
+  store->is_output         = 0;
+
+  return store;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate memory for a stored picture.
+ *
+ * \param structure
+ *    picture structure
+ * \param size_x
+ *    horizontal luma size
+ * \param size_y
+ *    vertical luma size
+ * \param size_x_cr
+ *    horizontal chroma size
+ * \param size_y_cr
+ *    vertical chroma size
+ *
+ * \return
+ *    the allocated StorablePicture structure
+ ************************************************************************
+ */
+StorablePicture* alloc_storable_picture(PictureStructure structure, int size_x, int size_y, int size_x_cr, int size_y_cr)
+{
+  StorablePicture *s;
+
+  //printf ("Allocating (%s) picture (x=%d, y=%d, x_cr=%d, y_cr=%d)\n", (type == FRAME)?"FRAME":(type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", size_x, size_y, size_x_cr, size_y_cr);
+
+  s = calloc (1, sizeof(StorablePicture));
+  if (NULL==s)
+    no_mem_exit("alloc_storable_picture: s");
+
+  s->imgUV      = NULL;   // stays NULL for YUV400 (see below)
+  s->imgY_sub   = NULL;   // not allocated in this function
+  s->imgUV_sub  = NULL;   // not allocated in this function
+
+  get_mem2Dpel (&(s->imgY), size_y, size_x);
+  if (img->yuv_format != YUV400)
+    get_mem3Dpel (&(s->imgUV), 2, size_y_cr, size_x_cr);   // two chroma planes
+
+  s->mb_field = calloc (img->PicSizeInMbs, sizeof(int));   // one int per macroblock of the picture
+  if (NULL==s->mb_field)
+    no_mem_exit("alloc_storable_picture: s->mb_field");
+
+  // the arrays below hold one entry per BLOCK_SIZE x BLOCK_SIZE block of the luma plane
+  get_mem3D      ((byte****)(&(s->ref_idx)),    2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+  get_mem3Dint64 (&(s->ref_pic_id), 6, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+  get_mem3Dint64 (&(s->ref_id),     6, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+  get_mem4Dshort (&(s->mv),         2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE, 2);
+
+  get_mem2D (&(s->moving_block),       size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+  get_mem2D (&(s->field_frame),        size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+
+  s->pic_num=0;
+  s->frame_num=0;
+  s->long_term_frame_idx=0;
+  s->long_term_pic_num=0;
+  s->used_for_reference=0;
+  s->is_long_term=0;
+  s->non_existing=0;
+  s->is_output = 0;
+
+  s->structure=structure;
+
+  s->size_x = size_x;
+  s->size_y = size_y;
+  s->size_x_pad = size_x + 2 * IMG_PAD_SIZE - 1 - MB_BLOCK_SIZE;   // NOTE(review): presumably the largest padded luma x coordinate a macroblock may start at - confirm against users
+  s->size_y_pad = size_y + 2 * IMG_PAD_SIZE - 1 - MB_BLOCK_SIZE;
+  s->size_x_cr = size_x_cr;
+  s->size_y_cr = size_y_cr;
+  s->size_x_cr_pad = (int) (size_x_cr - 1) + (img_pad_size_uv_x << 1) - (img->mb_cr_size_x);   // chroma counterpart of size_x_pad
+  s->size_y_cr_pad = (int) (size_y_cr - 1) + (img_pad_size_uv_y << 1) - (img->mb_cr_size_y);
+
+  s->top_field    = NULL;
+  s->bottom_field = NULL;
+  s->frame        = NULL;
+
+  s->coded_frame    = 0;
+  s->MbaffFrameFlag = 0;
+
+  return s;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release a frame store together with the frame, top field and
+ *    bottom field pictures attached to it.
+ * \param f
+ *    FrameStore to be freed; NULL is accepted and ignored
+ *
+ ************************************************************************
+ */
+void free_frame_store(FrameStore* f)
+{
+  if (!f)
+    return;
+
+  if (f->frame)
+  {
+    free_storable_picture(f->frame);
+    f->frame = NULL;
+  }
+  if (f->top_field)
+  {
+    free_storable_picture(f->top_field);
+    f->top_field = NULL;
+  }
+  if (f->bottom_field)
+  {
+    free_storable_picture(f->bottom_field);
+    f->bottom_field = NULL;
+  }
+  free(f);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free picture memory: every non-NULL array of the picture, then the structure itself.
+ *
+ * \param p
+ *    Picture to be freed; NULL is accepted and ignored
+ *
+ ************************************************************************
+ */
+void free_storable_picture(StorablePicture* p)
+{
+  if (p)
+  {
+    if (p->ref_idx)
+    {
+      free_mem3D ((byte***)p->ref_idx, 2);
+      p->ref_idx = NULL;
+    }
+
+    if (p->ref_pic_id)
+    {
+      free_mem3Dint64 (p->ref_pic_id, 6);
+      p->ref_pic_id = NULL;
+    }
+    if (p->ref_id)
+    {
+      free_mem3Dint64 (p->ref_id, 6);
+      p->ref_id = NULL;
+    }
+    if (p->mv)
+    {
+      free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE);   // same row count that alloc_storable_picture() passed to get_mem4Dshort
+      p->mv = NULL;
+    }
+
+    if (p->moving_block)
+    {
+      free_mem2D (p->moving_block);
+      p->moving_block=NULL;
+    }
+
+    if (p->field_frame)
+    {
+      free_mem2D (p->field_frame);
+      p->field_frame=NULL;
+    }
+
+    if (p->imgY)
+    {
+      free_mem2Dpel (p->imgY);
+      p->imgY=NULL;
+    }
+    if (p->imgY_sub)
+    {
+      free_mem4Dpel (p->imgY_sub,4,4);
+      p->imgY_sub=NULL;
+    }
+    if ( p->imgUV_sub && img->yuv_format != YUV400 && input->ChromaMCBuffer )   // NOTE(review): a non-NULL imgUV_sub is not released when either extra condition fails - confirm that cannot happen
+    {
+      if ( img->yuv_format == YUV420 )   // the last two arguments depend on the chroma format
+      {
+        free_mem5Dpel (p->imgUV_sub, 2, 8, 8 );
+        p->imgUV_sub = NULL;
+      }
+      else if ( img->yuv_format == YUV422 )
+      {
+        free_mem5Dpel (p->imgUV_sub, 2, 4, 8 );
+        p->imgUV_sub = NULL;
+      }
+      else
+      { // YUV444
+        free_mem5Dpel (p->imgUV_sub, 2, 4, 4 );
+        p->imgUV_sub = NULL;
+      }
+    }
+
+    if (p->imgUV)
+    {
+      free_mem3Dpel (p->imgUV, 2);
+      p->imgUV=NULL;
+    }
+
+    if (p->mb_field)
+    {
+      free(p->mb_field);
+      p->mb_field=NULL;
+    }
+
+    free(p);
+    p = NULL;   // only clears this function's copy; the caller's pointer is unchanged and must not be used again
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mark FrameStore unused for reference
+ *
+ ************************************************************************
+ */
+static void unmark_for_reference(FrameStore* fs)
+{
+  StorablePicture *pics[3];
+  int k;
+
+  // Bit 0 of is_used guards the top field, bit 1 the bottom field.
+  if ((fs->is_used & 1) && fs->top_field)
+    fs->top_field->used_for_reference = 0;
+
+  if ((fs->is_used & 2) && fs->bottom_field)
+    fs->bottom_field->used_for_reference = 0;
+
+  if (fs->is_used == 3)
+  {
+    if (fs->top_field && fs->bottom_field)
+    {
+      fs->top_field->used_for_reference = 0;
+      fs->bottom_field->used_for_reference = 0;
+    }
+    // fs->frame is dereferenced without a NULL check when is_used == 3.
+    fs->frame->used_for_reference = 0;
+  }
+
+  fs->is_reference = 0;
+
+  // Release the sub-pel luma buffer and the reference id tables of every
+  // picture that exists, in the order frame, top field, bottom field.
+  pics[0] = fs->frame;
+  pics[1] = fs->top_field;
+  pics[2] = fs->bottom_field;
+
+  for (k = 0; k < 3; k++)
+  {
+    StorablePicture *pic = pics[k];
+
+    if (pic == NULL)
+      continue;
+
+    if (pic->imgY_sub)
+    {
+      free_mem4Dpel (pic->imgY_sub, 4, 4);
+      pic->imgY_sub = NULL;
+    }
+
+    if (pic->ref_pic_id)
+    {
+      free_mem3Dint64 (pic->ref_pic_id, 6);
+      pic->ref_pic_id = NULL;
+    }
+
+    if (pic->ref_id)
+    {
+      free_mem3Dint64 (pic->ref_id, 6);
+      pic->ref_id = NULL;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mark FrameStore unused for reference and reset long term flags
+ *
+ ************************************************************************
+ */
+static void unmark_for_long_term_reference(FrameStore* fs)
+{
+  StorablePicture *top = fs->top_field;
+  StorablePicture *bot = fs->bottom_field;
+
+  // Bit 0 of is_used guards the top field, bit 1 the bottom field.
+  if ((fs->is_used & 1) && top)
+  {
+    top->used_for_reference = 0;
+    top->is_long_term = 0;
+  }
+
+  if ((fs->is_used & 2) && bot)
+  {
+    bot->used_for_reference = 0;
+    bot->is_long_term = 0;
+  }
+
+  if (fs->is_used == 3)
+  {
+    if (top && bot)
+    {
+      top->used_for_reference = 0;
+      top->is_long_term = 0;
+      bot->used_for_reference = 0;
+      bot->is_long_term = 0;
+    }
+    // fs->frame is dereferenced without a NULL check when is_used == 3.
+    fs->frame->used_for_reference = 0;
+    fs->frame->is_long_term = 0;
+  }
+
+  // Finally reset the flags kept on the frame store itself.
+  fs->is_reference = 0;
+  fs->is_long_term = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two stored pictures by picture number for qsort in descending order
+ *
+ ************************************************************************
+ */
+static int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 )
+{
+  // qsort passes pointers to the array slots, which hold StorablePicture pointers
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+
+  // the larger pic_num sorts first
+  if (first->pic_num > second->pic_num)
+    return -1;
+
+  return (first->pic_num < second->pic_num) ? 1 : 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two stored pictures by long term picture number for qsort in ascending order
+ *
+ ************************************************************************
+ */
+static int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+
+  // the smaller long_term_pic_num sorts first
+  if (first->long_term_pic_num > second->long_term_pic_num)
+    return 1;
+
+  return (first->long_term_pic_num < second->long_term_pic_num) ? -1 : 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two frame stores by frame_num_wrap for qsort in descending order
+ *
+ ************************************************************************
+ */
+static int compare_fs_by_frame_num_desc( const void *arg1, const void *arg2 )
+{
+  // qsort passes pointers to the array slots, which hold FrameStore pointers
+  const FrameStore *first  = *(FrameStore * const *)arg1;
+  const FrameStore *second = *(FrameStore * const *)arg2;
+
+  // the larger frame_num_wrap sorts first
+  if (first->frame_num_wrap == second->frame_num_wrap)
+    return 0;
+
+  return (first->frame_num_wrap < second->frame_num_wrap) ? 1 : -1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two frame stores by long_term_frame_idx for qsort in ascending order
+ *
+ ************************************************************************
+ */
+static int compare_fs_by_lt_pic_idx_asc( const void *arg1, const void *arg2 )
+{
+  const FrameStore *first  = *(FrameStore * const *)arg1;
+  const FrameStore *second = *(FrameStore * const *)arg2;
+
+  // the smaller long_term_frame_idx sorts first
+  if (first->long_term_frame_idx == second->long_term_frame_idx)
+    return 0;
+
+  return (first->long_term_frame_idx < second->long_term_frame_idx) ? -1 : 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two stored pictures by poc for qsort in ascending order
+ *
+ ************************************************************************
+ */
+static int compare_pic_by_poc_asc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+
+  // the smaller poc sorts first
+  if (first->poc > second->poc)
+    return 1;
+
+  return (first->poc < second->poc) ? -1 : 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two stored pictures by poc for qsort in descending order
+ *
+ ************************************************************************
+ */
+static int compare_pic_by_poc_desc( const void *arg1, const void *arg2 )
+{
+  const StorablePicture *first  = *(StorablePicture * const *)arg1;
+  const StorablePicture *second = *(StorablePicture * const *)arg2;
+
+  // the larger poc sorts first
+  if (first->poc == second->poc)
+    return 0;
+
+  return (first->poc < second->poc) ? 1 : -1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two frame stores by poc for qsort in ascending order
+ *
+ ************************************************************************
+ */
+static int compare_fs_by_poc_asc( const void *arg1, const void *arg2 )
+{
+  const FrameStore *first  = *(FrameStore * const *)arg1;
+  const FrameStore *second = *(FrameStore * const *)arg2;
+
+  // the smaller poc sorts first
+  if (first->poc == second->poc)
+    return 0;
+
+  return (first->poc < second->poc) ? -1 : 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    compares two frame stores by poc for qsort in descending order
+ *
+ ************************************************************************
+ */
+static int compare_fs_by_poc_desc( const void *arg1, const void *arg2 )
+{
+  const FrameStore *first  = *(FrameStore * const *)arg1;
+  const FrameStore *second = *(FrameStore * const *)arg2;
+
+  // the larger poc sorts first
+  if (first->poc > second->poc)
+    return -1;
+
+  return (first->poc < second->poc) ? 1 : 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    returns true, if picture is short term reference picture
+ *
+ ************************************************************************
+ */
+int is_short_ref(StorablePicture *s)
+{
+  // short term: used for reference and not flagged as long term
+  if (!s->used_for_reference)
+    return 0;
+
+  return s->is_long_term ? 0 : 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    returns true, if picture is long term reference picture
+ *
+ ************************************************************************
+ */
+int is_long_ref(StorablePicture *s)
+{
+  // long term: used for reference and flagged as long term
+  if (!s->used_for_reference)
+    return 0;
+
+  return s->is_long_term ? 1 : 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generates an alternating field list from a given FrameStore list
+ *
+ ************************************************************************
+ */
+static void gen_pic_list_from_frame_list(PictureStructure currStrcture, FrameStore **fs_list, int list_idx, StorablePicture **list, int *list_size, int long_term)
+{
+  int scan[2];    // scan[0]: next frame store to test for a top field, scan[1]: for a bottom field
+  int pass;
+  int parity;     // 0 selects top fields, 1 selects bottom fields
+  int (*accept)(StorablePicture *s) = long_term ? is_long_ref : is_short_ref;
+
+  // Only field structures produce output; anything else leaves the list untouched.
+  if ((currStrcture != TOP_FIELD) && (currStrcture != BOTTOM_FIELD))
+    return;
+
+  scan[0] = 0;
+  scan[1] = 0;
+
+  // Alternate between the two parities, starting with the parity of the
+  // current field, taking at most one field per parity in each round.
+  while ((scan[0] < list_idx) || (scan[1] < list_idx))
+  {
+    for (pass = 0; pass < 2; pass++)
+    {
+      parity = (currStrcture == TOP_FIELD) ? pass : (1 - pass);
+
+      while (scan[parity] < list_idx)
+      {
+        FrameStore      *cand = fs_list[scan[parity]];
+        StorablePicture *field;
+
+        // every inspected frame store is consumed, whether it matches or not
+        scan[parity]++;
+
+        // is_used bit 0 marks a top field, bit 1 a bottom field
+        if (!(cand->is_used & (parity + 1)))
+          continue;
+
+        field = parity ? cand->bottom_field : cand->top_field;
+
+        if (accept(field))
+        {
+          list[*list_size] = field;
+          (*list_size)++;
+          break;
+        }
+      }
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initialize listX[0] and listX[1] depending on current slice type and picture structure
+ *
+ ************************************************************************
+ */
+void init_lists(int currSliceType, PictureStructure currPicStructure)
+{
+  // Overview:
+  //  1. refresh frame_num_wrap / pic_num of the short term references and
+  //     long_term_pic_num of the long term references in the dpb
+  //  2. build listX[0] (and listX[1] for B slices) for the given slice type
+  //  3. if both lists are identical and longer than one entry, swap the
+  //     first two entries of listX[1]
+  //  4. clip the list sizes to the active reference counts and NULL-fill
+  //     the remaining entries up to MAX_LIST_SIZE
+  int add_top = 0, add_bottom = 0;
+  unsigned i;
+  int j;
+  int MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+  int diff;
+
+  int list0idx = 0;
+  int list0idx_1 = 0;
+  int listltidx = 0;
+
+  FrameStore **fs_list0;
+  FrameStore **fs_list1;
+  FrameStore **fs_listlt;
+
+  StorablePicture *tmp_s;
+
+  if (currPicStructure == FRAME)
+  {
+    // frame: pic_num equals frame_num_wrap
+    for (i=0; i<dpb.ref_frames_in_buffer; i++)
+    {
+      if (dpb.fs_ref[i]->is_used==3)
+      {
+        if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+        {
+          // frame numbers above the current one are wrapped down by MaxFrameNum
+          if( dpb.fs_ref[i]->frame_num > img->frame_num )
+          {
+            dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num - MaxFrameNum;
+          }
+          else
+          {
+            dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num;
+          }
+          dpb.fs_ref[i]->frame->pic_num = dpb.fs_ref[i]->frame_num_wrap;
+        }
+      }
+    }
+    // update long_term_pic_num
+    for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+    {
+      if (dpb.fs_ltref[i]->is_used==3)
+      {
+        if (dpb.fs_ltref[i]->frame->is_long_term)
+        {
+          dpb.fs_ltref[i]->frame->long_term_pic_num = dpb.fs_ltref[i]->frame->long_term_frame_idx;
+        }
+      }
+    }
+  }
+  else
+  {
+    // field: the field with the same parity as the current picture gets
+    // 2*n+1, the field with the opposite parity gets 2*n
+    if (currPicStructure == TOP_FIELD)
+    {
+      add_top    = 1;
+      add_bottom = 0;
+    }
+    else
+    {
+      add_top    = 0;
+      add_bottom = 1;
+    }
+
+    for (i=0; i<dpb.ref_frames_in_buffer; i++)
+    {
+      if (dpb.fs_ref[i]->is_reference)
+      {
+        if( dpb.fs_ref[i]->frame_num > img->frame_num )
+        {
+          dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num - MaxFrameNum;
+        }
+        else
+        {
+          dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num;
+        }
+        if (dpb.fs_ref[i]->is_reference & 1)
+        {
+          dpb.fs_ref[i]->top_field->pic_num = (2 * dpb.fs_ref[i]->frame_num_wrap) + add_top;
+        }
+        if (dpb.fs_ref[i]->is_reference & 2)
+        {
+          dpb.fs_ref[i]->bottom_field->pic_num = (2 * dpb.fs_ref[i]->frame_num_wrap) + add_bottom;
+        }
+      }
+    }
+    // update long_term_pic_num
+    for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+    {
+      if (dpb.fs_ltref[i]->is_long_term & 1)
+      {
+        dpb.fs_ltref[i]->top_field->long_term_pic_num = 2 * dpb.fs_ltref[i]->top_field->long_term_frame_idx + add_top;
+  }
+      if (dpb.fs_ltref[i]->is_long_term & 2)
+      {
+        dpb.fs_ltref[i]->bottom_field->long_term_pic_num = 2 * dpb.fs_ltref[i]->bottom_field->long_term_frame_idx + add_bottom;
+      }
+    }
+  }
+
+
+
+  // I and SI slices: both list sizes are set to 0 and the function returns
+  // early (the NULL fill at the end of the function is not reached)
+  if ((currSliceType == I_SLICE)||(currSliceType == SI_SLICE))
+  {
+    listXsize[0] = 0;
+    listXsize[1] = 0;
+    return;
+  }
+
+  if ((currSliceType == P_SLICE)||(currSliceType == SP_SLICE))
+  {
+    // Calculate FrameNumWrap and PicNum
+    if (currPicStructure == FRAME)
+    {
+      // collect the short term reference frames
+      for (i=0; i<dpb.ref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ref[i]->is_used==3)
+        {
+          if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+          {
+            listX[0][list0idx++] = dpb.fs_ref[i]->frame;
+          }
+        }
+      }
+      // order list 0 by PicNum
+      qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc);
+      listXsize[0] = list0idx;
+      //printf("listX[0] (PicNum): "); for (i=0; i<list0idx; i++){printf ("%d  ", listX[0][i]->pic_num);} printf("\n");
+
+      // long term handling
+      for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ltref[i]->is_used==3)
+        {
+          if (dpb.fs_ltref[i]->frame->is_long_term)
+          {
+            listX[0][list0idx++]=dpb.fs_ltref[i]->frame;
+          }
+        }
+      }
+      // only the appended long term tail is sorted; the short term part keeps its order
+      qsort((void *)&listX[0][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+      listXsize[0] = list0idx;
+
+      //printf("listX[0] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->poc);} printf("\n");
+    }
+    else
+    {
+      // field: sort frame stores first, then expand them into an alternating field list
+      fs_list0 = calloc(dpb.size, sizeof (FrameStore*));
+      if (NULL==fs_list0)
+         no_mem_exit("init_lists: fs_list0");
+      fs_listlt = calloc(dpb.size, sizeof (FrameStore*));
+      if (NULL==fs_listlt)
+         no_mem_exit("init_lists: fs_listlt");
+
+      for (i=0; i<dpb.ref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ref[i]->is_reference)
+        {
+          fs_list0[list0idx++] = dpb.fs_ref[i];
+        }
+      }
+
+      qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_frame_num_desc);
+
+      //printf("fs_list0 (FrameNum): "); for (i=0; i<list0idx; i++){printf ("%d  ", fs_list0[i]->frame_num_wrap);} printf("\n");
+
+      listXsize[0] = 0;
+      gen_pic_list_from_frame_list(currPicStructure, fs_list0, list0idx, listX[0], &listXsize[0], 0);
+
+      //printf("listX[0] (PicNum): "); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->pic_num);} printf("\n");
+
+      // long term handling
+      for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+      {
+        fs_listlt[listltidx++]=dpb.fs_ltref[i];
+      }
+
+      qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc);
+
+      // long term fields are appended behind the short term fields already in listX[0]
+      gen_pic_list_from_frame_list(currPicStructure, fs_listlt, listltidx, listX[0], &listXsize[0], 1);
+
+      free(fs_list0);
+      free(fs_listlt);
+    }
+    listXsize[1] = 0;
+  }
+  else
+  {
+    // B-Slice
+    if (currPicStructure == FRAME)
+    {
+      // listX[0], first part: short term frames with poc below the current framepoc, descending poc
+      for (i=0; i<dpb.ref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ref[i]->is_used==3)
+        {
+          if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+          {
+            if (img->framepoc > dpb.fs_ref[i]->frame->poc)
+            {
+              listX[0][list0idx++] = dpb.fs_ref[i]->frame;
+            }
+          }
+        }
+      }
+      qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc);
+      list0idx_1 = list0idx;
+      // listX[0], second part: short term frames with poc above the current framepoc, ascending poc
+      for (i=0; i<dpb.ref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ref[i]->is_used==3)
+        {
+          if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+          {
+            if (img->framepoc < dpb.fs_ref[i]->frame->poc)
+            {
+              listX[0][list0idx++] = dpb.fs_ref[i]->frame;
+            }
+          }
+        }
+      }
+      qsort((void *)&listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc);
+
+      // listX[1] holds the same two parts in swapped order
+      for (j=0; j<list0idx_1; j++)
+      {
+        listX[1][list0idx-list0idx_1+j]=listX[0][j];
+      }
+      for (j=list0idx_1; j<list0idx; j++)
+      {
+        listX[1][j-list0idx_1]=listX[0][j];
+      }
+
+      listXsize[0] = listXsize[1] = list0idx;
+
+//      printf("listX[0] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->poc);} printf("\n");
+//      printf("listX[1] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[1]; i++){printf ("%d  ", listX[1][i]->poc);} printf("\n");
+
+      // long term handling
+      for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ltref[i]->is_used==3)
+        {
+          if (dpb.fs_ltref[i]->frame->is_long_term)
+          {
+            listX[0][list0idx]  =dpb.fs_ltref[i]->frame;
+            listX[1][list0idx++]=dpb.fs_ltref[i]->frame;
+          }
+        }
+      }
+      // sort only the long term tails of both lists
+      qsort((void *)&listX[0][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+      qsort((void *)&listX[1][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+      listXsize[0] = listXsize[1] = list0idx;
+    }
+    else
+    {
+      fs_list0 = calloc(dpb.size, sizeof (FrameStore*));
+      if (NULL==fs_list0)
+         no_mem_exit("init_lists: fs_list0");
+      fs_list1 = calloc(dpb.size, sizeof (FrameStore*));
+      if (NULL==fs_list1)
+         no_mem_exit("init_lists: fs_list1");
+      fs_listlt = calloc(dpb.size, sizeof (FrameStore*));
+      if (NULL==fs_listlt)
+         no_mem_exit("init_lists: fs_listlt");
+
+      listXsize[0] = 0;
+      // NOTE(review): the value 1 stored here is overwritten with 0 further
+      // down before listXsize[1] is read in this function; looks like a
+      // leftover/typo - confirm.
+      listXsize[1] = 1;
+
+      // fs_list0, first part: frame stores with poc not above ThisPOC, descending poc
+      for (i=0; i<dpb.ref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ref[i]->is_used)
+        {
+          if (img->ThisPOC >= dpb.fs_ref[i]->poc)
+          {
+            fs_list0[list0idx++] = dpb.fs_ref[i];
+          }
+        }
+      }
+      qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_poc_desc);
+      list0idx_1 = list0idx;
+      // fs_list0, second part: frame stores with poc above ThisPOC, ascending poc
+      for (i=0; i<dpb.ref_frames_in_buffer; i++)
+      {
+        if (dpb.fs_ref[i]->is_used)
+        {
+          if (img->ThisPOC < dpb.fs_ref[i]->poc)
+          {
+            fs_list0[list0idx++] = dpb.fs_ref[i];
+          }
+        }
+      }
+      qsort((void *)&fs_list0[list0idx_1], list0idx-list0idx_1, sizeof(FrameStore*), compare_fs_by_poc_asc);
+
+      // fs_list1 holds the same two parts in swapped order
+      for (j=0; j<list0idx_1; j++)
+      {
+        fs_list1[list0idx-list0idx_1+j]=fs_list0[j];
+      }
+      for (j=list0idx_1; j<list0idx; j++)
+      {
+        fs_list1[j-list0idx_1]=fs_list0[j];
+      }
+
+//      printf("fs_list0 currPoc=%d (Poc): ", img->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d  ", fs_list0[i]->poc);} printf("\n");
+//      printf("fs_list1 currPoc=%d (Poc): ", img->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d  ", fs_list1[i]->poc);} printf("\n");
+
+      listXsize[0] = 0;
+      listXsize[1] = 0;
+      gen_pic_list_from_frame_list(currPicStructure, fs_list0, list0idx, listX[0], &listXsize[0], 0);
+      gen_pic_list_from_frame_list(currPicStructure, fs_list1, list0idx, listX[1], &listXsize[1], 0);
+
+//      printf("listX[0] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->poc);} printf("\n");
+//      printf("listX[1] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[1]; i++){printf ("%d  ", listX[1][i]->poc);} printf("\n");
+
+      // long term handling
+      for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+      {
+        fs_listlt[listltidx++]=dpb.fs_ltref[i];
+      }
+
+      qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc);
+
+      // the same long term fields are appended to both lists
+      gen_pic_list_from_frame_list(currPicStructure, fs_listlt, listltidx, listX[0], &listXsize[0], 1);
+      gen_pic_list_from_frame_list(currPicStructure, fs_listlt, listltidx, listX[1], &listXsize[1], 1);
+
+      free(fs_list0);
+      free(fs_list1);
+      free(fs_listlt);
+    }
+  }
+
+  if ((listXsize[0] == listXsize[1]) && (listXsize[0] > 1))
+  {
+    // check if lists are identical, if yes swap first two elements of listX[1]
+    diff=0;
+    for (j = 0; j< listXsize[0]; j++)
+    {
+      if (listX[0][j]!=listX[1][j])
+        diff=1;
+    }
+    if (!diff)
+    {
+      tmp_s = listX[1][0];
+      listX[1][0]=listX[1][1];
+      listX[1][1]=tmp_s;
+    }
+  }
+
+  // set max size
+  listXsize[0] = imin (listXsize[0], img->num_ref_idx_l0_active);
+  listXsize[1] = imin (listXsize[1], img->num_ref_idx_l1_active);
+
+  // set the unused list entries to NULL
+  for (i=listXsize[0]; i< (MAX_LIST_SIZE) ; i++)
+  {
+    listX[0][i] = NULL;
+  }
+  for (i=listXsize[1]; i< (MAX_LIST_SIZE) ; i++)
+  {
+    listX[1][i] = NULL;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initialize listX[2..5] from lists 0 and 1
+ *    listX[2]: list0 for current_field==top
+ *    listX[3]: list1 for current_field==top
+ *    listX[4]: list0 for current_field==bottom
+ *    listX[5]: list1 for current_field==bottom
+ *
+ ************************************************************************
+ */
+void init_mbaff_lists(void)
+{
+  unsigned slot;
+  int lst;
+  int n;
+
+  // start with empty lists 2..5
+  for (lst = 2; lst < 6; lst++)
+  {
+    for (slot = 0; slot < MAX_LIST_SIZE; slot++)
+    {
+      listX[lst][slot] = NULL;
+    }
+    listXsize[lst] = 0;
+  }
+
+  // listX[0] feeds listX[2] and listX[4], listX[1] feeds listX[3] and listX[5].
+  // Each source picture contributes its two fields: top first for lists 2/3,
+  // bottom first for lists 4/5.
+  for (lst = 0; lst < 2; lst++)
+  {
+    for (n = 0; n < listXsize[lst]; n++)
+    {
+      StorablePicture *src = listX[lst][n];
+
+      listX[2 + lst][2*n]     = src->top_field;
+      listX[2 + lst][2*n + 1] = src->bottom_field;
+      listX[4 + lst][2*n]     = src->bottom_field;
+      listX[4 + lst][2*n + 1] = src->top_field;
+    }
+    listXsize[2 + lst] = listXsize[4 + lst] = listXsize[lst] * 2;
+  }
+}
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Returns short term pic with given picNum
+ *
+ ************************************************************************
+ */
+static StorablePicture*  get_short_term_pic(int picNum)
+{
+  unsigned idx;
+
+  for (idx = 0; idx < dpb.ref_frames_in_buffer; idx++)
+  {
+    FrameStore *store = dpb.fs_ref[idx];
+
+    if (img->structure == FRAME)
+    {
+      // frame: both is_reference bits must be set
+      if ((store->is_reference == 3)
+          && (!store->frame->is_long_term)
+          && (store->frame->pic_num == picNum))
+      {
+        return store->frame;
+      }
+      continue;
+    }
+
+    // field: test the top field first, then the bottom field
+    if ((store->is_reference & 1)
+        && (!store->top_field->is_long_term)
+        && (store->top_field->pic_num == picNum))
+    {
+      return store->top_field;
+    }
+
+    if ((store->is_reference & 2)
+        && (!store->bottom_field->is_long_term)
+        && (store->bottom_field->pic_num == picNum))
+    {
+      return store->bottom_field;
+    }
+  }
+
+  // no short term picture with this picNum in the buffer
+  return NULL;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Returns long term pic with given LongtermPicNum
+ *
+ ************************************************************************
+ */
+static StorablePicture*  get_long_term_pic(int LongtermPicNum)
+{
+  unsigned idx;
+
+  for (idx = 0; idx < dpb.ltref_frames_in_buffer; idx++)
+  {
+    FrameStore *store = dpb.fs_ltref[idx];
+
+    if (img->structure == FRAME)
+    {
+      // frame: both is_reference bits must be set
+      if ((store->is_reference == 3)
+          && (store->frame->is_long_term)
+          && (store->frame->long_term_pic_num == LongtermPicNum))
+      {
+        return store->frame;
+      }
+      continue;
+    }
+
+    // field: test the top field first, then the bottom field
+    if ((store->is_reference & 1)
+        && (store->top_field->is_long_term)
+        && (store->top_field->long_term_pic_num == LongtermPicNum))
+    {
+      return store->top_field;
+    }
+
+    if ((store->is_reference & 2)
+        && (store->bottom_field->is_long_term)
+        && (store->bottom_field->long_term_pic_num == LongtermPicNum))
+    {
+      return store->bottom_field;
+    }
+  }
+
+  // no long term picture with this LongtermPicNum in the buffer
+  return NULL;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reordering process for short-term reference pictures
+ *
+ ************************************************************************
+ */
+static void reorder_short_term(StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int picNumLX, int *refIdxLX)
+{
+  // index of the last slot touched: one past the active list
+  const int last = num_ref_idx_lX_active_minus1 + 1;
+  StorablePicture *target = get_short_term_pic(picNumLX);
+  int src;
+  int dst;
+
+  // shift the entries from *refIdxLX onwards one slot to the right
+  for (src = last; src > *refIdxLX; src--)
+  {
+    RefPicListX[src] = RefPicListX[src - 1];
+  }
+
+  // place the requested picture and advance the insertion index
+  RefPicListX[*refIdxLX] = target;
+  (*refIdxLX)++;
+
+  // compact the tail: keep long term pictures and short term pictures with
+  // a different pic_num; NULL entries are skipped
+  dst = *refIdxLX;
+  for (src = *refIdxLX; src <= last; src++)
+  {
+    StorablePicture *entry = RefPicListX[src];
+
+    if (entry == NULL)
+      continue;
+
+    if (entry->is_long_term || (entry->pic_num != picNumLX))
+    {
+      RefPicListX[dst] = entry;
+      dst++;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reordering process for long-term reference pictures
+ *
+ ************************************************************************
+ */
+static void reorder_long_term(StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int LongTermPicNum, int *refIdxLX)
+{
+  // Moves the long term picture with the given LongTermPicNum to position
+  // *refIdxLX of RefPicListX, shifts the following entries one slot to the
+  // right and removes the other occurrence of that picture from the tail.
+  // *refIdxLX is advanced by one. The list is accessed up to index
+  // num_ref_idx_lX_active_minus1 + 1.
+  int cIdx, nIdx;
+
+  StorablePicture *picLX;
+
+  picLX = get_long_term_pic(LongTermPicNum);
+
+  for( cIdx = num_ref_idx_lX_active_minus1+1; cIdx > *refIdxLX; cIdx-- )
+    RefPicListX[ cIdx ] = RefPicListX[ cIdx - 1];
+
+  RefPicListX[ (*refIdxLX)++ ] = picLX;
+
+  nIdx = *refIdxLX;
+
+  // List entries may be NULL (unused slots, or a picture that was not found
+  // by an earlier reordering command), so skip them instead of dereferencing;
+  // this mirrors the guard in reorder_short_term.
+  for( cIdx = *refIdxLX; cIdx <= num_ref_idx_lX_active_minus1+1; cIdx++ )
+    if (RefPicListX[ cIdx ])
+      if( (!RefPicListX[ cIdx ]->is_long_term ) ||  (RefPicListX[ cIdx ]->long_term_pic_num != LongTermPicNum ))
+        RefPicListX[ nIdx++ ] = RefPicListX[ cIdx ];
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reordering process for reference picture lists
+ *
+ ************************************************************************
+ */
+void reorder_ref_pic_list(StorablePicture **list, int *list_size, int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx)
+{
+  int cmd;
+  int maxPicNum, currPicNum;
+  int predicted;      // picture number prediction carried from command to command
+  int unwrapped;      // picture number before the comparison against currPicNum
+  int picNumLX;
+  int step;
+  int refIdxLX = 0;
+  int MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+
+  // fields use twice the picture number range of frames
+  if (img->structure != FRAME)
+  {
+    maxPicNum  = 2 * MaxFrameNum;
+    currPicNum = 2 * img->frame_num + 1;
+  }
+  else
+  {
+    maxPicNum  = MaxFrameNum;
+    currPicNum = img->frame_num;
+  }
+
+  predicted = currPicNum;
+
+  // the command list is terminated by the value 3
+  for (cmd = 0; reordering_of_pic_nums_idc[cmd] != 3; cmd++)
+  {
+    if (reordering_of_pic_nums_idc[cmd] > 3)
+      error ("Invalid remapping_of_pic_nums_idc command", 500);
+
+    if (reordering_of_pic_nums_idc[cmd] >= 2)
+    {
+      // long term reordering command
+      reorder_long_term(list, num_ref_idx_lX_active_minus1, long_term_pic_idx[cmd], &refIdxLX);
+      continue;
+    }
+
+    // short term reordering command: 0 subtracts the difference from the
+    // prediction, anything else below 2 adds it; the result is wrapped
+    // into the range [0, maxPicNum)
+    step = abs_diff_pic_num_minus1[cmd] + 1;
+
+    if (reordering_of_pic_nums_idc[cmd] == 0)
+    {
+      unwrapped = predicted - step;
+      if (unwrapped < 0)
+        unwrapped += maxPicNum;
+    }
+    else
+    {
+      unwrapped = predicted + step;
+      if (unwrapped >= maxPicNum)
+        unwrapped -= maxPicNum;
+    }
+    predicted = unwrapped;
+
+    picNumLX = (unwrapped > currPicNum) ? (unwrapped - maxPicNum) : unwrapped;
+
+    reorder_short_term(list, num_ref_idx_lX_active_minus1, picNumLX, &refIdxLX);
+  }
+
+  // that's a definition
+  *list_size = num_ref_idx_lX_active_minus1 + 1;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Update the list of frame stores that contain reference frames/fields
+ *
+ ************************************************************************
+ */
+void update_ref_list(void)
+{
+  unsigned src;
+  unsigned kept = 0;
+
+  // copy every short term reference frame store to the front of fs_ref
+  for (src = 0; src < dpb.used_size; src++)
+  {
+    if (!is_short_term_reference(dpb.fs[src]))
+      continue;
+
+    dpb.fs_ref[kept] = dpb.fs[src];
+    kept++;
+  }
+
+  dpb.ref_frames_in_buffer = kept;
+
+  // clear the remaining slots up to the dpb size
+  for ( ; kept < dpb.size; kept++)
+  {
+    dpb.fs_ref[kept] = NULL;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Update the list of frame stores that contain long-term reference
+ *    frames/fields
+ *
+ ************************************************************************
+ */
+void update_ltref_list(void)
+{
+  unsigned src;
+  unsigned kept = 0;
+
+  // copy every long term reference frame store to the front of fs_ltref
+  for (src = 0; src < dpb.used_size; src++)
+  {
+    if (!is_long_term_reference(dpb.fs[src]))
+      continue;
+
+    dpb.fs_ltref[kept] = dpb.fs[src];
+    kept++;
+  }
+
+  dpb.ltref_frames_in_buffer = kept;
+
+  // clear the remaining slots up to the dpb size
+  for ( ; kept < dpb.size; kept++)
+  {
+    dpb.fs_ltref[kept] = NULL;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Perform Memory management for idr pictures
+ *
+ ************************************************************************
+ */
+static void idr_memory_management(StorablePicture* p)
+{
+  unsigned n;
+
+  assert (img->currentPicture->idr_flag);
+
+  if (!img->no_output_of_prior_pics_flag)
+  {
+    flush_dpb();
+  }
+  else
+  {
+    // free every stored picture and replace each used slot with a newly
+    // allocated frame store
+    for (n = 0; n < dpb.used_size; n++)
+    {
+      free_frame_store(dpb.fs[n]);
+      dpb.fs[n] = alloc_frame_store();
+    }
+
+    // drop the now stale short term and long term reference entries
+    for (n = 0; n < dpb.ref_frames_in_buffer; n++)
+    {
+      dpb.fs_ref[n] = NULL;
+    }
+    for (n = 0; n < dpb.ltref_frames_in_buffer; n++)
+    {
+      dpb.fs_ltref[n] = NULL;
+    }
+
+    dpb.used_size = 0;
+  }
+
+  dpb.last_picture = NULL;
+
+  update_ref_list();
+  update_ltref_list();
+  dpb.last_output_poc = INT_MIN;
+
+  // long term marking of the picture passed in
+  if (!img->long_term_reference_flag)
+  {
+    dpb.max_long_term_pic_idx = -1;
+    p->is_long_term           = 0;
+  }
+  else
+  {
+    dpb.max_long_term_pic_idx = 0;
+    p->is_long_term           = 1;
+    p->long_term_frame_idx    = 0;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Perform Sliding window decoded reference picture marking process
+ *
+ ************************************************************************
+ */
+static void sliding_window_memory_management(StorablePicture* p)
+{
+  unsigned slot;
+
+  assert (!img->currentPicture->idr_flag);
+
+  // when the short-term list fills what the long-term list leaves of
+  // num_ref_frames, unmark the first short-term reference found in dpb.fs
+  if (dpb.ref_frames_in_buffer==active_sps->num_ref_frames - dpb.ltref_frames_in_buffer)
+  {
+    for (slot = 0; slot < dpb.used_size; ++slot)
+    {
+      if (!dpb.fs[slot]->is_reference || dpb.fs[slot]->is_long_term)
+        continue;
+
+      unmark_for_reference(dpb.fs[slot]);
+      update_ref_list();
+      break;
+    }
+  }
+
+  p->is_long_term = 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Calculate picNumX
+ ************************************************************************
+ */
+static int get_pic_num_x (StorablePicture *p, int difference_of_pic_nums_minus1)
+{
+  // current picture number: frame_num for frames, 2*frame_num+1 for fields
+  const int curr_pic_num = (p->structure == FRAME) ? p->frame_num
+                                                   : 2 * p->frame_num + 1;
+
+  return curr_pic_num - (difference_of_pic_nums_minus1 + 1);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Adaptive Memory Management: Mark short term picture unused
+ ************************************************************************
+ */
+static void mm_unmark_short_term_for_reference(StorablePicture *p, int difference_of_pic_nums_minus1)
+{
+  const int target = get_pic_num_x(p, difference_of_pic_nums_minus1);
+  unsigned n;
+
+  for (n = 0; n < dpb.ref_frames_in_buffer; ++n)
+  {
+    FrameStore *fs = dpb.fs_ref[n];
+
+    if (p->structure == FRAME)
+    {
+      // frame mode: only complete short-term frames are candidates
+      if ((fs->is_reference == 3) && (fs->is_long_term == 0) &&
+          (fs->frame->pic_num == target))
+      {
+        unmark_for_reference(fs);
+        return;
+      }
+      continue;
+    }
+
+    // field mode: try the top field, then the bottom field of this store
+    if ((fs->is_reference & 1) && !(fs->is_long_term & 1) &&
+        (fs->top_field->pic_num == target))
+    {
+      fs->top_field->used_for_reference = 0;
+      fs->is_reference &= 2;
+      if (fs->is_used == 3)
+        fs->frame->used_for_reference = 0;
+      return;
+    }
+
+    if ((fs->is_reference & 2) && !(fs->is_long_term & 2) &&
+        (fs->bottom_field->pic_num == target))
+    {
+      fs->bottom_field->used_for_reference = 0;
+      fs->is_reference &= 1;
+      if (fs->is_used == 3)
+        fs->frame->used_for_reference = 0;
+      return;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Adaptive Memory Management: Mark long term picture unused
+ ************************************************************************
+ */
+static void mm_unmark_long_term_for_reference(StorablePicture *p, int long_term_pic_num)
+{
+  unsigned n;
+
+  for (n = 0; n < dpb.ltref_frames_in_buffer; ++n)
+  {
+    FrameStore *fs = dpb.fs_ltref[n];
+
+    if (p->structure == FRAME)
+    {
+      // frame mode: no early exit, the scan goes on over the remaining stores
+      if ((fs->is_reference == 3) && (fs->is_long_term == 3) &&
+          (fs->frame->long_term_pic_num == long_term_pic_num))
+      {
+        unmark_for_long_term_reference(fs);
+      }
+      continue;
+    }
+
+    // field mode: try the top field, then the bottom field of this store
+    if ((fs->is_reference & 1) && (fs->is_long_term & 1) &&
+        (fs->top_field->long_term_pic_num == long_term_pic_num))
+    {
+      fs->top_field->used_for_reference = 0;
+      fs->top_field->is_long_term = 0;
+      fs->is_reference &= 2;
+      fs->is_long_term &= 2;
+      if (fs->is_used == 3)
+      {
+        fs->frame->used_for_reference = 0;
+        fs->frame->is_long_term = 0;
+      }
+      return;
+    }
+
+    if ((fs->is_reference & 2) && (fs->is_long_term & 2) &&
+        (fs->bottom_field->long_term_pic_num == long_term_pic_num))
+    {
+      fs->bottom_field->used_for_reference = 0;
+      fs->bottom_field->is_long_term = 0;
+      fs->is_reference &= 1;
+      fs->is_long_term &= 1;
+      if (fs->is_used == 3)
+      {
+        fs->frame->used_for_reference = 0;
+        fs->frame->is_long_term = 0;
+      }
+      return;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark a long-term reference frame or complementary field pair unused for reference
+ ************************************************************************
+ */
+static void unmark_long_term_frame_for_reference_by_frame_idx(int long_term_frame_idx)
+{
+  unsigned n = 0;
+
+  // every entry carrying the requested index is unmarked, not just the first
+  while (n < dpb.ltref_frames_in_buffer)
+  {
+    if (dpb.fs_ltref[n]->long_term_frame_idx == long_term_frame_idx)
+    {
+      unmark_for_long_term_reference(dpb.fs_ltref[n]);
+    }
+    ++n;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark a long-term reference field unused for reference only if it's not
+ *    the complementary field of the picture indicated by picNumX
+ ************************************************************************
+ */
+static void unmark_long_term_field_for_reference_by_frame_idx(PictureStructure structure, int long_term_frame_idx, int mark_current, unsigned curr_frame_num, int curr_pic_num)
+{
+  unsigned n;
+  int MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+  // is_long_term value of a store in which only the field with the parity
+  // given by 'structure' is long-term (1 = top, 2 = bottom, 0 = not a field)
+  int own_bit = 0;
+
+  assert(structure!=FRAME);
+
+  if (structure == TOP_FIELD)
+    own_bit = 1;
+  else if (structure == BOTTOM_FIELD)
+    own_bit = 2;
+
+  if (curr_pic_num<0)
+    curr_pic_num+=(2*MaxFrameNum);
+
+  for (n = 0; n < dpb.ltref_frames_in_buffer; ++n)
+  {
+    FrameStore *fs = dpb.fs_ltref[n];
+    int drop;
+
+    if (fs->long_term_frame_idx != long_term_frame_idx)
+      continue;
+    if (own_bit == 0)
+      continue;
+
+    if ((fs->is_long_term == 3) || (fs->is_long_term == own_bit))
+    {
+      // both fields, or the field of the same parity, are long-term
+      drop = 1;
+    }
+    else if (mark_current)
+    {
+      // keep the store only if it is dpb.last_picture and its frame_num
+      // equals curr_frame_num
+      drop = !dpb.last_picture ||
+             (dpb.last_picture != fs) ||
+             (dpb.last_picture->frame_num != curr_frame_num);
+    }
+    else
+    {
+      // keep the store only if its frame_num equals curr_pic_num/2
+      drop = ((fs->frame_num) != (unsigned)(curr_pic_num/2));
+    }
+
+    if (drop)
+      unmark_for_long_term_reference(fs);
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mark a picture as long-term reference
+ ************************************************************************
+ */
+static void mark_pic_long_term(StorablePicture* p, int long_term_frame_idx, int picNumX)
+{
+  unsigned i;
+  // offsets added to 2*long_term_frame_idx to form a field's long_term_pic_num:
+  // 1 for the field with the same parity as the current picture p,
+  // 0 for the field of opposite parity
+  int add_top, add_bottom;
+
+  if (p->structure == FRAME)
+  {
+    // frame mode: look for a complete short-term frame with pic_num == picNumX
+    for (i=0; i<dpb.ref_frames_in_buffer; i++)
+    {
+      if (dpb.fs_ref[i]->is_reference == 3)
+      {
+        if ((!dpb.fs_ref[i]->frame->is_long_term)&&(dpb.fs_ref[i]->frame->pic_num == picNumX))
+        {
+          dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_frame_idx
+                                             = long_term_frame_idx;
+          dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx;
+          dpb.fs_ref[i]->frame->is_long_term = 1;
+
+          // keep the field views of the frame, when present, in sync
+          if (dpb.fs_ref[i]->top_field && dpb.fs_ref[i]->bottom_field)
+          {
+            dpb.fs_ref[i]->top_field->long_term_frame_idx = dpb.fs_ref[i]->bottom_field->long_term_frame_idx
+                                                          = long_term_frame_idx;
+            dpb.fs_ref[i]->top_field->long_term_pic_num = long_term_frame_idx;
+            dpb.fs_ref[i]->bottom_field->long_term_pic_num = long_term_frame_idx;
+
+            dpb.fs_ref[i]->top_field->is_long_term = dpb.fs_ref[i]->bottom_field->is_long_term
+                                                   = 1;
+
+          }
+          dpb.fs_ref[i]->is_long_term = 3;
+          return;
+        }
+      }
+    }
+    printf ("Warning: reference frame for long term marking not found\n");
+  }
+  else
+  {
+    if (p->structure == TOP_FIELD)
+    {
+      add_top    = 1;
+      add_bottom = 0;
+    }
+    else
+    {
+      add_top    = 0;
+      add_bottom = 1;
+    }
+    // field mode: look for a short-term field with pic_num == picNumX,
+    // checking the top field of each store before its bottom field
+    for (i=0; i<dpb.ref_frames_in_buffer; i++)
+    {
+      if (dpb.fs_ref[i]->is_reference & 1)
+      {
+        if ((!dpb.fs_ref[i]->top_field->is_long_term)&&(dpb.fs_ref[i]->top_field->pic_num == picNumX))
+        {
+          if ((dpb.fs_ref[i]->is_long_term) && (dpb.fs_ref[i]->long_term_frame_idx != long_term_frame_idx))
+          {
+              printf ("Warning: assigning long_term_frame_idx different from other field\n");
+          }
+
+          dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->top_field->long_term_frame_idx
+                                             = long_term_frame_idx;
+          dpb.fs_ref[i]->top_field->long_term_pic_num = 2 * long_term_frame_idx + add_top;
+          dpb.fs_ref[i]->top_field->is_long_term = 1;
+          dpb.fs_ref[i]->is_long_term |= 1;
+          // both fields long-term now: mark the frame view as well
+          if (dpb.fs_ref[i]->is_long_term == 3)
+          {
+            dpb.fs_ref[i]->frame->is_long_term = 1;
+            dpb.fs_ref[i]->frame->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx;
+          }
+          return;
+        }
+      }
+      if (dpb.fs_ref[i]->is_reference & 2)
+      {
+        if ((!dpb.fs_ref[i]->bottom_field->is_long_term)&&(dpb.fs_ref[i]->bottom_field->pic_num == picNumX))
+        {
+          if ((dpb.fs_ref[i]->is_long_term) && (dpb.fs_ref[i]->long_term_frame_idx != long_term_frame_idx))
+          {
+              printf ("Warning: assigning long_term_frame_idx different from other field\n");
+          }
+
+          dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->bottom_field->long_term_frame_idx
+                                             = long_term_frame_idx;
+          // the bottom field takes the bottom-parity offset; the previous code
+          // used add_top here, which left add_bottom unused and gave the
+          // bottom field the offset meant for the opposite parity
+          dpb.fs_ref[i]->bottom_field->long_term_pic_num = 2 * long_term_frame_idx + add_bottom;
+          dpb.fs_ref[i]->bottom_field->is_long_term = 1;
+          dpb.fs_ref[i]->is_long_term |= 2;
+          // both fields long-term now: mark the frame view as well
+          if (dpb.fs_ref[i]->is_long_term == 3)
+          {
+            dpb.fs_ref[i]->frame->is_long_term = 1;
+            dpb.fs_ref[i]->frame->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx;
+          }
+          return;
+        }
+      }
+    }
+    printf ("Warning: reference field for long term marking not found\n");
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Assign a long term frame index to a short term picture
+ ************************************************************************
+ */
+static void mm_assign_long_term_frame_idx(StorablePicture* p, int difference_of_pic_nums_minus1, int long_term_frame_idx)
+{
+  const int picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1);
+
+  // first release whatever currently holds this long_term_frame_idx
+  if (p->structure != FRAME)
+  {
+    // FRAME doubles as the "no matching field found" marker
+    PictureStructure parity = FRAME;
+    unsigned k;
+
+    for (k = 0; k < dpb.ref_frames_in_buffer; ++k)
+    {
+      FrameStore *fs = dpb.fs_ref[k];
+
+      if ((fs->is_reference & 1) && (fs->top_field->pic_num == picNumX))
+      {
+        parity = TOP_FIELD;
+        break;
+      }
+      if ((fs->is_reference & 2) && (fs->bottom_field->pic_num == picNumX))
+      {
+        parity = BOTTOM_FIELD;
+        break;
+      }
+    }
+
+    if (parity == FRAME)
+    {
+      error ("field for long term marking not found",200);
+    }
+
+    unmark_long_term_field_for_reference_by_frame_idx(parity, long_term_frame_idx, 0, 0, picNumX);
+  }
+  else
+  {
+    unmark_long_term_frame_for_reference_by_frame_idx(long_term_frame_idx);
+  }
+
+  // then turn the short-term picture picNumX into a long-term one
+  mark_pic_long_term(p, long_term_frame_idx, picNumX);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set new max long_term_frame_idx
+ ************************************************************************
+ */
+void mm_update_max_long_term_frame_idx(int max_long_term_frame_idx_plus1)
+{
+  unsigned n;
+
+  dpb.max_long_term_pic_idx = max_long_term_frame_idx_plus1 - 1;
+
+  // unmark every long-term store whose index exceeds the new maximum
+  for (n = 0; n < dpb.ltref_frames_in_buffer; ++n)
+  {
+    if (!(dpb.fs_ltref[n]->long_term_frame_idx > dpb.max_long_term_pic_idx))
+      continue;
+
+    unmark_for_long_term_reference(dpb.fs_ltref[n]);
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark all long term reference pictures unused for reference
+ ************************************************************************
+ */
+static void mm_unmark_all_long_term_for_reference (void)
+{
+  // a "plus1" value of 0 sets the maximum long-term index to -1, so the
+  // range check in the update routine is applied against -1
+  // (presumably no valid index is below 0 -- TODO confirm)
+  const int max_idx_plus1 = 0;
+
+  mm_update_max_long_term_frame_idx(max_idx_plus1);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark all short term reference pictures unused for reference
+ ************************************************************************
+ */
+static void mm_unmark_all_short_term_for_reference (void)
+{
+  unsigned int n = 0;
+
+  // unmark every store in the short-term list, then rebuild the list
+  while (n < dpb.ref_frames_in_buffer)
+  {
+    unmark_for_reference(dpb.fs_ref[n]);
+    ++n;
+  }
+
+  update_ref_list();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Mark the current picture used for long term reference
+ ************************************************************************
+ */
+static void mm_mark_current_picture_long_term(StorablePicture *p, int long_term_frame_idx)
+{
+  // release any long-term picture that already uses this index
+  if (p->structure != FRAME)
+  {
+    unmark_long_term_field_for_reference_by_frame_idx(p->structure, long_term_frame_idx, 1, p->pic_num, 0);
+  }
+  else
+  {
+    unmark_long_term_frame_for_reference_by_frame_idx(long_term_frame_idx);
+  }
+
+  // then flag the current picture itself
+  p->is_long_term = 1;
+  p->long_term_frame_idx = long_term_frame_idx;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Perform Adaptive memory control decoded reference picture marking process
+ ************************************************************************
+ */
+static void adaptive_memory_management(StorablePicture* p)
+{
+  DecRefPicMarking_t *op;
+
+  img->last_has_mmco_5 = 0;
+
+  assert (!img->currentPicture->idr_flag);
+  assert (img->adaptive_ref_pic_buffering_flag);
+
+  // consume the marking operations one by one; each node is unlinked
+  // from the head of the list and freed after it has been executed
+  for (op = img->dec_ref_pic_marking_buffer; op != NULL; op = img->dec_ref_pic_marking_buffer)
+  {
+    switch (op->memory_management_control_operation)
+    {
+      case 0:
+        // operation 0 is only accepted as the last element of the list
+        if (op->Next != NULL)
+        {
+          error ("memory_management_control_operation = 0 not last operation in buffer", 500);
+        }
+        break;
+      case 1:
+        mm_unmark_short_term_for_reference(p, op->difference_of_pic_nums_minus1);
+        update_ref_list();
+        break;
+      case 2:
+        mm_unmark_long_term_for_reference(p, op->long_term_pic_num);
+        update_ltref_list();
+        break;
+      case 3:
+        mm_assign_long_term_frame_idx(p, op->difference_of_pic_nums_minus1, op->long_term_frame_idx);
+        update_ref_list();
+        update_ltref_list();
+        break;
+      case 4:
+        mm_update_max_long_term_frame_idx (op->max_long_term_frame_idx_plus1);
+        update_ltref_list();
+        break;
+      case 5:
+        mm_unmark_all_short_term_for_reference();
+        mm_unmark_all_long_term_for_reference();
+        img->last_has_mmco_5 = 1;
+        break;
+      case 6:
+        mm_mark_current_picture_long_term(p, op->long_term_frame_idx);
+        check_num_ref();
+        break;
+      default:
+        error ("invalid memory_management_control_operation in buffer", 500);
+    }
+
+    img->dec_ref_pic_marking_buffer = op->Next;
+    free (op);
+  }
+
+  if (!img->last_has_mmco_5)
+    return;
+
+  // an operation 5 was executed: reset the numbering of the current
+  // picture, rebase its POC values and flush the buffer
+  p->frame_num = 0;
+  p->pic_num = 0;
+
+  if (p->structure == TOP_FIELD)
+  {
+    img->toppoc = 0;
+    p->top_poc = 0;
+    p->poc = 0;
+  }
+  else if (p->structure == BOTTOM_FIELD)
+  {
+    img->bottompoc = 0;
+    p->bottom_poc = 0;
+    p->poc = 0;
+  }
+  else if (p->structure == FRAME)
+  {
+    // make both field POCs relative to the old frame POC
+    p->top_poc    -= p->poc;
+    p->bottom_poc -= p->poc;
+
+    img->toppoc = p->top_poc;
+    img->bottompoc = p->bottom_poc;
+
+    p->poc = imin (p->top_poc, p->bottom_poc);
+    img->framepoc = p->poc;
+  }
+
+  img->ThisPOC = p->poc;
+  flush_dpb();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Store a picture in DPB. This includes checking for space in DPB and
+ *    flushing frames.
+ *    If we received a frame, we need to check for a new store, if we
+ *    got a field, check if it's the second field of an already allocated
+ *    store.
+ *
+ * \param p
+ *    Picture to be stored
+ *
+ ************************************************************************
+ */
+void store_picture_in_dpb(StorablePicture* p)
+{
+  unsigned n;
+  int poc, pos;
+
+  assert (p!=NULL);
+
+  p->used_for_reference = (img->nal_reference_idc != 0);
+
+  img->last_has_mmco_5=0;
+  img->last_pic_bottom_field = (img->structure == BOTTOM_FIELD);
+
+  // reference marking that has to happen before the picture is stored
+  if (img->currentPicture->idr_flag)
+  {
+    idr_memory_management(p);
+  }
+  else if (p->used_for_reference && (img->adaptive_ref_pic_buffering_flag))
+  {
+    adaptive_memory_management(p);
+  }
+
+  // a field may complete the frame store that received the previous field
+  if (((p->structure==TOP_FIELD)||(p->structure==BOTTOM_FIELD)) &&
+      dpb.last_picture &&
+      ((int)dpb.last_picture->frame_num == p->pic_num) &&
+      (((p->structure==TOP_FIELD)&&(dpb.last_picture->is_used==2))||
+       ((p->structure==BOTTOM_FIELD)&&(dpb.last_picture->is_used==1))) &&
+      ((p->used_for_reference && (dpb.last_picture->is_orig_reference!=0))||
+       (!p->used_for_reference && (dpb.last_picture->is_orig_reference==0))))
+  {
+    insert_picture_in_dpb(dpb.last_picture, p);
+    update_ref_list();
+    update_ltref_list();
+    dump_dpb();
+    dpb.last_picture = NULL;
+    return;
+  }
+
+  // from here on: a frame, or a field without a stored complementary field
+
+  // sliding window marking for reference pictures without adaptive marking
+  if ((!img->currentPicture->idr_flag)&&(p->used_for_reference && (!img->adaptive_ref_pic_buffering_flag)))
+  {
+    sliding_window_memory_management(p);
+  }
+
+  // buffer full: first try to drop one frame that is no longer needed
+  if (dpb.used_size==dpb.size)
+  {
+    remove_unused_frame_from_dpb();
+  }
+
+  // still full: output frames until a store becomes free
+  while (dpb.used_size==dpb.size)
+  {
+    if (!p->used_for_reference)
+    {
+      // a non-reference picture is written out directly when nothing is
+      // waiting for output or its POC is below the smallest waiting POC
+      get_smallest_poc(&poc, &pos);
+      if ((-1==pos) || (p->poc < poc))
+      {
+        direct_output(p, p_dec);
+        return;
+      }
+    }
+    output_one_frame_from_dpb();
+  }
+
+  // a short-term reference must not repeat a frame_num already in the list
+  if ((p->used_for_reference)&&(!p->is_long_term))
+  {
+    for (n = 0; n < dpb.ref_frames_in_buffer; ++n)
+    {
+      if (dpb.fs_ref[n]->frame_num != p->frame_num)
+        continue;
+      error("duplicate frame_num im short-term reference picture buffer", 500);
+    }
+  }
+
+  // store in the first unused frame store
+  insert_picture_in_dpb(dpb.fs[dpb.used_size],p);
+
+  // remember the store of a field so the second field can be paired with it
+  dpb.last_picture = (p->structure != FRAME) ? dpb.fs[dpb.used_size] : NULL;
+
+  dpb.used_size++;
+
+  update_ref_list();
+  update_ltref_list();
+
+  check_num_ref();
+
+  dump_dpb();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Insert the frame picture into the DPB if the top field has already
+ *    been stored for the coding decision
+ *
+ * \param p
+ *    StorablePicture to be inserted
+ *
+ ************************************************************************
+ */
+void replace_top_pic_with_frame(StorablePicture* p)
+{
+  FrameStore* target = NULL;
+  unsigned n;
+
+  assert (p!=NULL);
+  assert (p->structure==FRAME);
+
+  p->used_for_reference = (img->nal_reference_idc != 0);
+
+  // reference pictures are upsampled before they are stored
+  if (p->used_for_reference)
+  {
+    UnifiedOneForthPix(p);
+  }
+
+  // look for the store that holds only the top field of the current frame_num
+  for (n = 0; n < dpb.used_size; ++n)
+  {
+    if ((dpb.fs[n]->frame_num == img->frame_num)&&(dpb.fs[n]->is_used==1))
+    {
+      target = dpb.fs[n];
+      break;
+    }
+  }
+
+  if (target == NULL)
+  {
+    // this should only happen for non-reference pictures when the dpb is full of reference pics
+    direct_output_paff(p, p_dec);
+    return;
+  }
+
+  // swap the stored top field for the complete frame
+  free_storable_picture(target->top_field);
+  target->top_field = NULL;
+  target->frame = p;
+  target->is_used = 3;
+
+  if (p->used_for_reference)
+  {
+    target->is_reference = 3;
+    if (p->is_long_term)
+    {
+      target->is_long_term = 3;
+    }
+  }
+
+  // regenerate the field views and refresh both reference lists
+  dpb_split_field(target);
+  update_ref_list();
+  update_ltref_list();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Insert the picture into the DPB. A free DPB position is necessary
+ *    for frames.
+ *
+ * \param fs
+ *    FrameStore into which the picture will be inserted
+ * \param p
+ *    StorablePicture to be inserted
+ *
+ ************************************************************************
+ */
+static void insert_picture_in_dpb(FrameStore* fs, StorablePicture* p)
+{
+  assert (p!=NULL);
+  assert (fs!=NULL);
+
+  // reference pictures are upsampled before they are stored
+  if (p->used_for_reference)
+  {
+    UnifiedOneForthPix(p);
+  }
+
+  if (p->structure == FRAME)
+  {
+    fs->frame = p;
+    fs->is_used = 3;
+    if (p->used_for_reference)
+    {
+      fs->is_reference = 3;
+      fs->is_orig_reference = 3;
+      if (p->is_long_term)
+      {
+        fs->is_long_term = 3;
+        fs->long_term_frame_idx = p->long_term_frame_idx;
+      }
+    }
+    // derive the field views from the frame
+    dpb_split_field(fs);
+  }
+  else if ((p->structure == TOP_FIELD) || (p->structure == BOTTOM_FIELD))
+  {
+    // bit 0 of the is_* flags stands for the top field, bit 1 for the bottom field
+    const int parity_bit = (p->structure == TOP_FIELD) ? 1 : 2;
+
+    if (parity_bit == 1)
+      fs->top_field = p;
+    else
+      fs->bottom_field = p;
+
+    fs->is_used |= parity_bit;
+    if (p->used_for_reference)
+    {
+      fs->is_reference |= parity_bit;
+      fs->is_orig_reference |= parity_bit;
+      if (p->is_long_term)
+      {
+        fs->is_long_term |= parity_bit;
+        fs->long_term_frame_idx = p->long_term_frame_idx;
+      }
+    }
+
+    if (fs->is_used == 3)
+    {
+      // both fields present: build the frame view
+      dpb_combine_field(fs);
+    }
+    else
+    {
+      // first field of the store: take over its POC
+      fs->poc = p->poc;
+      gen_field_ref_ids(p);
+    }
+  }
+
+  fs->frame_num = p->pic_num;
+  fs->is_output = p->is_output;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Check if one of the frames/fields in frame store is used for reference
+ ************************************************************************
+ */
+static int is_used_for_reference(FrameStore* fs)
+{
+  // the store-level flag settles it immediately
+  if (fs->is_reference)
+    return 1;
+
+  // complete frame
+  if ((fs->is_used == 3) && fs->frame->used_for_reference)
+    return 1;
+
+  // top field, when one is attached
+  if ((fs->is_used & 1) && fs->top_field && fs->top_field->used_for_reference)
+    return 1;
+
+  // bottom field, when one is attached
+  if ((fs->is_used & 2) && fs->bottom_field && fs->bottom_field->used_for_reference)
+    return 1;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Check if one of the frames/fields in frame store is used for short-term reference
+ ************************************************************************
+ */
+static int is_short_term_reference(FrameStore* fs)
+{
+  // complete frame: used for reference and not long-term
+  if ((fs->is_used == 3) &&
+      fs->frame->used_for_reference && !fs->frame->is_long_term)
+    return 1;
+
+  // top field, when one is attached
+  if ((fs->is_used & 1) && fs->top_field &&
+      fs->top_field->used_for_reference && !fs->top_field->is_long_term)
+    return 1;
+
+  // bottom field, when one is attached
+  if ((fs->is_used & 2) && fs->bottom_field &&
+      fs->bottom_field->used_for_reference && !fs->bottom_field->is_long_term)
+    return 1;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Check if one of the frames/fields in frame store is used for long-term reference
+ ************************************************************************
+ */
+static int is_long_term_reference(FrameStore* fs)
+{
+  // complete frame: used for reference and long-term
+  if ((fs->is_used == 3) &&
+      fs->frame->used_for_reference && fs->frame->is_long_term)
+    return 1;
+
+  // top field, when one is attached
+  if ((fs->is_used & 1) && fs->top_field &&
+      fs->top_field->used_for_reference && fs->top_field->is_long_term)
+    return 1;
+
+  // bottom field, when one is attached
+  if ((fs->is_used & 2) && fs->bottom_field &&
+      fs->bottom_field->used_for_reference && fs->bottom_field->is_long_term)
+    return 1;
+
+  return 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    remove one frame from DPB
+ ************************************************************************
+ */
+static void remove_frame_from_dpb(int pos)
+{
+  FrameStore* victim = dpb.fs[pos];
+  unsigned n;
+
+  // release the pictures the store owns, depending on what it holds
+  if (victim->is_used == 3)
+  {
+    free_storable_picture(victim->frame);
+    free_storable_picture(victim->top_field);
+    free_storable_picture(victim->bottom_field);
+    victim->frame = NULL;
+    victim->top_field = NULL;
+    victim->bottom_field = NULL;
+  }
+  else if (victim->is_used == 2)
+  {
+    free_storable_picture(victim->bottom_field);
+    victim->bottom_field = NULL;
+  }
+  else if (victim->is_used == 1)
+  {
+    free_storable_picture(victim->top_field);
+    victim->top_field = NULL;
+  }
+  else if (victim->is_used != 0)
+  {
+    error("invalid frame store type",500);
+  }
+
+  victim->is_used = 0;
+  victim->is_long_term = 0;
+  victim->is_reference = 0;
+  victim->is_orig_reference = 0;
+
+  // close the gap: shift the following stores down by one and park the
+  // emptied store in the last used slot before shrinking the used range
+  for (n = pos; n < dpb.used_size-1; ++n)
+  {
+    dpb.fs[n] = dpb.fs[n+1];
+  }
+  dpb.fs[dpb.used_size-1] = victim;
+  dpb.used_size--;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    find smallest POC in the DPB.
+ ************************************************************************
+ */
+static void get_smallest_poc(int *poc,int * pos)
+{
+  unsigned n;
+
+  if (dpb.used_size<1)
+  {
+    error("Cannot determine smallest POC, DPB empty.",150);
+  }
+
+  // defaults reported when no store qualifies
+  *poc = INT_MAX;
+  *pos = -1;
+
+  for (n = 0; n < dpb.used_size; ++n)
+  {
+    // stores that were already output are not candidates
+    if (dpb.fs[n]->is_output)
+      continue;
+    // only a strictly smaller POC replaces the current minimum
+    if (!(*poc > dpb.fs[n]->poc))
+      continue;
+
+    *poc = dpb.fs[n]->poc;
+    *pos = n;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Remove a picture from DPB which is no longer needed.
+ ************************************************************************
+ */
+static int remove_unused_frame_from_dpb(void)
+{
+  unsigned slot = 0;
+
+  // remove the first store that was already output and is no longer
+  // used for reference; returns 1 if one was removed, 0 otherwise
+  while (slot < dpb.used_size)
+  {
+    if (dpb.fs[slot]->is_output && (!is_used_for_reference(dpb.fs[slot])))
+    {
+      remove_frame_from_dpb(slot);
+      return 1;
+    }
+    ++slot;
+  }
+
+  return 0;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output one picture stored in the DPB.
+ ************************************************************************
+ */
+static void output_one_frame_from_dpb(void)
+{
+  int poc, pos;
+
+  if (dpb.used_size<1)
+  {
+    error("Cannot output frame, DPB empty.",150);
+  }
+
+  // pick the not yet output store with the smallest POC
+  get_smallest_poc(&poc, &pos);
+
+  if (pos == -1)
+  {
+    error("no frames for output available", 150);
+  }
+
+  write_stored_frame(dpb.fs[pos], p_dec);
+
+  // the ordering check is skipped when redundant pictures are in use,
+  // since the output POC may then be out of ascending order
+  if ((input->redundant_pic_flag == 0) && (dpb.last_output_poc >= poc))
+  {
+    error ("output POC must be in ascending order", 150);
+  }
+  dpb.last_output_poc = poc;
+
+  // the store is released only if nothing in it is still used for reference
+  if (is_used_for_reference(dpb.fs[pos]))
+    return;
+
+  remove_frame_from_dpb(pos);
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output all stored pictures. Should be called to empty the buffer.
+ *    Afterwards last_output_poc is reset to INT_MIN.
+ ************************************************************************
+ */
+void flush_dpb(void)
+{
+  unsigned idx = 0;
+
+  // nothing in the buffer is needed for reference any longer
+  while (idx < dpb.used_size)
+  {
+    unmark_for_reference (dpb.fs[idx]);
+    idx++;
+  }
+
+  // discard everything that was output before
+  for (;;)
+  {
+    if (!remove_unused_frame_from_dpb())
+      break;
+  }
+
+  // the remaining frames leave the buffer in POC order
+  while (dpb.used_size != 0)
+    output_one_frame_from_dpb();
+
+  dpb.last_output_poc = INT_MIN;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Derive ref_id of both lists from ref_idx / ref_pic_num for every
+ *    block of 4x4 samples and set field_frame to 1 for each of them.
+ *
+ * \param p
+ *    picture whose ref_id and field_frame arrays are written
+ ************************************************************************
+ */
+void gen_field_ref_ids(StorablePicture *p)
+{
+  int bx, by;
+  int idx_l0, idx_l1;
+
+  for (bx=0; bx<p->size_x/4; bx++)
+  {
+    for (by=0; by<p->size_y/4; by++)
+    {
+      idx_l0 = p->ref_idx[LIST_0][by][bx];
+      idx_l1 = p->ref_idx[LIST_1][by][bx];
+
+      // a negative index has no picture number to look up: store 0
+      if (idx_l0 >= 0)
+        p->ref_id[LIST_0][by][bx] = p->ref_pic_num[LIST_0][idx_l0];
+      else
+        p->ref_id[LIST_0][by][bx] = 0;
+
+      if (idx_l1 >= 0)
+        p->ref_id[LIST_1][by][bx] = p->ref_pic_num[LIST_1][idx_l1];
+      else
+        p->ref_id[LIST_1][by][bx] = 0;
+
+      p->field_frame[by][bx] = 1;
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Split a stored frame into its top and bottom fields.
+ *
+ *    When the active SPS does not set frame_mbs_only_flag, two field
+ *    pictures are allocated and filled from the even (top) and odd
+ *    (bottom) sample rows of the frame, and motion vectors / reference
+ *    indices are propagated to them. Otherwise the field pointers are
+ *    set to NULL. In both cases the frame's ref_id and field_frame
+ *    arrays are written.
+ *
+ * \param fs
+ *    frame store whose frame member is read; its top_field,
+ *    bottom_field and poc members are written
+ ************************************************************************
+ */
+void dpb_split_field(FrameStore *fs)
+{
+  int i, j, ii, jj, jj4;
+  int idiv,jdiv;
+  int currentmb;
+  int dummylist0,dummylist1;
+  // twice the frame width in units of 16 samples; row stride of the mb_field index computed below
+  int twosz16 = 2*(fs->frame->size_x>>4);
+
+  fs->poc = fs->frame->poc;
+
+  if (!active_sps->frame_mbs_only_flag)
+  {
+    // each field has the full width and half the height of the frame (luma and chroma)
+    fs->top_field    = alloc_storable_picture(TOP_FIELD,    fs->frame->size_x, fs->frame->size_y/2, fs->frame->size_x_cr, fs->frame->size_y_cr/2);
+    fs->bottom_field = alloc_storable_picture(BOTTOM_FIELD, fs->frame->size_x, fs->frame->size_y/2, fs->frame->size_x_cr, fs->frame->size_y_cr/2);
+
+    // top field: even frame rows
+    for (i=0; i<fs->frame->size_y/2; i++)
+    {
+      memcpy(fs->top_field->imgY[i], fs->frame->imgY[i*2], fs->frame->size_x*sizeof(imgpel));
+    }
+
+    for (i=0; i<fs->frame->size_y_cr/2; i++)
+    {
+      memcpy(fs->top_field->imgUV[0][i], fs->frame->imgUV[0][i*2], fs->frame->size_x_cr*sizeof(imgpel));
+      memcpy(fs->top_field->imgUV[1][i], fs->frame->imgUV[1][i*2], fs->frame->size_x_cr*sizeof(imgpel));
+    }
+
+    // bottom field: odd frame rows
+    for (i=0; i<fs->frame->size_y/2; i++)
+    {
+      memcpy(fs->bottom_field->imgY[i], fs->frame->imgY[i*2 + 1], fs->frame->size_x*sizeof(imgpel));
+    }
+
+    for (i=0; i<fs->frame->size_y_cr/2; i++)
+    {
+      memcpy(fs->bottom_field->imgUV[0][i], fs->frame->imgUV[0][i*2 + 1], fs->frame->size_x_cr*sizeof(imgpel));
+      memcpy(fs->bottom_field->imgUV[1][i], fs->frame->imgUV[1][i*2 + 1], fs->frame->size_x_cr*sizeof(imgpel));
+    }
+
+    // NOTE(review): presumably builds the sub-pel interpolated planes of each field -- confirm against UnifiedOneForthPix
+    UnifiedOneForthPix(fs->top_field);
+    UnifiedOneForthPix(fs->bottom_field);
+
+    // POC bookkeeping: each field gets its own poc, all three POC variants are mirrored into both fields
+    fs->top_field->poc = fs->frame->top_poc;
+    fs->bottom_field->poc =  fs->frame->bottom_poc;
+
+    fs->top_field->frame_poc =  fs->frame->frame_poc;
+
+    fs->top_field->bottom_poc =fs->bottom_field->bottom_poc =  fs->frame->bottom_poc;
+    fs->top_field->top_poc =fs->bottom_field->top_poc =  fs->frame->top_poc;
+    fs->bottom_field->frame_poc =  fs->frame->frame_poc;
+
+    // reference marking of the frame is inherited by both fields
+    fs->top_field->used_for_reference = fs->bottom_field->used_for_reference
+                                      = fs->frame->used_for_reference;
+    fs->top_field->is_long_term = fs->bottom_field->is_long_term
+                                = fs->frame->is_long_term;
+    fs->long_term_frame_idx = fs->top_field->long_term_frame_idx
+                            = fs->bottom_field->long_term_frame_idx
+                            = fs->frame->long_term_frame_idx;
+
+    fs->top_field->coded_frame = fs->bottom_field->coded_frame = 1;
+    fs->top_field->MbaffFrameFlag = fs->bottom_field->MbaffFrameFlag
+                                  = fs->frame->MbaffFrameFlag;
+
+    // cross-link frame, top field and bottom field
+    fs->frame->top_field    = fs->top_field;
+    fs->frame->bottom_field = fs->bottom_field;
+
+    fs->top_field->bottom_field = fs->bottom_field;
+    fs->top_field->frame        = fs->frame;
+    fs->bottom_field->top_field = fs->top_field;
+    fs->bottom_field->frame     = fs->frame;
+
+    fs->top_field->chroma_format_idc = fs->bottom_field->chroma_format_idc = fs->frame->chroma_format_idc;
+
+    //store reference picture index
+    // the frame's lists at offset 2 feed the top field, those at offset 4 the bottom field;
+    // 2*listXsize entries are copied for each list
+    memcpy(fs->top_field->ref_pic_num[LIST_1]   , fs->frame->ref_pic_num[2 + LIST_1], 2*listXsize[LIST_1] * sizeof(int64));
+    memcpy(fs->bottom_field->ref_pic_num[LIST_1], fs->frame->ref_pic_num[4 + LIST_1], 2*listXsize[LIST_1] * sizeof(int64));
+    memcpy(fs->top_field->ref_pic_num[LIST_0]   , fs->frame->ref_pic_num[2 + LIST_0], 2*listXsize[LIST_0] * sizeof(int64));
+    memcpy(fs->bottom_field->ref_pic_num[LIST_0], fs->frame->ref_pic_num[4 + LIST_0], 2*listXsize[LIST_0] * sizeof(int64));
+
+  }
+  else
+  {
+    // frame-only stream: no field pictures exist
+    fs->top_field=NULL;
+    fs->bottom_field=NULL;
+    fs->frame->top_field=NULL;
+    fs->frame->bottom_field=NULL;
+  }
+
+  // Pass 1: compute the frame's ref_id for every block of 4x4 samples
+  for (j=0 ; j<fs->frame->size_y/4 ; j++)
+  {
+    jdiv=j/4;
+    for (i=0 ; i<fs->frame->size_x/4 ; i++)
+    {
+      idiv=i/4;
+      // mb_field index: rows of 16 samples are grouped in pairs (jdiv/2), the two members
+      // of a pair are adjacent entries selected by jdiv%2
+      currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2);
+
+      if (fs->frame->MbaffFrameFlag  && fs->frame->mb_field[currentmb])
+      {
+        // odd entries use the lists at offset 4, even entries those at offset 2
+        int list_offset = currentmb%2? 4: 2;
+        dummylist0 = fs->frame->ref_idx[LIST_0][j][i];
+        dummylist1 = fs->frame->ref_idx[LIST_1][j][i];
+        //! association with id already known for fields.
+        fs->frame->ref_id[LIST_0 + list_offset][j][i] = (dummylist0>=0)? fs->frame->ref_pic_num[LIST_0 + list_offset][dummylist0] : 0;
+        fs->frame->ref_id[LIST_1 + list_offset][j][i] = (dummylist1>=0)? fs->frame->ref_pic_num[LIST_1 + list_offset][dummylist1] : 0;
+        //! need to make association with frames
+        fs->frame->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->frm_ref_pic_num[LIST_0 + list_offset][dummylist0] : 0;
+        fs->frame->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->frm_ref_pic_num[LIST_1 + list_offset][dummylist1] : 0;
+
+      }
+      else
+      {
+        // NOTE(review): this branch stores -1 for a negative ref_idx while the branch above stores 0 -- confirm the asymmetry is intended
+        dummylist0 = fs->frame->ref_idx[LIST_0][j][i];
+        dummylist1 = fs->frame->ref_idx[LIST_1][j][i];
+        fs->frame->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->ref_pic_num[LIST_0][dummylist0] : -1;
+        fs->frame->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->ref_pic_num[LIST_1][dummylist1] : -1;
+      }
+    }
+  }
+
+  // Pass 2 (MBAFF only): where mb_field is set, copy the motion data stored in the
+  // frame buffer into the two field pictures
+  if (!active_sps->frame_mbs_only_flag && fs->frame->MbaffFrameFlag)
+  {
+    for (j=0 ; j<fs->frame->size_y/8; j++)
+    {
+      // jj: frame row that supplies the top field row j; jj4: the row 4 below it, supplying the bottom field
+      jj = (j/4)*8 + j%4;
+      jj4 = jj + 4;
+      jdiv=j/2;
+      for (i=0 ; i<fs->frame->size_x/4 ; i++)
+      {
+        idiv=i/4;
+
+        currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2);
+        // Assign field mvs attached to MB-Frame buffer to the proper buffer
+        if (fs->frame->mb_field[currentmb])
+        {
+          fs->bottom_field->field_frame[j][i] = fs->top_field->field_frame[j][i]=1;
+          fs->frame->field_frame[2*j][i] = fs->frame->field_frame[2*j+1][i]=1;
+
+          fs->bottom_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj4][i][0];
+          fs->bottom_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj4][i][1];
+          fs->bottom_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj4][i][0];
+          fs->bottom_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj4][i][1];
+          fs->bottom_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj4][i];
+          fs->bottom_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj4][i];
+          fs->bottom_field->ref_id[LIST_0][j][i] = fs->frame->ref_id[LIST_0+4][jj4][i];
+          fs->bottom_field->ref_id[LIST_1][j][i] = fs->frame->ref_id[LIST_1+4][jj4][i];
+
+
+          fs->top_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj][i][0];
+          fs->top_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj][i][1];
+          fs->top_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj][i][0];
+          fs->top_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj][i][1];
+          fs->top_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj][i];
+          fs->top_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj][i];
+          fs->top_field->ref_id[LIST_0][j][i] = fs->frame->ref_id[LIST_0+2][jj][i];
+          fs->top_field->ref_id[LIST_1][j][i] = fs->frame->ref_id[LIST_1+2][jj][i];
+        }
+      }
+    }
+  }
+
+  //! Generate field MVs from Frame MVs
+  // Pass 3: blocks not handled by pass 2 give both fields the same motion data,
+  // taken from frame position [jj][ii]
+  if (!active_sps->frame_mbs_only_flag)
+  {
+    for (j=0 ; j<fs->frame->size_y/8 ; j++)
+    {
+      // RSD() is defined outside this function -- TODO confirm which block position it selects
+      jj = 2* RSD(j);
+      jdiv = j/2;
+      for (i=0 ; i<fs->frame->size_x/4 ; i++)
+      {
+        ii = RSD(i);
+        idiv = i/4;
+
+        currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2);
+
+        if (!fs->frame->MbaffFrameFlag  || !fs->frame->mb_field[currentmb])
+        {
+          fs->frame->field_frame[2*j+1][i] = fs->frame->field_frame[2*j][i]=0;
+
+          fs->top_field->field_frame[j][i] = fs->bottom_field->field_frame[j][i] = 0;
+
+          fs->top_field->mv[LIST_0][j][i][0] = fs->bottom_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj][ii][0];
+          fs->top_field->mv[LIST_0][j][i][1] = fs->bottom_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj][ii][1];
+          fs->top_field->mv[LIST_1][j][i][0] = fs->bottom_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj][ii][0];
+          fs->top_field->mv[LIST_1][j][i][1] = fs->bottom_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj][ii][1];
+
+          // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+          // a ref_idx of -1 is passed through; the fields' ref_id is not written in that case
+          if (fs->frame->ref_idx[LIST_0][jj][ii] == -1)
+            fs->top_field->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][j][i] = - 1;
+          else
+          {
+            dummylist0=fs->top_field->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj][ii];
+            fs->top_field   ->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->top_ref_pic_num[LIST_0][dummylist0] : 0;
+            fs->bottom_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->bottom_ref_pic_num[LIST_0][dummylist0] : 0;
+          }
+
+          if (fs->frame->ref_idx[LIST_1][jj][ii] == -1)
+            fs->top_field->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][j][i] = - 1;
+          else
+          {
+            dummylist1=fs->top_field->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj][ii];
+
+            fs->top_field   ->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->top_ref_pic_num[LIST_1][dummylist1] : 0;
+            fs->bottom_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->bottom_ref_pic_num[LIST_1][dummylist1] : 0;
+          }
+        }
+        else
+        {
+          // block already handled by pass 2: only mirror mb_field into the frame's field_frame
+          fs->frame->field_frame[2*j+1][i] = fs->frame->field_frame[2*j][i]= fs->frame->mb_field[currentmb];
+        }
+      }
+    }
+  }
+  else
+  {
+    // frame-only stream: clear field_frame in one go.
+    // NOTE(review): relies on field_frame being one contiguous allocation of
+    // (size_y/4)*(size_x/4) bytes starting at field_frame[0] -- confirm against its allocator
+    memset( &(fs->frame->field_frame[0][0]), 0, fs->frame->size_y * (fs->frame->size_x >>4) * sizeof(byte));
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Build fs->frame from fs->top_field and fs->bottom_field:
+ *    interleave the sample rows (top field on even rows, bottom field
+ *    on odd rows) and copy POC, reference marking, chroma format and
+ *    cropping information. No motion data is generated here.
+ *
+ * \param fs
+ *    frame store with both fields present; its frame member is
+ *    newly allocated
+ ************************************************************************
+ */
+void dpb_combine_field_yuv(FrameStore *fs)
+{
+  int row, uv;
+  StorablePicture *top = fs->top_field;
+  StorablePicture *bot = fs->bottom_field;
+  StorablePicture *frm;
+
+  // the frame is as wide as a field and twice as high
+  frm = alloc_storable_picture(FRAME, top->size_x, top->size_y*2, top->size_x_cr, top->size_y_cr*2);
+  fs->frame = frm;
+
+  // luma: even rows from the top field, odd rows from the bottom field
+  for (row=0; row<top->size_y; row++)
+  {
+    memcpy(frm->imgY[2*row],     top->imgY[row], top->size_x*sizeof(imgpel));
+    memcpy(frm->imgY[2*row + 1], bot->imgY[row], bot->size_x*sizeof(imgpel));
+  }
+
+  // chroma: same interleaving for both planes
+  for (row=0; row<top->size_y_cr; row++)
+  {
+    for (uv=0; uv<2; uv++)
+    {
+      memcpy(frm->imgUV[uv][2*row],     top->imgUV[uv][row], top->size_x_cr*sizeof(imgpel));
+      memcpy(frm->imgUV[uv][2*row + 1], bot->imgUV[uv][row], bot->size_x_cr*sizeof(imgpel));
+    }
+  }
+
+  // the frame POC is the smaller of the two field POCs
+  frm->frame_poc = imin (top->poc, bot->poc);
+  frm->poc       = frm->frame_poc;
+  fs->poc        = frm->poc;
+
+  top->frame_poc = frm->poc;
+  bot->frame_poc = top->frame_poc;
+
+  frm->top_poc = top->poc;
+  bot->top_poc = frm->top_poc;
+
+  frm->bottom_poc = bot->poc;
+  top->bottom_poc = frm->bottom_poc;
+
+  // the frame counts as reference / long-term only if both fields do
+  frm->used_for_reference = (top->used_for_reference && bot->used_for_reference );
+  frm->is_long_term = (top->is_long_term && bot->is_long_term );
+
+  if (frm->is_long_term)
+  {
+    frm->long_term_frame_idx = fs->long_term_frame_idx;
+  }
+
+  frm->top_field    = top;
+  frm->bottom_field = bot;
+
+  frm->coded_frame = 0;
+
+  // format and cropping information is taken from the top field
+  frm->chroma_format_idc = top->chroma_format_idc;
+  frm->frame_cropping_flag = top->frame_cropping_flag;
+  if (frm->frame_cropping_flag)
+  {
+    frm->frame_cropping_rect_top_offset    = top->frame_cropping_rect_top_offset;
+    frm->frame_cropping_rect_bottom_offset = top->frame_cropping_rect_bottom_offset;
+    frm->frame_cropping_rect_left_offset   = top->frame_cropping_rect_left_offset;
+    frm->frame_cropping_rect_right_offset  = top->frame_cropping_rect_right_offset;
+  }
+
+  // back links from the fields to the combined frame
+  bot->frame = frm;
+  top->frame = bot->frame;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Generate a frame from top and bottom fields: sample data and
+ *    display information via dpb_combine_field_yuv(), then reference
+ *    picture numbers, motion vectors, reference indices and ref_ids
+ *    for every block of 4x4 samples
+ *
+ * \param fs
+ *    frame store whose top_field and bottom_field are set; fs->frame
+ *    is allocated by dpb_combine_field_yuv()
+ ************************************************************************
+ */
+void dpb_combine_field(FrameStore *fs)
+{
+  int i,j, jj, jj4;
+  int dummylist0, dummylist1;
+
+  dpb_combine_field_yuv(fs);
+
+  // NOTE(review): presumably builds the sub-pel interpolated planes of the new frame -- confirm against UnifiedOneForthPix
+  UnifiedOneForthPix(fs->frame);
+
+  //combine field for frame
+  // frame entry i is derived from field entry 2*i: each field value is rounded
+  // to an even number ((x/2)*2) and the smaller of the two is stored
+  for (i=0;i<(listXsize[LIST_1]+1)/2;i++)
+  {
+    fs->frame->ref_pic_num[LIST_1][i]= i64min ((fs->top_field->ref_pic_num[LIST_1][2*i]/2)*2, (fs->bottom_field->ref_pic_num[LIST_1][2*i]/2)*2);
+  }
+
+  for (i=0;i<(listXsize[LIST_0]+1)/2;i++)
+  {
+    fs->frame->ref_pic_num[LIST_0][i]= i64min ((fs->top_field->ref_pic_num[LIST_0][2*i]/2)*2, (fs->bottom_field->ref_pic_num[LIST_0][2*i]/2)*2);
+  }
+
+   //! Use inference flag to remap mvs/references
+
+  //! Generate Frame parameters from field information.
+  for (j=0 ; j<fs->top_field->size_y/4 ; j++)
+  {
+    // field row j lands in frame row jj (top field data) and jj4 = jj + 4 (bottom field data);
+    // groups of 4 field rows map to groups of 8 frame rows
+    jj = 8*(j/4) + (j%4);
+    jj4 = jj + 4;
+    for (i=0 ; i<fs->top_field->size_x/4 ; i++)
+    {
+      fs->frame->field_frame[jj][i]= fs->frame->field_frame[jj4][i]=1;
+
+      // top field -> frame row jj
+      fs->frame->mv[LIST_0][jj][i][0] = fs->top_field->mv[LIST_0][j][i][0];
+      fs->frame->mv[LIST_0][jj][i][1] = fs->top_field->mv[LIST_0][j][i][1] ;
+      fs->frame->mv[LIST_1][jj][i][0] = fs->top_field->mv[LIST_1][j][i][0];
+      fs->frame->mv[LIST_1][jj][i][1] = fs->top_field->mv[LIST_1][j][i][1] ;
+
+      dummylist0=fs->frame->ref_idx[LIST_0][jj][i]  = fs->top_field->ref_idx[LIST_0][j][i];
+      dummylist1=fs->frame->ref_idx[LIST_1][jj][i]  = fs->top_field->ref_idx[LIST_1][j][i];
+
+      //! association with id already known for fields.
+      fs->top_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->top_field->ref_pic_num[LIST_0][dummylist0] : 0;
+      fs->top_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->top_field->ref_pic_num[LIST_1][dummylist1] : 0;
+
+      //! need to make association with frames
+      fs->frame->ref_id[LIST_0][jj][i] = (dummylist0>=0)? fs->top_field->frm_ref_pic_num[LIST_0][dummylist0] : 0;
+      fs->frame->ref_id[LIST_1][jj][i] = (dummylist1>=0)? fs->top_field->frm_ref_pic_num[LIST_1][dummylist1] : 0;
+
+      // bottom field -> frame row jj4
+      fs->frame->mv[LIST_0][jj4][i][0] = fs->bottom_field->mv[LIST_0][j][i][0];
+      fs->frame->mv[LIST_0][jj4][i][1] = fs->bottom_field->mv[LIST_0][j][i][1] ;
+      fs->frame->mv[LIST_1][jj4][i][0] = fs->bottom_field->mv[LIST_1][j][i][0];
+      fs->frame->mv[LIST_1][jj4][i][1] = fs->bottom_field->mv[LIST_1][j][i][1] ;
+
+      dummylist0=fs->frame->ref_idx[LIST_0][jj4][i]  = fs->bottom_field->ref_idx[LIST_0][j][i];
+      dummylist1=fs->frame->ref_idx[LIST_1][jj4][i]  = fs->bottom_field->ref_idx[LIST_1][j][i];
+
+      fs->bottom_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->bottom_field->ref_pic_num[LIST_0][dummylist0] : 0;
+      fs->bottom_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->bottom_field->ref_pic_num[LIST_1][dummylist1] : 0;
+
+      //! need to make association with frames
+      // NOTE(review): rows jj4 default to -1 for a negative ref_idx while rows jj above default to 0 -- confirm the asymmetry is intended
+      fs->frame->ref_id[LIST_0][jj4][i] = (dummylist0>=0)? fs->bottom_field->frm_ref_pic_num[LIST_0][dummylist0] : -1;
+      fs->frame->ref_id[LIST_1][jj4][i] = (dummylist1>=0)? fs->bottom_field->frm_ref_pic_num[LIST_1][dummylist1] : -1;
+
+      fs->top_field->field_frame[j][i]=1;
+      fs->bottom_field->field_frame[j][i]=1;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate the zero-initialized buffers that hold the reference
+ *    picture list reordering commands of a slice. List 0 buffers are
+ *    allocated for all slice types except I and SI, list 1 buffers for
+ *    B slices only; pointers that are not needed are set to NULL.
+ *
+ * \param currSlice
+ *    slice receiving the buffers
+ ************************************************************************
+ */
+void alloc_ref_pic_list_reordering_buffer(Slice *currSlice)
+{
+  int entries;
+
+  if (img->type==I_SLICE || img->type==SI_SLICE)
+  {
+    currSlice->reordering_of_pic_nums_idc_l0 = NULL;
+    currSlice->abs_diff_pic_num_minus1_l0 = NULL;
+    currSlice->long_term_pic_idx_l0 = NULL;
+  }
+  else
+  {
+    // one entry more than there are active list 0 references
+    entries = img->num_ref_idx_l0_active+1;
+
+    currSlice->reordering_of_pic_nums_idc_l0 = calloc(entries,sizeof(int));
+    if (currSlice->reordering_of_pic_nums_idc_l0==NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: remapping_of_pic_nums_idc_l0");
+
+    currSlice->abs_diff_pic_num_minus1_l0 = calloc(entries,sizeof(int));
+    if (currSlice->abs_diff_pic_num_minus1_l0==NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l0");
+
+    currSlice->long_term_pic_idx_l0 = calloc(entries,sizeof(int));
+    if (currSlice->long_term_pic_idx_l0==NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l0");
+  }
+
+  if (img->type!=B_SLICE)
+  {
+    currSlice->reordering_of_pic_nums_idc_l1 = NULL;
+    currSlice->abs_diff_pic_num_minus1_l1 = NULL;
+    currSlice->long_term_pic_idx_l1 = NULL;
+  }
+  else
+  {
+    // one entry more than there are active list 1 references
+    entries = img->num_ref_idx_l1_active+1;
+
+    currSlice->reordering_of_pic_nums_idc_l1 = calloc(entries,sizeof(int));
+    if (currSlice->reordering_of_pic_nums_idc_l1==NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: remapping_of_pic_nums_idc_l1");
+
+    currSlice->abs_diff_pic_num_minus1_l1 = calloc(entries,sizeof(int));
+    if (currSlice->abs_diff_pic_num_minus1_l1==NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l1");
+
+    currSlice->long_term_pic_idx_l1 = calloc(entries,sizeof(int));
+    if (currSlice->long_term_pic_idx_l1==NULL)
+      no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l1");
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory for buffering of reference picture reordering commands
+ *    and reset all six buffer pointers of the slice to NULL, so the
+ *    function may be called again on the same slice.
+ *
+ * \param currSlice
+ *    slice whose reordering buffers are released; pointers that are
+ *    already NULL are accepted
+ ************************************************************************
+ */
+void free_ref_pic_list_reordering_buffer(Slice *currSlice)
+{
+  // free(NULL) is defined to do nothing, so no per-pointer guard is needed
+  free(currSlice->reordering_of_pic_nums_idc_l0);
+  free(currSlice->abs_diff_pic_num_minus1_l0);
+  free(currSlice->long_term_pic_idx_l0);
+
+  currSlice->reordering_of_pic_nums_idc_l0 = NULL;
+  currSlice->abs_diff_pic_num_minus1_l0 = NULL;
+  currSlice->long_term_pic_idx_l0 = NULL;
+
+  free(currSlice->reordering_of_pic_nums_idc_l1);
+  free(currSlice->abs_diff_pic_num_minus1_l1);
+  free(currSlice->long_term_pic_idx_l1);
+
+  currSlice->reordering_of_pic_nums_idc_l1 = NULL;
+  currSlice->abs_diff_pic_num_minus1_l1 = NULL;
+  currSlice->long_term_pic_idx_l1 = NULL;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *      Tian Dong
+ *          June 13, 2002, Modified on July 30, 2003
+ *
+ *      Fill a gap in frame_num: for every frame number from
+ *      pre_frame_num + 1 (modulo MaxFrameNum) up to, but excluding,
+ *      the current frame_num a placeholder picture flagged as
+ *      non_existing is stored in the DPB.
+ * \param img
+ *      image parameters providing pre_frame_num and frame_num;
+ *      nal_reference_idc is forced to 1 while filling and restored
+ *      afterwards, adaptive_ref_pic_buffering_flag is set to 0 for
+ *      every placeholder stored
+ ************************************************************************
+ */
+void fill_frame_num_gap(ImageParameters *img)
+{
+  const int max_frame_num     = 1 << (log2_max_frame_num_minus4 + 4);
+  const int saved_nal_ref_idc = img->nal_reference_idc;
+  const int target_frame_num  = img->frame_num;
+  int missing;
+  StorablePicture *filler;
+
+  img->nal_reference_idc = 1;
+
+  for (missing = (img->pre_frame_num + 1) % max_frame_num;
+       missing != target_frame_num;
+       missing = (missing + 1) % max_frame_num)
+  {
+    filler = alloc_storable_picture (FRAME, img->width, img->height, img->width_cr, img->height_cr);
+
+    filler->coded_frame  = 1;
+    filler->pic_num      = missing;
+    filler->non_existing = 1;
+    // flagged as already output, so it is never written out
+    filler->is_output    = 1;
+
+    img->adaptive_ref_pic_buffering_flag = 0;
+
+    store_picture_in_dpb(filler);
+  }
+
+  img->nal_reference_idc = saved_nal_ref_idc;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate co-located memory
+ *
+ * \param size_x
+ *    horizontal luma size
+ * \param size_y
+ *    vertical luma size
+ * \param mb_adaptive_frame_field_flag
+ *    flag that indicates macroblock adaptive frame/field coding; when
+ *    set, half-height top and bottom buffers are allocated in addition
+ *
+ * \return
+ *    the allocated ColocatedParams structure
+ ************************************************************************
+ */
+ColocatedParams* alloc_colocated(int size_x, int size_y, int mb_adaptive_frame_field_flag)
+{
+  // buffer dimensions in units of BLOCK_SIZE samples
+  const int blk_rows      = size_y / BLOCK_SIZE;
+  const int blk_cols      = size_x / BLOCK_SIZE;
+  const int half_blk_rows = size_y / BLOCK_SIZE/2;
+  ColocatedParams *s = calloc(1, sizeof(ColocatedParams));
+
+  if (s == NULL)
+  {
+    no_mem_exit("alloc_colocated: s");
+  }
+
+  s->size_x = size_x;
+  s->size_y = size_y;
+
+  // full-size buffers, two lists each where applicable
+  get_mem3D      ((byte****)(&(s->ref_idx)), 2, blk_rows, blk_cols);
+  get_mem3Dint64 (&(s->ref_pic_id),          2, blk_rows, blk_cols);
+  get_mem4Dshort (&(s->mv),                  2, blk_rows, blk_cols, 2);
+
+  get_mem2D      (&(s->moving_block), blk_rows, blk_cols);
+  get_mem2D      (&(s->field_frame),  blk_rows, blk_cols);
+
+  if (mb_adaptive_frame_field_flag)
+  {
+    // separate half-height buffers for the top field ...
+    get_mem3D      ((byte****)(&(s->top_ref_idx)), 2, half_blk_rows, blk_cols);
+    get_mem3Dint64 (&(s->top_ref_pic_id),          2, half_blk_rows, blk_cols);
+    get_mem4Dshort (&(s->top_mv),                  2, half_blk_rows, blk_cols, 2);
+    get_mem2D      (&(s->top_moving_block),           half_blk_rows, blk_cols);
+
+    // ... and for the bottom field
+    get_mem3D      ((byte****)(&(s->bottom_ref_idx)), 2, half_blk_rows, blk_cols);
+    get_mem3Dint64 (&(s->bottom_ref_pic_id),          2, half_blk_rows, blk_cols);
+    get_mem4Dshort (&(s->bottom_mv),                  2, half_blk_rows, blk_cols, 2);
+    get_mem2D      (&(s->bottom_moving_block),           half_blk_rows, blk_cols);
+  }
+
+  // remembered so that free_colocated() knows which buffers exist
+  s->mb_adaptive_frame_field_flag  = mb_adaptive_frame_field_flag;
+
+  return s;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free co-located memory.
+ *
+ * \param p
+ *    structure to be freed; NULL is accepted and ignored. The pointer
+ *    is passed by value, so the caller's copy is not reset.
+ *
+ ************************************************************************
+ */
+void free_colocated(ColocatedParams* p)
+{
+  if (p == NULL)
+    return;
+
+  free_mem3D      ((byte***)p->ref_idx, 2);
+  free_mem3Dint64 (p->ref_pic_id, 2);
+  free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE);
+
+  if (p->moving_block != NULL)
+  {
+    free_mem2D (p->moving_block);
+    p->moving_block = NULL;
+  }
+  if (p->field_frame != NULL)
+  {
+    free_mem2D (p->field_frame);
+    p->field_frame = NULL;
+  }
+
+  // the top / bottom buffers only exist when the flag was set at allocation time
+  if (p->mb_adaptive_frame_field_flag)
+  {
+    free_mem3D      ((byte***)p->top_ref_idx, 2);
+    free_mem3Dint64 (p->top_ref_pic_id, 2);
+    free_mem4Dshort (p->top_mv, 2, p->size_y / BLOCK_SIZE / 2);
+
+    if (p->top_moving_block != NULL)
+    {
+      free_mem2D (p->top_moving_block);
+      p->top_moving_block = NULL;
+    }
+
+    free_mem3D      ((byte***)p->bottom_ref_idx, 2);
+    free_mem3Dint64 (p->bottom_ref_pic_id, 2);
+    free_mem4Dshort (p->bottom_mv, 2, p->size_y / BLOCK_SIZE / 2);
+
+    if (p->bottom_moving_block != NULL)
+    {
+      free_mem2D (p->bottom_moving_block);
+      p->bottom_moving_block = NULL;
+    }
+  }
+
+  free(p);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Compute co-located motion info
+ *
+ ************************************************************************
+ */
+
+void compute_colocated(ColocatedParams* p, StorablePicture **listX[6])
+{
+  StorablePicture *fs, *fs_top, *fs_bottom;
+  int i,j, ii, jj, jdiv;
+
+  fs_top = fs_bottom = fs = listX[LIST_1 ][0];
+
+  if (img->MbaffFrameFlag)
+  {
+    fs_top= listX[LIST_1 + 2][0];
+    fs_bottom= listX[LIST_1 + 4][0];
+  }
+  else
+  {
+    if (img->structure!=FRAME)
+    {
+      if ((img->structure != fs->structure) && (fs->coded_frame))
+      {
+        if (img->structure==TOP_FIELD)
+        {
+          fs_top = fs_bottom = fs = listX[LIST_1 ][0]->top_field;
+        }
+        else
+        {
+          fs_top = fs_bottom = fs = listX[LIST_1 ][0]->bottom_field;
+        }
+      }
+    }
+  }
+
+  if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+  {
+    for (j=0 ; j<fs->size_y/4 ; j++)
+    {
+      jdiv = j/2;
+      jj = j/2 + 4 * (j/8);
+      for (i=0 ; i<fs->size_x/4 ; i++)
+      {
+
+        if (img->MbaffFrameFlag && fs->field_frame[j][i])
+        {
+          //! Assign frame buffers for field MBs
+          //! Check whether we should use top or bottom field mvs.
+          //! Depending on the assigned poc values.
+
+          if (iabs(enc_picture->poc - fs_bottom->poc) > iabs(enc_picture->poc - fs_top->poc) )
+          {
+            p->mv[LIST_0][j][i][0]    = fs_top->mv[LIST_0][jdiv][i][0];
+            p->mv[LIST_0][j][i][1]    = fs_top->mv[LIST_0][jdiv][i][1] ;
+            p->mv[LIST_1][j][i][0]    = fs_top->mv[LIST_1][jdiv][i][0];
+            p->mv[LIST_1][j][i][1]    = fs_top->mv[LIST_1][jdiv][i][1] ;
+            p->ref_idx[LIST_0][j][i]  = fs_top->ref_idx[LIST_0][jdiv][i];
+            p->ref_idx[LIST_1][j][i]  = fs_top->ref_idx[LIST_1][jdiv][i];
+            p->ref_pic_id[LIST_0][j][i]   = fs->ref_id[LIST_0][jj][i];
+            p->ref_pic_id[LIST_1][j][i]   = fs->ref_id[LIST_1][jj][i];
+
+            p->is_long_term             = fs_top->is_long_term;
+          }
+          else
+          {
+            p->mv[LIST_0][j][i][0]      = fs_bottom->mv[LIST_0][jdiv][i][0];
+            p->mv[LIST_0][j][i][1]      = fs_bottom->mv[LIST_0][jdiv][i][1] ;
+            p->mv[LIST_1][j][i][0]      = fs_bottom->mv[LIST_1][jdiv][i][0];
+            p->mv[LIST_1][j][i][1]      = fs_bottom->mv[LIST_1][jdiv][i][1] ;
+            p->ref_idx[LIST_0][j][i]    = fs_bottom->ref_idx[LIST_0][jdiv][i];
+            p->ref_idx[LIST_1][j][i]    = fs_bottom->ref_idx[LIST_1][jdiv][i];
+            p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj + 4][i];
+            p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj + 4][i];
+
+            p->is_long_term             = fs_bottom->is_long_term;
+          }
+        }
+        else
+        {
+          p->mv[LIST_0][j][i][0]      = fs->mv[LIST_0][j][i][0];
+          p->mv[LIST_0][j][i][1]      = fs->mv[LIST_0][j][i][1] ;
+          p->mv[LIST_1][j][i][0]      = fs->mv[LIST_1][j][i][0];
+          p->mv[LIST_1][j][i][1]      = fs->mv[LIST_1][j][i][1] ;
+          p->ref_idx[LIST_0][j][i]    = fs->ref_idx[LIST_0][j][i];
+          p->ref_idx[LIST_1][j][i]    = fs->ref_idx[LIST_1][j][i];
+          p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][j][i];
+          p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][j][i];
+
+          p->is_long_term             = fs->is_long_term;
+        }
+      }
+    }
+  }
+
+
+  //! Generate field MVs from Frame MVs
+  if (img->structure || img->MbaffFrameFlag)
+  {
+    for (j=0 ; j<fs->size_y/8 ; j++)
+    {
+      jj = RSD(j);
+      for (i=0 ; i<fs->size_x/4 ; i++)
+      {
+        ii = RSD(i);
+        //! Do nothing if macroblock as field coded in MB-AFF
+        if (!img->MbaffFrameFlag )
+        {
+          p->mv[LIST_0][j][i][0] = fs->mv[LIST_0][jj][ii][0];
+          p->mv[LIST_0][j][i][1] = fs->mv[LIST_0][jj][ii][1];
+          p->mv[LIST_1][j][i][0] = fs->mv[LIST_1][jj][ii][0];
+          p->mv[LIST_1][j][i][1] = fs->mv[LIST_1][jj][ii][1];
+
+          // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+
+          if (fs->ref_idx[LIST_0][jj][ii] == -1)
+          {
+            p->ref_idx   [LIST_0][j][i] = -1;
+            p->ref_pic_id[LIST_0][j][i] = -1;
+          }
+          else
+          {
+            p->ref_idx   [LIST_0][j][i] = fs->ref_idx[LIST_0][jj][ii] ;
+            p->ref_pic_id[LIST_0][j][i] = fs->ref_id [LIST_0][jj][ii];
+          }
+
+          if (fs->ref_idx[LIST_1][jj][ii] == -1)
+          {
+            p->ref_idx   [LIST_1][j][i] = -1;
+            p->ref_pic_id[LIST_1][j][i] = -1;
+          }
+          else
+          {
+            p->ref_idx   [LIST_1][j][i] = fs->ref_idx[LIST_1][jj][ii];
+            p->ref_pic_id[LIST_1][j][i] = fs->ref_id [LIST_1][jj][ii];
+          }
+
+          p->is_long_term = fs->is_long_term;
+
+          if (img->direct_spatial_mv_pred_flag == 1)
+          {
+            p->moving_block[j][i] =
+              !((!p->is_long_term
+              && ((p->ref_idx[LIST_0][j][i] == 0)
+              &&  (iabs(p->mv[LIST_0][j][i][0])>>1 == 0)
+              &&  (iabs(p->mv[LIST_0][j][i][1])>>1 == 0)))
+              || ((p->ref_idx[LIST_0][j][i] == -1)
+              &&  (p->ref_idx[LIST_1][j][i] == 0)
+              &&  (iabs(p->mv[LIST_1][j][i][0])>>1 == 0)
+              &&  (iabs(p->mv[LIST_1][j][i][1])>>1 == 0)));
+          }
+        }
+        else
+        {
+          p->bottom_mv[LIST_0][j][i][0] = fs_bottom->mv[LIST_0][jj][ii][0];
+          p->bottom_mv[LIST_0][j][i][1] = fs_bottom->mv[LIST_0][jj][ii][1];
+          p->bottom_mv[LIST_1][j][i][0] = fs_bottom->mv[LIST_1][jj][ii][0];
+          p->bottom_mv[LIST_1][j][i][1] = fs_bottom->mv[LIST_1][jj][ii][1];
+          p->bottom_ref_idx[LIST_0][j][i] = fs_bottom->ref_idx[LIST_0][jj][ii];
+          p->bottom_ref_idx[LIST_1][j][i] = fs_bottom->ref_idx[LIST_1][jj][ii];
+          p->bottom_ref_pic_id[LIST_0][j][i] = fs_bottom->ref_id[LIST_0][jj][ii];
+          p->bottom_ref_pic_id[LIST_1][j][i] = fs_bottom->ref_id[LIST_1][jj][ii];
+
+          if (img->direct_spatial_mv_pred_flag == 1)
+          {
+            p->bottom_moving_block[j][i] =
+              !((!fs_bottom->is_long_term
+              && ((p->bottom_ref_idx[LIST_0][j][i] == 0)
+              &&  (iabs(p->bottom_mv[LIST_0][j][i][0])>>1 == 0)
+              &&  (iabs(p->bottom_mv[LIST_0][j][i][1])>>1 == 0)))
+              || ((p->bottom_ref_idx[LIST_0][j][i] == -1)
+              &&  (p->bottom_ref_idx[LIST_1][j][i] == 0)
+              &&  (iabs(p->bottom_mv[LIST_1][j][i][0])>>1 == 0)
+              &&  (iabs(p->bottom_mv[LIST_1][j][i][1])>>1 == 0)));
+          }
+
+          p->top_mv[LIST_0][j][i][0] = fs_top->mv[LIST_0][jj][ii][0];
+          p->top_mv[LIST_0][j][i][1] = fs_top->mv[LIST_0][jj][ii][1];
+          p->top_mv[LIST_1][j][i][0] = fs_top->mv[LIST_1][jj][ii][0];
+          p->top_mv[LIST_1][j][i][1] = fs_top->mv[LIST_1][jj][ii][1];
+          p->top_ref_idx[LIST_0][j][i] = fs_top->ref_idx[LIST_0][jj][ii];
+          p->top_ref_idx[LIST_1][j][i] = fs_top->ref_idx[LIST_1][jj][ii];
+          p->top_ref_pic_id[LIST_0][j][i] = fs_top->ref_id[LIST_0][jj][ii];
+          p->top_ref_pic_id[LIST_1][j][i] = fs_top->ref_id[LIST_1][jj][ii];
+
+          if (img->direct_spatial_mv_pred_flag == 1)
+          {
+            p->top_moving_block[j][i] =
+              !((!fs_top->is_long_term
+              && ((p->top_ref_idx[LIST_0][j][i] == 0)
+              &&  (iabs(p->top_mv[LIST_0][j][i][0])>>1 == 0)
+              &&  (iabs(p->top_mv[LIST_0][j][i][1])>>1 == 0)))
+              || ((p->top_ref_idx[LIST_0][j][i] == -1)
+              &&  (p->top_ref_idx[LIST_1][j][i] == 0)
+              &&  (iabs(p->top_mv[LIST_1][j][i][0])>>1 == 0)
+              &&  (iabs(p->top_mv[LIST_1][j][i][1])>>1 == 0)));
+          }
+
+          if ((img->direct_spatial_mv_pred_flag == 0 ) && !fs->field_frame[2*j][i])
+          {
+            p->top_mv[LIST_0][j][i][1] /= 2;
+            p->top_mv[LIST_1][j][i][1] /= 2;
+            p->bottom_mv[LIST_0][j][i][1] /= 2;
+            p->bottom_mv[LIST_1][j][i][1] /= 2;
+          }
+
+        }
+      }
+    }
+  }
+
+
+  if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+  {
+    //! Use inference flag to remap mvs/references
+    //! Frame with field co-located
+
+    if (!img->structure)
+    {
+      for (j=0 ; j < (fs->size_y>>2) ; j++)
+      {
+        jdiv = j>>1;
+        jj = (j>>1) + 4*(j>>3);
+        for (i=0 ; i < (fs->size_x>>2) ; i++)
+        {
+
+          if (fs->field_frame[j][i])
+          {
+            if (iabs(enc_picture->poc - fs->bottom_field->poc) > iabs(enc_picture->poc - fs->top_field->poc))
+            {
+              p->mv[LIST_0][j][i][0] = fs->top_field->mv[LIST_0][jdiv][i][0];
+              p->mv[LIST_0][j][i][1] = fs->top_field->mv[LIST_0][jdiv][i][1] ;
+              p->mv[LIST_1][j][i][0] = fs->top_field->mv[LIST_1][jdiv][i][0];
+              p->mv[LIST_1][j][i][1] = fs->top_field->mv[LIST_1][jdiv][i][1] ;
+
+              p->ref_idx[LIST_0][j][i]  = fs->top_field->ref_idx[LIST_0][jdiv][i];
+              p->ref_idx[LIST_1][j][i]  = fs->top_field->ref_idx[LIST_1][jdiv][i];
+              p->ref_pic_id[LIST_0][j][i]   = fs->ref_id[LIST_0][jj][i];
+              p->ref_pic_id[LIST_1][j][i]   = fs->ref_id[LIST_1][jj][i];
+              p->is_long_term               = fs->top_field->is_long_term;
+            }
+            else
+            {
+              p->mv[LIST_0][j][i][0] = fs->bottom_field->mv[LIST_0][jdiv][i][0];
+              p->mv[LIST_0][j][i][1] = fs->bottom_field->mv[LIST_0][jdiv][i][1] ;
+              p->mv[LIST_1][j][i][0] = fs->bottom_field->mv[LIST_1][jdiv][i][0];
+              p->mv[LIST_1][j][i][1] = fs->bottom_field->mv[LIST_1][jdiv][i][1] ;
+
+              p->ref_idx[LIST_0][j][i]  = fs->bottom_field->ref_idx[LIST_0][jdiv][i];
+              p->ref_idx[LIST_1][j][i]  = fs->bottom_field->ref_idx[LIST_1][jdiv][i];
+              p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj + 4][i];
+              p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj + 4][i];
+              p->is_long_term             = fs->bottom_field->is_long_term;
+            }
+          }
+        }
+      }
+    }
+  }
+
+
+  p->is_long_term = fs->is_long_term;
+
+  if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+  {
+    for (j=0 ; j < (fs->size_y>>2) ; j++)
+    {
+      jj = RSD(j);
+      for (i=0 ; i < (fs->size_x>>2) ; i++)
+      {
+        ii = RSD(i);
+
+        p->mv[LIST_0][j][i][0]=p->mv[LIST_0][jj][ii][0];
+        p->mv[LIST_0][j][i][1]=p->mv[LIST_0][jj][ii][1];
+        p->mv[LIST_1][j][i][0]=p->mv[LIST_1][jj][ii][0];
+        p->mv[LIST_1][j][i][1]=p->mv[LIST_1][jj][ii][1];
+
+        p->ref_idx[LIST_0][j][i]=p->ref_idx[LIST_0][jj][ii];
+        p->ref_idx[LIST_1][j][i]=p->ref_idx[LIST_1][jj][ii];
+        p->ref_pic_id[LIST_0][j][i] = p->ref_pic_id[LIST_0][jj][ii];
+        p->ref_pic_id[LIST_1][j][i] = p->ref_pic_id[LIST_1][jj][ii];
+
+        if (img->direct_spatial_mv_pred_flag == 1)
+        {
+          p->moving_block[j][i]=
+            !((!p->is_long_term
+            && ((p->ref_idx[LIST_0][j][i] == 0)
+            &&  (iabs(p->mv[LIST_0][j][i][0])>>1 == 0)
+            &&  (iabs(p->mv[LIST_0][j][i][1])>>1 == 0)))
+            || ((p->ref_idx[LIST_0][j][i] == -1)
+            &&  (p->ref_idx[LIST_1][j][i] == 0)
+            &&  (iabs(p->mv[LIST_1][j][i][0])>>1 == 0)
+            &&  (iabs(p->mv[LIST_1][j][i][1])>>1 == 0)));
+        }
+      }
+    }
+  }
+  else
+  {
+    for (j=0 ; j<fs->size_y/4 ; j++)
+    {
+      jj = RSD(j);
+      for (i=0 ; i<fs->size_x/4 ; i++)
+      {
+        ii = RSD(i);
+        //! Use inference flag to remap mvs/references
+        p->mv[LIST_0][j][i][0]=fs->mv[LIST_0][j][i][0];
+        p->mv[LIST_0][j][i][1]=fs->mv[LIST_0][j][i][1];
+        p->mv[LIST_1][j][i][0]=fs->mv[LIST_1][j][i][0];
+        p->mv[LIST_1][j][i][1]=fs->mv[LIST_1][j][i][1];
+
+        p->ref_idx[LIST_0][j][i]=fs->ref_idx[LIST_0][j][i];
+        p->ref_idx[LIST_1][j][i]=fs->ref_idx[LIST_1][j][i];
+        p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][j][i];
+        p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][j][i];
+
+        if (img->direct_spatial_mv_pred_flag == 1)
+        {
+          p->moving_block[j][i]=
+            !((!p->is_long_term
+            && ((p->ref_idx[LIST_0][j][i] == 0)
+            &&  (iabs(p->mv[LIST_0][j][i][0])>>1 == 0)
+            &&  (iabs(p->mv[LIST_0][j][i][1])>>1 == 0)))
+            || ((p->ref_idx[LIST_0][j][i] == -1)
+            &&  (p->ref_idx[LIST_1][j][i] == 0)
+            &&  (iabs(p->mv[LIST_1][j][i][0])>>1 == 0)
+            &&  (iabs(p->mv[LIST_1][j][i][1])>>1 == 0)));
+        }
+      }
+    }
+  }
+
+
+  if (img->direct_spatial_mv_pred_flag ==0)
+  {
+    for (j=0 ; j<fs->size_y/4 ; j++)
+    {
+      for (i=0 ; i<fs->size_x/4 ; i++)
+      {
+        if ((!img->MbaffFrameFlag &&!img->structure && fs->field_frame[j][i]) || (img->MbaffFrameFlag && fs->field_frame[j][i]))
+        {
+          p->mv[LIST_0][j][i][1] *= 2;
+          p->mv[LIST_1][j][i][1] *= 2;
+        }
+        else  if (img->structure && !fs->field_frame[j][i])
+        {
+          p->mv[LIST_0][j][i][1] /= 2;
+          p->mv[LIST_1][j][i][1] /= 2;
+        }
+
+      }
+    }
+
+    for (j=0; j<2 + (img->MbaffFrameFlag * 4);j+=2)
+    {
+      for (i=0; i<listXsize[j];i++)
+      {
+        int prescale, iTRb, iTRp;
+
+        if (j==0)
+        {
+          iTRb = iClip3( -128, 127, enc_picture->poc - listX[LIST_0 + j][i]->poc );
+        }
+        else if (j == 2)
+        {
+          iTRb = iClip3( -128, 127, enc_picture->top_poc - listX[LIST_0 + j][i]->poc );
+        }
+        else
+        {
+          iTRb = iClip3( -128, 127, enc_picture->bottom_poc - listX[LIST_0 + j][i]->poc );
+        }
+
+        iTRp = iClip3( -128, 127,  listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc);
+
+        if (iTRp!=0)
+        {
+          prescale = ( 16384 + iabs( iTRp / 2 ) ) / iTRp;
+          img->mvscale[j][i] = iClip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ;
+        }
+        else
+        {
+          img->mvscale[j][i] = 9999;
+        }
+      }
+    }
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/mbuffer.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mbuffer.h:1.3
--- /dev/null	Sun Feb  4 08:38:54 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mbuffer.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,194 @@
+
+/*!
+ ***********************************************************************
+ *  \file
+ *      mbuffer.h
+ *
+ *  \brief
+ *      Frame buffer functions
+ *
+ *  \author
+ *      Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Karsten Sühring          <suehring at hhi.de>
+ ***********************************************************************
+ */
+#ifndef _MBUFFER_H_
+#define _MBUFFER_H_
+
+#define MAX_LIST_SIZE 33
+
+//! definition of a stored picture (field or frame) together with its per-block motion data
+typedef struct storable_picture
+{
+  PictureStructure structure;
+
+  int         poc;
+  int         top_poc;
+  int         bottom_poc;
+  int         frame_poc;
+  int         order_num;
+  int64       ref_pic_num[6][MAX_LIST_SIZE];
+  int64       frm_ref_pic_num[6][MAX_LIST_SIZE];
+  int64       top_ref_pic_num[6][MAX_LIST_SIZE];
+  int64       bottom_ref_pic_num[6][MAX_LIST_SIZE];
+  unsigned    frame_num;
+  int         pic_num;
+  int         long_term_pic_num;
+  int         long_term_frame_idx;
+
+  int         is_long_term;
+  int         used_for_reference;
+  int         is_output;
+  int         non_existing;
+
+  int         size_x, size_y, size_x_cr, size_y_cr;  //!< compute_colocated walks mv/ref_idx over size_y>>2 rows and size_x>>2 columns
+  int         size_x_pad, size_y_pad;
+  int         size_x_cr_pad, size_y_cr_pad;
+  int         chroma_vector_adjustment;
+  int         coded_frame;
+  int         MbaffFrameFlag;
+
+  imgpel **   imgY;          //!< Y picture component
+  imgpel ****   imgY_sub;      //!< Y picture component upsampled (Quarter pel)
+  imgpel ****   imgY_sub_w;    //!< Y picture component upsampled (Quarter pel) for weighted prediction
+  imgpel *****  imgUV_sub;      //!< UV picture component upsampled (Quarter/One-Eighth pel)
+  imgpel ***  imgUV;         //!< U and V picture components
+
+  byte *      mb_field;      //!< field macroblock indicator
+
+  char  ***   ref_idx;       //!< reference picture index [list][subblock_y][subblock_x]; compute_colocated tests it against -1 and 0
+
+  int64 ***   ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+                             //   (not simply an index)
+
+  int64 ***   ref_id;        //!< reference picture identifier [list][subblock_y][subblock_x]
+                             //   (not simply an index); copied into ColocatedParams::ref_pic_id by compute_colocated
+
+  short ****  mv;            //!< motion vector       [list][subblock_y][subblock_x][component] (component 0 / 1; presumably x / y -- confirm)
+
+  byte **     moving_block;
+  byte **     field_frame;         //!< indicates if co_located is field or frame, indexed [subblock_y][subblock_x]
+
+  struct storable_picture *top_field;     // for mb aff, if frame for referencing the top field
+  struct storable_picture *bottom_field;  // for mb aff, if frame for referencing the bottom field
+  struct storable_picture *frame;         // for mb aff, if field for referencing the combined frame
+
+  int         chroma_format_idc;
+  int         frame_mbs_only_flag;
+  int         frame_cropping_flag;
+  int         frame_cropping_rect_left_offset;
+  int         frame_cropping_rect_right_offset;
+  int         frame_cropping_rect_top_offset;
+  int         frame_cropping_rect_bottom_offset;
+} StorablePicture;
+
+
+//! co-located motion parameters (frame set plus separate top/bottom field sets), filled by compute_colocated()
+typedef struct colocated_params
+{
+  int         mb_adaptive_frame_field_flag;
+  int         size_x, size_y;
+
+  int64       ref_pic_num[6][MAX_LIST_SIZE];
+
+  char  ***   ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+  int64 ***   ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+  short ****  mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+  byte **     moving_block;  //!< [subblock_y][subblock_x]; written by compute_colocated only when direct_spatial_mv_pred_flag == 1
+
+  // Top field params
+  int64       top_ref_pic_num[6][MAX_LIST_SIZE];
+  char  ***   top_ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+  int64 ***   top_ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+  short ****  top_mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+  byte **     top_moving_block;
+
+  // Bottom field params
+  int64       bottom_ref_pic_num[6][MAX_LIST_SIZE];
+  char  ***   bottom_ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+  int64 ***   bottom_ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+  short ****  bottom_mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+  byte **     bottom_moving_block;
+
+  byte        is_long_term;        //!< copied from the source picture's is_long_term in compute_colocated
+  byte **     field_frame;         //!< indicates if co_located is field or frame.
+
+} ColocatedParams;
+
+//! Frame Stores for Decoded Picture Buffer
+typedef struct frame_store
+{
+  int       is_used;                //!< 0=empty; 1=top; 2=bottom; 3=both fields (or frame)
+  int       is_reference;           //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+  int       is_long_term;           //!< 0=not used for long-term ref; 1=top used; 2=bottom used; 3=both fields (or frame) used (presumably mirrors is_reference -- confirm)
+  int       is_orig_reference;      //!< original marking by nal_ref_idc: 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+
+  int       is_non_existent;
+
+  unsigned  frame_num;
+  int       frame_num_wrap;
+  int       long_term_frame_idx;
+  int       is_output;
+  int       poc;
+
+  StorablePicture *frame;           //!< presumably the combined frame picture -- confirm against dpb_combine_field()
+  StorablePicture *top_field;       //!< presumably the top field picture -- confirm against dpb_split_field()
+  StorablePicture *bottom_field;    //!< presumably the bottom field picture -- confirm against dpb_split_field()
+
+} FrameStore;
+
+
+//! Decoded Picture Buffer
+typedef struct decoded_picture_buffer
+{
+  FrameStore  **fs;                      //!< array of frame store pointers
+  FrameStore  **fs_ref;                  //!< presumably the stores used as short-term references -- confirm
+  FrameStore  **fs_ltref;                //!< presumably the stores used as long-term references -- confirm
+  unsigned      size;
+  unsigned      used_size;
+  unsigned      ref_frames_in_buffer;
+  unsigned      ltref_frames_in_buffer;
+  int           last_output_poc;
+  int           max_long_term_pic_idx;
+
+  int           init_done;
+
+  FrameStore   *last_picture;
+} DecodedPictureBuffer;
+
+
+extern DecodedPictureBuffer dpb;
+extern StorablePicture **listX[6];   //!< reference picture lists; compute_colocated reads LIST_0/LIST_1 and, with MbaffFrameFlag, the same pair offset by 2 and 4
+extern int listXsize[6];             //!< number of entries in each listX[] list
+
+void             init_dpb(void);
+void             free_dpb(void);
+FrameStore*      alloc_frame_store(void);
+void             free_frame_store(FrameStore* f);
+StorablePicture* alloc_storable_picture(PictureStructure type, int size_x, int size_y, int size_x_cr, int size_y_cr);
+void             free_storable_picture(StorablePicture* p);
+void             store_picture_in_dpb(StorablePicture* p);
+void             replace_top_pic_with_frame(StorablePicture* p);
+void             flush_dpb(void);
+
+void             dpb_split_field(FrameStore *fs);
+void             dpb_combine_field(FrameStore *fs);
+void             dpb_combine_field_yuv(FrameStore *fs);
+
+void             init_lists(int currSliceType, PictureStructure currPicStructure);
+void             reorder_ref_pic_list(StorablePicture **list, int *list_size,
+                                      int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc,
+                                      int *abs_diff_pic_num_minus1, int *long_term_pic_idx);
+
+void             init_mbaff_lists(void);
+void             alloc_ref_pic_list_reordering_buffer(Slice *currSlice);
+void             free_ref_pic_list_reordering_buffer(Slice *currSlice);
+
+void             fill_frame_num_gap(ImageParameters *img);
+
+ColocatedParams* alloc_colocated(int size_x, int size_y,int mb_adaptive_frame_field_flag);
+void free_colocated(ColocatedParams* p);
+void compute_colocated(ColocatedParams* p, StorablePicture **listX[6]);  // fills p's mv/ref_idx/ref_pic_id/moving_block; sets img->mvscale when direct_spatial_mv_pred_flag == 0
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/md_high.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/md_high.c:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/md_high.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,459 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file md_high.c
+  *
+  * \brief
+  *    Main macroblock mode decision functions and helpers
+  *
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <float.h>
+ #include <memory.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "rdopt_coding_state.h"
+ #include "mb_access.h"
+ #include "intrarefresh.h"
+ #include "image.h"
+ #include "transform8x8.h"
+ #include "ratectl.h"
+ #include "mode_decision.h"
+ #include "fmo.h"
+ #include "me_umhex.h"
+ #include "me_umhexsmp.h"
+ #include "macroblock.h"
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Mode decision for one macroblock: inter motion search, then best-mode selection by RD cost
+ *************************************************************************************
+ */
+ void encode_one_macroblock_high ()
+ {
+   int         max_index;
+   int         block, index, mode, i, j, ctr16x16, MEPos;
+   char        best_pdir;
+   RD_PARAMS   enc_mb;
+   double      min_rdcost, max_rdcost=1e30;
+   char        best_ref[2] = {0, -1};
+   int         bmcost[5] = {INT_MAX}; // NOTE: only bmcost[0] starts at INT_MAX; the other four entries are zero-initialized
+   int         cost=0;
+   int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode=0;
+   int         intra1 = 0;
+   int         cost8x8_direct = 0;
+   short       islice      = (short) (img->type==I_SLICE);
+   short       bslice      = (short) (img->type==B_SLICE);
+   short       pslice      = (short) ((img->type==P_SLICE) || (img->type==SP_SLICE));
+   short       intra       = (short) (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
+   int         lambda_mf[3];
+ 
+   Macroblock* currMB      = &img->mb_data[img->current_mb_nr];
+   int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
+   Macroblock* prevMB      = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr]:NULL ; // NULL when prev_mb_nr is negative
+ 
+   short   min_chroma_pred_mode, max_chroma_pred_mode;
+ 
+   short inter_skip = 0;
+   double min_rate = 0;
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_decide_intrabk_SAD();
+   }
+   else if (input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_decide_intrabk_SAD();
+   }
+ 
+   intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra
+ 
+   //===== Setup Macroblock encoding parameters =====
+   init_enc_mb_params(currMB, &enc_mb, intra, bslice);
+ 
+   // reset chroma intra predictor to default
+   currMB->c_ipred_mode = DC_PRED_8;
+ 
+   //=====   S T O R E   C O D I N G   S T A T E   =====
+   //---------------------------------------------------
+   store_coding_state (cs_cm);
+ 
+   if (!intra)
+   {
+     //===== set direct motion vectors =====
+     best_mode = 1;
+     if (bslice)
+     {
+       Get_Direct_Motion_Vectors ();
+     }
+ 
+     if (input->CtxAdptLagrangeMult == 1)
+     {
+       get_initial_mb16x16_cost();
+     }
+ 
+ 
+     //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
+     for (min_cost=INT_MAX, mode=1; mode<4; mode++)
+     {
+       bi_pred_me = 0;
+       img->bi_pred_me[mode]=0;
+       if (enc_mb.valid[mode])
+       {
+         for (cost=0, block=0; block<(mode==1?1:2); block++)
+         {
+           for (MEPos = 0; MEPos < 3; MEPos ++)
+           {
+             lambda_mf[MEPos] = input->CtxAdptLagrangeMult == 0 ? enc_mb.lambda_mf[MEPos] : (int)(enc_mb.lambda_mf[MEPos] * sqrt(lambda_mf_factor));
+           }
+           PartitionMotionSearch (mode, block, lambda_mf);
+ 
+           //--- set 4x4 block indices (for getting MV) ---
+           j = (block==1 && mode==2 ? 2 : 0);
+           i = (block==1 && mode==3 ? 2 : 0);
+ 
+           //--- get cost and reference frame for List 0 prediction ---
+           bmcost[LIST_0] = INT_MAX;
+           list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+ 
+           if (bslice)
+           {
+             //--- get cost and reference frame for List 1 prediction ---
+             bmcost[LIST_1] = INT_MAX;
+             list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+ 
+             // Compute bipredictive cost between best list 0 and best list 1 references
+             list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+ 
+             // Finally, if mode 16x16, compute cost for bipredictive ME vectors
+             if (input->BiPredMotionEstimation && mode == 1)
+             {
+               list_prediction_cost(BI_PRED_L0, block, mode, enc_mb, bmcost, 0);
+               list_prediction_cost(BI_PRED_L1, block, mode, enc_mb, bmcost, 0);
+             }
+             else
+             {
+               bmcost[BI_PRED_L0] = INT_MAX;
+               bmcost[BI_PRED_L1] = INT_MAX;
+             }
+ 
+             // Determine prediction list based on mode cost
+             determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bi_pred_me);
+           }
+           else // !bslice: only the list 0 cost is used
+           {
+             best_pdir  = 0;
+             cost      += bmcost[LIST_0];
+           }
+ 
+           assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice);
+ 
+           //----- set reference frame and direction parameters -----
+           if (mode==3)
+           {
+             best8x8fwref [3][block  ] = best8x8fwref [3][  block+2] = best_ref[LIST_0];
+             best8x8pdir  [3][block  ] = best8x8pdir  [3][  block+2] = best_pdir;
+             best8x8bwref [3][block  ] = best8x8bwref [3][  block+2] = best_ref[LIST_1];
+           }
+           else if (mode==2)
+           {
+             best8x8fwref [2][2*block] = best8x8fwref [2][2*block+1] = best_ref[LIST_0];
+             best8x8pdir  [2][2*block] = best8x8pdir  [2][2*block+1] = best_pdir;
+             best8x8bwref [2][2*block] = best8x8bwref [2][2*block+1] = best_ref[LIST_1];
+           }
+           else
+           {
+             memset(&best8x8fwref [1][0], best_ref[LIST_0], 4 * sizeof(char));
+             memset(&best8x8bwref [1][0], best_ref[LIST_1], 4 * sizeof(char));
+             best8x8pdir  [1][0] = best8x8pdir  [1][1] = best8x8pdir  [1][2] = best8x8pdir  [1][3] = best_pdir;
+           }
+ 
+           //--- set reference frames and motion vectors ---
+           if (mode>1 && block==0)
+             SetRefAndMotionVectors (block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
+         } // for (block=0; block<(mode==1?1:2); block++)
+ 
+         if (cost < min_cost)
+         {
+           best_mode = mode;
+           min_cost  = cost;
+           if (input->CtxAdptLagrangeMult == 1)
+           {
+             adjust_mb16x16_cost(cost);
+           }
+         }
+       } // if (enc_mb.valid[mode])
+     } // for (mode=1; mode<4; mode++)
+ 
+     if (enc_mb.valid[P8x8])
+     {
+       giRDOpt_B8OnlyFlag = 1;
+ 
+       tr8x8.cost8x8 = INT_MAX;
+       tr4x4.cost8x8 = INT_MAX;
+       //===== store coding state of macroblock =====
+       store_coding_state (cs_mb);
+ 
+       currMB->all_blk_8x8 = -1;
+ 
+       if (input->Transform8x8Mode)
+       {
+         tr8x8.cost8x8 = 0;
+         //===========================================================
+         // Check 8x8 partition with transform size 8x8
+         //===========================================================
+         //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+         for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+         {
+           submacroblock_mode_decision(enc_mb, &tr8x8, currMB, cofAC_8x8ts[block],
+             &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1);
+           best8x8mode       [block] = tr8x8.part8x8mode [block];
+           best8x8pdir [P8x8][block] = tr8x8.part8x8pdir [block];
+           best8x8fwref[P8x8][block] = tr8x8.part8x8fwref[block];
+           best8x8bwref[P8x8][block] = tr8x8.part8x8bwref[block];
+         }
+ 
+         // following params could be added in RD_8x8DATA structure
+         cbp8_8x8ts      = cbp8x8;
+         cbp_blk8_8x8ts  = cbp_blk8x8;
+         cnt_nonz8_8x8ts = cnt_nonz_8x8;
+         currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size
+ 
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         //reset_coding_state (cs_mb);
+       }// if (input->Transform8x8Mode)
+ 
+ 
+       if (input->Transform8x8Mode != 2)
+       {
+         tr4x4.cost8x8 = 0;
+         //=================================================================
+         // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
+         //=================================================================
+         //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+         for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+         {
+           submacroblock_mode_decision(enc_mb, &tr4x4, currMB, cofAC8x8[block],
+             &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0);
+ 
+           best8x8mode       [block] = tr4x4.part8x8mode [block];
+           best8x8pdir [P8x8][block] = tr4x4.part8x8pdir [block];
+           best8x8fwref[P8x8][block] = tr4x4.part8x8fwref[block];
+           best8x8bwref[P8x8][block] = tr4x4.part8x8bwref[block];
+         }
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         // reset_coding_state (cs_mb);
+       }// if (input->Transform8x8Mode != 2)
+ 
+       //--- re-set coding state (as it was before 8x8 block coding) ---
+       reset_coding_state (cs_mb);
+ 
+ 
+       // This is not enabled yet since mpr has reverse order.
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+       //check cost for P8x8 for non-rdopt mode
+       giRDOpt_B8OnlyFlag = 0;
+     }
+     else // if (enc_mb.valid[P8x8])
+     {
+       tr4x4.cost8x8 = INT_MAX;
+     }
+ 
+     // Find a motion vector for the Skip mode
+     if(pslice)
+       FindSkipModeMotionVector ();
+   }
+   else // if (!intra)
+   {
+     min_cost = INT_MAX;
+   }
+ 
+   //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
+   //-------------------------------------------------------------------------
+ 
+   {
+     // store_coding_state (cs_cm);
+     int mb_available_up;
+     int mb_available_left;
+     int mb_available_up_left;
+ 
+     min_rdcost = max_rdcost;
+     max_index = 9;
+ 
+     if (input->BiPredMotionEstimation)
+       img->bi_pred_me[1] =0;
+ 
+     if (img->yuv_format != YUV400)
+     {
+       // precompute all new chroma intra prediction modes
+       IntraChromaPrediction(&mb_available_up, &mb_available_left, &mb_available_up_left);
+ 
+       if (input->FastCrIntraDecision)
+       {
+         IntraChromaRDDecision(enc_mb);
+         min_chroma_pred_mode = (short) currMB->c_ipred_mode;
+         max_chroma_pred_mode = (short) currMB->c_ipred_mode;
+       }
+       else
+       {
+         min_chroma_pred_mode = DC_PRED_8;
+         max_chroma_pred_mode = PLANE_8;
+       }
+     }
+     else
+     {
+       min_chroma_pred_mode = DC_PRED_8;
+       max_chroma_pred_mode = DC_PRED_8;
+     }
+ 
+     for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+     {
+       // bypass if c_ipred_mode is not allowed
+       if ( (img->yuv_format != YUV400) &&
+         (  ((!intra || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8)
+         || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up)
+         || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left)
+         || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+         continue;
+ 
+       //===== GET BEST MACROBLOCK MODE =====
+       for (ctr16x16=0, index=0; index < max_index; index++)
+       {
+         mode = mb_mode_table[index];
+ 
+         if (img->yuv_format != YUV400)
+         {
+           mode = mb_mode_table[index]; // NOTE(review): repeats the assignment made just above
+           i16mode = 0;
+         }
+         //--- for INTER16x16 check all prediction directions ---
+         if (mode==1 && bslice)
+         {
+           best8x8pdir[1][0] = best8x8pdir[1][1] = best8x8pdir[1][2] = best8x8pdir[1][3] = (char) ctr16x16;
+ 
+           if ( (bslice) && (input->BiPredMotionEstimation)
+             && (ctr16x16 == 2 && img->bi_pred_me[mode] < 2 && mode == 1))
+             ctr16x16--;
+           if (ctr16x16 < 2)
+             index--;
+           ctr16x16++;
+         }
+ 
+         // Skip intra modes in inter slices if best inter mode is
+         // a MB partition and cbp is 0.
+         if (input->SkipIntraInInterSlices && !intra && mode >= I16MB
+           && best_mode <=3 && currMB->cbp == 0)
+           continue;
+ 
+         // check if weights are in valid range for biprediction.
+         if (bslice && active_pps->weighted_bipred_idc == 1 && mode < P8x8)
+         {
+           int cur_blk, cur_comp;
+           int weight_sum;
+           Boolean invalid_mode = FALSE;
+           for (cur_blk = 0; cur_blk < 4; cur_blk ++)
+           {
+             if (best8x8pdir[mode][cur_blk] == 2)
+             {
+               for (cur_comp = 0; cur_comp < (active_sps->chroma_format_idc == YUV400 ? 1 : 3) ; cur_comp ++)
+               {
+                 weight_sum =
+                      wbp_weight[0][(int) best8x8fwref[mode][cur_blk]][(int) best8x8bwref[mode][cur_blk]][cur_comp] +
+                      wbp_weight[1][(int) best8x8fwref[mode][cur_blk]][(int) best8x8bwref[mode][cur_blk]][cur_comp];
+ 
+                 if (weight_sum < -128 ||  weight_sum > 127)
+                 {
+                   invalid_mode = TRUE;
+                   break;
+                 }
+               }
+               if (invalid_mode == TRUE)
+                 break;
+             }
+           }
+           if (invalid_mode == TRUE)
+           {
+             if ((input->BiPredMotionEstimation) && ctr16x16 == 2
+               && img->bi_pred_me[mode] < 2 && mode == 1)
+               img->bi_pred_me[mode] = (short) (img->bi_pred_me[mode] + 1);
+             continue;
+           }
+         }
+ 
+         if (enc_mb.valid[mode])
+           compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+ 
+         if ((input->BiPredMotionEstimation) && (bslice) && ctr16x16 == 2
+           && img->bi_pred_me[mode] < 2 && mode == 1 && best8x8pdir[1][0] == 2)
+           img->bi_pred_me[mode] = (short) (img->bi_pred_me[mode] + 1);
+       }// for (ctr16x16=0, index=0; index<max_index; index++)
+     }// for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+ 
+ #ifdef BEST_NZ_COEFF
+     for (j=0;j<4;j++)
+       for (i=0; i<(4+img->num_blk8x8_uv); i++)
+         img->nz_coeff[img->current_mb_nr][j][i] = gaaiMBAFF_NZCoeff[j][i];
+ #endif
+   }
+ 
+   intra1 = IS_INTRA(currMB);
+ 
+   //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
+   //---------------------------------------------------------------------------
+ 
+   if (((cbp!=0 || best_mode==I16MB) && (best_mode!=IPCM) ))
+     currMB->prev_cbp = 1;
+   else if ((cbp==0 && !input->RCEnable) || (best_mode==IPCM))
+   {
+     currMB->delta_qp = 0;
+     currMB->qp       = currMB->prev_qp;
+     set_chroma_qp(currMB);
+     img->qp          = currMB->qp;
+     currMB->prev_cbp = 0;
+   }
+   set_stored_macroblock_parameters ();
+ 
+   // Rate control
+   if(input->RCEnable)
+     update_rc(currMB, best_mode);
+ 
+   rdopt->min_rdcost = min_rdcost;
+ 
+   if ( (img->MbaffFrameFlag)
+     && (img->current_mb_nr%2)
+     && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
+     && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))  // top is skip (NOTE(review): prevMB is not NULL-checked here)
+     && !(field_flag_inference() == enc_mb.curr_mb_field)) // inferred field flag differs from curr_mb_field
+   {
+     rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
+   }
+ 
+   //===== Decide if this MB will restrict the reference frames =====
+   if (input->RestrictRef)
+     update_refresh_map(intra, intra1, currMB);
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+   else if(input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+ 
+   //--- constrain intra prediction ---
+   if(input->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
+   {
+     img->intra_block[img->current_mb_nr] = IS_INTRA(currMB);
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/md_highfast.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/md_highfast.c:1.1
*** /dev/null	Sun Feb  4 08:38:54 2007
--- llvm-test/MultiSource/Applications/JM/lencod/md_highfast.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,614 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file md_highfast.c
+  *
+  * \brief
+  *    Main macroblock mode decision functions and helpers
+  *
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <float.h>
+ #include <memory.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "rdopt_coding_state.h"
+ #include "mb_access.h"
+ #include "intrarefresh.h"
+ #include "image.h"
+ #include "transform8x8.h"
+ #include "ratectl.h"
+ #include "mode_decision.h"
+ #include "fmo.h"
+ #include "me_umhex.h"
+ #include "me_umhexsmp.h"
+ #include "macroblock.h"
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for a macroblock (early-SKIP / selective-intra variant); takes no arguments, works on the encoder globals
+ *************************************************************************************
+ */
+ void encode_one_macroblock_highfast ()
+ {
+   int max_index;                                     // number of mb_mode_table entries tried in the main mode loop (5 or 9)
+   // index starts at 0: the 16x16 pre-pass below decrements it before the main mode loop assigns it
+   int         rerun, block, index = 0, mode, i, j, k, ctr16x16, MEPos;
+   char       best_pdir;
+   RD_PARAMS   enc_mb;
+   double      min_rdcost = 1e30, max_rdcost=1e30;   // min_rdcost starts at the max_rdcost ceiling so it is never read uninitialised
+   char        best_ref[2] = {0, -1};
+   int         bmcost[5] = {INT_MAX};                 // note: only element 0 is INT_MAX, the others are zero-initialised
+   int         cost=0;
+   int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode=0;
+   int         intra1 = 0;
+   int         lambda_mf[3];
+   int         cost8x8_direct = 0;
+   short       islice      = (img->type==I_SLICE);
+   short       bslice      = (img->type==B_SLICE);
+   short       pslice      = (img->type==P_SLICE) || (img->type==SP_SLICE);
+   short       intra       = (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
+ 
+   short       runs        = 1;                       // fixed at one pass here, so the rerun loop body executes once
+ 
+   Macroblock* currMB      = &img->mb_data[img->current_mb_nr];
+   int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
+   Macroblock* prevMB      = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr]:NULL ;  // NULL when prev_mb_nr is negative
+ 
+   short   *allmvs = img->all_mv[0][0][0][0][0];
+   short   min_chroma_pred_mode, max_chroma_pred_mode;
+ 
+   // Fast Mode Decision
+   short inter_skip = 0, intra_skip = 0;
+   int cost16 = 0, mode16 = 0;                        // cost16 is stored below but not read again in this function
+   double min_rate = 0, RDCost16 = DBL_MAX;
+ 
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_decide_intrabk_SAD();
+   }
+   else if (input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_decide_intrabk_SAD();
+   }
+ 
+   intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra
+ 
+   //===== Setup Macroblock encoding parameters =====
+   init_enc_mb_params(currMB, &enc_mb, intra, bslice);
+ 
+   // Encoding pass loop; runs is 1 in this variant, so a single pass is made
+   for (rerun=0; rerun<runs; rerun++)
+   {
+ 
+     // reset chroma intra predictor to default
+     currMB->c_ipred_mode = DC_PRED_8;
+ 
+     //=====   S T O R E   C O D I N G   S T A T E   =====
+     //---------------------------------------------------
+     store_coding_state (cs_cm);
+ 
+     if (!intra)
+     {
+       //===== set direct motion vectors =====
+       best_mode = 1;
+       if (bslice)
+       {
+         Get_Direct_Motion_Vectors ();
+         if (enc_mb.valid[0])
+         {
+           // B-slice: evaluate mode 0 first, starting from the max_rdcost ceiling
+           best_mode = 0;
+           currMB->c_ipred_mode=DC_PRED_8;
+           min_rdcost = max_rdcost;
+           compute_mode_RD_cost(0, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+         }
+       }
+ 
+       if (input->CtxAdptLagrangeMult == 1)
+       {
+         get_initial_mb16x16_cost();
+       }
+ 
+       //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
+       for (min_cost=INT_MAX, mode=1; mode<4; mode++)
+       {
+         bi_pred_me = 0;
+         img->bi_pred_me[mode]=0;
+         if (enc_mb.valid[mode] && !inter_skip)     // once inter_skip is set, the remaining modes are not searched
+         {
+           for (cost=0, block=0; block<(mode==1?1:2); block++)
+           {
+             for (MEPos=0; MEPos < 3; MEPos ++)
+             {
+               lambda_mf[MEPos] = input->CtxAdptLagrangeMult == 0 ? enc_mb.lambda_mf[MEPos] : (int)(enc_mb.lambda_mf[MEPos] * sqrt(lambda_mf_factor));
+             }
+             PartitionMotionSearch (mode, block, lambda_mf);
+ 
+             //--- set 4x4 block indices (for getting MV) ---
+             j = (block==1 && mode==2 ? 2 : 0);
+             i = (block==1 && mode==3 ? 2 : 0);
+ 
+             //--- get cost and reference frame for List 0 prediction ---
+             bmcost[LIST_0] = INT_MAX;
+             list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+ 
+             if (bslice)
+             {
+               //--- get cost and reference frame for List 1 prediction ---
+               bmcost[LIST_1] = INT_MAX;
+               list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+ 
+               // Compute bipredictive cost between best list 0 and best list 1 references
+               list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+ 
+               // Finally, if mode 16x16, compute cost for bipredictive ME vectors
+               if (input->BiPredMotionEstimation && mode == 1)
+               {
+                 list_prediction_cost(BI_PRED_L0, block, mode, enc_mb, bmcost, 0);
+                 list_prediction_cost(BI_PRED_L1, block, mode, enc_mb, bmcost, 0);
+               }
+               else
+               {
+                 bmcost[BI_PRED_L0] = INT_MAX;
+                 bmcost[BI_PRED_L1] = INT_MAX;
+               }
+ 
+               // Determine prediction list based on mode cost
+               determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bi_pred_me);
+             }
+             else // if (bslice)
+             {
+               best_pdir  = 0;
+               cost      += bmcost[LIST_0];
+             }
+ 
+             assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice);
+ 
+             //----- set reference frame and direction parameters -----
+             if (mode==3)
+             {
+               best8x8fwref [3][block  ] = best8x8fwref [3][  block+2] = best_ref[LIST_0];
+               best8x8pdir  [3][block  ] = best8x8pdir  [3][  block+2] = best_pdir;
+               best8x8bwref [3][block  ] = best8x8bwref [3][  block+2] = best_ref[LIST_1];
+             }
+             else if (mode==2)
+             {
+               best8x8fwref [2][2*block] = best8x8fwref [2][2*block+1] = best_ref[LIST_0];
+               best8x8pdir  [2][2*block] = best8x8pdir  [2][2*block+1] = best_pdir;
+               best8x8bwref [2][2*block] = best8x8bwref [2][2*block+1] = best_ref[LIST_1];
+             }
+             else
+             {
+               memset(&best8x8fwref [1][0], best_ref[LIST_0], 4 * sizeof(char));
+               memset(&best8x8bwref [1][0], best_ref[LIST_1], 4 * sizeof(char));
+               best8x8pdir  [1][0] = best8x8pdir  [1][1] = best8x8pdir  [1][2] = best8x8pdir  [1][3] = best_pdir;
+             }
+ 
+             //--- set reference frames and motion vectors ---
+             if (mode>1 && block==0)
+               SetRefAndMotionVectors (block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
+           } // for (block=0; block<(mode==1?1:2); block++)
+ 
+           // 16x16 pre-pass: get the RD cost of mode 1 right away so SKIP can be detected early
+           if(mode == 1)
+           {
+             if(pslice)
+               min_rdcost = max_rdcost;
+ 
+             //=====   S T O R E   C O D I N G   S T A T E   =====
+             //---------------------------------------------------
+             //store_coding_state (cs_cm);
+ 
+             for (ctr16x16=0, k=0; k<1; k++)          // single iteration (k<1)
+             {
+               i16mode = 0;
+ 
+               //--- for INTER16x16 check all prediction directions ---
+               if (bslice)
+               {
+                 best8x8pdir[1][0] = best8x8pdir[1][1] = best8x8pdir[1][2] = best8x8pdir[1][3] = ctr16x16;
+ 
+                 if ( (bslice) && (input->BiPredMotionEstimation)
+                   && (ctr16x16 == 2 && img->bi_pred_me[mode] < 2 && mode == 1))
+                   ctr16x16--;
+                 if (ctr16x16 < 2)
+                   index--;                           // index is not read again until the main mode loop resets it to 0
+                 ctr16x16++;
+               }
+ 
+               currMB->c_ipred_mode=DC_PRED_8;
+               compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+ 
+               if ((input->BiPredMotionEstimation) && (bslice) && ctr16x16 == 2
+                 && img->bi_pred_me[mode] < 2 && mode == 1 && best8x8pdir[1][0] == 2)
+                 img->bi_pred_me[mode] = img->bi_pred_me[mode] + 1;
+             } // for (ctr16x16=0, k=0; k<1; k++)
+ 
+             if(pslice)
+             {
+               // Get SKIP motion vector and compare SKIP_MV with best motion vector of 16x16
+               FindSkipModeMotionVector ();
+               if(input->EarlySkipEnable)
+               {
+                 //===== check for SKIP mode =====
+                 if ( currMB->cbp==0 && enc_picture->ref_idx[LIST_0][img->block_y][img->block_x]==0 &&
+                   enc_picture->mv[LIST_0][img->block_y][img->block_x][0]==allmvs[0] &&
+                   enc_picture->mv[LIST_0][img->block_y][img->block_x][1]==allmvs[1]               )
+                 {
+                   inter_skip = 1;
+                   best_mode = 0;
+                 }
+               } // if(input->EarlySkipEnable)
+             }
+ 
+             // remember the 16x16 result; RDCost16/mode16 seed the final mode selection below
+             RDCost16 = min_rdcost;
+             mode16 = best_mode;
+             cost16 = cost;
+           } // if(mode == 1)
+ 
+           if ((!inter_skip) && (cost < min_cost))
+           {
+             best_mode = mode;
+             min_cost  = cost;
+ 
+             if (input->CtxAdptLagrangeMult == 1)
+             {
+               adjust_mb16x16_cost(cost);
+             }
+           }
+         } // if (enc_mb.valid[mode])
+       } // for (mode=1; mode<4; mode++)
+ 
+       if ((!inter_skip) && enc_mb.valid[P8x8])
+       {
+         giRDOpt_B8OnlyFlag = 1;
+ 
+         tr8x8.cost8x8 = INT_MAX;
+         tr4x4.cost8x8 = INT_MAX;
+         //===== store coding state of macroblock =====
+         store_coding_state (cs_mb);
+ 
+         currMB->all_blk_8x8 = -1;
+ 
+         if (input->Transform8x8Mode)
+         {
+           tr8x8.cost8x8 = 0;
+           //===========================================================
+           // Check 8x8 partition with transform size 8x8
+           //===========================================================
+           //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+           for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+           {
+             submacroblock_mode_decision(enc_mb, &tr8x8, currMB, cofAC_8x8ts[block],
+               &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1);
+             best8x8mode       [block] = tr8x8.part8x8mode [block];
+             best8x8pdir [P8x8][block] = tr8x8.part8x8pdir [block];
+             best8x8fwref[P8x8][block] = tr8x8.part8x8fwref[block];
+             best8x8bwref[P8x8][block] = tr8x8.part8x8bwref[block];
+           }
+ 
+           // following params could be added in RD_8x8DATA structure
+           cbp8_8x8ts      = cbp8x8;
+           cbp_blk8_8x8ts  = cbp_blk8x8;
+           cnt_nonz8_8x8ts = cnt_nonz_8x8;
+           currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size
+ 
+           //--- re-set coding state (as it was before 8x8 block coding) ---
+           //reset_coding_state (cs_mb);
+         }// if (input->Transform8x8Mode)
+ 
+ 
+         if (input->Transform8x8Mode != 2)
+         {
+           tr4x4.cost8x8 = 0;
+           //=================================================================
+           // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
+           //=================================================================
+           //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+           for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+           {
+             submacroblock_mode_decision(enc_mb, &tr4x4, currMB, cofAC8x8[block],
+               &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0);
+ 
+             // when both transform sizes are tried, these 4x4-transform results overwrite the 8x8-transform ones above
+             best8x8mode       [block] = tr4x4.part8x8mode [block];
+             best8x8pdir [P8x8][block] = tr4x4.part8x8pdir [block];
+             best8x8fwref[P8x8][block] = tr4x4.part8x8fwref[block];
+             best8x8bwref[P8x8][block] = tr4x4.part8x8bwref[block];
+           }
+           //--- re-set coding state (as it was before 8x8 block coding) ---
+           // reset_coding_state (cs_mb);
+         }// if (input->Transform8x8Mode != 2)
+ 
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         reset_coding_state (cs_mb);
+ 
+ 
+         // This is not enabled yet since mpr has reverse order.
+         if (input->RCEnable)
+           rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+         //check cost for P8x8 for non-rdopt mode
+         giRDOpt_B8OnlyFlag = 0;
+       }
+       else // if (enc_mb.valid[P8x8])
+       {
+         tr4x4.cost8x8 = INT_MAX;
+       }
+ 
+     }
+     else // if (!intra)
+     {
+       min_cost = INT_MAX;
+     }
+ 
+     //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
+     //-------------------------------------------------------------------------
+     {
+       // store_coding_state (cs_cm);
+       if (!inter_skip)
+       {
+         int mb_available_up;
+         int mb_available_left;
+         int mb_available_up_left;
+ 
+         // outside I-slices restart from the 16x16 pre-pass result, otherwise from the ceiling
+         if(img->type!=I_SLICE)
+         {
+           min_rdcost = RDCost16;
+           best_mode  = mode16;
+         }
+         else
+           min_rdcost = max_rdcost;
+ 
+         // if Fast High mode, compute  inter modes separate process for inter/intra
+         max_index = ((!intra && input->SelectiveIntraEnable ) ? 5 : 9);
+ 
+         if (input->BiPredMotionEstimation)
+           img->bi_pred_me[1] =0;
+ 
+         if (img->yuv_format != YUV400 && max_index != 5)
+         {
+           // precompute all new chroma intra prediction modes
+           IntraChromaPrediction(&mb_available_up, &mb_available_left, &mb_available_up_left);
+ 
+           if (input->FastCrIntraDecision)
+           {
+             IntraChromaRDDecision(enc_mb);
+             min_chroma_pred_mode = currMB->c_ipred_mode;
+             max_chroma_pred_mode = currMB->c_ipred_mode;
+           }
+           else
+           {
+             min_chroma_pred_mode = DC_PRED_8;
+             max_chroma_pred_mode = PLANE_8;
+           }
+         }
+         else
+         {
+           // mb_available_* stay unset on this path; only DC_PRED_8 is tried, so the tests below never read them
+           min_chroma_pred_mode = DC_PRED_8;
+           max_chroma_pred_mode = DC_PRED_8;
+         }
+ 
+         for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+         {
+           // bypass if c_ipred_mode is not allowed
+           if ( (img->yuv_format != YUV400) &&
+             (  ((!intra || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8)
+             || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up)
+             || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left)
+             || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+             continue;
+ 
+           //===== GET BEST MACROBLOCK MODE =====
+           for (ctr16x16=0, index=0; index < max_index; index++)
+           {
+             mode = mb_mode_table[index];
+ 
+             if (img->yuv_format != YUV400)
+             {
+               {
+                 i16mode = 0;
+                 // RDcost of mode 1 in P-slice and mode 0, 1 in B-slice are already available
+                 if(((bslice && mode == 0) || (!islice && mode == 1)))
+                   continue;
+               }
+             }
+             //--- for INTER16x16 check all prediction directions ---
+             if (mode==1 && bslice)
+             {
+               best8x8pdir[1][0] = best8x8pdir[1][1] = best8x8pdir[1][2] = best8x8pdir[1][3] = ctr16x16;
+ 
+               if ( (bslice) && (input->BiPredMotionEstimation)
+                 && (ctr16x16 == 2 && img->bi_pred_me[mode] < 2 && mode == 1))
+                 ctr16x16--;
+               if (ctr16x16 < 2)
+                 index--;                             // stay on the same table entry for the next prediction direction
+               ctr16x16++;
+             }
+ 
+             // Skip intra modes in inter slices if best inter mode is
+             // a MB partition and cbp is 0.
+             if (input->SkipIntraInInterSlices && !intra && mode >= I16MB
+               && best_mode <=3 && currMB->cbp == 0)
+               continue;
+ 
+             // with explicit weighted bi-prediction, reject modes whose summed weights fall outside [-128, 127]
+             if (bslice && active_pps->weighted_bipred_idc == 1 && mode < P8x8)
+             {
+               int cur_blk, cur_comp;
+               int weight_sum;
+               Boolean invalid_mode = FALSE;
+               for (cur_blk = 0; cur_blk < 4; cur_blk ++)
+               {
+                 if (best8x8pdir[mode][cur_blk] == 2)
+                 {
+                   for (cur_comp = 0; cur_comp < (active_sps->chroma_format_idc == YUV400 ? 1 : 3) ; cur_comp ++)
+                   {
+                     weight_sum =
+                       wbp_weight[0][(int) best8x8fwref[mode][cur_blk]][(int) best8x8bwref[mode][cur_blk]][cur_comp] +
+                       wbp_weight[1][(int) best8x8fwref[mode][cur_blk]][(int) best8x8bwref[mode][cur_blk]][cur_comp];
+ 
+                     if (weight_sum < -128 ||  weight_sum > 127)
+                     {
+                       invalid_mode = TRUE;
+                       break;
+                     }
+                   }
+                   if (invalid_mode == TRUE)
+                     break;
+                 }
+               }
+               if (invalid_mode == TRUE)
+               {
+                 if ((input->BiPredMotionEstimation) && ctr16x16 == 2
+                   && img->bi_pred_me[mode] < 2 && mode == 1)
+                   img->bi_pred_me[mode] = img->bi_pred_me[mode] + 1;
+                 continue;
+               }
+             }
+ 
+             if (enc_mb.valid[mode])
+               compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+ 
+             if ((input->BiPredMotionEstimation) && (bslice) && ctr16x16 == 2
+               && img->bi_pred_me[mode] < 2 && mode == 1 && best8x8pdir[1][0] == 2)
+               img->bi_pred_me[mode] = img->bi_pred_me[mode] + 1;
+           }// for (ctr16x16=0, index=0; index<max_index; index++)
+         }// for (currMB->c_ipred_mode=DC_PRED_8; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+ 
+         // Selective Intra Coding
+         if(img->type!=I_SLICE && input->SelectiveIntraEnable && input->ProfileIDC < FREXT_HP)
+         {
+           fast_mode_intra_decision(&intra_skip, min_rate);
+ 
+           if(!intra_skip)
+           {
+             // precompute all new chroma intra prediction modes
+             if (img->yuv_format != YUV400)
+             {
+               // precompute all new chroma intra prediction modes
+               IntraChromaPrediction(&mb_available_up, &mb_available_left, &mb_available_up_left);
+ 
+               if (input->FastCrIntraDecision)
+               {
+                 IntraChromaRDDecision(enc_mb);
+                 min_chroma_pred_mode = currMB->c_ipred_mode;
+                 max_chroma_pred_mode = currMB->c_ipred_mode;
+               }
+               else
+               {
+                 min_chroma_pred_mode = DC_PRED_8;
+                 max_chroma_pred_mode = PLANE_8;
+               }
+             }
+             else
+             {
+               min_chroma_pred_mode = DC_PRED_8;
+               max_chroma_pred_mode = DC_PRED_8;
+             }
+ 
+             max_index = 9;                           // second pass covers mb_mode_table entries 5..8 only
+ 
+             for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+             {
+ 
+               // bypass if c_ipred_mode is not allowed
+               if ( (img->yuv_format != YUV400) &&
+                 (  ((!intra || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8)
+                 || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up)
+                 || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left)
+                 || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+                 continue;
+ 
+               //===== GET BEST MACROBLOCK MODE =====
+               for (index = 5; index < max_index; index++)
+               {
+                 mode = mb_mode_table[index];
+ 
+                 if (input->SkipIntraInInterSlices && !intra && mode >= I16MB
+                   && best_mode <=3 && currMB->cbp == 0)
+                   continue;
+ 
+                 if (img->yuv_format != YUV400)
+                 {
+                   i16mode = 0;
+                   // RDcost of mode 1 in P-slice and mode 0, 1 in B-slice are already available
+                   if(((bslice && mode == 0) || (!islice && mode == 1)))
+                     continue;
+                 }
+ 
+                 if (enc_mb.valid[mode])
+                   compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+               } // for (index = 5; index < max_index; index++)
+             }
+           }
+         }
+       }
+ #ifdef BEST_NZ_COEFF
+       for (j=0;j<4;j++)
+         for (i=0; i<(4+img->num_blk8x8_uv); i++)
+           img->nz_coeff[img->current_mb_nr][j][i] = gaaiMBAFF_NZCoeff[j][i];
+ #endif
+     }
+ 
+     if (rerun==0)
+       intra1 = IS_INTRA(currMB);
+   } // for (rerun=0; rerun<runs; rerun++)
+ 
+   //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
+   //---------------------------------------------------------------------------
+   if (((cbp!=0 || best_mode==I16MB) && (best_mode!=IPCM) ))
+     currMB->prev_cbp = 1;
+   else if ((cbp==0 && !input->RCEnable) || (best_mode==IPCM))
+   {
+     currMB->delta_qp = 0;
+     currMB->qp       = currMB->prev_qp;
+     set_chroma_qp(currMB);
+     img->qp          = currMB->qp;
+     currMB->prev_cbp = 0;
+   }
+   set_stored_macroblock_parameters ();
+ 
+ 
+   // Rate control
+   if(input->RCEnable)
+     update_rc(currMB, best_mode);
+ 
+   rdopt->min_rdcost = min_rdcost;
+   // prevMB may be NULL (see its initialisation above), so it is checked before being dereferenced
+   if ( (img->MbaffFrameFlag)
+     && (img->current_mb_nr%2)
+     && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
+     && (prevMB != NULL) && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))  // top is skip
+     && !(field_flag_inference() == enc_mb.curr_mb_field)) // inferred field flag differs from the one used
+   {
+     rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
+   }
+ 
+   //===== Decide if this MB will restrict the reference frames =====
+   if (input->RestrictRef)
+     update_refresh_map(intra, intra1, currMB);
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+   else if(input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+ 
+   //--- constrain intra prediction ---
+   if(input->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
+   {
+     img->intra_block[img->current_mb_nr] = IS_INTRA(currMB);
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/md_highloss.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/md_highloss.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/md_highloss.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,466 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file md_highloss.c
+  *
+  * \brief
+  *    Main macroblock mode decision functions and helpers
+  *
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <float.h>
+ #include <memory.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "rdopt_coding_state.h"
+ #include "mb_access.h"
+ #include "intrarefresh.h"
+ #include "image.h"
+ #include "transform8x8.h"
+ #include "ratectl.h"
+ #include "mode_decision.h"
+ #include "fmo.h"
+ #include "me_umhex.h"
+ #include "me_umhexsmp.h"
+ #include "macroblock.h"
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for a macroblock with error resilience
+ *************************************************************************************
+ */
+ void encode_one_macroblock_highloss ()
+ {
+   int max_index;
+ 
+   int         rerun, block, index, mode, i, j, ctr16x16, MEPos;
+   char        best_pdir;
+   RD_PARAMS   enc_mb;
+   double      min_rdcost = 0, max_rdcost=1e30;
+   char        best_ref[2] = {0, -1};
+   int         bmcost[5] = {INT_MAX};
+   int         cost=0;
+   int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode=0;
+   int         intra1 = 0;
+   int         cost8x8_direct = 0;
+   short       islice      = (short) (img->type==I_SLICE);
+   short       bslice      = (short) (img->type==B_SLICE);
+   short       pslice      = (short) ((img->type==P_SLICE) || (img->type==SP_SLICE));
+   short       intra       = (short) (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
+   int         lambda_mf[3];
+   short       runs        = (short) (input->RestrictRef==1 && (pslice  || (bslice && img->nal_reference_idc>0)) ? 2 : 1);
+ 
+   Macroblock* currMB      = &img->mb_data[img->current_mb_nr];
+   int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
+   Macroblock* prevMB      = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr]:NULL ;
+ 
+   short   min_chroma_pred_mode, max_chroma_pred_mode;
+ 
+   short inter_skip = 0;
+   double min_rate = 0;
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_decide_intrabk_SAD();
+   }
+   else if (input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_decide_intrabk_SAD();
+   }
+ 
+   intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra
+ 
+   //===== Setup Macroblock encoding parameters =====
+   init_enc_mb_params(currMB, &enc_mb, intra, bslice);
+ 
+   // Perform multiple encodings if rdopt with losses is enabled
+   for (rerun=0; rerun<runs; rerun++)
+   {
+     if (runs==2)
+       input->rdopt= (rerun==0) ? 1 : 3;
+ 
+     // reset chroma intra predictor to default
+     currMB->c_ipred_mode = DC_PRED_8;
+ 
+     //=====   S T O R E   C O D I N G   S T A T E   =====
+     //---------------------------------------------------
+     store_coding_state (cs_cm);
+ 
+     if (!intra)
+     {
+       //===== set direct motion vectors =====
+       best_mode = 1;
+       if (bslice)
+       {
+         Get_Direct_Motion_Vectors ();
+       }
+ 
+       if (input->CtxAdptLagrangeMult == 1)
+       {
+         get_initial_mb16x16_cost();
+       }
+ 
+       //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
+       for (min_cost=INT_MAX, mode=1; mode<4; mode++)
+       {
+         bi_pred_me = 0;
+         img->bi_pred_me[mode]=0;
+         if (enc_mb.valid[mode])
+         {
+           for (cost=0, block=0; block<(mode==1?1:2); block++)
+           {
+             for (MEPos =0; MEPos <3; MEPos ++)
+             {
+               lambda_mf[MEPos] = input->CtxAdptLagrangeMult == 0 ? enc_mb.lambda_mf[MEPos] : (int)(enc_mb.lambda_mf[MEPos] * sqrt(lambda_mf_factor));
+             }
+             PartitionMotionSearch (mode, block, lambda_mf);
+ 
+             //--- set 4x4 block indizes (for getting MV) ---
+             j = (block==1 && mode==2 ? 2 : 0);
+             i = (block==1 && mode==3 ? 2 : 0);
+ 
+             //--- get cost and reference frame for List 0 prediction ---
+             bmcost[LIST_0] = INT_MAX;
+             list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+ 
+             if (bslice)
+             {
+               //--- get cost and reference frame for List 1 prediction ---
+               bmcost[LIST_1] = INT_MAX;
+               list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+ 
+               // Compute bipredictive cost between best list 0 and best list 1 references
+               list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+ 
+               // Finally, if mode 16x16, compute cost for bipredictive ME vectore
+               if (input->BiPredMotionEstimation && mode == 1)
+               {
+                 list_prediction_cost(BI_PRED_L0, block, mode, enc_mb, bmcost, 0);
+                 list_prediction_cost(BI_PRED_L1, block, mode, enc_mb, bmcost, 0);
+               }
+               else
+               {
+                 bmcost[BI_PRED_L0] = INT_MAX;
+                 bmcost[BI_PRED_L1] = INT_MAX;
+               }
+ 
+               // Determine prediction list based on mode cost
+               determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bi_pred_me);
+             }
+             else // if (bslice)
+             {
+               best_pdir  = 0;
+               cost      += bmcost[LIST_0];
+             }
+ 
+             assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice);
+ 
+             //----- set reference frame and direction parameters -----
+             if (mode==3)
+             {
+               best8x8fwref [3][block  ] = best8x8fwref [3][  block+2] = best_ref[LIST_0];
+               best8x8pdir  [3][block  ] = best8x8pdir  [3][  block+2] = best_pdir;
+               best8x8bwref [3][block  ] = best8x8bwref [3][  block+2] = best_ref[LIST_1];
+             }
+             else if (mode==2)
+             {
+               best8x8fwref [2][2*block] = best8x8fwref [2][2*block+1] = best_ref[LIST_0];
+               best8x8pdir  [2][2*block] = best8x8pdir  [2][2*block+1] = best_pdir;
+               best8x8bwref [2][2*block] = best8x8bwref [2][2*block+1] = best_ref[LIST_1];
+             }
+             else
+             {
+               memset(&best8x8fwref [1][0], best_ref[LIST_0], 4 * sizeof(char));
+               memset(&best8x8bwref [1][0], best_ref[LIST_1], 4 * sizeof(char));
+               best8x8pdir  [1][0] = best8x8pdir  [1][1] = best8x8pdir  [1][2] = best8x8pdir  [1][3] = best_pdir;
+             }
+ 
+             //--- set reference frames and motion vectors ---
+             if (mode>1 && block==0)
+               SetRefAndMotionVectors (block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
+           } // for (block=0; block<(mode==1?1:2); block++)
+ 
+           if (cost < min_cost)
+           {
+             best_mode = (short) mode;
+             min_cost  = cost;
+             if (input->CtxAdptLagrangeMult == 1)
+             {
+               adjust_mb16x16_cost(cost);
+             }
+           }
+         } // if (enc_mb.valid[mode])
+       } // for (mode=1; mode<4; mode++)
+ 
+     if (enc_mb.valid[P8x8])
+       {
+         giRDOpt_B8OnlyFlag = 1;
+ 
+         tr8x8.cost8x8 = INT_MAX;
+         tr4x4.cost8x8 = INT_MAX;
+         //===== store coding state of macroblock =====
+         store_coding_state (cs_mb);
+ 
+         currMB->all_blk_8x8 = -1;
+ 
+         if (input->Transform8x8Mode)
+         {
+           tr8x8.cost8x8 = 0;
+           //===========================================================
+           // Check 8x8 partition with transform size 8x8
+           //===========================================================
+           //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+           for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+           {
+             submacroblock_mode_decision(enc_mb, &tr8x8, currMB, cofAC_8x8ts[block],
+               &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1);
+             best8x8mode       [block] = tr8x8.part8x8mode [block];
+             best8x8pdir [P8x8][block] = tr8x8.part8x8pdir [block];
+             best8x8fwref[P8x8][block] = tr8x8.part8x8fwref[block];
+             best8x8bwref[P8x8][block] = tr8x8.part8x8bwref[block];
+           }
+ 
+           // following params could be added in RD_8x8DATA structure
+           cbp8_8x8ts      = cbp8x8;
+           cbp_blk8_8x8ts  = cbp_blk8x8;
+           cnt_nonz8_8x8ts = cnt_nonz_8x8;
+           currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size
+ 
+           //--- re-set coding state (as it was before 8x8 block coding) ---
+           //reset_coding_state (cs_mb);
+         }// if (input->Transform8x8Mode)
+ 
+ 
+         if (input->Transform8x8Mode != 2)
+         {
+           tr4x4.cost8x8 = 0;
+           //=================================================================
+           // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
+           //=================================================================
+           //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+           for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+           {
+             submacroblock_mode_decision(enc_mb, &tr4x4, currMB, cofAC8x8[block],
+               &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0);
+ 
+             best8x8mode       [block] = tr4x4.part8x8mode [block];
+             best8x8pdir [P8x8][block] = tr4x4.part8x8pdir [block];
+             best8x8fwref[P8x8][block] = tr4x4.part8x8fwref[block];
+             best8x8bwref[P8x8][block] = tr4x4.part8x8bwref[block];
+           }
+           //--- re-set coding state (as it was before 8x8 block coding) ---
+           // reset_coding_state (cs_mb);
+         }// if (input->Transform8x8Mode != 2)
+ 
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         reset_coding_state (cs_mb);
+ 
+ 
+         // This is not enabled yet since mpr has reverse order.
+         if (input->RCEnable)
+           rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+         //check cost for P8x8 for non-rdopt mode
+         giRDOpt_B8OnlyFlag = 0;
+       }
+       else // if (enc_mb.valid[P8x8])
+       {
+         tr4x4.cost8x8 = INT_MAX;
+       }
+ 
+       // Find a motion vector for the Skip mode
+     if(pslice)
+         FindSkipModeMotionVector ();
+     }
+     else // if (!intra)
+     {
+       min_cost = INT_MAX;
+     }
+ 
+     //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
+     //-------------------------------------------------------------------------
+ 
+    {
+      // store_coding_state (cs_cm);
+      int mb_available_up;
+      int mb_available_left;
+      int mb_available_up_left;
+ 
+      min_rdcost = max_rdcost;
+      max_index = 9;
+ 
+      if (input->BiPredMotionEstimation)
+        img->bi_pred_me[1] =0;
+ 
+      if (img->yuv_format != YUV400)
+      {
+        // precompute all new chroma intra prediction modes
+        IntraChromaPrediction(&mb_available_up, &mb_available_left, &mb_available_up_left);
+ 
+        if (input->FastCrIntraDecision )
+        {
+          IntraChromaRDDecision(enc_mb);
+          min_chroma_pred_mode = currMB->c_ipred_mode;
+          max_chroma_pred_mode = currMB->c_ipred_mode;
+        }
+        else
+        {
+          min_chroma_pred_mode = DC_PRED_8;
+          max_chroma_pred_mode = PLANE_8;
+        }
+      }
+      else
+      {
+        min_chroma_pred_mode = DC_PRED_8;
+        max_chroma_pred_mode = DC_PRED_8;
+      }
+ 
+      for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+      {
+        // bypass if c_ipred_mode is not allowed
+        if ( (img->yuv_format != YUV400) &&
+          (  ((!intra || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8)
+          || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up)
+          || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left)
+          || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+          continue;
+ 
+        //===== GET BEST MACROBLOCK MODE =====
+        for (ctr16x16=0, index=0; index < max_index; index++)
+        {
+          mode = mb_mode_table[index];
+ 
+          if (img->yuv_format != YUV400)
+          {
+            i16mode = 0;
+          }
+          //--- for INTER16x16 check all prediction directions ---
+          if (mode==1 && bslice)
+          {
+            best8x8pdir[1][0] = best8x8pdir[1][1] = best8x8pdir[1][2] = best8x8pdir[1][3] = (char) ctr16x16;
+ 
+            if ( (bslice) && (input->BiPredMotionEstimation)
+              && (ctr16x16 == 2 && img->bi_pred_me[mode] < 2 && mode == 1))
+              ctr16x16--;
+            if (ctr16x16 < 2)
+              index--;
+            ctr16x16++;
+          }
+ 
+          // Skip intra modes in inter slices if best inter mode is
+          // a MB partition and cbp is 0.
+          if (input->SkipIntraInInterSlices && !intra && mode >= I16MB
+            && best_mode <=3 && currMB->cbp == 0)
+            continue;
+ 
+          if (bslice && active_pps->weighted_bipred_idc == 1 && mode < P8x8)
+          {
+            int cur_blk, cur_comp;
+            int weight_sum;
+            Boolean invalid_mode = FALSE;
+            for (cur_blk = 0; cur_blk < 4; cur_blk ++)
+            {
+              if (best8x8pdir[mode][cur_blk] == 2)
+              {
+                for (cur_comp = 0; cur_comp < (active_sps->chroma_format_idc == YUV400 ? 1 : 3) ; cur_comp ++)
+                {
+                  weight_sum =
+                    wbp_weight[0][(int) best8x8fwref[mode][cur_blk]][(int) best8x8bwref[mode][cur_blk]][cur_comp] +
+                    wbp_weight[1][(int) best8x8fwref[mode][cur_blk]][(int) best8x8bwref[mode][cur_blk]][cur_comp];
+ 
+                  if (weight_sum < -128 ||  weight_sum > 127)
+                  {
+                    invalid_mode = TRUE;
+                    break;
+                  }
+                }
+                if (invalid_mode == TRUE)
+                  break;
+              }
+            }
+            if (invalid_mode == TRUE)
+            {
+              if ((input->BiPredMotionEstimation) && ctr16x16 == 2
+                && img->bi_pred_me[mode] < 2 && mode == 1)
+                img->bi_pred_me[mode] = img->bi_pred_me[mode] + 1;
+              continue;
+            }
+          }
+ 
+          if (enc_mb.valid[mode])
+            compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+ 
+          if ((input->BiPredMotionEstimation) && (bslice) && ctr16x16 == 2
+            && img->bi_pred_me[mode] < 2 && mode == 1 && best8x8pdir[1][0] == 2)
+            img->bi_pred_me[mode] = img->bi_pred_me[mode] + 1;
+        }// for (ctr16x16=0, index=0; index<max_index; index++)
+      }// for (currMB->c_ipred_mode=DC_PRED_8; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+ 
+ #ifdef BEST_NZ_COEFF
+      for (j=0;j<4;j++)
+        for (i=0; i<(4+img->num_blk8x8_uv); i++)
+          img->nz_coeff[img->current_mb_nr][j][i] = gaaiMBAFF_NZCoeff[j][i];
+ #endif
+    }
+ 
+      if (rerun==0)
+        intra1 = IS_INTRA(currMB);
+   } // for (rerun=0; rerun<runs; rerun++)
+ 
+   //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
+   //---------------------------------------------------------------------------
+ 
+     if (((cbp!=0 || best_mode==I16MB) && (best_mode!=IPCM) ))
+       currMB->prev_cbp = 1;
+     else if ((cbp==0 && !input->RCEnable) || (best_mode==IPCM))
+     {
+       currMB->delta_qp = 0;
+       currMB->qp       = currMB->prev_qp;
+       set_chroma_qp(currMB);
+       img->qp          = currMB->qp;
+       currMB->prev_cbp = 0;
+     }
+     set_stored_macroblock_parameters ();
+ 
+   // Rate control
+   if(input->RCEnable)
+     update_rc(currMB, best_mode);
+ 
+   rdopt->min_rdcost = min_rdcost;
+ 
+   if ( (img->MbaffFrameFlag)
+     && (img->current_mb_nr%2)
+     && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
+     && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))
+     && !(field_flag_inference() == enc_mb.curr_mb_field)) // top is skip
+   {
+     rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
+   }
+ 
+   //===== Decide if this MB will restrict the reference frames =====
+   if (input->RestrictRef)
+     update_refresh_map(intra, intra1, currMB);
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+   else if(input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+ 
+   //--- constrain intra prediction ---
+   if(input->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
+   {
+     img->intra_block[img->current_mb_nr] = IS_INTRA(currMB);
+     }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/md_low.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/md_low.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/md_low.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,620 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file md_low.c
+  *
+  * \brief
+  *    Main macroblock mode decision functions and helpers
+  *
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <float.h>
+ #include <memory.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "rdopt_coding_state.h"
+ #include "mb_access.h"
+ #include "intrarefresh.h"
+ #include "image.h"
+ #include "transform8x8.h"
+ #include "ratectl.h"
+ #include "mode_decision.h"
+ #include "fmo.h"
+ #include "me_umhex.h"
+ #include "me_umhexsmp.h"
+ #include "macroblock.h"
+ 
+ 
+ //==== MODULE PARAMETERS ====
+ static imgpel temp_imgY[16][16]; // copy of the current macroblock's luma samples from enc_picture->imgY, taken when Intra8x8 becomes the best candidate and written back if I8MB is the final mode
+ 
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for a macroblock
+ *
+ * \note
+ *    Steps, in the order they appear in the body:
+ *      - non-intra macroblocks only: motion search and cost for modes 1..3
+ *        (16x16, 16x8, 8x16), then the P8x8 sub-partition decision with the
+ *        8x8 and/or 4x4 transform, depending on input->Transform8x8Mode
+ *      - B slices only: cost of the direct mode (mode 0)
+ *      - intra candidates I8MB, I4MB and I16MB, each gated by enc_mb.valid[]
+ *      - the winning mode is applied (coefficients, reconstruction, motion
+ *        vectors), a SKIP check is made for P slices, and the final cost is
+ *        stored in rdopt->min_rdcost
+ *    The function takes no arguments and returns nothing; it reads and writes
+ *    encoder globals such as img, input, enc_picture, best_mode and rdopt.
+ *************************************************************************************
+ */
+ void encode_one_macroblock_low ()
+ {
+ 
+   int         block, mode, i, j, k, dummy, MEPos;
+   char        best_pdir;
+   RD_PARAMS   enc_mb;
+   char        best_ref[2] = {0, -1};
+   int         bmcost[5] = {INT_MAX}; // only bmcost[0] starts at INT_MAX; the other four entries are zero-initialized
+   int         cost=0;
+   int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode=0;
+   int         intra1 = 0;
+   int         temp_cpb = 0;
+   int         best_transform_flag = 0;
+   int         cost8x8_direct = 0;
+   short       islice      = (short) (img->type==I_SLICE);
+   short       bslice      = (short) (img->type==B_SLICE);
+   short       pslice      = (short) ((img->type==P_SLICE) || (img->type==SP_SLICE));
+   short       intra       = (short) (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
+   int         lambda_mf[3];
+   int         pix_x, pix_y;
+   Macroblock* currMB      = &img->mb_data[img->current_mb_nr];
+   int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
+   Macroblock* prevMB      = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr]:NULL ; // NULL when prev_mb_nr is negative
+ 
+   char   **ipredmodes = img->ipredmode;
+   short   *allmvs = img->all_mv[0][0][0][0][0];
+   int     ****i4p;  // temporary for swapping cofAC and img->cofAC (for non-RD-opt. mode)
+ 
+   int tmp_8x8_flag, tmp_no_mbpart;
+   // Fast Mode Decision (inter_skip is never written in this function, so the !inter_skip guards below always hold)
+   short inter_skip = 0;
+ 
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_decide_intrabk_SAD();
+   }
+   else if (input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_decide_intrabk_SAD();
+   }
+ 
+   intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra
+ 
+   //===== Setup Macroblock encoding parameters =====
+   init_enc_mb_params(currMB, &enc_mb, intra, bslice);
+ 
+ 
+   // reset chroma intra predictor to default
+   currMB->c_ipred_mode = DC_PRED_8;
+ 
+   //=====   S T O R E   C O D I N G   S T A T E   =====
+   //---------------------------------------------------
+   store_coding_state (cs_cm);
+ 
+   if (!intra)
+   {
+     //===== set direct motion vectors =====
+     best_mode = 1;
+     if (bslice)
+     {
+       Get_Direct_Motion_Vectors ();
+     }
+ 
+     if (input->CtxAdptLagrangeMult == 1)
+     {
+       get_initial_mb16x16_cost();
+     }
+ 
+     //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
+     for (min_cost=INT_MAX, mode=1; mode<4; mode++)
+     {
+       bi_pred_me = 0;
+       img->bi_pred_me[mode]=0;
+       if (enc_mb.valid[mode] && !inter_skip)
+       {
+         for (cost=0, block=0; block<(mode==1?1:2); block++)
+         {
+           for (MEPos=0; MEPos<3; MEPos++)
+           {
+             lambda_mf[MEPos] = input->CtxAdptLagrangeMult == 0 ? enc_mb.lambda_mf[MEPos] : (int)(enc_mb.lambda_mf[MEPos] * sqrt(lambda_mf_factor));
+           }
+           PartitionMotionSearch (mode, block, lambda_mf);
+ 
+           //--- set 4x4 block indices (for getting MV); i and j are not read again before they are reassigned further down ---
+           j = (block==1 && mode==2 ? 2 : 0);
+           i = (block==1 && mode==3 ? 2 : 0);
+ 
+           //--- get cost and reference frame for List 0 prediction ---
+           bmcost[LIST_0] = INT_MAX;
+           list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+ 
+           if (bslice)
+           {
+             //--- get cost and reference frame for List 1 prediction ---
+             bmcost[LIST_1] = INT_MAX;
+             list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+ 
+             // Compute bipredictive cost between best list 0 and best list 1 references
+             list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+ 
+             // Finally, if mode 16x16, compute cost for bipredictive ME vectors
+             if (input->BiPredMotionEstimation && mode == 1)
+             {
+               list_prediction_cost(BI_PRED_L0, block, mode, enc_mb, bmcost, 0);
+               list_prediction_cost(BI_PRED_L1, block, mode, enc_mb, bmcost, 0);
+             }
+             else
+             {
+               bmcost[BI_PRED_L0] = INT_MAX;
+               bmcost[BI_PRED_L1] = INT_MAX;
+             }
+ 
+             // Determine prediction list based on mode cost
+             determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bi_pred_me);
+           }
+           else // if (bslice)
+           {
+             best_pdir  = 0;
+             cost      += bmcost[LIST_0];
+           }
+ 
+           assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice);
+ 
+           //----- set reference frame and direction parameters -----
+           if (mode==3)
+           {
+             best8x8fwref [3][block  ] = best8x8fwref [3][  block+2] = best_ref[LIST_0];
+             best8x8pdir  [3][block  ] = best8x8pdir  [3][  block+2] = best_pdir;
+             best8x8bwref [3][block  ] = best8x8bwref [3][  block+2] = best_ref[LIST_1];
+           }
+           else if (mode==2)
+           {
+             best8x8fwref [2][2*block] = best8x8fwref [2][2*block+1] = best_ref[LIST_0];
+             best8x8pdir  [2][2*block] = best8x8pdir  [2][2*block+1] = best_pdir;
+             best8x8bwref [2][2*block] = best8x8bwref [2][2*block+1] = best_ref[LIST_1];
+           }
+           else
+           {
+             memset(&best8x8fwref [1][0], best_ref[LIST_0], 4 * sizeof(char));
+             memset(&best8x8bwref [1][0], best_ref[LIST_1], 4 * sizeof(char));
+             best8x8pdir  [1][0] = best8x8pdir  [1][1] = best8x8pdir  [1][2] = best8x8pdir  [1][3] = best_pdir;
+           }
+ 
+           //--- set reference frames and motion vectors ---
+           if (mode>1 && block==0)
+             SetRefAndMotionVectors (block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
+         } // for (block=0; block<(mode==1?1:2); block++)
+ 
+         currMB->luma_transform_size_8x8_flag = 0;
+         if (input->Transform8x8Mode) //for inter rd-off, set 8x8 to do 8x8 transform
+         {
+           SetModesAndRefframeForBlocks(mode);
+           currMB->luma_transform_size_8x8_flag = TransformDecision(-1, &cost);
+         }
+ 
+         if ((!inter_skip) && (cost < min_cost))
+         {
+           best_mode = (short) mode;
+           min_cost  = cost;
+           best_transform_flag = currMB->luma_transform_size_8x8_flag; // remembered so it can be re-applied if a mode 1..3 wins
+ 
+           if (input->CtxAdptLagrangeMult == 1)
+           {
+             adjust_mb16x16_cost(cost);
+           }
+         }
+       } // if (enc_mb.valid[mode])
+     } // for (mode=1; mode<4; mode++)
+ 
+     if ((!inter_skip) && enc_mb.valid[P8x8])
+     {
+       giRDOpt_B8OnlyFlag = 1;
+ 
+       tr8x8.cost8x8 = INT_MAX;
+       tr4x4.cost8x8 = INT_MAX;
+       //===== store coding state of macroblock =====
+       store_coding_state (cs_mb);
+ 
+       currMB->all_blk_8x8 = -1;
+ 
+       if (input->Transform8x8Mode)
+       {
+         tr8x8.cost8x8 = 0;
+         //===========================================================
+         // Check 8x8 partition with transform size 8x8
+         //===========================================================
+         //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+         for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+         {
+           submacroblock_mode_decision(enc_mb, &tr8x8, currMB, cofAC_8x8ts[block],
+             &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1);
+           best8x8mode       [block] = tr8x8.part8x8mode [block];
+           best8x8pdir [P8x8][block] = tr8x8.part8x8pdir [block];
+           best8x8fwref[P8x8][block] = tr8x8.part8x8fwref[block];
+           best8x8bwref[P8x8][block] = tr8x8.part8x8bwref[block];
+         }
+ 
+         // following params could be added in RD_8x8DATA structure
+         cbp8_8x8ts      = cbp8x8;
+         cbp_blk8_8x8ts  = cbp_blk8x8;
+         cnt_nonz8_8x8ts = cnt_nonz_8x8;
+         currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size
+ 
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         //reset_coding_state (cs_mb);
+       }// if (input->Transform8x8Mode)
+ 
+ 
+       if (input->Transform8x8Mode != 2)
+       {
+         tr4x4.cost8x8 = 0;
+         //=================================================================
+         // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
+         //=================================================================
+         //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+         for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+         {
+           submacroblock_mode_decision(enc_mb, &tr4x4, currMB, cofAC8x8[block],
+             &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0);
+ 
+           best8x8mode       [block] = tr4x4.part8x8mode [block];
+           best8x8pdir [P8x8][block] = tr4x4.part8x8pdir [block];
+           best8x8fwref[P8x8][block] = tr4x4.part8x8fwref[block];
+           best8x8bwref[P8x8][block] = tr4x4.part8x8bwref[block];
+         }
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         // reset_coding_state (cs_mb);
+       }// if (input->Transform8x8Mode != 2)
+ 
+       //--- re-set coding state (as it was before 8x8 block coding) ---
+       reset_coding_state (cs_mb);
+ 
+ 
+       // This is not enabled yet since mpr has reverse order.
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+       //check cost for P8x8 for non-rdopt mode
+       if (tr4x4.cost8x8 < min_cost || tr8x8.cost8x8 < min_cost)
+       {
+         best_mode = P8x8;
+         if (input->Transform8x8Mode == 2)
+         {
+           min_cost = tr8x8.cost8x8;
+           currMB->luma_transform_size_8x8_flag=1;
+         }
+         else if (input->Transform8x8Mode)
+         {
+           if (tr8x8.cost8x8 < tr4x4.cost8x8)
+           {
+             min_cost = tr8x8.cost8x8;
+             currMB->luma_transform_size_8x8_flag=1;
+           }
+           else if(tr4x4.cost8x8 < tr8x8.cost8x8)
+           {
+             min_cost = tr4x4.cost8x8;
+             currMB->luma_transform_size_8x8_flag=0;
+           }
+           else // equal costs: let GetBestTransformP8x8() pick the transform size
+           {
+             if (GetBestTransformP8x8() == 0)
+             {
+               min_cost = tr4x4.cost8x8;
+               currMB->luma_transform_size_8x8_flag=0;
+             }
+             else
+             {
+               min_cost = tr8x8.cost8x8;
+               currMB->luma_transform_size_8x8_flag=1;
+             }
+           }
+         }
+         else
+         {
+           min_cost = tr4x4.cost8x8;
+           currMB->luma_transform_size_8x8_flag=0;
+         }
+       }// if ((tr4x4.cost8x8 < min_cost || tr8x8.cost8x8 < min_cost))
+       giRDOpt_B8OnlyFlag = 0;
+     }
+     else // if (enc_mb.valid[P8x8])
+     {
+       tr4x4.cost8x8 = INT_MAX;
+     }
+ 
+     // Find a motion vector for the Skip mode
+     if(pslice)
+       FindSkipModeMotionVector ();
+   }
+   else // if (!intra)
+   {
+     min_cost = INT_MAX;
+   }
+ 
+   //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
+   //-------------------------------------------------------------------------
+   tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;  //save 8x8_flag
+   tmp_no_mbpart = currMB->NoMbPartLessThan8x8Flag;      //save no-part-less
+ 
+   if (img->yuv_format != YUV400)
+     // precompute all chroma intra prediction modes
+     IntraChromaPrediction(NULL, NULL, NULL);
+ 
+   if (enc_mb.valid[0] && bslice) // check DIRECT MODE
+   {
+     if(have_direct)
+     {
+       switch(input->Transform8x8Mode)
+       {
+       case 1: // Mixture of 8x8 & 4x4 transform
+         cost = ((cost8x8_direct < cost_direct) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7]))
+           ? cost8x8_direct : cost_direct;
+         break;
+       case 2: // 8x8 Transform only
+         cost = cost8x8_direct;
+         break;
+       default: // 4x4 Transform only
+         cost = cost_direct;
+         break;
+       }
+     }
+     else
+     { //!have_direct
+       cost = GetDirectCostMB ();
+     }
+     if (cost!=INT_MAX)
+     {
+       cost -= (int)floor(16*enc_mb.lambda_md+0.4999);
+     }
+ 
+     if (cost <= min_cost)
+     {
+       if(active_sps->direct_8x8_inference_flag && input->Transform8x8Mode)
+       {
+         if(input->Transform8x8Mode==2)
+           currMB->luma_transform_size_8x8_flag=1;
+         else
+         {
+           if(cost8x8_direct < cost_direct)
+             currMB->luma_transform_size_8x8_flag=1;
+           else
+             currMB->luma_transform_size_8x8_flag=0;
+         }
+       }
+       else
+         currMB->luma_transform_size_8x8_flag=0;
+ 
+       //Rate control
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+       min_cost  = cost;
+       best_mode = 0;
+       tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
+     }
+     else
+     {
+       currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
+       currMB->NoMbPartLessThan8x8Flag = tmp_no_mbpart; // restore if not best
+     }
+   }
+ 
+   if (enc_mb.valid[I8MB]) // check INTRA8x8
+   {
+     currMB->luma_transform_size_8x8_flag = 1; // at this point cost will ALWAYS be less than min_cost
+ 
+     currMB->mb_type = I8MB;
+     temp_cpb = Mode_Decision_for_new_Intra8x8Macroblock (enc_mb.lambda_md, &cost);
+ 
+     if (cost <= min_cost)
+     {
+       currMB->cbp = temp_cpb;
+ 
+       //coeffs
+       if (input->Transform8x8Mode != 2)
+       {
+         i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
+       }
+ 
+       // save the luma samples of this macroblock; they are copied back below if I8MB is the final mode
+       for(j=0; j<MB_BLOCK_SIZE; j++)
+       {
+         pix_y = img->pix_y + j;
+         for(i=0; i<MB_BLOCK_SIZE; i++)
+         {
+           pix_x = img->pix_x + i;
+           temp_imgY[j][i] = enc_picture->imgY[pix_y][pix_x];
+         }
+       }
+ 
+       //Rate control
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+       min_cost  = cost;
+       best_mode = I8MB;
+       tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
+     }
+     else
+       currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
+   }
+ 
+   if (enc_mb.valid[I4MB]) // check INTRA4x4
+   {
+     currMB->luma_transform_size_8x8_flag = 0;
+     currMB->mb_type = I4MB;
+     temp_cpb = Mode_Decision_for_Intra4x4Macroblock (enc_mb.lambda_md, &cost);
+ 
+     if (cost <= min_cost)
+     {
+       currMB->cbp = temp_cpb;
+ 
+       //Rate control
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+ 
+       min_cost  = cost;
+       best_mode = I4MB;
+       tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
+     }
+     else
+     {
+       currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
+       //coeffs
+       i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
+     }
+   }
+   if (enc_mb.valid[I16MB]) // check INTRA16x16
+   {
+     currMB->luma_transform_size_8x8_flag = 0;
+     intrapred_luma_16x16 ();
+     cost = find_sad_16x16 (&i16mode);
+ 
+     if (cost < min_cost)
+     {
+       //Rate control
+       // should this access opix or pix?
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,img->mprr_2[i16mode]);
+ 
+       best_mode   = I16MB; // NOTE(review): min_cost is not updated on this path, so rdopt->min_rdcost keeps the previous best cost -- confirm intended
+       currMB->cbp = dct_luma_16x16 (i16mode);
+ 
+     }
+     else
+     {
+       currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore
+       currMB->NoMbPartLessThan8x8Flag = tmp_no_mbpart;     // restore
+     }
+   }
+ 
+   intra1 = IS_INTRA(currMB);
+ 
+   //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
+   //---------------------------------------------------------------------------
+   {
+     //===== set parameters for chosen mode =====
+     SetModesAndRefframeForBlocks (best_mode);
+ 
+     if (best_mode==P8x8)
+     {
+       if (currMB->luma_transform_size_8x8_flag && (cbp8_8x8ts == 0) && input->Transform8x8Mode != 2)
+         currMB->luma_transform_size_8x8_flag = 0;
+ 
+       SetCoeffAndReconstruction8x8 (currMB);
+ 
+       memset(currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+       for (k=0, j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++) // k is set here but never read
+         memset(&ipredmodes[j][img->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+     }
+     else
+     {
+       //===== set parameters for chosen mode =====
+       if (best_mode == I8MB)
+       {
+         memcpy(currMB->intra_pred_modes,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+         for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+           memcpy(&img->ipredmode[j][img->block_x],&img->ipredmode8x8[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+ 
+         //--- restore reconstruction for 8x8 transform ---
+         for(j=0; j<MB_BLOCK_SIZE; j++)
+         {
+           memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x],temp_imgY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+         }
+       }
+ 
+       if ((best_mode!=I4MB)&&(best_mode != I8MB))
+       {
+         memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+         for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+           memset(&ipredmodes[j][img->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+ 
+         if (best_mode!=I16MB)
+         {
+           if((best_mode>=1) && (best_mode<=3))
+             currMB->luma_transform_size_8x8_flag = best_transform_flag;
+           LumaResidualCoding ();
+ 
+           if((currMB->cbp==0)&&(best_mode==0))
+             currMB->luma_transform_size_8x8_flag = 0;
+ 
+           //Rate control
+           if (input->RCEnable)
+             rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+         }
+       }
+     }
+     //check luma cbp for transform size flag
+     if (((currMB->cbp&15) == 0) && !(IS_OLDINTRA(currMB) || currMB->mb_type == I8MB))
+       currMB->luma_transform_size_8x8_flag = 0;
+ 
+     // precompute all chroma intra prediction modes
+     if (img->yuv_format != YUV400)
+       IntraChromaPrediction(NULL, NULL, NULL);
+ 
+     img->i16offset = 0;
+     dummy = 0;
+ 
+     if (img->yuv_format!=YUV400)
+       ChromaResidualCoding (&dummy);
+ 
+     if (best_mode==I16MB)
+     {
+       img->i16offset = I16Offset  (currMB->cbp, i16mode);
+     }
+ 
+     SetMotionVectorsMB (currMB, bslice);
+ 
+     //===== check for SKIP mode =====
+     // P slice, 16x16 winner, no coded coefficients, reference index 0 and a motion vector equal to allmvs[0..1]
+     if ((pslice) && best_mode==1 && currMB->cbp==0 &&
+       enc_picture->ref_idx[LIST_0][img->block_y][img->block_x]    == 0 &&
+       enc_picture->mv     [LIST_0][img->block_y][img->block_x][0] == allmvs[0] &&
+       enc_picture->mv     [LIST_0][img->block_y][img->block_x][1] == allmvs[1])
+     {
+       currMB->mb_type = currMB->b8mode[0] = currMB->b8mode[1] = currMB->b8mode[2] = currMB->b8mode[3] = 0;
+       currMB->luma_transform_size_8x8_flag = 0;
+     }
+ 
+     if(img->MbaffFrameFlag)
+       set_mbaff_parameters();
+   }
+ 
+   // Rate control
+   if(input->RCEnable)
+     update_rc(currMB, best_mode);
+ 
+   rdopt->min_rdcost = min_cost;
+ 
+   if ( (img->MbaffFrameFlag)
+     && (img->current_mb_nr%2)
+     && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
+     && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))  // presumably the "top is skip" test; NOTE(review): prevMB is not NULL-checked here
+     && !(field_flag_inference() == enc_mb.curr_mb_field)) // field_flag_inference() differs from enc_mb.curr_mb_field
+   {
+     rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
+   }
+ 
+   //===== Decide if this MB will restrict the reference frames =====
+   if (input->RestrictRef)
+     update_refresh_map(intra, intra1, currMB);
+ 
+   if(input->SearchMode == UM_HEX)
+   {
+     UMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+   else if(input->SearchMode == UM_HEX_SIMPLE)
+   {
+     smpUMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+ 
+   //--- constrain intra prediction ---
+   if(input->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
+   {
+     img->intra_block[img->current_mb_nr] = IS_INTRA(currMB);
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_distortion.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_distortion.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_distortion.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,1331 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file me_distortion.c
+ *
+ * \brief
+ *    Motion estimation error calculation functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Alexis Michael Tourapis <alexis.tourapis at dolby.com>
+ *      - Athanasios Leontaris    <aleon at dolby.com>
+ *
+ *************************************************************************************
+ */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ #include "refbuf.h"
+ 
+ #include "me_distortion.h"
+ 
+ extern unsigned int *byte_abs; // lookup table used in place of abs(); indexed below with signed sample differences
+ 
+ // Define Global Parameters
+ // Luma
+ imgpel *(*get_line[2]) (imgpel****, int, int); // reference line fetchers; the slot is chosen by one of the *_access_method indices
+ imgpel *(*get_line1[2]) (imgpel****, int, int);
+ imgpel *(*get_line2[2]) (imgpel****, int, int);
+ // Chroma
+ imgpel *(*get_crline[2]) (imgpel****, int, int); // chroma counterpart of get_line, indexed the same way
+ imgpel *(*get_crline1[2]) (imgpel****, int, int);
+ imgpel *(*get_crline2[2]) (imgpel****, int, int);
+ // Access method (fast/safe or unconstrained)
+ int ref_access_method; // index into get_line[]/get_crline[] for the single-reference functions
+ int bipred1_access_method; // index used when fetching from ref_pic1_sub
+ int bipred2_access_method; // index used when fetching from ref_pic2_sub
+ 
+ SubImageContainer ref_pic_sub; // reference planes (.luma, .crcb[k]) read by the single-reference functions
+ SubImageContainer ref_pic1_sub; // first reference of the bi-predictive functions
+ SubImageContainer ref_pic2_sub; // second reference of the bi-predictive functions
+ 
+ short weight1, weight2, offsetBi; // luma weights and offset of the weighted bi-predictive functions
+ short weight1_cr[2], weight2_cr[2], offsetBi_cr[2]; // same, one entry per chroma plane
+ int weight_luma, weight_cr[2], offset_luma, offset_cr[2]; // weights and offsets of the weighted single-reference functions
+ short img_width, img_height;
+ int test8x8transform; // non-zero makes the SATD functions use the 8x8 transform instead of the 4x4 one
+ int ChromaMEEnable; // non-zero makes the SAD functions add the two chroma planes to the cost
+ 
+ // temp storage of pixel difference values prior to applying Hadamard Transform (4x4 or 8x8)
+ static int diff[MB_PIXELS];
+ // Hadamard related arrays: m/d are scratch for HadamardSAD4x4, m1/m2/m3 for HadamardSAD8x8
+ static int m[16], d[16];
+ static int m1[8][8], m2[8][8], m3[8][8];
+ static imgpel *src_line, *ref_line, *ref1_line, *ref2_line; // running sample cursors shared by the compute* functions
+ 
+ int (*computeUniPred[6])(imgpel* , int , int , int , int , int);
+ int (*computeBiPred) (imgpel* , int , int , int , int , int, int , int);
+ int (*computeBiPred1[3])(imgpel* , int , int , int , int , int, int , int);
+ int (*computeBiPred2[3])(imgpel* , int , int , int , int , int, int , int);
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Distortion of one 4x4 block of differences, measured with the
+  *    metric selected by input->ModeDecisionMetric
+  *
+  * \param diff
+  *    the 16 difference values of the block
+  *
+  * \return
+  *    ERROR_SAD : sum of byte_abs[] lookups over the 16 values         \n
+  *    ERROR_SSE : sum of img->quad[] lookups over the 16 values        \n
+  *    otherwise : HadamardSAD4x4(diff); this covers ERROR_SATD
+  ***********************************************************************
+  */
+ int distortion4x4(int* diff)
+ {
+   const int metric = input->ModeDecisionMetric;
+   int total = 0;
+   int idx;
+ 
+   if (metric == ERROR_SAD)
+   {
+     for (idx = 0; idx < 16; ++idx)
+       total += byte_abs [diff [idx]];
+   }
+   else if (metric == ERROR_SSE)
+   {
+     int *sq_table = img->quad;
+ 
+     for (idx = 0; idx < 16; ++idx)
+       total += sq_table [diff [idx]];
+   }
+   else
+   {
+     // ERROR_SATD and every unrecognised metric
+     total = HadamardSAD4x4( diff );
+   }
+ 
+   return total;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Distortion of one 8x8 block of differences, measured with the
+  *    metric selected by input->ModeDecisionMetric
+  *
+  * \param diff
+  *    the 64 difference values of the block
+  *
+  * \return
+  *    ERROR_SAD : sum of byte_abs[] lookups over the 64 values         \n
+  *    ERROR_SSE : sum of img->quad[] lookups over the 64 values        \n
+  *    otherwise : HadamardSAD8x8(diff); this covers ERROR_SATD
+  ***********************************************************************
+  */
+ int distortion8x8(int* diff)
+ {
+   const int metric = input->ModeDecisionMetric;
+   int total = 0;
+   int idx;
+ 
+   if (metric == ERROR_SAD)
+   {
+     for (idx = 0; idx < 64; ++idx)
+       total += byte_abs [diff [idx]];
+   }
+   else if (metric == ERROR_SSE)
+   {
+     int *sq_table = img->quad;
+ 
+     for (idx = 0; idx < 64; ++idx)
+       total += sq_table [diff [idx]];
+   }
+   else
+   {
+     // ERROR_SATD and every unrecognised metric
+     total = HadamardSAD8x8( diff );
+   }
+ 
+   return total;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Hadamard SAD of a macroblock partition, built from consecutive
+  *    8x8 blocks of 64 differences each
+  *
+  * \param c_diff
+  *    difference values, one 8x8 block after the other
+  * \param blocktype
+  *    1 (16x16) uses four blocks, 2 and 3 (16x8, 8x16) use two,
+  *    4 (8x8) uses one
+  *
+  * \return
+  *    sum of HadamardSAD8x8() over the blocks used, or -1 for any
+  *    other blocktype (8x4, 4x8, ...)
+  ***********************************************************************
+  */
+ int HadamardMB (int c_diff[MB_PIXELS], int blocktype)
+ {
+   int num_blocks;
+   int blk;
+   int total = 0;
+ 
+   if (blocktype == 1)
+     num_blocks = 4;            // 16x16
+   else if (blocktype == 2 || blocktype == 3)
+     num_blocks = 2;            // 16x8 8x16
+   else if (blocktype == 4)
+     num_blocks = 1;            // 8x8
+   else
+     return -1;                 // 8x4 4x8 and everything else
+ 
+   for (blk = 0; blk < num_blocks; ++blk)
+     total += HadamardSAD8x8(&c_diff[blk << 6]);
+ 
+   return total;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Calculate 4x4 Hadamard-Transformed SAD
+ *
+ *    Runs four add/subtract butterfly stages over the 16 input values,
+ *    ping-ponging between the file-scope scratch arrays m[] and d[],
+ *    then sums byte_abs[] of the 16 results.
+ *
+ * \param diff
+ *    16 difference values; indices 4 apart are combined first, so the
+ *    block is treated as four rows of four
+ *
+ * \return
+ *    (sum + 1) >> 1
+ *
+ * \note
+ *    Overwrites the file-scope arrays m[] and d[].
+ ***********************************************************************
+ */
+ int HadamardSAD4x4 (int* diff)
+ {
+   int k, satd = 0;
+ 
+   /*===== hadamard transform =====*/
+   m[ 0] = diff[ 0] + diff[12]; // stage 1: rows 0 and 3, rows 1 and 2 (sums, then differences)
+   m[ 1] = diff[ 1] + diff[13];
+   m[ 2] = diff[ 2] + diff[14];
+   m[ 3] = diff[ 3] + diff[15];
+   m[ 4] = diff[ 4] + diff[ 8];
+   m[ 5] = diff[ 5] + diff[ 9];
+   m[ 6] = diff[ 6] + diff[10];
+   m[ 7] = diff[ 7] + diff[11];
+   m[ 8] = diff[ 4] - diff[ 8];
+   m[ 9] = diff[ 5] - diff[ 9];
+   m[10] = diff[ 6] - diff[10];
+   m[11] = diff[ 7] - diff[11];
+   m[12] = diff[ 0] - diff[12];
+   m[13] = diff[ 1] - diff[13];
+   m[14] = diff[ 2] - diff[14];
+   m[15] = diff[ 3] - diff[15];
+ 
+   d[ 0] = m[ 0] + m[ 4]; // stage 2: combine the stage-1 rows pairwise
+   d[ 1] = m[ 1] + m[ 5];
+   d[ 2] = m[ 2] + m[ 6];
+   d[ 3] = m[ 3] + m[ 7];
+   d[ 4] = m[ 8] + m[12];
+   d[ 5] = m[ 9] + m[13];
+   d[ 6] = m[10] + m[14];
+   d[ 7] = m[11] + m[15];
+   d[ 8] = m[ 0] - m[ 4];
+   d[ 9] = m[ 1] - m[ 5];
+   d[10] = m[ 2] - m[ 6];
+   d[11] = m[ 3] - m[ 7];
+   d[12] = m[12] - m[ 8];
+   d[13] = m[13] - m[ 9];
+   d[14] = m[14] - m[10];
+   d[15] = m[15] - m[11];
+ 
+   m[ 0] = d[ 0] + d[ 3]; // stage 3: within each row, outer pair and inner pair
+   m[ 1] = d[ 1] + d[ 2];
+   m[ 2] = d[ 1] - d[ 2];
+   m[ 3] = d[ 0] - d[ 3];
+   m[ 4] = d[ 4] + d[ 7];
+   m[ 5] = d[ 5] + d[ 6];
+   m[ 6] = d[ 5] - d[ 6];
+   m[ 7] = d[ 4] - d[ 7];
+   m[ 8] = d[ 8] + d[11];
+   m[ 9] = d[ 9] + d[10];
+   m[10] = d[ 9] - d[10];
+   m[11] = d[ 8] - d[11];
+   m[12] = d[12] + d[15];
+   m[13] = d[13] + d[14];
+   m[14] = d[13] - d[14];
+   m[15] = d[12] - d[15];
+ 
+   d[ 0] = m[ 0] + m[ 1]; // stage 4: within each row, neighbouring pairs
+   d[ 1] = m[ 0] - m[ 1];
+   d[ 2] = m[ 2] + m[ 3];
+   d[ 3] = m[ 3] - m[ 2];
+   d[ 4] = m[ 4] + m[ 5];
+   d[ 5] = m[ 4] - m[ 5];
+   d[ 6] = m[ 6] + m[ 7];
+   d[ 7] = m[ 7] - m[ 6];
+   d[ 8] = m[ 8] + m[ 9];
+   d[ 9] = m[ 8] - m[ 9];
+   d[10] = m[10] + m[11];
+   d[11] = m[11] - m[10];
+   d[12] = m[12] + m[13];
+   d[13] = m[12] - m[13];
+   d[14] = m[14] + m[15];
+   d[15] = m[15] - m[14];
+ 
+   //===== sum up =====
+   // Table lookup is faster than abs macro
+   for (k=0; k<16; ++k)
+   {
+     satd += byte_abs [d [k]];
+   }
+ 
+ 
+   return ((satd+1)>>1);
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Calculate 8x8 Hadamard-Transformed SAD
+ *
+ *    Applies three butterfly stages along each row of the input, then
+ *    three along each column of that result, and sums iabs() of the 64
+ *    transformed values.
+ *
+ * \param diff
+ *    64 difference values, read as eight rows of eight (row j starts
+ *    at index j << 3)
+ *
+ * \return
+ *    (sum + 2) >> 2
+ *
+ * \note
+ *    Overwrites the file-scope arrays m1, m2 and m3.
+ ***********************************************************************
+ */
+ int HadamardSAD8x8 (int* diff)
+ {
+   int i, j, jj, sad=0;
+ 
+   //horizontal
+   for (j=0; j < 8; j++)
+   {
+     jj = j << 3; // index of the first value of row j
+     m2[j][0] = diff[jj  ] + diff[jj+4];
+     m2[j][1] = diff[jj+1] + diff[jj+5];
+     m2[j][2] = diff[jj+2] + diff[jj+6];
+     m2[j][3] = diff[jj+3] + diff[jj+7];
+     m2[j][4] = diff[jj  ] - diff[jj+4];
+     m2[j][5] = diff[jj+1] - diff[jj+5];
+     m2[j][6] = diff[jj+2] - diff[jj+6];
+     m2[j][7] = diff[jj+3] - diff[jj+7];
+ 
+     m1[j][0] = m2[j][0] + m2[j][2];
+     m1[j][1] = m2[j][1] + m2[j][3];
+     m1[j][2] = m2[j][0] - m2[j][2];
+     m1[j][3] = m2[j][1] - m2[j][3];
+     m1[j][4] = m2[j][4] + m2[j][6];
+     m1[j][5] = m2[j][5] + m2[j][7];
+     m1[j][6] = m2[j][4] - m2[j][6];
+     m1[j][7] = m2[j][5] - m2[j][7];
+ 
+     m2[j][0] = m1[j][0] + m1[j][1];
+     m2[j][1] = m1[j][0] - m1[j][1];
+     m2[j][2] = m1[j][2] + m1[j][3];
+     m2[j][3] = m1[j][2] - m1[j][3];
+     m2[j][4] = m1[j][4] + m1[j][5];
+     m2[j][5] = m1[j][4] - m1[j][5];
+     m2[j][6] = m1[j][6] + m1[j][7];
+     m2[j][7] = m1[j][6] - m1[j][7];
+   }
+ 
+   //vertical
+   for (i=0; i < 8; i++)
+   {
+     m3[0][i] = m2[0][i] + m2[4][i];
+     m3[1][i] = m2[1][i] + m2[5][i];
+     m3[2][i] = m2[2][i] + m2[6][i];
+     m3[3][i] = m2[3][i] + m2[7][i];
+     m3[4][i] = m2[0][i] - m2[4][i];
+     m3[5][i] = m2[1][i] - m2[5][i];
+     m3[6][i] = m2[2][i] - m2[6][i];
+     m3[7][i] = m2[3][i] - m2[7][i];
+ 
+     m1[0][i] = m3[0][i] + m3[2][i];
+     m1[1][i] = m3[1][i] + m3[3][i];
+     m1[2][i] = m3[0][i] - m3[2][i];
+     m1[3][i] = m3[1][i] - m3[3][i];
+     m1[4][i] = m3[4][i] + m3[6][i];
+     m1[5][i] = m3[5][i] + m3[7][i];
+     m1[6][i] = m3[4][i] - m3[6][i];
+     m1[7][i] = m3[5][i] - m3[7][i];
+ 
+     m2[0][i] = m1[0][i] + m1[1][i];
+     m2[1][i] = m1[0][i] - m1[1][i];
+     m2[2][i] = m1[2][i] + m1[3][i];
+     m2[3][i] = m1[2][i] - m1[3][i];
+     m2[4][i] = m1[4][i] + m1[5][i];
+     m2[5][i] = m1[4][i] - m1[5][i];
+     m2[6][i] = m1[6][i] + m1[7][i];
+     m2[7][i] = m1[6][i] - m1[7][i];
+   }
+   for (j=0; j < 8; j++)
+     for (i=0; i < 8; i++)
+       sad += iabs (m2[j][i]);
+ 
+   return ((sad+2)>>2);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    SAD computation
+ *
+ *    Accumulates byte_abs[src - ref] over a luma block and, when
+ *    ChromaMEEnable is non-zero, over both chroma planes as well.
+ *
+ * \param src_pic
+ *    source samples; luma rows are read back to back, chroma plane k
+ *    is read starting at src_pic + (256 << k)
+ * \param blocksize_y
+ *    number of luma rows
+ * \param blocksize_x
+ *    number of luma samples per row (consumed four at a time)
+ * \param min_mcost
+ *    early-exit threshold: the running cost is returned as soon as it
+ *    is >= min_mcost at the end of a row
+ * \param cand_x
+ *    candidate x position handed to get_line[] / get_crline[]
+ * \param cand_y
+ *    candidate y position handed to get_line[] / get_crline[]
+ *
+ * \return
+ *    the accumulated cost (partial when the early exit was taken)
+ *
+ * \note
+ *    Leaves the file-scope cursors src_line and ref_line advanced.
+ * \note
+ *    NOTE(review): the chroma inner loop reads two samples per
+ *    iteration and runs blocksize_x_c2 times, while cr_pad_size_x is
+ *    derived from blocksize_x_c2 itself - confirm against the
+ *    definition of shift_cr_x that both agree on the chroma row width.
+ ************************************************************************
+ */
+ int computeSAD(imgpel* src_pic,
+                int blocksize_y,
+                int blocksize_x,
+                int min_mcost,
+                int cand_x,
+                int cand_y)
+ {
+   int mcost = 0;
+   int y,x4;
+   int pad_size_x = img_padded_size_x - blocksize_x; // added to ref_line after each row of blocksize_x samples
+ 
+   src_line = src_pic;
+   ref_line = get_line[ref_access_method] (ref_pic_sub.luma, cand_y, cand_x);
+   for (y=0; y<blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       mcost += byte_abs[ *src_line++ - *ref_line++ ];
+       mcost += byte_abs[ *src_line++ - *ref_line++ ];
+       mcost += byte_abs[ *src_line++ - *ref_line++ ];
+       mcost += byte_abs[ *src_line++ - *ref_line++ ];
+     }
+     if (mcost >= min_mcost) return mcost;
+     ref_line += pad_size_x;
+   }
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k < 2; k++)
+     {
+       src_line = src_pic + (256 << k);
+       ref_line = get_crline[ref_access_method] ( ref_pic_sub.crcb[k], cand_y, cand_x);
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         for (x4 = 0; x4 < blocksize_x_c2; x4++)
+         {
+           mcost += byte_abs[ *src_line++ - *ref_line++ ];
+           mcost += byte_abs[ *src_line++ - *ref_line++ ];
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref_line += cr_pad_size_x;
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    SAD computation for weighted samples
+ *
+ *    Same traversal as computeSAD, but every reference sample is first
+ *    turned into a weighted prediction:
+ *      luma   : iClip1(img->max_imgpel_value,
+ *               ((weight_luma * ref + wp_luma_round)
+ *                >> luma_log_weight_denom) + offset_luma)
+ *      chroma : iClip1(img->max_imgpel_value_uv,
+ *               ((weight_cr[k] * ref + wp_chroma_round)
+ *                >> chroma_log_weight_denom) + offset_cr[k])
+ *
+ * \param src_pic
+ *    source samples; luma rows are read back to back, chroma plane k
+ *    is read starting at src_pic + (256 << k)
+ * \param blocksize_y
+ *    number of luma rows
+ * \param blocksize_x
+ *    number of luma samples per row (consumed four at a time)
+ * \param min_mcost
+ *    early-exit threshold: the running cost is returned as soon as it
+ *    is >= min_mcost at the end of a row
+ * \param cand_x
+ *    candidate x position handed to get_line[] / get_crline[]
+ * \param cand_y
+ *    candidate y position handed to get_line[] / get_crline[]
+ *
+ * \return
+ *    the accumulated cost (partial when the early exit was taken)
+ *
+ * \note
+ *    Leaves the file-scope cursors src_line and ref_line advanced.
+ * \note
+ *    NOTE(review): the chroma inner loop reads two samples per
+ *    iteration and runs blocksize_x_c2 times, while cr_pad_size_x is
+ *    derived from blocksize_x_c2 itself - confirm against the
+ *    definition of shift_cr_x that both agree on the chroma row width.
+ ************************************************************************
+ */
+ int computeSADWP(imgpel* src_pic,
+                int blocksize_y,
+                int blocksize_x,
+                int min_mcost,
+                int cand_x,
+                int cand_y)
+ {
+   int mcost = 0;
+   int y,x4;
+   int weighted_pel;
+   int pad_size_x = img_padded_size_x - blocksize_x; // added to ref_line after each row of blocksize_x samples
+ 
+   src_line = src_pic;
+   ref_line = get_line[ref_access_method] (ref_pic_sub.luma, cand_y, cand_x);
+   for (y=0; y<blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_abs[ *src_line++ -  weighted_pel ];
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_abs[ *src_line++ -  weighted_pel ];
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_abs[ *src_line++ -  weighted_pel ];
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_abs[ *src_line++ -  weighted_pel ];
+     }
+     if (mcost >= min_mcost) return mcost;
+     ref_line += pad_size_x;
+   }
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k < 2; k++)
+     {
+       src_line = src_pic + (256 << k);
+       ref_line = get_crline[ref_access_method] ( ref_pic_sub.crcb[k], cand_y, cand_x);
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         for (x4 = 0; x4 < blocksize_x_c2; x4++)
+         {
+           weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *ref_line++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+           mcost += byte_abs[ *src_line++ -  weighted_pel ];
+           weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *ref_line++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+           mcost += byte_abs[ *src_line++ -  weighted_pel ];
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref_line += cr_pad_size_x;
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred SAD computation (no weights)
+ *
+ *    The prediction for each sample is the rounded average of the two
+ *    references, (ref1 + ref2 + 1) >> 1; the cost is the sum of
+ *    byte_abs[src - prediction] over luma and, when ChromaMEEnable is
+ *    non-zero, over both chroma planes.
+ *
+ * \param src_pic
+ *    source samples; luma rows are read back to back, chroma plane k
+ *    is read starting at src_pic + (256 << k)
+ * \param blocksize_y
+ *    number of luma rows
+ * \param blocksize_x
+ *    number of luma samples per row (consumed four at a time)
+ * \param min_mcost
+ *    early-exit threshold: the running cost is returned as soon as it
+ *    is >= min_mcost at the end of a row
+ * \param cand_x1
+ *    candidate x position in ref_pic1_sub
+ * \param cand_y1
+ *    candidate y position in ref_pic1_sub
+ * \param cand_x2
+ *    candidate x position in ref_pic2_sub
+ * \param cand_y2
+ *    candidate y position in ref_pic2_sub
+ *
+ * \return
+ *    the accumulated cost (partial when the early exit was taken)
+ *
+ * \note
+ *    Leaves the file-scope cursors src_line, ref1_line and ref2_line
+ *    advanced.
+ * \note
+ *    NOTE(review): the chroma inner loop reads two samples per
+ *    iteration and runs blocksize_x_c2 times, while cr_pad_size_x is
+ *    derived from blocksize_x_c2 itself - confirm against the
+ *    definition of shift_cr_x that both agree on the chroma row width.
+ ************************************************************************
+ */
+ int computeBiPredSAD1(imgpel* src_pic,
+                       int blocksize_y,
+                       int blocksize_x,
+                       int min_mcost,
+                       int cand_x1, int cand_y1,
+                       int cand_x2, int cand_y2)
+ {
+   int mcost = 0;
+   int bi_diff;
+   int y,x4;
+   int pad_size_x = img_padded_size_x - blocksize_x; // added to both reference cursors after each row
+ 
+   src_line   = src_pic;
+   ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, cand_y2, cand_x2);
+   ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, cand_y1, cand_x1);
+ 
+   for (y = 0; y < blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_abs[bi_diff];
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_abs[bi_diff];
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_abs[bi_diff];
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_abs[bi_diff];
+     }
+     if (mcost >= min_mcost) return mcost;
+     ref2_line += pad_size_x;
+     ref1_line += pad_size_x;
+   }
+ 
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c  = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k<2; k++)
+     {
+       src_line = src_pic + (256 << k);
+       ref2_line = get_crline[bipred2_access_method] ( ref_pic2_sub.crcb[k], cand_y2, cand_x2);
+       ref1_line = get_crline[bipred1_access_method] ( ref_pic1_sub.crcb[k], cand_y1, cand_x1);
+ 
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         for (x4 = 0; x4 < blocksize_x_c2; x4++)
+         {
+           bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           mcost += byte_abs[bi_diff];
+           bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           mcost += byte_abs[bi_diff];
+         }
+ 
+         if (mcost >= min_mcost) return mcost;
+         ref2_line += cr_pad_size_x;
+         ref1_line += cr_pad_size_x;
+       }
+     }
+   }
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred SAD computation (with weights)
+ *
+ *    The prediction for each luma sample is
+ *      iClip1(img->max_imgpel_value,
+ *             ((weight1 * ref1 + weight2 * ref2 + lround) >> denom)
+ *             + offsetBi)
+ *    with denom = luma_log_weight_denom + 1 and
+ *    lround = 2 * wp_luma_round. Chroma plane k uses weight1_cr[k],
+ *    weight2_cr[k], offsetBi_cr[k] and img->max_imgpel_value_uv with
+ *    the same lround and denom.
+ *
+ * \param src_pic
+ *    source samples; luma rows are read back to back, chroma plane k
+ *    is read starting at src_pic + (256 << k)
+ * \param blocksize_y
+ *    number of luma rows
+ * \param blocksize_x
+ *    number of luma samples per row (consumed four at a time)
+ * \param min_mcost
+ *    early-exit threshold: the running cost is returned as soon as it
+ *    is >= min_mcost at the end of a row
+ * \param cand_x1
+ *    candidate x position in ref_pic1_sub
+ * \param cand_y1
+ *    candidate y position in ref_pic1_sub
+ * \param cand_x2
+ *    candidate x position in ref_pic2_sub
+ * \param cand_y2
+ *    candidate y position in ref_pic2_sub
+ *
+ * \return
+ *    the accumulated cost (partial when the early exit was taken)
+ *
+ * \note
+ *    Leaves the file-scope cursors src_line, ref1_line and ref2_line
+ *    advanced.
+ * \note
+ *    NOTE(review): the chroma path reuses the luma-derived lround and
+ *    denom, whereas computeSADWP uses wp_chroma_round and
+ *    chroma_log_weight_denom for chroma - confirm this is intended.
+ * \note
+ *    NOTE(review): the chroma inner loop reads two samples per
+ *    iteration and runs blocksize_x_c2 times, while cr_pad_size_x is
+ *    derived from blocksize_x_c2 itself - confirm against the
+ *    definition of shift_cr_x that both agree on the chroma row width.
+ ************************************************************************
+ */
+ int computeBiPredSAD2(imgpel* src_pic,
+                       int blocksize_y,
+                       int blocksize_x,
+                       int min_mcost,
+                       int cand_x1, int cand_y1,
+                       int cand_x2, int cand_y2)
+ {
+   int mcost = 0;
+   int bi_diff;
+   int denom = luma_log_weight_denom + 1;
+   int lround = 2 * wp_luma_round;
+   int y,x4;
+   int weighted_pel, pixel1, pixel2;
+   int pad_size_x = img_padded_size_x - blocksize_x; // added to both reference cursors after each row
+ 
+   src_line   = src_pic;
+   ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, cand_y2, cand_x2);
+   ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, cand_y1, cand_x1);
+ 
+   for (y=0; y<blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += byte_abs[bi_diff];
+ 
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += byte_abs[bi_diff];
+ 
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += byte_abs[bi_diff];
+ 
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += byte_abs[bi_diff];
+     }
+     if (mcost >= min_mcost) return mcost;
+     ref2_line += pad_size_x;
+     ref1_line += pad_size_x;
+   }
+ 
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k<2; k++)
+     {
+       src_line = src_pic + (256 << k);
+       ref2_line = get_crline[bipred2_access_method] ( ref_pic2_sub.crcb[k], cand_y2, cand_x2);
+       ref1_line = get_crline[bipred1_access_method] ( ref_pic1_sub.crcb[k], cand_y1, cand_x1);
+ 
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         for (x4 = 0; x4 < blocksize_x_c2; x4++)
+         {
+           pixel1 = weight1_cr[k] * (*ref1_line++);
+           pixel2 = weight2_cr[k] * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value_uv, ((pixel1 + pixel2 + lround) >> denom) + offsetBi_cr[k]);
+           bi_diff = (*src_line++) - weighted_pel;
+           mcost += byte_abs[bi_diff];
+ 
+           pixel1 = weight1_cr[k] * (*ref1_line++);
+           pixel2 = weight2_cr[k] * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value_uv, ((pixel1 + pixel2 + lround) >> denom) + offsetBi_cr[k]);
+           bi_diff = (*src_line++) - weighted_pel;
+           mcost += byte_abs[bi_diff];
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref2_line += cr_pad_size_x;
+         ref1_line += cr_pad_size_x;
+       }
+     }
+   }
+   return mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    SAD computation _with_ Hadamard Transform
+  *
+  *    Splits the block into 4x4 sub-blocks (test8x8transform == 0) or
+  *    8x8 sub-blocks (otherwise), stores src - ref for one sub-block at
+  *    a time in the file-scope buffer diff[], and adds the result of
+  *    HadamardSAD4x4 / HadamardSAD8x8 to the cost. Luma only.
+  *
+  * \param src_pic
+  *    source samples, stored with a row stride of blocksize_x
+  * \param blocksize_y
+  *    block height in samples
+  * \param blocksize_x
+  *    block width in samples
+  * \param min_mcost
+  *    early-exit threshold: the running cost is returned as soon as it
+  *    is > min_mcost after a sub-block has been added
+  * \param cand_x
+  *    candidate x position; sub-block x offsets are added as (x<<2)
+  * \param cand_y
+  *    candidate y position; the loop covers cand_y up to
+  *    cand_y + (blocksize_y<<2)
+  *
+  * \return
+  *    the accumulated cost (partial when the early exit was taken)
+  *
+  * \note
+  *    The local pointer d hides the file-scope array d[16] inside this
+  *    function. Leaves src_line and ref_line advanced and diff[]
+  *    overwritten.
+  * \note
+  *    NOTE(review): the <<2 scaling suggests candidate coordinates are
+  *    in quarter-sample units - confirm against get_line[].
+  ************************************************************************
+ */
+ int computeSATD(imgpel* src_pic,
+                 int blocksize_y,
+                 int blocksize_x,
+                 int min_mcost,
+                 int cand_x,
+                 int cand_y)
+ {
+   int mcost = 0;
+   int y, x, y4, *d;
+   int pad_size_x, src_size_x, src_size_mul;
+   imgpel *src_tmp = src_pic;
+ 
+   if ( !test8x8transform )
+   { // 4x4 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE;
+     src_size_x = blocksize_x - BLOCK_SIZE;
+     src_size_mul = blocksize_x * BLOCK_SIZE; // source samples covered by one row of sub-blocks
+     for (y = cand_y; y < cand_y + (blocksize_y<<2); y += (BLOCK_SIZE_SP))
+     {
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE)
+       {
+         d    = diff;
+         ref_line = get_line[ref_access_method] (ref_pic_sub.luma, y, cand_x + (x<<2));
+         src_line = src_tmp + x;
+         for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
+         {
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+ 
+           ref_line += pad_size_x;
+           src_line += src_size_x;
+         }
+         if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   else
+   { // 8x8 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE8x8;
+     src_size_x = (blocksize_x - BLOCK_SIZE8x8);
+     src_size_mul = blocksize_x * BLOCK_SIZE8x8; // source samples covered by one row of sub-blocks
+     for (y = cand_y; y < cand_y + (blocksize_y<<2); y += (BLOCK_SIZE8x8_SP) )
+     {
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE8x8 )
+       {
+         d = diff;
+         ref_line  = get_line[ref_access_method] (ref_pic_sub.luma, y, cand_x + (x<<2));
+         src_line = src_tmp + x;
+         for (y4 = 0; y4 < BLOCK_SIZE8x8; y4++ )
+         {
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+           *d++ = *src_line++ - *ref_line++ ;
+ 
+           ref_line += pad_size_x;
+           src_line += src_size_x;
+         }
+         if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   return mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    SAD computation of weighted samples _with_ Hadamard Transform
+  *
+  *    Same sub-block traversal as computeSATD (4x4 sub-blocks when
+  *    test8x8transform == 0, 8x8 otherwise), but each reference sample
+  *    is first turned into
+  *      iClip1(img->max_imgpel_value,
+  *             ((weight_luma * ref + wp_luma_round)
+  *              >> luma_log_weight_denom) + offset_luma)
+  *    before it is subtracted from the source sample. Luma only.
+  *
+  * \param src_pic
+  *    source samples, stored with a row stride of blocksize_x
+  * \param blocksize_y
+  *    block height in samples
+  * \param blocksize_x
+  *    block width in samples
+  * \param min_mcost
+  *    early-exit threshold: the running cost is returned as soon as it
+  *    is > min_mcost after a sub-block has been added
+  * \param cand_x
+  *    candidate x position; sub-block x offsets are added as (x<<2)
+  * \param cand_y
+  *    candidate y position; the loop covers cand_y up to
+  *    cand_y + (blocksize_y<<2)
+  *
+  * \return
+  *    the accumulated cost (partial when the early exit was taken)
+  *
+  * \note
+  *    The local pointer d hides the file-scope array d[16] inside this
+  *    function. Leaves src_line and ref_line advanced and diff[]
+  *    overwritten.
+  ************************************************************************
+ */
+ int computeSATDWP(imgpel* src_pic,
+                 int blocksize_y,
+                 int blocksize_x,
+                 int min_mcost,
+                 int cand_x,
+                 int cand_y)
+ {
+   int mcost = 0;
+   int y, x, y4, *d;
+   int weighted_pel;
+   int pad_size_x, src_size_x, src_size_mul;
+   imgpel *src_tmp = src_pic;
+ 
+   if ( !test8x8transform )
+   { // 4x4 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE;
+     src_size_x = (blocksize_x - BLOCK_SIZE);
+     src_size_mul = blocksize_x * BLOCK_SIZE; // source samples covered by one row of sub-blocks
+     for (y = cand_y; y < cand_y + (blocksize_y<<2); y += (BLOCK_SIZE_SP))
+     {
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE)
+       {
+         d    = diff;
+         ref_line = get_line[ref_access_method] (ref_pic_sub.luma, y, cand_x + (x<<2));
+         src_line = src_tmp + x;
+         for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
+         {
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+ 
+           ref_line += pad_size_x;
+           src_line += src_size_x;
+         }
+         if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   else
+   { // 8x8 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE8x8;
+     src_size_x = (blocksize_x - BLOCK_SIZE8x8);
+     src_size_mul = blocksize_x * BLOCK_SIZE8x8; // source samples covered by one row of sub-blocks
+     for (y = cand_y; y < cand_y + (blocksize_y<<2); y += (BLOCK_SIZE8x8_SP) )
+     {
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE8x8 )
+       {
+         d = diff;
+         ref_line  = get_line[ref_access_method] (ref_pic_sub.luma, y, cand_x + (x<<2));
+         src_line = src_tmp + x;
+         for (y4 = 0; y4 < BLOCK_SIZE8x8; y4++ )
+         {
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           *d++ = *src_line++ - weighted_pel;
+ 
+           ref_line += pad_size_x;
+           src_line += src_size_x;
+         }
+         if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   return mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    BiPred (w/o weights) SATD computation
+  *
+  *    Splits the block into 4x4 sub-blocks (test8x8transform == 0) or
+  *    8x8 sub-blocks (otherwise). For each sub-block the differences
+  *    src - ((ref1 + ref2 + 1) >> 1) are stored in the file-scope
+  *    buffer diff[] and the result of HadamardSAD4x4 / HadamardSAD8x8
+  *    is added to the cost. Luma only.
+  *
+  * \param src_pic
+  *    source samples, stored with a row stride of blocksize_x
+  * \param blocksize_y
+  *    block height in samples
+  * \param blocksize_x
+  *    block width in samples
+  * \param min_mcost
+  *    early-exit threshold: the running cost is returned as soon as it
+  *    is > min_mcost after a sub-block has been added
+  * \param cand_x1
+  *    candidate x position in ref_pic1_sub
+  * \param cand_y1
+  *    candidate y position in ref_pic1_sub
+  * \param cand_x2
+  *    candidate x position in ref_pic2_sub
+  * \param cand_y2
+  *    candidate y position in ref_pic2_sub
+  *
+  * \return
+  *    the accumulated cost (partial when the early exit was taken)
+  *
+  * \note
+  *    The local pointer d hides the file-scope array d[16] inside this
+  *    function. Leaves src_line, ref1_line and ref2_line advanced and
+  *    diff[] overwritten.
+  * \note
+  *    The 4x4 branch steps y directly in candidate units
+  *    (BLOCK_SIZE_SP), while the 8x8 branch steps y in samples and
+  *    scales it with <<2 when forming y_pos1 / y_pos2.
+  ************************************************************************
+ */
+ int computeBiPredSATD1(imgpel* src_pic,
+                        int blocksize_y,
+                        int blocksize_x,
+                        int min_mcost,
+                        int cand_x1, int cand_y1,
+                        int cand_x2, int cand_y2)
+ {
+   int mcost = 0;
+   int y, x, y4, *d;
+   int pad_size_x, src_size_x, src_size_mul;
+   imgpel *src_tmp = src_pic;
+ 
+   if ( !test8x8transform )
+   { // 4x4 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE;
+     src_size_x = (blocksize_x - BLOCK_SIZE);
+     src_size_mul = blocksize_x * BLOCK_SIZE; // source samples covered by one row of sub-blocks
+     for (y=0; y<(blocksize_y<<2); y += (BLOCK_SIZE_SP))
+     {
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE)
+       {
+         d    = diff;
+         src_line   = src_tmp + x;
+         ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, cand_y2 + y, cand_x2 + (x<<2));
+         ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, cand_y1 + y, cand_x1 + (x<<2));
+         for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
+         {
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+ 
+           ref1_line += pad_size_x;
+           ref2_line += pad_size_x;
+           src_line  += src_size_x;
+         }
+         if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   else
+   { // 8x8 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE8x8;
+     src_size_x = (blocksize_x - BLOCK_SIZE8x8);
+     src_size_mul = blocksize_x * BLOCK_SIZE8x8; // source samples covered by one row of sub-blocks
+     for (y=0; y<blocksize_y; y += BLOCK_SIZE8x8 )
+     {
+       int y_pos2 = cand_y2 + (y<<2);
+       int y_pos1 = cand_y1 + (y<<2);
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE8x8 )
+       {
+         d = diff;
+         src_line   = src_tmp + x;
+         ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, y_pos2, cand_x2 + (x<<2));
+         ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, y_pos1, cand_x1 + (x<<2));
+         for (y4 = 0; y4 < BLOCK_SIZE8x8; y4++ )
+         {
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+ 
+           ref1_line += pad_size_x;
+           ref2_line += pad_size_x;
+           src_line += src_size_x;
+         }
+         if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred (w/ weights) SATD computation
+ *
+ *    For every transform-sized sub-block (4x4, or 8x8 when
+ *    test8x8transform is set) the residual between the source samples
+ *    and the weighted bi-prediction
+ *      iClip1(max, ((weight1*ref1 + weight2*ref2 + lround) >> denom) + offsetBi)
+ *    is written to the file-level diff buffer and its Hadamard SAD is
+ *    added to the running cost.
+ *
+ * \param src_pic
+ *    source block; rows are read back to back, blocksize_x samples each
+ * \param blocksize_y
+ *    block height in samples
+ * \param blocksize_x
+ *    block width in samples
+ * \param min_mcost
+ *    cost to beat; evaluation stops once the running cost exceeds it
+ * \param cand_x1
+ *    candidate x position in the first reference (handed to get_line;
+ *    sample offsets are scaled by 4 before being added, so this looks
+ *    like a quarter-sample coordinate -- confirm against get_line)
+ * \param cand_y1
+ *    candidate y position in the first reference, same units
+ * \param cand_x2
+ *    candidate x position in the second reference, same units
+ * \param cand_y2
+ *    candidate y position in the second reference, same units
+ *
+ * \return
+ *    accumulated cost (partial when the early exit fires)
+ ************************************************************************
+ */
+ int computeBiPredSATD2(imgpel* src_pic,
+                        int blocksize_y,
+                        int blocksize_x,
+                        int min_mcost,
+                        int cand_x1, int cand_y1,
+                        int cand_x2, int cand_y2)
+ {
+   int mcost = 0;
+   int y, x, y4, *d;
+   int weighted_pel, pixel1, pixel2;
+   int denom = luma_log_weight_denom + 1;
+   int lround = 2 * wp_luma_round;
+   int pad_size_x, src_size_x, src_size_mul;
+   imgpel *src_tmp = src_pic;
+ 
+   if ( !test8x8transform )
+   { // 4x4 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE;      // reference: skip to the next row after 4 reads
+     src_size_x = (blocksize_x - BLOCK_SIZE);          // source: skip to the next row after 4 reads
+     src_size_mul = blocksize_x * BLOCK_SIZE;          // source: one full row of 4x4 blocks
+     // y is added to cand_y unshifted, i.e. it already runs in cand_y units
+     for (y=0; y<(blocksize_y<<2); y += BLOCK_SIZE_SP)
+     {
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE)
+       {
+         d    = diff;
+         src_line   = src_tmp + x;
+         ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, cand_y2 + y, cand_x2 + (x<<2));
+         ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, cand_y1 + y, cand_x1 + (x<<2));
+         for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
+         {
+           // 0
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 1
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 2
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 3
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+ 
+           ref1_line += pad_size_x;
+           ref2_line += pad_size_x;
+           src_line  += src_size_x;
+         }
+         if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   else
+   { // 8x8 TRANSFORM
+     pad_size_x = img_padded_size_x - BLOCK_SIZE8x8;   // reference: skip to the next row after 8 reads
+     src_size_x = (blocksize_x - BLOCK_SIZE8x8);       // source: skip to the next row after 8 reads
+     src_size_mul = blocksize_x * BLOCK_SIZE8x8;       // source: one full row of 8x8 blocks
+     for (y=0; y<blocksize_y; y += BLOCK_SIZE8x8 )
+     {
+       int y_pos2 = cand_y2 + (y<<2);
+       int y_pos1 = cand_y1 + (y<<2);
+       for (x=0; x<blocksize_x; x += BLOCK_SIZE8x8 )
+       {
+         d = diff;
+         src_line   = src_tmp + x;
+         ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, y_pos2, cand_x2 + (x<<2));
+         ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, y_pos1, cand_x1 + (x<<2));
+         for (y4 = 0; y4 < BLOCK_SIZE8x8; y4++ )
+         {
+           // 0
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 1
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 2
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 3
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 4
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 5
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 6
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+           // 7 -- the source pointer must advance here too; src_size_x below
+           // only accounts for the remainder of the row after all 8 reads
+           pixel1 = weight1 * (*ref1_line++);
+           pixel2 = weight2 * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+           *d++ =  (*src_line++) - weighted_pel;
+ 
+           ref1_line += pad_size_x;
+           ref2_line += pad_size_x;
+           src_line  += src_size_x;
+         }
+         if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
+       }
+       src_tmp += src_size_mul;
+     }
+   }
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    SSE computation
+ *
+ *    Sums img->quad[src - ref] over the luma block and, when
+ *    ChromaMEEnable is set, over both chroma planes as well.
+ *
+ * \param src_pic
+ *    source samples; luma is read from the start, the two chroma
+ *    planes from offsets 256 and 512
+ * \param blocksize_y
+ *    luma block height in samples
+ * \param blocksize_x
+ *    luma block width in samples (consumed four at a time)
+ * \param min_mcost
+ *    cost to beat; checked after every row
+ * \param cand_x
+ *    candidate x position handed to get_line / get_crline
+ * \param cand_y
+ *    candidate y position handed to get_line / get_crline
+ *
+ * \return
+ *    accumulated cost; partial if a row pushed it to min_mcost or above
+ ************************************************************************
+ */
+ 
+ int computeSSE(imgpel* src_pic,
+                int blocksize_y,
+                int blocksize_x,
+                int min_mcost,
+                int cand_x,
+                int cand_y)
+ {
+   int mcost = 0;
+   int y,x4;
+   int *byte_sse = img->quad; // indexed with a signed difference below; presumably a table of squares centred on zero -- confirm
+   int pad_size_x = img_padded_size_x - blocksize_x; // after blocksize_x reads this brings ref_line forward by img_padded_size_x in total
+ 
+   src_line = src_pic;
+   ref_line = get_line[ref_access_method] (ref_pic_sub.luma, cand_y, cand_x);
+   for (y=0; y<blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       mcost += byte_sse[ *src_line++ - *ref_line++ ];
+       mcost += byte_sse[ *src_line++ - *ref_line++ ];
+       mcost += byte_sse[ *src_line++ - *ref_line++ ];
+       mcost += byte_sse[ *src_line++ - *ref_line++ ];
+     }
+     if (mcost >= min_mcost) return mcost; // early exit: this candidate can no longer beat min_mcost
+     ref_line += pad_size_x;
+   }
+ 
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k<2; k++) // k indexes ref_pic_sub.crcb[], i.e. the two chroma planes
+     {
+       src_line = src_pic + (256 << k); // offset 256 for k == 0, 512 for k == 1
+       ref_line = get_crline[ref_access_method] ( ref_pic_sub.crcb[k], cand_y, cand_x);
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         for (x4 = 0; x4 < blocksize_x_c2; x4++) // NOTE(review): two samples per pass, so 2 * blocksize_x_c2 samples are read per row while cr_pad_size_x subtracts only blocksize_x_c2 -- verify chroma width/stride against shift_cr_x
+         {
+           mcost += byte_sse[ *src_line++ - *ref_line++ ];
+           mcost += byte_sse[ *src_line++ - *ref_line++ ];
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref_line += cr_pad_size_x;
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    SSE computation of weighted samples
+ *
+ *    Same accumulation as the unweighted SSE, but every reference
+ *    sample is first weighted, rounded, shifted, offset and clipped:
+ *      luma:   iClip1(img->max_imgpel_value,
+ *                ((weight_luma * ref + wp_luma_round) >> luma_log_weight_denom) + offset_luma)
+ *      chroma: iClip1(img->max_imgpel_value_uv,
+ *                ((weight_cr[k] * ref + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k])
+ *
+ * \param src_pic
+ *    source samples; luma is read from the start, the two chroma
+ *    planes from offsets 256 and 512
+ * \param blocksize_y
+ *    luma block height in samples
+ * \param blocksize_x
+ *    luma block width in samples (consumed four at a time)
+ * \param min_mcost
+ *    cost to beat; checked after every row
+ * \param cand_x
+ *    candidate x position handed to get_line / get_crline
+ * \param cand_y
+ *    candidate y position handed to get_line / get_crline
+ *
+ * \return
+ *    accumulated cost; partial if a row pushed it to min_mcost or above
+ ************************************************************************
+ */
+ 
+ int computeSSEWP(imgpel* src_pic,
+                int blocksize_y,
+                int blocksize_x,
+                int min_mcost,
+                int cand_x,
+                int cand_y)
+ {
+   int mcost = 0;
+   int y,x4;
+   int weighted_pel;
+   int *byte_sse = img->quad; // indexed with a signed difference below; presumably a table of squares centred on zero -- confirm
+   int pad_size_x = img_padded_size_x - blocksize_x; // after blocksize_x reads this brings ref_line forward by img_padded_size_x in total
+ 
+   src_line = src_pic;
+   ref_line = get_line[ref_access_method] (ref_pic_sub.luma, cand_y, cand_x);
+   for (y=0; y<blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_sse[ *src_line++ - weighted_pel ];
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_sse[ *src_line++ - weighted_pel ];
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_sse[ *src_line++ - weighted_pel ];
+       weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *ref_line++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+       mcost += byte_sse[ *src_line++ - weighted_pel ];
+     }
+     if (mcost >= min_mcost) return mcost; // early exit: this candidate can no longer beat min_mcost
+     ref_line += pad_size_x;
+   }
+ 
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     // These could be made global to reduce computations
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k<2; k++) // k indexes ref_pic_sub.crcb[], weight_cr[] and offset_cr[]
+     {
+       src_line = src_pic + (256 << k); // offset 256 for k == 0, 512 for k == 1
+       ref_line = get_crline[ref_access_method] ( ref_pic_sub.crcb[k], cand_y, cand_x);
+       for (y=0; y<blocksize_y_c; y++)
+       {
+ 
+         for (x4 = 0; x4 < blocksize_x_c2; x4++) // NOTE(review): two samples per pass, so 2 * blocksize_x_c2 samples are read per row while cr_pad_size_x subtracts only blocksize_x_c2 -- verify chroma width/stride against shift_cr_x
+         {
+           weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *ref_line++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+           mcost += byte_sse[ *src_line++ - weighted_pel ];
+           weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *ref_line++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+           mcost += byte_sse[ *src_line++ - weighted_pel ];
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref_line += cr_pad_size_x;
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred SSE computation (no weights)
+ *
+ *    The prediction for each sample is the rounded average of the two
+ *    references, (ref1 + ref2 + 1) >> 1; img->quad[src - prediction]
+ *    is summed over the luma block and, when ChromaMEEnable is set,
+ *    over both chroma planes.
+ *
+ * \param src_pic
+ *    source samples; luma is read from the start, the two chroma
+ *    planes from offsets 256 and 512
+ * \param blocksize_y
+ *    luma block height in samples
+ * \param blocksize_x
+ *    luma block width in samples (consumed four at a time)
+ * \param min_mcost
+ *    cost to beat; checked after every row
+ * \param cand_x1
+ *    candidate x position in the first reference
+ * \param cand_y1
+ *    candidate y position in the first reference
+ * \param cand_x2
+ *    candidate x position in the second reference
+ * \param cand_y2
+ *    candidate y position in the second reference
+ *
+ * \return
+ *    accumulated cost; partial if a row pushed it to min_mcost or above
+ ************************************************************************
+ */
+ int computeBiPredSSE1(imgpel* src_pic,
+                       int blocksize_y,
+                       int blocksize_x,
+                       int min_mcost,
+                       int cand_x1, int cand_y1,
+                       int cand_x2, int cand_y2)
+ {
+   int mcost = 0;
+   int bi_diff;
+   int y,x4;
+   int *byte_sse = img->quad; // indexed with a signed difference below; presumably a table of squares centred on zero -- confirm
+   int pad_size_x = img_padded_size_x - blocksize_x; // after blocksize_x reads this brings each reference pointer forward by img_padded_size_x in total
+ 
+   src_line   = src_pic;
+   ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, cand_y2, cand_x2);
+   ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, cand_y1, cand_x1);
+ 
+   for (y = 0; y < blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_sse[bi_diff];
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_sse[bi_diff];
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_sse[bi_diff];
+       bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+       mcost += byte_sse[bi_diff];
+     }
+ 
+     if (mcost >= min_mcost) return mcost; // early exit: this candidate can no longer beat min_mcost
+     ref2_line += pad_size_x;
+     ref1_line += pad_size_x;
+   }
+ 
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     int k;
+ 
+     for (k=0; k<2; k++) // k indexes the crcb[] planes of both references
+     {
+       src_line = src_pic + (256 << k); // offset 256 for k == 0, 512 for k == 1
+       ref2_line = get_crline[bipred2_access_method] ( ref_pic2_sub.crcb[k], cand_y2, cand_x2);
+       ref1_line = get_crline[bipred1_access_method] ( ref_pic1_sub.crcb[k], cand_y1, cand_x1);
+ 
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         for (x4 = 0; x4 < blocksize_x_c2; x4++) // NOTE(review): two samples per pass, so 2 * blocksize_x_c2 samples are read per row while cr_pad_size_x subtracts only blocksize_x_c2 -- verify chroma width/stride against shift_cr_x
+         {
+           bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           mcost += byte_sse[bi_diff];
+           bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
+           mcost += byte_sse[bi_diff];
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref2_line += cr_pad_size_x;
+         ref1_line += cr_pad_size_x;
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred SSE computation (with weights)
+ *
+ *    The prediction for each sample is the weighted combination of the
+ *    two references,
+ *      luma:   iClip1(img->max_imgpel_value,
+ *                ((weight1 * ref1 + weight2 * ref2 + 2 * wp_luma_round)
+ *                  >> (luma_log_weight_denom + 1)) + offsetBi)
+ *      chroma: iClip1(img->max_imgpel_value_uv,
+ *                ((weight1_cr[k] * ref1 + weight2_cr[k] * ref2 + 2 * wp_chroma_round)
+ *                  >> (chroma_log_weight_denom + 1)) + offsetBi_cr[k])
+ *    and the squared difference to the source is accumulated.
+ *
+ *    The chroma planes use the chroma rounding term and denominator,
+ *    matching the chroma path of computeSSEWP.
+ *
+ * \param src_pic
+ *    source samples; luma is read from the start, the two chroma
+ *    planes from offsets 256 and 512
+ * \param blocksize_y
+ *    luma block height in samples
+ * \param blocksize_x
+ *    luma block width in samples (consumed four at a time)
+ * \param min_mcost
+ *    cost to beat; checked after every row
+ * \param cand_x1
+ *    candidate x position in the first reference
+ * \param cand_y1
+ *    candidate y position in the first reference
+ * \param cand_x2
+ *    candidate x position in the second reference
+ * \param cand_y2
+ *    candidate y position in the second reference
+ *
+ * \return
+ *    accumulated cost; partial if a row pushed it to min_mcost or above
+ ************************************************************************
+ */
+ int computeBiPredSSE2(imgpel* src_pic,
+                       int blocksize_y,
+                       int blocksize_x,
+                       int min_mcost,
+                       int cand_x1, int cand_y1,
+                       int cand_x2, int cand_y2)
+ {
+   int mcost = 0;
+   int bi_diff;
+   int denom = luma_log_weight_denom + 1;
+   int lround = 2 * wp_luma_round;
+   int y,x4;
+   int weighted_pel, pixel1, pixel2;
+   int pad_size_x = img_padded_size_x - blocksize_x; // after blocksize_x reads this brings each reference pointer forward by img_padded_size_x in total
+ 
+   src_line   = src_pic;
+   ref2_line  = get_line[bipred2_access_method] (ref_pic2_sub.luma, cand_y2, cand_x2);
+   ref1_line  = get_line[bipred1_access_method] (ref_pic1_sub.luma, cand_y1, cand_x1);
+   for (y=0; y<blocksize_y; y++)
+   {
+     for (x4 = 0; x4 < blocksize_x; x4+=4)
+     {
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += bi_diff * bi_diff;
+ 
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += bi_diff * bi_diff;
+ 
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += bi_diff * bi_diff;
+ 
+       pixel1 = weight1 * (*ref1_line++);
+       pixel2 = weight2 * (*ref2_line++);
+       weighted_pel =  iClip1( img->max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
+       bi_diff = (*src_line++) - weighted_pel;
+       mcost += bi_diff * bi_diff;
+     }
+     if (mcost >= min_mcost) return mcost; // early exit: this candidate can no longer beat min_mcost
+     ref2_line += pad_size_x;
+     ref1_line += pad_size_x;
+   }
+ 
+   if ( ChromaMEEnable ) {
+     // calculate chroma contribution to motion compensation error
+     int blocksize_x_c2 = blocksize_x >> shift_cr_x;
+     int blocksize_y_c = blocksize_y >> shift_cr_y;
+     int cr_pad_size_x = img_cr_padded_size_x - blocksize_x_c2;
+     // chroma has its own weight denominator and rounding term; the luma
+     // values are only correct when both denominators happen to be equal
+     int denom_cr = chroma_log_weight_denom + 1;
+     int lround_cr = 2 * wp_chroma_round;
+     int k;
+ 
+     for (k=0; k<2; k++) // k indexes crcb[], weight1_cr[], weight2_cr[] and offsetBi_cr[]
+     {
+       src_line = src_pic + (256 << k); // offset 256 for k == 0, 512 for k == 1
+       ref2_line = get_crline[bipred2_access_method] ( ref_pic2_sub.crcb[k], cand_y2, cand_x2);
+       ref1_line = get_crline[bipred1_access_method] ( ref_pic1_sub.crcb[k], cand_y1, cand_x1);
+ 
+       for (y=0; y<blocksize_y_c; y++)
+       {
+         // NOTE(review): two samples per pass, so 2 * blocksize_x_c2 samples
+         // are read per row while cr_pad_size_x subtracts only blocksize_x_c2
+         // -- verify chroma width/stride against shift_cr_x
+         for (x4 = 0; x4 < blocksize_x_c2; x4++)
+         {
+           pixel1 = weight1_cr[k] * (*ref1_line++);
+           pixel2 = weight2_cr[k] * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value_uv, ((pixel1 + pixel2 + lround_cr) >> denom_cr) + offsetBi_cr[k]);
+           bi_diff = (*src_line++) - weighted_pel;
+           mcost += bi_diff * bi_diff;
+ 
+           pixel1 = weight1_cr[k] * (*ref1_line++);
+           pixel2 = weight2_cr[k] * (*ref2_line++);
+           weighted_pel =  iClip1( img->max_imgpel_value_uv, ((pixel1 + pixel2 + lround_cr) >> denom_cr) + offsetBi_cr[k]);
+           bi_diff = (*src_line++) - weighted_pel;
+           mcost += bi_diff * bi_diff;
+         }
+         if (mcost >= min_mcost) return mcost;
+         ref2_line += cr_pad_size_x;
+         ref1_line += cr_pad_size_x;
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_distortion.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_distortion.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_distortion.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,65 ----
+ /*!
+  ***************************************************************************
+  * \file
+  *    me_distortion.h
+  *
+  * \author
+  *    Alexis Michael Tourapis        <alexis.tourapis at dolby.com>
+  *    Athanasios Leontaris           <aleon at dolby.com>
+  *
+  * \date
+  *    11. August 2006
+  *
+  * \brief
+  *    Headerfile for motion estimation distortion
+  *
+  *    Declares the reference access helpers, the weighting parameters and
+  *    the SAD / SATD / SSE cost functions used by motion estimation.
+  *    This header includes nothing itself: imgpel and SubImageContainer
+  *    must already be declared by the including file.
+  **************************************************************************
+  */
+ 
+ #ifndef _ME_DISTORTION_H_
+ #define _ME_DISTORTION_H_
+ 
+ extern imgpel *(*get_line[2]) (imgpel****, int, int);   // luma accessors, selected by the *_access_method indices below
+ extern imgpel *(*get_line1[2]) (imgpel****, int, int);
+ extern imgpel *(*get_line2[2]) (imgpel****, int, int);
+ 
+ extern imgpel *(*get_crline[2]) (imgpel****, int, int); // chroma accessors, indexed the same way
+ extern imgpel *(*get_crline1[2]) (imgpel****, int, int);
+ extern imgpel *(*get_crline2[2]) (imgpel****, int, int);
+ 
+ extern int ref_access_method;     // index into get_line / get_crline for uni-prediction
+ extern int bipred1_access_method; // index into get_line / get_crline for the first bi-pred reference
+ extern int bipred2_access_method; // index into get_line / get_crline for the second bi-pred reference
+ 
+ extern SubImageContainer ref_pic_sub;  // reference read by the uni-prediction cost functions
+ extern SubImageContainer ref_pic1_sub; // first reference read by the bi-prediction cost functions
+ extern SubImageContainer ref_pic2_sub; // second reference read by the bi-prediction cost functions
+ 
+ extern short weight1, weight2, offsetBi;                          // luma bi-prediction weights and offset
+ extern int weight_luma, weight_cr[2], offset_luma, offset_cr[2];  // uni-prediction weights and offsets (luma, two chroma planes)
+ extern short weight1_cr[2], weight2_cr[2], offsetBi_cr[2];        // chroma bi-prediction weights and offsets
+ extern short img_width, img_height;
+ extern int test8x8transform; // non-zero selects the 8x8 Hadamard path in the SATD functions
+ extern int ChromaMEEnable;   // non-zero adds the chroma planes to the SSE costs
+ 
+ extern int HadamardSAD4x4(int* diff);
+ extern int HadamardSAD8x8(int* diff);
+ 
+ // Uni-prediction: (src_pic, blocksize_y, blocksize_x, min_mcost, cand_x, cand_y)
+ // Bi-prediction:  (src_pic, blocksize_y, blocksize_x, min_mcost, cand_x1, cand_y1, cand_x2, cand_y2)
+ extern int computeSAD(imgpel* , int, int, int, int, int);
+ extern int computeSADWP(imgpel* , int, int, int, int, int);
+ extern int computeBiPredSAD1(imgpel* , int, int, int, int, int, int, int);
+ extern int computeBiPredSAD2(imgpel* , int, int, int, int, int, int, int);
+ extern int computeSATD(imgpel* , int, int, int, int, int);
+ extern int computeSATDWP(imgpel* , int, int, int, int, int);
+ extern int computeBiPredSATD1(imgpel* , int, int, int, int, int, int, int);
+ extern int computeBiPredSATD2(imgpel* , int, int, int, int, int, int, int);
+ extern int computeSSE(imgpel* , int, int, int, int, int);
+ extern int computeSSEWP(imgpel* , int, int, int, int, int);
+ extern int computeBiPredSSE1(imgpel* , int, int, int, int, int, int, int);
+ extern int computeBiPredSSE2(imgpel* , int, int, int, int, int, int, int);
+ // Function pointers and pointer tables whose signatures match the cost functions declared above
+ extern int (*computeUniPred[6])(imgpel* , int , int , int , int , int );
+ extern int (*computeBiPred) (imgpel* , int , int , int , int , int, int , int);
+ extern int (*computeBiPred1[3])(imgpel* , int , int , int , int , int, int , int);
+ extern int (*computeBiPred2[3])(imgpel* , int , int , int , int , int, int , int);
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/me_epzs.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_epzs.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_epzs.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,3123 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file me_epzs.c
+ *
+ * \brief
+ *    Motion Estimation using EPZS
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Alexis Michael Tourapis <alexismt at ieee.org>
+ *      - Athanasios Leontaris    <aleon at dolby.com>
+ *
+ *************************************************************************************
+ */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ #include "refbuf.h"
+ 
+ #include "me_distortion.h"
+ #include "me_epzs.h"
+ 
+ #define EPZSREF 1
+ 
+ extern int *mvbits;
+ extern int *byte_abs;
+ 
+ // Define Global Parameters
+ static const short blk_parent[8] = {1, 1, 1, 1, 2, 4, 4, 5}; //!< {skip, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4}
+ //static const short blk_child[8]  = {1, 2, 4, 4, 5, 7, 7, 7}; //!< {skip, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4}
+ static const int   minthres_base[8] = {0,  64,  32,  32,  16,  8,  8,  4}; // scaled into minthres[] by EPZSInit
+ static const int   medthres_base[8] = {0, 256, 128, 128,  64, 32, 32, 16}; // scaled into medthres[] and subthres[] by EPZSInit
+ static const int   maxthres_base[8] = {0, 768, 384, 384, 192, 96, 96, 48}; // scaled into maxthres[] by EPZSInit
+ static const short search_point_hp[10][2] = {{0,0},{-2,0}, {0,2}, {2,0},  {0,-2}, {-2,2},  {2,2},  {2,-2}, {-2,-2}, {-2,2}}; // NOTE(review): the last entry repeats entry 5 -- confirm this is intended
+ static const short search_point_qp[10][2] = {{0,0},{-1,0}, {0,1}, {1,0},  {0,-1}, {-1,1},  {1,1},  {1,-1}, {-1,-1}, {-1,1}}; // NOTE(review): the last entry repeats entry 5 -- confirm this is intended
+ //static const int   next_subpel_pos_start[5][5] = {};
+ //static const int   next_subpel_pos_end  [5][5] = {};
+ 
+ 
+ 
+ static short EPZSBlkCount;
+ static int   searcharray; // side length of the square EPZSMap, set in EPZSInit
+ static int   mv_rescale;  // right shift applied to the pattern_data offsets in assignEPZSpattern; set in EPZSInit
+ 
+ //! Define EPZS Refinement patterns: pattern_data[type][point] = { mv[0], mv[1], start_nmbr, next_points }, copied by assignEPZSpattern; rows with fewer than 12 points are zero-filled
+ static int pattern_data[5][12][4] =
+ {
+   { // Small Diamond pattern
+     {  0,  4,  3, 3 }, {  4,  0,  0, 3 }, {  0, -4,  1, 3 }, { -4,  0, 2, 3 }
+   },
+   { // Square pattern
+     {  0,  4,  7, 3 }, {  4,  4,  7, 5 }, {  4,  0,  1, 3 }, {  4, -4, 1, 5 },
+     {  0, -4,  3, 3 }, { -4, -4,  3, 5 }, { -4,  0,  5, 3 }, { -4,  4, 5, 5 }
+   },
+   { // Enhanced Diamond pattern
+     { -4,  4, 10, 5 }, {  0,  8, 10, 8 }, {  0,  4, 10, 7 }, {  4,  4, 1, 5 },
+     {  8,  0, 1,  8 }, {  4,  0,  1, 7 }, {  4, -4,  4, 5 }, {  0, -8, 4, 8 },
+     {  0, -4, 4,  7 }, { -4, -4, 7,  5 }, { -8,  0,  7, 8 }, { -4,  0, 7, 7 }
+ 
+   },
+   { // Large Diamond pattern
+     {  0,  8, 6,  5 }, {  4,  4, 0,  3 }, {  8,  0, 0,  5 }, {  4, -4, 2, 3 },
+     {  0, -8, 2,  5 }, { -4, -4, 4,  3 }, { -8,  0, 4,  5 }, { -4,  4, 6, 3 }
+   },
+   { // Extended Subpixel pattern
+     {  0,  8, 6, 12 }, {  4,  4, 0, 12 }, {  8,  0, 0, 12 }, {  4, -4, 2, 12 },
+     {  0, -8, 2, 12 }, { -4, -4, 4, 12 }, { -8,  0, 4, 12 }, { -4,  4, 6, 12 },
+     {  0,  2, 6, 12 }, {  2,  0, 0, 12 }, {  0, -2, 2, 12 }, { -2,  0, 4, 12 }
+   }
+ };
+ 
+ // Other definitions (display names, each at most 19 characters plus the terminator)
+ const  char c_EPZSPattern[6][20]    = { "Diamond", "Square", "Extended Diamond", "Large Diamond", "SBP Large Diamond", "PMVFAST"};
+ const  char c_EPZSDualPattern[7][20] = { "Disabled","Diamond", "Square", "Extended Diamond", "Large Diamond", "SBP Large Diamond", "PMVFAST"};
+ const  char c_EPZSFixed[3][20] = { "Disabled","All P", "All P + B"};
+ const  char c_EPZSOther[2][20] = { "Disabled","Enabled"};
+ 
+ static int medthres[8]; // filled in EPZSInit from medthres_base
+ static int maxthres[8]; // filled in EPZSInit from maxthres_base
+ static int minthres[8]; // filled in EPZSInit from minthres_base
+ static int subthres[8]; // filled in EPZSInit from medthres_base with the sub-pel scale
+ static int mv_scale[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES];
+ 
+ static short **EPZSMap;  //!< Memory Map definition
+ int ***EPZSDistortion;  //!< Array for storing SAD Values
+ #if EPZSREF
+ short ******EPZSMotion;  //!< Array for storing Motion Vectors (6-D: EPZSInit adds an img->max_num_references dimension)
+ #else
+ short *****EPZSMotion;  //!< Array for storing Motion Vectors
+ #endif
+ 
+ // Pattern and predictor objects; sdiamond..pmvfast, the window predictors and predictor are allocated in EPZSInit
+ EPZSStructure *searchPattern,*searchPatternD, *predictor;
+ EPZSStructure *window_predictor, *window_predictor_extended;
+ EPZSStructure *sdiamond,*square,*ediamond,*ldiamond, *sbdiamond, *pmvfast;
+ EPZSColocParams *EPZSCo_located;
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Create a zero-initialised EPZSColocParams together with its motion
+ *    vector arrays.
+ *
+ * \param size_x
+ *    horizontal luma size
+ * \param size_y
+ *    vertical luma size
+ * \param mb_adaptive_frame_field_flag
+ *    flag that indicates macroblock adaptive frame/field coding; when
+ *    set, the half-height top_mv and bottom_mv arrays are allocated too
+ *
+ * \return
+ *    the allocated EPZSColocParams structure
+ ************************************************************************
+ */
+ EPZSColocParams* allocEPZScolocated(int size_x, int size_y, int mb_adaptive_frame_field_flag)
+ {
+   const int blk_rows = size_y / BLOCK_SIZE;
+   const int blk_cols = size_x / BLOCK_SIZE;
+   EPZSColocParams *coloc = calloc(1, sizeof(EPZSColocParams));
+ 
+   if (coloc == NULL)
+     no_mem_exit("alloc_EPZScolocated: s");
+ 
+   coloc->size_x = size_x;
+   coloc->size_y = size_y;
+   coloc->mb_adaptive_frame_field_flag = mb_adaptive_frame_field_flag;
+ 
+   // one motion vector (2 components) per block, for each of 2 lists
+   get_mem4Dshort (&(coloc->mv), 2, blk_rows, blk_cols, 2);
+ 
+   if (mb_adaptive_frame_field_flag)
+   {
+     // field arrays cover half the block rows each
+     get_mem4Dshort (&(coloc->top_mv),    2, blk_rows / 2, blk_cols, 2);
+     get_mem4Dshort (&(coloc->bottom_mv), 2, blk_rows / 2, blk_cols, 2);
+   }
+ 
+   return coloc;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Free co-located memory.
+ *
+ * \param p
+ *    structure to be freed; NULL is accepted and ignored
+ *
+ * \note
+ *    p is passed by value, so the caller's pointer cannot be reset here
+ *    and is dangling after this call. The field arrays are released
+ *    only when p->mb_adaptive_frame_field_flag is set, mirroring
+ *    allocEPZScolocated.
+ ************************************************************************
+ */
+ void freeEPZScolocated(EPZSColocParams* p)
+ {
+   if (p == NULL)
+     return;
+ 
+   free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE);
+ 
+   if (p->mb_adaptive_frame_field_flag)
+   {
+     free_mem4Dshort (p->top_mv, 2, p->size_y / BLOCK_SIZE / 2);
+     free_mem4Dshort (p->bottom_mv, 2, p->size_y / BLOCK_SIZE / 2);
+   }
+ 
+   free(p);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Allocate EPZS pattern memory
+ *
+ * \param searchpoints
+ *    number of searchpoints to allocate
+ *
+ * \return
+ *    the allocated EPZSStructure structure, zero-initialised, with
+ *    searchPoints set and point[] holding searchpoints zeroed entries
+ *
+ * \note
+ *    A failed allocation of the point array is reported through
+ *    no_mem_exit, like the structure itself. A non-positive
+ *    searchpoints is passed on unchanged and never reported, because
+ *    calloc may legitimately return NULL for it.
+ ************************************************************************
+ */
+ EPZSStructure* allocEPZSpattern(int searchpoints)
+ {
+   EPZSStructure *s;
+ 
+   s = calloc(1, sizeof(EPZSStructure));
+   if (NULL == s)
+     no_mem_exit("alloc_EPZSpattern: s");
+ 
+   s->searchPoints = searchpoints;
+   s->point = (SPoint*) calloc(searchpoints, sizeof(SPoint));
+   if (searchpoints > 0 && NULL == s->point)
+     no_mem_exit("alloc_EPZSpattern: s->point");
+ 
+   return s;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Free EPZS pattern memory.
+ *
+ * \param p
+ *    structure to be freed; NULL is accepted and ignored
+ *
+ * \note
+ *    p is passed by value, so the caller's pointer cannot be reset here
+ *    and is dangling after this call.
+ ************************************************************************
+ */
+ void freeEPZSpattern(EPZSStructure* p)
+ {
+   if (p == NULL)
+     return;
+ 
+   free(p->point);
+   free(p);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Fill an allocated pattern from pattern_data[type] and set its
+ *    continuation parameters.
+ *
+ * \param pattern
+ *    pattern to fill; pattern->searchPoints entries are written
+ * \param type
+ *    first index into pattern_data
+ * \param stopSearch
+ *    value stored in pattern->stopSearch
+ * \param nextLast
+ *    value stored in pattern->nextLast
+ * \param nextpattern
+ *    value stored in pattern->nextpattern
+ ************************************************************************
+ */
+ void assignEPZSpattern(EPZSStructure *pattern,int type, int stopSearch, int nextLast, EPZSStructure *nextpattern)
+ {
+   int idx = 0;
+ 
+   pattern->nextpattern = nextpattern;
+   pattern->nextLast    = nextLast;
+   pattern->stopSearch  = stopSearch;
+ 
+   while (idx < pattern->searchPoints)
+   {
+     SPoint    *dst = &pattern->point[idx];
+     const int *src = pattern_data[type][idx];
+ 
+     // table offsets are scaled down by mv_rescale before being stored
+     dst->mv[0]       = src[0] >> mv_rescale;
+     dst->mv[1]       = src[1] >> mv_rescale;
+     dst->start_nmbr  = src[2];
+     dst->next_points = src[3];
+     idx++;
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    calculate RoundLog2(iValue): log2 of iValue rounded to the nearest
+ *    integer, computed as (floor(log2(iValue^2)) + 1) >> 1
+ *
+ * \param iValue
+ *    input value; 0 and 1 both yield 0
+ *
+ * \return
+ *    the rounded base-2 logarithm
+ *
+ * \note
+ *    The square and the bit scan use unsigned arithmetic with a bounded
+ *    shift count, so no signed overflow or over-wide shift can occur.
+ *    The result is meaningful while iValue^2 fits in an unsigned int.
+ ************************************************************************
+ */
+ static int RoundLog2 (int iValue)
+ {
+   unsigned int uiSquare = (unsigned int) iValue * (unsigned int) iValue;
+   const int iMaxShift = (int) (sizeof (uiSquare) * CHAR_BIT) - 1;
+   int iRet = 0;
+ 
+   // find the largest iRet with 2^iRet <= iValue^2 (stays 0 when iValue^2 < 2)
+   while (iRet < iMaxShift && (uiSquare >> (iRet + 1)) != 0)
+     iRet++;
+ 
+   // halve with rounding: log2(v) = log2(v^2) / 2
+   return (iRet + 1) >> 1;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Search Window Predictor Initialization
+ *
+ *    For every level from RoundLog2(search_range) - 2 down to 0 the
+ *    eight points surrounding the origin at distance
+ *    (search_range << qpel_shift) >> level are appended. When mode is
+ *    non-zero, twelve further points per level are appended that use
+ *    the larger field step.
+ *
+ * \param search_range
+ *    search range the window is derived from
+ * \param predictor
+ *    structure whose point[] is filled and whose searchPoints is set to
+ *    the number of points written
+ * \param mode
+ *    0 for the basic 8-point window, otherwise the extended window
+ ************************************************************************
+ */
+ void EPZSWindowPredictorInit (short search_range, EPZSStructure * predictor, short mode)
+ {
+   const int qpel_shift = input->EPZSSubPelGrid ? 2 : 0;
+   SPoint *pts = predictor->point;
+   int count = 0;
+   int level, sign;
+ 
+   for (level = RoundLog2 (search_range) - 2; level >= 0; level--)
+   {
+     const int step = (search_range << qpel_shift) >> level;
+ 
+     // basic window: positive half first, then its mirror image
+     for (sign = 1; sign >= -1; sign -= 2)
+     {
+       const int s = sign * step;
+ 
+       pts[count].mv[0] =  s;  pts[count].mv[1] = 0;  count++;
+       pts[count].mv[0] =  s;  pts[count].mv[1] = s;  count++;
+       pts[count].mv[0] =  0;  pts[count].mv[1] = s;  count++;
+       pts[count].mv[0] = -s;  pts[count].mv[1] = s;  count++;
+     }
+ 
+     if (mode != 0)
+     {
+       const int fieldstep = ((3 * step + 1) << qpel_shift) >> 1;
+ 
+       // extended window: same mirroring, using the larger field step
+       for (sign = 1; sign >= -1; sign -= 2)
+       {
+         const int s = sign * step;
+         const int f = sign * fieldstep;
+ 
+         pts[count].mv[0] =  f;  pts[count].mv[1] = -s;  count++;
+         pts[count].mv[0] =  f;  pts[count].mv[1] =  0;  count++;
+         pts[count].mv[0] =  f;  pts[count].mv[1] =  s;  count++;
+         pts[count].mv[0] =  s;  pts[count].mv[1] =  f;  count++;
+         pts[count].mv[0] =  0;  pts[count].mv[1] =  f;  count++;
+         pts[count].mv[0] = -s;  pts[count].mv[1] =  f;  count++;
+       }
+     }
+   }
+ 
+   predictor->searchPoints = count;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS global initialization: derives the distortion thresholds,
+ *    builds the search patterns and window predictors, allocates the
+ *    working buffers and selects the primary/dual search patterns.
+ *
+ * \return
+ *    accumulated value returned by the get_mem* allocation calls
+ ************************************************************************
+ */
+ int
+ EPZSInit (void)
+ {
+   int memory_size = 0;
+   int idx;
+   int thres_scale = 1 << (img->bitdepth_luma - 8);
+   int levels = RoundLog2 (input->search_range) - 1;
+   int max_range = input->search_range;
+   int predictor_points;
+ 
+   //! With bi-predictive ME enabled the map has to cover the larger
+   //! of the two search ranges.
+   if (input->BiPredMotionEstimation)
+     max_range = imax (input->search_range, input->BiPredMESearchRange);
+   searcharray = (2 * max_range + 1) << (2 * input->EPZSSubPelGrid);
+ 
+   if (input->EPZSSubPelGrid)
+     mv_rescale = 0;
+   else
+     mv_rescale = 2;
+ 
+   //! Threshold limits are kept fixed in this implementation: each one
+   //! is its base table entry times the configured scale and the
+   //! bit-depth factor. They could instead be adapted from lagrangian
+   //! considerations (i.e. qp), content or complexity.
+   for (idx = 0; idx < 8; idx++)
+   {
+     medthres[idx] = input->EPZSMedThresScale * medthres_base[idx] * thres_scale;
+     maxthres[idx] = input->EPZSMaxThresScale * maxthres_base[idx] * thres_scale;
+     minthres[idx] = input->EPZSMinThresScale * minthres_base[idx] * thres_scale;
+     subthres[idx] = input->EPZSSubPelThresScale * medthres_base[idx] * thres_scale;
+   }
+ 
+   //! Definition of potential EPZS patterns.
+   //! Other patterns, or resizing patterns such as the PMVFAST scheme,
+   //! could be defined as well; the ones below are shown as reference
+   //! only. The same holds for the rest of this implementation (new
+   //! conditions could be added for adapting predictors, thresholds
+   //! etc.). Search could also be performed directly on subpel
+   //! positions, and patterns need not be restricted to integer
+   //! positions.
+ 
+   //! Allocate each search pattern and fill it in right away
+   sdiamond = allocEPZSpattern (4);
+   assignEPZSpattern (sdiamond, SDIAMOND, TRUE, TRUE, sdiamond);
+ 
+   square = allocEPZSpattern (8);
+   assignEPZSpattern (square, SQUARE, TRUE, TRUE, square);
+ 
+   ediamond = allocEPZSpattern (12);
+   assignEPZSpattern (ediamond, EDIAMOND, TRUE, TRUE, ediamond);
+ 
+   ldiamond = allocEPZSpattern (8);
+   assignEPZSpattern (ldiamond, LDIAMOND, TRUE, TRUE, ldiamond);
+ 
+   sbdiamond = allocEPZSpattern (12);
+   assignEPZSpattern (sbdiamond, SBDIAMOND, FALSE, TRUE, sdiamond);
+ 
+   pmvfast = allocEPZSpattern (8);
+   assignEPZSpattern (pmvfast, LDIAMOND, FALSE, TRUE, sdiamond);
+ 
+   //! Window based predictors (regular and extended).
+   //! Other window types could be used, and the method could be made
+   //! more adaptive (i.e. patterns assigned based on neighborhood).
+   window_predictor = allocEPZSpattern (levels * 8);
+   window_predictor_extended = allocEPZSpattern (levels * 20);
+   EPZSWindowPredictorInit ((short) input->search_range, window_predictor, 0);
+   EPZSWindowPredictorInit ((short) input->search_range, window_predictor_extended, 1);
+ 
+   //! Search predictor memory:
+   //! maxwindow + spatial + blocktype + temporal + memspatial
+   predictor_points = levels * 20 + 5 + 5
+                    + 9 * (input->EPZSTemporal)
+                    + 3 * (input->EPZSSpatialMem);
+   predictor = allocEPZSpattern (predictor_points);
+ 
+   //! Remaining buffers: distortion, EPZSMap, spatial memory and
+   //! temporal (co-located) predictors
+   memory_size += get_mem3Dint (&EPZSDistortion, 6, 7, img->width/BLOCK_SIZE);
+   memory_size += get_mem2Dshort (&EPZSMap, searcharray, searcharray );
+ 
+   if (input->EPZSSpatialMem)
+   {
+ #if EPZSREF
+     memory_size += get_mem6Dshort (&EPZSMotion, 6, img->max_num_references, 7, 4, img->width/BLOCK_SIZE, 2);
+ #else
+     memory_size += get_mem5Dshort (&EPZSMotion, 6, 7, 4, img->width/BLOCK_SIZE, 2);
+ #endif
+   }
+ 
+   if (input->EPZSTemporal)
+   {
+     EPZSCo_located = allocEPZScolocated (img->width, img->height,
+                                          active_sps->mb_adaptive_frame_field_flag);
+   }
+ 
+   //! Primary search pattern; any value outside 1..5 selects the
+   //! small diamond.
+   if (input->EPZSPattern == 5)
+     searchPattern = pmvfast;
+   else if (input->EPZSPattern == 4)
+     searchPattern = sbdiamond;
+   else if (input->EPZSPattern == 3)
+     searchPattern = ldiamond;
+   else if (input->EPZSPattern == 2)
+     searchPattern = ediamond;
+   else if (input->EPZSPattern == 1)
+     searchPattern = square;
+   else
+     searchPattern = sdiamond;
+ 
+   //! Dual search pattern; any value outside 2..6 selects the
+   //! small diamond.
+   if (input->EPZSDual == 6)
+     searchPatternD = pmvfast;
+   else if (input->EPZSDual == 5)
+     searchPatternD = sbdiamond;
+   else if (input->EPZSDual == 4)
+     searchPatternD = ldiamond;
+   else if (input->EPZSDual == 3)
+     searchPatternD = ediamond;
+   else if (input->EPZSDual == 2)
+     searchPatternD = square;
+   else
+     searchPatternD = sdiamond;
+ 
+   return memory_size;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Release the memory allocated for EPZS: co-located data, map and
+ *    distortion buffers, predictors, search patterns and, if enabled,
+ *    the spatial motion memory.
+ ************************************************************************
+ */
+ void EPZSDelete (void)
+ {
+   // Co-located (temporal) data is only released when EPZSTemporal is set
+   if (input->EPZSTemporal)
+   {
+     freeEPZScolocated (EPZSCo_located);
+   }
+ 
+   // Map and distortion buffers
+   free_mem2Dshort (EPZSMap);
+   free_mem3Dint (EPZSDistortion, 6);
+ 
+   // Window predictors and the working predictor set
+   freeEPZSpattern (window_predictor_extended);
+   freeEPZSpattern (window_predictor);
+   freeEPZSpattern (predictor);
+ 
+   // Search patterns
+   freeEPZSpattern (pmvfast);
+   freeEPZSpattern (sbdiamond);
+   freeEPZSpattern (ldiamond);
+   freeEPZSpattern (ediamond);
+   freeEPZSpattern (sdiamond);
+   freeEPZSpattern (square);
+ 
+   // Spatial motion memory is only released when EPZSSpatialMem is set;
+   // its dimensionality depends on EPZSREF.
+   if (input->EPZSSpatialMem)
+   {
+ #if EPZSREF
+     free_mem6Dshort (EPZSMotion, 6, img->max_num_references, 7, 4);
+ #else
+     free_mem5Dshort (EPZSMotion, 6, 7, 4);
+ #endif
+   }
+ }
+ 
+ //! For ME purposes restricting the co-located partition is not necessary.
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Slice Level Initialization
+ *
+ *    Fills the global mv_scale table with the scaling factors between
+ *    every pair of references of each visited list and, when
+ *    input->EPZSTemporal is set, stores scaled co-located motion
+ *    vectors in p (p->mv, p->top_mv, p->bottom_mv).
+ *
+ * \param p
+ *    co-located parameter structure receiving the scaled motion vectors
+ * \param listX
+ *    reference picture lists; lists 2..5 are only visited when
+ *    img->MbaffFrameFlag is set
+ ************************************************************************
+ */
+ void
+ EPZSSliceInit (EPZSColocParams * p,
+                StorablePicture ** listX[6])
+ {
+   StorablePicture *fs, *fs_top, *fs_bottom;
+   StorablePicture *fs1, *fs_top1, *fs_bottom1, *fsx;
+   int i, j, k, jj, jdiv, loffset;
+   int prescale, iTRb, iTRp;
+   int list = img->type == B_SLICE ? LIST_1 : LIST_0;
+   int tempmv_scale[2];
+   int epzs_scale[2][6][MAX_LIST_SIZE];
+   int iref;
+   int invmv_precision = 8;
+ 
+   // Lets compute scaling factors between all references in lists.
+   // Needed to scale spatial predictors.
+   // Lists 0/1 use the frame poc, 2/3 the top poc, 4/5 the bottom poc.
+   for (j = LIST_0; j < 2 + (img->MbaffFrameFlag * 4); j ++)
+   {
+     for (k = 0; k < listXsize[j]; k++)
+     {
+       for (i = 0; i < listXsize[j]; i++)
+       {
+         if (j/2 == 0)
+         {
+           iTRb = iClip3 (-128, 127, enc_picture->poc - listX[j][i]->poc);
+           iTRp = iClip3 (-128, 127, enc_picture->poc - listX[j][k]->poc);
+         }
+         else if (j/2 == 1)
+         {
+           iTRb = iClip3 (-128, 127, enc_picture->top_poc - listX[j][i]->poc);
+           iTRp = iClip3 (-128, 127, enc_picture->top_poc - listX[j][k]->poc);
+         }
+         else
+         {
+           iTRb = iClip3 (-128, 127, enc_picture->bottom_poc - listX[j][i]->poc);
+           iTRp = iClip3 (-128, 127, enc_picture->bottom_poc - listX[j][k]->poc);
+         }
+ 
+         if (iTRp != 0)
+         {
+           prescale = (16384 + iabs (iTRp / 2)) / iTRp;
+           mv_scale[j][i][k] = iClip3 (-2048, 2047, rshift_rnd_sf((iTRb * prescale), 6));
+         }
+         else
+           mv_scale[j][i][k] = 256;   // equal distances: 256 is the value used for "no scaling"
+       }
+     }
+   }
+ 
+   if (input->EPZSTemporal)
+   {
+     // Co-located pictures: first entry of the list and, when present,
+     // the second one (otherwise the first entry is reused).
+     fs_top = fs_bottom = fs = listX[list][0];
+     if (listXsize[list]> 1)
+       fs_top1 = fs_bottom1 = fs1 = listX[list][1];
+     else
+       fs_top1 = fs_bottom1 = fs1 = listX[list][0];
+ 
+     for (j = 0; j < 6; j++)
+     {
+       for (i = 0; i < 6; i++)
+       {
+         epzs_scale[0][j][i] = 256;
+         epzs_scale[1][j][i] = 256;
+       }
+     }
+ 
+     // epzs_scale[c][l][i]: scale applied to the mvs of co-located
+     // picture c (0 = first, 1 = second) for reference i; even l holds
+     // the LIST_0 scale, odd l the corresponding LIST_1 scale.
+     for (j = 0; j < 2 + (img->MbaffFrameFlag * 4); j += 2)
+     {
+       for (i = 0; i < listXsize[j]; i++)
+       {
+         if (j == 0)
+           iTRb = iClip3 (-128, 127, enc_picture->poc - listX[LIST_0 + j][i]->poc);
+         else if (j == 2)
+           iTRb = iClip3 (-128, 127, enc_picture->top_poc - listX[LIST_0 + j][i]->poc);
+         else
+           iTRb = iClip3 (-128, 127, enc_picture->bottom_poc - listX[LIST_0 + j][i]->poc);
+         iTRp = iClip3 (-128, 127, listX[list + j][0]->poc - listX[LIST_0 + j][i]->poc);
+         if (iTRp != 0)
+         {
+           prescale = (16384 + iabs (iTRp / 2)) / iTRp;
+           prescale = iClip3 (-2048, 2047, rshift_rnd_sf((iTRb * prescale), 6));
+           //prescale = (iTRb * prescale + 32) >> 6;
+         }
+         else      // This could not happen but lets use it in case that reference is removed.
+           prescale = 256;
+         epzs_scale[0][j][i] = rshift_rnd_sf((mv_scale[j][0][i] * prescale), 8);
+         epzs_scale[0][j + 1][i] = prescale - 256;
+         if (listXsize[list + j]>1)
+         {
+           iTRp = iClip3 (-128, 127, listX[list + j][1]->poc - listX[LIST_0 + j][i]->poc);
+           if (iTRp != 0)
+           {
+             prescale = (16384 + iabs (iTRp / 2)) / iTRp;
+             prescale = iClip3 (-2048, 2047, rshift_rnd_sf((iTRb * prescale), 6));
+             //prescale = (iTRb * prescale + 32) >> 6;
+           }
+           else      // This could not happen but lets use it for case that reference is removed.
+             prescale = 256;
+           epzs_scale[1][j][i] = rshift_rnd_sf((mv_scale[j][1][i] * prescale), 8);
+           epzs_scale[1][j + 1][i] = prescale - 256;
+         }
+         else
+         {
+           epzs_scale[1][j][i] = epzs_scale[0][j][i];
+           epzs_scale[1][j + 1][i] = epzs_scale[0][j + 1][i];
+         }
+       }
+     }
+     if (img->MbaffFrameFlag)
+     {
+       fs_top = listX[list + 2][0];
+       fs_bottom = listX[list + 4][0];
+       if (listXsize[0]> 1)
+       {
+         fs_top1 = listX[list + 2][1];
+         // Second bottom-field reference. This used to be stored in
+         // fs_bottom, which overwrote the first bottom reference and
+         // left fs_bottom1 pointing at the frame picture.
+         fs_bottom1 = listX[list + 4][1];
+       }
+     }
+     else
+     {
+       if (img->structure != FRAME)
+       {
+         if ((img->structure != fs->structure) && (fs->coded_frame))
+         {
+           if (img->structure == TOP_FIELD)
+           {
+             fs_top = fs_bottom = fs = listX[list][0]->top_field;
+             fs_top1 = fs_bottom1 = fs1 = listX[list][0]->bottom_field;
+           }
+           else
+           {
+             fs_top = fs_bottom = fs = listX[list][0]->bottom_field;
+             fs_top1 = fs_bottom1 = fs1 = listX[list][0]->top_field;
+           }
+         }
+       }
+     }
+ 
+     //if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+     if (!active_sps->frame_mbs_only_flag)
+     {
+       for (j = 0; j < fs->size_y >> 2; j++)
+       {
+         jj = j / 2;
+         jdiv = j / 2 + 4 * (j / 8);
+         for (i = 0; i < fs->size_x >> 2; i++)
+         {
+           if (img->MbaffFrameFlag && fs->field_frame[j][i])
+           {
+             //! Assign frame buffers for field MBs
+             //! Check whether we should use top or bottom field mvs.
+             //! Depending on the assigned poc values.
+             if (iabs (enc_picture->poc - fs_bottom->poc) > iabs (enc_picture->poc - fs_top->poc))
+             {
+               tempmv_scale[LIST_0] = 256;
+               tempmv_scale[LIST_1] = 0;
+ 
+               if (fs->ref_id [LIST_0][jdiv][i] < 0 && listXsize[LIST_0] > 1)
+               {
+                 fsx = fs_top1;
+                 loffset = 1;
+               }
+               else
+               {
+                 fsx = fs_top;
+                 loffset = 0;
+               }
+ 
+               if (fs->ref_id [LIST_0][jdiv][i] != -1)
+               {
+                 for (iref = 0; iref < imin(img->num_ref_idx_l0_active,listXsize[LIST_0]); iref++)
+                 {
+                   if (enc_picture->ref_pic_num[LIST_0][iref]==fs->ref_id [LIST_0][jdiv][i])
+                   {
+                     tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                     tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                     break;
+                   }
+                 }
+                 p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][0]), invmv_precision));
+                 p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][1]), invmv_precision));
+                 p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][0]), invmv_precision));
+                 p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][1]), invmv_precision));
+               }
+               else
+               {
+                 p->mv[LIST_0][j][i][0] = 0;
+                 p->mv[LIST_0][j][i][1] = 0;
+                 p->mv[LIST_1][j][i][0] = 0;
+                 p->mv[LIST_1][j][i][1] = 0;
+               }
+ 
+             }
+             else
+             {
+               tempmv_scale[LIST_0] = 256;
+               tempmv_scale[LIST_1] = 0;
+ 
+               if (fs->ref_id [LIST_0][jdiv + 4][i] < 0 && listXsize[LIST_0] > 1)
+               {
+                 fsx = fs_bottom1;
+                 loffset = 1;
+               }
+               else
+               {
+                 fsx = fs_bottom;
+                 loffset = 0;
+               }
+ 
+               if (fs->ref_id [LIST_0][jdiv + 4][i] != -1)
+               {
+                 for (iref = 0; iref < imin(img->num_ref_idx_l0_active,listXsize[LIST_0]); iref++)
+                 {
+                   if (enc_picture->ref_pic_num[LIST_0][iref]==fs->ref_id [LIST_0][jdiv + 4][i])
+                   {
+                     tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                     tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                     break;
+                   }
+                 }
+                 p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][0]), invmv_precision));
+                 p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][1]), invmv_precision));
+                 p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][0]), invmv_precision));
+                 p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][1]), invmv_precision));
+               }
+               else
+               {
+                 p->mv[LIST_0][j][i][0] = 0;
+                 p->mv[LIST_0][j][i][1] = 0;
+                 p->mv[LIST_1][j][i][0] = 0;
+                 p->mv[LIST_1][j][i][1] = 0;
+               }
+             }
+           }
+           else
+           {
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+             if (fs->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs1;
+               loffset = 1;
+             }
+             else
+             {
+               fsx = fs;
+               loffset = 0;
+             }
+ 
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref = 0; iref < imin(img->num_ref_idx_l0_active,listXsize[LIST_0]); iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                   break;
+                 }
+               }
+               p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+               p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+             }
+             else
+             {
+               p->mv[LIST_0][j][i][0] = 0;
+               p->mv[LIST_0][j][i][1] = 0;
+               p->mv[LIST_1][j][i][0] = 0;
+               p->mv[LIST_1][j][i][1] = 0;
+             }
+           }
+         }
+       }
+     }
+ 
+     //! Generate field MVs from Frame MVs
+     if (img->structure || img->MbaffFrameFlag)
+     {
+       for (j = 0; j < fs->size_y / 8; j++)
+       {
+         for (i = 0; i < fs->size_x / 4; i++)
+         {
+           if (!img->MbaffFrameFlag)
+           {
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+ 
+             if (fs->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs1;
+               loffset = 1;
+             }
+             else
+             {
+               fsx = fs;
+               loffset = 0;
+             }
+ 
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref = 0; iref < imin(img->num_ref_idx_l0_active,listXsize[LIST_0]); iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                   break;
+                 }
+               }
+               p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+               p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+             }
+             else
+             {
+               p->mv[LIST_0][j][i][0] = 0;
+               p->mv[LIST_0][j][i][1] = 0;
+               p->mv[LIST_1][j][i][0] = 0;
+               p->mv[LIST_1][j][i][1] = 0;
+             }
+           }
+           else
+           {
+             // MBAFF: derive the bottom-field vectors first, then the top-field ones.
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+ 
+             if (fs_bottom->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs_bottom1;
+               loffset = 1;
+             }
+             else
+             {
+               fsx = fs_bottom;
+               loffset = 0;
+             }
+ 
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref = 0; iref < imin(2*img->num_ref_idx_l0_active,listXsize[LIST_0 + 4]); iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0 + 4][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0 + 4][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1 + 4][iref];
+                   break;
+                 }
+               }
+               p->bottom_mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->bottom_mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+               p->bottom_mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->bottom_mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+             }
+             else
+             {
+               p->bottom_mv[LIST_0][j][i][0] = 0;
+               p->bottom_mv[LIST_0][j][i][1] = 0;
+               p->bottom_mv[LIST_1][j][i][0] = 0;
+               p->bottom_mv[LIST_1][j][i][1] = 0;
+             }
+ 
+             if (!fs->field_frame[2 * j][i])
+             {
+               p->bottom_mv[LIST_0][j][i][1] = (p->bottom_mv[LIST_0][j][i][1] + 1) >> 1;
+               p->bottom_mv[LIST_1][j][i][1] = (p->bottom_mv[LIST_1][j][i][1] + 1) >> 1;
+             }
+ 
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+ 
+             if (fs_top->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs_top1;
+               loffset = 1;
+             }
+             else
+             {
+               fsx = fs_top;
+               loffset = 0;
+             }
+ 
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref = 0; iref < imin(2*img->num_ref_idx_l0_active,listXsize[LIST_0 + 2]); iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0 + 2][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0 + 2][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1 + 2][iref];
+                   break;
+                 }
+               }
+               p->top_mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->top_mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+               p->top_mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+               p->top_mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+             }
+             else
+             {
+               p->top_mv[LIST_0][j][i][0] = 0;
+               p->top_mv[LIST_0][j][i][1] = 0;
+               p->top_mv[LIST_1][j][i][0] = 0;
+               p->top_mv[LIST_1][j][i][1] = 0;
+             }
+ 
+             if (!fs->field_frame[2 * j][i])
+             {
+               p->top_mv[LIST_0][j][i][1] = rshift_rnd_sf((p->top_mv[LIST_0][j][i][1]), 1);
+               p->top_mv[LIST_1][j][i][1] = rshift_rnd_sf((p->top_mv[LIST_1][j][i][1]), 1);
+             }
+           }
+         }
+       }
+     }
+ 
+     //if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+     if (!active_sps->frame_mbs_only_flag )
+     {
+       //! Use inference flag to remap mvs/references
+       //! Frame with field co-located
+       if (!img->structure)
+       {
+         for (j = 0; j < fs->size_y >> 2; j++)
+         {
+           jj = j>>1;
+           jdiv = (j>>1) + ((j>>3) << 2);
+           for (i = 0; i < fs->size_x >> 2; i++)
+           {
+             if (fs->field_frame[j][i])
+             {
+               tempmv_scale[LIST_0] = 256;
+               tempmv_scale[LIST_1] = 0;
+ 
+               if (fs->ref_id [LIST_0][jdiv][i] < 0 && listXsize[LIST_0] > 1)
+               {
+                 fsx = fs1;
+                 loffset = 1;
+               }
+               else
+               {
+                 fsx = fs;
+                 loffset = 0;
+               }
+               if (fsx->ref_id [LIST_0][jdiv][i] != -1)
+               {
+                 for (iref = 0; iref < imin(img->num_ref_idx_l0_active,listXsize[LIST_0]); iref++)
+                 {
+                   if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][jdiv][i])
+                   {
+                     tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                     tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                     break;
+                   }
+                 }
+ 
+                 // Take the mvs from whichever field is closer in poc to the current picture.
+                 if (iabs (enc_picture->poc - fsx->bottom_field->poc) > iabs (enc_picture->poc - fsx->top_field->poc))
+                 {
+                   p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->top_field->mv[LIST_0][jj][i][0]), invmv_precision));
+                   p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->top_field->mv[LIST_0][jj][i][1]), invmv_precision));
+                   p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->top_field->mv[LIST_0][jj][i][0]), invmv_precision));
+                   p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->top_field->mv[LIST_0][jj][i][1]), invmv_precision));
+                 }
+                 else
+                 {
+                   p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->bottom_field->mv[LIST_0][jj][i][0]), invmv_precision));
+                   p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->bottom_field->mv[LIST_0][jj][i][1]), invmv_precision));
+                   p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->bottom_field->mv[LIST_0][jj][i][0]), invmv_precision));
+                   p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->bottom_field->mv[LIST_0][jj][i][1]), invmv_precision));
+                 }
+               }
+               else
+               {
+                 p->mv[LIST_0][j][i][0] = 0;
+                 p->mv[LIST_0][j][i][1] = 0;
+                 p->mv[LIST_1][j][i][0] = 0;
+                 p->mv[LIST_1][j][i][1] = 0;
+               }
+             }
+           }
+         }
+       }
+     }
+ 
+     // Frame-only sequences: scale the co-located frame mvs directly.
+     if (active_sps->frame_mbs_only_flag)
+     {
+       for (j = 0; j < fs->size_y >> 2; j++)
+       {
+         for (i = 0; i < fs->size_x >> 2; i++)
+         {
+           tempmv_scale[LIST_0] = 256;
+           tempmv_scale[LIST_1] = 0;
+           if (fs->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+           {
+             fsx = fs1;
+             loffset = 1;
+           }
+           else
+           {
+             fsx = fs;
+             loffset = 0;
+           }
+           if (fsx->ref_id [LIST_0][j][i] != -1)
+           {
+             for (iref = 0; iref < imin(img->num_ref_idx_l0_active,listXsize[LIST_0]); iref++)
+             {
+               if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][j][i])
+               {
+                 tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                 tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                 break;
+               }
+             }
+ 
+             p->mv[LIST_0][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+             p->mv[LIST_0][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+             p->mv[LIST_1][j][i][0] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0]), invmv_precision));
+             p->mv[LIST_1][j][i][1] = iClip3 (-32768, 32767, rshift_rnd_sf((tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1]), invmv_precision));
+           }
+           else
+           {
+             p->mv[LIST_0][j][i][0] = 0;
+             p->mv[LIST_0][j][i][1] = 0;
+             p->mv[LIST_1][j][i][0] = 0;
+             p->mv[LIST_1][j][i][1] = 0;
+           }
+         }
+       }
+     }
+ 
+     // Final vertical adjustment: double the vertical component where the
+     // co-located block is field coded and the current picture is a frame
+     // (or MBAFF); halve it where the current picture is a field and the
+     // co-located block is frame coded.
+     if (!active_sps->frame_mbs_only_flag)
+     {
+       for (j = 0; j < fs->size_y >> 2; j++)
+       {
+         for (i = 0; i < fs->size_x >> 2; i++)
+         {
+           if ((!img->MbaffFrameFlag && !img->structure && fs->field_frame[j][i]) || (img->MbaffFrameFlag && fs->field_frame[j][i]))
+           {
+             p->mv[LIST_0][j][i][1] *= 2;
+             p->mv[LIST_1][j][i][1] *= 2;
+           }
+           else if (img->structure && !fs->field_frame[j][i])
+           {
+             p->mv[LIST_0][j][i][1] = rshift_rnd_sf((p->mv[LIST_0][j][i][1]), 1);
+             p->mv[LIST_1][j][i][1] = rshift_rnd_sf((p->mv[LIST_1][j][i][1]), 1);
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Spatial Predictors
+ *    AMT/HYC
+ ***********************************************************************
+ */
+ static short EPZSSpatialPredictors (PixelPos block_a,
+                                     PixelPos block_b,
+                                     PixelPos block_c,
+                                     PixelPos block_d,
+                                     int list,
+                                     int list_offset,
+                                     short ref,
+                                     char **refPic,
+                                     short ***tmp_mv,
+                                     EPZSStructure * predictor)
+ {
+   int refA, refB, refC, refD;
+   int *mot_scale = mv_scale[list + list_offset][ref];
+   short sp_shift_mv = 8 + mv_rescale;
+ 
+   // zero predictor
+   predictor->point[0].mv[0] = 0;
+   predictor->point[0].mv[1] = 0;
+ 
+   // Non MB-AFF mode
+   if (!img->MbaffFrameFlag)
+   {
+     refA = block_a.available ? (int) refPic[block_a.pos_y][block_a.pos_x] : -1;
+     refB = block_b.available ? (int) refPic[block_b.pos_y][block_b.pos_x] : -1;
+     refC = block_c.available ? (int) refPic[block_c.pos_y][block_c.pos_x] : -1;
+     refD = block_d.available ? (int) refPic[block_d.pos_y][block_d.pos_x] : -1;
+ 
+     // Left Predictor
+     if (block_a.available)
+     {
+       predictor->point[1].mv[0] = rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][0]), sp_shift_mv);
+       predictor->point[1].mv[1] = rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1]), sp_shift_mv);
+     }
+     else
+     {
+       predictor->point[1].mv[0] = (12 >> mv_rescale);
+       predictor->point[1].mv[1] = 0;
+     }
+     // Up predictor
+     if (block_b.available)
+     {
+       predictor->point[2].mv[0] = rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][0]), sp_shift_mv);
+       predictor->point[2].mv[1] = rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1]), sp_shift_mv);
+     }
+     else
+     {
+       predictor->point[2].mv[0] = 0;
+       predictor->point[2].mv[1] = (12 >> mv_rescale);
+     }
+ 
+     // Up-Right predictor
+     if (block_c.available)
+     {
+       predictor->point[3].mv[0] = rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][0]), sp_shift_mv);
+       predictor->point[3].mv[1] = rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1]), sp_shift_mv);
+     }
+     else
+     {
+       predictor->point[3].mv[0] = -(12 >> mv_rescale);
+       predictor->point[3].mv[1] = 0;
+     }
+ 
+     //Up-Left predictor
+     if (block_d.available)
+     {
+       predictor->point[4].mv[0] = rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][0]), sp_shift_mv);
+       predictor->point[4].mv[1] = rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1]), sp_shift_mv);
+     }
+     else
+     {
+       predictor->point[4].mv[0] = 0;
+       predictor->point[4].mv[1] = -(12 >> mv_rescale);
+     }
+   }
+   else  // MB-AFF mode
+   {
+     // Field Macroblock
+     if (list_offset)
+     {
+       refA = block_a.available
+         ? img->mb_data[block_a.mb_addr].mb_field
+         ? (int) refPic[block_a.pos_y][block_a.pos_x]
+         : (int) refPic[block_a.pos_y][block_a.pos_x] * 2 : -1;
+       refB =block_b.available
+         ? img->mb_data[block_b.mb_addr].mb_field
+         ? (int) refPic[block_b.pos_y][block_b.pos_x]
+         : (int) refPic[block_b.pos_y][block_b.pos_x] * 2 : -1;
+       refC = block_c.available
+         ? img->mb_data[block_c.mb_addr].mb_field
+         ? (int) refPic[block_c.pos_y][block_c.pos_x]
+         : (int) refPic[block_c.pos_y][block_c.pos_x] * 2 : -1;
+       refD = block_d.available
+         ? img->mb_data[block_d.mb_addr].mb_field
+         ? (int) refPic[block_d.pos_y][block_d.pos_x]
+         : (int) refPic[block_d.pos_y][block_d.pos_x] * 2 : -1;
+ 
+       // Left Predictor
+       predictor->point[1].mv[0] = (block_a.available)
+         ? rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][0]), sp_shift_mv) :  (12 >> mv_rescale);
+       predictor->point[1].mv[1] = (block_a.available)
+         ? img->mb_data[block_a.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1]), sp_shift_mv)
+         : rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1]), sp_shift_mv + 1) :  0;
+ 
+       // Up predictor
+       predictor->point[2].mv[0] = (block_b.available)
+         ? rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][0]), sp_shift_mv) : 0;
+       predictor->point[2].mv[1] = (block_b.available)
+         ? img->mb_data[block_b.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1]), sp_shift_mv)
+         : rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1]), sp_shift_mv + 1) : (12 >> mv_rescale);
+ 
+       // Up-Right predictor
+       predictor->point[3].mv[0] = (block_c.available)
+         ? rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][0]), sp_shift_mv) : -(12 >> mv_rescale);
+       predictor->point[3].mv[1] = (block_c.available)
+         ? img->mb_data[block_c.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1]), sp_shift_mv)
+         : rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1]), sp_shift_mv + 1) : 0;
+ 
+       //Up-Left predictor
+       predictor->point[4].mv[0] = (block_d.available)
+         ? rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][0]), sp_shift_mv) : 0;
+       predictor->point[4].mv[1] = (block_d.available)
+         ? img->mb_data[block_d.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1]), sp_shift_mv)
+         : rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1]), sp_shift_mv + 1) : -(12 >> mv_rescale);
+     }
+     else // Frame macroblock
+     {
+       refA = block_a.available
+         ? img->mb_data[block_a.mb_addr].mb_field
+         ? (int) refPic[block_a.pos_y][block_a.pos_x] >> 1
+         : (int) refPic[block_a.pos_y][block_a.pos_x] : -1;
+       refB = block_b.available
+         ? img->mb_data[block_b.mb_addr].mb_field
+         ? (int) refPic[block_b.pos_y][block_b.pos_x] >> 1
+         : (int) refPic[block_b.pos_y][block_b.pos_x] : -1;
+       refC = block_c.available
+         ? img->mb_data[block_c.mb_addr].mb_field
+         ? (int) refPic[block_c.pos_y][block_c.pos_x] >> 1
+         : (int) refPic[block_c.pos_y][block_c.pos_x] : -1;
+       refD = block_d.available
+         ? img->mb_data[block_d.mb_addr].mb_field
+         ? (int) refPic[block_d.pos_y][block_d.pos_x] >> 1
+         : (int) refPic[block_d.pos_y][block_d.pos_x] : -1;
+ 
+       // Left Predictor
+       predictor->point[1].mv[0] = (block_a.available)
+         ? rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][0]), sp_shift_mv) : (12 >> mv_rescale);
+       predictor->point[1].mv[1] = (block_a.available)
+         ? img->mb_data[block_a.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1]), sp_shift_mv - 1)
+         : rshift_rnd_sf((mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1]), sp_shift_mv) : 0;
+ 
+       // Up predictor
+       predictor->point[2].mv[0] = (block_b.available)
+         ? rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][0]), sp_shift_mv) : 0;
+       predictor->point[2].mv[1] = (block_b.available)
+         ? img->mb_data[block_b.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1]), sp_shift_mv - 1)
+         : rshift_rnd_sf((mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1]), sp_shift_mv) : (12 >> mv_rescale);
+ 
+       // Up-Right predictor
+       predictor->point[3].mv[0] = (block_c.available)
+         ? rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][0]), sp_shift_mv) : -(12 >> mv_rescale);
+       predictor->point[3].mv[1] = (block_c.available)
+         ? img->mb_data[block_c.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1]), sp_shift_mv - 1)
+         : rshift_rnd_sf((mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1]), sp_shift_mv) : 0;
+ 
+       //Up-Left predictor
+       predictor->point[4].mv[0] = (block_d.available)
+         ? rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][0]), sp_shift_mv) : 0;
+       predictor->point[4].mv[1] = (block_d.available)
+         ? img->mb_data[block_d.mb_addr].mb_field
+         ? rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1]), sp_shift_mv - 1)
+         : rshift_rnd_sf((mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1]), sp_shift_mv) : -(12 >> mv_rescale);
+     }
+   }
+ 
+   return ((refA == -1) + (refB == -1) + (refC == -1 && refD == -1));
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Spatial predictors taken from the EPZS motion memory (EPZSMotion)
+ *    AMT/HYC
+ *
+ *    Writes up to three candidates (left, up, up-right neighbour of the
+ *    current block) into predictor->point[], starting at index *prednum.
+ *    Every candidate is stored in the current slot, but *prednum only
+ *    advances when the stored vector is non-zero, so an all-zero
+ *    candidate is simply overwritten by the next one.
+ *
+ * \param list       first index into EPZSMotion (and mv_scale when EPZSREF is 0)
+ * \param ref        reference index
+ * \param blocktype  block type index into EPZSMotion
+ * \param pic_x      horizontal position of the block (column index into the memory)
+ * \param bs_x       horizontal block size, in the same units as pic_x
+ * \param bs_y       vertical block size, in the same units as by
+ * \param by         vertical position of the block (row index into the memory)
+ * \param prednum    in/out: number of predictors stored so far
+ * \param img_width  picture width, in the same units as pic_x
+ * \param predictor  predictor list that receives the candidates
+ *
+ * \note
+ *    When by == 0 the "up" rows are read from row (4 - bs_y) instead of
+ *    (by - bs_y); presumably these are the rows kept from the macroblock
+ *    row above - TODO confirm against the EPZSMotion allocation.
+ ***********************************************************************
+ */
+ 
+ static void EPZSSpatialMemPredictors (int list,
+                                       short ref,
+                                       int blocktype,
+                                       int pic_x,
+                                       int bs_x,
+                                       int bs_y,
+                                       int by,
+                                       int *prednum,
+                                       int img_width,
+                                       EPZSStructure * predictor)
+ {
+ #if EPZSREF
+   // One motion memory per reference: vectors are used without scaling.
+   short ***mv = EPZSMotion[list][ref][blocktype];
+   int *cur_mv = predictor->point[*prednum].mv;
+ 
+   // Left Predictor (zero when the block sits in the first column)
+   cur_mv[0] = (pic_x > 0) ? mv[by][pic_x - bs_x][0] :  0;
+   cur_mv[1] = (pic_x > 0) ? mv[by][pic_x - bs_x][1] :  0;
+   *prednum += (cur_mv[0] | cur_mv[1])!=0;
+ 
+   // Up predictor
+   cur_mv = predictor->point[*prednum].mv;
+   cur_mv[0] = (by > 0) ? mv[by - bs_y][pic_x][0] : mv[4 - bs_y][pic_x][0];
+   cur_mv[1] = (by > 0) ? mv[by - bs_y][pic_x][1] : mv[4 - bs_y][pic_x][1];
+   *prednum += (cur_mv[0] | cur_mv[1])!=0;
+ 
+   // Up-Right predictor (zero when it would fall outside the picture width)
+   cur_mv = predictor->point[*prednum].mv;
+   cur_mv[0] = (pic_x + bs_x < img_width) ? (by > 0)
+     ? mv[by - bs_y][pic_x + bs_x][0] : mv[4 - bs_y][pic_x + bs_x][0] : 0;
+   cur_mv[1] = (pic_x + bs_x < img_width) ? (by > 0)
+     ? mv[by - bs_y][pic_x + bs_x][1] : mv[4 - bs_y][pic_x + bs_x][1] : 0;
+   *prednum += (cur_mv[0] | cur_mv[1])!=0;
+ 
+ #else
+   // Single motion memory: stored vectors are scaled with mv_scale[list][ref][0]
+   // and rounded with a fixed shift of 8.
+   int mot_scale = mv_scale[list][ref][0];
+   // The memory is indexed [row][column][component] below, so the pointer
+   // needs three levels of indirection (it was declared as short **, which
+   // cannot be subscripted three times).
+   short ***mv = EPZSMotion[list][blocktype];
+   int *cur_mv = predictor->point[*prednum].mv;
+ 
+   // Left Predictor (zero when the block sits in the first column)
+   cur_mv[0] = (pic_x > 0)
+     ? rshift_rnd_sf((mot_scale * mv[by][pic_x - bs_x][0]), 8)
+     : 0;
+   cur_mv[1] = (pic_x > 0)
+     ? rshift_rnd_sf((mot_scale * mv[by][pic_x - bs_x][1]), 8)
+     : 0;
+   *prednum += (cur_mv[0] | cur_mv[1])!=0;
+ 
+   // Up predictor
+   cur_mv = predictor->point[*prednum].mv;
+   cur_mv[0] = (by > 0)
+     ? rshift_rnd_sf((mot_scale * mv[by - bs_y][pic_x][0]), 8)
+     : rshift_rnd_sf((mot_scale * mv[4  - bs_y][pic_x][0]), 8);
+   cur_mv[1] = (by > 0)
+     ? rshift_rnd_sf((mot_scale * mv[by - bs_y][pic_x][1]), 8)
+     : rshift_rnd_sf((mot_scale * mv[4  - bs_y][pic_x][1]), 8);
+   *prednum += (cur_mv[0] | cur_mv[1])!=0;
+ 
+   // Up-Right predictor (zero when it would fall outside the picture width)
+   cur_mv = predictor->point[*prednum].mv;
+   cur_mv[0] = (pic_x + bs_x < img_width)
+     ? (by > 0)
+     ? rshift_rnd_sf((mot_scale * mv[by - bs_y][pic_x + bs_x][0]), 8)
+     : rshift_rnd_sf((mot_scale * mv[4  - bs_y][pic_x + bs_x][0]), 8)
+     : 0;
+   cur_mv[1] = (pic_x + bs_x < img_width)
+     ? (by > 0)
+     ? rshift_rnd_sf((mot_scale * mv[by - bs_y][pic_x + bs_x][1]), 8)
+     : rshift_rnd_sf((mot_scale * mv[4  - bs_y][pic_x + bs_x][1]), 8)
+     : 0;
+   *prednum += (cur_mv[0] | cur_mv[1])!=0;
+ #endif
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Scale one co-located motion vector and store it as a candidate.
+ *    The slot at *prednum is always written; the counter is advanced
+ *    only when the scaled vector is non-zero.
+ ***********************************************************************
+ */
+ static void EPZSAddTemporalCandidate (short *col_vec, int scale, int shift,
+                                       EPZSStructure * predictor, int *prednum)
+ {
+   int *cand = predictor->point[*prednum].mv;
+ 
+   cand[0] = rshift_rnd_sf((scale * col_vec[0]), shift);
+   cand[1] = rshift_rnd_sf((scale * col_vec[1]), shift);
+   if ((cand[0] | cand[1]) != 0)
+     (*prednum)++;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Temporal Predictors
+ *    AMT/HYC
+ *
+ *    Adds scaled motion vectors of the co-located picture to the
+ *    predictor list: first the vector at the block position itself and,
+ *    while the current cost is still above the stop criterion and
+ *    ref < 2, the vectors of the available neighbouring positions in the
+ *    order left, up-left, down-left, up, right, up-right, down-right,
+ *    below.
+ ***********************************************************************
+ */
+ static void
+ EPZSTemporalPredictors (int list,         // <--  current list
+                         int list_offset,  // <--  list offset for MBAFF
+                         short ref,        // <--  current reference frame
+                         int o_block_x,    // <--  absolute x-coordinate of regarded AxB block
+                         int o_block_y,    // <--  absolute y-coordinate of regarded AxB block
+                         EPZSStructure * predictor,  // <--> predictor list receiving the candidates
+                         int *prednum,               // <--> number of predictors stored so far
+                         int block_available_left,
+                         int block_available_up,
+                         int block_available_right,
+                         int block_available_below,
+                         int blockshape_x, // <--  horizontal offset to the right neighbour
+                         int blockshape_y, // <--  vertical offset to the neighbour below
+                         int stopCriterion,
+                         int min_mcost)
+ {
+   int scale = mv_scale[list + list_offset][ref][0];
+   short shift = 8 + mv_rescale; // 16 - invmv_precision + mv_rescale
+   short ***col_mv;
+ 
+   // Select the co-located motion field matching the list offset.
+   if (list_offset == 0)
+     col_mv = EPZSCo_located->mv[list];
+   else if (list_offset == 2)
+     col_mv = EPZSCo_located->top_mv[list];
+   else
+     col_mv = EPZSCo_located->bottom_mv[list];
+ 
+   // Co-located position
+   EPZSAddTemporalCandidate (col_mv[o_block_y][o_block_x], scale, shift, predictor, prednum);
+ 
+   // Neighbouring positions are only examined while the cost is above the
+   // threshold and only for the first two references.
+   if (min_mcost <= stopCriterion || ref >= 2)
+     return;
+ 
+   if (block_available_left)
+   {
+     // Left
+     EPZSAddTemporalCandidate (col_mv[o_block_y][o_block_x - 1], scale, shift, predictor, prednum);
+ 
+     // Up-Left
+     if (block_available_up)
+       EPZSAddTemporalCandidate (col_mv[o_block_y - 1][o_block_x - 1], scale, shift, predictor, prednum);
+ 
+     // Down-Left
+     if (block_available_below)
+       EPZSAddTemporalCandidate (col_mv[o_block_y + blockshape_y][o_block_x - 1], scale, shift, predictor, prednum);
+   }
+ 
+   // Up
+   if (block_available_up)
+     EPZSAddTemporalCandidate (col_mv[o_block_y - 1][o_block_x], scale, shift, predictor, prednum);
+ 
+   if (block_available_right)
+   {
+     // Right
+     EPZSAddTemporalCandidate (col_mv[o_block_y][o_block_x + blockshape_x], scale, shift, predictor, prednum);
+ 
+     // Up-Right
+     if (block_available_up)
+       EPZSAddTemporalCandidate (col_mv[o_block_y - 1][o_block_x + blockshape_x], scale, shift, predictor, prednum);
+ 
+     // Down-Right
+     if (block_available_below)
+       EPZSAddTemporalCandidate (col_mv[o_block_y + blockshape_y][o_block_x + blockshape_x], scale, shift, predictor, prednum);
+   }
+ 
+   // Below
+   if (block_available_below)
+     EPZSAddTemporalCandidate (col_mv[o_block_y + blockshape_y][o_block_x], scale, shift, predictor, prednum);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Block Type Predictors
+ *
+ *    Reuses vectors already stored in img->all_mv for this block position
+ *    and list as additional candidates: the vector of the parent block
+ *    type (blk_parent), the scaled vectors of reference ref-1 and of
+ *    reference 0 for the same block type, and the vectors of block types
+ *    1 and 4 for the same reference. Each candidate is written at
+ *    *prednum, which only advances for non-zero vectors.
+ ************************************************************************
+ */
+ static void EPZSBlockTypePredictors (int block_x, int block_y, int blocktype, int ref, int list,
+                                      EPZSStructure * predictor, int *prednum)
+ {
+   static const int fixed_types[2] = { 1, 4 };
+   short ***mv_by_ref = img->all_mv[block_y][block_x][list];
+   short scaled_shift = 8 + mv_rescale;
+   int *cand;
+   int k;
+ 
+   // Vector of the parent block type, same reference
+   cand = predictor->point[*prednum].mv;
+   cand[0] = rshift_rnd((mv_by_ref[ref][(blk_parent[blocktype])][0]), mv_rescale);
+   cand[1] = rshift_rnd((mv_by_ref[ref][(blk_parent[blocktype])][1]), mv_rescale);
+   if ((cand[0] | cand[1]) != 0)
+     (*prednum)++;
+ 
+   // Vectors found for other references of the same block type, rescaled
+   if (ref > 0)
+   {
+     if (blocktype < 5 || img->structure != FRAME)
+     {
+       int scale_prev  = mv_scale[list][ref][ref-1];
+       int scale_first = mv_scale[list][ref][0];
+ 
+       // previous reference
+       cand = predictor->point[*prednum].mv;
+       cand[0] = rshift_rnd_sf((scale_prev * mv_by_ref[ref-1][blocktype][0]), scaled_shift );
+       cand[1] = rshift_rnd_sf((scale_prev * mv_by_ref[ref-1][blocktype][1]), scaled_shift );
+       if ((cand[0] | cand[1]) != 0)
+         (*prednum)++;
+ 
+       // reference 0
+       cand = predictor->point[*prednum].mv;
+       cand[0] = rshift_rnd_sf((scale_first * mv_by_ref[0][blocktype][0]), scaled_shift );
+       cand[1] = rshift_rnd_sf((scale_first * mv_by_ref[0][blocktype][1]), scaled_shift );
+       if ((cand[0] | cand[1]) != 0)
+         (*prednum)++;
+     }
+   }
+ 
+   // Vectors of block types 1 and 4 (skipped for the block type being searched)
+   for (k = 0; k < 2; k++)
+   {
+     if (blocktype == fixed_types[k])
+       continue;
+ 
+     cand = predictor->point[*prednum].mv;
+     cand[0] = rshift_rnd((mv_by_ref[ref][fixed_types[k]][0]), mv_rescale);
+     cand[1] = rshift_rnd((mv_by_ref[ref][fixed_types[k]][1]), mv_rescale);
+     if ((cand[0] | cand[1]) != 0)
+       (*prednum)++;
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Window Based Predictors
+ *
+ *    Appends one candidate per point of the window pattern
+ *    (window_predictor_extended when 'extended' is non-zero, otherwise
+ *    window_predictor), each being the pattern offset added to mv.
+ *    Unlike the other predictor generators, *prednum is advanced for
+ *    every point, including zero vectors.
+ ************************************************************************
+ */
+ static void EPZSWindowPredictors (short mv[2], EPZSStructure *predictor, int *prednum, int extended)
+ {
+   EPZSStructure *window = extended ? window_predictor_extended : window_predictor;
+   int idx = 0;
+ 
+   while (idx < window->searchPoints)
+   {
+     int *dst = predictor->point[*prednum].mv;
+ 
+     dst[0] = mv[0] + window->point[idx].mv[0];
+     dst[1] = mv[1] + window->point[idx].mv[1];
+     ++(*prednum);
+     ++idx;
+   }
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    FAST Motion Estimation using EPZS
+ *    AMT/HYC
+ ***********************************************************************
+ */
+ int                                           //  ==> minimum motion cost after search
+ EPZSPelBlockMotionSearch (imgpel * cur_pic,  // <--  original pixel values for the AxB block
+                           short ref,          // <--  reference picture
+                           int list,           // <--  reference list
+                           int list_offset,    // <--  offset for Mbaff
+                           char ***refPic,     // <--  reference array
+                           short ****tmp_mv,   // <--  mv array
+                           int pic_pix_x,      // <--  absolute x-coordinate of regarded AxB block
+                           int pic_pix_y,      // <--  absolute y-coordinate of regarded AxB block
+                           int blocktype,      // <--  block type (1-16x16 ... 7-4x4)
+                           short pred_mv[2],    // <--  motion vector predictor (x) in sub-pel units
+                           short mv[2],        // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           int search_range,   // <--  1-d search range in pel units
+                           int min_mcost,      // <--  minimum motion cost (cost for center or huge value)
+                           int lambda_factor)      // <--  lagrangian parameter for determining motion cost
+ {
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   short blocksize_y = input->blc_size[blocktype][1];  // vertical block size
+   short blocksize_x = input->blc_size[blocktype][0];  // horizontal block size
+   short blockshape_x = (blocksize_x >> 2);  // horizontal block size in 4-pel units
+   short blockshape_y = (blocksize_y >> 2);  // vertical block size in 4-pel units
+ 
+   short mb_x = pic_pix_x - img->opix_x;
+   short mb_y = pic_pix_y - img->opix_y;
+   short pic_pix_x2 = pic_pix_x >> 2;
+   short pic_pix_y2 = pic_pix_y >> 2;
+   short block_x = (mb_x >> 2);
+   short block_y = (mb_y >> 2);
+ 
+   int   pred_x = (pic_pix_x << 2) + pred_mv[0];  // predicted position x (in sub-pel units)
+   int   pred_y = (pic_pix_y << 2) + pred_mv[1];  // predicted position y (in sub-pel units)
+   int   center_x = (pic_pix_x << (2 - mv_rescale))+ mv[0]; // center position x (in pel units)
+   int   center_y = (pic_pix_y << (2 - mv_rescale))+ mv[1]; // center position y (in pel units)
+   int   cand_x = center_x << mv_rescale;
+   int   cand_y = center_y << mv_rescale;
+   int   tempmv[2]  = {mv[0], mv[1]};
+   int   tempmv2[2] = {0, 0};
+   int   stopCriterion = medthres[blocktype];
+   int   mapCenter_x = search_range - mv[0];
+   int   mapCenter_y = search_range - mv[1];
+   int   second_mcost = INT_MAX;
+   short apply_weights = (active_pps->weighted_pred_flag > 0 || (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+   int   *prevSad = EPZSDistortion[list + list_offset][blocktype - 1];
+   short *motion=NULL;
+   int   dist_method = F_PEL + 3 * apply_weights;
+ 
+   short invalid_refs = 0;
+   byte  checkMedian = FALSE;
+   EPZSStructure *searchPatternF = searchPattern;
+   EPZSBlkCount ++;
+ 
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   img_width  = ref_picture->size_x;
+   img_height = ref_picture->size_y;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if ( ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+   pic_pix_x = (pic_pix_x << (2 - mv_rescale));
+   pic_pix_y = (pic_pix_y << (2 - mv_rescale));
+ 
+   if (input->EPZSSpatialMem)
+   {
+ #if EPZSREF
+     motion = EPZSMotion[list + list_offset][ref][blocktype - 1][block_y][pic_pix_x2];
+ #else
+     motion = EPZSMotion[list + list_offset][blocktype - 1][block_y][pic_pix_x2];
+ #endif
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   ref_access_method = CHECK_RANGE ? FAST_ACCESS : UMV_ACCESS;
+ 
+   // Clear EPZSMap
+   // memset(EPZSMap[0],FALSE,searcharray*searcharray);
+   // Check median candidate;
+   EPZSMap[search_range][search_range] = EPZSBlkCount;
+ 
+   //--- initialize motion cost (cost for motion vector) and check ---
+   min_mcost = MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x, pred_y);
+ 
+   //--- add residual cost to motion cost ---
+   min_mcost += computeUniPred[dist_method](cur_pic, blocksize_y, blocksize_x,
+     INT_MAX, cand_x + IMG_PAD_SIZE_TIMES4, cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+   // Additional threshold for ref>0
+   if ((ref>0 && img->structure == FRAME)
+     && (prevSad[pic_pix_x2] < medthres[blocktype])
+     && (prevSad[pic_pix_x2] < min_mcost))
+   {
+ #if EPZSREF
+     if (input->EPZSSpatialMem)
+ #else
+     if (input->EPZSSpatialMem && ref == 0)
+ #endif
+     {
+       motion[0]  = tempmv[0];
+       motion[1]  = tempmv[1];
+     }
+ 
+     return min_mcost;
+   }
+ 
+   //  if ((center_x > search_range) && (center_x < img_width  - search_range - blocksize_x) &&
+   //(center_y > search_range) && (center_y < img_height - search_range - blocksize_y)   )
+   if ( (center_x > search_range) && (center_x < ((img_width  - blocksize_x) << (input->EPZSSubPelGrid * 2)) - search_range)
+     && (center_y > search_range) && (center_y < ((img_height - blocksize_y) << (input->EPZSSubPelGrid * 2)) - search_range))
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   //! If medthres satisfied, then terminate, otherwise generate Predictors
+   //! Condition could be strengthened by consideration distortion of adjacent partitions.
+   if (min_mcost > stopCriterion)
+   {
+     int mb_available_right   = (img->mb_x < (img_width  >> 4) - 1);
+     int mb_available_below   = (img->mb_y < (img_height >> 4) - 1);
+ 
+     int sadA, sadB, sadC;
+     int block_available_right;
+     int block_available_below;
+     int prednum = 5;
+     int patternStop = 0, pointNumber = 0, checkPts, nextLast = 0;
+     int totalCheckPts = 0, motionDirection = 0;
+     int conditionEPZS;
+     int tmv[2];
+     int pos, mcost;
+     PixelPos block_a, block_b, block_c, block_d;
+ 
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x - 1, mb_y, &block_a);
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x, mb_y - 1, &block_b);
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x + blocksize_x, mb_y -1, &block_c);
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x - 1, mb_y -1, &block_d);
+ 
+     if (mb_y > 0)
+     {
+       if (mb_x < 8)   // first column of 8x8 blocks
+       {
+         if (mb_y == 8)
+         {
+           block_available_right = (blocksize_x != MB_BLOCK_SIZE) || mb_available_right;
+           if (blocksize_x == MB_BLOCK_SIZE)
+             block_c.available = 0;
+         }
+         else
+         {
+           block_available_right = (mb_x + blocksize_x != 8) || mb_available_right;
+           if (mb_x + blocksize_x == 8)
+             block_c.available = 0;
+         }
+       }
+       else
+       {
+         block_available_right = (mb_x + blocksize_x != MB_BLOCK_SIZE) || mb_available_right;
+         if (mb_x + blocksize_x == MB_BLOCK_SIZE)
+           block_c.available = 0;
+       }
+     }
+     else
+     {
+       block_available_right = (mb_x + blocksize_x != MB_BLOCK_SIZE) || mb_available_right;
+     }
+     block_available_below = (mb_y + blocksize_y != MB_BLOCK_SIZE) || (mb_available_below);
+ 
+     sadA = block_a.available ? prevSad[pic_pix_x2 - blockshape_x] : INT_MAX;
+     sadB = block_b.available ? prevSad[pic_pix_x2] : INT_MAX;
+     sadC = block_c.available ? prevSad[pic_pix_x2 + blockshape_x] : INT_MAX;
+ 
+     stopCriterion = imin(sadA,imin(sadB,sadC));
+     stopCriterion = imax(stopCriterion,minthres[blocktype]);
+     stopCriterion = imin(stopCriterion,maxthres[blocktype]);
+ 
+     stopCriterion = (9 * imax (medthres[blocktype], stopCriterion) + 2 * medthres[blocktype]) >> 3;
+ 
+     //! Add Spatial Predictors in predictor list.
+     //! Scheme adds zero, left, top-left, top, top-right. Note that top-left adds very little
+     //! in terms of performance and could be removed with little penalty if any.
+     invalid_refs = EPZSSpatialPredictors (block_a, block_b, block_c, block_d,
+       list, list_offset, ref, refPic[list], tmp_mv[list], predictor);
+     if (input->EPZSSpatialMem)
+       EPZSSpatialMemPredictors (list + list_offset, ref, blocktype - 1, pic_pix_x2,
+       blockshape_x, blockshape_y, block_y, &prednum, img_width>>2, predictor);
+ 
+     // Temporal predictors
+     if (input->EPZSTemporal)
+       EPZSTemporalPredictors (list, list_offset, ref, pic_pix_x2, pic_pix_y2, predictor, &prednum,
+       block_a.available, block_b.available, block_available_right,
+       block_available_below, blockshape_x, blockshape_y, stopCriterion, min_mcost);
+ 
+     //! Window Size Based Predictors
+     //! Basically replaces a Hierarchical ME concept and helps escaping local minima, or
+     //! determining large motion variations.
+     //! Following predictors can be adjusted further (i.e. removed, conditioned etc)
+     //! based on distortion, correlation of adjacent MVs, complexity etc. These predictors
+     //! and their conditioning could also be moved after all other predictors have been
+     //! tested. Adaptation could also be based on type of material and coding mode (i.e.
+     //! field/frame coding,MBAFF etc considering the higher dependency with opposite parity field
+     //conditionEPZS = ((min_mcost > stopCriterion)
+     // && (input->EPZSFixed > 1 || (input->EPZSFixed && img->type == P_SLICE)));
+     //conditionEPZS = ((ref == 0) && (blocktype < 5) && (min_mcost > stopCriterion)
+     //&& (input->EPZSFixed > 1 || (input->EPZSFixed && img->type == P_SLICE)));
+     conditionEPZS = ((min_mcost > stopCriterion) && ((ref < 2 && blocktype < 5)
+       || ((img->structure!=FRAME || list_offset) && ref < 3))
+       && (input->EPZSFixed > 1 || (input->EPZSFixed && img->type == P_SLICE)));
+ 
+     if (conditionEPZS)
+       EPZSWindowPredictors (mv, predictor, &prednum,
+       (blocktype < 5) && (invalid_refs > 2) && (ref < 1 + (img->structure!=FRAME || list_offset)));
+ 
+     //! Blocktype/Reference dependent predictors.
+     //! Since already mvs for other blocktypes/references have been computed, we can reuse
+     //! them in order to easier determine the optimal point. Use of predictors could depend
+     //! on cost,
+     //conditionEPZS = (ref == 0 || (ref > 0 && min_mcost > stopCriterion) || img->structure != FRAME || list_offset);
+     conditionEPZS = (ref == 0 || (ref > 0 && min_mcost > stopCriterion));
+     // above seems to result in memory leak issues which need to be resolved
+ 
+     if (conditionEPZS && img->current_mb_nr != 0)
+       EPZSBlockTypePredictors (block_x, block_y, blocktype, ref, list, predictor, &prednum);
+ 
+     //! Check all predictors
+     for (pos = 0; pos < prednum; pos++)
+     {
+       tmv[0] = predictor->point[pos].mv[0];
+       tmv[1] = predictor->point[pos].mv[1];
+       //if ((iabs (tmv[0] - mv[0]) > search_range || iabs (tmv[1] - mv[1]) > search_range) && (tmv[0] || tmv[1]))
+       if (iabs (tmv[0] - mv[0]) > search_range || iabs (tmv[1] - mv[1]) > search_range)
+         continue;
+ 
+       if ((iabs (tmv[0] - mv[0]) <= search_range) && (iabs (tmv[1] - mv[1]) <= search_range))
+       {
+         if (EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] == EPZSBlkCount)
+           continue;
+         else
+           EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] = EPZSBlkCount;
+       }
+ 
+       cand_x = (pic_pix_x + tmv[0])<<mv_rescale;
+       cand_y = (pic_pix_y + tmv[1])<<mv_rescale;
+ 
+       //--- set motion cost (cost for motion vector) and check ---
+       mcost = MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x, pred_y);
+ 
+       if (mcost >= second_mcost) continue;
+ 
+       ref_access_method = CHECK_RANGE ? FAST_ACCESS : UMV_ACCESS;
+ 
+       mcost += computeUniPred[dist_method](cur_pic, blocksize_y,blocksize_x,
+         second_mcost - mcost, cand_x + IMG_PAD_SIZE_TIMES4,cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+       //--- check if motion cost is less than minimum cost ---
+       if (mcost < min_mcost)
+       {
+         tempmv2[0] = tempmv[0];
+         tempmv2[1] = tempmv[1];
+         tempmv[0] = tmv[0];
+         tempmv[1] = tmv[1];
+         second_mcost = min_mcost;
+         min_mcost = mcost;
+         checkMedian = TRUE;
+       }
+       //else if (mcost < second_mcost && (tempmv[0] != tmv[0] || tempmv[1] != tmv[1]))
+       else if (mcost < second_mcost)
+       {
+         tempmv2[0] = tmv[0];
+         tempmv2[1] = tmv[1];
+         second_mcost = mcost;
+         checkMedian = TRUE;
+       }
+     }
+ 
+     //! Refine using EPZS pattern if needed
+     //! Note that we are using a conservative threshold method. Threshold
+     //! could be tested after checking only a certain number of predictors
+     //! instead of the full set. Code could be easily modified for this task.
+     if (min_mcost > stopCriterion)
+     {
+       //! Adapt pattern based on different conditions.
+       if (input->EPZSPattern != 0)
+       {
+         if ((min_mcost < stopCriterion + ((3 * medthres[blocktype]) >> 1)))
+         {
+           if ((tempmv[0] == 0 && tempmv[1] == 0)
+             || (iabs (tempmv[0] - mv[0]) < (2<<(2-mv_rescale)) && iabs (tempmv[1] - mv[1]) < (2<<(2-mv_rescale))))
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else if (blocktype > 5 || (ref > 0 && blocktype != 1))
+           searchPatternF = square;
+         else
+           searchPatternF = searchPattern;
+       }
+ 
+       totalCheckPts = searchPatternF->searchPoints;
+ 
+       //! center on best predictor
+       center_x = tempmv[0];
+       center_y = tempmv[1];
+       while(1)
+       {
+         do
+         {
+           checkPts = totalCheckPts;
+           do
+           {
+             tmv[0] = center_x + searchPatternF->point[pointNumber].mv[0];
+             tmv[1] = center_y + searchPatternF->point[pointNumber].mv[1];
+             cand_x = (pic_pix_x + tmv[0])<<mv_rescale;
+             cand_y = (pic_pix_y + tmv[1])<<mv_rescale;
+ 
+             if ((iabs (tmv[0] - mv[0]) <= search_range)
+               && (iabs (tmv[1] - mv[1]) <= search_range))
+             {
+               if (EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] != EPZSBlkCount)
+                 EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] = EPZSBlkCount;
+               else
+               {
+                 pointNumber += 1;
+                 if (pointNumber >= searchPatternF->searchPoints)
+                   pointNumber -= searchPatternF->searchPoints;
+                 checkPts -= 1;
+                 continue;
+               }
+               mcost = MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x, pred_y);
+ 
+               if (mcost < min_mcost)
+               {
+                 ref_access_method = CHECK_RANGE ? FAST_ACCESS : UMV_ACCESS;
+ 
+                 mcost += computeUniPred[dist_method](cur_pic, blocksize_y,blocksize_x,
+                   min_mcost - mcost, cand_x + IMG_PAD_SIZE_TIMES4, cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+                 if (mcost < min_mcost)
+                 {
+                   min_mcost = mcost;
+                   tempmv[0] = tmv[0];
+                   tempmv[1] = tmv[1];
+                   motionDirection = pointNumber;
+                 }
+               }
+             }
+             pointNumber += 1;
+             if (pointNumber >= searchPatternF->searchPoints)
+               pointNumber -= searchPatternF->searchPoints;
+             checkPts -= 1;
+           }
+           while (checkPts > 0);
+ 
+           if (nextLast || ((tempmv[0] == center_x) && (tempmv[1] == center_y)))
+           {
+             patternStop     = searchPatternF->stopSearch;
+             searchPatternF  = searchPatternF->nextpattern;
+             totalCheckPts   = searchPatternF->searchPoints;
+             nextLast        = searchPatternF->nextLast;
+             motionDirection = 0;
+             pointNumber = 0;
+           }
+           else
+           {
+             totalCheckPts = searchPatternF->point[motionDirection].next_points;
+             pointNumber = searchPatternF->point[motionDirection].start_nmbr;
+             center_x = tempmv[0];
+             center_y = tempmv[1];
+           }
+         }
+         while (patternStop != 1);
+ 
+         if ((ref>0) && (img->structure == FRAME)
+           && (( 4 * prevSad[pic_pix_x2] < min_mcost) ||
+           ((3 * prevSad[pic_pix_x2] < min_mcost) && (prevSad[pic_pix_x2] <= stopCriterion))))
+         {
+           mv[0] = tempmv[0];
+           mv[1] = tempmv[1];
+ #if EPZSREF
+           if (input->EPZSSpatialMem)
+ #else
+           if (input->EPZSSpatialMem && ref == 0)
+ #endif
+           {
+             motion[0]  = tempmv[0];
+             motion[1]  = tempmv[1];
+           }
+ 
+           return min_mcost;
+         }
+ 
+         //! Check Second best predictor with EPZS pattern
+         conditionEPZS = (checkMedian == TRUE)
+           && ((img->type == P_SLICE) || (blocktype < 5))
+           && (min_mcost > stopCriterion) && (input->EPZSDual > 0);
+ 
+         if (!conditionEPZS) break;
+ 
+         pointNumber = 0;
+         patternStop = 0;
+         motionDirection = 0;
+         nextLast = 0;
+ 
+         if ((tempmv[0] == 0 && tempmv[1] == 0)
+           || (tempmv[0] == mv[0] && tempmv[1] == mv[1]))
+         {
+           if (iabs (tempmv[0] - mv[0]) < (2<<(2-mv_rescale)) && iabs (tempmv[1] - mv[1]) < (2<<(2-mv_rescale)))
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else
+           searchPatternF = searchPatternD;
+         totalCheckPts = searchPatternF->searchPoints;
+ 
+         //! Second best. Note that following code is identical as for best predictor.
+         center_x = tempmv2[0];
+         center_y = tempmv2[1];
+         checkMedian = FALSE;
+       }
+     }
+   }
+ 
+   if ((ref==0) || (prevSad[pic_pix_x2] > min_mcost))
+     prevSad[pic_pix_x2] = min_mcost;
+ #if EPZSREF
+   if (input->EPZSSpatialMem)
+ #else
+   if (input->EPZSSpatialMem && ref == 0)
+ #endif
+   {
+     motion[0]  = tempmv[0];
+     motion[1]  = tempmv[1];
+   }
+ 
+   mv[0] = tempmv[0];
+   mv[1] = tempmv[1];
+   return min_mcost;
+ }
+ 
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    FAST Motion Estimation using EPZS
+ *    AMT/HYC
+ *
+ *    Bi-predictive block search. The vector passed in s_mv is only read
+ *    and stays fixed for the whole search (center1_x / center1_y); the
+ *    vector passed in mv is the one being refined and is overwritten
+ *    with the best candidate before returning.
+ *
+ *    Stages, each one entered only while the cost is still above the
+ *    current threshold:
+ *      1. cost of the starting pair (mv, s_mv);
+ *      2. the first five entries of the predictor list filled in by
+ *         EPZSSpatialPredictors();
+ *      3. iterative pattern refinement around the best candidate and,
+ *         when input->EPZSDual is set and blocktype < 5, one more
+ *         refinement around the second best candidate.
+ *
+ * \note
+ *    The incoming min_mcost value is not used: it is overwritten with
+ *    the cost of the starting pair before any comparison takes place.
+ * \note
+ *    Writes non-local search state (EPZSMap, EPZSBlkCount, the
+ *    ref_pic1_sub / ref_pic2_sub pointers, weights and offsets, the
+ *    bipred access methods, padded picture sizes). Presumably this is
+ *    what the computeBiPred functions read - TODO confirm.
+ *
+ * \return
+ *    Lowest combined cost found: motion vector cost of both vectors
+ *    plus the value returned by computeBiPred.
+ ***********************************************************************
+ */
+ int                                               //  ==> minimum motion cost after search
+ EPZSBiPredBlockMotionSearch (imgpel * cur_pic,   // <--  original pixel values for the AxB block
+                              short  ref,          // <--  reference picture
+                              int    list,         // <--  reference list
+                              int    list_offset,  // <--  offset for Mbaff
+                              char  ***refPic,    // <--  reference array
+                              short  ****tmp_mv,   // <--  mv array
+                              int    pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+                              int    pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+                              int    blocktype,    //<--  block type (1-16x16 ... 7-4x4)
+                              short  *pred_mv1,   // <--  motion vector predictor (x) in sub-pel units
+                              short  *pred_mv2,   // <--  motion vector predictor (x) in sub-pel units
+                              short  mv[2],        // <--> in: search center (x) / out: motion vector (x) - in pel units
+                              short  s_mv[2],      // <--> in: search center (x) / out: motion vector (x) - in pel units
+                              int    search_range, // <--  1-d search range in pel units
+                              int    min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+                              int    lambda_factor)        // <--  lagrangian parameter for determining motion cost
+ {
+   short blocksize_y = input->blc_size[blocktype][1];  // vertical block size
+   short blocksize_x = input->blc_size[blocktype][0];  // horizontal block size
+   short mb_x = pic_pix_x - img->opix_x;
+   short mb_y = pic_pix_y - img->opix_y;
+   int   pred_x1 = (pic_pix_x << 2) + pred_mv1[0]; // predicted position x (in sub-pel units)
+   int   pred_y1 = (pic_pix_y << 2) + pred_mv1[1]; // predicted position y (in sub-pel units)
+   int   pred_x2 = (pic_pix_x << 2) + pred_mv2[0]; // predicted position x (in sub-pel units)
+   int   pred_y2 = (pic_pix_y << 2) + pred_mv2[1]; // predicted position y (in sub-pel units)
+   int   center2_x = (pic_pix_x << (input->EPZSSubPelGrid * 2))+ mv[0];    // center position x (in pel units)
+   int   center2_y = (pic_pix_y << (input->EPZSSubPelGrid * 2))+ mv[1];    // center position y (in pel units)
+   int   center1_x = (pic_pix_x << (input->EPZSSubPelGrid * 2))+ s_mv[0];  // center position x (in pel units)
+   int   center1_y = (pic_pix_y << (input->EPZSSubPelGrid * 2))+ s_mv[1];  // center position y (in pel units)
+ 
+   int tempmv[2]  = {mv[0], mv[1]};  // best candidate found so far
+   int tempmv2[2] = {0, 0};          // second best candidate found so far
+   int stopCriterion = medthres[blocktype];
+   int mapCenter_x = search_range - mv[0];  // added to a candidate x to get its EPZSMap column
+   int mapCenter_y = search_range - mv[1];  // added to a candidate y to get its EPZSMap row
+   int second_mcost = INT_MAX;
+   StorablePicture *ref_picture1 = listX[list       + list_offset][ref];
+   StorablePicture *ref_picture2 = listX[(list ^ 1) + list_offset][0];  // opposite list, always reference index 0
+ 
+   short apply_weights = (active_pps->weighted_bipred_idc != 0);
+   short offset1 = apply_weights ? list == 0
+     ? wp_offset[list_offset         ][ref][0]
+     : wp_offset[list_offset + LIST_1][0  ][0]
+     : 0;
+   short offset2 = apply_weights ? list == 0
+     ? wp_offset[list_offset + LIST_1][ref][0]
+     : wp_offset[list_offset         ][0  ][0]
+     : 0;
+   short invalid_refs = 0;
+   byte checkMedian = FALSE;  // set once a predictor produced a best or second best candidate
+   EPZSStructure *searchPatternF = searchPattern;
+   EPZSBlkCount ++;  // fresh stamp for this call: EPZSMap cells holding it count as already tested
+ 
+   pic_pix_x = (pic_pix_x << (2 - mv_rescale));  // rescaled so candidate vectors can be added directly
+   pic_pix_y = (pic_pix_y << (2 - mv_rescale));
+ 
+   ref_pic1_sub.luma = ref_picture1->imgY_sub;
+   ref_pic2_sub.luma = ref_picture2->imgY_sub;
+   img_width  = ref_picture1->size_x;
+   img_height = ref_picture1->size_y;
+   width_pad  = ref_picture1->size_x_pad;
+   height_pad = ref_picture1->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight1 = list == 0 ? wbp_weight[list_offset         ][ref][0][0] : wbp_weight[list_offset + LIST_1][0  ][ref][0];
+     weight2 = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][0] : wbp_weight[list_offset         ][0  ][ref][0];
+     offsetBi=(offset1 + offset2 + 1)>>1;  // rounded average of the two luma offsets
+     computeBiPred = computeBiPred2[F_PEL];
+   }
+   else
+   {
+     weight1 = 1<<luma_log_weight_denom;
+     weight2 = 1<<luma_log_weight_denom;
+     offsetBi = 0;
+     computeBiPred = computeBiPred1[F_PEL];
+   }
+ 
+   if ( ChromaMEEnable ) {
+     ref_pic1_sub.crcb[0] = ref_picture1->imgUV_sub[0];
+     ref_pic1_sub.crcb[1] = ref_picture1->imgUV_sub[1];
+     ref_pic2_sub.crcb[0] = ref_picture2->imgUV_sub[0];
+     ref_pic2_sub.crcb[1] = ref_picture2->imgUV_sub[1];
+     width_pad_cr  = ref_picture1->size_x_cr_pad;
+     height_pad_cr = ref_picture1->size_y_cr_pad;
+     if (apply_weights)
+     {
+       weight1_cr[0] = list == 0 ? wbp_weight[list_offset         ][ref][0][1] : wbp_weight[list_offset + LIST_1][0  ][ref][1];
+       weight1_cr[1] = list == 0 ? wbp_weight[list_offset         ][ref][0][2] : wbp_weight[list_offset + LIST_1][0  ][ref][2];
+       weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
+       weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
+       offsetBi_cr[0] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
+       offsetBi_cr[1] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
+     }
+     else
+     {
+       weight1_cr[0] = 1<<chroma_log_weight_denom;
+       weight1_cr[1] = 1<<chroma_log_weight_denom;
+       weight2_cr[0] = 1<<chroma_log_weight_denom;
+       weight2_cr[1] = 1<<chroma_log_weight_denom;
+       offsetBi_cr[0] = 0;
+       offsetBi_cr[1] = 0;
+     }
+   }
+ 
+ 
+   //===== select access method for the refined vector (center2, derived from mv) =====
+   if ( (center2_x > search_range) && (center2_x < ((img_width  - blocksize_x) << (input->EPZSSubPelGrid * 2)) - search_range)
+     && (center2_y > search_range) && (center2_y < ((img_height - blocksize_y) << (input->EPZSSubPelGrid * 2)) - search_range))
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+    bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   //===== select access method for the fixed vector (center1, derived from s_mv) =====
+   if ( (center1_x > search_range) && (center1_x < ((img_width  - blocksize_x) << (input->EPZSSubPelGrid * 2)) - search_range)
+     && (center1_y > search_range) && (center1_y < ((img_height - blocksize_y) << (input->EPZSSubPelGrid * 2)) - search_range))
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   // Clear EPZSMap
+   //memset(EPZSMap[0],FALSE,searcharray*searcharray);
+   // Check median candidate;
+   EPZSMap[search_range][search_range] = EPZSBlkCount;  // cell of the incoming mv itself (mapCenter + mv == search_range)
+ 
+   //--- initialize motion cost (cost for motion vector) and check ---
+   min_mcost  = MV_COST_SMP (lambda_factor, (center1_x<<mv_rescale), (center1_y<<mv_rescale), pred_x1, pred_y1);  // discards the caller-supplied min_mcost
+   min_mcost += MV_COST_SMP (lambda_factor, (center2_x<<mv_rescale), (center2_y<<mv_rescale), pred_x2, pred_y2);
+ 
+   //--- add residual cost to motion cost ---
+   min_mcost += computeBiPred(cur_pic,
+     blocksize_y, blocksize_x, INT_MAX,
+     (center1_x << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+     (center1_y << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+     (center2_x << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+     (center2_y << mv_rescale) + IMG_PAD_SIZE_TIMES4);
+ 
+   //! If medthres satisfied, then terminate, otherwise generate Predictors
+   if (min_mcost > stopCriterion)
+   {
+     int prednum = 5;  // number of predictor list entries that get tested
+     int patternStop = 0, pointNumber = 0, checkPts, nextLast = 0;
+     int totalCheckPts = 0, motionDirection = 0;
+     int conditionEPZS;
+     int tmv[2], cand_x, cand_y;
+     int pos, mcost;
+     PixelPos block_a, block_b, block_c, block_d;
+ 
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x - 1, mb_y, &block_a);
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x, mb_y - 1, &block_b);
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x + blocksize_x, mb_y -1, &block_c);
+     getLuma4x4Neighbour (img->current_mb_nr, mb_x - 1, mb_y -1, &block_d);
+ 
+     if (mb_y > 0)  // for blocks below the first row, some positions force block_c to unavailable
+     {
+       if (mb_x < 8)  // first column of 8x8 blocks
+       {
+         if (mb_y==8)
+         {
+           if (blocksize_x == MB_BLOCK_SIZE)
+             block_c.available  = 0;
+         }
+         else if (mb_x+blocksize_x == 8)
+           block_c.available = 0;
+       }
+       else if (mb_x+blocksize_x == MB_BLOCK_SIZE)
+         block_c.available = 0;
+     }
+ 
+     stopCriterion = (11 * medthres[blocktype]) >> 3;  // relaxed threshold (11/8 of medthres) for the later stages
+ 
+     //! Add Spatial Predictors in predictor list.
+     //! Scheme adds zero, left, top-left, top, top-right. Note that top-left adds very little
+     //! in terms of performance and could be removed with little penalty if any.
+     invalid_refs = EPZSSpatialPredictors (block_a, block_b, block_c, block_d,
+       list, list_offset, ref, refPic[list], tmp_mv[list], predictor);
+ 
+     //! Check all predictors
+     for (pos = 0; pos < prednum; pos++)
+     {
+       tmv[0] = predictor->point[pos].mv[0];
+       tmv[1] = predictor->point[pos].mv[1];
+       if ((iabs (tmv[0] - mv[0]) > search_range || iabs (tmv[1] - mv[1]) > search_range) && (tmv[0] || tmv[1]))
+         continue;  // out of range and non-zero: skip (an out-of-range zero vector is still tested)
+ 
+       if ((iabs (tmv[0] - mv[0]) <= search_range) && (iabs (tmv[1] - mv[1]) <= search_range))
+       {
+         if (EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] == EPZSBlkCount)
+           continue;  // already tested during this call
+         else
+           EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] = EPZSBlkCount;
+       }
+ 
+       cand_x = (pic_pix_x + tmv[0])<<mv_rescale;
+       cand_y = (pic_pix_y + tmv[1])<<mv_rescale;
+ 
+       //--- set motion cost (cost for motion vector) and check ---
+       mcost  = MV_COST_SMP (lambda_factor, (center1_x<<mv_rescale), (center1_y<<mv_rescale), pred_x1, pred_y1);
+       mcost += MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x2, pred_y2);
+ 
+       if (mcost >= second_mcost) continue;  // vector cost alone cannot beat even the second best
+ 
+       mcost += computeBiPred(cur_pic,
+         blocksize_y, blocksize_x, second_mcost - mcost,
+         (center1_x << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+         (center1_y << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+         cand_x + IMG_PAD_SIZE_TIMES4, cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+       //--- check if motion cost is less than minimum cost ---
+       if (mcost < min_mcost)
+       {
+         tempmv2[0] = tempmv[0];  // previous best is demoted to second best
+         tempmv2[1] = tempmv[1];
+         second_mcost = min_mcost;
+         tempmv[0] = tmv[0];
+         tempmv[1] = tmv[1];
+         min_mcost = mcost;
+         checkMedian = TRUE;
+       }
+       //else if (mcost < second_mcost && (tempmv[0] != tmv[0] || tempmv[1] != tmv[1]))
+       else if (mcost < second_mcost)
+       {
+         tempmv2[0] = tmv[0];
+         tempmv2[1] = tmv[1];
+         second_mcost = mcost;
+         checkMedian = TRUE;
+       }
+     }
+ 
+     //! Refine using EPZS pattern if needed.
+     //! Note that we are using a simplistic threshold computation.
+     if (min_mcost > stopCriterion)
+     {
+       //! Adapt pattern based on different conditions.
+       if (input->EPZSPattern != 0)
+       {
+         if ((min_mcost < stopCriterion + ((3 * medthres[blocktype]) >> 1)))
+         {
+           if ((tempmv[0] == 0 && tempmv[1] == 0)
+             || (iabs (tempmv[0] - mv[0]) < (2<<(2-mv_rescale)) && iabs (tempmv[1] - mv[1]) < (2<<(2-mv_rescale))))
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else if (blocktype > 5 || (ref > 0 && blocktype != 1))
+           searchPatternF = square;
+         else
+           searchPatternF = searchPattern;
+       }
+ 
+       totalCheckPts = searchPatternF->searchPoints;
+ 
+       //! center on best predictor
+       center2_x = tempmv[0];  // from here on center2 holds a vector, no longer an absolute position
+       center2_y = tempmv[1];
+       while (1)  // at most two passes: around the best candidate, then around the second best
+       {
+         do
+         {
+           checkPts = totalCheckPts;
+           do
+           {
+             tmv[0] = center2_x + searchPatternF->point[pointNumber].mv[0];
+             tmv[1] = center2_y + searchPatternF->point[pointNumber].mv[1];
+             cand_x = (pic_pix_x + tmv[0])<<mv_rescale;
+             cand_y = (pic_pix_y + tmv[1])<<mv_rescale;
+ 
+ 
+             if ((iabs (tmv[0] - mv[0]) <= search_range)
+               && (iabs (tmv[1] - mv[1]) <= search_range))
+             {
+               if (EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] != EPZSBlkCount)
+                 EPZSMap[mapCenter_y + tmv[1]][mapCenter_x + tmv[0]] = EPZSBlkCount;
+               else
+               {
+                 pointNumber += 1;
+                 if (pointNumber >= searchPatternF->searchPoints)
+                   pointNumber -= searchPatternF->searchPoints;  // wrap around the pattern
+                 checkPts -= 1;
+                 continue;  // already tested; the do-while condition below is still evaluated
+               }
+ 
+               mcost  = MV_COST_SMP (lambda_factor, (center1_x << mv_rescale), (center1_y << mv_rescale), pred_x1, pred_y1);
+               mcost += MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x2, pred_y2);
+ 
+               if (mcost < min_mcost)
+               {
+                 mcost += computeBiPred(cur_pic,
+                   blocksize_y, blocksize_x, min_mcost - mcost,
+                   (center1_x << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+                   (center1_y << mv_rescale) + IMG_PAD_SIZE_TIMES4,
+                   cand_x + IMG_PAD_SIZE_TIMES4, cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+                 if (mcost < min_mcost)
+                 {
+                   min_mcost = mcost;
+                   tempmv[0] = tmv[0];
+                   tempmv[1] = tmv[1];
+                   motionDirection = pointNumber;  // remembers which pattern point gave the improvement
+                 }
+               }
+             }
+             pointNumber += 1;
+             if (pointNumber >= searchPatternF->searchPoints)
+               pointNumber -= searchPatternF->searchPoints;
+             checkPts -= 1;
+           }
+           while (checkPts > 0);
+ 
+           if (nextLast || ((tempmv[0] == center2_x) && (tempmv[1] == center2_y)))  // best stayed at the center, or pattern flagged as last
+           {
+             patternStop     = searchPatternF->stopSearch;
+             searchPatternF  = searchPatternF->nextpattern;
+             totalCheckPts   = searchPatternF->searchPoints;
+             nextLast        = searchPatternF->nextLast;
+             motionDirection = 0;
+             pointNumber = 0;
+           }
+           else  // best moved: re-center on it and test only the points listed for that direction
+           {
+             totalCheckPts = searchPatternF->point[motionDirection].next_points;
+             pointNumber = searchPatternF->point[motionDirection].start_nmbr;
+             center2_x = tempmv[0];
+             center2_y = tempmv[1];
+           }
+         }
+         while (patternStop != 1);
+ 
+         //! Check Second best predictor with EPZS pattern
+ 
+         conditionEPZS = (checkMedian == TRUE) && (blocktype < 5) && (min_mcost > stopCriterion) && (input->EPZSDual > 0);
+ 
+         if (!conditionEPZS) break;
+ 
+         pointNumber = 0;
+         patternStop = 0;
+         motionDirection = 0;
+         nextLast = 0;
+ 
+         if ((tempmv[0] == 0 && tempmv[1] == 0)
+           || (tempmv[0] == mv[0] && tempmv[1] == mv[1]))
+         {
+           if (iabs (tempmv[0] - mv[0]) < (2<<(2-mv_rescale)) && iabs (tempmv[1] - mv[1]) < (2<<(2-mv_rescale)))
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else
+           searchPatternF = searchPatternD;
+         totalCheckPts = searchPatternF->searchPoints;
+ 
+         //! Second best. Note that following code is identical as for best predictor.
+         center2_x = tempmv2[0];
+         center2_y = tempmv2[1];
+ 
+         checkMedian = FALSE;  // makes conditionEPZS false next time, so the loop ends after this pass
+       }
+     }
+   }
+   mv[0] = tempmv[0];
+   mv[1] = tempmv[1];
+   return min_mcost;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Report function for EPZS Fast ME
+ *    AMT/HYC
+ *
+ *    Prints the EPZS configuration (patterns, predictor switches, the
+ *    16x16 thresholds and the sub-pel switches) to the given stream.
+ *
+ * \param stat
+ *    Stream that receives the report lines.
+ * \param stats_file
+ *    Layout selector: 1 uses the narrow label column, every other
+ *    value the wide one. The printed values are the same in both.
+ ***********************************************************************
+ */
+ void
+ EPZSOutputStats (FILE * stat, short stats_file)
+ {
+   if (stats_file != 1)
+   {
+     // Wide label column.
+     fprintf (stat, " EPZS Pattern                      : %s\n",
+              c_EPZSPattern[input->EPZSPattern]);
+     fprintf (stat, " EPZS Dual Pattern                 : %s\n",
+              c_EPZSDualPattern[input->EPZSDual]);
+     fprintf (stat, " EPZS Fixed Predictors             : %s\n",
+              c_EPZSFixed[input->EPZSFixed]);
+     fprintf (stat, " EPZS Temporal Predictors          : %s\n",
+              c_EPZSOther[input->EPZSTemporal]);
+     fprintf (stat, " EPZS Spatial Predictors           : %s\n",
+              c_EPZSOther[input->EPZSSpatialMem]);
+     fprintf (stat, " EPZS Thresholds (16x16)           : (%d %d %d)\n",
+              medthres[1], minthres[1], maxthres[1]);
+     fprintf (stat, " EPZS Subpel ME                    : %s\n",
+              c_EPZSOther[input->EPZSSubPelME]);
+     fprintf (stat, " EPZS Subpel ME BiPred             : %s\n",
+              c_EPZSOther[input->EPZSSubPelMEBiPred]);
+     return;
+   }
+ 
+   // Narrow label column.
+   fprintf (stat, " EPZS Pattern                 : %s\n",
+            c_EPZSPattern[input->EPZSPattern]);
+   fprintf (stat, " EPZS Dual Pattern            : %s\n",
+            c_EPZSDualPattern[input->EPZSDual]);
+   fprintf (stat, " EPZS Fixed Predictors        : %s\n",
+            c_EPZSFixed[input->EPZSFixed]);
+   fprintf (stat, " EPZS Temporal Predictors     : %s\n",
+            c_EPZSOther[input->EPZSTemporal]);
+   fprintf (stat, " EPZS Spatial Predictors      : %s\n",
+            c_EPZSOther[input->EPZSSpatialMem]);
+   fprintf (stat, " EPZS Thresholds (16x16)      : (%d %d %d)\n",
+            medthres[1], minthres[1], maxthres[1]);
+   fprintf (stat, " EPZS Subpel ME               : %s\n",
+            c_EPZSOther[input->EPZSSubPelME]);
+   fprintf (stat, " EPZS Subpel ME BiPred        : %s\n",
+            c_EPZSOther[input->EPZSSubPelMEBiPred]);
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Fast sub pixel block motion search to support EPZS
+  *
+  *    Refines mv in place in two stages, first with the offsets of
+  *    search_point_hp (half-pel), then with those of search_point_qp
+  *    (quarter-pel). Both stages follow the same scheme:
+  *      1. evaluate the table positions from start_me_refinement_hp /
+  *         start_me_refinement_qp up to 4, keeping the best and the
+  *         second best position;
+  *      2. return immediately if position 0 is best, mv equals pred_mv
+  *         and the cost is below subthres[blocktype];
+  *      3. otherwise pick a sub-range [start_pos, end_pos) of the
+  *         positions from 5 upwards out of the best / second best
+  *         combination and evaluate only those;
+  *      4. add the offset of the winning position to mv.
+  *
+  * \note
+  *    The early return of the half-pel stage also skips the whole
+  *    quarter-pel stage.
+  * \note
+  *    Writes non-local state (ref_pic_sub, width_pad, height_pad,
+  *    weight / offset values, ref_access_method). Presumably this is
+  *    what the computeUniPred functions read - TODO confirm.
+  * \note
+  *    Positions 1..4 of the search tables presumably are the horizontal
+  *    and vertical neighbours and 5.. the diagonal ones; the tables are
+  *    defined outside this function - TODO confirm.
+  *
+  * \return
+  *    Lowest cost found (motion vector cost plus the value returned by
+  *    the selected computeUniPred function).
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ EPZSSubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                              short     ref,           // <--  reference frame (0... or -1 (backward))
+                              int       list,          // <--  reference picture list
+                              int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                              int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                              int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                              short     pred_mv[2],    // <--  motion vector predictor (x) in sub-pel units
+                              short     mv[2],         // <--> in: search center (x) / out: motion vector (x) - in pel units
+                              int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                              int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                              int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                              int*      lambda         // <--  lagrangian parameter for determining motion cost
+                              )
+ {
+ 
+   int   pos, best_pos = 0, second_pos = 0, mcost;
+   int   second_mcost = INT_MAX;
+ 
+   int   cand_mv_x, cand_mv_y;
+ 
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);  // block position including padding, times 4
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+ 
+   int   max_pos2        = ( (!start_me_refinement_hp || !start_me_refinement_qp) ? imax(1,search_pos2) : search_pos2);  // at least 1 when either stage starts at position 0
+   int   list_offset     = img->mb_data[img->current_mb_nr].list_offset;
+   int   apply_weights   = ((active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+ 
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+ 
+   int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   int start_pos = 5, end_pos = max_pos2;
+   int dist_method = H_PEL + 3 * apply_weights;  // apply_weights is 0 or 1: the weighted variant sits 3 entries further on
+   int lambda_factor = lambda[H_PEL];
+ 
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if ( ChromaMEEnable )
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+ 
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + mv[0] > 1) && (pic4_pix_x + mv[0] < max_pos_x4 - 1) &&
+     (pic4_pix_y + mv[1] > 1) && (pic4_pix_y + mv[1] < max_pos_y4 - 1))
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = start_me_refinement_hp; pos < 5; pos++)
+   {
+     cand_mv_x = mv[0] + search_point_hp[pos][0];    // quarter-pel units
+     cand_mv_y = mv[1] + search_point_hp[pos][1];    // quarter-pel units
+ 
+     //----- set motion vector cost -----
+     mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+       INT_MAX, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+     if (mcost < min_mcost)
+     {
+       second_mcost = min_mcost;  // previous best is demoted to second best
+       second_pos  = best_pos;
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     else if (mcost < second_mcost)
+     {
+       second_mcost = mcost;
+       second_pos  = pos;
+     }
+   }
+ 
+   if (best_pos ==0 && (pred_mv[0] == mv[0]) && (pred_mv[1] - mv[1])== 0 && min_mcost < subthres[blocktype])
+       return min_mcost;  // center is best, equals the predictor and is below the threshold: quarter-pel stage is skipped too
+ 
+   if (best_pos != 0 && second_pos != 0)  // best and second best are both off-center
+   {
+     switch (best_pos ^ second_pos)
+     {
+     case 1:
+       start_pos = 6;
+       end_pos   = 7;
+       break;
+     case 3:
+       start_pos = 5;
+       end_pos   = 6;
+       break;
+     case 5:
+       start_pos = 8;
+       end_pos   = 9;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 8;
+       break;
+     default:  // any other pairing keeps the full range 5 .. max_pos2 - 1
+       break;
+     }
+   }
+   else  // at least one of best / second best is position 0
+   {
+     switch (best_pos + second_pos)
+     {
+     case 0:
+       start_pos = 5;
+       end_pos   = 5;  // empty range: no further positions are tested
+       break;
+     case 1:
+       start_pos = 8;
+       end_pos   = 10;
+       break;
+     case 2:
+       start_pos = 5;
+       end_pos   = 7;
+       break;
+     case 5:
+       start_pos = 6;
+       end_pos   = 8;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 9;
+       break;
+     default:
+       break;
+     }
+   }
+ 
+   if (best_pos !=0 || (iabs(pred_mv[0] - mv[0]) + iabs(pred_mv[1] - mv[1])))  // skipped only when the center is best and mv equals pred_mv
+   {
+     for (pos = start_pos; pos < end_pos; pos++)
+     {
+       cand_mv_x = mv[0] + search_point_hp[pos][0];    // quarter-pel units
+       cand_mv_y = mv[1] + search_point_hp[pos][1];    // quarter-pel units
+ 
+       //----- set motion vector cost -----
+       mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
+ 
+       if (mcost >= min_mcost) continue;  // vector cost alone already rules this position out
+ 
+       mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+         min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+       if (mcost < min_mcost)
+       {
+         min_mcost = mcost;
+         best_pos  = pos;
+       }
+     }
+   }
+ 
+   if (best_pos)
+   {
+     mv[0] += search_point_hp[best_pos][0];
+     mv[1] += search_point_hp[best_pos][1];
+   }
+ 
+   if ( !start_me_refinement_qp )
+     min_mcost = INT_MAX;  // quarter-pel loop starts at position 0, so the center cost is recomputed there
+ 
+   /************************************
+   *****                          *****
+   *****  QUARTER-PEL REFINEMENT  *****
+   *****                          *****
+   ************************************/
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + mv[0] > 0) && (pic4_pix_x + mv[0] < max_pos_x4) &&
+     (pic4_pix_y + mv[1] > 0) && (pic4_pix_y + mv[1] < max_pos_y4)   )
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   dist_method = Q_PEL + 3 * apply_weights;
+   lambda_factor = lambda[Q_PEL];
+   second_pos = 0;
+   second_mcost = INT_MAX;
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = start_me_refinement_qp; pos < 5; pos++)
+   {
+     cand_mv_x = mv[0] + search_point_qp[pos][0];    // quarter-pel units
+     cand_mv_y = mv[1] + search_point_qp[pos][1];    // quarter-pel units
+ 
+     //----- set motion vector cost -----
+     mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+       INT_MAX, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+     if (mcost < min_mcost)
+     {
+       second_mcost = min_mcost;
+       second_pos  = best_pos;
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     else if (mcost < second_mcost)
+     {
+       second_mcost = mcost;
+       second_pos  = pos;
+     }
+   }
+ 
+   if (best_pos ==0 && (pred_mv[0] == mv[0]) && (pred_mv[1] - mv[1])== 0 && min_mcost < subthres[blocktype])
+   {
+     return min_mcost;
+   }
+ 
+   start_pos = 5;
+   end_pos = search_pos4;  // default range when no case below narrows it
+ 
+   if (best_pos != 0 && second_pos != 0)
+   {
+     switch (best_pos ^ second_pos)
+     {
+     case 1:
+       start_pos = 6;
+       end_pos   = 7;
+       break;
+     case 3:
+       start_pos = 5;
+       end_pos   = 6;
+       break;
+     case 5:
+       start_pos = 8;
+       end_pos   = 9;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 8;
+       break;
+     default:
+       break;
+     }
+   }
+   else
+   {
+     switch (best_pos + second_pos)
+     {
+       //case 0:
+       //start_pos = 5;
+       //end_pos   = 5;
+       //break;
+     case 1:
+       start_pos = 8;
+       end_pos   = 10;
+       break;
+     case 2:
+       start_pos = 5;
+       end_pos   = 7;
+       break;
+     case 5:
+       start_pos = 6;
+       end_pos   = 8;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 9;
+       break;
+     default:  // includes sum 0: unlike the half-pel stage, the default range 5 .. search_pos4 - 1 is kept
+       break;
+     }
+   }
+ 
+   if (best_pos !=0 || (iabs(pred_mv[0] - mv[0]) + iabs(pred_mv[1] - mv[1])))
+   {
+     for (pos = start_pos; pos < end_pos; pos++)
+     {
+       cand_mv_x = mv[0] + search_point_qp[pos][0];    // quarter-pel units
+       cand_mv_y = mv[1] + search_point_qp[pos][1];    // quarter-pel units
+ 
+       //----- set motion vector cost -----
+       mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
+ 
+       if (mcost >= min_mcost) continue;
+       mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+         min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+       if (mcost < min_mcost)
+       {
+         min_mcost = mcost;
+         best_pos  = pos;
+       }
+     }
+   }
+   if (best_pos)
+   {
+     mv[0] += search_point_qp [best_pos][0];
+     mv[1] += search_point_qp [best_pos][1];
+   }
+ 
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Fast bipred sub pixel block motion search to support EPZS: half-pel then quarter-pel refinement of mv[]; s_mv[] is only read
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ EPZSSubPelBlockSearchBiPred (imgpel*   orig_pic,  // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame (0... or -1 (backward))
+                          int       list,          // <--  reference picture list
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          short     *pred_mv1,     // <--  motion vector predictor (x,y) compared against mv, in sub-pel units
+                          short     *pred_mv2,     // <--  motion vector predictor (x,y) compared against s_mv, in sub-pel units
+                          short     mv[2],         // <--> in: search center (x,y) / out: refined motion vector (x,y) - used in quarter-pel units below
+                          short     s_mv[2],       // <--  second motion vector (x,y); only read in this function
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int       *lambda        // <--  lagrangian parameters; entries H_PEL and Q_PEL are read
+                          )
+ {
+   int   apply_weights =  (active_pps->weighted_bipred_idc );
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+ 
+   short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref]     [0]:  wp_offset[list_offset + 1][0  ]     [0]) : 0);
+   short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref]     [0]:  wp_offset[list_offset    ][0  ]     [0]) : 0);
+ 
+   int   pos, best_pos = 0, second_pos = 0, mcost;
+   int   second_mcost = INT_MAX;
+ 
+   int   cand_mv_x, cand_mv_y;
+ 
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+ 
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+ 
+   int   start_hp        = (min_mcost == INT_MAX) ? 0 : start_me_refinement_hp; // start at pos 0 when the caller passed no valid cost
+   int   max_pos2        = ( (!start_me_refinement_hp || !start_me_refinement_qp) ? imax(1,search_pos2) : search_pos2);
+ 
+   int   smv_x = s_mv[0] + pic4_pix_x;
+   int   smv_y = s_mv[1] + pic4_pix_y;
+ 
+   StorablePicture *ref_picture1 = listX[list       + list_offset][ref];
+   StorablePicture *ref_picture2 = listX[(list ^ 1) + list_offset][0]; // second reference: entry 0 of the opposite list
+ 
+ 
+   int max_pos_x4 = ((ref_picture1->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int max_pos_y4 = ((ref_picture1->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   int start_pos = 5, end_pos = max_pos2;
+   int lambda_factor = lambda[H_PEL];
+ 
+   ref_pic1_sub.luma = ref_picture1->imgY_sub;
+   ref_pic2_sub.luma = ref_picture2->imgY_sub;
+   img_width     = ref_picture1->size_x;
+   img_height    = ref_picture1->size_y;
+   width_pad    = ref_picture1->size_x_pad;
+   height_pad   = ref_picture1->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight1 = list == 0
+       ? wbp_weight[list_offset         ][ref][0][0]
+       : wbp_weight[list_offset + LIST_1][0  ][ref][0];
+     weight2 = list == 0
+       ? wbp_weight[list_offset + LIST_1][ref][0][0]
+       : wbp_weight[list_offset         ][0  ][ref][0];
+     offsetBi=(offset1 + offset2 + 1)>>1;
+     computeBiPred = computeBiPred2[H_PEL];
+   }
+   else
+   {
+     weight1 = 1<<luma_log_weight_denom;
+     weight2 = 1<<luma_log_weight_denom;
+     offsetBi = 0;
+     computeBiPred = computeBiPred1[H_PEL];
+   }
+ 
+ 
+   if ( ChromaMEEnable )
+   {
+     ref_pic1_sub.crcb[0] = ref_picture1->imgUV_sub[0];
+     ref_pic1_sub.crcb[1] = ref_picture1->imgUV_sub[1];
+     ref_pic2_sub.crcb[0] = ref_picture2->imgUV_sub[0];
+     ref_pic2_sub.crcb[1] = ref_picture2->imgUV_sub[1];
+     width_pad_cr  = ref_picture1->size_x_cr_pad;
+     height_pad_cr = ref_picture1->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight1_cr[0] = list == 0
+         ? wbp_weight[list_offset         ][ref][0][1]
+         : wbp_weight[list_offset + LIST_1][0  ][ref][1];
+       weight1_cr[1] = list == 0
+         ? wbp_weight[list_offset         ][ref][0][2]
+         : wbp_weight[list_offset + LIST_1][0  ][ref][2];
+       weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
+       weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
+       offsetBi_cr[0] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
+       offsetBi_cr[1] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
+     }
+     else
+     {
+       weight1_cr[0] = 1<<chroma_log_weight_denom;
+       weight1_cr[1] = 1<<chroma_log_weight_denom;
+       weight2_cr[0] = 1<<chroma_log_weight_denom;
+       weight2_cr[1] = 1<<chroma_log_weight_denom;
+       offsetBi_cr[0] = 0;
+       offsetBi_cr[1] = 0;
+     }
+   }
+ 
+ 
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+ 
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + mv[0] > 1) && (pic4_pix_x + mv[0] < max_pos_x4 - 1) &&
+     (pic4_pix_y + mv[1] > 1) && (pic4_pix_y + mv[1] < max_pos_y4 - 1))
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   if ((pic4_pix_x + s_mv[0] > 1) && (pic4_pix_x + s_mv[0] < max_pos_x4 - 1) &&
+     (pic4_pix_y + s_mv[1] > 1) && (pic4_pix_y + s_mv[1] < max_pos_y4 - 1))
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   //===== loop over search positions below 5, tracking the best and second-best position =====
+   for (best_pos = 0, pos = start_hp; pos < 5; pos++)
+   {
+     cand_mv_x = mv[0] + search_point_hp[pos][0];    // quarter-pel units
+     cand_mv_y = mv[1] + search_point_hp[pos][1];    // quarter-pel units
+ 
+     //----- set motion vector cost -----
+     mcost  = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv1[0], pred_mv1[1]);
+     mcost += MV_COST_SMP (lambda_factor, s_mv[0], s_mv[1], pred_mv2[0], pred_mv2[1]);
+     mcost += computeBiPred(orig_pic, blocksize_y, blocksize_x, INT_MAX,
+       smv_x, smv_y, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+     if (mcost < min_mcost)
+     {
+       second_mcost = min_mcost;
+       second_pos  = best_pos;
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     else if (mcost < second_mcost)
+     {
+       second_mcost = mcost;
+       second_pos  = pos;
+     }
+   }
+ 
+ //  if (best_pos ==0 && (pred_mv1[0] == mv[0]) && (pred_mv1[1] - mv[1])== 0 && min_mcost < subthres[blocktype])
+       //return min_mcost;
+ 
+   if (best_pos != 0 && second_pos != 0) // choose which positions from 5 upwards to test, based on the best / second-best pair
+   {
+     switch (best_pos ^ second_pos)
+     {
+     case 1:
+       start_pos = 6;
+       end_pos   = 7;
+       break;
+     case 3:
+       start_pos = 5;
+       end_pos   = 6;
+       break;
+     case 5:
+       start_pos = 8;
+       end_pos   = 9;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 8;
+       break;
+     default:
+       break;
+     }
+   }
+   else
+   {
+     switch (best_pos + second_pos)
+     {
+     case 0:
+       start_pos = 5;
+       end_pos   = 5;
+       break;
+     case 1:
+       start_pos = 8;
+       end_pos   = 10;
+       break;
+     case 2:
+       start_pos = 5;
+       end_pos   = 7;
+       break;
+     case 5:
+       start_pos = 6;
+       end_pos   = 8;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 9;
+       break;
+     default:
+       break;
+     }
+   }
+ 
+   if (best_pos !=0 || (iabs(pred_mv1[0] - mv[0]) + iabs(pred_mv1[1] - mv[1]))) // skipped only when pos 0 won and mv equals pred_mv1
+   {
+     for (pos = start_pos; pos < end_pos; pos++)
+     {
+       cand_mv_x = mv[0] + search_point_hp[pos][0];    // quarter-pel units
+       cand_mv_y = mv[1] + search_point_hp[pos][1];    // quarter-pel units
+ 
+       //----- set motion vector cost -----
+       mcost  = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv1[0], pred_mv1[1]);
+       mcost += MV_COST_SMP (lambda_factor, s_mv[0], s_mv[1], pred_mv2[0], pred_mv2[1]);
+       if (mcost >= min_mcost) continue; // the motion vector cost alone already fails to beat the current best
+ 
+       mcost += computeBiPred(orig_pic, blocksize_y, blocksize_x, min_mcost - mcost,
+         smv_x, smv_y, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+       if (mcost < min_mcost)
+       {
+         min_mcost = mcost;
+         best_pos  = pos;
+       }
+     }
+   }
+ 
+   if (best_pos)
+   {
+     mv[0] += search_point_hp [best_pos][0];
+     mv[1] += search_point_hp [best_pos][1];
+   }
+ 
+   computeBiPred = apply_weights? computeBiPred2[Q_PEL] : computeBiPred1[Q_PEL]; // switch to the Q_PEL entry for the second pass
+ 
+   /************************************
+   *****                          *****
+   *****  QUARTER-PEL REFINEMENT  *****
+   *****                          *****
+   ************************************/
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + mv[0] > 0) && (pic4_pix_x + mv[0] < max_pos_x4) &&
+     (pic4_pix_y + mv[1] > 0) && (pic4_pix_y + mv[1] < max_pos_y4))
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   if ((pic4_pix_x + s_mv[0] > 0) && (pic4_pix_x + s_mv[0] < max_pos_x4) &&
+     (pic4_pix_y + s_mv[1] > 0) && (pic4_pix_y + s_mv[1] < max_pos_y4))
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   if ( !start_me_refinement_qp )
+     min_mcost = INT_MAX; // drop the half-pel cost; the loop below then starts at pos 0
+ 
+   lambda_factor = lambda[Q_PEL];
+   second_pos = 0;
+   second_mcost = INT_MAX;
+   //===== loop over search positions below 5, tracking the best and second-best position =====
+   for (best_pos = 0, pos = start_me_refinement_qp; pos < 5; pos++)
+   {
+     cand_mv_x = mv[0] + search_point_qp[pos][0];    // quarter-pel units
+     cand_mv_y = mv[1] + search_point_qp[pos][1];    // quarter-pel units
+ 
+     //----- set motion vector cost -----
+     mcost  = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv1[0], pred_mv1[1]);
+     mcost += MV_COST_SMP (lambda_factor, s_mv[0], s_mv[1], pred_mv2[0], pred_mv2[1]);
+ 
+     mcost += computeBiPred(orig_pic, blocksize_y, blocksize_x, INT_MAX,
+       smv_x, smv_y, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+ 
+     if (mcost < min_mcost)
+     {
+       second_mcost = min_mcost;
+       second_pos  = best_pos;
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     else if (mcost < second_mcost)
+     {
+       second_mcost = mcost;
+       second_pos  = pos;
+     }
+   }
+ 
+   start_pos = 5;
+   end_pos = search_pos4;
+ 
+   if (best_pos != 0 && second_pos != 0)
+   {
+     switch (best_pos ^ second_pos)
+     {
+     case 1:
+       start_pos = 6;
+       end_pos   = 7;
+       break;
+     case 3:
+       start_pos = 5;
+       end_pos   = 6;
+       break;
+     case 5:
+       start_pos = 8;
+       end_pos   = 9;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 8;
+       break;
+     default:
+       break;
+     }
+   }
+   else
+   {
+     switch (best_pos + second_pos)
+     {
+       //case 0:
+       //start_pos = 5;
+       //end_pos   = 5;
+       //break;
+     case 1:
+       start_pos = 8;
+       end_pos   = 10;
+       break;
+     case 2:
+       start_pos = 5;
+       end_pos   = 7;
+       break;
+     case 5:
+       start_pos = 6;
+       end_pos   = 8;
+       break;
+     case 7:
+       start_pos = 7;
+       end_pos   = 9;
+       break;
+     default:
+       break;
+     }
+   }
+ 
+   if (best_pos !=0 || (iabs(pred_mv1[0] - mv[0]) + iabs(pred_mv1[1] - mv[1]))) // skipped only when pos 0 won and mv equals pred_mv1
+   {
+     for (pos = start_pos; pos < end_pos; pos++)
+     {
+       cand_mv_x = mv[0] + search_point_qp[pos][0];    // quarter-pel units
+       cand_mv_y = mv[1] + search_point_qp[pos][1];    // quarter-pel units
+ 
+       //----- set motion vector cost -----
+       mcost  = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv1[0], pred_mv1[1]);
+       mcost += MV_COST_SMP (lambda_factor, s_mv[0], s_mv[1], pred_mv2[0], pred_mv2[1]);
+       if (mcost >= min_mcost) continue; // the motion vector cost alone already fails to beat the current best
+ 
+       mcost += computeBiPred(orig_pic, blocksize_y, blocksize_x, min_mcost - mcost,
+         smv_x, smv_y, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+ 
+       if (mcost < min_mcost)
+       {
+         min_mcost = mcost;
+         best_pos  = pos;
+       }
+     }
+   }
+   if (best_pos)
+   {
+     mv[0] += search_point_qp[best_pos][0];
+     mv[1] += search_point_qp[best_pos][1];
+   }
+ 
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_epzs.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_epzs.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_epzs.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,90 ----
+ 
+ /*!
+  ************************************************************************
+  * \file
+  *     me_epzs.h
+  *
+  * \author
+  *    Alexis Michael Tourapis        <alexis.tourapis at dolby.com>
+  *
+  * \date
+  *    11. August 2006
+  *
+  * \brief
+  *    Headerfile for EPZS motion estimation
+  **************************************************************************
+  */
+ 
+ 
+ #ifndef _ME_EPZS_H_
+ #define _ME_EPZS_H_
+ 
+ 
+ #define CHECK_RANGE  ((cand_x >= 0) && (cand_x < img_width  - blocksize_x) &&(cand_y >= 0) && (cand_y < img_height - blocksize_y)) // non-zero when cand_x / cand_y are inside the bounds; all six names must be in scope where the macro is expanded
+ 
+ 
+ typedef struct               // size fields plus frame, top-field and bottom-field motion vector arrays (presumably of a co-located picture - confirm in me_epzs.c)
+ {
+   int         mb_adaptive_frame_field_flag;
+   int         size_x, size_y;
+ 
+   // Frame
+   short ****  mv;            //!< motion vector       [list][subblock_x][subblock_y][component]
+   // Top field
+   short ****  top_mv;        //!< motion vector       [list][subblock_x][subblock_y][component]
+   // Bottom field params
+   short ****  bottom_mv;     //!< motion vector       [list][subblock_x][subblock_y][component]
+ 
+ } EPZSColocParams;
+ 
+ typedef struct      // one entry of an EPZSStructure point array
+ {
+   int mv[2];        // two-component vector; presumably the (x,y) offset of this search point - TODO confirm
+   int start_nmbr;   // NOTE(review): meaning not visible in this header; confirm against me_epzs.c
+   int next_points;  // NOTE(review): meaning not visible in this header; confirm against me_epzs.c
+ }
+ SPoint;
+ 
+ typedef struct MEPatternNode  // search pattern node, linked to another node of the same type through nextpattern
+ {
+   int    searchPoints;        // presumably the number of entries in point[] - TODO confirm
+   SPoint *point;
+   int    stopSearch;
+   int    nextLast;
+   struct MEPatternNode *nextpattern;
+ }
+ EPZSStructure;
+ 
+ typedef enum        // pattern identifiers, numbered 0 to 4
+ {
+   SDIAMOND  = 0,
+   SQUARE    = 1,
+   EDIAMOND  = 2,
+   LDIAMOND  = 3,
+   SBDIAMOND = 4
+ } EPZSPatterns;
+ 
+ extern EPZSColocParams *EPZSCo_located;
+ extern int ***EPZSDistortion;  //!< Array for storing SAD Values
+ 
+ extern int EPZSInit(void);
+ extern void EPZSDelete (void);
+ extern void EPZSOutputStats(FILE *stat,short stats_file); // this header has no #include of its own: FILE, imgpel and StorablePicture must already be declared by the includer
+ extern void EPZSSliceInit(EPZSColocParams* p, StorablePicture **listX[6]);
+ extern int EPZSPelBlockMotionSearch (imgpel *, short, int, int, char ***, short ****,
+                                      int, int, int, short[2], short[2], int, int, int);
+ 
+ extern int EPZSBiPredBlockMotionSearch (imgpel *, short, int, int, char  ***, short  ****,
+                                         int, int, int, short*, short *,
+                                         short[2], short[2], int, int, int);
+ 
+ extern int EPZSSubPelBlockMotionSearch (imgpel *, short, int, int, int, int, short[2],
+                                         short[2], int, int, int, int*);
+ 
+ extern int EPZSSubPelBlockSearchBiPred  (imgpel* orig_pic, short ref, int list, int pic_pix_x, int pic_pix_y,
+                                          int blocktype, short *pred_mv1, short *pred_mv2,
+                                          short mv1[2], short mv2[2],
+                                          int search_pos2, int search_pos4, int min_mcost, int *lambda_factor); // the definition names these parameters mv, s_mv and lambda
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_fullfast.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_fullfast.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_fullfast.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,901 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file me_fullfast.c
+ *
+ * \brief
+ *    Motion Estimation using Full Search Fast
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Alexis Michael Tourapis <alexismt at ieee.org>
+ *
+ *************************************************************************************
+ */
+ 
+ // Includes
+ #include "contributors.h"
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ #include "refbuf.h"
+ 
+ #include "me_distortion.h"
+ #include "me_fullsearch.h"
+ 
+ // External global parameters (defined in other translation units)
+ extern int *mvbits;
+ extern short*   spiral_search_x;
+ extern short*   spiral_search_y;
+ extern int *byte_abs;
+ extern const int LEVELMVLIMIT[17][6];
+ 
+ /*****
+  *****  static variables for fast integer motion estimation
+  *****  (allocated in InitializeFastFullIntegerSearch, released in ClearFastFullIntegerSearch)
+  */
+ static int  **search_setup_done;  //!< [list][ref] flag: set once all block SADs have been calculated
+ static int  **search_center_x;    //!< [list][ref] absolute search center for fast full motion search
+ static int  **search_center_y;    //!< [list][ref] absolute search center for fast full motion search
+ static int  **pos_00;             //!< [list][ref] search position of the (0,0) vector
+ static distpel  *****BlockSAD;        //!< SAD for all blocksizes, ref. frames and motion vectors: [list][ref][blocktype 1..7][block 0..15][search position]
+ static int  **max_search_range;   //!< [list][ref] search range used for that reference
+ 
+ extern void SetMotionVectorPredictor (short  pmv[2], char   **refPic,
+                                       short  ***tmp_mv, short  ref_frame,
+                                       int    list, int    block_x,
+                                       int    block_y, int    blockshape_x,
+                                       int    blockshape_y);
+ 
+ // Functions
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Full pixel block motion search
+  ***********************************************************************
+  */
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Allocates the file-scope arrays for fast integer motion estimation and fills max_search_range[][]; calls no_mem_exit() when a malloc fails
+  ***********************************************************************
+  */
+ void
+ InitializeFastFullIntegerSearch ()
+ {
+   int  i, j, k, list;
+   int  search_range = input->search_range;
+   int  max_pos      = (2*search_range+1) * (2*search_range+1); // number of positions in the square search window
+ 
+   if ((BlockSAD = (distpel*****)malloc (2 * sizeof(distpel****))) == NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+ 
+   for (list=0; list<2;list++)
+   {
+     if ((BlockSAD[list] = (distpel****)malloc ((img->max_num_references) * sizeof(distpel***))) == NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+     for (i = 0; i < img->max_num_references; i++)
+     {
+       if ((BlockSAD[list][i] = (distpel***)malloc (8 * sizeof(distpel**))) == NULL)
+         no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+       for (j = 1; j < 8; j++) // slot 0 of the 8 pointers is left unallocated
+       {
+         if ((BlockSAD[list][i][j] = (distpel**)malloc (16 * sizeof(distpel*))) == NULL)
+           no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+         for (k = 0; k < 16; k++)
+         {
+           if ((BlockSAD[list][i][j][k] = (distpel*)malloc (max_pos * sizeof(distpel))) == NULL)
+             no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+         }
+       }
+     }
+   }
+ 
+   if ((search_setup_done = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: search_setup_done");
+   if ((search_center_x = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: search_center_x");
+   if ((search_center_y = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: search_center_y");
+   if ((pos_00 = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: pos_00");
+   if ((max_search_range = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: max_search_range");
+ 
+   for (list=0; list<2; list++)
+   {
+     if ((search_setup_done[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: search_setup_done");
+     if ((search_center_x[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: search_center_x");
+     if ((search_center_y[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: search_center_y");
+     if ((pos_00[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: pos_00");
+     if ((max_search_range[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: max_search_range");
+   }
+ 
+   // assign max search ranges for reference frames
+   if (input->full_search == 2) // every reference gets the full range
+   {
+     for (list=0;list<2;list++)
+       for (i=0; i<img->max_num_references; i++)
+         max_search_range[list][i] = search_range;
+   }
+   else                         // reference 0 gets the full range, all others half of it
+   {
+     for (list=0;list<2;list++)
+     {
+       max_search_range[list][0] = search_range;
+       for (i=1; i< img->max_num_references; i++)  max_search_range[list][i] = search_range / 2;
+     }
+   }
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Frees the file-scope arrays for fast integer motion estimation, innermost level first
+  ***********************************************************************
+  */
+ void
+ ClearFastFullIntegerSearch ()
+ {
+   int  i, j, k, list;
+ 
+   for (list=0; list<2; list++)
+   {
+     for (i = 0; i < img->max_num_references; i++) // uses the current img->max_num_references as the bound
+     {
+       for (j = 1; j < 8; j++) // index 0 is skipped, as in InitializeFastFullIntegerSearch
+       {
+         for (k = 0; k < 16; k++)
+         {
+           free (BlockSAD[list][i][j][k]);
+         }
+         free (BlockSAD[list][i][j]);
+       }
+       free (BlockSAD[list][i]);
+     }
+     free (BlockSAD[list]);
+   }
+   free (BlockSAD);
+ 
+   for (list=0; list<2; list++)
+   {
+     free (search_setup_done[list]);
+     free (search_center_x[list]);
+     free (search_center_y[list]);
+     free (pos_00[list]);
+     free (max_search_range[list]);
+   }
+   free (search_setup_done);
+   free (search_center_x);
+   free (search_center_y);
+   free (pos_00);
+   free (max_search_range);
+ 
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Clears the search_setup_done flags of both lists for fast integer motion estimation
+  *    (has to be called in start_macroblock())
+  ***********************************************************************
+  */
+ void
+ ResetFastFullIntegerSearch ()
+ {
+   int list;
+   for (list=0; list<2; list++)
+     memset(&search_setup_done [list][0], 0, img->max_num_references * sizeof(int)); // zero one flag per reference
+ }
+ /*!
+  ***********************************************************************
+  * \brief
+  *    calculation of SAD for larger blocks (types 6 down to 1) on the basis of 4x4 blocks (type 7): each output table is the sum of two smaller-block tables over all search positions below max_pos
+  ***********************************************************************
+  */
+ void
+ SetupLargerBlocks (int list, int refindex, int max_pos)
+ {
+ #define ADD_UP_BLOCKS()   _o=*_bo; _i=*_bi; _j=*_bj; for(pos=0;pos<max_pos;pos++) _o[pos] = _i[pos] + _j[pos];
+ #define INCREMENT(inc)    _bo+=inc; _bi+=inc; _bj+=inc;
+ 
+   int pos; distpel **_bo, **_bi, **_bj; // pos is an int like max_pos, so the counter cannot wrap before reaching it if distpel is narrower than int
+   register distpel *_o,   *_i,   *_j;
+ 
+   //--- blocktype 6 ---
+   _bo = BlockSAD[list][refindex][6];
+   _bi = BlockSAD[list][refindex][7];
+   _bj = _bi + 4;
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(5);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS();
+ 
+   //--- blocktype 5 ---
+   _bo = BlockSAD[list][refindex][5];
+   _bi = BlockSAD[list][refindex][7];
+   _bj = _bi + 1;
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS();
+ 
+   //--- blocktype 4 ---
+   _bo = BlockSAD[list][refindex][4];
+   _bi = BlockSAD[list][refindex][6];
+   _bj = _bi + 1;
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(6);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS();
+ 
+   //--- blocktype 3 ---
+   _bo = BlockSAD[list][refindex][3];
+   _bi = BlockSAD[list][refindex][4];
+   _bj = _bi + 8;
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS();
+ 
+   //--- blocktype 2 ---
+   _bo = BlockSAD[list][refindex][2];
+   _bi = BlockSAD[list][refindex][4];
+   _bj = _bi + 2;
+   ADD_UP_BLOCKS(); INCREMENT(8);
+   ADD_UP_BLOCKS();
+ 
+   //--- blocktype 1 ---
+   _bo = BlockSAD[list][refindex][1];
+   _bi = BlockSAD[list][refindex][3];
+   _bj = _bi + 2;
+   ADD_UP_BLOCKS();
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Setup the fast search for a macroblock
+  ***********************************************************************
+  */
+ #define GEN_ME 0
+ #if GEN_ME
+ void SetupFastFullPelSearch (short ref, int list)  // <--  reference frame parameter, list0 or 1 (this variant is compiled only when GEN_ME is non-zero)
+ {
+   short   pmv[2];
+   static imgpel   orig_pels[768]; // 256 luma entries, then chroma areas starting at offsets 256 and 512 (256 << k below)
+ 
+   imgpel  *srcptr = orig_pels;
+   int     offset_x, offset_y, range_partly_outside, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
+   int     max_width, max_height;
+   int     img_width, img_height; // assigned below but not read anywhere in this function
+ 
+   StorablePicture *ref_picture;
+   distpel**   block_sad = BlockSAD[list][ref][7];
+   int     search_range  = max_search_range[list][ref];
+   int     max_pos       = (2*search_range+1) * (2*search_range+1);
+ 
+   int     list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+ 
+   int     apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+   int abs_y4, abs_x4;
+ 
+   int   i, j, k;
+   int   level_dist = F_PEL + apply_weights * 3;
+   int   pixel_x, pixel_y;
+ 
+ 
+   for ( j= img->opix_y; j < img->opix_y + MB_BLOCK_SIZE; j +=BLOCK_SIZE) // copy the original luma samples, one row of four blocks per iteration
+   {
+     for (pixel_y = j; pixel_y < j + BLOCK_SIZE; pixel_y++)
+     {
+       memcpy( srcptr, &imgY_org[pixel_y][img->opix_x], BLOCK_SIZE * sizeof(imgpel));
+       memcpy( srcptr + 16, &imgY_org[pixel_y][img->opix_x + BLOCK_SIZE], BLOCK_SIZE * sizeof(imgpel));
+       memcpy( srcptr + 32, &imgY_org[pixel_y][img->opix_x + 2 * BLOCK_SIZE], BLOCK_SIZE * sizeof(imgpel));
+       memcpy( srcptr + 48, &imgY_org[pixel_y][img->opix_x + 3 * BLOCK_SIZE], BLOCK_SIZE * sizeof(imgpel));
+       srcptr += BLOCK_SIZE;
+     }
+     srcptr += 48;
+   }
+   // storage format is different from that of orig_pic
+   // for YUV 4:2:0 we have:
+   // YYYY
+   // YYYY
+   // U U
+   //
+   // V V
+   //
+   if (ChromaMEEnable)
+   {
+     imgpel *auxptr;
+     int   bsx_c = BLOCK_SIZE >> (chroma_shift_x - 2);
+     int   bsy_c = BLOCK_SIZE >> (chroma_shift_y - 2);
+     int   pic_pix_x_c = img->opix_x >> (chroma_shift_x - 2);
+     int   pic_pix_y_c = img->opix_y >> (chroma_shift_y - 2);
+ 
+     // copy the original cmp1 and cmp2 data to the orig_pic matrix
+     // This seems to be wrong. NOTE(review): the innermost block loop below reuses k, the outer loop's counter, and also indexes imgUV_org with it.
+     for (k=0; k<2; k++)
+     {
+       srcptr = auxptr = orig_pels + (256 << k);
+       for ( pixel_y = 0, i = 0; i < (BLOCK_SIZE >> (chroma_shift_y - 2)); i++, pixel_y += bsy_c )
+       {
+         for ( pixel_x = 0, k = 0; k < (BLOCK_SIZE >> (chroma_shift_x - 2)); k++, pixel_x += bsx_c )
+         {
+           srcptr = auxptr;
+           for (j = 0; j < bsy_c; j++)
+           {
+             memcpy( srcptr, &imgUV_org[k][pic_pix_y_c + pixel_y + j][pic_pix_x_c + pixel_x], bsx_c * sizeof(imgpel));
+             srcptr += bsx_c;
+           }
+           auxptr += MB_BLOCK_SIZE;
+         }
+       }
+     }
+   }
+ 
+ 
+   ref_picture     = listX[list+list_offset][ref];
+   ref_access_method = FAST_ACCESS;
+ 
+   //===== Use weighted Reference for ME ====
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if ( ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   max_width     = ref_picture->size_x - 17;
+   max_height    = ref_picture->size_y - 17;
+ 
+   img_width     = ref_picture->size_x;
+   img_height    = ref_picture->size_y;
+   width_pad     = ref_picture->size_x_pad;
+   height_pad    = ref_picture->size_y_pad;
+ 
+   //===== get search center: predictor of 16x16 block =====
+   SetMotionVectorPredictor (pmv, enc_picture->ref_idx[list], enc_picture->mv[list], ref, list, 0, 0, 16, 16);
+ 
+   search_center_x[list][ref] = pmv[0] / 4;
+   search_center_y[list][ref] = pmv[1] / 4;
+ 
+   if (!input->rdopt)
+   {
+     //--- correct center so that (0,0) vector is inside ---
+     search_center_x[list][ref] = iClip3(-search_range, search_range, search_center_x[list][ref]);
+     search_center_y[list][ref] = iClip3(-search_range, search_range, search_center_y[list][ref]);
+   }
+   search_center_x[list][ref] = iClip3(-2047 + search_range, 2047 - search_range, search_center_x[list][ref]);
+   search_center_y[list][ref] = iClip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, search_center_y[list][ref]);
+ 
+   search_center_x[list][ref] += img->opix_x; // from here on the center is an absolute picture position
+   search_center_y[list][ref] += img->opix_y;
+ 
+   offset_x = search_center_x[list][ref];
+   offset_y = search_center_y[list][ref];
+ 
+ 
+   //===== check if whole search range is inside image =====
+   if (offset_x >= search_range && offset_x <= max_width  - search_range &&
+     offset_y >= search_range && offset_y <= max_height - search_range   )
+   {
+     range_partly_outside = 0;
+   }
+   else
+   {
+     range_partly_outside = 1;
+   }
+ 
+   //===== determine position of (0,0)-vector =====
+   if (!input->rdopt) // pos_00 is written only on this path
+   {
+     ref_x = img->opix_x - offset_x;
+     ref_y = img->opix_y - offset_y;
+ 
+     for (pos = 0; pos < max_pos; pos++)
+     {
+       if (ref_x == spiral_search_x[pos] &&
+         ref_y == spiral_search_y[pos])
+       {
+         pos_00[list][ref] = pos;
+         break;
+       }
+     }
+   }
+ 
+   //===== loop over search range (spiral search): get blockwise SAD =====
+   for (pos = 0; pos < max_pos; pos++)
+   {
+     abs_y = offset_y + spiral_search_y[pos];
+     abs_x = offset_x + spiral_search_x[pos];
+ 
+     abs_y4 = (abs_y + IMG_PAD_SIZE) << 2;
+     abs_x4 = (abs_x + IMG_PAD_SIZE) << 2;
+ 
+     if (range_partly_outside) // otherwise ref_access_method keeps the FAST_ACCESS value set above
+     {
+       if (abs_y >= 0 && abs_y <= max_height &&
+         abs_x >= 0 && abs_x <= max_width    )
+       {
+         ref_access_method = FAST_ACCESS;
+       }
+       else
+       {
+         ref_access_method = UMV_ACCESS;
+       }
+     }
+ 
+     srcptr = orig_pels;
+     bindex = 0;
+     for (blky = 0; blky < 4; blky++) // four blocks per row, four rows: fills block_sad[0..15][pos]
+     {
+       block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4,      abs_y4);
+       srcptr += 16;
+       block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4 + 16, abs_y4);
+       srcptr += 16;
+       block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4 + 32, abs_y4);
+       srcptr += 16;
+       block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4 + 48, abs_y4);
+       srcptr += 16;
+       abs_y4 += 16;
+     }
+   }
+ 
+   //===== combine SAD's for larger block types =====
+   SetupLargerBlocks (list, ref, max_pos);
+ 
+   //===== set flag marking that search setup has been done =====
+   search_setup_done[list][ref] = 1;
+ }
+ 
+ #else
+ void SetupFastFullPelSearch (short ref, int list)  /* <--  reference frame parameter, list0 or 1
+  *
+  * Prepares the fast full-pel search for one (list, ref) pair:
+  *   - derives the search center from the 16x16 motion vector predictor and
+  *     stores it in search_center_x/y[list][ref] as an absolute picture position,
+  *   - copies the original macroblock into orig_pels,
+  *   - records in pos_00[list][ref] the search position of the (0,0) vector
+  *     (only when input->rdopt is 0),
+  *   - fills BlockSAD[list][ref][7] with the distortion of each of the sixteen
+  *     4x4 blocks at every search position (chroma is added when ChromaMEEnable),
+  *   - calls SetupLargerBlocks() and sets search_setup_done[list][ref].
+  * Also overwrites the shared reference-access globals (ref_pic_sub, width_pad,
+  * height_pad, ref_access_method and, when used, the weighting parameters).
+  */
+ {
+   short   pmv[2];                       // motion vector predictor of the 16x16 block (x, y)
+   static imgpel orig_pels[768];         // original macroblock: 16x16 luma first, chroma planes appended when ChromaMEEnable
+   imgpel  *srcptr = orig_pels, *refptr;
+   int     k, x, y;
+   int     abs_y4, abs_x4;
+   int     offset_x, offset_y, range_partly_outside, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
+   int     LineSadBlk0, LineSadBlk1, LineSadBlk2, LineSadBlk3;   // accumulators for the four 4x4 blocks of one block row
+   int     max_width, max_height;
+   int     img_width, img_height;
+ 
+   StorablePicture *ref_picture;
+   distpel**   block_sad = BlockSAD[list][ref][7];               // indexed as [4x4 block index][search position]
+   int     search_range  = max_search_range[list][ref];
+   int     max_pos       = (2*search_range+1) * (2*search_range+1);   // number of positions in the square search window
+ 
+   int     list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+   int     apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+   int     weighted_pel;
+   int *dist_method = input->MEErrorMetric[0] ? img->quad : byte_abs;   // lookup table indexed by the signed pixel difference
+ 
+   ref_picture     = listX[list+list_offset][ref];
+   ref_access_method = FAST_ACCESS;
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+ 
+   max_width     = ref_picture->size_x - 17;   // upper bounds used by the inside-picture tests below
+   max_height    = ref_picture->size_y - 17;
+ 
+   img_width     = ref_picture->size_x;
+   img_height    = ref_picture->size_y;
+   width_pad     = ref_picture->size_x_pad;
+   height_pad    = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if ( ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   //===== get search center: predictor of 16x16 block =====
+   SetMotionVectorPredictor (pmv, enc_picture->ref_idx[list], enc_picture->mv[list], ref, list, 0, 0, 16, 16);
+ 
+   search_center_x[list][ref] = pmv[0] / 4;   // NOTE(review): presumably sub-pel predictor -> full-pel units; confirm
+   search_center_y[list][ref] = pmv[1] / 4;
+ 
+   if (!input->rdopt)
+   {
+     //--- correct center so that (0,0) vector is inside ---
+     search_center_x[list][ref] = iClip3(-search_range, search_range, search_center_x[list][ref]);
+     search_center_y[list][ref] = iClip3(-search_range, search_range, search_center_y[list][ref]);
+   }
+ 
+   //--- keep the whole window (center +/- search_range) within +/-2047 horizontally and LEVELMVLIMIT vertically ---
+   search_center_x[list][ref] = iClip3(-2047 + search_range, 2047 - search_range, search_center_x[list][ref]);
+   search_center_y[list][ref] = iClip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, search_center_y[list][ref]);
+ 
+   search_center_x[list][ref] += img->opix_x;   // from here on the center is an absolute picture position
+   search_center_y[list][ref] += img->opix_y;
+ 
+   offset_x = search_center_x[list][ref];
+   offset_y = search_center_y[list][ref];
+ 
+ 
+   //===== copy original block for fast access =====
+   for   (y = img->opix_y; y < img->opix_y+MB_BLOCK_SIZE; y++)
+   {
+     memcpy(srcptr, &imgY_org[y][img->opix_x], MB_BLOCK_SIZE * sizeof(imgpel));
+     srcptr += MB_BLOCK_SIZE;
+   }
+   if ( ChromaMEEnable)
+   {
+     for (k = 0; k < 2; k++)   // both chroma planes are appended directly after the luma samples
+     {
+       for   (y = img->opix_c_y; y < img->opix_c_y + img->mb_cr_size_y; y++)
+       {
+         memcpy(srcptr, &imgUV_org[k][y][img->opix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+         srcptr += img->mb_cr_size_x;
+       }
+     }
+   }
+ 
+   //===== check if whole search range is inside image =====
+   if (offset_x >= search_range && offset_x <= max_width  - search_range &&
+     offset_y >= search_range && offset_y <= max_height - search_range   )
+   {
+     range_partly_outside = 0;   // ref_access_method stays FAST_ACCESS for every position
+   }
+   else
+   {
+     range_partly_outside = 1;   // access method is chosen per position inside the search loop
+   }
+ 
+   //===== determine position of (0,0)-vector =====
+   if (!input->rdopt)
+   {
+     ref_x = img->opix_x - offset_x;
+     ref_y = img->opix_y - offset_y;
+ 
+     for (pos = 0; pos < max_pos; pos++)
+     {
+       if (ref_x == spiral_search_x[pos] &&
+         ref_y == spiral_search_y[pos])
+       {
+         pos_00[list][ref] = pos;
+         break;
+         }   // closes the position-match if (indentation is misleading)
+       }     // closes the for loop
+     }       // closes if (!input->rdopt)
+ 
+     //===== loop over search range (spiral search): get blockwise SAD =====
+     for (pos = 0; pos < max_pos; pos++)   // this loop is at function-body level despite its indentation
+     {
+       abs_y = offset_y + spiral_search_y[pos];
+       abs_x = offset_x + spiral_search_x[pos];
+ 
+       abs_y4 = (abs_y + IMG_PAD_SIZE) << 2;   // padded position scaled by 4, as passed to get_line/get_crline
+       abs_x4 = (abs_x + IMG_PAD_SIZE) << 2;
+ 
+       if (range_partly_outside)
+       {
+       if (abs_y >= 0 && abs_y <= max_height&&
+         abs_x >= 0 && abs_x <= max_width  )
+         {
+         ref_access_method = FAST_ACCESS;
+         }
+         else
+         {
+         ref_access_method = UMV_ACCESS;
+       }   // closes the else branch
+     }     // closes if (range_partly_outside)
+ 
+     if (apply_weights)
+     {
+       srcptr = orig_pels;
+       bindex = 0;
+ 
+       refptr = get_line[ref_access_method] (ref_pic_sub.luma, abs_y4, abs_x4);
+ 
+       for (blky = 0; blky < 4; blky++)   // one row of four 4x4 luma blocks per iteration
+       {
+         LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
+ 
+         for (y = 0; y < 4; y++)   // one 16-sample line: samples 0-3 -> Blk0, 4-7 -> Blk1, 8-11 -> Blk2, 12-15 -> Blk3
+         {
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
+           weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++  + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
+           LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
+           refptr += img_padded_size_x - MB_BLOCK_SIZE;   // advance to the start of the next reference line
+         }
+ 
+         block_sad[bindex++][pos] = LineSadBlk0;
+         block_sad[bindex++][pos] = LineSadBlk1;
+         block_sad[bindex++][pos] = LineSadBlk2;
+         block_sad[bindex++][pos] = LineSadBlk3;
+       }
+       if (ChromaMEEnable)
+       {
+         for (k = 0; k < 2; k ++)   // srcptr is not reset: it continues into the chroma part of orig_pels
+         {
+           bindex = 0;
+ 
+           refptr = get_crline[ref_access_method] (ref_pic_sub.crcb[k], abs_y4, abs_x4);
+           for (blky = 0; blky < 4; blky++)
+           {
+             LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
+ 
+             for (y = 0; y < img->mb_cr_size_y; y+=BLOCK_SIZE)
+             {
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *refptr++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+                 LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
+               }
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *refptr++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+                 LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
+               }
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *refptr++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+                 LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
+               }
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 weighted_pel = iClip1( img->max_imgpel_value_uv, ((weight_cr[k] * *refptr++  + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
+                 LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
+               }
+               refptr += img_cr_padded_size_x - img->mb_cr_size_x;
+             }
+ 
+             block_sad[bindex++][pos] += LineSadBlk0;   // chroma distortion is added on top of the luma value
+             block_sad[bindex++][pos] += LineSadBlk1;
+             block_sad[bindex++][pos] += LineSadBlk2;
+             block_sad[bindex++][pos] += LineSadBlk3;
+           }
+         }
+       }
+     }
+     else   // same walk as above, but on unweighted reference samples
+     {
+       srcptr = orig_pels;
+       bindex = 0;
+ 
+       refptr = get_line[ref_access_method] (ref_pic_sub.luma, abs_y4, abs_x4);
+ 
+       for (blky = 0; blky < 4; blky++)
+       {
+         LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
+ 
+         for (y = 0; y < 4; y++)
+         {
+           LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
+           LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
+           refptr += img_padded_size_x - MB_BLOCK_SIZE;   // advance to the start of the next reference line
+         }
+ 
+         block_sad[bindex++][pos] = LineSadBlk0;
+         block_sad[bindex++][pos] = LineSadBlk1;
+         block_sad[bindex++][pos] = LineSadBlk2;
+         block_sad[bindex++][pos] = LineSadBlk3;
+       }
+ 
+       if (ChromaMEEnable)
+       {
+         for (k = 0; k < 2; k ++)   // srcptr is not reset: it continues into the chroma part of orig_pels
+         {
+           bindex = 0;
+ 
+           refptr = get_crline[ref_access_method] (ref_pic_sub.crcb[k], abs_y4, abs_x4);
+           for (blky = 0; blky < 4; blky++)
+           {
+             LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
+ 
+             for (y = 0; y < img->mb_cr_size_y; y+=BLOCK_SIZE)
+             {
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
+               }
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
+               }
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
+               }
+               for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
+               {
+                 LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
+               }
+               refptr += img_cr_padded_size_x - img->mb_cr_size_x;
+             }
+ 
+             block_sad[bindex++][pos] += LineSadBlk0;   // chroma distortion is added on top of the luma value
+             block_sad[bindex++][pos] += LineSadBlk1;
+             block_sad[bindex++][pos] += LineSadBlk2;
+             block_sad[bindex++][pos] += LineSadBlk3;
+           }
+         }
+       }
+     }
+   }   // closes the for (pos) search loop
+ 
+   //===== combine SAD's for larger block types =====
+   SetupLargerBlocks (list, ref, max_pos);
+ 
+   //===== set flag marking that search setup has been done =====
+   search_setup_done[list][ref] = 1;
+ }
+ #endif
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Fast full-pel block motion search.
+  *
+  *    Looks up the distortion table of the requested block in BlockSAD
+  *    (running SetupFastFullPelSearch first if it has not been run for
+  *    this list/ref pair), adds the motion vector cost of each search
+  *    position and keeps the cheapest one.
+  *
+  * \return
+  *    Minimum motion cost found; the incoming min_mcost is returned
+  *    unchanged when no position is cheaper.
+  ***********************************************************************
+  */
+ int                                                   //  ==> minimum motion cost after search
+ FastFullPelBlockMotionSearch (imgpel*   orig_pic,     // <--  not used
+                               short     ref,          // <--  reference frame (0... or -1 (backward))
+                               int       list,
+                               int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+                               int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+                               int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+                               short     pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+                               short     pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+                               short*    mv_x,         //  --> motion vector (x) - in pel units
+                               short*    mv_y,         //  --> motion vector (y) - in pel units
+                               int       search_range, // <--  1-d search range in pel units
+                               int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+                               int       lambda_factor)       // <--  lagrangian parameter for determining motion cost
+ {
+   // number of candidate positions in the square search window
+   const int num_positions = (2*search_range+1)*(2*search_range+1);
+   // index of the regarded block inside the macroblock, used to select its SAD table
+   const int sub_block     = (pic_pix_y-img->opix_y)+((pic_pix_x-img->opix_x)>>2);
+   // one distortion value per search position
+   distpel* const sad_table = BlockSAD[list][ref][blocktype][sub_block];
+ 
+   int winner = 0;            // search position with the lowest cost so far
+   int center_dx, center_dy;  // search center relative to the macroblock origin
+   int qx, qy;                // candidate vector in sub-pel units
+   int total;                 // distortion plus motion vector cost of a candidate
+   int idx;
+ 
+   // the tables are filled lazily, once per (list, ref)
+   if (!search_setup_done[list][ref])
+     SetupFastFullPelSearch (ref, list);
+ 
+   center_dx = search_center_x[list][ref] - img->opix_x;
+   center_dy = search_center_y[list][ref] - img->opix_y;
+ 
+   // the (0,0) vector is evaluated up front because its MV cost can be negative
+   if (!input->rdopt)
+   {
+     const int zero_pos = pos_00[list][ref];
+ 
+     total = sad_table[zero_pos] + MV_COST_SMP (lambda_factor, 0, 0, pred_mv_x, pred_mv_y);
+     if (total < min_mcost)
+     {
+       min_mcost = total;
+       winner    = zero_pos;
+     }
+   }
+ 
+   for (idx = 0; idx < num_positions; idx++)
+   {
+     // distortion alone already too expensive: no need to compute the MV cost
+     if (sad_table[idx] >= min_mcost)
+       continue;
+ 
+     qx = (center_dx + spiral_search_x[idx])<<2;
+     qy = (center_dy + spiral_search_y[idx])<<2;
+ 
+     total  = sad_table[idx];
+     total += MV_COST_SMP (lambda_factor, qx, qy, pred_mv_x, pred_mv_y);
+ 
+     if (total < min_mcost)
+     {
+       min_mcost = total;
+       winner    = idx;
+     }
+   }
+ 
+   // report the winning vector and its cost
+   *mv_x = center_dx + spiral_search_x[winner];
+   *mv_y = center_dy + spiral_search_y[winner];
+   return min_mcost;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_fullfast.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_fullfast.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_fullfast.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,29 ----
+ 
+ /*!
+  ************************************************************************
+  * \file
+  *     me_fullfast.h
+  *
+  * \author
+  *    Alexis Michael Tourapis        <alexis.tourapis at dolby.com>
+  *
+  * \date
+  *    9 September 2006
+  *
+  * \brief
+  *    Headerfile for Fast Full Search motion estimation
+  **************************************************************************
+  */
+ 
+ 
+ #ifndef _ME_FULLFAST_H_
+ #define _ME_FULLFAST_H_
+ int FastFullPelBlockMotionSearch (imgpel* orig_pic, short ref, int list, int pic_pix_x, int pic_pix_y,   // returns the minimum motion cost; orig_pic is not used
+                               int blocktype, short pred_mv_x, short pred_mv_y, short* mv_x, short* mv_y, // pred_mv_*: predictor in sub-pel units; mv_*: resulting vector in pel units
+                               int search_range,  int min_mcost, int lambda_factor);
+ void InitializeFastFullIntegerSearch ();   // NOTE(review): by name, sets up the fast full search buffers - confirm in me_fullfast.c
+ void ResetFastFullIntegerSearch ();        // NOTE(review): by name, resets the per-(list, ref) search state - confirm in me_fullfast.c
+ void ClearFastFullIntegerSearch ();        // NOTE(review): by name, releases what Initialize set up - confirm in me_fullfast.c
+ 
+ #endif // _ME_FULLFAST_H_
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_fullsearch.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_fullsearch.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_fullsearch.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,750 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file me_fullsearch.c
+ *
+ * \brief
+ *    Motion Estimation using Fullsearch
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Alexis Michael Tourapis <alexismt at ieee.org>
+ *      - Athanasios Leontaris    <aleon at dolby.com>
+ *
+ *************************************************************************************
+ */
+ 
+ // Includes
+ #include "contributors.h"
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ #include "refbuf.h"
+ 
+ #include "me_distortion.h"
+ #include "me_fullsearch.h"
+ 
+ // External global parameters used by the full search
+ extern int *mvbits;                   // NOTE(review): presumably the motion vector bit-cost table read by MV_COST_SMP - confirm
+ extern short*  spiral_search_x;       // x offset of each full-pel search position relative to the search center, indexed by position
+ extern short*  spiral_search_y;       // y offset of each full-pel search position relative to the search center, indexed by position
+ extern short*  spiral_hpel_search_x;  // NOTE(review): presumably the half-pel counterpart of spiral_search_x - confirm against its users
+ extern short*  spiral_hpel_search_y;  // NOTE(review): presumably the half-pel counterpart of spiral_search_y - confirm against its users
+ 
+ // Functions
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Full pixel block motion search
+  *
+  *    Evaluates every position of the square search window around the
+  *    incoming search center (*mv_x, *mv_y), in the order given by
+  *    spiral_search_x/y. For each position the motion vector cost is
+  *    computed first; the distortion is only computed when that cost alone
+  *    is still below the best cost found so far.
+  *
+  * \return
+  *    Minimum motion cost after the search; min_mcost is returned
+  *    unchanged (and *mv_x / *mv_y are left untouched) when no candidate
+  *    is cheaper.
+  *
+  * \note
+  *    Candidate and predictor positions are absolute positions in sub-pel
+  *    units (pel position << 2). The zero-vector bonus therefore compares
+  *    the candidate against the block position scaled the same way.
+  *    The function also overwrites the shared reference-access globals
+  *    (ref_pic_sub, img_width, img_height, width_pad, height_pad,
+  *    ref_access_method and, when used, the weighting parameters).
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ FullPelBlockMotionSearch (imgpel*   orig_pic,     // <--  original pixel values for the AxB block
+                           short     ref,          // <--  reference frame (0... or -1 (backward))
+                           int       list,         // <--  current list
+                           int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+                           int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+                           int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+                           short     pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+                           short     pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+                           short*    mv_x,         // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    mv_y,         // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           int       search_range, // <--  1-d search range in pel units
+                           int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+                           int       lambda_factor)       // <--  lagrangian parameter for determining motion cost
+ {
+   int   pos, cand_x, cand_y, mcost;
+ 
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+ 
+   int   best_pos      = 0;                                        // position with minimum motion cost
+   int   max_pos       = (2*search_range+1)*(2*search_range+1);    // number of search positions
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+ 
+   int   pred_x        = (pic_pix_x << 2) + pred_mv_x;       // predicted position x (in sub-pel units)
+   int   pred_y        = (pic_pix_y << 2) + pred_mv_y;       // predicted position y (in sub-pel units)
+   int   center_x      = pic_pix_x + *mv_x;                        // center position x (in pel units)
+   int   center_y      = pic_pix_y + *mv_y;                        // center position y (in pel units)
+   int   zero_x        = pic_pix_x << 2;                           // position of the (0,0) vector, x (in sub-pel units)
+   int   zero_y        = pic_pix_y << 2;                           // position of the (0,0) vector, y (in sub-pel units)
+   int   check_for_00  = (blocktype==1 && !input->rdopt && img->type!=B_SLICE && ref==0);
+   int   apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+   int   dist_method = F_PEL + 3 * apply_weights;                  // index into computeUniPred; weighted variants start 3 entries further
+ 
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   img_width  = ref_picture->size_x;
+   img_height = ref_picture->size_y;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if (ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   // FAST_ACCESS only when the whole window, including the block extent, lies strictly inside the picture
+   if ((center_x > search_range) && (center_x < img_width -1-search_range-blocksize_x) &&
+     (center_y > search_range) && (center_y < img_height-1-search_range-blocksize_y)   )
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   //===== loop over all search positions =====
+   for (pos=0; pos<max_pos; pos++)
+   {
+     //--- set candidate position (absolute position in sub-pel units) ---
+     cand_x = (center_x + spiral_search_x[pos])<<2;
+     cand_y = (center_y + spiral_search_y[pos])<<2;
+ 
+     //--- initialize motion cost (cost for motion vector) and check ---
+     mcost = MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x, pred_y);
+     //--- favour the (0,0) vector: compare in the same (sub-pel) units as cand_x/cand_y ---
+     if (check_for_00 && cand_x==zero_x && cand_y==zero_y)
+     {
+       mcost -= WEIGHTED_COST (lambda_factor, 16);
+     }
+     if (mcost >= min_mcost)   continue;
+ 
+     //--- add residual cost to motion cost ---
+     //    (the remaining budget min_mcost - mcost is passed along with the padded candidate position)
+     mcost += computeUniPred[dist_method](orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, cand_x + IMG_PAD_SIZE_TIMES4, cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+     //--- check if motion cost is less than minimum cost ---
+     if (mcost < min_mcost)
+     {
+       best_pos  = pos;
+       min_mcost = mcost;
+     }
+   }
+ 
+ 
+   //===== set best motion vector and return minimum motion cost =====
+   // best_pos stays 0 when no candidate improved on min_mcost: the incoming vector is then kept
+   if (best_pos)
+   {
+     *mv_x += spiral_search_x[best_pos];
+     *mv_y += spiral_search_y[best_pos];
+   }
+   return min_mcost;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Full pixel block motion search for bi-prediction (uses two reference pictures)
+  ***********************************************************************
+  */
+ int                                                //  ==> minimum motion cost after search
+ FullPelBlockMotionBiPred (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                           short       ref,         // <--  reference frame (0... or -1 (backward))
+                           int       list,
+                           int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                           int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                           int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                           short     pred_mv_x1,    // <--  motion vector predictor (x) in sub-pel units
+                           short     pred_mv_y1,    // <--  motion vector predictor (y) in sub-pel units
+                           short     pred_mv_x2,    // <--  motion vector predictor (x) in sub-pel units
+                           short     pred_mv_y2,    // <--  motion vector predictor (y) in sub-pel units
+                           short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           short*    s_mv_x,        // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    s_mv_y,        // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           int       search_range,  // <--  1-d search range in pel units
+                           int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                           int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ {
+   int   pos, cand_x, cand_y, mcost;
+ 
+ 
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+ 
+   int   best_pos      = 0;                                        // position with minimum motion cost
+   int   max_pos       = (2*search_range+1)*(2*search_range+1);    // number of search positions
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+ 
+   int   pred_x1        = (pic_pix_x << 2) + pred_mv_x1;       // predicted position x (in sub-pel units)
+   int   pred_y1        = (pic_pix_y << 2) + pred_mv_y1;       // predicted position y (in sub-pel units)
+   int   pred_x2        = (pic_pix_x << 2) + pred_mv_x2;       // predicted position x (in sub-pel units)
+   int   pred_y2        = (pic_pix_y << 2) + pred_mv_y2;       // predicted position y (in sub-pel units)
+   short center_x      = pic_pix_x + *mv_x;                      // center position x (in pel units)
+   short center_y      = pic_pix_y + *mv_y;                      // center position y (in pel units)
+   short ref1_center_x = pic_pix_x + *s_mv_x;                      // mvx of second pred (in pel units)
+   short ref1_center_y = pic_pix_y + *s_mv_y;                      // mvy of second pred (in pel units)
+ 
+ 
+   short apply_weights = (active_pps->weighted_bipred_idc>0);
+ 
+   short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref][0]:  wp_offset[list_offset + 1][0  ][ref]) : 0);
+   short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref][0]:  wp_offset[list_offset    ][0  ][ref]) : 0);
+ 
+   StorablePicture *ref_picture1 = listX[list       + list_offset][ref];
+   StorablePicture *ref_picture2 = listX[(list ^ 1) + list_offset][0];
+ 
+   ref_pic1_sub.luma = ref_picture1->imgY_sub;
+   ref_pic2_sub.luma = ref_picture2->imgY_sub;
+   img_width     = ref_picture1->size_x;
+   img_height    = ref_picture1->size_y;
+   width_pad    = ref_picture1->size_x_pad;
+   height_pad   = ref_picture1->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight1 = list == 0
+       ? wbp_weight[list_offset         ][ref][0][0]
+       : wbp_weight[list_offset + LIST_1][0  ][ref][0];
+     weight2 = list == 0
+       ? wbp_weight[list_offset + LIST_1][ref][0][0]
+       : wbp_weight[list_offset         ][0  ][ref][0];
+     offsetBi=(offset1 + offset2 + 1)>>1;
+     computeBiPred = computeBiPred2[F_PEL];
+   }
+   else
+   {
+     weight1 = 1<<luma_log_weight_denom;
+     weight2 = 1<<luma_log_weight_denom;
+     offsetBi = 0;
+     computeBiPred = computeBiPred1[F_PEL];
+   }
+ 
+   if (ChromaMEEnable )
+   {
+     ref_pic1_sub.crcb[0] = ref_picture1->imgUV_sub[0];
+     ref_pic1_sub.crcb[1] = ref_picture1->imgUV_sub[1];
+     ref_pic2_sub.crcb[0] = ref_picture2->imgUV_sub[0];
+     ref_pic2_sub.crcb[1] = ref_picture2->imgUV_sub[1];
+     width_pad_cr  = ref_picture1->size_x_cr_pad;
+     height_pad_cr = ref_picture1->size_y_cr_pad;
+     if (apply_weights)
+     {
+       weight1_cr[0] = list == 0
+         ? wbp_weight[list_offset         ][ref][0][1]
+         : wbp_weight[list_offset + LIST_1][0  ][ref][1];
+       weight1_cr[1] = list == 0
+         ? wbp_weight[list_offset         ][ref][0][2]
+         : wbp_weight[list_offset + LIST_1][0  ][ref][2];
+       weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
+       weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
+       offsetBi_cr[0] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
+       offsetBi_cr[1] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
+     }
+     else
+     {
+       weight1_cr[0] = 1<<chroma_log_weight_denom;
+       weight1_cr[1] = 1<<chroma_log_weight_denom;
+       weight2_cr[0] = 1<<chroma_log_weight_denom;
+       weight2_cr[1] = 1<<chroma_log_weight_denom;
+       offsetBi_cr[0] = 0;
+       offsetBi_cr[1] = 0;
+     }
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   if ((center_x > search_range) && (center_x < img_width -1-search_range-blocksize_x) &&
+     (center_y > search_range) && (center_y < img_height-1-search_range-blocksize_y)   )
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   if ((ref1_center_x > search_range) && (ref1_center_x < img_width -1-search_range-blocksize_x) &&
+     (ref1_center_y > search_range) && (ref1_center_y < img_height-1-search_range-blocksize_y)   )
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   //===== loop over all search positions =====
+   for (pos=0; pos<max_pos; pos++)
+   {
+     //--- set candidate position (absolute position in pel units) ---
+     cand_x = (center_x + spiral_search_x[pos])<<2;
+     cand_y = (center_y + spiral_search_y[pos])<<2;
+ 
+ 
+     //--- initialize motion cost (cost for motion vector) and check ---
+     mcost =  MV_COST_SMP (lambda_factor, (ref1_center_x<<2), (ref1_center_y<<2), pred_x1, pred_y1);
+     mcost += MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_x2, pred_y2);
+ 
+     if (mcost >= min_mcost)   continue;
+ 
+     //--- add residual cost to motion cost ---
+     mcost += computeBiPred(orig_pic,
+       blocksize_y, blocksize_x, min_mcost - mcost,
+       (ref1_center_x << 2) + IMG_PAD_SIZE_TIMES4,
+       (ref1_center_y << 2) + IMG_PAD_SIZE_TIMES4,
+       cand_x + IMG_PAD_SIZE_TIMES4, cand_y + IMG_PAD_SIZE_TIMES4);
+ 
+     //--- check if motion cost is less than minimum cost ---
+     if (mcost < min_mcost)
+     {
+       best_pos  = pos;
+       min_mcost = mcost;
+     }
+   }
+ 
+ 
+   //===== set best motion vector and return minimum motion cost =====
+   if (best_pos)
+   {
+     *mv_x += spiral_search_x[best_pos];
+     *mv_y += spiral_search_y[best_pos];
+   }
+   return min_mcost;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sub-pixel block motion search: half-pel refinement followed by quarter-pel refinement of (*mv_x, *mv_y)
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ SubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame, used as index into listX[list + list_offset]
+                          int       list,          // <--  reference picture list
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in quarter-pel units (added to the quarter-pel block position below)
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in quarter-pel units (added to the quarter-pel block position below)
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int*      lambda         // <--  lagrangian factors for the motion cost, indexed by H_PEL and Q_PEL
+                          )
+ {
+   int   pos, best_pos, mcost;
+ 
+   int   cand_mv_x, cand_mv_y;
+   // check_position0: conditions under which search position 0 gets a cost bonus in the half-pel loop
+   int   check_position0 = (!input->rdopt && img->type!=B_SLICE && ref==0 && blocktype==1 && *mv_x==0 && *mv_y==0);
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);   // block position incl. padding, quarter-pel units
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);   // block position incl. padding, quarter-pel units
+   int   max_pos2        = ( !start_me_refinement_hp ? imax(1,search_pos2) : search_pos2);   // if the half-pel loop starts at 0, visit at least position 0
+   int   list_offset     = img->mb_data[img->current_mb_nr].list_offset;
+   int   apply_weights   = ((active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+   int   cmv_x, cmv_y;
+   int dist_method = H_PEL + 3 * apply_weights;   // index into computeUniPred; +3 presumably selects the weighted variant - confirm in me_distortion
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+ 
+   int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);   // upper bound used by the access-method tests (quarter-pel, padded coordinates)
+   int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);   // upper bound used by the access-method tests (quarter-pel, padded coordinates)
+   int lambda_factor = lambda[H_PEL];
+   // the variables set from here on are declared outside this function; presumably read by the computeUniPred routines - confirm
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if (ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+ 
+   //===== choose pixel access method: FAST_ACCESS when the search center is strictly inside (1, max_pos - 1), else UMV_ACCESS =====
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1)   )
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = start_me_refinement_hp; pos < max_pos2; pos++)
+   {
+     cand_mv_x = *mv_x + (spiral_hpel_search_x[pos]);    // quarter-pel units
+     cand_mv_y = *mv_y + (spiral_hpel_search_y[pos]);    // quarter-pel units
+ 
+     //----- set motion vector cost -----
+     mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+ 
+ 
+     if (mcost >= min_mcost) continue;   // vector cost alone already reaches the bound: skip the distortion computation
+ 
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+ 
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - mcost, cmv_x, cmv_y);
+ 
+     if (pos==0 && check_position0)
+     {
+       mcost -= WEIGHTED_COST (lambda_factor, 16);   // cost bonus for position 0 when check_position0 holds
+     }
+ 
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+   }
+   if (best_pos)   // best_pos == 0 leaves the vector untouched
+   {
+     *mv_x += (spiral_hpel_search_x [best_pos]);
+     *mv_y += (spiral_hpel_search_y [best_pos]);
+   }
+ 
+   if ( !start_me_refinement_qp )
+     min_mcost = INT_MAX;   // the quarter-pel loop starts at position 0 in this case, so the cost bound is dropped
+ 
+ 
+ 
+   /************************************
+   *****                          *****
+   *****  QUARTER-PEL REFINEMENT  *****
+   *****                          *****
+   ************************************/
+ 
+   //===== choose pixel access method again, now around the half-pel result and with the (0, max_pos) range =====
+   if ((pic4_pix_x + *mv_x > 0) && (pic4_pix_x + *mv_x < max_pos_x4) &&
+     (pic4_pix_y + *mv_y > 0) && (pic4_pix_y + *mv_y < max_pos_y4)   )
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   dist_method = Q_PEL + 3 * apply_weights;
+   lambda_factor = lambda[Q_PEL];
+ 
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = start_me_refinement_qp; pos < search_pos4; pos++)
+   {
+     cand_mv_x = *mv_x + spiral_search_x[pos];    // quarter-pel units
+     cand_mv_y = *mv_y + spiral_search_y[pos];    // quarter-pel units
+ 
+     //----- set motion vector cost -----
+     mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+ 
+     if (mcost >= min_mcost) continue;   // vector cost alone already reaches the bound: skip the distortion computation
+ 
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+ 
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - mcost, cmv_x, cmv_y);
+ 
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+   }
+   if (best_pos)   // best_pos == 0 leaves the vector untouched
+   {
+     *mv_x += spiral_search_x [best_pos];
+     *mv_y += spiral_search_y [best_pos];
+   }
+ 
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Bi-predictive sub-pixel block motion search: refines (*mv_x, *mv_y) while (*s_mv_x, *s_mv_y) is only read
+ ***********************************************************************
+ */
+ int                                               //  ==> minimum motion cost after search
+ SubPelBlockSearchBiPred (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame, used as index into listX[list + list_offset]
+                          int       list,          // <--  reference picture list
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in quarter-pel units
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in quarter-pel units
+                          short*    s_mv_x,        // <--  vector (x) of the second prediction, quarter-pel units; only read here
+                          short*    s_mv_y,        // <--  vector (y) of the second prediction, quarter-pel units; only read here
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int*      lambda         // <--  lagrangian factors for the motion cost, indexed by H_PEL and Q_PEL
+                          )
+ {
+   int   apply_weights =  (active_pps->weighted_bipred_idc > 0);
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+ 
+   short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref]     [0]:  wp_offset[list_offset + 1][0  ]     [0]) : 0);
+   short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref]     [0]:  wp_offset[list_offset    ][0  ]     [0]) : 0);
+   // NOTE(review): these indices do not mirror ref_picture1/ref_picture2 below (list 0 uses ref for both lists, list 1 uses entry 0 for both) - confirm intended
+   int   pos, best_pos, mcost;
+   int   cand_mv_x, cand_mv_y;
+ 
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+ 
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);   // block position incl. padding, quarter-pel units
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);   // block position incl. padding, quarter-pel units
+ 
+   int   max_pos2        = ( !start_me_refinement_hp ? imax(1,search_pos2) : search_pos2);   // if the half-pel loop starts at 0, visit at least position 0
+   int   cmv_x, cmv_y;
+   int   smv_x = *s_mv_x + pic4_pix_x;   // second vector as absolute position; computed once and not updated
+   int   smv_y = *s_mv_y + pic4_pix_y;   // second vector as absolute position; computed once and not updated
+ 
+   StorablePicture *ref_picture1 = listX[list       + list_offset][ref];
+   StorablePicture *ref_picture2 = listX[(list ^ 1) + list_offset][0];   // second prediction always uses entry 0 of the opposite list
+ 
+   int max_pos_x4 = ((ref_picture1->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);   // bounds taken from ref_picture1, used for both vectors' access tests
+   int max_pos_y4 = ((ref_picture1->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);   // bounds taken from ref_picture1, used for both vectors' access tests
+   int lambda_factor = lambda[H_PEL];
+ 
+   ref_pic1_sub.luma = ref_picture1->imgY_sub;
+   ref_pic2_sub.luma = ref_picture2->imgY_sub;
+   img_width     = ref_picture1->size_x;
+   img_height    = ref_picture1->size_y;
+   width_pad    = ref_picture1->size_x_pad;
+   height_pad   = ref_picture1->size_y_pad;
+ 
+ 
+   if (apply_weights)
+   {
+     weight1 = list == 0
+       ? wbp_weight[list_offset         ][ref][0][0]
+       : wbp_weight[list_offset + LIST_1][0  ][ref][0];
+     weight2 = list == 0
+       ? wbp_weight[list_offset + LIST_1][ref][0][0]
+       : wbp_weight[list_offset         ][0  ][ref][0];
+     offsetBi=(offset1 + offset2 + 1)>>1;   // rounded average of the two luma offsets
+     computeBiPred = computeBiPred2[H_PEL];
+   }
+   else
+   {
+     weight1 = 1<<luma_log_weight_denom;
+     weight2 = 1<<luma_log_weight_denom;
+     offsetBi = 0;
+     computeBiPred = computeBiPred1[H_PEL];
+   }
+ 
+ 
+   if ( ChromaMEEnable )
+   {
+     ref_pic1_sub.crcb[0] = ref_picture1->imgUV_sub[0];
+     ref_pic1_sub.crcb[1] = ref_picture1->imgUV_sub[1];
+     ref_pic2_sub.crcb[0] = ref_picture2->imgUV_sub[0];
+     ref_pic2_sub.crcb[1] = ref_picture2->imgUV_sub[1];
+     width_pad_cr  = ref_picture1->size_x_cr_pad;
+     height_pad_cr = ref_picture1->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight1_cr[0] = list == 0
+         ? wbp_weight[list_offset         ][ref][0][1]
+         : wbp_weight[list_offset + LIST_1][0  ][ref][1];
+       weight1_cr[1] = list == 0
+         ? wbp_weight[list_offset         ][ref][0][2]
+         : wbp_weight[list_offset + LIST_1][0  ][ref][2];
+       weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
+       weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
+       offsetBi_cr[0] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
+       offsetBi_cr[1] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
+     }
+     else
+     {
+       weight1_cr[0] = 1<<chroma_log_weight_denom;
+       weight1_cr[1] = 1<<chroma_log_weight_denom;
+       weight2_cr[0] = 1<<chroma_log_weight_denom;
+       weight2_cr[1] = 1<<chroma_log_weight_denom;
+       offsetBi_cr[0] = 0;
+       offsetBi_cr[1] = 0;
+     }
+   }
+ 
+ 
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+ 
+   //===== choose access method for the searched vector (bipred2): FAST_ACCESS only when strictly inside (1, max_pos - 1) =====
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1))
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+   // same test for the second vector (bipred1)
+   if ((pic4_pix_x + *s_mv_x > 1) && (pic4_pix_x + *s_mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *s_mv_y > 1) && (pic4_pix_y + *s_mv_y < max_pos_y4 - 1))
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = start_me_refinement_hp; pos < max_pos2; pos++)
+   {
+     cand_mv_x = *mv_x + (spiral_hpel_search_x[pos]);    // quarter-pel units
+     cand_mv_y = *mv_y + (spiral_hpel_search_y[pos]);    // quarter-pel units
+ 
+     //----- set motion vector cost (only the searched vector is costed here) -----
+     mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+ 
+     if (mcost >= min_mcost) continue;   // vector cost alone already reaches the bound: skip the distortion computation
+ 
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+ 
+     mcost += computeBiPred(orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, smv_x, smv_y, cmv_x, cmv_y);
+ 
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+   }
+ 
+   if (best_pos)   // best_pos == 0 leaves the vector untouched
+   {
+     *mv_x += (spiral_hpel_search_x [best_pos]);
+     *mv_y += (spiral_hpel_search_y [best_pos]);
+   }
+ 
+   computeBiPred = apply_weights? computeBiPred2[Q_PEL] : computeBiPred1[Q_PEL];   // select the Q_PEL entry of the distortion function table
+ 
+   /************************************
+   *****                          *****
+   *****  QUARTER-PEL REFINEMENT  *****
+   *****                          *****
+   ************************************/
+   //===== repeat the access-method selection around the half-pel result, with the (0, max_pos) range =====
+   if ((pic4_pix_x + *mv_x > 0) && (pic4_pix_x + *mv_x < max_pos_x4) &&
+     (pic4_pix_y + *mv_y > 0) && (pic4_pix_y + *mv_y < max_pos_y4))
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   if ((pic4_pix_x + *s_mv_x > 0) && (pic4_pix_x + *s_mv_x < max_pos_x4) &&
+     (pic4_pix_y + *s_mv_y > 0) && (pic4_pix_y + *s_mv_y < max_pos_y4))
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   if ( !start_me_refinement_qp )
+     min_mcost = INT_MAX;   // the quarter-pel loop starts at position 0 in this case, so the cost bound is dropped
+ 
+   lambda_factor = lambda[Q_PEL];
+ 
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = start_me_refinement_qp; pos < search_pos4; pos++)
+   {
+     cand_mv_x = *mv_x + spiral_search_x[pos];    // quarter-pel units
+     cand_mv_y = *mv_y + spiral_search_y[pos];    // quarter-pel units
+ 
+     //----- set motion vector cost (only the searched vector is costed here) -----
+     mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+ 
+     if (mcost >= min_mcost) continue;   // vector cost alone already reaches the bound: skip the distortion computation
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+ 
+     mcost += computeBiPred(orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, smv_x, smv_y, cmv_x, cmv_y);
+ 
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+ 
+   }
+ 
+   if (best_pos)   // best_pos == 0 leaves the vector untouched
+   {
+     *mv_x += spiral_search_x [best_pos];
+     *mv_y += spiral_search_y [best_pos];
+   }
+ 
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/me_fullsearch.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_fullsearch.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_fullsearch.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,37 ----
+ 
+ /*!
+  ************************************************************************
+  * \file
+  *     me_fullsearch.h
+  *
+  * \author
+  *    Alexis Michael Tourapis        <alexis.tourapis at dolby.com>
+  *
+  * \date
+  *    9 September 2006
+  *
+  * \brief
+  *    Header file declaring the full-search motion estimation entry points (integer-pel and sub-pel, single and bi-predictive)
+  **************************************************************************
+  */
+ 
+ 
+ #ifndef _ME_FULLSEARCH_H_
+ #define _ME_FULLSEARCH_H_
+ extern int FullPelBlockMotionSearch (imgpel* orig_pic, short ref, int list, int pic_pix_x, int pic_pix_y,
+                               int blocktype, short pred_mv_x, short pred_mv_y, short* mv_x, short* mv_y,
+                               int search_range,  int min_mcost, int lambda_factor);
+ extern int FullPelBlockMotionBiPred (imgpel* orig_pic, short ref, int list, int pic_pix_x, int pic_pix_y,
+                               int blocktype, short pred_mv_x1, short pred_mv_y1, short pred_mv_x2, short pred_mv_y2,
+                               short* mv_x1, short* mv_y1, short* mv_x2, short* mv_y2,
+                               int search_range, int min_mcost, int lambda_factor);
+ extern int SubPelBlockMotionSearch  (imgpel* orig_pic, short ref, int list, int pic_pix_x, int pic_pix_y,
+                               int blocktype, short pred_mv_x, short pred_mv_y, short* mv_x, short* mv_y,
+                               int search_pos2, int search_pos4, int min_mcost, int* lambda_factor);   // lambda_factor (named lambda in the definition) is an array indexed by H_PEL / Q_PEL
+ extern int SubPelBlockSearchBiPred  (imgpel* orig_pic, short ref, int list, int pic_pix_x, int pic_pix_y,
+                               int blocktype, short pred_mv_x, short pred_mv_y,
+                               short* mv_x1, short* mv_y1, short* mv_x2, short* mv_y2,   // definition names: mv_x, mv_y (refined) and s_mv_x, s_mv_y (only read)
+                               int search_pos2, int search_pos4, int min_mcost, int* lambda_factor);   // lambda_factor (named lambda in the definition) is an array indexed by H_PEL / Q_PEL
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_umhex.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_umhex.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_umhex.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,1550 ----
+ 
+ /*!
+  ************************************************************************
+  *
+  * \file me_umhex.c
+  *
+  * \brief
+  *   Fast integer pel motion estimation and fractional pel motion estimation
+  *   algorithms are described in this file.
+  *   1. UMHEX_get_mem() and UMHEX_free_mem() allocate and release the
+  *      memory used by motion estimation
+  *   2. UMHEX_BlockMotionSearch() is the function for fast integer pel motion
+  *      estimation and fractional pel motion estimation
+  *   3. UMHEX_DefineThreshold() defines the thresholds for early termination
+  * \author
+  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>
+  *    - Wenfang Fu         <fwf at video.mdc.tsinghua.edu.cn>
+  *    - Xiaozhong Xu       <xxz at video.mdc.tsinghua.edu.cn>
+  * \date
+  *    2006.1
+  ************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ #include <limits.h>
+ 
+ #include "global.h"
+ #include "memalloc.h"
+ #include "me_umhex.h"
+ #include "refbuf.h"
+ #include "mb_access.h"
+ #include "image.h"
+ #include "me_distortion.h"
+ 
+ #define Q_BITS          15    // base shift; UMHEX_DefineThresholdMB() adds the QP quotient to it
+ #define MIN_IMG_WIDTH   176   // image width divisor used for the threshold scale factor
+ extern  int*   byte_abs;
+ extern  int*   mvbits;
+ extern  short*   spiral_search_x;
+ extern  short*   spiral_search_y;
+ 
+ // x/y offset tables for the search patterns; presumably consumed by the search routines further down the file
+ static const int Diamond_x[4] = {-1, 0, 1, 0};
+ static const int Diamond_y[4] = {0, 1, 0, -1};
+ static const int Hexagon_x[6] = {2, 1, -1, -2, -1, 1};
+ static const int Hexagon_y[6] = {0, -2, -2, 0,  2, 2};
+ static const int Big_Hexagon_x[16] = {0,-2, -4,-4,-4, -4, -4, -2,  0,  2,  4,  4, 4, 4, 4, 2};
+ static const int Big_Hexagon_y[16] = {4, 3, 2,  1, 0, -1, -2, -3, -4, -3, -2, -1, 0, 1, 2, 3};
+ 
+ // for bipred mode
+ static int pred_MV_ref_flag;
+ static int dist_method;
+ static StorablePicture *ref_pic_ptr;
+ // base thresholds; UMHEX_DefineThresholdMB() scales entries 1..7 into the *_MB tables (entry 0 is not read there)
+ static const int   Multi_Ref_Thd[8]   = {0,  300,  120,  120,  60,  30,   30,  15};
+ static const int   Big_Hexagon_Thd[8] = {0, 3000, 1500, 1500, 800, 400,  400, 200};
+ static const int   Median_Pred_Thd[8] = {0,  750,  350,  350, 170,  80,   80,  40};
+ static const int   Threshold_DSR[8]   = {0, 2200, 1000, 1000, 500, 250,  250, 120};
+ 
+ static int Median_Pred_Thd_MB[8];
+ static int Big_Hexagon_Thd_MB[8];
+ static int Multi_Ref_Thd_MB[8];
+ 
+ // first index is the QP remainder (mod 6); UMHEX_DefineThresholdMB() reads only entry [rem][0][0]
+ static const int quant_coef[6][4][4] = {
+   {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}},
+   {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}},
+   {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}},
+   {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}},
+   {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}},
+   {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}}
+ };
+ 
+ 
+ void UMHEX_DefineThreshold()   // sets AlphaFourth_1/AlphaFourth_2 entries 1..7 (entry 0 is not written), then calls UMHEX_DefineThresholdMB()
+ {
+   AlphaFourth_1[1] = 0.01f;
+   AlphaFourth_1[2] = 0.01f;
+   AlphaFourth_1[3] = 0.01f;
+   AlphaFourth_1[4] = 0.02f;
+   AlphaFourth_1[5] = 0.03f;
+   AlphaFourth_1[6] = 0.03f;
+   AlphaFourth_1[7] = 0.04f;
+ 
+   AlphaFourth_2[1] = 0.06f;
+   AlphaFourth_2[2] = 0.07f;
+   AlphaFourth_2[3] = 0.07f;
+   AlphaFourth_2[4] = 0.08f;
+   AlphaFourth_2[5] = 0.12f;
+   AlphaFourth_2[6] = 0.11f;   // note: smaller than entry 5, unlike the otherwise non-decreasing sequence
+   AlphaFourth_2[7] = 0.15f;
+ 
+   UMHEX_DefineThresholdMB();   // derive the QP- and size-dependent thresholds
+   return;
+ }
+ /*!
+  ************************************************************************
+  * \brief
+  *    Set MB thresholds for fast motion estimation: fills Bsize[1..7] and
+  *    the *_MB threshold tables from input->qpN, input->UMHexScale and img->width.
+  *    The thresholds may be adjusted to trade off rate-distortion performance and UMHEX speed
+  ************************************************************************
+  */
+ 
+ void UMHEX_DefineThresholdMB()
+ {
+   int gb_qp_per    = (input->qpN-MIN_QP)/6;   // quotient of the QP (offset by MIN_QP) divided by 6
+   int gb_qp_rem    = (input->qpN-MIN_QP)%6;   // remainder, selects the quant_coef row
+ 
+   int gb_q_bits    = Q_BITS+gb_qp_per;
+   int gb_qp_const,Thresh4x4;
+ 
+   float Quantize_step;
+   int i;
+ // scale factor for different image sizes; NOTE(review): img->width/MIN_IMG_WIDTH truncates if img->width is an integer - confirm intended
+   float scale_factor = (float)((1-input->UMHexScale*0.1)+input->UMHexScale*0.1*(img->width/MIN_IMG_WIDTH));
+ // QP factor: falls linearly from 1.0 at qpN = 0 to 0.10 at qpN = 51
+   float QP_factor = (float)((1.0-0.90*(input->qpN/51.0f)));
+ 
+   gb_qp_const=(1<<gb_q_bits)/6;
+   Thresh4x4 =   ((1<<gb_q_bits) - gb_qp_const)/quant_coef[gb_qp_rem][0][0];
+   Quantize_step = Thresh4x4/(4*5.61f)*2.0f*scale_factor;
+   Bsize[7]=(16*16)*Quantize_step;
+   // every entry below is 4x another entry: [6] and [5] from [7]; [4] from [5]; [3] and [2] from [4]; [1] from [2]
+   Bsize[6]=Bsize[7]*4;
+   Bsize[5]=Bsize[7]*4;
+   Bsize[4]=Bsize[5]*4;
+   Bsize[3]=Bsize[4]*4;
+   Bsize[2]=Bsize[4]*4;
+   Bsize[1]=Bsize[2]*4;
+ 
+   for(i=1;i<8;i++)
+   {
+     //ET_Thd1: early termination after median prediction
+     Median_Pred_Thd_MB[i]  = (int) (Median_Pred_Thd[i]* scale_factor*QP_factor);
+     //ET_thd2: early termination after every circle of 16 points Big-Hex Search
+     Big_Hexagon_Thd_MB[i]  = (int) (Big_Hexagon_Thd[i]* scale_factor*QP_factor);
+     //threshold for multi ref case
+     Multi_Ref_Thd_MB[i]    = (int) (Multi_Ref_Thd[i]  * scale_factor*QP_factor);
+     //threshold for usage of the DSR technique (see JVT-R088)
+     Threshold_DSR_MB[i]    = (int) (Threshold_DSR[i]  * scale_factor*QP_factor);
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocation of space for fast motion estimation; returns the sum of the values returned by the get_mem* helpers
+  ************************************************************************
+  */
+ int UMHEX_get_mem()
+ {
+   int memory_size = 0;   // accumulates the get_mem* return values; the flag_intra calloc below is not added
+   if (NULL==(flag_intra = calloc ((img->width>>4)+1,sizeof(byte)))) no_mem_exit("UMHEX_get_mem: flag_intra"); //fwf 20050330
+ 
+   memory_size += get_mem2D(&McostState, 2*input->search_range+1, 2*input->search_range+1);
+   memory_size += get_mem4Dint(&(fastme_ref_cost), img->max_num_references, 9, 4, 4);
+   memory_size += get_mem3Dint(&(fastme_l0_cost), 9, img->height/4, img->width/4);
+   memory_size += get_mem3Dint(&(fastme_l1_cost), 9, img->height/4, img->width/4);
+   memory_size += get_mem2D(&SearchState,7,7);
+   memory_size += get_mem2Dint(&(fastme_best_cost), 7, img->width/4);
+   if(input->BiPredMotionEstimation == 1)//memory allocation for bipred mode
+   {
+     memory_size += get_mem3Dint(&(fastme_l0_cost_bipred), 9, img->height/4, img->width/4);//for bipred
+     memory_size += get_mem3Dint(&(fastme_l1_cost_bipred), 9, img->height/4, img->width/4);//for bipred
+   }
+ 
+   return memory_size;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Free space for fast motion estimation; the bipred buffers are released only when input->BiPredMotionEstimation == 1
+  ************************************************************************
+  */
+ void UMHEX_free_mem()
+ {
+   free_mem2D(McostState);
+   free_mem4Dint(fastme_ref_cost, img->max_num_references, 9);
+   free_mem3Dint(fastme_l0_cost, 9);
+   free_mem3Dint(fastme_l1_cost, 9);
+   free_mem2D(SearchState);
+   free_mem2Dint(fastme_best_cost);
+   free (flag_intra);
+   if(input->BiPredMotionEstimation == 1)   // same condition as the allocation in UMHEX_get_mem()
+   {
+     free_mem3Dint(fastme_l0_cost_bipred, 9);//for bipred
+     free_mem3Dint(fastme_l1_cost_bipred, 9);//for bipred
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    UMHEXIntegerPelBlockMotionSearch: fast pixel block motion search
+  *    this algorithm is called UMHexagonS (see JVT-D016), which includes
+  *    four steps with different kinds of search patterns
+  * \par Input:
+  * imgpel*   orig_pic,     // <--  original picture
+  * int       ref,          // <--  reference frame (0... or -1 (backward))
+  * int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+  * int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+  * int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+  * int       pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+  * int       pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+  * int*      mv_x,         //  --> motion vector (x) - in pel units
+  * int*      mv_y,         //  --> motion vector (y) - in pel units
+  * int       search_range, // <--  1-d search range in pel units
+  * int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+  * int       lambda_factor // <--  lagrangian parameter for determining motion cost
+  * \par
+  * Two macro definitions defined in this program:
+  * 1. EARLY_TERMINATION: early termination algorithm, refer to JVT-D016.doc
+  * 2. SEARCH_ONE_PIXEL: search one pixel in search range
+  * \author
+  *   Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *   - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *   - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>
+  *   - Xiaozhong Xu       <xxz at video.mdc.tsinghua.edu.cn>
+  * \date   :
+  *   2006.1
+  ************************************************************************
+  */
+ int                                     //  ==> minimum motion cost after search
+ UMHEXIntegerPelBlockMotionSearch  (
+                                   imgpel   *orig_pic,     //!< <--  not used
+                                   short     ref,          //!< <--  reference frame (0... or -1 (backward))
+                                   int       list,         //!< <--  reference picture list
+                                   int       pic_pix_x,    //!< <--  absolute x-coordinate of regarded AxB block
+                                   int       pic_pix_y,    //!< <--  absolute y-coordinate of regarded AxB block
+                                   int       blocktype,    //!< <--  block type (1-16x16 ... 7-4x4)
+                                   short     pred_mv_x,    //!< <--  motion vector predictor (x) in sub-pel units
+                                   short     pred_mv_y,    //!< <--  motion vector predictor (y) in sub-pel units
+                                   short*    mv_x,         //!< --> motion vector (x) - in pel units
+                                   short*    mv_y,         //!< --> motion vector (y) - in pel units
+                                   int       search_range, //!< <--  1-d search range in pel units
+                                   int       min_mcost,    //!< <--  minimum motion cost (cost for center or huge value)
+                                   int       lambda_factor //!< <--  lagrangian parameter for determining motion cost
+                                   )
+ {
+   int   list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
+                          img->current_mb_nr%2 ? 4 : 2 : 0;
+   int   mvshift       = 2;                                        //!< motion vector shift for getting sub-pel units
+   int   blocksize_y   = input->blc_size[blocktype][1];            //!< vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            //!< horizontal block size
+   int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;       //!< predicted position x (in sub-pel units)
+   int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;       //!< predicted position y (in sub-pel units)
+   int   center_x      = pic_pix_x + *mv_x;                        //!< center position x (in pel units)
+   int   center_y      = pic_pix_y + *mv_y;                        //!< center position y (in pel units)
+   int   best_x        = 0, best_y = 0;
+   int   search_step, iYMinNow, iXMinNow;
+   int   pos, cand_x, cand_y,  mcost;
+   int   i,m,j;
+   float betaFourth_1,betaFourth_2;
+   int  temp_Big_Hexagon_x[16];//  temp for Big_Hexagon_x;
+   int  temp_Big_Hexagon_y[16];//  temp for Big_Hexagon_y;
+   short mb_x = pic_pix_x - img->opix_x;
+   short mb_y = pic_pix_y - img->opix_y;
+   short pic_pix_x2 = pic_pix_x >> 2;
+   short block_x = (mb_x >> 2);
+   short block_y = (mb_y >> 2);
+   int ET_Thred = Median_Pred_Thd_MB[blocktype];//ET threshold in use
+   int   *SAD_prediction = fastme_best_cost[blocktype-1];//multi ref SAD prediction
+   //===== Use weighted Reference for ME ====
+ 
+   int  apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+ 
+   dist_method = F_PEL + 3 * apply_weights;
+ 
+   ref_pic_ptr = listX[list+list_offset][ref];
+ 
+   // Note that following seem to be universal for all functions and could be moved to a separate, clean public function in me_distortion.c
+   ref_pic_sub.luma = ref_pic_ptr->imgY_sub;
+   img_width  = ref_pic_ptr->size_x;
+   img_height = ref_pic_ptr->size_y;
+   width_pad  = ref_pic_ptr->size_x_pad;
+   height_pad = ref_pic_ptr->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if (ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_pic_ptr->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_pic_ptr->imgUV_sub[1];
+     width_pad_cr  = ref_pic_ptr->size_x_cr_pad;
+     height_pad_cr = ref_pic_ptr->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   if ((center_x > search_range) && (center_x < img_width - 1 - search_range - blocksize_x) &&
+     (center_y > search_range) && (center_y < img_height - 1 - search_range - blocksize_y))
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   //////allocate memory for search state//////////////////////////
+   memset(McostState[0],0,(2*input->search_range+1)*(2*input->search_range+1));
+ 
+ 
+   //check the center median predictor
+   cand_x = center_x ;
+   cand_y = center_y ;
+   mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
+ 
+   mcost += computeUniPred[dist_method](orig_pic, blocksize_y,blocksize_x, min_mcost - mcost,
+     (cand_x << 2) + IMG_PAD_SIZE_TIMES4, (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
+ 
+   McostState[search_range][search_range] = 1;
+   if (mcost < min_mcost)
+   {
+     min_mcost = mcost;
+     best_x    = cand_x;
+     best_y    = cand_y;
+   }
+ 
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   for (m = 0; m < 4; m++)
+   {
+     cand_x = iXMinNow + Diamond_x[m];
+     cand_y = iYMinNow + Diamond_y[m];
+     SEARCH_ONE_PIXEL
+   }
+ 
+   if(center_x != pic_pix_x || center_y != pic_pix_y)
+   {
+     cand_x = pic_pix_x ;
+     cand_y = pic_pix_y ;
+     SEARCH_ONE_PIXEL
+ 
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_x[m];
+       cand_y = iYMinNow + Diamond_y[m];
+       SEARCH_ONE_PIXEL
+     }
+   }
+   /***********************************init process*************************/
+   //for multi ref
+   if(ref>0 && img->structure == FRAME  && min_mcost > ET_Thred && SAD_prediction[pic_pix_x2]<Multi_Ref_Thd_MB[blocktype])
+     goto terminate_step;
+ 
+   //ET_Thd1: early termination for low motion case
+   if( min_mcost < ET_Thred)
+   {
+     goto terminate_step;
+   }
+   else // hybrid search for main search loop
+   {
+     /****************************(MV and SAD prediction)********************************/
+     UMHEX_setup(ref, list, block_y, block_x, blocktype, img->all_mv );
+     ET_Thred = Big_Hexagon_Thd_MB[blocktype];  // ET_Thd2: early termination Threshold for strong motion
+ 
+ 
+ 
+     // Threshold defined for EARLY_TERMINATION
+     if (pred_SAD == 0)
+     {
+       betaFourth_1=0;
+       betaFourth_2=0;
+     }
+     else
+     {
+       betaFourth_1 = Bsize[blocktype]/(pred_SAD*pred_SAD)-AlphaFourth_1[blocktype];
+       betaFourth_2 = Bsize[blocktype]/(pred_SAD*pred_SAD)-AlphaFourth_2[blocktype];
+ 
+     }
+     /*********************************************end of init ***********************************************/
+   }
+   // first_step: initial start point prediction
+ 
+   if(blocktype>1)
+   {
+     cand_x = pic_pix_x + (pred_MV_uplayer[0]/4);
+     cand_y = pic_pix_y + (pred_MV_uplayer[1]/4);
+     SEARCH_ONE_PIXEL
+   }
+ 
+ 
+   //prediction using mV of last ref moiton vector
+   if(pred_MV_ref_flag == 1)      //Notes: for interlace case, ref==1 should be added
+   {
+     cand_x = pic_pix_x + (pred_MV_ref[0]/4);
+     cand_y = pic_pix_y + (pred_MV_ref[1]/4);
+     SEARCH_ONE_PIXEL
+   }
+   //small local search
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   for (m = 0; m < 4; m++)
+   {
+     cand_x = iXMinNow + Diamond_x[m];
+     cand_y = iYMinNow + Diamond_y[m];
+     SEARCH_ONE_PIXEL
+   }
+ 
+   //early termination algorithm, refer to JVT-G016
+   EARLY_TERMINATION
+ 
+   if(blocktype>6)
+     goto fourth_1_step;
+   else
+     goto sec_step;
+ 
+ sec_step: //Unsymmetrical-cross search
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+ 
+   for(i = 1; i < search_range; i+=2)
+   {
+     search_step = i;
+     cand_x = iXMinNow + search_step;
+     cand_y = iYMinNow ;
+     SEARCH_ONE_PIXEL
+     cand_x = iXMinNow - search_step;
+     cand_y = iYMinNow ;
+     SEARCH_ONE_PIXEL
+   }
+   for(i = 1; i < (search_range/2);i+=2)
+   {
+     search_step = i;
+     cand_x = iXMinNow ;
+     cand_y = iYMinNow + search_step;
+     SEARCH_ONE_PIXEL
+     cand_x = iXMinNow ;
+     cand_y = iYMinNow - search_step;
+     SEARCH_ONE_PIXEL
+   }
+ 
+ 
+   //early termination alogrithm, refer to JVT-G016
+   EARLY_TERMINATION
+ 
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+ 
+   //third_step:    // Uneven Multi-Hexagon-grid Search
+   //sub step 1: 5x5 squre search
+   for(pos=1;pos<25;pos++)
+   {
+     cand_x = iXMinNow + spiral_search_x[pos];
+     cand_y = iYMinNow + spiral_search_y[pos];
+     SEARCH_ONE_PIXEL
+   }
+ 
+   //early termination alogrithm, refer to JVT-G016
+   EARLY_TERMINATION
+ 
+   //sub step 2:  Multi-Hexagon-grid search
+   memcpy(temp_Big_Hexagon_x,Big_Hexagon_x,64);
+   memcpy(temp_Big_Hexagon_y,Big_Hexagon_y,64);
+   for(i=1;i<=(search_range/4); i++)
+   {
+ 
+     for (m = 0; m < 16; m++)
+     {
+       cand_x = iXMinNow + temp_Big_Hexagon_x[m];
+       cand_y = iYMinNow + temp_Big_Hexagon_y[m];
+       temp_Big_Hexagon_x[m] += Big_Hexagon_x[m];
+       temp_Big_Hexagon_y[m] += Big_Hexagon_y[m];
+ 
+       SEARCH_ONE_PIXEL
+     }
+     // ET_Thd2: early termination Threshold for strong motion
+     if(min_mcost < ET_Thred)
+     {
+       goto terminate_step;
+     }
+   }
+ 
+ 
+   //fourth_step:  //Extended Hexagon-based Search
+   // the fourth step with a small search pattern
+ fourth_1_step:  //sub step 1: small Hexagon search
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_x[m];
+       cand_y = iYMinNow + Hexagon_y[m];
+       SEARCH_ONE_PIXEL
+     }
+ 
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ fourth_2_step: //sub step 2: small Diamond search
+ 
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_x[m];
+       cand_y = iYMinNow + Diamond_y[m];
+       SEARCH_ONE_PIXEL
+     }
+     if(best_x == iXMinNow && best_y == iYMinNow)
+       break;
+   }
+ 
+ terminate_step:
+ 
+   // store SAD infomation for prediction
+   //FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3
+   for (i=0; i < (blocksize_x>>2); i++)
+   {
+     for (j=0; j < (blocksize_y>>2); j++)
+     {
+       if(list == 0)
+       {
+         fastme_ref_cost[ref][blocktype][block_y+j][block_x+i] = min_mcost;
+         if (ref==0)
+           fastme_l0_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+       }
+       else
+       {
+         fastme_l1_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+       }
+     }
+   }
+   //for multi ref SAD prediction
+   if ((ref==0) || (SAD_prediction[pic_pix_x2] > min_mcost))
+     SAD_prediction[pic_pix_x2] = min_mcost;
+ 
+   *mv_x = (short) (best_x - pic_pix_x);
+   *mv_y = (short) (best_y - pic_pix_y);
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    UMHEXSubPelBlockMotionSearch: sub-pel refinement of a motion vector.
+  *    Evaluates the incoming vector (unless start_me_refinement_hp is set),
+  *    then the candidate given by the fractional part of the predictor, and
+  *    finally runs up to three small-diamond iterations that stay within
+  *    +/-3 units of the incoming vector.
+  * \note
+  *    mv_x / mv_y are added directly to the 4x-scaled block position
+  *    (pic4_pix_x / pic4_pix_y), i.e. they are handled in quarter-pel units
+  *    here. search_pos2 and search_pos4 are not used by this function.
+  * \return
+  *    minimum motion cost found by the search
+  ************************************************************************
+  */
+ int                                                   //  ==> minimum motion cost after search
+ UMHEXSubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                              short     ref,           // <--  reference frame (0... or -1 (backward))
+                              int       list,
+                              int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                              int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                              int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                              short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                              short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                              short*    mv_x,          // <--> in: search center (x) / out: motion vector (x)
+                              short*    mv_y,          // <--> in: search center (y) / out: motion vector (y)
+                              int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                              int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                              int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                              int       lambda_factor)
+ {
+   static const int Diamond_x[4] = {-1, 0, 1, 0};
+   static const int Diamond_y[4] = {0, 1, 0, -1};
+   int   mcost;
+   int   cand_mv_x, cand_mv_y;
+ 
+   int   list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? img->current_mb_nr%2 ? 4 : 2 : 0;
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+ 
+   int   mv_shift        = 0;
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+   // kept as int: the 4x-scaled bound does not fit into a short for very large pictures
+   int   max_pos_x4      = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int   max_pos_y4      = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+ 
+   int   search_range_dynamic,iXMinNow,iYMinNow,i;
+   int   m,currmv_x = 0,currmv_y = 0;
+   int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
+ 
+   int  apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+ 
+   // select the distortion function: quarter-pel variant, shifted by 3 entries when weights apply
+   dist_method = Q_PEL + 3 * apply_weights;
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1)   )
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   img_width  = ref_picture->size_x;
+   img_height = ref_picture->size_y;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if (ChromaMEEnable )
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   search_range_dynamic = 3;
+   // fractional offset of the predictor relative to the incoming vector (-3..3)
+   pred_frac_mv_x = (pred_mv_x - *mv_x)%4;
+   pred_frac_mv_y = (pred_mv_y - *mv_y)%4;
+ 
+   // reset the 7x7 visited-position map
+   // NOTE(review): clears through SearchState[0], i.e. assumes the rows are stored contiguously - confirm against the allocator
+   memset(SearchState[0],0,(2*search_range_dynamic+1)*(2*search_range_dynamic+1));
+ 
+   if( !start_me_refinement_hp )
+   {
+     // evaluate the incoming vector itself
+     cand_mv_x = *mv_x;
+     cand_mv_y = *mv_y;
+     mcost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+ 
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+     SearchState[search_range_dynamic][search_range_dynamic] = 1;
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x = cand_mv_x;
+       currmv_y = cand_mv_y;
+     }
+   }
+   else
+   {
+     // center is only marked as visited; min_mcost is taken as passed in
+     SearchState[search_range_dynamic][search_range_dynamic] = 1;
+     currmv_x = *mv_x;
+     currmv_y = *mv_y;
+   }
+ 
+   // candidate at the fractional position of the predictor
+   if(pred_frac_mv_x!=0 || pred_frac_mv_y!=0)
+   {
+     cand_mv_x = *mv_x + pred_frac_mv_x;
+     cand_mv_y = *mv_y + pred_frac_mv_y;
+     mcost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+     SearchState[cand_mv_y -*mv_y + search_range_dynamic][cand_mv_x - *mv_x + search_range_dynamic] = 1;
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x = cand_mv_x;
+       currmv_y = cand_mv_y;
+     }
+   }
+ 
+ 
+   // iterative small-diamond refinement, restricted to the 7x7 window around the incoming vector
+   iXMinNow = currmv_x;
+   iYMinNow = currmv_y;
+   for(i=0;i<search_range_dynamic;i++)
+   {
+     abort_search=1;
+     for (m = 0; m < 4; m++)
+     {
+       cand_mv_x = iXMinNow + Diamond_x[m];
+       cand_mv_y = iYMinNow + Diamond_y[m];
+ 
+       if(iabs(cand_mv_x - *mv_x) <=search_range_dynamic && iabs(cand_mv_y - *mv_y)<= search_range_dynamic)
+       {
+         // skip positions that were already evaluated
+         if(!SearchState[cand_mv_y -*mv_y+ search_range_dynamic][cand_mv_x -*mv_x+ search_range_dynamic])
+         {
+           mcost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+           mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+             min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+           SearchState[cand_mv_y - *mv_y + search_range_dynamic][cand_mv_x - *mv_x + search_range_dynamic] = 1;
+           if (mcost < min_mcost)
+           {
+             min_mcost = mcost;
+             currmv_x = cand_mv_x;
+             currmv_y = cand_mv_y;
+             abort_search = 0;
+           }
+         }
+       }
+     }
+     iXMinNow = currmv_x;
+     iYMinNow = currmv_y;
+     // no candidate improved the cost in this iteration
+     if(abort_search)
+       break;
+   }
+ 
+   *mv_x = currmv_x;
+   *mv_y = currmv_y;
+ 
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  * Functions for SAD prediction of intra block cases.
+  * 1. void UMHEX_decide_intrabk_SAD() judges the block coding type (intra/inter)
+  *    of neighbouring blocks
+  * 2. void UMHEX_skip_intrabk_SAD() sets the SAD to zero if the neighbouring block
+  *    coding type is intra
+  * \date
+  *    2003.4
+  ************************************************************************
+  */
+ void UMHEX_decide_intrabk_SAD()
+ {
+   int mb_col;
+ 
+   // nothing is decided for I slices; flag_intra_SAD is left untouched
+   if (img->type == I_SLICE)
+     return;
+ 
+   // flag_intra holds one entry per 16-pixel column
+   mb_col = (img->pix_x) >> 4;
+ 
+   if (img->pix_x == 0)
+   {
+     // first column: no flag at the very first position, otherwise only the entry of this column
+     flag_intra_SAD = (img->pix_y == 0) ? 0 : flag_intra[mb_col];
+   }
+   else if (img->pix_y == 0)
+   {
+     // first row: only the entry of the column to the left
+     flag_intra_SAD = flag_intra[mb_col - 1];
+   }
+   else
+   {
+     // elsewhere: set if this column or either adjacent column is flagged
+     flag_intra_SAD = (flag_intra[mb_col] || flag_intra[mb_col - 1] || flag_intra[mb_col + 1]);
+   }
+ }
+ 
+ // Records whether the current macroblock column was coded with mode 9 or 10
+ // and, outside I slices, clears the stored costs used for SAD prediction.
+ // NOTE(review): modes 9 and 10 are presumably the intra macroblock modes -
+ // confirm against the mode enumeration.
+ void UMHEX_skip_intrabk_SAD(int best_mode, int ref_max)
+ {
+   int bx, by, btype, r;
+   int intra_mode = (best_mode == 9 || best_mode == 10);
+ 
+   if (img->number > 0)
+     flag_intra[(img->pix_x)>>4] = intra_mode ? 1 : 0;
+ 
+   if (img->type == I_SLICE || !intra_mode)
+     return;
+ 
+   // NOTE(review): the l0/l1 tables are cleared at indices 0..3 here, while the
+   // integer-pel search stores into them at picture-relative indices - confirm
+   // this is intended.
+   for (bx = 0; bx < 4; bx++)
+   {
+     for (by = 0; by < 4; by++)
+     {
+       for (btype = 0; btype < 9; btype++)
+       {
+         fastme_l0_cost[btype][by][bx] = 0;
+         fastme_l1_cost[btype][by][bx] = 0;
+         for (r = 0; r < ref_max; r++)
+           fastme_ref_cost[r][btype][by][bx] = 0;
+       }
+     }
+   }
+ }
+ 
+ 
+ // Prepares the predictors used by the integer-pel search for one block:
+ //   pred_MV_uplayer  - motion vector of the associated larger block type
+ //   pred_MV_ref      - motion vector scaled from another reference (valid when pred_MV_ref_flag == 1)
+ //   pred_SAD         - predicted cost taken from previously stored costs
+ void UMHEX_setup(short ref, int list, int block_y, int block_x, int blocktype, short   ******all_mv)
+ {
+   // block type whose results act as the "upper layer" for each block type
+   int uplayer_of[8] = {0,0,1,1,2,4,4,5};
+   int uplayer_type  = 0;
+   int total_B       = input->successive_Bframe;
+   int cur_B         = (total_B) ? (frame_ctr[B_SLICE]%(total_B+1)): 0;
+   // field pictures go back two reference indices at a time, frames one
+   int ref_step      = img->field_picture ? 2 : 1;
+   int ref_dist      = img->field_picture ? (ref>>1) : ref;
+   int prev_ref;
+   int axis;
+ 
+   /**************************** MV prediction **********************/
+   // upper-layer prediction
+   if (blocktype>1)
+   {
+     uplayer_type = uplayer_of[blocktype];
+     for (axis = 0; axis < 2; axis++)
+       pred_MV_uplayer[axis] = all_mv[block_y][block_x][list][ref][uplayer_type][axis];
+   }
+ 
+   // reference-frame prediction (list 0 only)
+   pred_MV_ref_flag = 0;
+   if (list==0)
+   {
+     if (ref >= ref_step)
+     {
+       // scale the vector found for the previous reference to the current distance
+       for (axis = 0; axis < 2; axis++)
+       {
+         pred_MV_ref[axis] = all_mv[block_y][block_x][0][ref-ref_step][blocktype][axis];
+         pred_MV_ref[axis] = (int)(pred_MV_ref[axis]*(ref_dist+1)/(float)(ref_dist));
+       }
+       pred_MV_ref_flag = 1;
+     }
+     if (img->type == B_SLICE && ref >= 0 && ref < ref_step)
+     {
+       // B slice, first reference: mirror and scale the list-1 vector
+       for (axis = 0; axis < 2; axis++)
+         pred_MV_ref[axis] =(int) (all_mv[block_y][block_x][1][0][blocktype][axis]*(-cur_B)/(total_B-cur_B+1.0f));
+       pred_MV_ref_flag = 1;
+     }
+   }
+ 
+   /******************************SAD prediction**********************************/
+   if (list==0 && ref>0)  // predict from the cost of a previous reference
+   {
+     prev_ref = (ref >= ref_step) ? (ref - ref_step) : 0;
+     // a flagged intra neighbourhood disables the prediction (irregular motion)
+     pred_SAD = flag_intra_SAD ? 0 : fastme_ref_cost[prev_ref][blocktype][block_y][block_x];
+   }
+   else if (blocktype>1)  // predict from the upper-layer block cost
+   {
+     if (flag_intra_SAD)
+     {
+       pred_SAD = 0;
+     }
+     else
+     {
+       pred_SAD = (list==1) ? (fastme_l1_cost[uplayer_type][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]) : (fastme_l0_cost[uplayer_type][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
+       pred_SAD /= 2;
+     }
+   }
+   else
+   {
+     pred_SAD = 0;  // no prediction available
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    UMHEXBipredIntegerPelBlockMotionSearch: fast pixel block motion search for bipred mode
+  *    this algorithm is called UMHexagonS (see JVT-D016), which includes
+  *    four steps with different kinds of search patterns
+  * \author
+  *   Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *   - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *   - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>
+  *   - Xiaozhong Xu       <xxz at video.mdc.tsinghua.edu.cn>
+  * \date   :
+  *   2006.1
+  ************************************************************************
+  */
+ int                                                //  ==> minimum motion cost after search
+ UMHEXBipredIntegerPelBlockMotionSearch (imgpel*   cur_pic,      // <--  original pixel values for the AxB block
+                           short       ref,         // <--  reference frame (0... or -1 (backward))
+                           int       list,
+                           int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                           int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                           int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                           short     pred_mv_x1,    // <--  motion vector predictor (x) in sub-pel units
+                           short     pred_mv_y1,    // <--  motion vector predictor (y) in sub-pel units
+                           short     pred_mv_x2,    // <--  motion vector predictor (x) in sub-pel units
+                           short     pred_mv_y2,    // <--  motion vector predictor (y) in sub-pel units
+                           short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           short*    s_mv_x,        // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    s_mv_y,        // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           int       search_range,  // <--  1-d search range in pel units
+                           int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                           int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ 
+ {
+   int   temp_Big_Hexagon_x[16];// = Big_Hexagon_x;
+   int   temp_Big_Hexagon_y[16];// = Big_Hexagon_y;
+   int   mvshift       = 2;                  // motion vector shift for getting sub-pel units
+ 
+   int   search_step,iYMinNow, iXMinNow;
+   int   i,m,j;
+   float betaFourth_1,betaFourth_2;
+   int   pos, cand_x, cand_y,mcost;
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+   int   pred_x1        = (pic_pix_x << 2) + pred_mv_x1;       // predicted position x (in sub-pel units)
+   int   pred_y1        = (pic_pix_y << 2) + pred_mv_y1;       // predicted position y (in sub-pel units)
+   int   pred_x2        = (pic_pix_x << 2) + pred_mv_x2;       // predicted position x (in sub-pel units)
+   int   pred_y2        = (pic_pix_y << 2) + pred_mv_y2;       // predicted position y (in sub-pel units)
+   short center2_x      = pic_pix_x + *mv_x;                      // center position x (in pel units)
+   short center2_y      = pic_pix_y + *mv_y;                      // center position y (in pel units)
+   short center1_x      = pic_pix_x + *s_mv_x;                      // mvx of second pred (in pel units)
+   short center1_y      = pic_pix_y + *s_mv_y;                      // mvy of second pred (in pel units)
+   short mb_x = pic_pix_x - img->opix_x;
+   short mb_y = pic_pix_y - img->opix_y;
+   short block_x = (mb_x >> 2);
+   short block_y = (mb_y >> 2);
+   int   best_x = center2_x;
+   int   best_y = center2_y;
+   int ET_Thred = Median_Pred_Thd_MB[blocktype];
+ 
+   short apply_weights = (active_pps->weighted_bipred_idc>0);
+   short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref][0]:  wp_offset[list_offset + 1][0  ][ref]) : 0);
+   short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref][0]:  wp_offset[list_offset    ][0  ][ref]) : 0);
+ 
+   ref_pic1_sub.luma = listX[list + list_offset][ref]->imgY_sub;
+   ref_pic2_sub.luma = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgY_sub;
+ 
+   img_width  = listX[list + list_offset][ref]->size_x;
+   img_height = listX[list + list_offset][ref]->size_y;
+   width_pad  = listX[list + list_offset][ref]->size_x_pad;
+   height_pad = listX[list + list_offset][ref]->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight1 = list == 0 ? wbp_weight[list_offset         ][ref][0][0] : wbp_weight[list_offset + LIST_1][0  ][ref][0];
+     weight2 = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][0] : wbp_weight[list_offset         ][0  ][ref][0];
+     offsetBi=(offset1 + offset2 + 1)>>1;
+     computeBiPred = computeBiPredSAD2; //ME only supports SAD computations
+   }
+   else
+   {
+     weight1 = 1<<luma_log_weight_denom;
+     weight2 = 1<<luma_log_weight_denom;
+     offsetBi = 0;
+     computeBiPred = computeBiPredSAD1; //ME only supports SAD computations
+   }
+ 
+   if (ChromaMEEnable )
+   {
+     ref_pic1_sub.crcb[0] = listX[list + list_offset][ref]->imgUV_sub[0];
+     ref_pic1_sub.crcb[1] = listX[list + list_offset][ref]->imgUV_sub[1];
+     ref_pic2_sub.crcb[0] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[0];
+     ref_pic2_sub.crcb[1] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[1];
+     width_pad_cr  = listX[list + list_offset][ref]->size_x_cr_pad;
+     height_pad_cr = listX[list + list_offset][ref]->size_y_cr_pad;
+     if (apply_weights)
+     {
+       weight1_cr[0] = list == 0 ? wbp_weight[list_offset         ][ref][0][1] : wbp_weight[list_offset + LIST_1][0  ][ref][1];
+       weight1_cr[1] = list == 0 ? wbp_weight[list_offset         ][ref][0][2] : wbp_weight[list_offset + LIST_1][0  ][ref][2];
+       weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
+       weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
+       offsetBi_cr[0] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
+       offsetBi_cr[1] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
+     }
+     else
+     {
+       weight1_cr[0] = 1<<chroma_log_weight_denom;
+       weight1_cr[1] = 1<<chroma_log_weight_denom;
+       weight2_cr[0] = 1<<chroma_log_weight_denom;
+       weight2_cr[1] = 1<<chroma_log_weight_denom;
+       offsetBi_cr[0] = 0;
+       offsetBi_cr[1] = 0;
+     }
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   if ((center2_x > search_range) && (center2_x < img_width -1-search_range-blocksize_x) &&
+     (center2_y > search_range) && (center2_y < img_height-1-search_range-blocksize_y)   )
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   if ((center1_y > search_range) && (center1_y < img_height-1-search_range-blocksize_y)   )
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   //////////////////////////////////////////////////////////////////////////
+ 
+   //////allocate memory for search state//////////////////////////
+   memset(McostState[0],0,(2*search_range+1)*(2*search_range+1));
+ 
+   //check the center median predictor
+   cand_x = center2_x ;
+   cand_y = center2_y ;
+   mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);
+   mcost += MV_COST (lambda_factor, mvshift, cand_x,    cand_y,    pred_x2, pred_y2);
+ 
+   mcost += computeBiPred( cur_pic,
+                          blocksize_y, blocksize_x, INT_MAX,
+                          (center1_x << 2) + IMG_PAD_SIZE_TIMES4,
+                          (center1_y << 2) + IMG_PAD_SIZE_TIMES4,
+                          (cand_x << 2) + IMG_PAD_SIZE_TIMES4,
+                          (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
+ 
+   McostState[search_range][search_range] = 1;
+ 
+   if (mcost < min_mcost)
+   {
+     min_mcost = mcost;
+     best_x = cand_x;
+     best_y = cand_y;
+   }
+ 
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   for (m = 0; m < 4; m++)
+   {
+     cand_x = iXMinNow + Diamond_x[m];
+     cand_y = iYMinNow + Diamond_y[m];
+     SEARCH_ONE_PIXEL_BIPRED;
+   }
+ 
+   if(center2_x != pic_pix_x || center2_y != pic_pix_y)
+   {
+     cand_x = pic_pix_x ;
+     cand_y = pic_pix_y ;
+ 
+     SEARCH_ONE_PIXEL_BIPRED;
+ 
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_x[m];
+       cand_y = iYMinNow + Diamond_y[m];
+       SEARCH_ONE_PIXEL_BIPRED;
+     }
+   }
+   /***********************************init process*************************/
+ 
+   if( min_mcost < ET_Thred)
+   {
+     goto terminate_step;
+   }
+   else
+   {
+     int  N_Bframe=0;
+     int  n_Bframe=0;
+     short****** bipred_mv = list ? img->bipred_mv1 : img->bipred_mv2;
+     N_Bframe = input->successive_Bframe;
+     n_Bframe = frame_ctr[B_SLICE]%(N_Bframe+1);
+ 
+ 
+     /**************************** MV prediction **********************/
+     //MV uplayer prediction
+     // non for bipred mode
+ 
+     //MV ref-frame prediction
+ 
+     if(list==0)
+     {
+       if (img->field_picture)
+       {
+         pred_MV_ref[0] =(int) (bipred_mv[block_y][block_x][1][0][blocktype][0]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
+         pred_MV_ref[1] =(int) (bipred_mv[block_y][block_x][1][0][blocktype][1]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
+       }
+       else //frame case
+       {
+         pred_MV_ref[0] =(int) (bipred_mv[block_y][block_x][1][0][blocktype][0]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
+         pred_MV_ref[1] =(int) (bipred_mv[block_y][block_x][1][0][blocktype][1]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
+       }
+     }
+     /******************************SAD prediction**********************************/
+ 
+     pred_SAD =imin(imin(SAD_a,SAD_b),SAD_c);  // pred_SAD_space
+     ET_Thred = Big_Hexagon_Thd_MB[blocktype];
+ 
+     ///////Threshold defined for early termination///////////////////
+     if (pred_SAD == 0)
+     {
+       betaFourth_1=0;
+       betaFourth_2=0;
+     }
+     else
+     {
+       betaFourth_1 = Bsize[blocktype]/(pred_SAD*pred_SAD)-AlphaFourth_1[blocktype];
+       betaFourth_2 = Bsize[blocktype]/(pred_SAD*pred_SAD)-AlphaFourth_2[blocktype];
+     }
+   }
+ 
+   /***********************************end of init *************************/
+ 
+ 
+ 
+   // first_step: initial start point prediction
+   //prediction using mV of last ref moiton vector
+   if(list == 0)
+   {
+     cand_x = pic_pix_x + (pred_MV_ref[0]/4);
+     cand_y = pic_pix_y + (pred_MV_ref[1]/4);
+     SEARCH_ONE_PIXEL_BIPRED;
+   }
+ 
+ 
+   //small local search
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   for (m = 0; m < 4; m++)
+   {
+     cand_x = iXMinNow + Diamond_x[m];
+     cand_y = iYMinNow + Diamond_y[m];
+     SEARCH_ONE_PIXEL_BIPRED;
+   }
+ 
+   //early termination alogrithm, refer to JVT-G016
+   EARLY_TERMINATION;
+ 
+ 
+   //sec_step: //Unsymmetrical-cross search
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+ 
+   for(i = 1; i < search_range; i+=2)
+   {
+     search_step = i;
+     cand_x = iXMinNow + search_step;
+     cand_y = iYMinNow ;
+     SEARCH_ONE_PIXEL_BIPRED;
+     cand_x = iXMinNow - search_step;
+     cand_y = iYMinNow ;
+     SEARCH_ONE_PIXEL_BIPRED;
+   }
+ 
+   for(i = 1; i < (search_range/2);i+=2)
+   {
+     search_step = i;
+     cand_x = iXMinNow ;
+     cand_y = iYMinNow + search_step;
+     SEARCH_ONE_PIXEL_BIPRED;
+     cand_x = iXMinNow ;
+     cand_y = iYMinNow - search_step;
+     SEARCH_ONE_PIXEL_BIPRED;
+   }
+   //early termination alogrithm, refer to JVT-G016
+   EARLY_TERMINATION;
+ 
+   //third_step:     // Uneven Multi-Hexagon-grid Search
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   //sub step1: 5x5 square search
+   for(pos=1;pos<25;pos++)
+   {
+     cand_x = iXMinNow + spiral_search_x[pos];
+     cand_y = iYMinNow + spiral_search_y[pos];
+     SEARCH_ONE_PIXEL_BIPRED;
+   }
+ 
+   //early termination alogrithm, refer to JVT-G016
+   EARLY_TERMINATION;      //added back by xxz
+ 
+   //sub step2: multi-grid-hexagon-search
+   memcpy(temp_Big_Hexagon_x,Big_Hexagon_x,64);
+   memcpy(temp_Big_Hexagon_y,Big_Hexagon_y,64);
+   for(i=1;i<=(input->search_range>>2); i++)
+   {
+ 
+     for (m = 0; m < 16; m++)
+     {
+       cand_x = iXMinNow + temp_Big_Hexagon_x[m];
+       cand_y = iYMinNow + temp_Big_Hexagon_y[m];
+       temp_Big_Hexagon_x[m] += Big_Hexagon_x[m];
+       temp_Big_Hexagon_y[m] += Big_Hexagon_y[m];
+ 
+       SEARCH_ONE_PIXEL_BIPRED;
+     }
+     if(min_mcost < ET_Thred)
+     {
+       goto terminate_step;
+ 
+     }
+   }
+   //fourth step: Local Refinement: Extended Hexagon-based Search
+ fourth_1_step:
+ 
+   for(i=0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_x[m];
+       cand_y = iYMinNow + Hexagon_y[m];
+       SEARCH_ONE_PIXEL_BIPRED;
+     }
+     if(best_x == iXMinNow && best_y == iYMinNow)
+       break;
+   }
+ fourth_2_step:
+ 
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_x[m];
+       cand_y = iYMinNow + Diamond_y[m];
+       SEARCH_ONE_PIXEL_BIPRED;
+     }
+     if(best_x == iXMinNow && best_y == iYMinNow)
+       break;
+   }
+ 
+ terminate_step:
+   for (i=0; i < (blocksize_x>>2); i++)
+   {
+     for (j=0; j < (blocksize_y>>2); j++)
+     {
+       if(list == 0)
+       {
+         fastme_l0_cost_bipred[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+       }
+       else
+       {
+         fastme_l1_cost_bipred[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+       }
+     }
+   }
+ 
+   *mv_x = best_x - pic_pix_x;
+   *mv_y = best_y - pic_pix_y;
+ 
+ 
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Set motion vector predictor: fills pmv[0..1] from the left (A), upper (B) and upper-right (C, else upper-left D) neighbours; resets SAD_a..SAD_d (reloading them from the cost tables when UMHexDSR or BiPredMotionEstimation is 1) and, when input->UMHexDSR is set, writes *search_range
+  ************************************************************************
+  */
+ void UMHEXSetMotionVectorPredictor (short  pmv[2],          //  --> predictor, one entry written per hv component (0 and 1)
+                                char   **refPic,        // <--  reference indices, read as refPic[pos_y][pos_x]
+                                short  ***tmp_mv,       // <--  motion vectors, read as tmp_mv[pos_y][pos_x][hv]
+                                short  ref_frame,       // <--  reference index the neighbours' indices are compared against
+                                int    list,            // <--  list==1 reads the l1 cost table, any other value reads l0
+                                int    block_x,         // <--  block x offset, multiplied by 4 to form mb_x
+                                int    block_y,         // <--  block y offset, multiplied by 4 to form mb_y
+                                int    blockshape_x,    // <--  block extent in x (compared against 8 and 16 below)
+                                int    blockshape_y,    // <--  block extent in y (compared against 8 and 16 below)
+                                int    *search_range)   //  --> dynamic search range; only written when input->UMHexDSR is set
+ {
+   int mb_x                 = 4*block_x;   // NOTE(review): presumably converts 4x4-block units to pixels within the MB -- confirm
+   int mb_y                 = 4*block_y;
+   int mb_nr                = img->current_mb_nr;
+ 
+   int mv_a, mv_b, mv_c, pred_vec=0;   // neighbour MV components for A, B, C and the selected predictor
+   int mvPredType, rFrameL, rFrameU, rFrameUR;   // prediction rule and the reference indices of A (L), B (U), C (UR)
+   int hv;   // component index, iterates over 0 and 1
+ 
+   PixelPos block_a, block_b, block_c, block_d;   // a = left, b = upper, c = upper-right, d = upper-left (see the lookups below)
+ 
+   // added for bipred mode: use the *_bipred cost tables when bipred_flag is set
+   int *** fastme_l0_cost_flag = (bipred_flag ? fastme_l0_cost_bipred:fastme_l0_cost);
+   int *** fastme_l1_cost_flag = (bipred_flag ? fastme_l1_cost_bipred:fastme_l1_cost);
+ 
+   //Dynamic Search Range (DSR) temporaries
+ 
+   int dsr_temp_search_range[2];   // candidate range per hv component; only assigned when input->UMHexDSR is set
+   int dsr_mv_avail, dsr_mv_max, dsr_mv_sum, dsr_small_search_range;
+ 
+   // neighborhood SAD init
+   SAD_a=0;
+   SAD_b=0;
+   SAD_c=0;
+   SAD_d=0;
+   // locate the four neighbours relative to (mb_x, mb_y)
+   getLuma4x4Neighbour(mb_nr, mb_x - 1, mb_y, &block_a);
+   getLuma4x4Neighbour(mb_nr, mb_x, mb_y -1, &block_b);
+   getLuma4x4Neighbour(mb_nr, mb_x + blockshape_x, mb_y-1, &block_c);
+   getLuma4x4Neighbour(mb_nr, mb_x -1, mb_y -1, &block_d);
+   // for mb_y > 0, force C unavailable for specific (mb_x, mb_y, blockshape_x) combinations (NOTE(review): presumably positions whose upper-right block is not coded yet -- confirm)
+   if (mb_y > 0)
+   {
+     if (mb_x < 8)  // first column of 8x8 blocks
+     {
+       if (mb_y==8)
+       {
+         if (blockshape_x == 16)      block_c.available  = 0;
+       }
+       else
+       {
+         if (mb_x+blockshape_x == 8)  block_c.available = 0;
+       }
+     }
+     else
+     {
+       if (mb_x+blockshape_x == 16)   block_c.available = 0;
+     }
+   }
+   // when C is unavailable, the upper-left neighbour D takes its place (including D's own availability flag)
+   if (!block_c.available)
+   {
+     block_c=block_d;
+   }
+ 
+   mvPredType = MVPRED_MEDIAN;   // default rule; may be overridden by the checks further down
+   // fetch the neighbours' reference indices; -1 stands for an unavailable neighbour
+   if (!img->MbaffFrameFlag)
+   {
+     rFrameL    = block_a.available    ? refPic[block_a.pos_y][block_a.pos_x] : -1;
+     rFrameU    = block_b.available    ? refPic[block_b.pos_y][block_b.pos_x] : -1;
+     rFrameUR   = block_c.available    ? refPic[block_c.pos_y][block_c.pos_x] : -1;
+   }
+   else
+   {
+     if (img->mb_data[img->current_mb_nr].mb_field)   // current MB has mb_field set: indices of neighbours without mb_field are doubled
+     {
+       rFrameL  = block_a.available
+         ? (img->mb_data[block_a.mb_addr].mb_field
+         ? refPic[block_a.pos_y][block_a.pos_x]
+         : refPic[block_a.pos_y][block_a.pos_x] * 2) : -1;
+       rFrameU  = block_b.available
+         ? (img->mb_data[block_b.mb_addr].mb_field
+         ? refPic[block_b.pos_y][block_b.pos_x]
+         : refPic[block_b.pos_y][block_b.pos_x] * 2) : -1;
+       rFrameUR = block_c.available
+         ? (img->mb_data[block_c.mb_addr].mb_field
+         ? refPic[block_c.pos_y][block_c.pos_x]
+         : refPic[block_c.pos_y][block_c.pos_x] * 2) : -1;
+     }
+     else   // current MB has mb_field clear: indices of neighbours with mb_field are halved (>>1)
+       {
+       rFrameL = block_a.available
+         ? (img->mb_data[block_a.mb_addr].mb_field
+         ? refPic[block_a.pos_y][block_a.pos_x] >>1
+         : refPic[block_a.pos_y][block_a.pos_x]) : -1;
+       rFrameU    = block_b.available    ?
+         img->mb_data[block_b.mb_addr].mb_field ?
+         refPic[block_b.pos_y][block_b.pos_x] >>1:
+       refPic[block_b.pos_y][block_b.pos_x] :
+       -1;
+       rFrameUR    = block_c.available    ?
+         img->mb_data[block_c.mb_addr].mb_field ?
+         refPic[block_c.pos_y][block_c.pos_x] >>1:
+       refPic[block_c.pos_y][block_c.pos_x] :
+       -1;
+     }
+   }
+ 
+   /* Prediction if only one of the neighbors uses the reference frame
+   * we are checking
+   */
+   if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)       mvPredType = MVPRED_L;
+   else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)  mvPredType = MVPRED_U;
+   else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)  mvPredType = MVPRED_UR;
+   // Directional predictions: for blockshape 8x16 and 16x8 these override the choice made above
+   if(blockshape_x == 8 && blockshape_y == 16)
+   {
+     if(mb_x == 0)
+     {
+       if(rFrameL == ref_frame)
+         mvPredType = MVPRED_L;
+     }
+     else
+     {
+       if( rFrameUR == ref_frame)
+         mvPredType = MVPRED_UR;
+     }
+   }
+   else if(blockshape_x == 16 && blockshape_y == 8)
+   {
+     if(mb_y == 0)
+     {
+       if(rFrameU == ref_frame)
+         mvPredType = MVPRED_U;
+     }
+     else
+     {
+       if(rFrameL == ref_frame)
+         mvPredType = MVPRED_L;
+     }
+   }
+ 
+   // neighborhood SAD prediction: read the neighbours' entries from the cost table at index UMHEX_blocktype (0 when unavailable; C falls back to SAD_d)
+   if((input->UMHexDSR == 1 || input->BiPredMotionEstimation == 1))
+   {
+     SAD_a = block_a.available ? ((list==1) ? (fastme_l1_cost_flag[UMHEX_blocktype][block_a.pos_y][block_a.pos_x]) : (fastme_l0_cost_flag[UMHEX_blocktype][block_a.pos_y][block_a.pos_x])) : 0;
+     SAD_b = block_b.available ? ((list==1) ? (fastme_l1_cost_flag[UMHEX_blocktype][block_b.pos_y][block_b.pos_x]) : (fastme_l0_cost_flag[UMHEX_blocktype][block_b.pos_y][block_b.pos_x])) : 0;
+     SAD_d = block_d.available ? ((list==1) ? (fastme_l1_cost_flag[UMHEX_blocktype][block_d.pos_y][block_d.pos_x]) : (fastme_l0_cost_flag[UMHEX_blocktype][block_d.pos_y][block_d.pos_x])) : 0;
+     SAD_c = block_c.available ? ((list==1) ? (fastme_l1_cost_flag[UMHEX_blocktype][block_c.pos_y][block_c.pos_x]) : (fastme_l0_cost_flag[UMHEX_blocktype][block_c.pos_y][block_c.pos_x])) : SAD_d;
+   }
+   for (hv=0; hv < 2; hv++)   // derive the predictor one component at a time
+   {
+     if (!img->MbaffFrameFlag || hv==0)   // neighbour values are used unscaled in this branch
+     {
+       mv_a = block_a.available  ? tmp_mv[block_a.pos_y][block_a.pos_x][hv] : 0;
+       mv_b = block_b.available  ? tmp_mv[block_b.pos_y][block_b.pos_x][hv] : 0;
+       mv_c = block_c.available  ? tmp_mv[block_c.pos_y][block_c.pos_x][hv] : 0;
+     }
+     else
+     {
+       if (img->mb_data[img->current_mb_nr].mb_field)   // current MB has mb_field set: values from neighbours without mb_field are halved
+       {
+         mv_a = block_a.available  ? img->mb_data[block_a.mb_addr].mb_field
+           ? tmp_mv[block_a.pos_y][block_a.pos_x][hv]
+           : tmp_mv[block_a.pos_y][block_a.pos_x][hv] / 2
+           : 0;
+         mv_b = block_b.available  ? img->mb_data[block_b.mb_addr].mb_field
+           ? tmp_mv[block_b.pos_y][block_b.pos_x][hv]
+           : tmp_mv[block_b.pos_y][block_b.pos_x][hv] / 2
+           : 0;
+         mv_c = block_c.available  ? img->mb_data[block_c.mb_addr].mb_field
+           ? tmp_mv[block_c.pos_y][block_c.pos_x][hv]
+           : tmp_mv[block_c.pos_y][block_c.pos_x][hv] / 2
+           : 0;
+       }
+       else   // current MB has mb_field clear: values from neighbours with mb_field are doubled
+       {
+         mv_a = block_a.available  ? img->mb_data[block_a.mb_addr].mb_field
+           ? tmp_mv[block_a.pos_y][block_a.pos_x][hv] * 2
+           : tmp_mv[block_a.pos_y][block_a.pos_x][hv]
+           : 0;
+         mv_b = block_b.available  ? img->mb_data[block_b.mb_addr].mb_field
+           ? tmp_mv[block_b.pos_y][block_b.pos_x][hv] * 2
+           : tmp_mv[block_b.pos_y][block_b.pos_x][hv]
+           : 0;
+         mv_c = block_c.available  ? img->mb_data[block_c.mb_addr].mb_field
+           ? tmp_mv[block_c.pos_y][block_c.pos_x][hv] * 2
+           : tmp_mv[block_c.pos_y][block_c.pos_x][hv]
+           : 0;
+       }
+     }
+     // select the predictor according to mvPredType; the default case leaves pred_vec unchanged
+     switch (mvPredType)
+     {
+     case MVPRED_MEDIAN:
+       if(!(block_b.available || block_c.available))   // neither B nor C available: take A
+       {
+         pred_vec = mv_a;
+       }
+       else
+       {
+         pred_vec = mv_a+mv_b+mv_c-imin(mv_a,imin(mv_b,mv_c))-imax(mv_a,imax(mv_b,mv_c));   // median of three: sum minus min minus max
+       }
+       break;
+     case MVPRED_L:
+       pred_vec = mv_a;
+       break;
+     case MVPRED_U:
+       pred_vec = mv_b;
+       break;
+     case MVPRED_UR:
+       pred_vec = mv_c;
+       break;
+     default:
+       break;
+     }
+ 
+     pmv[hv] = pred_vec;
+     //Dynamic Search Range: compute this component's candidate range into dsr_temp_search_range[hv]
+     if (input->UMHexDSR)
+     {
+       dsr_mv_avail=block_a.available+block_b.available+block_c.available;
+       if(dsr_mv_avail < 2)   // availability sum below 2: keep the full configured range
+       {
+         dsr_temp_search_range[hv] = input->search_range;
+       }
+       else
+       {
+         dsr_mv_max = imax(iabs(mv_a),imax(iabs(mv_b),iabs(mv_c)));
+         dsr_mv_sum = (iabs(mv_a)+iabs(mv_b)+iabs(mv_c));
+         if(dsr_mv_sum == 0) dsr_small_search_range = (input->search_range + 4) >> 3;   // rounded search_range/8
+         else if(dsr_mv_sum > 3 ) dsr_small_search_range = (input->search_range + 2) >>2;   // rounded search_range/4
+         else dsr_small_search_range = (3*input->search_range + 8) >> 4;   // rounded 3*search_range/16
+         dsr_temp_search_range[hv]=imin(input->search_range,imax(dsr_small_search_range,dsr_mv_max<<1));
+         if(imax(SAD_a,imax(SAD_b,SAD_c)) > Threshold_DSR_MB[UMHEX_blocktype])   // a neighbour cost above the threshold: revert to the full range
+           dsr_temp_search_range[hv] = input->search_range;
+       }
+     }
+   }
+ 
+   //Dynamic Search Range: take the larger of the two component ranges, then divide by a factor built from ref_frame and the block type
+   if (input->UMHexDSR) {
+     dsr_new_search_range = imax(dsr_temp_search_range[0],dsr_temp_search_range[1]);   // dsr_new_search_range is not declared in this function (defined elsewhere)
+ 
+ #ifdef _FULL_SEARCH_RANGE_
+ 
+     if      (input->full_search == 2) *search_range = dsr_new_search_range;
+     else if (input->full_search == 1) *search_range = dsr_new_search_range /  (imin(ref_frame,1)+1);
+     else                              *search_range = dsr_new_search_range / ((imin(ref_frame,1)+1) * imin(2,input->blocktype_lut[(blockshape_y >> 2) - 1][(blockshape_x >> 2) - 1]));
+ #else
+     *search_range = dsr_new_search_range / ((imin(ref_frame,1)+1) * imin(2,input->blocktype_lut[(blockshape_y >> 2) - 1][(blockshape_x >> 2) - 1]));
+ #endif
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_umhex.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_umhex.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_umhex.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,191 ----
+ 
+ /*!
+  ************************************************************************
+  *
+  * \file me_umhex.h
+  *
+  * \brief
+  *   Macro definitions and global variables for UMHEX fast
+  *   integer pel motion estimation and fractional pel motion estimation
+  *
+  * \author
+  *   Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>
+  *    - Wenfang Fu         <fwf at video.mdc.tsinghua.edu.cn>
+  *    - Xiaozhong Xu       <xxz at video.mdc.tsinghua.edu.cn>
+  *
+  * \date
+  *   2006.1
+  ************************************************************************
+  */
+ 
+ #ifndef _ME_UMHEX_H_
+ #define _ME_UMHEX_H_
+ 
+ #include "mbuffer.h"
+ // EARLY_TERMINATION: jumps to fourth_2_step when (min_mcost-pred_SAD) < pred_SAD*betaFourth_2, else to fourth_1_step when it is < pred_SAD*betaFourth_1. Expands to a bare if / else-if (no do-while wrapper), so the expansion site must provide both labels and all four variables.
+ #define EARLY_TERMINATION                                                             \
+   if ((min_mcost-pred_SAD)<pred_SAD*betaFourth_2)                                     \
+   goto fourth_2_step;                                                                 \
+   else if((min_mcost-pred_SAD)<pred_SAD*betaFourth_1)                                 \
+   goto fourth_1_step;
+ // SEARCH_ONE_PIXEL: evaluates candidate (cand_x, cand_y) if it lies within search_range of (center_x, center_y) and is not yet flagged in McostState. The distortion is only computed (and the flag set) when the MV cost alone is below min_mcost; best_x, best_y and min_mcost are updated when the total is lower. Uses locals of the expansion site.
+ #define SEARCH_ONE_PIXEL                                                              \
+   if(iabs(cand_x - center_x) <=search_range && iabs(cand_y - center_y)<= search_range)\
+   {                                                                                   \
+     if(!McostState[cand_y-center_y+search_range][cand_x-center_x+search_range])       \
+     {                                                                                 \
+       mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);       \
+       if(mcost<min_mcost)                                                             \
+       {                                                                               \
+         mcost += computeUniPred[dist_method](orig_pic,                                \
+         blocksize_y,blocksize_x, min_mcost - mcost,                                   \
+         (cand_x << 2) + IMG_PAD_SIZE_TIMES4, (cand_y << 2) + IMG_PAD_SIZE_TIMES4);    \
+         McostState[cand_y-center_y+search_range][cand_x-center_x+search_range] = 1;   \
+         if (mcost < min_mcost)                                                        \
+         {                                                                             \
+           best_x = cand_x;                                                            \
+           best_y = cand_y;                                                            \
+           min_mcost = mcost;                                                          \
+         }                                                                             \
+       }                                                                               \
+     }                                                                                 \
+    }
+ // SEARCH_ONE_PIXEL_BIPRED: bi-pred variant of SEARCH_ONE_PIXEL. The window and the McostState index are relative to (center2_x, center2_y); the MV cost sums the fixed (center1_x, center1_y) vs (pred_x1, pred_y1) term and the candidate vs (pred_x2, pred_y2) term; distortion comes from computeBiPred on both positions. Uses locals of the expansion site.
+ #define SEARCH_ONE_PIXEL_BIPRED                                                       \
+ if(iabs(cand_x - center2_x) <=search_range && iabs(cand_y - center2_y)<= search_range)\
+ {                                                                                     \
+   if(!McostState[cand_y-center2_y+search_range][cand_x-center2_x+search_range])       \
+   {                                                                                   \
+     mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);\
+     mcost += MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x2, pred_y2);      \
+   if(mcost<min_mcost)                                                                 \
+   {                                                                                   \
+       mcost  += computeBiPred(cur_pic, blocksize_y, blocksize_x,                      \
+       min_mcost - mcost,                                                              \
+       (center1_x << 2) + IMG_PAD_SIZE_TIMES4,                                         \
+       (center1_y << 2) + IMG_PAD_SIZE_TIMES4,                                         \
+       (cand_x << 2) + IMG_PAD_SIZE_TIMES4,                                            \
+       (cand_y << 2) + IMG_PAD_SIZE_TIMES4);                                           \
+       McostState[cand_y-center2_y+search_range][cand_x-center2_x+search_range] = 1;   \
+       if (mcost < min_mcost)                                                          \
+       {                                                                               \
+         best_x = cand_x;                                                              \
+         best_y = cand_y;                                                              \
+         min_mcost = mcost;                                                            \
+       }                                                                               \
+     }                                                                                   \
+   }                                                                                   \
+ }
+ // NOTE(review): the objects below are defined (no 'extern') in this header, so every translation unit that includes it carries its own tentative definition -- confirm the build relies on common-symbol merging
+ byte **McostState;                          //!< state for integer pel search
+ byte **SearchState;                         //!< state for fractional pel search
+ 
+ int ****fastme_ref_cost;                    //!< store SAD information needed for forward ref-frame prediction
+ int ***fastme_l0_cost;                      //!< store SAD information needed for forward median and uplayer prediction
+ int ***fastme_l1_cost;                      //!< store SAD information needed for backward median and uplayer prediction
+ int ***fastme_l0_cost_bipred;               //!< store SAD information for bipred mode
+ int ***fastme_l1_cost_bipred;               //!< store SAD information for bipred mode
+ int bipred_flag;                            //!< flag for bipred
+ int **fastme_best_cost;                     //!< for multi ref early termination threshold
+ int pred_SAD;                               //!<  SAD prediction in use.
+ int pred_MV_ref[2], pred_MV_uplayer[2];     //!< pred motion vector by space or temporal correlation,Median is provided
+ 
+ int UMHEX_blocktype;                        //!< blocktype for UMHEX SetMotionVectorPredictor
+ int predict_point[5][2];
+ int SAD_a,SAD_b,SAD_c,SAD_d;                //!< neighbour costs set by UMHEXSetMotionVectorPredictor (a left, b upper, c upper-right, d upper-left)
+ int Threshold_DSR_MB[8];                    //!<  Threshold for usage of DSR. DSR refer to JVT-Q088
+ //for early termination
+ float  Bsize[8];                            //!< indexed by blocktype; divided by pred_SAD*pred_SAD when deriving betaFourth_1/2
+ float AlphaFourth_1[8];                     //!< indexed by blocktype; subtracted to give betaFourth_1
+ float AlphaFourth_2[8];                     //!< indexed by blocktype; subtracted to give betaFourth_2
+ byte *flag_intra;
+ int  flag_intra_SAD;
+ 
+ void UMHEX_DefineThreshold(void);
+ void UMHEX_DefineThresholdMB(void);
+ int  UMHEX_get_mem(void);
+ void UMHEX_free_mem(void);
+ 
+ void UMHEX_decide_intrabk_SAD(void);
+ void UMHEX_skip_intrabk_SAD(int best_mode, int ref_max);
+ void UMHEX_setup(short ref, int list, int block_y, int block_x, int blocktype, short   ******all_mv);
+ 
+ int                                     //  ==> minimum motion cost after search
+ UMHEXIntegerPelBlockMotionSearch  (
+                                   imgpel   *orig_pic,      // <--  not used
+                                   short     ref,           // <--  reference frame (0... or -1 (backward))
+                                   int       list,          // <--  reference picture list
+                                   int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                   int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                   int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                   short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                                   short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                                   short*    mv_x,          //  --> motion vector (x) - in pel units
+                                   short*    mv_y,          //  --> motion vector (y) - in pel units
+                                   int       search_range,  // <--  1-d search range in pel units
+                                   int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                   int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ int                                                   //  ==> minimum motion cost after search
+ UMHEXSubPelBlockMotionSearch (
+                              imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                              short       ref,         // <--  reference frame (0... or -1 (backward))
+                              int       list,          // <--  reference picture list
+                              int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                              int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                              int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                              short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                              short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                              short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                              short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                              int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                              int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                              int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                              int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ 
+ extern int                                        //  ==> minimum motion cost after search
+ SubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame (0... or -1 (backward))
+                          int       list,          // <--  reference picture list
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int       *lambda_factor   // <--  lagrangian parameter for determining motion cost (passed by pointer here, unlike the UMHEX prototypes in this header)
+                          );
+ 
+ int                                                //  ==> minimum motion cost after search
+ UMHEXBipredIntegerPelBlockMotionSearch (
+                                        imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                                        short       ref,         // <--  reference frame (0... or -1 (backward))
+                                        int       list,          // <--  reference picture list
+                                        int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                        int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                        int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                        short     pred_mv_x1,    // <--  motion vector predictor (x) in sub-pel units
+                                        short     pred_mv_y1,    // <--  motion vector predictor (y) in sub-pel units
+                                        short     pred_mv_x2,    // <--  motion vector predictor (x) in sub-pel units
+                                        short     pred_mv_y2,    // <--  motion vector predictor (y) in sub-pel units
+                                        short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                                        short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                                        short*    s_mv_x,        // <--> in: search center (x) / out: motion vector (x) - in pel units
+                                        short*    s_mv_y,        // <--> in: search center (y) / out: motion vector (y) - in pel units
+                                        int       search_range,  // <--  1-d search range in pel units
+                                        int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                        int       lambda_factor // <--  lagrangian parameter for determining motion cost
+                                        );
+ 
+ void UMHEXSetMotionVectorPredictor (short pmv[2], char **refPic, short ***tmp_mv,
+                                     short  ref_frame, int list, int block_x, int block_y,
+                                     int blockshape_x, int blockshape_y, int *search_range);
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/me_umhexsmp.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_umhexsmp.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_umhexsmp.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,1251 ----
+ 
+ /*!
+  *************************************************************************************
+  *
+  * \file me_umhexsmp.c
+  *
+  * \brief
+  *   Fast integer pixel and sub pixel motion estimation
+  *   Improved and simplified from the original UMHexagonS algorithms
+  *   See JVT-P021 for details
+  *
+  * \author
+  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen                      <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu                     <fenax at video.mdc.tsinghua.edu.cn>
+  *    - Wenfang Fu                      <fwf at video.mdc.tsinghua.edu.cn>
+  *
+  *    - Xiaoquan Yi                     <xyi at engr.scu.edu>
+  *    - Jun Zhang                       <jzhang2 at engr.scu.edu>
+  *
+  * \date
+  *    6. Nov. 2006
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ #include <limits.h>
+ 
+ #include "global.h"
+ #include "memalloc.h"
+ #include "me_umhexsmp.h"
+ #include "refbuf.h"
+ #include "me_distortion.h"
+ 
+ extern  int *byte_abs;   // storage is provided elsewhere
+ extern  int *mvbits;     // storage is provided elsewhere; presumably consumed by the MV_COST* macros - confirm
+ 
+ static const short Diamond_X[4]      = {-1, 1, 0, 0};   // x offsets of the four 1-pel horizontal/vertical neighbours
+ static const short Diamond_Y[4]      = { 0, 0,-1, 1};   // y offsets, paired index-by-index with Diamond_X
+ static const short Hexagon_X[6]      = {-2, 2,-1, 1,-1, 1};   // x offsets of the 6-point hexagon pattern
+ static const short Hexagon_Y[6]      = { 0, 0,-2, 2, 2,-2};   // y offsets, paired index-by-index with Hexagon_X
+ static const short Big_Hexagon_X[16] = {-4, 4, 0, 0,-4, 4,-4, 4,-4, 4,-4, 4,-2, 2,-2, 2};   // x offsets of the 16-point pattern; multiplied by the ring index in the integer-pel search
+ static const short Big_Hexagon_Y[16] = { 0, 0,-4, 4,-1, 1, 1,-1,-2, 2, 2,-2,-3, 3, 3,-3};   // y offsets, paired index-by-index with Big_Hexagon_X
+ 
+ const short block_type_shift_factor[8] = {0, 0, 1, 1, 2, 3, 3, 1}; // right shift applied to the termination thresholds, indexed by blocktype; last one relaxed to 1 instead of 4
+ 
+ static StorablePicture *ref_pic_ptr;   // reference picture selected by the most recent integer-pel search
+ static int dist_method;                // index into computeUniPred[]: F_PEL or Q_PEL, plus 3 when weighting is applied
+ 
+ extern short*  spiral_hpel_search_x;   // offsets walked by the half-pel pass of smpUMHEXFullSubPelBlockMotionSearch
+ extern short*  spiral_hpel_search_y;
+ extern short*  spiral_search_x;        // offsets walked by the quarter-pel pass of smpUMHEXFullSubPelBlockMotionSearch
+ extern short*  spiral_search_y;
+ 
+ /* Macros for motion estimation cost computation per candidate position.
+  *
+  * Both macros expand to a bare if-block that reads and writes the locals of the
+  * function they are expanded in, so those locals must keep exactly these names.
+  * They are written without a trailing semicolon, and an `else` placed right
+  * after an expansion would attach to the macro's own `if`.
+  *
+  * SEARCH_ONE_PIXEL_HELPER
+  *   reads : cand_x, cand_y, center_x, center_y, search_range, pred_x, pred_y,
+  *           mvshift, lambda_factor, orig_pic, blocksize_x, blocksize_y
+  *   writes: mcost, and best_x / best_y / min_mcost when the candidate is
+  *           strictly cheaper than min_mcost
+  *   A candidate further than search_range from the centre (in x or in y) is
+  *   ignored.  Otherwise its cost is the motion vector cost plus the value
+  *   returned by computeUniPred[dist_method].
+  *
+  * SEARCH_ONE_PIXEL_BIPRED_HELPER
+  *   reads : cand_x, cand_y, center1_x, center1_y, center2_x, center2_y,
+  *           search_range, pred_x1, pred_y1, pred_x2, pred_y2, mvshift,
+  *           lambda_factor, cur_pic, blocksize_x, blocksize_y
+  *   writes: mcost, and best_x / best_y / min_mcost on improvement
+  *   The window test is relative to center2.  The motion vector cost is the sum
+  *   of the cost of center1 against pred 1 and of the candidate against pred 2;
+  *   computeBiPred is only called when that sum alone is below min_mcost.
+  */
+ #define SEARCH_ONE_PIXEL_HELPER                                                           \
+   if(iabs(cand_x - center_x) <= search_range && iabs(cand_y - center_y) <= search_range)  \
+   {                                                                                       \
+     mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);             \
+     mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,             \
+           min_mcost - mcost, (cand_x + IMG_PAD_SIZE) << 2, (cand_y + IMG_PAD_SIZE) << 2); \
+     if (mcost < min_mcost)                                                                \
+     {                                                                                     \
+       best_x    = cand_x;                                                                 \
+       best_y    = cand_y;                                                                 \
+       min_mcost = mcost;                                                                  \
+     }                                                                                     \
+ }
+ 
+ #define SEARCH_ONE_PIXEL_BIPRED_HELPER                                                    \
+ if (iabs(cand_x - center2_x) <= search_range && iabs(cand_y - center2_y) <= search_range) \
+ {                                                                                         \
+   mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);      \
+   mcost += MV_COST (lambda_factor, mvshift, cand_x,    cand_y,    pred_x2, pred_y2);      \
+   if (mcost < min_mcost)                                                                  \
+   {                                                                                       \
+     mcost  += computeBiPred(cur_pic, blocksize_y, blocksize_x,                            \
+                            min_mcost - mcost,                                             \
+                            (center1_x << 2) + IMG_PAD_SIZE_TIMES4,                        \
+                            (center1_y << 2) + IMG_PAD_SIZE_TIMES4,                        \
+                            (cand_x << 2) + IMG_PAD_SIZE_TIMES4,                           \
+                            (cand_y << 2) + IMG_PAD_SIZE_TIMES4);                          \
+     if (mcost < min_mcost)                                                                \
+     {                                                                                     \
+       best_x = cand_x;                                                                    \
+       best_y = cand_y;                                                                    \
+       min_mcost = mcost;                                                                  \
+     }                                                                                     \
+   }                                                                                       \
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Load the default early-termination thresholds of the simplified
+  *    UMHexagonS search.
+  *
+  *    The values can be tuned to trade rate-distortion performance
+  *    against search speed.
+  ************************************************************************
+  */
+ void smpUMHEX_init()
+ {
+   // thresholds consulted by the sub-pel searches
+   SubPelThreshold3                 =  400;
+   SubPelThreshold1                 = 1000;
+ 
+   // thresholds consulted by the integer-pel search
+   ConvergeThreshold                = 1000;
+   SymmetricalCrossSearchThreshold2 = 7000;
+   SymmetricalCrossSearchThreshold1 =  800;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate the working buffers of the simplified UMHexagonS search
+  * \return
+  *    sum of the values returned by get_mem3Dint / get_mem2D
+  *    (the smpUMHEX_flag_intra array is not part of that sum)
+  ************************************************************************
+  */
+ int smpUMHEX_get_mem()
+ {
+   int blk_rows = img->height/4;
+   int blk_cols = img->width/4;
+   int total_bytes;
+ 
+   // one flag per 16-pel column, plus one spare entry
+   smpUMHEX_flag_intra = calloc((img->width>>4)+1, sizeof(byte));
+   if (smpUMHEX_flag_intra == NULL)
+     no_mem_exit("smpUMHEX_get_mem: smpUMHEX_flag_intra");
+ 
+   total_bytes  = get_mem3Dint(&smpUMHEX_l0_cost, 9, blk_rows, blk_cols);
+   total_bytes += get_mem3Dint(&smpUMHEX_l1_cost, 9, blk_rows, blk_cols);
+   total_bytes += get_mem2D(&smpUMHEX_SearchState, 7, 7);
+ 
+   return total_bytes;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release every buffer obtained by smpUMHEX_get_mem
+  ************************************************************************
+  */
+ void smpUMHEX_free_mem()
+ {
+   free (smpUMHEX_flag_intra);
+ 
+   free_mem2D(smpUMHEX_SearchState);
+   free_mem3Dint(smpUMHEX_l1_cost, 9);
+   free_mem3Dint(smpUMHEX_l0_cost, 9);
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Fast integer pixel block motion estimation
+ *
+ *    Stages, in order: check of the search centre and of the zero
+ *    displacement, small diamond, (for costly blocks only) symmetrical
+ *    cross / hexagon / multi big hexagon, up-layer predictor for non
+ *    16x16 blocks, then iterated hexagon and iterated small diamond.
+ *    Two early exits return after one extra diamond as soon as
+ *    min_mcost is below ConvergeThreshold scaled by
+ *    block_type_shift_factor[blocktype].
+ *
+ *    Side effects: sets the file-static ref_pic_ptr and dist_method as
+ *    well as ref_pic_sub, img_width, img_height, width_pad, height_pad,
+ *    ref_access_method and, when enabled, the weighting and chroma
+ *    variables used by the distortion functions.
+ ************************************************************************
+ */
+ int                                     //  ==> minimum motion cost after search
+ smpUMHEXIntegerPelBlockMotionSearch (
+                                      imgpel   *orig_pic,      // <--  original block; handed to the distortion function
+                                      short     ref,           // <--  reference frame (0... or -1 (backward))
+                                      int       list,          // <--  reference picture list
+                                      int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                      int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                      int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                      short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                                      short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                                      short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                                      short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                                      int       search_range,  // <--  1-d search range in pel units
+                                      int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                      int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ {
+   int   list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
+                          img->current_mb_nr%2 ? 4 : 2 : 0;        // MBAFF field MB: 4 for odd MB numbers, 2 for even ones; otherwise 0
+   int   mvshift       = 2;                                        // motion vector shift for getting sub-pel units
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+   int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;       // predicted position x (in sub-pel units)
+   int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;       // predicted position y (in sub-pel units)
+   int   center_x      = pic_pix_x + *mv_x;                        // center position x (in pel units)
+   int   center_y      = pic_pix_y + *mv_y;                        // center position y (in pel units)
+   int   best_x        = 0, best_y = 0;                            // NOTE(review): stays (0,0) if the centre does not beat the incoming min_mcost - confirm callers pass a large enough value
+   int   search_step, iYMinNow, iXMinNow;
+   int   cand_x, cand_y, mcost;
+ 
+   unsigned short        i, m;
+ 
+ 
+   //===== Use weighted Reference for ME ====
+ 
+   int  apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;
+ 
+   dist_method = F_PEL + 3 * apply_weights;                        // index is moved up by 3 when weighting applies
+ 
+   ref_pic_ptr = listX[list+list_offset][ref];
+ 
+   // Note that following seem to be universal for all functions and could be moved to a separate, clean public function in me_distortion.c
+   ref_pic_sub.luma = ref_pic_ptr->imgY_sub;
+   img_width  = ref_pic_ptr->size_x;
+   img_height = ref_pic_ptr->size_y;
+   width_pad  = ref_pic_ptr->size_x_pad;
+   height_pad = ref_pic_ptr->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if (ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_pic_ptr->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_pic_ptr->imgUV_sub[1];
+     width_pad_cr  = ref_pic_ptr->size_x_cr_pad;
+     height_pad_cr = ref_pic_ptr->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   // FAST_ACCESS only when the whole search window around the centre, including the block, lies inside the picture
+   if ((center_x > search_range) && (center_x < img_width - 1 - search_range - blocksize_x) &&
+     (center_y > search_range) && (center_y < img_height - 1 - search_range - blocksize_y))
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   //check the center median predictor
+   cand_x = center_x ;
+   cand_y = center_y ;
+   mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
+ 
+ 
+   mcost += computeUniPred[dist_method](orig_pic, blocksize_y,blocksize_x, min_mcost - mcost,
+     (cand_x << 2) + IMG_PAD_SIZE_TIMES4,  (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
+ 
+   if (mcost < min_mcost)
+   {
+     min_mcost = mcost;
+     best_x    = cand_x;
+     best_y    = cand_y;
+   }
+ 
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   // also try the zero displacement when the predictor is non-zero
+   if ((0 != pred_mv_x) || (0 != pred_mv_y))
+   {
+     cand_x = pic_pix_x;
+     cand_y = pic_pix_y;
+     SEARCH_ONE_PIXEL_HELPER
+   }
+ 
+   // If the min_mcost is small enough, do a local search then terminate
+   // This is good for stationary or quasi-stationary areas
+   if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
+   {
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     *mv_x = (short) (best_x - pic_pix_x);
+     *mv_y = (short) (best_y - pic_pix_y);
+     return min_mcost;
+   }
+ 
+   // Small local search
+   for (m = 0; m < 4; m++)
+   {
+     cand_x = iXMinNow + Diamond_X[m];
+     cand_y = iYMinNow + Diamond_Y[m];
+     SEARCH_ONE_PIXEL_HELPER
+   }
+ 
+   // First_step: Symmetrical-cross search
+   // If distortion is large, use large shapes. Otherwise, compact shapes are faster
+   if ( (blocktype == 1 &&
+     min_mcost > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
+     (min_mcost > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])) )
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+ 
+     // odd steps 1, 3, 5, ... to the right, left, below and above the current best
+     for(i = 1; i <= search_range/2; i++)
+     {
+       search_step = (i<<1) - 1;
+       cand_x = iXMinNow + search_step;
+       cand_y = iYMinNow;
+       SEARCH_ONE_PIXEL_HELPER
+ 
+       cand_x = iXMinNow - search_step;
+       SEARCH_ONE_PIXEL_HELPER
+ 
+       cand_x = iXMinNow;
+       cand_y = iYMinNow + search_step;
+       SEARCH_ONE_PIXEL_HELPER
+ 
+       cand_y = iYMinNow - search_step;
+       SEARCH_ONE_PIXEL_HELPER
+     }
+ 
+     // Hexagon Search
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_X[m];
+       cand_y = iYMinNow + Hexagon_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     // Multi Big Hexagon Search
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for(i = 1; i <= search_range/4; i++)
+     {
+       for (m = 0; m < 16; m++)
+       {
+         cand_x = iXMinNow + Big_Hexagon_X[m]*i;
+         cand_y = iYMinNow + Big_Hexagon_Y[m]*i;
+         SEARCH_ONE_PIXEL_HELPER
+       }
+     }
+   }
+ 
+   // Search up_layer predictor for non 16x16 blocks
+   if (blocktype > 1)
+   {
+     cand_x = pic_pix_x + (smpUMHEX_pred_MV_uplayer_X/4);
+     cand_y = pic_pix_y + (smpUMHEX_pred_MV_uplayer_Y/4);
+     SEARCH_ONE_PIXEL_HELPER
+   }
+ 
+   // when the centre is not the zero displacement, try the zero displacement and a diamond around the best
+   if(center_x != pic_pix_x || center_y != pic_pix_y)
+   {
+     cand_x = pic_pix_x;
+     cand_y = pic_pix_y;
+     SEARCH_ONE_PIXEL_HELPER
+ 
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     // Local diamond search
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+   }
+ 
+   // If the minimum cost is small enough, do a local search
+   // and finish the search here
+   if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     *mv_x = (short) (best_x - pic_pix_x);
+     *mv_y = (short) (best_y - pic_pix_y);
+     return min_mcost;
+   }
+ 
+   //second_step:  Extended Hexagon-based Search
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_X[m];
+       cand_y = iYMinNow + Hexagon_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     // The minimum cost point happens in the center
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ 
+   //third_step: Small diamond search
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+ 
+     // The minimum cost point happens in the center
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ 
+   // convert the best absolute position back into a displacement
+   *mv_x = (short) (best_x - pic_pix_x);
+   *mv_y = (short) (best_y - pic_pix_y);
+   return min_mcost;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sub pixel block motion search enhanced
+  *
+  *    Walks the spiral_hpel_search_* offsets (half-pel pass) and then the
+  *    spiral_search_* offsets (quarter-pel pass) around the incoming
+  *    vector.  Each pass stops early once min_mcost falls below
+  *    SubPelThreshold3 scaled by block_type_shift_factor[blocktype]; the
+  *    quarter-pel pass is skipped entirely for a cheap zero vector with a
+  *    zero predictor.
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ smpUMHEXFullSubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                                      short     ref,           // <--  reference frame (0... or -1 (backward))
+                                      int       list,          // <--  reference picture list
+                                      int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                      int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                      int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                      short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                                      short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                                      short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in quarter-pel units
+                                      short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in quarter-pel units
+                                      int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                                      int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                                      int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                      int       lambda_factor  // <--  lagrangian parameter for determining motion cost
+                                      )
+ {
+   int   idx, winner, cost;
+   int   try_mv_x, try_mv_y;
+   int   ref_x4, ref_y4;
+ 
+   // position 0 gets a cost bonus under these conditions (see the half-pel loop)
+   int   favour_pos0     = (!input->rdopt && img->type!=B_SLICE && ref==0 && blocktype==1 && *mv_x==0 && *mv_y==0);
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   thr_shift       = block_type_shift_factor[blocktype];
+   int   base_x4         = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
+   int   base_y4         = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+   int   hp_positions    = ( !start_me_refinement_hp ? imax(1,search_pos2) : search_pos2);
+   int   list_offset     = img->mb_data[img->current_mb_nr].list_offset;
+   int   lx              = list + list_offset;
+   int   wp_uni          = active_pps->weighted_pred_flag  && (img->type == P_SLICE || img->type == SP_SLICE);
+   int   wp_bi           = active_pps->weighted_bipred_idc && (img->type == B_SLICE);
+   int   apply_weights   = (wp_uni || wp_bi) && input->UseWeightedReferenceME;
+ 
+   StorablePicture *ref_picture = listX[lx][ref];
+ 
+   int   limit_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int   limit_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+ 
+   dist_method = Q_PEL + 3 * apply_weights;
+ 
+   //===== publish the reference picture to the distortion functions =====
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   img_width  = ref_picture->size_x;
+   img_height = ref_picture->size_y;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[lx][ref][0];
+     offset_luma = wp_offset[lx][ref][0];
+   }
+ 
+   if (ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[lx][ref][1];
+       weight_cr[1] = wp_weight[lx][ref][2];
+       offset_cr[0] = wp_offset[lx][ref][1];
+       offset_cr[1] = wp_offset[lx][ref][2];
+     }
+   }
+ 
+   /*********************************
+    *****  HALF-PEL REFINEMENT  *****
+    *********************************/
+ 
+   //===== choose the pixel access method (one position of margin on each side) =====
+   ref_access_method =
+     ((base_x4 + *mv_x > 1) && (base_x4 + *mv_x < limit_x4 - 1) &&
+      (base_y4 + *mv_y > 1) && (base_y4 + *mv_y < limit_y4 - 1)) ? FAST_ACCESS : UMV_ACCESS;
+ 
+   //===== walk the half-pel spiral =====
+   winner = 0;
+   for (idx = start_me_refinement_hp; idx < hp_positions; idx++)
+   {
+     try_mv_x = *mv_x + (spiral_hpel_search_x[idx]);    // quarter-pel units
+     try_mv_y = *mv_y + (spiral_hpel_search_y[idx]);    // quarter-pel units
+ 
+     //----- motion vector cost; a candidate that is already too expensive is skipped -----
+     cost = MV_COST_SMP (lambda_factor, try_mv_x, try_mv_y, pred_mv_x, pred_mv_y);
+ 
+     if (cost < min_mcost)
+     {
+       ref_x4 = try_mv_x + base_x4;
+       ref_y4 = try_mv_y + base_y4;
+ 
+       cost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - cost, ref_x4, ref_y4);
+ 
+       if (idx==0 && favour_pos0)
+       {
+         cost -= WEIGHTED_COST (lambda_factor, 16);
+       }
+ 
+       if (cost < min_mcost)
+       {
+         min_mcost = cost;
+         winner    = idx;
+       }
+       if (min_mcost < (SubPelThreshold3>>thr_shift))
+       {
+         break;
+       }
+     }
+   }
+ 
+   if (winner)
+   {
+     *mv_x += (spiral_hpel_search_x [winner]);
+     *mv_y += (spiral_hpel_search_y [winner]);
+   }
+ 
+   //===== cheap zero vector with zero predictor: no quarter-pel pass =====
+   if ((*mv_x == 0) && (*mv_y == 0) && (pred_mv_x == 0 && pred_mv_y == 0) &&
+     (min_mcost < (SubPelThreshold1>>thr_shift)) )
+   {
+     return min_mcost;
+   }
+ 
+   if ( !start_me_refinement_qp )
+     min_mcost = INT_MAX;
+ 
+   /************************************
+    *****  QUARTER-PEL REFINEMENT  *****
+    ************************************/
+ 
+   //===== choose the pixel access method =====
+   ref_access_method =
+     ((base_x4 + *mv_x > 0) && (base_x4 + *mv_x < limit_x4) &&
+      (base_y4 + *mv_y > 0) && (base_y4 + *mv_y < limit_y4)) ? FAST_ACCESS : UMV_ACCESS;
+ 
+   //===== walk the quarter-pel spiral =====
+   winner = 0;
+   for (idx = start_me_refinement_qp; idx < search_pos4; idx++)
+   {
+     try_mv_x = *mv_x + spiral_search_x[idx];    // quarter-pel units
+     try_mv_y = *mv_y + spiral_search_y[idx];    // quarter-pel units
+ 
+     //----- motion vector cost; a candidate that is already too expensive is skipped -----
+     cost = MV_COST_SMP (lambda_factor, try_mv_x, try_mv_y, pred_mv_x, pred_mv_y);
+ 
+     if (cost < min_mcost)
+     {
+       ref_x4 = try_mv_x + base_x4;
+       ref_y4 = try_mv_y + base_y4;
+ 
+       cost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - cost, ref_x4, ref_y4);
+ 
+       if (cost < min_mcost)
+       {
+         min_mcost = cost;
+         winner    = idx;
+       }
+       if (min_mcost < (SubPelThreshold3>>thr_shift))
+       {
+         break;
+       }
+     }
+   }
+ 
+   if (winner)
+   {
+     *mv_x += spiral_search_x [winner];
+     *mv_y += spiral_search_y [winner];
+   }
+ 
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fast sub pixel block motion estimation
+  *
+  *    Evaluates the incoming vector (unless start_me_refinement_hp is
+  *    set), then the position given by the fractional part of the
+  *    predictor, then up to three rounds of a small diamond.  Candidates
+  *    are limited to a +/-3 window (quarter-pel units) around the incoming
+  *    vector, and smpUMHEX_SearchState records the positions already
+  *    evaluated so that none is computed twice.
+  *
+  *    The reference picture is looked up from list / ref on every call;
+  *    the function does not depend on a preceding integer-pel search
+  *    having selected the same picture.
+  ************************************************************************
+  */
+ int                                     //  ==> minimum motion cost after search
+ smpUMHEXSubPelBlockMotionSearch  (
+                                   imgpel* orig_pic,        // <--  original pixel values for the AxB block
+                                   short     ref,           // <--  reference frame (0... or -1 (backward))
+                                   int       list,          // <--  reference picture list
+                                   int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                   int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                   int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                   short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                                   short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                                   short*    mv_x,          // <--> in: search center (x) / out: MV (x) - in quarter-pel units
+                                   short*    mv_y,          // <--> in: search center (y) / out: MV (y) - in quarter-pel units
+                                   int       search_pos2,   // <--  search positions for    half-pel search  (not used here)
+                                   int       search_pos4,   // <--  search positions for quarter-pel search  (not used here)
+                                   int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                   int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ {
+   int   mcost;
+   int   cand_mv_x, cand_mv_y;
+ 
+   int   list_offset     = ((img->MbaffFrameFlag) &&
+     (img->mb_data[img->current_mb_nr].mb_field)) ?
+     img->current_mb_nr%2 ? 4 : 2 : 0;
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+ 
+   short mv_shift        = 0;                               // vectors are already in quarter-pel units
+   short blocksize_x     = (short) input->blc_size[blocktype][0];
+   short blocksize_y     = (short) input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<<2);
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<<2);
+   // kept as int: a short would truncate the limit for very large pictures
+   int   max_pos_x4      = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int   max_pos_y4      = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+ 
+   int   iXMinNow, iYMinNow;
+   short dynamic_search_range, i, m;
+   // NOTE(review): stays (0,0) if start_me_refinement_hp is 0 and the incoming
+   // vector does not beat min_mcost - confirm callers pass a large min_mcost then
+   int   currmv_x = 0, currmv_y = 0;
+   int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
+   int   pred_frac_up_mv_x, pred_frac_up_mv_y;
+   int   apply_weights = ( (active_pps->weighted_pred_flag  &&
+     (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) )
+     && input->UseWeightedReferenceME;
+ 
+   dist_method = Q_PEL + 3 * apply_weights;
+ 
+   // Publish the picture selected by list / ref to the distortion functions.
+   // The file-static ref_pic_ptr is deliberately not used here: it only reflects
+   // whatever the last integer-pel search selected.
+   ref_pic_sub.luma = ref_picture->imgY_sub;
+   img_width  = ref_picture->size_x;
+   img_height = ref_picture->size_y;
+   width_pad  = ref_picture->size_x_pad;
+   height_pad = ref_picture->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight_luma = wp_weight[list + list_offset][ref][0];
+     offset_luma = wp_offset[list + list_offset][ref][0];
+   }
+ 
+   if (ChromaMEEnable)
+   {
+     ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
+     ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
+     width_pad_cr  = ref_picture->size_x_cr_pad;
+     height_pad_cr = ref_picture->size_y_cr_pad;
+ 
+     if (apply_weights)
+     {
+       weight_cr[0] = wp_weight[list + list_offset][ref][1];
+       weight_cr[1] = wp_weight[list + list_offset][ref][2];
+       offset_cr[0] = wp_offset[list + list_offset][ref][1];
+       offset_cr[1] = wp_offset[list + list_offset][ref][2];
+     }
+   }
+ 
+ 
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1))
+   {
+     ref_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     ref_access_method = UMV_ACCESS;
+   }
+ 
+   dynamic_search_range = 3;
+   // fractional distance from the incoming vector to the predictor
+   pred_frac_mv_x = (pred_mv_x - *mv_x) % 4;
+   pred_frac_mv_y = (pred_mv_y - *mv_y) % 4;
+ 
+   // same for the up-layer predictor
+   pred_frac_up_mv_x = (smpUMHEX_pred_MV_uplayer_X - *mv_x) % 4;
+   pred_frac_up_mv_y = (smpUMHEX_pred_MV_uplayer_Y - *mv_y) % 4;
+ 
+   // clear the 7x7 visited map
+   // NOTE(review): relies on the rows being one contiguous byte array - confirm in get_mem2D
+   memset(smpUMHEX_SearchState[0], 0,
+     (2*dynamic_search_range+1)*(2*dynamic_search_range+1));
+ 
+   // the centre of the map is the incoming vector
+   smpUMHEX_SearchState[dynamic_search_range][dynamic_search_range] = 1;
+   if( !start_me_refinement_hp )
+   {
+     cand_mv_x = *mv_x;
+     cand_mv_y = *mv_y;
+     mcost   = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     mcost   += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x  = cand_mv_x;
+       currmv_y  = cand_mv_y;
+     }
+   }
+   else
+   {
+     currmv_x = *mv_x;
+     currmv_y = *mv_y;
+   }
+ 
+   // If the min_mcost is small enough and other statistics are positive,
+   // better to stop the search now
+   if ( ((*mv_x) == 0) && ((*mv_y) == 0) &&
+     (pred_frac_mv_x == 0 && pred_frac_up_mv_x == 0) &&
+     (pred_frac_mv_y == 0 && pred_frac_up_mv_y == 0) &&
+     (min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) )
+   {
+     *mv_x = (short) currmv_x;
+     *mv_y = (short) currmv_y;
+     return min_mcost;
+   }
+ 
+   // Try the position suggested by the fractional part of the predictor
+   if(pred_frac_mv_x || pred_frac_mv_y)
+   {
+     cand_mv_x = *mv_x + pred_frac_mv_x;
+     cand_mv_y = *mv_y + pred_frac_mv_y;
+     mcost   = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     mcost   += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+       min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+     smpUMHEX_SearchState[cand_mv_y -*mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range] = 1;
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x  = cand_mv_x;
+       currmv_y  = cand_mv_y;
+     }
+   }
+ 
+   // Multiple small diamond search
+   for(i = 0; i < dynamic_search_range; i++)
+   {
+     abort_search = 1;
+ 
+     iXMinNow = currmv_x;
+     iYMinNow = currmv_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_mv_x = iXMinNow + Diamond_X[m];
+       cand_mv_y = iYMinNow + Diamond_Y[m];
+ 
+       // stay inside the visited map
+       if(iabs(cand_mv_x - *mv_x) <= dynamic_search_range && iabs(cand_mv_y - *mv_y) <= dynamic_search_range)
+       {
+         if(!smpUMHEX_SearchState[cand_mv_y - *mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range])
+         {
+           mcost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+           mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
+             min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
+ 
+           smpUMHEX_SearchState[cand_mv_y - *mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range] = 1;
+ 
+           if (mcost < min_mcost)
+           {
+             min_mcost    = mcost;
+             currmv_x     = cand_mv_x;
+             currmv_y     = cand_mv_y;
+             abort_search = 0;
+           }
+           // good enough: report the current best immediately
+           if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
+           {
+             *mv_x = (short) currmv_x;
+             *mv_y = (short) currmv_y;
+             return min_mcost;
+           }
+         }
+       }
+     }
+     // If the minimum cost point is in the center, break out the loop
+     if (abort_search)
+     {
+       break;
+     }
+   }
+ 
+   *mv_x = (short) currmv_x;
+   *mv_y = (short) currmv_y;
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    smpUMHEXBipredIntegerPelBlockMotionSearch: fast integer-pel block motion search for bipred mode.
+  *    Writes the result to (*mv_x, *mv_y) only; (*s_mv_x, *s_mv_y) is read but not written by the visible code.
+  ************************************************************************
+  */
+ int                                                           //  ==> minimum motion cost after search
+ smpUMHEXBipredIntegerPelBlockMotionSearch (imgpel* cur_pic,  // <--  original pixel values for the AxB block
+                                      short     ref,           // <--  reference frame (0... or -1 (backward))
+                                      int       list,          // <--  reference picture list (indexes listX together with list_offset)
+                                      int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                      int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                      int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                      short     pred_mv_x1,    // <--  motion vector predictor (x) in sub-pel units
+                                      short     pred_mv_y1,    // <--  motion vector predictor (y) in sub-pel units
+                                      short     pred_mv_x2,    // <--  motion vector predictor (x) in sub-pel units
+                                      short     pred_mv_y2,    // <--  motion vector predictor (y) in sub-pel units
+                                      short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                                      short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                                      short*    s_mv_x,        // <--  center (x) of the other prediction - in pel units (only read in the visible code)
+                                      short*    s_mv_y,        // <--  center (y) of the other prediction - in pel units (only read in the visible code)
+                                      int       search_range,  // <--  1-d search range in pel units
+                                      int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                      int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ {
+   int   mvshift       = 2;                              // motion vector shift for getting sub-pel units
+ 
+   int   search_step, iYMinNow, iXMinNow;
+   int   i, m;
+   int   cand_x, cand_y, mcost;
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset;
+   int   blocksize_y   = input->blc_size[blocktype][1];  // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];  // horizontal block size
+   int   pred_x1       = (pic_pix_x << 2) + pred_mv_x1;  // predicted position x (in sub-pel units)
+   int   pred_y1       = (pic_pix_y << 2) + pred_mv_y1;  // predicted position y (in sub-pel units)
+   int   pred_x2       = (pic_pix_x << 2) + pred_mv_x2;  // predicted position x (in sub-pel units)
+   int   pred_y2       = (pic_pix_y << 2) + pred_mv_y2;  // predicted position y (in sub-pel units)
+   short center2_x     = pic_pix_x + *mv_x;              // center position x (in pel units)
+   short center2_y     = pic_pix_y + *mv_y;              // center position y (in pel units)
+   short center1_x     = pic_pix_x + *s_mv_x;            // mvx of second pred (in pel units)
+   short center1_y     = pic_pix_y + *s_mv_y;            // mvy of second pred (in pel units)
+   int   best_x        = center2_x;                      // best position so far starts at the search center
+   int   best_y        = center2_y;
+ 
+   short apply_weights = (active_pps->weighted_bipred_idc>0);  // selects the weighted branches below
+   short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref][0]:  wp_offset[list_offset + 1][0  ][ref]) : 0);
+   short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref][0]:  wp_offset[list_offset    ][0  ][ref]) : 0);
+ 
+   ref_pic1_sub.luma = listX[list + list_offset][ref]->imgY_sub;                        // picture `ref` of the requested list
+   ref_pic2_sub.luma = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgY_sub;  // entry 0 of list (1 + list_offset) when list == 0, else of list list_offset
+   img_width  = listX[list + list_offset][ref]->size_x;   // all dimensions below are taken from the first picture only
+   img_height = listX[list + list_offset][ref]->size_y;
+   width_pad  = listX[list + list_offset][ref]->size_x_pad;
+   height_pad = listX[list + list_offset][ref]->size_y_pad;
+ 
+   if (apply_weights)
+   {
+     weight1 = list == 0 ? wbp_weight[list_offset         ][ref][0][0] : wbp_weight[list_offset + LIST_1][0  ][ref][0];
+     weight2 = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][0] : wbp_weight[list_offset         ][0  ][ref][0];
+     offsetBi=(offset1 + offset2 + 1)>>1;   // rounded mean of the two offsets
+     computeBiPred = computeBiPredSAD2; //ME only supports SAD computations
+   }
+   else
+   {
+     weight1 = 1<<luma_log_weight_denom;
+     weight2 = 1<<luma_log_weight_denom;
+     offsetBi = 0;
+     computeBiPred = computeBiPredSAD1; //ME only supports SAD computations
+   }
+ 
+   if (ChromaMEEnable )   // same setup for the two chroma planes (indices 1 and 2 of the weight/offset tables)
+   {
+     ref_pic1_sub.crcb[0] = listX[list + list_offset][ref]->imgUV_sub[0];
+     ref_pic1_sub.crcb[1] = listX[list + list_offset][ref]->imgUV_sub[1];
+     ref_pic2_sub.crcb[0] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[0];
+     ref_pic2_sub.crcb[1] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[1];
+     width_pad_cr  = listX[list + list_offset][ref]->size_x_cr_pad;
+     height_pad_cr = listX[list + list_offset][ref]->size_y_cr_pad;
+     if (apply_weights)
+     {
+       weight1_cr[0] = list == 0 ? wbp_weight[list_offset         ][ref][0][1] : wbp_weight[list_offset + LIST_1][0  ][ref][1];
+       weight1_cr[1] = list == 0 ? wbp_weight[list_offset         ][ref][0][2] : wbp_weight[list_offset + LIST_1][0  ][ref][2];
+       weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
+       weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
+       offsetBi_cr[0] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
+       offsetBi_cr[1] = (list == 0)
+         ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
+         : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
+     }
+     else
+     {
+       weight1_cr[0] = 1<<chroma_log_weight_denom;
+       weight1_cr[1] = 1<<chroma_log_weight_denom;
+       weight2_cr[0] = 1<<chroma_log_weight_denom;
+       weight2_cr[1] = 1<<chroma_log_weight_denom;
+       offsetBi_cr[0] = 0;
+       offsetBi_cr[1] = 0;
+     }
+   }
+ 
+   // FAST_ACCESS for the searched vector only if center2 is more than search_range away from every picture edge (block size included on the right/bottom)
+   if ((center2_x > search_range) && (center2_x < img_width -1-search_range-blocksize_x) &&
+       (center2_y > search_range) && (center2_y < img_height-1-search_range-blocksize_y))
+   {
+     bipred2_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred2_access_method = UMV_ACCESS;
+   }
+ 
+   // Access method for center1: only the vertical position is tested. NOTE(review): there is no x test here - confirm this is intended
+   if ((center1_y > search_range) && (center1_y < img_height-1-search_range-blocksize_y))
+   {
+     bipred1_access_method = FAST_ACCESS;
+   }
+   else
+   {
+     bipred1_access_method = UMV_ACCESS;
+   }
+ 
+   // Check the search center: MV cost of both vectors plus the bipred distortion at (center1, center2)
+   cand_x = center2_x ;
+   cand_y = center2_y ;
+   mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);
+   mcost += MV_COST (lambda_factor, mvshift, cand_x,    cand_y,    pred_x2, pred_y2);
+ 
+   mcost += computeBiPred(cur_pic,
+                         blocksize_y, blocksize_x, INT_MAX,
+                         (center1_x << 2) + IMG_PAD_SIZE_TIMES4,
+                         (center1_y << 2) + IMG_PAD_SIZE_TIMES4,
+                         (cand_x << 2) + IMG_PAD_SIZE_TIMES4,
+                         (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
+ 
+   if (mcost < min_mcost)
+   {
+     min_mcost = mcost;
+     best_x = cand_x;
+     best_y = cand_y;
+   }
+ 
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   if (0 != pred_mv_x1 || 0 != pred_mv_y1 || 0 != pred_mv_x2 || 0 != pred_mv_y2)
+   {
+     cand_x = pic_pix_x;   // candidate equal to the block position, i.e. a zero motion vector
+     cand_y = pic_pix_y;
+     SEARCH_ONE_PIXEL_BIPRED_HELPER;   // macro defined outside this chunk; presumably evaluates (cand_x, cand_y) and updates best_x/best_y/min_mcost - TODO confirm
+   }
+ 
+   // If the min_mcost is small enough, do a local search then terminate
+   // This is good for stationary or quasi-stationary areas
+   if ((min_mcost<<3) < (ConvergeThreshold>>(block_type_shift_factor[blocktype])))
+   {
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];   // iXMinNow/iYMinNow still hold the values assigned before the zero-vector check above
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_BIPRED_HELPER;
+     }
+     *mv_x = best_x - pic_pix_x;   // convert the best absolute position back to a motion vector
+     *mv_y = best_y - pic_pix_y;
+     return min_mcost;
+   }
+ 
+   // Small local search
+   for (m = 0; m < 4; m++)
+   {
+     cand_x = iXMinNow + Diamond_X[m];
+     cand_y = iYMinNow + Diamond_Y[m];
+     SEARCH_ONE_PIXEL_BIPRED_HELPER;
+   }
+ 
+   // First_step: Symmetrical-cross search
+   // If distortion is large, use large shapes. Otherwise, compact shapes are faster
+   if ((blocktype == 1 &&
+     (min_mcost<<2) > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
+     ((min_mcost<<2) > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])))
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+ 
+     for (i = 1; i <= search_range / 2; i++)
+     {
+       search_step = (i<<1) - 1;   // odd offsets 1, 3, 5, ... tried right, left, below and above the center
+       cand_x = iXMinNow + search_step;
+       cand_y = iYMinNow;
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+ 
+       cand_x = iXMinNow - search_step;
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+ 
+       cand_x = iXMinNow;
+       cand_y = iYMinNow + search_step;
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+ 
+       cand_y = iYMinNow - search_step;
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+     }
+ 
+     // Hexagon Search
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_X[m];
+       cand_y = iYMinNow + Hexagon_Y[m];
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+     }
+     // Multi Big Hexagon Search
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for(i = 1; i <= search_range / 4; i++)
+     {
+       for (m = 0; m < 16; m++)
+       {
+         cand_x = iXMinNow + Big_Hexagon_X[m] * i;   // the 16-point pattern is scaled by i around a center that stays fixed during this loop
+         cand_y = iYMinNow + Big_Hexagon_Y[m] * i;
+         SEARCH_ONE_PIXEL_BIPRED_HELPER
+       }
+     }
+   }
+ 
+   // Search up_layer predictor for non 16x16 blocks
+   if (blocktype > 1)
+   {
+     cand_x = pic_pix_x + (smpUMHEX_pred_MV_uplayer_X / 4);   // presumably quarter-pel -> pel conversion; TODO confirm units
+     cand_y = pic_pix_y + (smpUMHEX_pred_MV_uplayer_Y / 4);
+     SEARCH_ONE_PIXEL_BIPRED_HELPER
+   }
+ 
+   if(center2_x != pic_pix_x || center2_y != pic_pix_y)   // search center differs from the zero-vector position
+   {
+     cand_x = pic_pix_x;
+     cand_y = pic_pix_y;
+     SEARCH_ONE_PIXEL_BIPRED_HELPER
+ 
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     // Local diamond search
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+     }
+   }
+ 
+   // If the minimum cost is small enough, do a local search
+   // and finish the search here
+   if ((min_mcost<<2) < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+     }
+     *mv_x = (short) (best_x - pic_pix_x);
+     *mv_y = (short) (best_y - pic_pix_y);
+     return min_mcost;
+   }
+ 
+   // Second_step:  Extended Hexagon-based Search
+   for (i = 0; i < search_range; i++)   // at most search_range re-centerings
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_X[m];
+       cand_y = iYMinNow + Hexagon_Y[m];
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+     }
+     // The minimum cost point happens in the center
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ 
+   // Third_step: Small diamond search
+   for (i = 0; i < search_range; i++)   // at most search_range re-centerings
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_BIPRED_HELPER
+     }
+ 
+     // The minimum cost point happens in the center
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ 
+   *mv_x = (short) (best_x - pic_pix_x);
+   *mv_y = (short) (best_y - pic_pix_y);
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Set smpUMHEX_flag_intra_SAD from the smpUMHEX_flag_intra entries around index img->pix_x>>4
+  *    (nothing is changed for I slices); used for fast motion estimation
+  ************************************************************************
+  */
+ void smpUMHEX_decide_intrabk_SAD()
+ {
+   if (img->type != I_SLICE)
+   {
+     if (img->pix_x == 0 && img->pix_y == 0)   // top-left position: no entry is consulted
+     {
+       smpUMHEX_flag_intra_SAD = 0;
+     }
+     else if (img->pix_x == 0)                 // left edge: the index evaluates to 0 here
+     {
+       smpUMHEX_flag_intra_SAD = smpUMHEX_flag_intra[(img->pix_x)>>4];
+     }
+     else if (img->pix_y == 0)                 // top row (pix_x != 0): only the entry one index to the left
+     {
+       smpUMHEX_flag_intra_SAD = smpUMHEX_flag_intra[((img->pix_x)>>4)-1];
+     }
+     else                                      // elsewhere: logical OR of the entries at index, index-1 and index+1
+     {
+       smpUMHEX_flag_intra_SAD = ((smpUMHEX_flag_intra[(img->pix_x)>>4])||
+         (smpUMHEX_flag_intra[((img->pix_x)>>4)-1])||
+         (smpUMHEX_flag_intra[((img->pix_x)>>4)+1])) ;   // NOTE(review): whether index+1 is valid at the right edge depends on the allocated length (not visible here)
+     }
+   }
+   return;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Record in smpUMHEX_flag_intra whether best_mode is 9 or 10 (presumably the intra modes - TODO confirm) and,
+  *    for such a mode outside I slices, zero part of the stored cost tables; ref_max is not used. For fast motion estimation
+  ************************************************************************
+  */
+ void smpUMHEX_skip_intrabk_SAD(int best_mode, int ref_max)
+ {
+   short i, j, k;
+ 
+   if (img->number > 0)   // the flag is left untouched while img->number is 0
+   {
+     smpUMHEX_flag_intra[(img->pix_x)>>4] = (best_mode == 9 || best_mode == 10) ? 1 : 0;
+   }
+ 
+   if (img->type != I_SLICE  && (best_mode == 9 || best_mode == 10))
+   {
+     for (i=0; i < 4; i++)
+     {
+       for (j=0; j < 4; j++)
+       {
+         for (k=0; k < 9;k++)   // first index 0..8; NOTE(review): [j][i] are fixed at 0..3 and not offset by img->pix_y/pix_x as in smpUMHEX_setup - confirm intended
+         {
+           smpUMHEX_l0_cost[k][j][i] = 0;
+           smpUMHEX_l1_cost[k][j][i] = 0;
+         }
+       }
+     }
+   }
+   return;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Set up prediction MV and prediction up layer cost for blocktype > 1 (blocktype 1 leaves the globals untouched);
+  *    upper-layer type used: 5 for blocktype 7, 4 for 5-6, 2 for 4, 1 for 2-3. Used for fast motion estimation
+  ************************************************************************
+  */
+ void smpUMHEX_setup(short ref,
+                           int list,
+                           int block_y,
+                           int block_x,
+                           int blocktype,
+                           short ******all_mv)   // indexed here as [block_y][block_x][list][ref][type][0 = x, 1 = y]
+ {
+   if (blocktype > 6)
+   {
+     smpUMHEX_pred_MV_uplayer_X = all_mv[block_y][block_x][list][ref][5][0];
+     smpUMHEX_pred_MV_uplayer_Y = all_mv[block_y][block_x][list][ref][5][1];
+   }
+   else if (blocktype > 4)
+   {
+     smpUMHEX_pred_MV_uplayer_X = all_mv[block_y][block_x][list][ref][4][0];
+     smpUMHEX_pred_MV_uplayer_Y = all_mv[block_y][block_x][list][ref][4][1];
+   }
+   else if (blocktype == 4)
+   {
+     smpUMHEX_pred_MV_uplayer_X = all_mv[block_y][block_x][list][ref][2][0];
+     smpUMHEX_pred_MV_uplayer_Y = all_mv[block_y][block_x][list][ref][2][1];
+   }
+   else if (blocktype > 1)
+   {
+     smpUMHEX_pred_MV_uplayer_X = all_mv[block_y][block_x][list][ref][1][0];
+     smpUMHEX_pred_MV_uplayer_Y = all_mv[block_y][block_x][list][ref][1][1];
+   }
+ 
+   if (blocktype > 1)   // same type mapping as above, now for the stored cost; list 1 reads l1_cost, any other list reads l0_cost
+   {
+     if (blocktype > 6)
+     {
+       smpUMHEX_pred_SAD_uplayer = (list==1) ?
+         (smpUMHEX_l1_cost[5][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
+         : (smpUMHEX_l0_cost[5][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
+       smpUMHEX_pred_SAD_uplayer /= 2;   // the stored upper-layer cost is halved in every branch
+     }
+     else if (blocktype > 4)
+     {
+       smpUMHEX_pred_SAD_uplayer = (list==1) ?
+         (smpUMHEX_l1_cost[4][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
+         : (smpUMHEX_l0_cost[4][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
+       smpUMHEX_pred_SAD_uplayer /= 2;
+     }
+     else if (blocktype == 4)
+     {
+       smpUMHEX_pred_SAD_uplayer = (list==1) ?
+         (smpUMHEX_l1_cost[2][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
+         : (smpUMHEX_l0_cost[2][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
+       smpUMHEX_pred_SAD_uplayer /= 2;
+     }
+     else
+     {
+       smpUMHEX_pred_SAD_uplayer = (list==1) ?
+         (smpUMHEX_l1_cost[1][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
+         : (smpUMHEX_l0_cost[1][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
+       smpUMHEX_pred_SAD_uplayer /= 2;
+     }
+ 
+     smpUMHEX_pred_SAD_uplayer = smpUMHEX_flag_intra_SAD ? 0 : smpUMHEX_pred_SAD_uplayer;   // a set flag overrides the prediction with 0
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/me_umhexsmp.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/me_umhexsmp.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/me_umhexsmp.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,123 ----
+ 
+ /*!
+  *************************************************************************************
+  *
+  * \file me_umhexsmp.h
+  *
+  * \brief
+  *   Fast integer pixel and sub pixel motion estimation
+  *   Improved and simplified from the original UMHexagonS algorithms
+  *   See JVT-P021 for details
+  *
+  * \author
+  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen                      <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu                     <fenax at video.mdc.tsinghua.edu.cn>
+  *    - Wenfang Fu                      <fwf at video.mdc.tsinghua.edu.cn>
+  *
+  *    - Xiaoquan Yi                     <xyi at engr.scu.edu>
+  *    - Jun Zhang                       <jzhang2 at engr.scu.edu>
+  *
+  * \date
+  *    6. Nov. 2006
+  *************************************************************************************
+  */
+ 
+ #ifndef _ME_UMHEXSMP_H_
+ #define _ME_UMHEXSMP_H_
+ 
+ #include "mbuffer.h"
+ // NOTE(review): the objects below are declared without `extern`, so each file including this header gets a tentative definition; linking relies on common symbols (breaks with -fno-common)
+ unsigned short  SymmetricalCrossSearchThreshold1;   // thresholds: the bipred search compares them, shifted right by block_type_shift_factor[blocktype], with scaled costs
+ unsigned short  SymmetricalCrossSearchThreshold2;
+ unsigned short  ConvergeThreshold;
+ unsigned short  SubPelThreshold1;
+ unsigned short  SubPelThreshold3;
+ 
+ byte  **smpUMHEX_SearchState;          //state for fractional pel search
+ int  ***smpUMHEX_l0_cost;       //store SAD information needed for forward median and uplayer prediction
+ int  ***smpUMHEX_l1_cost;       //store SAD information needed for backward median and uplayer prediction
+ byte   *smpUMHEX_flag_intra;           // written by smpUMHEX_skip_intrabk_SAD at index img->pix_x>>4 (1 when best_mode is 9 or 10)
+ int     smpUMHEX_flag_intra_SAD;       // set by smpUMHEX_decide_intrabk_SAD; nonzero makes smpUMHEX_setup force the SAD prediction to 0
+ 
+ int     smpUMHEX_pred_SAD_uplayer;     // Up layer SAD prediction
+ short   smpUMHEX_pred_MV_uplayer_X;    // Up layer MV predictor X-component
+ short   smpUMHEX_pred_MV_uplayer_Y;    // Up layer MV predictor Y-component
+ 
+ void    smpUMHEX_init(void);
+ int     smpUMHEX_get_mem(void);
+ void    smpUMHEX_free_mem(void);
+ void    smpUMHEX_decide_intrabk_SAD(void);                        // updates smpUMHEX_flag_intra_SAD
+ void    smpUMHEX_skip_intrabk_SAD(int, int);                      // (best_mode, ref_max); ref_max is unused by the implementation
+ void    smpUMHEX_setup(short, int, int, int, int, short ******);  // (ref, list, block_y, block_x, blocktype, all_mv)
+ 
+ int                                     //  ==> minimum motion cost after search
+ smpUMHEXIntegerPelBlockMotionSearch (
+                imgpel   *orig_pic,      // <--  not used
+                short     ref,           // <--  reference frame (0... or -1 (backward))
+                int       list,          // <--  reference picture list
+                int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                short*    mv_x,          //  --> motion vector (x) - in pel units
+                short*    mv_y,          //  --> motion vector (y) - in pel units
+                int       search_range,  // <--  1-d search range in pel units
+                int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ int                                     //  ==> minimum motion cost after search
+ smpUMHEXSubPelBlockMotionSearch  (
+                imgpel* orig_pic,        // <--  original pixel values for the AxB block
+                short     ref,           // <--  reference frame (0... or -1 (backward))
+                int       list,          // <--  reference picture list
+                int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ int                                               //  ==> minimum motion cost after search
+ smpUMHEXFullSubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame (0... or -1 (backward))
+                          int       list,          // <--  reference picture list
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ int                                                           //  ==> minimum motion cost after search
+ smpUMHEXBipredIntegerPelBlockMotionSearch (imgpel* cur_pic,  // <--  original pixel values for the AxB block
+                                            short     ref,           // <--  reference frame (0... or -1 (backward))
+                                            int       list,          // <--  reference picture list
+                                            int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                            int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                            int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                            short     pred_mv_x1,    // <--  motion vector predictor (x) in sub-pel units
+                                            short     pred_mv_y1,    // <--  motion vector predictor (y) in sub-pel units
+                                            short     pred_mv_x2,    // <--  motion vector predictor (x) in sub-pel units
+                                            short     pred_mv_y2,    // <--  motion vector predictor (y) in sub-pel units
+                                            short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                                            short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                                            short*    s_mv_x,        // <--  center (x) of the other prediction - in pel units (only read by the visible implementation)
+                                            short*    s_mv_y,        // <--  center (y) of the other prediction - in pel units (only read by the visible implementation)
+                                            int       search_range,  // <--  1-d search range in pel units
+                                            int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                            int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/memalloc.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/memalloc.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/memalloc.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1145 @@
+
+/*!
+ ************************************************************************
+ * \file  memalloc.c
+ *
+ * \brief
+ *    Memory allocation and free helper functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ ************************************************************************
+ */
+
+#include <stdlib.h>
+
+#include "global.h"
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Allocate two tables of rows/2 line pointers: the top field table points to the even
+ *    lines of imgFrame, the bottom field table to the odd lines. No pixel data is copied.
+ *    The columns argument is not used.
+ * \par Output:
+ *    rows*sizeof(imgpel*), i.e. the size of the pointer tables in bytes
+ ************************************************************************/
+int init_top_bot_planes(imgpel **imgFrame, int rows, int columns, imgpel ***imgTopField, imgpel ***imgBotField)
+{
+  int i;
+
+  if((*imgTopField   = (imgpel**)calloc(rows/2,        sizeof(imgpel*))) == NULL)
+    no_mem_exit("init_top_bot_planes: imgTopField");
+
+  if((*imgBotField   = (imgpel**)calloc(rows/2,        sizeof(imgpel*))) == NULL)
+    no_mem_exit("init_top_bot_planes: imgBotField");
+
+  for(i=0 ; i<rows/2 ; i++)
+  {
+    (*imgTopField)[i] =  imgFrame[2*i  ];   /* even frame line */
+    (*imgBotField)[i] =  imgFrame[2*i+1];   /* odd frame line */
+  }
+
+  return rows*sizeof(imgpel*);
+}
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    free 2-dimensional top and bottom fields without freeing target memory
+ *
+ * \par Output:
+ *    none (only the two pointer tables themselves are passed to free)
+ ************************************************************************/
+void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField)
+{
+  free (imgTopField);
+  free (imgBotField);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> imgpel array2D[rows][columns]
+ *    One zero-initialized block holds all elements; a separate table of row pointers indexes into it.
+ * \par Output:
+ *    rows*columns*sizeof(imgpel) in bytes (the row pointer table is not counted)
+ ************************************************************************/
+int get_mem2Dpel(imgpel ***array2D, int rows, int columns)
+{
+  int i;
+
+  if((*array2D      = (imgpel**)calloc(rows,        sizeof(imgpel*))) == NULL)
+    no_mem_exit("get_mem2Dpel: array2D");   /* defined elsewhere; presumably does not return - TODO confirm */
+  if(((*array2D)[0] = (imgpel* )calloc(rows*columns,sizeof(imgpel ))) == NULL)
+    no_mem_exit("get_mem2Dpel: array2D");
+
+  for(i=1 ; i<rows ; i++)
+    (*array2D)[i] =  (*array2D)[i-1] + columns  ;   /* each row starts columns elements after the previous one */
+
+  return rows*columns*sizeof(imgpel);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> imgpel array3D[frames][rows][columns]
+ *    (a table of frames entries, each filled by its own get_mem2Dpel call)
+ * \par Output:
+ *    frames*rows*columns*sizeof(imgpel) in bytes (pointer tables are not counted)
+ ************************************************************************
+ */
+int get_mem3Dpel(imgpel ****array3D, int frames, int rows, int columns)
+{
+  int  j;
+
+  if(((*array3D) = (imgpel***)calloc(frames,sizeof(imgpel**))) == NULL)
+    no_mem_exit("get_mem3Dpel: array3D");
+
+  for(j=0;j<frames;j++)
+    get_mem2Dpel( (*array3D)+j, rows, columns ) ;   /* the size returned per plane is ignored */
+
+  return frames*rows*columns*sizeof(imgpel);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 4D memory array -> imgpel array4D[sub_x][sub_y][rows][columns]
+ *    (a table of sub_x entries, each filled by its own get_mem3Dpel call)
+ * \par Output:
+ *    sub_x*sub_y*rows*columns*sizeof(imgpel) in bytes (pointer tables are not counted)
+ ************************************************************************
+ */
+int get_mem4Dpel(imgpel *****array4D, int sub_x, int sub_y, int rows, int columns)
+{
+  int  j;
+
+  if(((*array4D) = (imgpel****)calloc(sub_x,sizeof(imgpel***))) == NULL)
+    no_mem_exit("get_mem4Dpel: array4D");
+
+  for(j=0;j<sub_x;j++)
+    get_mem3Dpel( (*array4D)+j, sub_y, rows, columns ) ;   /* the size returned per sub-array is ignored */
+
+  return sub_x*sub_y*rows*columns*sizeof(imgpel);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 5D memory array -> imgpel array5D[dims][sub_x][sub_y][rows][columns]
+ *    (a table of dims entries, each filled by its own get_mem4Dpel call)
+ * \par Output:
+ *    dims*sub_x*sub_y*rows*columns*sizeof(imgpel) in bytes (pointer tables are not counted)
+ ************************************************************************
+ */
+int get_mem5Dpel(imgpel ******array5D, int dims, int sub_x, int sub_y, int rows, int columns)
+{
+  int  j;
+
+  if(((*array5D) = (imgpel*****)calloc(dims,sizeof(imgpel****))) == NULL)
+    no_mem_exit("get_mem5Dpel: array5D");
+
+  for(j=0;j<dims;j++)
+    get_mem4Dpel( (*array5D)+j, sub_x, sub_y, rows, columns ) ;   /* the size returned per sub-array is ignored */
+
+  return dims*sub_x*sub_y*rows*columns*sizeof(imgpel);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D memory array
+ *    which was allocated with get_mem2Dpel(); a NULL table or NULL data block is reported through error()
+ ************************************************************************
+ */
+void free_mem2Dpel(imgpel **array2D)
+{
+  if (array2D)
+  {
+    if (array2D[0])
+      free (array2D[0]);   /* row 0 points to the start of the single data block */
+    else error ("free_mem2Dpel: trying to free unused memory",100);
+
+    free (array2D);        /* reached after either branch above, provided error() returns */
+  } else
+  {
+    error ("free_mem2Dpel: trying to free unused memory",100);
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D memory array
+ *    which was allocated with get_mem3Dpel(); frames is the number of 2D planes to release
+ ************************************************************************
+ */
+void free_mem3Dpel(imgpel ***array3D, int frames)
+{
+  int i;
+
+  if (array3D)
+  {
+    for (i=0;i<frames;i++)
+    {
+      free_mem2Dpel(array3D[i]);   /* release each plane before the table that holds it */
+    }
+   free (array3D);
+  } else
+  {
+    error ("free_mem3Dpel: trying to free unused memory",100);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 4D memory array
+ *    which was allocated with get_mem4Dpel(); sub_x and sub_y are the sizes of the two outer dimensions
+ ************************************************************************
+ */
+void free_mem4Dpel(imgpel ****array4D, int sub_x, int sub_y)
+{
+  int i;
+
+  if (array4D)
+  {
+    for (i=0;i<sub_x;i++)
+      free_mem3Dpel(array4D[i], sub_y);   /* release each 3D sub-array before the table that holds it */
+    free (array4D);
+  }
+  else
+  {
+    error ("free_mem4Dpel: trying to free unused memory",100);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 5D memory array
+ *    which was allocated with get_mem5Dpel(); dims, sub_x and sub_y are the sizes of the three outer dimensions
+ ************************************************************************
+ */
+void free_mem5Dpel(imgpel *****array5D, int dims, int sub_x, int sub_y)
+{
+  int i;
+
+  if (array5D)
+  {
+    for (i=0;i<dims;i++)
+      free_mem4Dpel(array5D[i], sub_x, sub_y);   /* release each 4D sub-array before the table that holds it */
+    free (array5D);
+  }
+  else
+  {
+    error ("free_mem5Dpel: trying to free unused memory",100);
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> unsigned char array2D[rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************/
+int get_mem2D(byte ***array2D, int rows, int columns)
+{
+  int r;
+
+  *array2D = (byte**)calloc(rows, sizeof(byte*));            /* row pointer table */
+  if (*array2D == NULL)
+    no_mem_exit("get_mem2D: array2D");
+  (*array2D)[0] = (byte*)calloc(columns*rows, sizeof(byte)); /* one zeroed buffer */
+  if ((*array2D)[0] == NULL)
+    no_mem_exit("get_mem2D: array2D");
+  for (r = 1; r < rows; r++)            /* row r starts `columns` past row r-1 */
+    (*array2D)[r] = (*array2D)[r-1] + columns;
+  return rows*columns;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> int array2D[rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dint(int ***array2D, int rows, int columns)
+{
+  int r;
+
+  *array2D = (int**)calloc(rows, sizeof(int*));            /* row pointer table */
+  if (*array2D == NULL)
+    no_mem_exit("get_mem2Dint: array2D");
+  (*array2D)[0] = (int*)calloc(rows*columns, sizeof(int)); /* one zeroed buffer */
+  if ((*array2D)[0] == NULL)
+    no_mem_exit("get_mem2Dint: array2D");
+  for (r = 1; r < rows; r++)            /* row r starts `columns` past row r-1 */
+    (*array2D)[r] = (*array2D)[r-1] + columns;
+  return rows*columns*sizeof(int);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> int64 array2D[rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dint64(int64 ***array2D, int rows, int columns)
+{
+  int r;
+
+  *array2D = (int64**)calloc(rows, sizeof(int64*));            /* row pointer table */
+  if (*array2D == NULL)
+    no_mem_exit("get_mem2Dint64: array2D");
+  (*array2D)[0] = (int64*)calloc(rows*columns, sizeof(int64)); /* one zeroed buffer */
+  if ((*array2D)[0] == NULL)
+    no_mem_exit("get_mem2Dint64: array2D");
+  for (r = 1; r < rows; r++)            /* row r starts `columns` past row r-1 */
+    (*array2D)[r] = (*array2D)[r-1] + columns;
+  return rows*columns*sizeof(int64);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> unsigned char array3D[frames][rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3D(byte ****array3D, int frames, int rows, int columns)
+{
+  int f;
+
+  *array3D = (byte***)calloc(frames, sizeof(byte**));   /* one slot per frame */
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3D: array3D");
+  /* give every slot its own rows x columns plane */
+  for (f = 0; f < frames; f++)
+    get_mem2D(&(*array3D)[f], rows, columns);
+  return frames*rows*columns;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> int array3D[frames][rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dint(int ****array3D, int frames, int rows, int columns)
+{
+  int f;
+
+  *array3D = (int***)calloc(frames, sizeof(int**));   /* one slot per frame */
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Dint: array3D");
+  /* give every slot its own rows x columns plane */
+  for (f = 0; f < frames; f++)
+    get_mem2Dint(&(*array3D)[f], rows, columns);
+  return frames*rows*columns*sizeof(int);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory array -> int64 array3D[frames][rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dint64(int64 ****array3D, int frames, int rows, int columns)
+{
+  int f;
+
+  *array3D = (int64***)calloc(frames, sizeof(int64**));   /* one slot per frame */
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Dint64: array3D");
+  /* give every slot its own rows x columns plane */
+  for (f = 0; f < frames; f++)
+    get_mem2Dint64(&(*array3D)[f], rows, columns);
+  return frames*rows*columns*sizeof(int64);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 4D memory array -> int array4D[idx][frames][rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns )
+{
+  int n;
+
+  *array4D = (int****)calloc(idx, sizeof(int***));   /* one slot per index */
+  if (*array4D == NULL)
+    no_mem_exit("get_mem4Dint: array4D");
+  /* give every slot its own frames x rows x columns volume */
+  for (n = 0; n < idx; n++)
+    get_mem3Dint(&(*array4D)[n], frames, rows, columns);
+  return idx*frames*rows*columns*sizeof(int);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 5D memory array -> int array5D[refs][blocktype][rows][columns][component]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem5Dint(int ******array5D, int refs, int blocktype, int rows, int columns, int component)
+{
+  int r;
+
+  *array5D = (int*****)calloc(refs, sizeof(int****));   /* one slot per reference */
+  if (*array5D == NULL)
+    no_mem_exit("get_mem5Dint: array5D");
+
+  /* give every slot its own blocktype x rows x columns x component array */
+  for (r = 0; r < refs; r++)
+    get_mem4Dint(&(*array5D)[r], blocktype, rows, columns, component);
+  return refs*blocktype*rows*columns*component*sizeof(int);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D memory array
+ *    which was allocated with get_mem2D()
+ ************************************************************************
+ */
+void free_mem2D(byte **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2D: trying to free unused memory",100);
+    return;
+  }
+  /* row 0 anchors the shared sample buffer; complain if it is missing */
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2D: trying to free unused memory",100);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D memory array
+ *    which was allocated with get_mem2Dint()
+ ************************************************************************
+ */
+void free_mem2Dint(int **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Dint: trying to free unused memory",100);
+    return;
+  }
+
+  /* row 0 anchors the shared sample buffer; complain if it is missing */
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2Dint: trying to free unused memory",100);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D memory array
+ *    which was allocated with get_mem2Dint64()
+ ************************************************************************
+ */
+void free_mem2Dint64(int64 **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Dint64: trying to free unused memory",100);
+    return;
+  }
+
+  /* row 0 anchors the shared sample buffer; complain if it is missing */
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2Dint64: trying to free unused memory",100);
+  free (array2D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D memory array
+ *    which was allocated with get_mem3D()
+ ************************************************************************
+ */
+void free_mem3D(byte ***array3D, int frames)
+{
+  int plane = 0;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3D: trying to free unused memory",100);
+    return;
+  }
+
+  /* release every 2D plane first, then the table that pointed at them */
+  while (plane < frames)
+    free_mem2D(array3D[plane++]);
+
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D memory array
+ *    which was allocated with get_mem3Dint()
+ ************************************************************************
+ */
+void free_mem3Dint(int ***array3D, int frames)
+{
+  int plane = 0;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dint: trying to free unused memory",100);
+    return;
+  }
+
+  /* release every 2D plane first, then the table that pointed at them */
+  while (plane < frames)
+    free_mem2Dint(array3D[plane++]);
+
+  free (array3D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D memory array
+ *    which was allocated with get_mem3Dint64()
+ ************************************************************************
+ */
+void free_mem3Dint64(int64 ***array3D, int frames)
+{
+  int plane = 0;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dint64: trying to free unused memory",100);
+    return;
+  }
+
+  /* release every 2D plane first, then the table that pointed at them */
+  while (plane < frames)
+    free_mem2Dint64(array3D[plane++]);
+
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 4D memory array
+ *    which was allocated with get_mem4Dint()
+ ************************************************************************
+ */
+void free_mem4Dint(int ****array4D, int idx, int frames )
+{
+  int n = 0;
+
+  if (array4D == NULL)
+  {
+    error ("free_mem4Dint: trying to free unused memory",100);
+    return;
+  }
+  /* each of the idx entries is released as a 3D array of `frames` planes */
+  while (n < idx)
+    free_mem3Dint(array4D[n++], frames);
+  free (array4D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 5D int memory array
+ *    which was allocated with get_mem5Dint()
+ ************************************************************************
+ */
+void free_mem5Dint(int *****array5D, int refs, int blocktype, int height)
+{
+  int n = 0;
+
+  if (array5D == NULL)
+  {
+    error ("free_mem5Dint: trying to free unused memory",100);
+    return;
+  }
+  /* each of the refs entries is released as a 4D array (blocktype x height) */
+  while (n < refs)
+    free_mem4Dint(array5D[n++], blocktype, height);
+  free (array5D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Exit program if memory allocation failed (using error())
+ * \param where
+ *    string indicating which memory allocation failed
+ ************************************************************************
+ */
+void no_mem_exit(char *where)
+{
+   snprintf(errortext, ET_SIZE, "Could not allocate memory: %s",where); /* format into errortext (declared outside this function), bounded by ET_SIZE */
+   error (errortext, 100); /* pass the formatted message to error() with code 100 */
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D short memory array -> short array2D[rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dshort(short ***array2D, int rows, int columns)
+{
+  int r;
+
+  *array2D = (short**)calloc(rows, sizeof(short*));            /* row pointer table */
+  if (*array2D == NULL)
+    no_mem_exit("get_mem2Dshort: array2D");
+  (*array2D)[0] = (short*)calloc(rows*columns, sizeof(short)); /* one zeroed buffer */
+  if ((*array2D)[0] == NULL)
+    no_mem_exit("get_mem2Dshort: array2D");
+  for (r = 1; r < rows; r++)            /* row r starts `columns` past row r-1 */
+    (*array2D)[r] = (*array2D)[r-1] + columns;
+  return rows*columns*sizeof(short);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory short array -> short array3D[frames][rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Dshort(short ****array3D, int frames, int rows, int columns)
+{
+  int f;
+
+  *array3D = (short***)calloc(frames, sizeof(short**));   /* one slot per frame */
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Dshort: array3D");
+  /* give every slot its own rows x columns plane */
+  for (f = 0; f < frames; f++)
+    get_mem2Dshort(&(*array3D)[f], rows, columns);
+  return frames*rows*columns*sizeof(short);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 4D memory short array -> short array4D[idx][frames][rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem4Dshort(short *****array4D, int idx, int frames, int rows, int columns )
+{
+  int  j;
+
+  /* the top level stores idx pointers to 3D arrays, i.e. elements of type short*** */
+  if(((*array4D) = (short****)calloc(idx,sizeof(short***))) == NULL)
+    no_mem_exit("get_mem4Dshort: array4D");
+  for(j=0;j<idx;j++)
+    get_mem3Dshort( (*array4D)+j, frames, rows, columns ) ;
+
+  return idx*frames*rows*columns*sizeof(short);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 5D memory array -> short array5D[refs][blocktype][rows][columns][component]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem5Dshort(short ******array5D, int refs, int blocktype, int rows, int columns, int component)
+{
+  int r;
+
+  *array5D = (short*****)calloc(refs, sizeof(short****));   /* one slot per reference */
+  if (*array5D == NULL)
+    no_mem_exit("get_mem5Dshort: array5D");
+
+  /* give every slot its own blocktype x rows x columns x component array */
+  for (r = 0; r < refs; r++)
+    get_mem4Dshort(&(*array5D)[r], blocktype, rows, columns, component);
+  return refs*blocktype*rows*columns*component*sizeof(short);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 6D memory array -> short array6D[list][refs][blocktype][rows][columns][component]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem6Dshort(short *******array6D, int list, int refs, int blocktype, int rows, int columns, int component)
+{
+  int l;
+
+  *array6D = (short******)calloc(list, sizeof(short*****));   /* one slot per list */
+  if (*array6D == NULL)
+    no_mem_exit("get_mem6Dshort: array6D");
+
+  /* give every slot its own 5D array of refs x blocktype x rows x columns x component */
+  for (l = 0; l < list; l++)
+    get_mem5Dshort(&(*array6D)[l], refs, blocktype, rows, columns, component);
+  return list * refs * blocktype * rows * columns * component * sizeof(short);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D short memory array
+ *    which was allocated with get_mem2Dshort()
+ ************************************************************************
+ */
+void free_mem2Dshort(short **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Dshort: trying to free unused memory",100);
+    return;
+  }
+
+  /* row 0 anchors the shared sample buffer; complain if it is missing */
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2Dshort: trying to free unused memory",100);
+  free (array2D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D short memory array
+ *    which was allocated with get_mem3Dshort()
+ ************************************************************************
+ */
+void free_mem3Dshort(short ***array3D, int frames)
+{
+  int plane = 0;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dshort: trying to free unused memory",100);
+    return;
+  }
+
+  /* release every 2D plane first, then the table that pointed at them */
+  while (plane < frames)
+    free_mem2Dshort(array3D[plane++]);
+
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 4D short memory array
+ *    which was allocated with get_mem4Dshort()
+ ************************************************************************
+ */
+void free_mem4Dshort(short ****array4D, int idx, int frames )
+{
+  int n = 0;
+
+  if (array4D == NULL)
+  {
+    error ("free_mem4Dshort: trying to free unused memory",100);
+    return;
+  }
+  /* each of the idx entries is released as a 3D array of `frames` planes */
+  while (n < idx)
+    free_mem3Dshort(array4D[n++], frames);
+  free (array4D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 5D short memory array
+ *    which was allocated with get_mem5Dshort()
+ ************************************************************************
+ */
+void free_mem5Dshort(short *****array5D, int refs, int blocktype, int height)
+{
+  int n = 0;
+
+  if (array5D == NULL)
+  {
+    error ("free_mem5Dshort: trying to free unused memory",100);
+    return;
+  }
+
+  /* each of the refs entries is released as a 4D array (blocktype x height) */
+  while (n < refs)
+    free_mem4Dshort(array5D[n++], blocktype, height);
+  free (array5D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 6D short memory array
+ *    which was allocated with get_mem6Dshort()
+ ************************************************************************
+ */
+void free_mem6Dshort(short ******array6D, int list, int refs, int blocktype, int height)
+{
+  int n = 0;
+
+  if (array6D == NULL)
+  {
+    error ("free_mem6Dshort: trying to free unused memory",100);
+    return;
+  }
+
+  /* each of the list entries is released as a 5D array (refs x blocktype x height) */
+  while (n < list)
+    free_mem5Dshort(array6D[n++], refs, blocktype, height);
+  free (array6D);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> double array2D[rows][columns]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Ddouble(double ***array2D, int rows, int columns)
+{
+  int r;
+
+  *array2D = (double**)calloc(rows, sizeof(double*));            /* row pointer table */
+  if (*array2D == NULL)
+    no_mem_exit("get_mem2Ddouble: array2D");
+  (*array2D)[0] = (double*)calloc(rows*columns, sizeof(double)); /* one zeroed buffer */
+  if ((*array2D)[0] == NULL)
+    no_mem_exit("get_mem2Ddouble: array2D");
+  for (r = 1; r < rows; r++)            /* row r starts `columns` past row r-1 */
+    (*array2D)[r] = (*array2D)[r-1] + columns;
+  return rows*columns*sizeof(double);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> double array2D[rows][columns]
+ *    Note that array is shifted towards offset allowing negative values
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Ddb_offset(double ***array2D, int rows, int columns, int offset)
+{
+  int r;
+
+  *array2D = (double**)calloc(rows, sizeof(double*));            /* row pointer table */
+  if (*array2D == NULL)
+    no_mem_exit("get_mem2Ddb_offset: array2D");
+  (*array2D)[0] = (double*)calloc(rows*columns, sizeof(double)); /* one zeroed buffer */
+  if ((*array2D)[0] == NULL)
+    no_mem_exit("get_mem2Ddb_offset: array2D");
+  /* shift row 0 by offset; the remaining rows inherit the shift from it */
+  (*array2D)[0] = (*array2D)[0] + offset;
+  for (r = 1; r < rows; r++)
+    (*array2D)[r] = (*array2D)[r-1] + columns;
+  return rows*columns*sizeof(double);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory double array -> double array3D[rows][columns][pels]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem3Ddb_offset(double ****array3D, int rows, int columns, int pels, int offset)
+{
+  int r, c;
+
+  /* level 1: one pointer per row */
+  *array3D = (double***)calloc(rows, sizeof(double**));
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Ddb_offset: array3D");
+  /* level 2: a single rows x columns table of vector pointers, shifted by offset */
+  (*array3D)[0] = (double**)calloc(rows*columns, sizeof(double*));
+  if ((*array3D)[0] == NULL)
+    no_mem_exit("get_mem3Ddb_offset: array3D");
+  (*array3D)[0] = (*array3D)[0] + offset;
+  for (r = 1; r < rows; r++)
+    (*array3D)[r] = (*array3D)[r-1] + columns;
+  /* level 3: a zeroed vector of pels elements for every (row, column) slot */
+  for (r = 0; r < rows; r++)
+    for (c = -offset; c < columns - offset; c++)
+      if (((*array3D)[r][c] = (double*)calloc(pels, sizeof(double))) == NULL)
+        no_mem_exit("get_mem3Ddb_offset: array3D");
+  return rows*columns*pels*sizeof(double);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 3D memory int array -> int array3D[rows][columns][pels]
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+
+
+int get_mem3Dint_offset(int ****array3D, int rows, int columns, int pels, int offset)
+{
+  int r, c;
+
+  /* level 1: one pointer per row */
+  *array3D = (int***)calloc(rows, sizeof(int**));
+  if (*array3D == NULL)
+    no_mem_exit("get_mem3Dint_offset: array3D");
+  /* level 2: a single rows x columns table of vector pointers, shifted by offset */
+  (*array3D)[0] = (int**)calloc(rows*columns, sizeof(int*));
+  if ((*array3D)[0] == NULL)
+    no_mem_exit("get_mem3Dint_offset: array3D");
+  (*array3D)[0] = (*array3D)[0] + offset;
+  for (r = 1; r < rows; r++)
+    (*array3D)[r] = (*array3D)[r-1] + columns;
+  /* level 3: a zeroed vector of pels elements for every (row, column) slot */
+  for (r = 0; r < rows; r++)
+    for (c = -offset; c < columns - offset; c++)
+      if (((*array3D)[r][c] = (int*)calloc(pels, sizeof(int))) == NULL)
+        no_mem_exit("get_mem3Dint_offset: array3D");
+  return rows*columns*pels*sizeof(int);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 2D double memory array
+ *    which was allocated with get_mem2Ddouble()
+ ************************************************************************
+ */
+void free_mem2Ddouble(double **array2D)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Ddouble: trying to free unused memory",100);
+    return;
+  }
+
+  /* row 0 anchors the shared sample buffer; complain if it is missing */
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2Ddouble: trying to free unused memory",100);
+
+  /* finally drop the row pointer table itself */
+  free (array2D);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    free 2D double memory array (with offset)
+*    which was allocated with get_mem2Ddb_offset()
+************************************************************************
+*/
+void free_mem2Ddb_offset(double **array2D, int offset)
+{
+  if (array2D == NULL)
+  {
+    error ("free_mem2Ddb_offset: trying to free unused memory",100);
+    return;
+  }
+
+  /* step row 0 back by offset before handing it to free() */
+  array2D[0] = array2D[0] - offset;
+  if (array2D[0] != NULL)
+    free (array2D[0]);
+  else
+    error ("free_mem2Ddb_offset: trying to free unused memory",100);
+  free (array2D);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D memory array with offset
+ ************************************************************************
+ */
+void free_mem3Ddb_offset(double ***array3D, int rows, int columns, int offset)
+{
+  int r, c;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Ddb_offset: trying to free unused memory",100);
+    return;
+  }
+
+  /* innermost vectors: one per (row, column), columns indexed from -offset */
+  for (r = 0; r < rows; r++)
+  {
+    for (c = -offset; c < columns - offset; c++)
+      if (array3D[r][c] == NULL)
+        error ("free_mem3Ddb_offset: trying to free unused memory",100);
+      else
+        free(array3D[r][c]);
+  }
+
+  /* step row 0 back by offset before handing the pointer table to free() */
+  array3D[0] = array3D[0] - offset;
+  if (array3D[0] == NULL)
+    error ("free_mem3Ddb_offset: trying to free unused memory",100);
+  else
+    free(array3D[0]);
+  free (array3D);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    free 3D memory array with offset
+ ************************************************************************
+ */
+void free_mem3Dint_offset(int ***array3D, int rows, int columns, int offset)
+{
+  int r, c;
+
+  if (array3D == NULL)
+  {
+    error ("free_mem3Dint_offset: trying to free unused memory",100);
+    return;
+  }
+
+  /* innermost vectors: one per (row, column), columns indexed from -offset */
+  for (r = 0; r < rows; r++)
+  {
+    for (c = -offset; c < columns - offset; c++)
+      if (array3D[r][c] == NULL)
+        error ("free_mem3Dint_offset: trying to free unused memory",100);
+      else
+        free(array3D[r][c]);
+  }
+
+  /* step row 0 back by offset before handing the pointer table to free() */
+  array3D[0] = array3D[0] - offset;
+  if (array3D[0] == NULL)
+    error ("free_mem3Dint_offset: trying to free unused memory",100);
+  else
+    free(array3D[0]);
+  free (array3D);
+}
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate 2D memory array -> int array2D[rows][columns]
+ *    Every row pointer is advanced by offset, so column indices start at -offset
+ *
+ * \par Output:
+ *    memory size in bytes
+ ************************************************************************
+ */
+int get_mem2Dint_offset(int ***array2D, int rows, int columns, int offset)
+{
+  int i;
+
+  if((*array2D      = (int**)calloc(rows, sizeof(int*))) == NULL)
+    no_mem_exit("get_mem2Dint_offset: array2D");
+  if(((*array2D)[0] = (int* )calloc(rows*columns,sizeof(int))) == NULL)
+    no_mem_exit("get_mem2Dint_offset: array2D");
+
+  (*array2D)[0] += offset;   /* shift row 0; later rows inherit the shift */
+
+  for(i=1 ; i<rows ; i++)
+    (*array2D)[i] =  (*array2D)[i-1] + columns  ;
+
+  return rows*columns*sizeof(int);
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    free 2D int memory array (with offset)
+*    which was allocated with get_mem2Dint_offset()
+************************************************************************
+*/
+void free_mem2Dint_offset(int **array2D, int offset)
+{
+  if (array2D)
+  {
+    /* move the row-0 pointer back to the address calloc() returned */
+    array2D[0] -= offset;
+    if (array2D[0])
+      free (array2D[0]);
+    else
+      error ("free_mem2Dint_offset: trying to free unused memory",100);
+
+    free (array2D);
+  }
+  else
+  {
+    error ("free_mem2Dint_offset: trying to free unused memory",100);
+  }
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/memalloc.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/memalloc.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/memalloc.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,80 @@
+
+/*!
+ ************************************************************************
+ * \file  memalloc.h
+ *
+ * \brief
+ *    Memory allocation and free helper functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ ************************************************************************
+ */
+
+#ifndef _MEMALLOC_H_
+#define _MEMALLOC_H_
+/* Allocators: the get_mem* routines defined in memalloc.c return the payload size in bytes. */
+int  get_mem2D(byte ***array2D, int rows, int columns);
+int  get_mem3D(byte ****array3D, int frames, int rows, int columns);
+
+int  get_mem2Dint(int ***array2D, int rows, int columns);
+int  get_mem3Dint(int ****array3D, int frames, int rows, int columns);
+int  get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns );
+int  get_mem5Dint(int ******array5D, int refs, int blocktype, int rows, int columns, int component);
+
+int  get_mem2Dint64(int64 ***array2D, int rows, int columns);
+int  get_mem3Dint64(int64 ****array3D, int frames, int rows, int columns);
+
+int  get_mem2Dshort(short ***array2D, int rows, int columns);
+int  get_mem3Dshort(short ****array3D, int frames, int rows, int columns);
+int  get_mem4Dshort(short *****array4D, int idx, int frames, int rows, int columns );
+int  get_mem5Dshort(short ******array5D, int refs, int blocktype, int rows, int columns, int component);
+int  get_mem6Dshort(short *******array6D, int list, int refs, int blocktype, int rows, int columns, int component);
+
+int get_mem2Dpel(imgpel ***array2D, int rows, int columns);
+int get_mem3Dpel(imgpel ****array3D, int frames, int rows, int columns);
+int get_mem4Dpel(imgpel *****array4D, int sub_x, int sub_y, int rows, int columns);
+int get_mem5Dpel(imgpel ******array5D, int dims, int sub_x, int sub_y, int rows, int columns);
+
+int get_mem2Ddouble(double ***array2D, int rows, int columns);
+int get_mem2Ddb_offset(double ***array2D, int rows, int columns, int offset);
+int get_mem3Ddb_offset(double ****array3D, int rows, int columns, int pels, int offset);
+
+int get_mem2Dint_offset(int ***array2D, int rows, int columns, int offset);
+int get_mem3Dint_offset(int ****array3D, int rows, int columns, int pels, int offset);
+/* Deallocators: callers pass the outer dimensions (and offset) the array was allocated with. */
+void free_mem2D(byte **array2D);
+void free_mem3D(byte ***array3D, int frames);
+
+void free_mem2Dint(int **array2D);
+void free_mem3Dint(int ***array3D, int frames);
+void free_mem4Dint(int ****array4D, int idx, int frames);
+void free_mem5Dint(int *****array5D, int refs, int blocktype, int height);
+
+void free_mem2Dint64(int64 **array2D);
+void free_mem3Dint64(int64 ***array3D, int frames);
+
+void free_mem2Dshort(short **array2D);
+void free_mem3Dshort(short ***array3D, int frames);
+void free_mem4Dshort(short ****array4D, int idx, int frames);
+void free_mem5Dshort(short *****array5D, int refs, int blocktype, int height);
+void free_mem6Dshort(short ******array6D, int list, int refs, int blocktype, int height);
+
+void free_mem2Dpel(imgpel **array2D);
+void free_mem3Dpel(imgpel ***array3D, int frames);
+void free_mem4Dpel(imgpel ****array4D, int sub_x, int sub_y);
+void free_mem5Dpel(imgpel *****array5D, int dims, int sub_x, int sub_y);
+
+void free_mem2Ddouble(double **array2D);
+void free_mem2Ddb_offset(double **array2D, int offset);
+void free_mem2Dint_offset(int **array2D, int offset);
+void free_mem3Ddb_offset(double ***array3D, int rows, int columns, int offset);
+void free_mem3Dint_offset(int ***array3D, int rows, int columns, int offset);
+
+int init_top_bot_planes(imgpel **imgFrame, int rows, int columns, imgpel ***imgTopField, imgpel ***imgBotField);
+void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField);
+
+/* Formats an out-of-memory message for `where` and passes it to error(). */
+void no_mem_exit(char *where);
+
+#endif /* _MEMALLOC_H_ */


Index: llvm-test/MultiSource/Applications/JM/lencod/minmax.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/minmax.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/minmax.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,17 @@
+
+/*!
+ ************************************************************************
+ *  \file
+ *     minmax.h
+ *  \brief
+ *     defines min and max macros for non WIN32 environments
+ ************************************************************************
+ */
+#ifndef _MINMAX_                          /* include guard */
+#define _MINMAX_
+
+#if !defined(WIN32) || defined(__GNUC__)  /* non-WIN32 targets, or any GCC build */
+#endif                                    /* the conditional body is empty in this revision */
+
+#endif /* _MINMAX_ */
+


Index: llvm-test/MultiSource/Applications/JM/lencod/mode_decision.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mode_decision.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mode_decision.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1008 @@
+
+/*!
+ ***************************************************************************
+ * \file mode_decision.c
+ *
+ * \brief
+ *    Main macroblock mode decision functions and helpers
+ *
+ **************************************************************************
+ */
+
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+#include <limits.h>
+#include <float.h>
+#include <memory.h>
+#include <string.h>
+
+#include "global.h"
+#include "rdopt_coding_state.h"
+#include "mb_access.h"
+#include "intrarefresh.h"
+#include "image.h"
+#include "transform8x8.h"
+#include "ratectl.h"
+#include "mode_decision.h"
+#include "fmo.h"
+#include "me_umhex.h"
+#include "me_umhexsmp.h"
+#include "macroblock.h"
+
+
+//==== MODULE PARAMETERS ====
+imgpel temp_imgY[16][16]; // temporary storage for the Y data of one macroblock for the 8x8 transform
+
+const int  b8_mode_table[6]  = {0, 4, 5, 6, 7};         // sub-macroblock modes in the order submacroblock_mode_decision() tries them; only 5 of the 6 entries are listed, the last is implicitly 0. DO NOT CHANGE ORDER !!!
+const int  mb_mode_table[9]  = {0, 1, 2, 3, P8x8, I16MB, I4MB, I8MB, IPCM}; // DO NOT CHANGE ORDER !!!
+
+double *mb16x16_cost_frame; // 16x16 cost per macroblock, indexed by macroblock number (written by adjust_mb16x16_cost, read by get_initial_mb16x16_cost)
+
+/*!
+*************************************************************************************
+* \brief
+*    Rate control helper: stores the luma residual of one macroblock,
+*    diffy[j][i] = imgY_org[cpix_y + j][cpix_x + i] - prediction[j][i]
+* \param cpix_x      column of the macroblock's first sample in imgY_org
+* \param cpix_y      row of the macroblock's first sample in imgY_org
+* \param prediction  predicted luma samples of the macroblock
+*************************************************************************************
+*/
+void rc_store_diff(int cpix_x, int cpix_y, imgpel prediction[16][16])
+{
+  int row, col;
+
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+  {
+    int    *residual = diffy[row];
+    imgpel *original = &imgY_org[cpix_y + row][cpix_x];
+
+    for (col = 0; col < MB_BLOCK_SIZE; col++)
+      residual[col] = original[col] - prediction[row][col];
+  }
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    Fast intra decision.
+*    Sets *intra_skip to 1 when min_rate/384 does not exceed the mean boundary error
+*    (SBE/64) between the first row / column of the original macroblock and the
+*    reconstructed samples directly above / to the left of it (luma plus both chroma
+*    planes). The error counts as 0 for macroblocks in the last row or column and
+*    whenever the left or the upper neighbour is unavailable.
+*
+* \param intra_skip  set to 1 by the test above, otherwise left untouched
+* \param min_rate    rate supplied by the caller; scaled by 1/384 before the test
+*************************************************************************************
+*/
+void fast_mode_intra_decision(short *intra_skip, double min_rate)
+{
+  int k, uv;
+  long   boundary_err = 0;                   // SBE
+  double mean_err     = 0;                   // ABE
+  double mean_rate    = (1.0/384)*min_rate;  // AR
+  PixelPos above;       //!< pixel position p(0,-1)
+  PixelPos beside[2];   //!< pixel positions p(-1, -1..0)
+
+  getNeighbour(img->current_mb_nr, -1, -1, IS_LUMA, &beside[0]);
+  getNeighbour(img->current_mb_nr, -1,  0, IS_LUMA, &beside[1]);
+  getNeighbour(img->current_mb_nr,  0, -1, IS_LUMA, &above);
+
+  if ( (img->mb_y != (int)img->FrameHeightInMbs-1) && (img->mb_x != (int)img->PicWidthInMbs-1)
+    && beside[1].available && above.available)
+  {
+    // luma: first row against the row above, first column against the column to the left
+    for (k = 0; k < MB_BLOCK_SIZE; k++)
+    {
+      boundary_err += iabs(imgY_org[img->opix_y][img->opix_x+k] - enc_picture->imgY[img->pix_y-1][img->pix_x+k]);
+      boundary_err += iabs(imgY_org[img->opix_y+k][img->opix_x] - enc_picture->imgY[img->pix_y+k][img->pix_x-1]);
+    }
+
+    // chroma: the same measurement over 8 samples in each of the two planes
+    for (uv = 0; uv < 2; uv++)
+    {
+      for (k = 0; k < 8; k++)
+      {
+        boundary_err += iabs(imgUV_org[uv][img->opix_c_y][img->opix_c_x+k] - enc_picture->imgUV[uv][img->pix_c_y-1][img->pix_c_x+k]);
+        boundary_err += iabs(imgUV_org[uv][img->opix_c_y+k][img->opix_c_x] - enc_picture->imgUV[uv][img->pix_c_y+k][img->pix_c_x-1]);
+      }
+    }
+
+    mean_err = 1.0/64 * boundary_err;
+  }
+
+  if (mean_rate <= mean_err)
+  {
+    *intra_skip = 1;
+  }
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    Initialize Encoding parameters for Macroblock: list offsets, the table of
+*    valid coding modes, Lagrange multipliers and chroma vector adjustments.
+*************************************************************************************
+*/
+void init_enc_mb_params(Macroblock* currMB, RD_PARAMS *enc_mb, int intra, int bslice)
+{
+  int mode;
+  int l,k;
+
+  //Setup list offset
+  enc_mb->list_offset[LIST_0] = LIST_0 + currMB->list_offset;
+  enc_mb->list_offset[LIST_1] = LIST_1 + currMB->list_offset;
+
+  enc_mb->curr_mb_field = ((img->MbaffFrameFlag)&&(currMB->mb_field));
+  enc_mb->best_ref[LIST_0] = 0;
+  enc_mb->best_ref[LIST_1] = -1;
+
+  // Set valid modes
+  enc_mb->valid[I8MB]  = input->Transform8x8Mode;
+  enc_mb->valid[I4MB]  = (input->Transform8x8Mode==2) ? 0:1;
+  enc_mb->valid[I16MB] = 1;
+  enc_mb->valid[IPCM]  = input->EnableIPCM;
+
+  enc_mb->valid[0]     = (!intra );  // a non-zero 'intra' disables every inter mode (0..7)
+  enc_mb->valid[1]     = (!intra && input->InterSearch16x16);
+  enc_mb->valid[2]     = (!intra && input->InterSearch16x8);
+  enc_mb->valid[3]     = (!intra && input->InterSearch8x16);
+  enc_mb->valid[4]     = (!intra && input->InterSearch8x8);
+  enc_mb->valid[5]     = (!intra && input->InterSearch8x4 && !(input->Transform8x8Mode==2));
+  enc_mb->valid[6]     = (!intra && input->InterSearch4x8 && !(input->Transform8x8Mode==2));
+  enc_mb->valid[7]     = (!intra && input->InterSearch4x4 && !(input->Transform8x8Mode==2));
+  enc_mb->valid[P8x8]  = (enc_mb->valid[4] || enc_mb->valid[5] || enc_mb->valid[6] || enc_mb->valid[7]);
+  enc_mb->valid[12]    = (img->type == SI_SLICE);
+
+  if(img->type==SP_SLICE)
+  {
+    if(si_frame_indicator)  // everything is disabled except one of I4MB / I16MB, chosen by check_for_SI16()
+    {
+      enc_mb->valid[I8MB]  = 0;
+      enc_mb->valid[IPCM]  = 0;
+      enc_mb->valid[0]     = 0;
+      enc_mb->valid[1]     = 0;
+      enc_mb->valid[2]     = 0;
+      enc_mb->valid[3]     = 0;
+      enc_mb->valid[4]     = 0;
+      enc_mb->valid[5]     = 0;
+      enc_mb->valid[6]     = 0;
+      enc_mb->valid[7]     = 0;
+      enc_mb->valid[P8x8]  = 0;
+      enc_mb->valid[12]    = 0;
+      if(check_for_SI16())
+      {
+        enc_mb->valid[I4MB]  = 0;
+        enc_mb->valid[I16MB] = 1;
+      }
+      else
+      {
+        enc_mb->valid[I4MB]  = 1;
+        enc_mb->valid[I16MB] = 0;
+      }
+    }
+  }
+
+  if(img->type==SP_SLICE)
+  {
+    if(sp2_frame_indicator)
+    {
+      if(check_for_SI16())  // only I16MB stays valid
+      {
+        enc_mb->valid[I8MB]  = 0;
+        enc_mb->valid[IPCM]  = 0;
+        enc_mb->valid[0]     = 0;
+        enc_mb->valid[1]     = 0;
+        enc_mb->valid[2]     = 0;
+        enc_mb->valid[3]     = 0;
+        enc_mb->valid[4]     = 0;
+        enc_mb->valid[5]     = 0;
+        enc_mb->valid[6]     = 0;
+        enc_mb->valid[7]     = 0;
+        enc_mb->valid[P8x8]  = 0;
+        enc_mb->valid[12]    = 0;
+        enc_mb->valid[I4MB]  = 0;
+        enc_mb->valid[I16MB] = 1;
+      }
+      else  // modes 1..7, P8x8 and I4MB keep the settings made above
+      {
+        enc_mb->valid[I8MB]  = 0;
+        enc_mb->valid[IPCM]  = 0;
+        enc_mb->valid[0]     = 0;
+        enc_mb->valid[I16MB] = 0;
+      }
+    }
+  }
+
+  //===== SET LAGRANGE PARAMETERS =====
+  // Note that these are now computed at the slice level to reduce
+  // computations and cleanup code.
+  if (bslice && img->nal_reference_idc)  // referenced B slices read row 5 of the lambda tables
+  {
+    enc_mb->lambda_md = img->lambda_md[5][img->qp];
+
+    enc_mb->lambda_me[F_PEL] = img->lambda_me[5][img->qp][F_PEL];
+    enc_mb->lambda_me[H_PEL] = img->lambda_me[5][img->qp][H_PEL];
+    enc_mb->lambda_me[Q_PEL] = img->lambda_me[5][img->qp][Q_PEL];  // same table as F_PEL / H_PEL (lambda_me, not lambda_mf)
+
+    enc_mb->lambda_mf[F_PEL] = img->lambda_mf[5][img->qp][F_PEL];
+    enc_mb->lambda_mf[H_PEL] = img->lambda_mf[5][img->qp][H_PEL];
+    enc_mb->lambda_mf[Q_PEL] = img->lambda_mf[5][img->qp][Q_PEL];
+  }
+  else
+  {
+    enc_mb->lambda_md = img->lambda_md[img->type][img->qp];
+
+    enc_mb->lambda_me[F_PEL] = img->lambda_me[img->type][img->qp][F_PEL];
+    enc_mb->lambda_me[H_PEL] = img->lambda_me[img->type][img->qp][H_PEL];
+    enc_mb->lambda_me[Q_PEL] = img->lambda_me[img->type][img->qp][Q_PEL];
+
+    enc_mb->lambda_mf[F_PEL] = img->lambda_mf[img->type][img->qp][F_PEL];
+    enc_mb->lambda_mf[H_PEL] = img->lambda_mf[img->type][img->qp][H_PEL];
+    enc_mb->lambda_mf[Q_PEL] = img->lambda_mf[img->type][img->qp][Q_PEL];
+  }
+
+  // Initialize bipredME decisions
+  for (mode=0; mode<MAXMODE; mode++)
+  {
+    img->bi_pred_me[mode]=0;
+  }
+
+  if (!img->MbaffFrameFlag)
+  {
+    for (l = LIST_0; l < BI_PRED; l++)
+    {
+      for(k = 0; k < listXsize[l]; k++)
+      {
+        listX[l][k]->chroma_vector_adjustment= 0;  // stays 0 unless the reference has a different structure than the current field
+        if(img->structure == TOP_FIELD && img->structure != listX[l][k]->structure)
+          listX[l][k]->chroma_vector_adjustment = -2;
+        if(img->structure == BOTTOM_FIELD && img->structure != listX[l][k]->structure)
+          listX[l][k]->chroma_vector_adjustment = 2;
+      }
+    }
+  }
+  else
+  {
+    if (enc_mb->curr_mb_field)
+    {
+      for (l = enc_mb->list_offset[LIST_0]; l <= enc_mb->list_offset[LIST_1]; l++)
+      {
+        for(k = 0; k < listXsize[l]; k++)
+        {
+          listX[l][k]->chroma_vector_adjustment= 0;
+          if(img->current_mb_nr % 2 == 0 && listX[l][k]->structure == BOTTOM_FIELD)  // even macroblock number, bottom field reference
+            listX[l][k]->chroma_vector_adjustment = -2;
+          if(img->current_mb_nr % 2 == 1 && listX[l][k]->structure == TOP_FIELD)     // odd macroblock number, top field reference
+            listX[l][k]->chroma_vector_adjustment = 2;
+        }
+      }
+    }
+    else
+    {
+      for (l = enc_mb->list_offset[LIST_0]; l <= enc_mb->list_offset[LIST_1]; l++)
+      {
+        for(k = 0; k < listXsize[l]; k++)
+          listX[l][k]->chroma_vector_adjustment= 0;
+      }
+    }
+  }
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    computation of prediction list (including biprediction) cost
+*    list <  BI_PRED : scans the references of that list; whenever a reference is
+*                      cheaper than bmcost[list], bmcost[list] and best_ref[list]
+*                      are updated (the caller presets bmcost[list])
+*    list == BI_PRED : cost of combining best_ref[LIST_0] with best_ref[LIST_1];
+*                      INT_MAX if active_pps->weighted_bipred_idc == 1 and the two
+*                      weights do not sum to a value within [-128, 127]
+*    otherwise       : BPredPartitionCost based cost with reference 0 in both lists
+*************************************************************************************
+*/
+void list_prediction_cost(int list, int block, int mode, RD_PARAMS enc_mb, int bmcost[5], char best_ref[2])
+{
+  int cur_list = (list >= BI_PRED) ? enc_mb.list_offset[LIST_0] : enc_mb.list_offset[list];
+
+  if (list < BI_PRED)
+  {
+    //--- get cost and reference frame for a single list ---
+    short ref;
+
+    for (ref=0; ref < listXsize[cur_list]; ref++)
+    {
+      int mcost;
+
+      if (img->checkref && !list && ref!=0 && !(input->RestrictRef && CheckReliabilityOfRef (block, list, ref, mode)))
+        continue;
+      // limit the number of reference frames to 1 when switching SP frames are used
+      if ((input->sp2_frame_indicator || input->sp_output_indicator)
+        && (img->type==P_SLICE || img->type==SP_SLICE) && ref!=0)
+        continue;
+      if (input->rdopt)
+        mcost = REF_COST (enc_mb.lambda_mf[Q_PEL], ref, cur_list);
+      else
+        mcost = (int) (2 * enc_mb.lambda_me[Q_PEL] * imin(ref, 1));
+      mcost += motion_cost[mode][list][ref][block];
+      if (mcost < bmcost[list])
+      {
+        bmcost[list]   = mcost;
+        best_ref[list] = (char)ref;
+      }
+    }
+  }
+  else if (list == BI_PRED)
+  {
+    int weights_usable = 1;
+
+    if (active_pps->weighted_bipred_idc == 1)
+    {
+      int weight_sum = wbp_weight[0][(int) best_ref[LIST_0]][(int) best_ref[LIST_1]][0] + wbp_weight[1][(int) best_ref[LIST_0]][(int) best_ref[LIST_1]][0];
+      weights_usable = !(weight_sum < -128 ||  weight_sum > 127);
+    }
+    if (!weights_usable)
+    {
+      bmcost[list] = INT_MAX;
+    }
+    else
+    {
+      if (input->rdopt)
+        bmcost[list] = (REF_COST  (enc_mb.lambda_mf[Q_PEL], (short)best_ref[LIST_0], cur_list) + REF_COST  (enc_mb.lambda_mf[Q_PEL], (short)best_ref[LIST_1], cur_list + LIST_1));
+      else
+        bmcost[list] = (int) (2 * (enc_mb.lambda_me[Q_PEL] * (imin((short)best_ref[LIST_0], 1) + imin((short)best_ref[LIST_1], 1))));
+      bmcost[list] += BIDPartitionCost (mode, block, (short)best_ref[LIST_0], (short)best_ref[LIST_1], enc_mb.lambda_mf[Q_PEL]);
+    }
+  }
+  else
+  {
+    if (input->rdopt)
+      bmcost[list] = (REF_COST (enc_mb.lambda_mf[Q_PEL], 0, cur_list) + REF_COST (enc_mb.lambda_mf[Q_PEL], 0, cur_list + LIST_1));
+    else
+      bmcost[list] = (int) (4 * enc_mb.lambda_me[Q_PEL]);
+    bmcost[list] += BPredPartitionCost(mode, block, 0, 0, enc_mb.lambda_mf[Q_PEL], !(list&1));
+  }
+}
+
+int compute_ref_cost(RD_PARAMS enc_mb, int ref, int list)
+{ /* WEIGHTED_COST of the bits for reference 'ref'; zero bits when the list holds at most one entry */
+  return WEIGHTED_COST(enc_mb.lambda_mf[Q_PEL],((listXsize[enc_mb.list_offset[list]] > 1)? refbits[ref]:0));
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    Determination of prediction list based on simple distortion computation
+*
+*    The cheapest entry of bmcost[] is selected and added to *cost. The candidates
+*    are LIST_0, LIST_1 and BI_PRED; BI_PRED_L0 and BI_PRED_L1 take part as well
+*    when input->BiPredMotionEstimation is set and mode == 1. If several candidates
+*    share the lowest cost, the one named first in this list is taken.
+*
+* \param mode        selects the candidate set (see above) and indexes
+*                    img->bi_pred_me[]
+* \param bmcost      cost per candidate, indexed by LIST_0 ... BI_PRED_L1
+* \param best_ref    both entries are reset to 0 when BI_PRED_L0 / BI_PRED_L1 wins
+* \param best_pdir   out: 0 = LIST_0, 1 = LIST_1, 2 = any bi-predictive candidate
+* \param cost        in/out: increased by the cost of the winning candidate
+* \param bi_pred_me  out: 0, or 1 / 2 when BI_PRED_L0 / BI_PRED_L1 wins;
+*                    img->bi_pred_me[mode] receives the same value
+*
+* \note
+*    With only three candidates neither *bi_pred_me, img->bi_pred_me[] nor
+*    best_ref[] is written.
+*************************************************************************************
+*/
+void determine_prediction_list(int mode, int bmcost[5], char best_ref[2], char *best_pdir, int *cost, short *bi_pred_me)
+{
+  // candidates in order of preference
+  int candidate[5] = {LIST_0, LIST_1, BI_PRED, BI_PRED_L0, BI_PRED_L1};
+  int extended     = (input->BiPredMotionEstimation && (mode == 1));
+  int count        = extended ? 5 : 3;
+  int winner       = 0;
+  int k;
+
+  if (extended)
+  {
+    img->bi_pred_me[mode]=0;
+    *bi_pred_me = 0;
+  }
+
+  //--- find the cheapest candidate; '<' keeps the earlier one on equal cost ---
+  for (k = 1; k < count; k++)
+  {
+    if (bmcost[candidate[k]] < bmcost[candidate[winner]])
+      winner = k;
+  }
+
+  //--- get prediction direction ----
+  switch (winner)
+  {
+  case 0:   // LIST_0
+    *best_pdir = 0;
+    *cost += bmcost[LIST_0];
+    break;
+
+  case 1:   // LIST_1
+    *best_pdir = 1;
+    *cost += bmcost[LIST_1];
+    break;
+
+  case 2:   // BI_PRED
+    *best_pdir = 2;
+    *cost += bmcost[BI_PRED];
+    break;
+
+  case 3:   // BI_PRED_L0
+    *best_pdir = 2;
+    *cost += bmcost[BI_PRED_L0];
+    *bi_pred_me = 1;
+    img->bi_pred_me[mode]=1;
+    best_ref[LIST_1] = 0;
+    best_ref[LIST_0] = 0;
+    break;
+
+  default:  // BI_PRED_L1
+    *best_pdir = 2;
+    *cost += bmcost[BI_PRED_L1];
+    *bi_pred_me = 2;
+    best_ref[LIST_1] = 0;
+    best_ref[LIST_0] = 0;
+    img->bi_pred_me[mode]=2;
+    break;
+  }
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    RD decision process
+*
+*    Runs RDCost_for_macroblocks() for 'mode' and, whenever it returns non-zero,
+*    stores the macroblock parameters (plus the rate control residual when
+*    input->RCEnable is set). With input->Transform8x8Mode == 1 the mode is coded
+*    a second time with the 8x8 transform where that is permitted. For direct mode
+*    in B slices a further pass without residual (img->NoResidueDirect = 1) may
+*    follow.
+* \note
+*    Nothing is evaluated unless input->FastCrIntraDecision is set, the chroma
+*    intra prediction mode is DC_PRED_8 or the macroblock is intra.
+*************************************************************************************
+*/
+void compute_mode_RD_cost(int mode,
+                          Macroblock *currMB,
+                          RD_PARAMS enc_mb,
+                          double *min_rdcost,
+                          double *min_rate,
+                          int i16mode,
+                          short bslice,
+                          short *inter_skip)
+{
+  int retry_with_8x8;
+
+  //--- transform size ---
+  if (input->Transform8x8Mode==2)
+  {
+    currMB->luma_transform_size_8x8_flag = (mode >= 1 && mode <= 3)
+      || (mode == 0 && bslice && active_sps->direct_8x8_inference_flag)
+      || ((mode == P8x8) && (enc_mb.valid[4]));
+  }
+  else
+  {
+    currMB->luma_transform_size_8x8_flag = 0;
+  }
+  SetModesAndRefframeForBlocks (mode);
+
+  // Encode with coefficients
+  img->NoResidueDirect = 0;
+
+  if (!input->FastCrIntraDecision && currMB->c_ipred_mode != DC_PRED_8 && !(IS_INTRA(currMB)))
+    return;
+
+  do
+  {
+    retry_with_8x8 = 0;
+
+    if (RDCost_for_macroblocks (enc_mb.lambda_md, mode, min_rdcost, min_rate, i16mode))
+    {
+      //Rate control
+      if (input->RCEnable)
+      {
+        if (mode != P8x8)
+          rc_store_diff(img->opix_x, img->opix_y, pred);
+        else if (currMB->luma_transform_size_8x8_flag == 1)
+          rc_store_diff(img->opix_x, img->opix_y, tr8x8.mpr8x8);
+        else
+          rc_store_diff(img->opix_x, img->opix_y, tr4x4.mpr8x8);
+      }
+      store_macroblock_parameters (mode);
+      // early skip: check transform quantized coeff.
+      if (input->rdopt == 2 && mode == 0 && input->EarlySkipEnable && currMB->cbp == 0)
+        *inter_skip = 1;
+    }
+
+    // Go through transform modes.
+    // Note that if currMB->cbp is 0 one could choose to skip 8x8 mode
+    // although this could be due to deadzoning decisions.
+    if (input->Transform8x8Mode==1)
+    {
+      // mb_types 1,2,3, DIRECT-MODE (B slices with direct_8x8_inference_flag) and
+      // P8x8 with mode 4 enabled may also be coded with the 8x8 transform
+      int allows_8x8 = (mode >= 1 && mode <= 3)
+        || (mode == 0 && bslice && active_sps->direct_8x8_inference_flag)
+        || ((mode == P8x8) && (enc_mb.valid[4]));
+
+      if (allows_8x8 && currMB->luma_transform_size_8x8_flag == 0)
+      {
+        //try with 8x8 transform size
+        currMB->luma_transform_size_8x8_flag = 1;
+        retry_with_8x8 = 1;
+      }
+      else
+      {
+        currMB->luma_transform_size_8x8_flag = 0;
+      }
+    }
+  } while (retry_with_8x8);
+
+  // Encode with no coefficients. Currently only for direct. This could be extended to all other modes as in example.
+  if ( bslice && mode == 0 && (*inter_skip == 0) && enc_mb.valid[mode]
+    && currMB->cbp && (currMB->cbp&15) != 15 && !input->nobskip)
+  {
+    img->NoResidueDirect = 1;
+    if (RDCost_for_macroblocks (enc_mb.lambda_md, mode, min_rdcost, min_rate, i16mode))
+    {
+      //Rate control
+      if (input->RCEnable)
+        rc_store_diff(img->opix_x,img->opix_y,pred);
+      store_macroblock_parameters (mode);
+    }
+  }
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    Mode Decision for an 8x8 sub-macroblock: tries the modes of b8_mode_table for
+*    'block', keeps the cheapest one in dataTr and restores its coding state
+*************************************************************************************
+*/
+void submacroblock_mode_decision(RD_PARAMS enc_mb,
+                                 RD_8x8DATA *dataTr,
+                                 Macroblock *currMB,
+                                 int ***cofACtr,
+                                 int *have_direct,
+                                 short bslice,
+                                 int block,
+                                 int *cost_direct,
+                                 int *cost,
+                                 int *cost8x8_direct,
+                                 int transform8x8)
+{
+  int64 curr_cbp_blk;
+  double min_rdcost, rdcost = 0.0;
+  int j0, i0, j1, i1;
+  int i,j, k;
+  int min_cost8x8, index;
+  int mode;
+  int direct4x4_tmp, direct8x8_tmp;
+  int bmcost[5] = {INT_MAX};  // only bmcost[0] starts at INT_MAX; entries 1..4 are zero until assigned below
+  int cnt_nonz = 0;
+  int dummy;
+  int best_cnt_nonz = 0;
+  int maxindex =  (transform8x8) ? 2 : 5;  // number of b8_mode_table entries that are tried
+  int pix_x, pix_y;
+  int block_x, block_y;
+  int lambda_mf[3];
+  static int fadjust[16][16], fadjustCr[2][16][16];  // adaptive rounding data of the best mode found so far
+  int ***fadjustTransform = transform8x8? img->fadjust8x8 : img->fadjust4x4;
+  int ****fadjustTransformCr = transform8x8? img->fadjust8x8Cr : img->fadjust4x4Cr;
+  int lumaAdjustIndex = transform8x8? 2 : 3;
+  int chromaAdjustIndex = transform8x8? 0 : 2;
+  short pdir;
+  short bi_pred_me;
+
+  char best_pdir = 0;
+  char best_ref[2] = {0, -1};
+#ifdef BEST_NZ_COEFF
+  int best_nz_coeff[2][2];
+#endif
+
+  //--- set coordinates ---
+  j0 = ((block>>1)<<3);    // first luma row of the 8x8 block inside the macroblock
+  j1 = (j0>>2);
+  i0 = ((block&0x01)<<3);  // first luma column of the 8x8 block inside the macroblock
+  i1 = (i0>>2);
+
+#ifdef BEST_NZ_COEFF
+  for(j = 0; j <= 1; j++)
+  {
+    for(i = 0; i <= 1; i++)
+      best_nz_coeff[i][j] = img->nz_coeff[img->current_mb_nr][i1 + i][j1 + j] = 0;
+  }
+#endif
+
+  if (transform8x8)
+    currMB->luma_transform_size_8x8_flag = 1; //switch to transform size 8x8
+
+  //--- store coding state before coding ---
+  store_coding_state (cs_cm);
+
+  //=====  LOOP OVER POSSIBLE CODING MODES FOR 8x8 SUB-PARTITION  =====
+  for (min_cost8x8=INT_MAX, min_rdcost=1e30, index=(bslice?0:1); index<maxindex; index++)
+  {
+    mode = b8_mode_table[index];
+    *cost = 0;
+    if (enc_mb.valid[mode] && (transform8x8 == 0 || mode != 0 || (mode == 0 && active_sps->direct_8x8_inference_flag)))
+    {
+      curr_cbp_blk = 0;
+
+      if (mode==0)
+      {
+        //--- Direct Mode ---
+        if (!input->rdopt )
+        {
+          direct4x4_tmp = 0;
+          direct8x8_tmp = 0;
+          direct4x4_tmp = GetDirectCost8x8 ( block, &direct8x8_tmp);
+
+          if ((direct4x4_tmp==INT_MAX)||(*cost_direct==INT_MAX))
+          {
+            *cost_direct = INT_MAX;
+            if (transform8x8)
+              *cost8x8_direct = INT_MAX;
+          }
+          else
+          {
+            *cost_direct += direct4x4_tmp;
+            if (transform8x8)
+              *cost8x8_direct += direct8x8_tmp;
+          }
+          (*have_direct) ++;
+
+          if (transform8x8)
+          {
+            switch(input->Transform8x8Mode)
+            {
+            case 1: // Mixture of 8x8 & 4x4 transform
+              if((direct8x8_tmp < direct4x4_tmp) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7]))
+                *cost = direct8x8_tmp;
+              else
+                *cost = direct4x4_tmp;
+              break;
+            case 2: // 8x8 Transform only
+              *cost = direct8x8_tmp;
+              break;
+            default: // 4x4 Transform only
+              *cost = direct4x4_tmp;
+              break;
+            }
+            if (input->Transform8x8Mode==2)
+              *cost = INT_MAX;
+          }
+          else
+          {
+            *cost = direct4x4_tmp;
+          }
+        }
+
+        block_x = img->block_x+(block&1)*2;
+        block_y = img->block_y+(block&2);
+        best_ref[LIST_0] = direct_ref_idx[LIST_0][block_y][block_x];
+        best_ref[LIST_1] = direct_ref_idx[LIST_1][block_y][block_x];
+        best_pdir        = direct_pdir[block_y][block_x];
+      } // if (mode==0)
+      else
+      {
+        //======= motion estimation for all reference frames ========
+        //-----------------------------------------------------------
+        lambda_mf[F_PEL] = (input->CtxAdptLagrangeMult == 0)
+          ? enc_mb.lambda_mf[F_PEL] :(int)(enc_mb.lambda_mf[F_PEL] * lambda_mf_factor);
+
+        lambda_mf[H_PEL] = (input->CtxAdptLagrangeMult == 0)
+          ? enc_mb.lambda_mf[H_PEL] :(int)(enc_mb.lambda_mf[H_PEL] * lambda_mf_factor);
+
+        lambda_mf[Q_PEL] = (input->CtxAdptLagrangeMult == 0)
+          ? enc_mb.lambda_mf[Q_PEL] :(int)(enc_mb.lambda_mf[Q_PEL] * lambda_mf_factor);
+
+
+        PartitionMotionSearch (mode, block, lambda_mf);
+
+        //--- get cost and reference frame for LIST 0 prediction ---
+        bmcost[LIST_0] = INT_MAX;
+        list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+
+        //store LIST 0 reference index for every block
+        block_x = img->block_x+(block&1)*2;
+        block_y = img->block_y+(block&2);
+        for (j = block_y; j< block_y + 2; j++)
+        {
+          for (i = block_x; i < block_x + 2; i++)
+          {
+            enc_picture->ref_idx   [LIST_0][j][i] = best_ref[LIST_0];
+            enc_picture->ref_pic_id[LIST_0][j][i] =
+              enc_picture->ref_pic_num[enc_mb.list_offset[LIST_0]][(short)best_ref[LIST_0]];
+          }
+        }
+
+        if (bslice)
+        {
+          //--- get cost and reference frame for LIST 1 prediction ---
+          bmcost[LIST_1] = INT_MAX;
+          bmcost[BI_PRED] = INT_MAX;
+          list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+
+          // Compute bipredictive cost between best list 0 and best list 1 references
+          list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+
+          //--- get prediction direction ----
+          determine_prediction_list(mode, bmcost, best_ref, &best_pdir, cost, &bi_pred_me);
+
+          //store backward reference index for every block
+          for (j = block_y; j< block_y + 2; j++)
+          {
+            memset(&enc_picture->ref_idx[LIST_0][j][block_x], best_ref[LIST_0], 2 * sizeof(char));
+            memset(&enc_picture->ref_idx[LIST_1][j][block_x], best_ref[LIST_1], 2 * sizeof(char));
+          }
+        } // if (bslice)
+        else
+        {
+          best_pdir = 0;
+          *cost     = bmcost[LIST_0];
+        }
+      } // if (mode!=0)
+
+      if (input->rdopt)
+      {
+        //--- get and check rate-distortion cost ---
+        rdcost = RDCost_for_8x8blocks (&cnt_nonz, &curr_cbp_blk, enc_mb.lambda_md,
+          block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
+      }
+      else
+      {
+        if (*cost!=INT_MAX)
+          *cost += (REF_COST (enc_mb.lambda_mf[Q_PEL], B8Mode2Value (mode, best_pdir),
+          enc_mb.list_offset[(best_pdir<1?LIST_0:LIST_1)]) - 1);
+      }
+
+      //--- set variables if best mode has changed ---
+      if ( ( input->rdopt && rdcost < min_rdcost)
+        || (!input->rdopt && *cost < min_cost8x8))
+      {
+        min_cost8x8                 = *cost;
+        min_rdcost                  = rdcost;
+        dataTr->part8x8mode [block] = mode;
+        dataTr->part8x8pdir [block] = best_pdir;
+        dataTr->part8x8fwref[block] = best_ref[LIST_0];
+        dataTr->part8x8bwref[block] = best_ref[LIST_1];
+
+        img->mb_data[img->current_mb_nr].b8mode[block] = mode;
+
+#ifdef BEST_NZ_COEFF
+        for(j = 0; j <= 1; j++)
+        {
+          for(i = 0; i <= 1; i++)
+            best_nz_coeff[i][j]= cnt_nonz ? img->nz_coeff[img->current_mb_nr][i1 + i][j1 + j] : 0;
+        }
+#endif
+
+        //--- store number of nonzero coefficients ---
+        best_cnt_nonz  = cnt_nonz;
+
+        if (input->rdopt)
+        {
+          //--- store block cbp ---
+          cbp_blk8x8    &= (~(0x33 << (((block>>1)<<3)+((block%2)<<1)))); // delete bits for block
+          cbp_blk8x8    |= curr_cbp_blk;
+
+          //--- store coefficients ---
+          for (k=0; k< 4; k++)
+          {
+            for (j=0; j< 2; j++)
+              memcpy(&cofACtr[k][j][0],&img->cofAC[block][k][j][0], 65 * sizeof(int));
+          }
+          //--- store reconstruction and prediction ---
+          for (j=j0; j<j0+8; j++)
+          {
+            pix_y = img->pix_y + j;
+            for (i=i0; i<i0+8; i++)
+            {
+              pix_x = img->pix_x + i;
+              dataTr->rec_mbY8x8[j][i] = enc_picture->imgY[pix_y][pix_x];
+              dataTr->mpr8x8[j][i] = img->mpr[j][i];
+              if(img->type==SP_SLICE && (!si_frame_indicator))
+                dataTr->lrec[j][i]=lrec[pix_y][pix_x]; // store the coefficients for primary SP slice
+            }
+          }
+        }
+        if (img->AdaptiveRounding)
+        {
+          for (j=j0; j<j0+8; j++)
+          {
+            memcpy(&fadjust[j][i0], &fadjustTransform[0][j][i0], 8 * sizeof(int));
+          }
+
+          if (input->AdaptRndChroma)
+          {
+            int j0_cr = (j0 * img->mb_cr_size_y) / MB_BLOCK_SIZE;
+            int i0_cr = (i0 * img->mb_cr_size_x) / MB_BLOCK_SIZE;
+            for (j=j0_cr; j<j0_cr+(img->mb_cr_size_y >> 1); j++)
+            {
+              memcpy(&fadjustCr[0][j][i0_cr], &fadjustTransformCr[0][0][j][i0_cr], (img->mb_cr_size_x >> 1) * sizeof(int));
+              memcpy(&fadjustCr[1][j][i0_cr], &fadjustTransformCr[0][1][j][i0_cr], (img->mb_cr_size_x >> 1) * sizeof(int));
+            }
+          }
+        }
+        //--- store best 8x8 coding state ---
+        if (block < 3)
+          store_coding_state (cs_b8);
+      } // if (rdcost <= min_rdcost)
+
+      //--- re-set coding state as it was before coding with current mode was performed ---
+      reset_coding_state (cs_cm);
+    } // if ((enc_mb.valid[mode] && (transform8x8 == 0 || mode != 0 || (mode == 0 && active_sps->direct_8x8_inference_flag)))
+  } // for (min_rdcost=1e30, index=(bslice?0:1); index<6; index++)
+
+#ifdef BEST_NZ_COEFF
+  for(j = 0; j <= 1; j++)
+  {
+    for(i = 0; i <= 1; i++)
+      img->nz_coeff[img->current_mb_nr][i1 + i][j1 + j] = best_nz_coeff[i][j];
+  }
+#endif
+
+  if (!transform8x8)
+    dataTr->cost8x8 += min_cost8x8;
+
+  if (!input->rdopt)
+  {
+    if (transform8x8)
+    {
+      dataTr->cost8x8 += min_cost8x8;
+      mode = dataTr->part8x8mode[block];
+      pdir = dataTr->part8x8pdir[block];
+    }
+    else
+    {
+      mode = dataTr->part8x8mode[block];
+      pdir = dataTr->part8x8pdir[block];
+    }
+    curr_cbp_blk  = 0;
+    best_cnt_nonz = LumaResidualCoding8x8 (&dummy, &curr_cbp_blk, block, pdir,
+      (pdir==0||pdir==2?mode:0), (pdir==1||pdir==2?mode:0), dataTr->part8x8fwref[block], dataTr->part8x8bwref[block]);
+
+    cbp_blk8x8   &= (~(0x33 << (((block>>1)<<3)+((block%2)<<1)))); // delete bits for block
+    cbp_blk8x8   |= curr_cbp_blk;
+
+    //--- store coefficients ---
+    for (k=0; k< 4; k++)
+    {
+      for (j=0; j< 2; j++)
+        memcpy(cofACtr[k][j],img->cofAC[block][k][j],65 * sizeof(int));
+    }
+
+    //--- store reconstruction and prediction ---
+    for (j=j0; j<j0+2* BLOCK_SIZE; j++)
+    {
+      memcpy(&dataTr->rec_mbY8x8[j][i0], &enc_picture->imgY[img->pix_y + j][img->pix_x + i0], 2* BLOCK_SIZE * sizeof (imgpel));
+      memcpy(&dataTr->mpr8x8[j][i0], &img->mpr[j][i0], 2* BLOCK_SIZE * sizeof (imgpel));
+      if(img->type==SP_SLICE &&(!si_frame_indicator))
+        memcpy(&dataTr->lrec[j][i0],&lrec[img->pix_y+j][img->pix_x+i0],2*BLOCK_SIZE*sizeof(int)); // store coefficients for primary SP slice
+    }
+  }
+
+  //----- set cbp and count of nonzero coefficients ---
+  if (best_cnt_nonz)
+  {
+    cbp8x8       |= (1 << block);
+    cnt_nonz_8x8 += best_cnt_nonz;
+  }
+
+  if (!transform8x8)
+  {
+    if (block<3)
+    {
+      //===== re-set reconstructed block (columns i0..i0+7, matching the store loops above) =====
+      j0   = 8*(block >> 1);
+      i0   = 8*(block & 0x01);
+      for (j=j0; j<j0 + 2 * BLOCK_SIZE; j++)
+      {
+        memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x + i0], &dataTr->rec_mbY8x8[j][i0], 2 * BLOCK_SIZE * sizeof(imgpel));
+        if(img->type==SP_SLICE &&(!si_frame_indicator))
+          memcpy(&lrec[img->pix_y + j][img->pix_x + i0], &dataTr->lrec[j][i0], 2*BLOCK_SIZE*sizeof(int)); // reset the coefficients for SP slice
+      }
+    } // if (block<3)
+  }
+  else
+  {
+    //======= save motion data for 8x8 partition for transform size 8x8 ========
+    StoreNewMotionVectorsBlock8x8(0, block, dataTr->part8x8mode[block], dataTr->part8x8fwref[block], dataTr->part8x8bwref[block], dataTr->part8x8pdir[block], bslice);
+  }
+  //===== set motion vectors and reference frames (prediction) =====
+  SetRefAndMotionVectors (block, dataTr->part8x8mode[block], dataTr->part8x8pdir[block], dataTr->part8x8fwref[block], dataTr->part8x8bwref[block]);
+
+  //===== set the coding state after current block =====
+  //if (transform8x8 == 0 || block < 3)
+  if (block < 3)
+    reset_coding_state (cs_b8);
+
+  if (img->AdaptiveRounding)
+  {
+    for (j=j0; j<j0+2 * BLOCK_SIZE; j++)
+    {
+      memcpy(&fadjustTransform  [lumaAdjustIndex][j][i0], &fadjust[j][i0], 2 * BLOCK_SIZE * sizeof(int));
+    }
+
+    if (input->AdaptRndChroma)
+    {
+      int j0_cr = (j0 * img->mb_cr_size_y) >> MB_BLOCK_SHIFT;
+      int i0_cr = (i0 * img->mb_cr_size_x) >> MB_BLOCK_SHIFT;
+
+      for (j=j0_cr; j<j0_cr+(img->mb_cr_size_y >> 1); j++)
+      {
+        memcpy(&fadjustTransformCr[chromaAdjustIndex][0][j][i0_cr], &fadjustCr[0][j][i0_cr], (img->mb_cr_size_x >> 1) * sizeof(int));
+        memcpy(&fadjustTransformCr[chromaAdjustIndex][1][j][i0_cr], &fadjustCr[1][j][i0_cr], (img->mb_cr_size_x >> 1) * sizeof(int));
+      }
+    }
+  }
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    Checks whether a primary SP slice macroblock was encoded as I16
+* \return
+*    1 if every lrec entry of the current macroblock equals -16, 0 otherwise
+*************************************************************************************
+*/
+int check_for_SI16()
+{
+  int row, col;
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+    for (col = 0; col < MB_BLOCK_SIZE; col++)
+      if (lrec[img->pix_y + row][img->pix_x + col] != -16)
+        return 0;
+  return 1;
+}
+
+void get_initial_mb16x16_cost()
+{
+  // Seed mb16x16_cost for the current macroblock from the costs stored at
+  // current_mb_nr - 1 and current_mb_nr - (img->width>>4), depending on which of
+  // the left / upper neighbours is available, then refresh lambda_mf_factor.
+  Macroblock* currMB = &img->mb_data[img->current_mb_nr];
+  const int has_left = (currMB->mb_available_left != 0);
+  const int has_up   = (currMB->mb_available_up != 0);
+
+  if (!has_left && !has_up)
+    mb16x16_cost = CALM_MF_FACTOR_THRESHOLD;
+  else if (!has_up)
+    mb16x16_cost = mb16x16_cost_frame[img->current_mb_nr - 1];
+  else if (!has_left)
+    mb16x16_cost = mb16x16_cost_frame[img->current_mb_nr - (img->width>>4)];
+  else
+    mb16x16_cost = (mb16x16_cost_frame[img->current_mb_nr - 1] +
+                    mb16x16_cost_frame[img->current_mb_nr - (img->width>>4)] + 1)/2.0;
+
+  if (mb16x16_cost < CALM_MF_FACTOR_THRESHOLD)
+    lambda_mf_factor = 1.0;
+  else
+    lambda_mf_factor = sqrt(mb16x16_cost / (CALM_MF_FACTOR_THRESHOLD * img->lambda_mf_factor[img->type][img->qp]));
+}
+
+void adjust_mb16x16_cost(int cost)
+{ // Records 'cost' as the current macroblock's 16x16 cost and recomputes lambda_mf_factor from it.
+  mb16x16_cost_frame[img->current_mb_nr] = mb16x16_cost = (double) cost;
+
+  if (mb16x16_cost < CALM_MF_FACTOR_THRESHOLD)
+    lambda_mf_factor = 1.0;
+  else
+    lambda_mf_factor = sqrt(mb16x16_cost / (CALM_MF_FACTOR_THRESHOLD * img->lambda_mf_factor[img->type][img->qp]));
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/mode_decision.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mode_decision.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mode_decision.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,91 @@
+/*!
+ ***************************************************************************
+ * \file
+ *    mode_decision.h
+ *
+ * \author
+ *    Alexis Michael Tourapis
+ *
+ * \date
+ *    21. February 2005
+ *
+ * \brief
+ *    Headerfile for mode decision
+ *
+ *    Declares the globals and functions shared by the mode decision code.
+ *    The project types used here (CSptr, RD_8x8DATA, RD_PARAMS, Macroblock,
+ *    imgpel, int64) and MAXMODE must already be declared by the including
+ *    file; this header includes nothing itself.
+ **************************************************************************
+ */
+
+#ifndef _MODE_DECISION_H_
+#define _MODE_DECISION_H_
+
+extern CSptr cs_mb, cs_b8, cs_cm, cs_imb, cs_ib8, cs_ib4, cs_pc;
+//extern imgpel   mpr_8x8ts[16][16];
+//extern imgpel   rec_mbY[16][16], rec_mbU[16][16], rec_mbV[16][16];    // reconstruction values
+extern RD_8x8DATA tr4x4, tr8x8;
+
+// Adaptive Lagrangian variables
+// (updated by get_initial_mb16x16_cost() and adjust_mb16x16_cost())
+extern double mb16x16_cost;
+extern double lambda_mf_factor;
+
+extern const  int LEVELMVLIMIT[17][6];
+extern int    ****cofAC_8x8ts;        // [8x8block][4x4block][level/run][scan_pos]
+extern int    ****cofAC, ****cofAC8x8;        // [8x8block][4x4block][level/run][scan_pos]
+extern int    QP2QUANT[40];
+extern int    cbp_blk8x8;
+extern int    cbp, cbp8x8, cnt_nonz_8x8;
+extern int64  cbp_blk;
+extern int64  cbp_blk8_8x8ts;
+extern int    cbp8_8x8ts;
+extern int    cnt_nonz8_8x8ts;
+// Each of the next two arrays used to be listed twice in its declarator
+// list; one declaration per object is sufficient.
+extern int    qp_mbaff[2][2];
+extern int    delta_qp_mbaff[2][2];
+
+// Residue Color Transform
+extern char   b4_ipredmode[16], b4_intra_pred_modes[16];
+
+extern short  bi_pred_me;
+extern short  best_mode;
+extern short  best8x8mode          [4]; // [block]
+extern char   best8x8pdir [MAXMODE][4]; // [mode][block]
+extern char   best8x8fwref[MAXMODE][4]; // [mode][block]
+extern char   best8x8bwref[MAXMODE][4]; // [mode][block]
+extern imgpel pred[16][16];
+
+extern void   set_stored_macroblock_parameters (void);
+extern void   StoreMV8x8(int);
+extern void   RestoreMV8x8(int);
+extern void   store_macroblock_parameters (int);
+extern void   SetModesAndRefframeForBlocks (int);
+extern void   SetRefAndMotionVectors (int, int, int, int, int);
+extern void   StoreNewMotionVectorsBlock8x8(int, int, int, int, int, int, int);
+extern void   assign_enc_picture_params(int, char, int, int, int, int, int);
+extern void   update_refresh_map(int intra, int intra1, Macroblock *currMB);
+extern void   SetMotionVectorsMB (Macroblock*, int);
+extern void   SetCoeffAndReconstruction8x8 (Macroblock*);
+extern void   fast_mode_intra_decision(short *intra_skip, double min_rate);
+
+extern int    GetBestTransformP8x8(void);
+extern int    I16Offset (int, int);
+extern int    CheckReliabilityOfRef (int, int, int, int);
+extern int    Mode_Decision_for_Intra4x4Macroblock (double, int*);
+extern int    RDCost_for_macroblocks (double, int, double*, double*, int);
+extern double RDCost_for_8x8blocks (int*, int64*, double, int, int, short, short, short);
+extern double *mb16x16_cost_frame;    // indexed by macroblock number (img->current_mb_nr)
+
+extern const int  b8_mode_table[6];
+extern const int  mb_mode_table[9];
+
+void rc_store_diff(int cpix_x, int cpix_y, imgpel prediction[16][16]);
+void submacroblock_mode_decision(RD_PARAMS, RD_8x8DATA *, Macroblock *,int ***, int *, short, int, int *, int *, int *, int);
+void init_enc_mb_params(Macroblock* currMB, RD_PARAMS *enc_mb, int intra, int bslice);
+void list_prediction_cost(int list, int block, int mode, RD_PARAMS enc_mb, int bmcost[5], char best_ref[2]);
+void determine_prediction_list(int, int [5], char [2], char *, int *, short *);
+void compute_mode_RD_cost(int mode, Macroblock *currMB, RD_PARAMS enc_mb,
+                               double *min_rdcost, double *min_rate,
+                               int i16mode, short bslice, short *inter_skip);
+
+
+// Adaptive Lagrangian helpers operating on mb16x16_cost / lambda_mf_factor
+void get_initial_mb16x16_cost(void);
+void adjust_mb16x16_cost(int);
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/mv-search.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mv-search.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mv-search.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1854 @@
+
+/*!
+ *************************************************************************************
+ * \file mv-search.c
+ *
+ * \brief
+ *    Motion Vector Search, unified for B and P Pictures
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ *      - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *      - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+ *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ *      - Jani Lainema                    <jani.lainema at nokia.com>
+ *      - Detlev Marpe                    <marpe at hhi.de>
+ *      - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+ *      - Heiko Schwarz                   <hschwarz at hhi.de>
+ *      - Alexis Michael Tourapis         <alexismt at ieee.org>
+ *
+ *************************************************************************************
+*/
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <limits.h>
+#include <string.h>
+#include <time.h>
+#include <sys/timeb.h>
+
+#include "global.h"
+
+#include "image.h"
+#include "mv-search.h"
+#include "refbuf.h"
+#include "memalloc.h"
+#include "mb_access.h"
+#include "macroblock.h"
+
+// Motion estimation distortion header file
+#include "me_distortion.h"
+
+// Motion estimation search algorithms
+#include "me_epzs.h"
+#include "me_fullfast.h"
+#include "me_fullsearch.h"
+#include "me_umhex.h"
+#include "me_umhexsmp.h"
+
+// Statistics, temporary -- lookup tables set up by Init_Motion_Search_Module() and released by Clear_Motion_Search_Module()
+int     max_mvd;                // (1 << (max_mv_bits >> 1)) - 1; also the offset applied to mvbits
+short*  spiral_search_x;        // x offsets of the spiral pattern; entry 0 is 0
+short*  spiral_search_y;        // y offsets of the spiral pattern; entry 0 is 0
+short*  spiral_hpel_search_x;   // same pattern as spiral_search_x with every offset doubled
+short*  spiral_hpel_search_y;   // same pattern as spiral_search_y with every offset doubled
+
+int*    mvbits;                 // bit count table; pointer is advanced by max_mvd so indices -max_mvd..max_mvd are valid
+int*    refbits;                // bit count table indexed from 0 (max_ref entries allocated)
+int*    byte_abs;               // byte_abs[i] == byte_abs[-i] == i; pointer is advanced by byte_abs_range/2
+int**** motion_cost;            // allocated as [8][2][img->max_num_references][4]
+int     byte_abs_range;         // number of entries allocated for byte_abs
+
+static int diff  [16];          // NOTE(review): presumably a 4x4 difference buffer; no use visible in this part of the file
+static int diff64[64];          // difference buffer handed to distortion4x4() / distortion8x8()
+static imgpel orig_pic [768];   // original block: luma copied from offset 0, the two chroma planes from 256 and 512
+void SetMotionVectorPredictor (short  pmv[2],
+                               char   **refPic,
+                               short  ***tmp_mv,
+                               short  ref_frame,
+                               int    list,
+                               int    block_x,
+                               int    block_y,
+                               int    blockshape_x,
+                               int    blockshape_y);
+
+extern ColocatedParams *Co_located;
+extern const short block_type_shift_factor[8];
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set motion vector predictor
+ *
+ *    Looks up the left (A), upper (B), upper-right (C) and upper-left (D)
+ *    neighbours of the block, picks a prediction type from their reference
+ *    indices and the block shape, and writes the predicted vector to pmv.
+ *    D is used in place of C whenever C is not available.
+ *
+ * \param pmv
+ *    output: pmv[0] and pmv[1] receive the two predicted components
+ * \param refPic
+ *    reference indices, read as refPic[pos_y][pos_x] at the neighbours
+ * \param tmp_mv
+ *    motion vectors, read as tmp_mv[pos_y][pos_x][component] at the neighbours
+ * \param ref_frame
+ *    reference index the neighbours' indices are compared against
+ * \param list
+ *    not referenced inside this function
+ * \param block_x
+ *    horizontal block position inside the macroblock; multiplied by 4 here
+ * \param block_y
+ *    vertical block position inside the macroblock; multiplied by 4 here
+ * \param blockshape_x
+ *    block width; compared against 8 and 16 below
+ * \param blockshape_y
+ *    block height; compared against 8 and 16 below
+ ************************************************************************
+ */
+void SetMotionVectorPredictor (short  pmv[2],
+                               char   **refPic,
+                               short  ***tmp_mv,
+                               short  ref_frame,
+                               int    list,
+                               int    block_x,
+                               int    block_y,
+                               int    blockshape_x,
+                               int    blockshape_y)
+{
+  int mb_x                 = 4*block_x;
+  int mb_y                 = 4*block_y;
+  int mb_nr                = img->current_mb_nr;
+
+  int mv_a, mv_b, mv_c, pred_vec=0;
+  int mvPredType, rFrameL, rFrameU, rFrameUR;
+  int hv;
+
+  PixelPos block_a, block_b, block_c, block_d;
+
+  getLuma4x4Neighbour(mb_nr, mb_x - 1,            mb_y,     &block_a);  // A: left
+  getLuma4x4Neighbour(mb_nr, mb_x,                mb_y - 1, &block_b);  // B: above
+  getLuma4x4Neighbour(mb_nr, mb_x + blockshape_x, mb_y - 1, &block_c);  // C: above right
+  getLuma4x4Neighbour(mb_nr, mb_x - 1,            mb_y - 1, &block_d);  // D: above left
+
+  if (mb_y > 0)  // not in the top row of the macroblock: C is forced unavailable at the positions below (presumably blocks not coded yet -- TODO confirm)
+  {
+    if (mb_x < 8)  // first column of 8x8 blocks
+    {
+      if (mb_y==8)
+      {
+        if (blockshape_x == 16)      block_c.available  = 0;
+      }
+      else
+      {
+        if (mb_x+blockshape_x == 8)  block_c.available = 0;
+      }
+    }
+    else
+    {
+      if (mb_x+blockshape_x == 16)   block_c.available = 0;
+    }
+  }
+
+  if (!block_c.available)
+  {
+    block_c=block_d;  // fall back to the upper-left neighbour (which may itself be unavailable)
+  }
+
+  mvPredType = MVPRED_MEDIAN;  // default, overridden by the reference index tests below
+
+  if (!img->MbaffFrameFlag)
+  {
+    rFrameL    = block_a.available    ? refPic[block_a.pos_y][block_a.pos_x] : -1;  // -1 marks an unavailable neighbour
+    rFrameU    = block_b.available    ? refPic[block_b.pos_y][block_b.pos_x] : -1;
+    rFrameUR   = block_c.available    ? refPic[block_c.pos_y][block_c.pos_x] : -1;
+  }
+  else
+  {
+    if (img->mb_data[img->current_mb_nr].mb_field)  // current MB has mb_field set: indices of neighbours without mb_field are doubled
+    {
+      rFrameL  = block_a.available
+        ? (img->mb_data[block_a.mb_addr].mb_field
+        ? refPic[block_a.pos_y][block_a.pos_x]
+        : refPic[block_a.pos_y][block_a.pos_x] * 2) : -1;
+      rFrameU  = block_b.available
+        ? (img->mb_data[block_b.mb_addr].mb_field
+        ? refPic[block_b.pos_y][block_b.pos_x]
+        : refPic[block_b.pos_y][block_b.pos_x] * 2) : -1;
+      rFrameUR = block_c.available
+        ? (img->mb_data[block_c.mb_addr].mb_field
+        ? refPic[block_c.pos_y][block_c.pos_x]
+        : refPic[block_c.pos_y][block_c.pos_x] * 2) : -1;
+    }
+    else  // current MB has mb_field clear: indices of neighbours with mb_field set are halved
+    {
+      rFrameL = block_a.available
+        ? (img->mb_data[block_a.mb_addr].mb_field
+        ? refPic[block_a.pos_y][block_a.pos_x] >>1
+        : refPic[block_a.pos_y][block_a.pos_x]) : -1;
+      rFrameU  = block_b.available
+        ? (img->mb_data[block_b.mb_addr].mb_field
+        ? refPic[block_b.pos_y][block_b.pos_x] >>1
+        : refPic[block_b.pos_y][block_b.pos_x]) : -1;
+      rFrameUR = block_c.available
+        ? (img->mb_data[block_c.mb_addr].mb_field
+        ? refPic[block_c.pos_y][block_c.pos_x] >>1
+        : refPic[block_c.pos_y][block_c.pos_x]) : -1;
+    }
+  }
+
+  /* Prediction if only one of the neighbors uses the reference frame
+  *  we are checking: that neighbor's vector is taken directly instead
+  *  of the median
+  */
+  if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)       mvPredType = MVPRED_L;
+  else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)  mvPredType = MVPRED_U;
+  else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)  mvPredType = MVPRED_UR;
+  // Directional predictions (8x16 and 16x8 shapes only; these override the choice made above)
+  if(blockshape_x == 8 && blockshape_y == 16)
+  {
+    if(mb_x == 0)  // left 8x16 partition
+    {
+      if(rFrameL == ref_frame)
+        mvPredType = MVPRED_L;
+    }
+    else  // right 8x16 partition
+    {
+      if( rFrameUR == ref_frame)
+        mvPredType = MVPRED_UR;
+    }
+  }
+  else if(blockshape_x == 16 && blockshape_y == 8)
+  {
+    if(mb_y == 0)  // upper 16x8 partition
+    {
+      if(rFrameU == ref_frame)
+        mvPredType = MVPRED_U;
+    }
+    else  // lower 16x8 partition
+    {
+      if(rFrameL == ref_frame)
+        mvPredType = MVPRED_L;
+    }
+  }
+
+  for (hv=0; hv < 2; hv++)  // one pass per vector component
+  {
+    if (!img->MbaffFrameFlag || hv==0)  // no scaling needed: MBAFF off, or first component
+    {
+      mv_a = block_a.available  ? tmp_mv[block_a.pos_y][block_a.pos_x][hv] : 0;  // unavailable neighbours contribute 0
+      mv_b = block_b.available  ? tmp_mv[block_b.pos_y][block_b.pos_x][hv] : 0;
+      mv_c = block_c.available  ? tmp_mv[block_c.pos_y][block_c.pos_x][hv] : 0;
+    }
+    else
+    {
+      if (img->mb_data[img->current_mb_nr].mb_field)  // current MB has mb_field set: second component of neighbours without mb_field is halved
+      {
+        mv_a = block_a.available  ? img->mb_data[block_a.mb_addr].mb_field
+          ? tmp_mv[block_a.pos_y][block_a.pos_x][hv]
+          : tmp_mv[block_a.pos_y][block_a.pos_x][hv] / 2
+          : 0;
+        mv_b = block_b.available  ? img->mb_data[block_b.mb_addr].mb_field
+          ? tmp_mv[block_b.pos_y][block_b.pos_x][hv]
+          : tmp_mv[block_b.pos_y][block_b.pos_x][hv] / 2
+          : 0;
+        mv_c = block_c.available  ? img->mb_data[block_c.mb_addr].mb_field
+          ? tmp_mv[block_c.pos_y][block_c.pos_x][hv]
+          : tmp_mv[block_c.pos_y][block_c.pos_x][hv] / 2
+          : 0;
+      }
+      else  // current MB has mb_field clear: second component of neighbours with mb_field set is doubled
+      {
+        mv_a = block_a.available  ? img->mb_data[block_a.mb_addr].mb_field
+          ? tmp_mv[block_a.pos_y][block_a.pos_x][hv] * 2
+          : tmp_mv[block_a.pos_y][block_a.pos_x][hv]
+          : 0;
+        mv_b = block_b.available  ? img->mb_data[block_b.mb_addr].mb_field
+          ? tmp_mv[block_b.pos_y][block_b.pos_x][hv] * 2
+          : tmp_mv[block_b.pos_y][block_b.pos_x][hv]
+          : 0;
+        mv_c = block_c.available  ? img->mb_data[block_c.mb_addr].mb_field
+          ? tmp_mv[block_c.pos_y][block_c.pos_x][hv] * 2
+          : tmp_mv[block_c.pos_y][block_c.pos_x][hv]
+          : 0;
+      }
+    }
+
+    switch (mvPredType)
+    {
+    case MVPRED_MEDIAN:
+      if(!(block_b.available || block_c.available))  // neither B nor C available: use A alone
+      {
+        pred_vec = mv_a;
+      }
+      else
+      {
+        pred_vec = mv_a+mv_b+mv_c-imin(mv_a,imin(mv_b,mv_c))-imax(mv_a,imax(mv_b,mv_c));  // sum minus min minus max = median of the three
+      }
+      break;
+    case MVPRED_L:
+      pred_vec = mv_a;
+      break;
+    case MVPRED_U:
+      pred_vec = mv_b;
+      break;
+    case MVPRED_UR:
+      pred_vec = mv_c;
+      break;
+    default:
+      break;  // pred_vec keeps its previous value
+    }
+
+    pmv[hv] = pred_vec;
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Initialize the motion search
+*
+*    Allocates and fills the lookup tables used by the motion search
+*    (spiral search patterns, mvbits, refbits, byte_abs, motion_cost),
+*    selects the distortion functions for each refinement level from
+*    input->MEErrorMetric, and installs the line access functions.
+*    The tables are released again by Clear_Motion_Search_Module().
+*
+*    mvbits and byte_abs are advanced past the start of their allocation
+*    so that they can be indexed with negative values.
+************************************************************************
+*/
+void
+Init_Motion_Search_Module ()
+{
+  int bits, i_min, i_max, k;
+  int i, l;
+
+  int search_range               = input->search_range;
+  int max_search_points          = imax(9, (2*search_range+1)*(2*search_range+1));
+  int max_ref_bits               = 1 + 2 * (int)floor(log(imax(16,img->max_num_references+1)) / log(2) + 1e-10);
+  int max_ref                    = (1<<((max_ref_bits>>1)+1))-1;
+  int number_of_subpel_positions = 4 * (2*search_range+3);
+  int max_mv_bits                = 3 + 2 * (int)ceil (log(number_of_subpel_positions+1) / log(2) + 1e-10);
+  max_mvd                        = (1<<( max_mv_bits >>1)   )-1;
+  byte_abs_range                 = (img->max_imgpel_value > img->max_imgpel_value_uv) ? (img->max_imgpel_value + 1) * 64 : (img->max_imgpel_value_uv + 1) * 64;
+
+  //=====   CREATE ARRAYS   =====
+  //-----------------------------
+  if ((spiral_search_x = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: spiral_search_x");
+  if ((spiral_search_y = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: spiral_search_y");
+  if ((spiral_hpel_search_x = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: spiral_hpel_search_x");
+  if ((spiral_hpel_search_y = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: spiral_hpel_search_y");
+  if ((mvbits = (int*)calloc(2*max_mvd+1, sizeof(int))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: mvbits");
+  if ((refbits = (int*)calloc(max_ref, sizeof(int))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: refbits");
+  if ((byte_abs = (int*)calloc(byte_abs_range, sizeof(int))) == NULL)
+    no_mem_exit("Init_Motion_Search_Module: byte_abs");
+
+  get_mem4Dint (&motion_cost, 8, 2, img->max_num_references, 4);
+
+  //--- set array offsets ---
+  // after this, mvbits[-max_mvd..max_mvd] and byte_abs[-range/2..range/2-1] are valid
+  mvbits   += max_mvd;
+  byte_abs += byte_abs_range/2;
+
+  //=====   INIT ARRAYS   =====
+  //---------------------------
+  //--- init array: motion vector bits ---
+  mvbits[0] = 1;
+  for (bits=3; bits<=max_mv_bits; bits+=2)
+  {
+    i_max = 1    << (bits >> 1);
+    i_min = i_max >> 1;
+
+    for (i = i_min; i < i_max; i++)
+      mvbits[-i] = mvbits[i] = bits;
+  }
+
+  //--- init array: reference frame bits ---
+  refbits[0] = 1;
+  for (bits=3; bits<=max_ref_bits; bits+=2)
+  {
+    i_max = (1   << ((bits >> 1) + 1)) - 1;
+    i_min = i_max >> 1;
+
+    for (i = i_min; i < i_max; i++)
+      refbits[i] = bits;
+  }
+
+  //--- init array: absolute value ---
+  byte_abs[0] = 0;
+  for (i=1; i<byte_abs_range/2; i++)
+  {
+    byte_abs[i] = byte_abs[-i] = i;
+  }
+
+  //--- init array: search pattern ---
+  // Ring l contributes 8*l points, so rings 1..L plus the centre give
+  // (2L+1)^2 entries, which is what max_search_points provides.
+  // The half-pel tables hold the same offsets doubled. The doubling is done
+  // by multiplication: the offsets can be negative, and left-shifting a
+  // negative value is undefined behaviour in C.
+  spiral_search_x[0] = spiral_search_y[0] = 0;
+  spiral_hpel_search_x[0] = spiral_hpel_search_y[0] = 0;
+
+  for (k=1, l=1; l <= imax(1,search_range); l++)
+  {
+    // top and bottom edges of the ring (corners excluded)
+    for (i=-l+1; i< l; i++)
+    {
+      spiral_search_x[k] =  i;
+      spiral_search_y[k] = -l;
+      spiral_hpel_search_x[k] =  i * 2;
+      spiral_hpel_search_y[k++] = -l * 2;
+      spiral_search_x[k] =  i;
+      spiral_search_y[k] =  l;
+      spiral_hpel_search_x[k] =  i * 2;
+      spiral_hpel_search_y[k++] =  l * 2;
+    }
+    // left and right edges of the ring (corners included)
+    for (i=-l;   i<=l; i++)
+    {
+      spiral_search_x[k] = -l;
+      spiral_search_y[k] =  i;
+      spiral_hpel_search_x[k] = -l * 2;
+      spiral_hpel_search_y[k++] = i * 2;
+      spiral_search_x[k] =  l;
+      spiral_search_y[k] =  i;
+      spiral_hpel_search_x[k] =  l * 2;
+      spiral_hpel_search_y[k++] = i * 2;
+    }
+  }
+
+  // set global variable prior to ME
+  start_me_refinement_hp = (input->ChromaMEEnable == 1 || input->MEErrorMetric[F_PEL] != input->MEErrorMetric[H_PEL] ) ? 0 : 1;
+  start_me_refinement_qp = (input->ChromaMEEnable == 1 || input->MEErrorMetric[H_PEL] != input->MEErrorMetric[Q_PEL] ) ? 0 : 1;
+
+  // Setup Distortion Metrics depending on refinement level
+  // (slots i+3 of computeUniPred receive the WP variants)
+  for (i=0; i<3; i++)
+  {
+    switch(input->MEErrorMetric[i])
+    {
+    case ERROR_SAD:
+      computeUniPred[i] = computeSAD;
+      computeUniPred[i + 3] = computeSADWP;
+      computeBiPred1[i] = computeBiPredSAD1;
+      computeBiPred2[i] = computeBiPredSAD2;
+      break;
+    case ERROR_SSE:
+      computeUniPred[i] = computeSSE;
+      computeUniPred[i + 3] = computeSSEWP;
+      computeBiPred1[i] = computeBiPredSSE1;
+      computeBiPred2[i] = computeBiPredSSE2;
+      break;
+    case ERROR_SATD :
+    default:
+      computeUniPred[i] = computeSATD;
+      computeUniPred[i + 3] = computeSATDWP;
+      computeBiPred1[i] = computeBiPredSATD1;
+      computeBiPred2[i] = computeBiPredSATD2;
+      break;
+    }
+  }
+  // Setup buffer access methods
+  get_line[0] = FastLine4X;
+  get_line[1] = UMVLine4X;
+  get_crline[0] = FastLine8X_chroma;
+  get_crline[1] = UMVLine8X_chroma;
+
+  if(input->SearchMode == FAST_FULL_SEARCH)
+    InitializeFastFullIntegerSearch ();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free memory used by motion search
+ *
+ *    Releases every table allocated by Init_Motion_Search_Module() and,
+ *    in FAST_FULL_SEARCH mode, the fast full search buffers as well.
+ ************************************************************************
+ */
+void
+Clear_Motion_Search_Module ()
+{
+  void *tables[7];
+  int   n;
+
+  // mvbits and byte_abs were advanced past the start of their allocations
+  // during initialization; step them back so free() gets the base address
+  mvbits   -= max_mvd;
+  byte_abs -= byte_abs_range/2;
+
+  tables[0] = spiral_search_x;
+  tables[1] = spiral_search_y;
+  tables[2] = spiral_hpel_search_x;
+  tables[3] = spiral_hpel_search_y;
+  tables[4] = mvbits;
+  tables[5] = refbits;
+  tables[6] = byte_abs;
+
+  for (n = 0; n < 7; n++)
+  {
+    free (tables[n]);
+  }
+
+  free_mem4Dint (motion_cost, 8, 2);
+
+  if (FAST_FULL_SEARCH == input->SearchMode)
+  {
+    ClearFastFullIntegerSearch ();
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Motion Cost for Bidirectional modes
+ *
+ *    Returns the weighted bit cost of the motion vector differences of
+ *    both lists plus the distortion of the residual left after
+ *    LumaPrediction4x4Bi(). The distortion is measured per 4x4 block,
+ *    or per 8x8 block when input->Transform8x8Mode is set and
+ *    blocktype <= 4.
+ *
+ * \param blocktype      block type; types above 4 use the tables of type 4
+ * \param block8x8       index selecting the start block inside the macroblock
+ * \param ref_l0         reference index used for LIST_0
+ * \param ref_l1         reference index used for LIST_1
+ * \param lambda_factor  weight handed to WEIGHTED_COST for the vector bits
+ * \param list           nonzero selects img->bipred_mv1, zero img->bipred_mv2
+ ***********************************************************************
+ */
+int BPredPartitionCost (int   blocktype,
+                        int   block8x8,
+                        short ref_l0,
+                        short ref_l1,
+                        int   lambda_factor,
+                        int   list)
+{
+  // first block column / row of each 8x8 index, per partition type
+  static int  first_col[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,2,0,2}};
+  static int  first_row[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,0,0,0}, {0,0,2,2}};
+
+  int   residual[MB_BLOCK_SIZE][MB_BLOCK_SIZE]; // ABT pred.error buffer
+
+  const int parttype = (blocktype < 4) ? blocktype : 4;
+  const int v_begin  = first_row[parttype][block8x8];
+  const int h_begin  = first_col[parttype][block8x8];
+  const int v_end    = v_begin + input->part_size[parttype][1];
+  const int h_end    = h_begin + input->part_size[parttype][0];
+  const int step_v   = input->part_size[blocktype][1];
+  const int step_h   = input->part_size[blocktype][0];
+  const int bsx      = imin(input->blc_size[blocktype][0],8);
+  const int bsy      = imin(input->blc_size[blocktype][1],8);
+
+  short ******bi_mv   = list ? img->bipred_mv1 : img->bipred_mv2;
+  short ******pred_mv = img->pred_mv;
+
+  int   mvd_bits = 0;
+  int   mcost;
+  int   v, h, i, j, k;
+  int   bxx, byy;                               // indexing residual
+  int   block_x, block_y, pic_pix_x, pic_pix_y;
+
+  //----- cost of motion vector bits (both lists, both components) -----
+  for (v = v_begin; v < v_end; v += step_v)
+  {
+    for (h = h_begin; h < h_end; h += step_h)
+    {
+      short *mv0  = bi_mv  [v][h][LIST_0][ref_l0][blocktype];
+      short *pmv0 = pred_mv[v][h][LIST_0][ref_l0][blocktype];
+      short *mv1  = bi_mv  [v][h][LIST_1][ref_l1][blocktype];
+      short *pmv1 = pred_mv[v][h][LIST_1][ref_l1][blocktype];
+
+      mvd_bits += mvbits[ mv0[0] - pmv0[0] ];
+      mvd_bits += mvbits[ mv0[1] - pmv0[1] ];
+      mvd_bits += mvbits[ mv1[0] - pmv1[0] ];
+      mvd_bits += mvbits[ mv1[1] - pmv1[1] ];
+    }
+  }
+  mcost = WEIGHTED_COST (lambda_factor, mvd_bits);
+
+  //----- cost of residual signal -----
+  for (byy = 0, v = v_begin; v < v_end; byy += 4, v++)
+  {
+    block_y   = v << 2;
+    pic_pix_y = img->opix_y + block_y;
+
+    for (bxx = 0, h = h_begin; h < h_end; bxx += 4, h++)
+    {
+      block_x   = h << 2;
+      pic_pix_x = img->opix_x + block_x;
+
+      LumaPrediction4x4Bi (block_x, block_y, blocktype, blocktype, ref_l0, ref_l1, list);
+
+      // keep the 4x4 residual both in the macroblock buffer and in diff64
+      k = 0;
+      for (j = 0; j < 4; j++)
+      {
+        for (i = 0; i < 4; i++)
+        {
+          int d = imgY_org[pic_pix_y+j][pic_pix_x+i] - img->mpr[j+block_y][i+block_x];
+
+          residual[byy+j][bxx+i] = d;
+          diff64[k++]            = d;
+        }
+      }
+
+      if ((!input->Transform8x8Mode) || (blocktype>4))
+      {
+        mcost += distortion4x4 (diff64);
+      }
+    }
+  }
+
+  if (input->Transform8x8Mode && (blocktype<=4))  // tchen 4-29-04
+  {
+    for (byy = 0; byy < input->blc_size[parttype][1]; byy += bsy)
+    {
+      for (bxx = 0; bxx < input->blc_size[parttype][0]; bxx += bsx)
+      {
+        // gather eight rows of eight residuals into a contiguous 8x8 block
+        for (j = 0; j < 8; j++)
+        {
+          memcpy(&diff64[8 * j], &(residual[byy + j][bxx]), 8 * sizeof(int));
+        }
+
+        mcost += distortion8x8(diff64);
+      }
+    }
+  }
+
+  return mcost;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Block motion search
+ ***********************************************************************
+ */
+int                                         //!< minimum motion cost after search
+BlockMotionSearch (short     ref,           //!< reference idx
+                   int       list,          //!< reference pciture list
+                   int       mb_x,          //!< x-coordinate inside macroblock
+                   int       mb_y,          //!< y-coordinate inside macroblock
+                   int       blocktype,     //!< block type (1-16x16 ... 7-4x4)
+                   int       search_range,  //!< 1-d search range for integer-position search
+                   int*      lambda_factor) //!< lagrangian parameter for determining motion cost
+{
+  // each 48-pel line stores the 16 luma pels (at 0) followed by 8 or 16 crcb[0] (at 16) and crcb[1] (at 32) pels
+  // depending on the type of chroma subsampling used: YUV 4:4:4, 4:2:2, and 4:2:0
+  imgpel *orig_pic_tmp = orig_pic;
+
+  short     mv[2];
+  int       i, j;
+
+  int       max_value = INT_MAX;
+  int       min_mcost = max_value;
+
+  int       block_x   = (mb_x>>2);
+  int       block_y   = (mb_y>>2);
+
+  int       bsx       = input->blc_size[blocktype][0];
+  int       bsy       = input->blc_size[blocktype][1];
+
+  int       pic_pix_x = img->opix_x + mb_x;
+  int       pic_pix_y = img->opix_y + mb_y;
+
+  int pic_pix_x_c = pic_pix_x >> (chroma_shift_x - 2);
+  int pic_pix_y_c = pic_pix_y >> (chroma_shift_y - 2);
+  int bsx_c = bsx >> (chroma_shift_x - 2);
+  int bsy_c = bsy >> (chroma_shift_y - 2);
+
+  short*    pred_mv = img->pred_mv[block_y][block_x][list][ref][blocktype];
+  short****** all_mv    = img->all_mv;
+  int list_offset = ((img->MbaffFrameFlag) && (img->mb_data[img->current_mb_nr].mb_field)) ? img->current_mb_nr % 2 ? 4 : 2 : 0;
+  int *prevSad = (input->SearchMode == EPZS)? EPZSDistortion[list + list_offset][blocktype - 1]: NULL;
+
+#if GET_METIME
+  static struct TIMEB tstruct1;
+  static struct TIMEB tstruct2;
+  time_t me_tmp_time;
+
+  ftime( &tstruct1 );    // start time ms
+#endif
+  //==================================
+  //=====   GET ORIGINAL BLOCK   =====
+  //==================================
+  for (j = 0; j < bsy; j++)
+  {
+    memcpy(orig_pic_tmp,&imgY_org[pic_pix_y+j][pic_pix_x], bsx *sizeof(imgpel));
+    orig_pic_tmp += bsx;
+  }
+  ChromaMEEnable = input->ChromaMEEnable;
+
+  if ( ChromaMEEnable )
+  {
+    // copy the original cmp1 and cmp2 data to the orig_pic matrix
+    orig_pic_tmp = orig_pic + 256;
+    for (j = 0; j < bsy_c; j++)
+    {
+      memcpy(orig_pic_tmp, &(imgUV_org[0][pic_pix_y_c+j][pic_pix_x_c]), bsx_c *sizeof(imgpel));
+      orig_pic_tmp += bsx_c;
+    }
+    orig_pic_tmp = orig_pic + 512;
+    for (j = 0; j < bsy_c; j++)
+    {
+      memcpy(orig_pic_tmp, &(imgUV_org[1][pic_pix_y_c+j][pic_pix_x_c]), bsx_c *sizeof(imgpel));
+      orig_pic_tmp += bsx_c;
+    }
+  }
+
+
+  if(input->SearchMode == UM_HEX)
+  {
+    UMHEX_blocktype = blocktype;
+    bipred_flag = 0;
+  }
+  else if (input->SearchMode == UM_HEX_SIMPLE)
+  {
+    smpUMHEX_setup(ref, list, block_y, block_x, blocktype, all_mv );
+  }
+
+  // Set if 8x8 transform will be used if SATD is used
+  test8x8transform = input->Transform8x8Mode && blocktype <= 4;
+
+  //===========================================
+  //=====   GET MOTION VECTOR PREDICTOR   =====
+  //===========================================
+
+  if (input->SearchMode == UM_HEX)
+    UMHEXSetMotionVectorPredictor(pred_mv, enc_picture->ref_idx[list], enc_picture->mv[list], ref, list, block_x, block_y, bsx, bsy, &search_range);
+  else
+    SetMotionVectorPredictor (pred_mv, enc_picture->ref_idx[list], enc_picture->mv[list], ref, list, block_x, block_y, bsx, bsy);
+
+  //==================================
+  //=====   INTEGER-PEL SEARCH   =====
+  //==================================
+
+  if (input->SearchMode == UM_HEX)
+  {
+    mv[0] = pred_mv[0] / 4;
+    mv[1] = pred_mv[1] / 4;
+
+    if (!input->rdopt)
+    {
+      //--- adjust search center so that the (0,0)-vector is inside ---
+      mv[0] = iClip3(-search_range, search_range, mv[0]);
+      mv[1] = iClip3(-search_range, search_range, mv[1]);
+    }
+
+    mv[0] = iClip3(-2047 + search_range, 2047 - search_range, mv[0]);
+    mv[1] = iClip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv[1]);
+
+    min_mcost = UMHEXIntegerPelBlockMotionSearch(orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+      pred_mv[0], pred_mv[1], &mv[0], &mv[1], search_range,
+      min_mcost, lambda_factor[F_PEL]);
+  }
+  else if (input->SearchMode == UM_HEX_SIMPLE)
+  {
+    mv[0] = pred_mv[0] / 4;
+    mv[1] = pred_mv[1] / 4;
+
+    if (!input->rdopt)
+    {
+      //--- adjust search center so that the (0,0)-vector is inside ---
+      mv[0] = iClip3(-search_range, search_range, mv[0]);
+      mv[1] = iClip3(-search_range, search_range, mv[1]);
+    }
+
+    mv[0] = iClip3(-2047 + search_range, 2047 - search_range, mv[0]);
+    mv[1] = iClip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv[1]);
+
+
+    min_mcost = smpUMHEXIntegerPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+                                                 pred_mv[0], pred_mv[1], &mv[0], &mv[1], search_range,
+                                                 min_mcost, lambda_factor[F_PEL]);
+    for (i=0; i < (bsx>>2); i++)
+    {
+      for (j=0; j < (bsy>>2); j++)
+      {
+        if(list == 0)
+        {
+         smpUMHEX_l0_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+        }
+        else
+        {
+          smpUMHEX_l1_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+        }
+      }
+    }
+  }
+  //--- perform motion search using EPZS schemes---
+  else if (input->SearchMode == EPZS)
+  {
+    //--- set search center ---
+    // This has to be modified in the future
+    if (input->EPZSSubPelGrid)
+    {
+      mv[0] = pred_mv[0];
+      mv[1] = pred_mv[1];
+    }
+    else
+    {
+      mv[0] = (pred_mv[0] + 2)>> 2;
+      mv[1] = (pred_mv[1] + 2)>> 2;
+    }
+
+    if (!input->rdopt)
+    {
+      //--- adjust search center so that the (0,0)-vector is inside ---
+      mv[0] = iClip3 (-search_range<<(input->EPZSSubPelGrid * 2), search_range<<(input->EPZSSubPelGrid * 2), mv[0]);
+      mv[1] = iClip3 (-search_range<<(input->EPZSSubPelGrid * 2), search_range<<(input->EPZSSubPelGrid * 2), mv[1]);
+    }
+
+    // valid search range limits could be precomputed once during the initialization process
+    mv[0] = iClip3((-2047 + search_range)<<(input->EPZSSubPelGrid * 2), (2047 - search_range)<<(input->EPZSSubPelGrid * 2), mv[0]);
+    mv[1] = iClip3((LEVELMVLIMIT[img->LevelIndex][0] + search_range)<<(input->EPZSSubPelGrid * 2),
+      (LEVELMVLIMIT[img->LevelIndex][1]  - search_range)<<(input->EPZSSubPelGrid * 2), mv[1]);
+
+    min_mcost = EPZSPelBlockMotionSearch (orig_pic, ref, list, list_offset,
+      enc_picture->ref_idx, enc_picture->mv, pic_pix_x, pic_pix_y, blocktype,
+      pred_mv, mv, search_range<<(input->EPZSSubPelGrid * 2), min_mcost, lambda_factor[F_PEL]);
+
+  }
+  else if (input->SearchMode == FAST_FULL_SEARCH)
+  {
+    // comments:   - orig_pic is not used  -> be careful
+    //             - search center is automatically determined
+    min_mcost = FastFullPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+      pred_mv[0], pred_mv[1], &mv[0], &mv[1], search_range,
+    min_mcost, lambda_factor[F_PEL]);
+  }
+  else
+  {
+    //--- set search center ---
+    mv[0] = pred_mv[0] / 4;
+    mv[1] = pred_mv[1] / 4;
+    if (!input->rdopt)
+    {
+      //--- adjust search center so that the (0,0)-vector is inside ---
+      mv[0] = iClip3 (-search_range, search_range, mv[0]);
+      mv[1] = iClip3 (-search_range, search_range, mv[1]);
+    }
+
+    mv[0] = iClip3(-2047 + search_range, 2047 - search_range, mv[0]);
+    mv[1] = iClip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv[1]);
+
+    //--- perform motion search ---
+    min_mcost = FullPelBlockMotionSearch     (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+      pred_mv[0], pred_mv[1], &mv[0], &mv[1], search_range,
+      min_mcost, lambda_factor[F_PEL]);
+  }
+  //===== convert search center to quarter-pel units =====
+  if (input->EPZSSubPelGrid == 0 || input->SearchMode != EPZS)
+  {
+    mv[0] <<= 2;
+    mv[1] <<= 2;
+  }
+  //==============================
+  //=====   SUB-PEL SEARCH   =====
+  //==============================
+  ChromaMEEnable = (input->ChromaMEEnable == ME_YUV_FP_SP ) ? 1 : 0; // set it externally
+
+  if (!input->DisableSubpelME)
+  {
+    if (input->SearchMode != EPZS || (ref == 0 || img->structure != FRAME || (ref > 0 && min_mcost < 3.5 * prevSad[pic_pix_x >> 2])))
+    {
+      if ( !start_me_refinement_hp )
+      {
+        min_mcost = max_value;
+      }
+
+      if (input->SearchMode == UM_HEX)
+      {
+        if(blocktype >3)
+        {
+          min_mcost =  UMHEXSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+                       pred_mv[0], pred_mv[1], &mv[0], &mv[1], 9, 9, min_mcost, lambda_factor[Q_PEL]);
+        }
+        else
+        {
+          min_mcost =  SubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+                       pred_mv[0], pred_mv[1], &mv[0], &mv[1], 9, 9, min_mcost, lambda_factor);
+        }
+      }
+      else if (input->SearchMode == UM_HEX_SIMPLE)
+      {
+        if(blocktype > 1)
+        {
+          min_mcost =  smpUMHEXSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y,
+                       blocktype, pred_mv[0], pred_mv[1], &mv[0], &mv[1], 9, 9, min_mcost, lambda_factor[Q_PEL]);
+        }
+        else
+        {
+          min_mcost =  smpUMHEXFullSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y,
+                       blocktype, pred_mv[0], pred_mv[1], &mv[0], &mv[1], 9, 9, min_mcost, lambda_factor[Q_PEL]);
+        }
+      }
+      else if (input->SearchMode == EPZS && input->EPZSSubPelME)
+      {
+          min_mcost =  EPZSSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+                       pred_mv, mv, 9, 9, min_mcost, lambda_factor);
+      }
+      else
+      {
+          min_mcost =  SubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+          pred_mv[0], pred_mv[1], &mv[0], &mv[1], 9, 9, min_mcost, lambda_factor);
+      }
+    }
+  }
+
+  if (!input->rdopt)
+  {
+    // Get the skip mode cost
+    if (blocktype == 1 && (img->type == P_SLICE||img->type == SP_SLICE))
+    {
+      int cost;
+
+      FindSkipModeMotionVector ();
+
+      cost  = GetSkipCostMB ();
+      cost -= ((lambda_factor[Q_PEL] + 4096) >> 13);
+
+      if (cost < min_mcost)
+      {
+        min_mcost = cost;
+        mv[0]      = img->all_mv [0][0][0][0][0][0];
+        mv[1]      = img->all_mv [0][0][0][0][0][1];
+      }
+    }
+  }
+
+  //===============================================
+  //=====   SET MV'S AND RETURN MOTION COST   =====
+  //===============================================
+
+  for (j=block_y; j < block_y + (bsy>>2); j++)
+  {
+    for (i=block_x; i < block_x + (bsx>>2); i++)
+    {
+      all_mv[j][i][list][ref][blocktype][0] = mv[0];
+      all_mv[j][i][list][ref][blocktype][1] = mv[1];
+    }
+  }
+
+  if (img->type==B_SLICE && input->BiPredMotionEstimation!=0 && (blocktype == 1) && (ref==0))
+  {
+    short   ******bipred_mv = list ? img->bipred_mv1 : img->bipred_mv2;
+    int     min_mcostbi = max_value;
+    short   bimv[2] = {0, 0}, tempmv[2] = {0, 0};
+    short   *pred_mv1 = NULL;
+    short   *pred_mv2 = NULL;
+    short   iterlist=list;
+    short   pred_mv_bi[2];
+
+    if (input->SearchMode == UM_HEX)
+    {
+      bipred_flag = 1;
+      UMHEXSetMotionVectorPredictor(pred_mv_bi, enc_picture->ref_idx[list ^ 1], enc_picture->mv[(list == LIST_0? LIST_1: LIST_0)], 0, (list == LIST_0? LIST_1: LIST_0), block_x, block_y, bsx, bsy, &search_range);
+    }
+    else
+      SetMotionVectorPredictor     (pred_mv_bi, enc_picture->ref_idx[list ^ 1], enc_picture->mv[(list == LIST_0? LIST_1: LIST_0)], 0, (list == LIST_0? LIST_1: LIST_0), block_x, block_y, bsx, bsy);
+
+    if ((input->SearchMode != EPZS) || (input->EPZSSubPelGrid == 0))
+    {
+      mv[0]=(mv[0] + 2)>>2;
+      mv[1]=(mv[1] + 2)>>2;
+    }
+
+    //Bi-predictive motion Refinements
+    for (i=0;i<=input->BiPredMERefinements;i++)
+    {
+      if (i%2)
+      {
+        pred_mv2=pred_mv;
+        pred_mv1=pred_mv_bi;
+        tempmv[0]=bimv[0];
+        tempmv[1]=bimv[1];
+        bimv[0]=mv[0];
+        bimv[1]=mv[1];
+        iterlist= list ^ 1;
+      }
+      else
+      {
+        pred_mv1=pred_mv;
+        pred_mv2=pred_mv_bi;
+
+        if (i!=0)
+        {
+          tempmv[0]=bimv[0];
+          tempmv[1]=bimv[1];
+          bimv[0]=mv[0];
+          bimv[1]=mv[1];
+        }
+        else
+        {
+          tempmv[0]=mv[0];
+          tempmv[1]=mv[1];
+          if ((input->SearchMode != EPZS) || (input->EPZSSubPelGrid == 0))
+          {
+            bimv[0] = (pred_mv2[0] + 2)>>2;
+            bimv[1] = (pred_mv2[1] + 2)>>2;
+          }
+          else
+          {
+            bimv[0] = pred_mv2[0];
+            bimv[1] = pred_mv2[1];
+          }
+        }
+
+        iterlist=list;
+      }
+      mv[0]=bimv[0];
+      mv[1]=bimv[1];
+
+      if (input->SearchMode == EPZS)
+      {
+        min_mcostbi = EPZSBiPredBlockMotionSearch (orig_pic, ref, iterlist,
+          list_offset, enc_picture->ref_idx, enc_picture->mv,
+          pic_pix_x, pic_pix_y, blocktype,
+          pred_mv1, pred_mv2, bimv, tempmv,
+          (input->BiPredMESearchRange<<(input->EPZSSubPelGrid * 2))>>i, min_mcostbi, lambda_factor[F_PEL]);
+      }
+      else if(input->SearchMode == UM_HEX)
+      {
+        min_mcostbi = UMHEXBipredIntegerPelBlockMotionSearch (orig_pic, ref, iterlist,
+          pic_pix_x, pic_pix_y, blocktype,
+          pred_mv1[0], pred_mv1[1], pred_mv2[0], pred_mv2[1],
+          &bimv[0], &bimv[1], &tempmv[0], &tempmv[1],
+          input->BiPredMESearchRange>>i, min_mcostbi, lambda_factor[F_PEL]);
+      }
+      else if(input->SearchMode == UM_HEX_SIMPLE)
+      {
+        min_mcostbi = smpUMHEXBipredIntegerPelBlockMotionSearch (orig_pic, ref, iterlist,
+          pic_pix_x, pic_pix_y, blocktype,
+          pred_mv[0], pred_mv[1], pred_mv[0], pred_mv[1],
+          &bimv[0], &bimv[1], &tempmv[0], &tempmv[1],
+          input->BiPredMESearchRange>>i, min_mcostbi, lambda_factor[F_PEL]);
+      }
+      else
+      {
+        min_mcostbi = FullPelBlockMotionBiPred (orig_pic, ref, iterlist,
+          pic_pix_x, pic_pix_y, blocktype,
+          pred_mv1[0], pred_mv1[1], pred_mv2[0], pred_mv2[1],
+          &bimv[0], &bimv[1], &tempmv[0], &tempmv[1],
+          input->BiPredMESearchRange>>i, min_mcostbi, lambda_factor[F_PEL]);
+      }
+      if ((mv[0] == bimv[0]) && (mv[1] == bimv[1]))
+      {
+        //mv[0]=tempmv[0];
+        //mv[1]=tempmv[1];
+        //break;
+      }
+
+      mv[0]=tempmv[0];
+      mv[1]=tempmv[1];
+    }
+    if ((input->SearchMode != EPZS) || (input->EPZSSubPelGrid == 0))
+    {
+      mv[0]=tempmv[0] << 2;
+      mv[1]=tempmv[1] << 2;
+      bimv[0] = bimv[0] << 2;
+      bimv[1] = bimv[1] << 2;
+    }
+
+    if (input->BiPredMESubPel && !input->DisableSubpelME)
+    {
+      if ( !start_me_refinement_hp )
+      {
+        min_mcostbi = max_value;
+      }
+
+      if (input->SearchMode == EPZS && input->EPZSSubPelMEBiPred)
+      {
+        min_mcostbi =  EPZSSubPelBlockSearchBiPred (orig_pic, ref, iterlist, pic_pix_x, pic_pix_y, blocktype,
+          pred_mv2, pred_mv1, bimv, mv, 9, 9, min_mcostbi, lambda_factor);
+      }
+      else
+      {
+        min_mcostbi =  SubPelBlockSearchBiPred (orig_pic, ref, iterlist, pic_pix_x, pic_pix_y, blocktype,
+          pred_mv2[0], pred_mv2[1], &bimv[0], &bimv[1], &mv[0], &mv[1], 9, 9,
+          min_mcostbi, lambda_factor);
+      }
+    }
+
+    if (input->BiPredMESubPel==2 && !input->DisableSubpelME)
+    {
+      if ( !start_me_refinement_hp || !start_me_refinement_qp)
+      {
+        min_mcostbi = max_value;
+      }
+
+      if (input->SearchMode == EPZS && input->EPZSSubPelMEBiPred)
+      {
+        min_mcostbi =  EPZSSubPelBlockSearchBiPred (orig_pic, ref, iterlist ^ 1, pic_pix_x, pic_pix_y, blocktype,
+          pred_mv1, pred_mv2, mv, bimv, 9, 9, min_mcostbi, lambda_factor);
+      }
+      else
+      {
+        min_mcostbi =  SubPelBlockSearchBiPred (orig_pic, ref, iterlist ^ 1, pic_pix_x, pic_pix_y, blocktype,
+          pred_mv1[0], pred_mv1[1], &mv[0], &mv[1], &bimv[0], &bimv[1], 9, 9,
+          min_mcostbi, lambda_factor);
+      }
+    }
+
+    for (j=block_y; j < block_y + (bsy>>2); j++)
+    {
+      for (i=block_x ; i < block_x + (bsx>>2); i++)
+      {
+        bipred_mv[j][i][iterlist    ][0][blocktype][0] = mv[0];
+        bipred_mv[j][i][iterlist    ][0][blocktype][1] = mv[1];
+        bipred_mv[j][i][iterlist ^ 1][0][blocktype][0] = bimv[0];
+        bipred_mv[j][i][iterlist ^ 1][0][blocktype][1] = bimv[1];
+      }
+    }
+  }
+
+#if GET_METIME
+  ftime(&tstruct2);   // end time ms
+  me_tmp_time=(tstruct2.time*1000+tstruct2.millitm) - (tstruct1.time*1000+tstruct1.millitm);
+  me_tot_time += me_tmp_time;
+  me_time += me_tmp_time;
+#endif
+  return min_mcost;
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Motion cost of a bi-predictive partition: weighted cost of the
+ *    motion vector difference bits of both lists plus the distortion
+ *    of the bi-predicted luma residual.
+ *
+ * \param blocktype
+ *    partition type; indexes input->blc_size, input->part_size and
+ *    the motion vector arrays
+ * \param block8x8
+ *    index (0..3) selecting the partition start inside the macroblock
+ * \param ref_l0
+ *    reference index used for LIST_0
+ * \param ref_l1
+ *    reference index used for LIST_1
+ * \param lambda_factor
+ *    multiplier handed to WEIGHTED_COST for the motion vector bits
+ *
+ * \return
+ *    accumulated motion cost
+ ***********************************************************************
+ */
+int BIDPartitionCost (int   blocktype,
+                      int   block8x8,
+                      short ref_l0,
+                      short ref_l1,
+                      int   lambda_factor)
+{
+  // partition start (in 4x4 block units), indexed by [partition type][block8x8]
+  static int  bx0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,2,0,2}};
+  static int  by0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,0,0,0}, {0,0,2,2}};
+
+  int   curr_blk[MB_BLOCK_SIZE][MB_BLOCK_SIZE]; // prediction error of the partition
+  int   bsx       = imin(input->blc_size[blocktype][0],8);
+  int   bsy       = imin(input->blc_size[blocktype][1],8);
+
+  const int parttype = (blocktype < 4) ? blocktype : 4;
+  const int part_w   = input->part_size[ parttype][0];
+  const int part_h   = input->part_size[ parttype][1];
+  const int sub_w    = input->part_size[blocktype][0];
+  const int sub_h    = input->part_size[blocktype][1];
+  const int x_first  = bx0[parttype][block8x8];
+  const int y_first  = by0[parttype][block8x8];
+  const int x_end    = x_first + part_w;
+  const int y_end    = y_first + part_h;
+
+  short ******mvs  = img->all_mv;
+  short ******pmvs = img->pred_mv;
+
+  int   row, col, i, j, n;
+  int   dst_x, dst_y;                           // position inside curr_blk
+  int   bits = 0;
+  int   mcost;
+
+  //----- rate: bits of the motion vector differences of both lists -----
+  for (row = y_first; row < y_end; row += sub_h)
+  {
+    for (col = x_first; col < x_end; col += sub_w)
+    {
+      short *mv_l0  = mvs [row][col][LIST_0][ref_l0][blocktype];
+      short *pmv_l0 = pmvs[row][col][LIST_0][ref_l0][blocktype];
+      short *mv_l1  = mvs [row][col][LIST_1][ref_l1][blocktype];
+      short *pmv_l1 = pmvs[row][col][LIST_1][ref_l1][blocktype];
+
+      bits += mvbits[ mv_l0[0] - pmv_l0[0] ];
+      bits += mvbits[ mv_l0[1] - pmv_l0[1] ];
+
+      bits += mvbits[ mv_l1[0] - pmv_l1[0] ];
+      bits += mvbits[ mv_l1[1] - pmv_l1[1] ];
+    }
+  }
+
+  mcost = WEIGHTED_COST (lambda_factor, bits);
+
+  //----- distortion: residual of every 4x4 block of the partition -----
+  for (row = y_first; row < y_end; row++)
+  {
+    const int pel_y = row << 2;
+    const int org_y = img->opix_y + pel_y;
+
+    dst_y = (row - y_first) * 4;
+
+    for (col = x_first; col < x_end; col++)
+    {
+      const int pel_x = col << 2;
+      const int org_x = img->opix_x + pel_x;
+
+      dst_x = (col - x_first) * 4;
+
+      LumaPrediction4x4 (pel_x, pel_y, 2, blocktype, blocktype, ref_l0, ref_l1);
+
+      n = 0;
+      for (j = 0; j < 4; j++)
+      {
+        for (i = 0; i < 4; i++)
+        {
+          const int err = imgY_org[org_y + j][org_x + i] - img->mpr[pel_y + j][pel_x + i];
+
+          curr_blk[dst_y + j][dst_x + i] = err;
+          diff64[n++] = err;
+        }
+      }
+
+      // 4x4 distortion is only accumulated when the 8x8 pass below is not taken
+      if ((blocktype > 4) || !input->Transform8x8Mode)
+        mcost += distortion4x4 (diff64);
+    }
+  }
+
+  //----- distortion measured on 8x8 blocks gathered from curr_blk -----
+  if (input->Transform8x8Mode && (blocktype <= 4))  // tchen 4-29-04
+  {
+    for (dst_y = 0; dst_y < input->blc_size[parttype][1]; dst_y += bsy)
+    {
+      for (dst_x = 0; dst_x < input->blc_size[parttype][0]; dst_x += bsx)
+      {
+        // pack eight rows of eight residuals into diff64
+        for (j = 0; j < 8; j++)
+          memcpy(&diff64[8 * j], &(curr_blk[dst_y + j][dst_x]), 8 * sizeof(int));
+
+        mcost += distortion8x8(diff64);
+      }
+    }
+  }
+
+  return mcost;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Distortion cost of coding the current macroblock in skip mode.
+ *
+ *    Every 4x4 luma block is predicted with LumaPrediction4x4 and the
+ *    difference to the original picture is measured. When rdopt is off
+ *    and Transform8x8Mode is set the measure is taken per 8x8 quadrant
+ *    (distortion8x8), otherwise per 4x4 block (distortion4x4).
+ *
+ * \return
+ *    sum of the distortion values over the macroblock
+ ************************************************************************
+ */
+int GetSkipCostMB (void)
+{
+  int residual8x8[8][8];   // prediction error of the current 8x8 quadrant
+  int total = 0;
+  int quad, off_y, off_x, row, col, n;
+
+  for (quad = 0; quad < 4; quad++)
+  {
+    const int quad_y = (quad / 2) << 3;
+    const int quad_x = (quad % 2) << 3;
+
+    for (off_y = 0; off_y < 8; off_y += 4)
+    {
+      const int blk_y = quad_y + off_y;
+      const int org_y = img->opix_y + blk_y;
+
+      for (off_x = 0; off_x < 8; off_x += 4)
+      {
+        const int blk_x = quad_x + off_x;
+        const int org_x = img->opix_x + blk_x;
+
+        //===== prediction of 4x4 block =====
+        LumaPrediction4x4 (blk_x, blk_y, 0, 0, 0, 0, 0);
+
+        //===== displaced frame difference, kept for both block sizes =====
+        n = 0;
+        for (row = 0; row < 4; row++)
+        {
+          for (col = 0; col < 4; col++)
+          {
+            const int err = imgY_org[org_y + row][org_x + col] - img->mpr[blk_y + row][blk_x + col];
+
+            residual8x8[off_y + row][off_x + col] = err;
+            diff[n++] = err;
+          }
+        }
+
+        // evaluated once per 4x4 block, after the difference has been filled in
+        if ((input->rdopt != 0) || !input->Transform8x8Mode)
+          total += distortion4x4 (diff);
+      }
+    }
+
+    if ((input->rdopt == 0) && input->Transform8x8Mode)
+    {
+      for (row = 0; row < 8; row++)
+        memcpy(&diff64[8 * row], &(residual8x8[row]), 8 * sizeof(int));
+
+      total += distortion8x8 (diff64);
+    }
+  }
+
+  return total;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Derive the motion vector used for the Skip mode and store it in
+ *    img->all_mv[by][bx][0][0][0] for all 4x4 blocks of the macroblock.
+ *
+ *    The vector is zero when the left or the upper neighbour is not
+ *    available, or has reference index 0 together with a zero motion
+ *    vector (after frame/field adjustment of the vertical component
+ *    and the reference index). Otherwise the 16x16 predictor returned
+ *    by SetMotionVectorPredictor is used.
+ ************************************************************************
+ */
+void FindSkipModeMotionVector ()
+{
+  short ******mvs_all  = img->all_mv;
+  short ***   list0_mv = enc_picture->mv[LIST_0];
+  Macroblock *cur_mb   = &img->mb_data[img->current_mb_nr];
+
+  PixelPos left, up;
+  short    skip_mv[2];
+
+  int left_mv_y = 0, left_ref = 0;
+  int up_mv_y   = 0, up_ref   = 0;
+  int left_is_zero, up_is_zero;
+  int row, col;
+
+  getLuma4x4Neighbour(img->current_mb_nr, -1,  0, &left);
+  getLuma4x4Neighbour(img->current_mb_nr,  0, -1, &up);
+
+  if (left.available)
+  {
+    const int nb_field = img->mb_data[left.mb_addr].mb_field;
+
+    left_mv_y = list0_mv[left.pos_y][left.pos_x][1];
+    left_ref  = enc_picture->ref_idx[LIST_0][left.pos_y][left.pos_x];
+
+    // bring the neighbour's data to the frame/field mode of the current MB
+    if (cur_mb->mb_field && !nb_field)
+    {
+      left_mv_y /= 2;
+      left_ref  *= 2;
+    }
+    else if (!cur_mb->mb_field && nb_field)
+    {
+      left_mv_y *= 2;
+      left_ref >>= 1;
+    }
+  }
+
+  if (up.available)
+  {
+    const int nb_field = img->mb_data[up.mb_addr].mb_field;
+
+    up_mv_y = list0_mv[up.pos_y][up.pos_x][1];
+    up_ref  = enc_picture->ref_idx[LIST_0][up.pos_y][up.pos_x];
+
+    if (cur_mb->mb_field && !nb_field)
+    {
+      up_mv_y /= 2;
+      up_ref  *= 2;
+    }
+    else if (!cur_mb->mb_field && nb_field)
+    {
+      up_mv_y *= 2;
+      up_ref >>= 1;
+    }
+  }
+
+  left_is_zero = !left.available
+    || (left_ref == 0 && list0_mv[left.pos_y][left.pos_x][0] == 0 && left_mv_y == 0);
+  up_is_zero   = !up.available
+    || (up_ref   == 0 && list0_mv[up.pos_y][up.pos_x][0]     == 0 && up_mv_y   == 0);
+
+  if (up_is_zero || left_is_zero)
+  {
+    skip_mv[0] = 0;
+    skip_mv[1] = 0;
+  }
+  else
+  {
+    SetMotionVectorPredictor (skip_mv, enc_picture->ref_idx[LIST_0], list0_mv, 0, LIST_0, 0, 0, 16, 16);
+  }
+
+  // same vector for every 4x4 block of the macroblock
+  for (row = 0; row < 4; row++)
+  {
+    for (col = 0; col < 4; col++)
+    {
+      memcpy(mvs_all[row][col][0][0][0], skip_mv, 2 * sizeof(short));
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Distortion cost of the direct mode for one 8x8 block.
+ *
+ * \param block
+ *    8x8 block index inside the macroblock (block/2 selects the row,
+ *    block%2 the column)
+ * \param cost8x8
+ *    in/out accumulator: the distortion8x8 value of this block is added
+ *    when rdopt is off and Transform8x8Mode is set; it is set to
+ *    INT_MAX when the direct mode is not allowed
+ *
+ * \return
+ *    sum of the 4x4 distortion values of the block, or INT_MAX when
+ *    direct_pdir is negative for one of its 4x4 blocks
+ ************************************************************************
+ */
+int GetDirectCost8x8 (int block, int *cost8x8)
+{
+  const int quad_y = (block / 2) << 3;
+  const int quad_x = (block % 2) << 3;
+
+  int residual8x8[8][8];   // prediction error of the whole 8x8 block
+  int total = 0;
+  int off_y, off_x, row, col, n;
+
+  for (off_y = 0; off_y < 8; off_y += 4)
+  {
+    const int blk_y = quad_y + off_y;
+    const int org_y = img->opix_y + blk_y;
+
+    for (off_x = 0; off_x < 8; off_x += 4)
+    {
+      const int blk_x = quad_x + off_x;
+      const int org_x = img->opix_x + blk_x;
+      const int b4_y  = org_y >> 2;
+      const int b4_x  = org_x >> 2;
+
+      if (direct_pdir[b4_y][b4_x] < 0)
+      {
+        //mode not allowed
+        *cost8x8 = INT_MAX;
+        return INT_MAX;
+      }
+
+      //===== prediction of 4x4 block =====
+      LumaPrediction4x4 (blk_x, blk_y, direct_pdir[b4_y][b4_x], 0, 0,
+                         direct_ref_idx[LIST_0][b4_y][b4_x],
+                         direct_ref_idx[LIST_1][b4_y][b4_x]);
+
+      //===== get displaced frame difference ======
+      n = 0;
+      for (row = 0; row < 4; row++)
+      {
+        for (col = 0; col < 4; col++)
+        {
+          const int err = imgY_org[org_y + row][org_x + col] - img->mpr[blk_y + row][blk_x + col];
+
+          residual8x8[off_y + row][off_x + col] = err;
+          diff[n++] = err;
+        }
+      }
+
+      // accumulated for every 4x4 block, regardless of the transform mode
+      total += distortion4x4 (diff);
+    }
+  }
+
+  if ((input->rdopt == 0) && input->Transform8x8Mode)
+  {
+    for (row = 0; row < 8; row++)
+      memcpy(&diff64[8 * row], &(residual8x8[row]), 8 * sizeof(int));
+
+    *cost8x8 += distortion8x8 (diff64);
+  }
+
+  return total;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Get cost for direct mode for an macroblock
+ *
+ *    Sums the values returned by GetDirectCost8x8 for the four 8x8
+ *    blocks. Depending on input->Transform8x8Mode the 8x8 transform
+ *    cost accumulated in cost8x8 may replace the 4x4 based sum.
+ *
+ * \return
+ *    direct mode cost of the macroblock, or INT_MAX as soon as
+ *    GetDirectCost8x8 reports cost8x8 == INT_MAX (mode not allowed)
+ ************************************************************************
+ */
+int GetDirectCostMB (void)
+{
+  int i;
+  int cost = 0;
+  int cost8x8 = 0;
+
+  for (i=0; i<4; i++)
+  {
+    // Test for the "not allowed" marker BEFORE accumulating: the block
+    // cost is INT_MAX in that case, and adding it to a running sum left
+    // positive by earlier blocks would overflow a signed int (undefined
+    // behavior).
+    int blk_cost = GetDirectCost8x8 (i, &cost8x8);
+
+    if (cost8x8 == INT_MAX) return INT_MAX;
+
+    cost += blk_cost;
+  }
+
+  switch(input->Transform8x8Mode)
+  {
+  case 1: // Mixture of 8x8 & 4x4 transform
+    // take the 8x8 cost when it is cheaper, or when not all of the
+    // 8x4, 4x8 and 4x4 inter searches are enabled
+    if((cost8x8 < cost)||
+      !(input->InterSearch8x4 &&
+      input->InterSearch4x8 &&
+      input->InterSearch4x4)
+      )
+    {
+      cost = cost8x8; //return 8x8 cost
+    }
+    break;
+  case 2: // 8x8 Transform only
+    cost = cost8x8;
+    break;
+  default: // 4x4 Transform only
+    break;
+  }
+
+  return cost;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Motion search for one partition of the current macroblock.
+ *
+ *    For every list (two for B slices, one otherwise) and every
+ *    reference picture of that list, BlockMotionSearch is run on each
+ *    sub-block of the partition. The returned costs are summed into
+ *    motion_cost[blocktype][list][ref][block8x8], and the resulting
+ *    vector and reference index are written to enc_picture->mv and
+ *    enc_picture->ref_idx so that they can serve motion vector
+ *    prediction.
+ *
+ * \param blocktype
+ *    partition type; indexes input->part_size and the motion arrays
+ * \param block8x8
+ *    index (0..3) selecting the partition start inside the macroblock
+ * \param lambda_factor
+ *    lambda factors, passed through to BlockMotionSearch
+ ************************************************************************
+ */
+void
+PartitionMotionSearch (int    blocktype,
+                       int    block8x8,
+                       int    *lambda_factor)
+{
+  // partition start (in 4x4 block units), indexed by [partition type][block8x8]
+  static int  bx0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,2,0,2}};
+  static int  by0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,0,0,0}, {0,0,2,2}};
+
+  const int parttype    = (blocktype < 4) ? blocktype : 4;
+  const int part_w      = input->part_size[ parttype][0];
+  const int part_h      = input->part_size[ parttype][1];
+  const int sub_w       = input->part_size[blocktype][0];
+  const int sub_h       = input->part_size[blocktype][1];
+  const int list_count  = (img->type == B_SLICE) ? 2 : 1;
+  const int list_offset = img->mb_data[img->current_mb_nr].list_offset;
+  const int y_first     = by0[parttype][block8x8];
+  const int x_first     = bx0[parttype][block8x8];
+
+  int   list, row, col, x, y;
+  short ref;
+
+  //===== LOOP OVER LISTS AND REFERENCE FRAMES =====
+  for (list = 0; list < list_count; list++)
+  {
+    for (ref = 0; ref < listXsize[list + list_offset]; ref++)
+    {
+      int     *cost_sum  = &motion_cost[blocktype][list][ref][block8x8];
+      char   **ref_plane;
+      short ***mv_plane;
+      int      range;
+
+      //----- set search range ---
+#ifdef _FULL_SEARCH_RANGE_
+      if (input->full_search == 2)
+      {
+        range = input->search_range;
+      }
+      else if (input->full_search == 1)
+      {
+        range = input->search_range / (imin(ref,1) + 1);
+      }
+      else
+      {
+        range = input->search_range / ((imin(ref,1) + 1) * imin(2,blocktype));
+      }
+#else
+      range = input->search_range / ((imin(ref,1) + 1) * imin(2,blocktype));
+#endif
+
+      //----- picture level arrays receiving the search result -----
+      ref_plane = enc_picture->ref_idx[list];
+      mv_plane  = enc_picture->mv[list];
+
+      //----- init motion cost -----
+      *cost_sum = 0;
+
+      //===== LOOP OVER SUB MACRO BLOCK partitions
+      for (row = y_first; row < y_first + part_h; row += sub_h)
+      {
+        const int pic_y = img->block_y + row;
+
+        for (col = x_first; col < x_first + part_w; col += sub_w)
+        {
+          short    *best_mv = img->all_mv[row][col][list][ref][blocktype];
+          const int pic_x   = img->block_x + col;
+
+          //--- motion search for block ---
+          const int cost = BlockMotionSearch (ref, list, col << 2, row << 2, blocktype, range, lambda_factor);
+
+          *cost_sum += cost;
+
+          //--- publish vector and reference index for motion vector prediction ---
+          for (y = pic_y; y < pic_y + sub_v_guard(sub_h); y++)
+          {
+            memset(&ref_plane[y][pic_x], ref, sub_w * sizeof(char));
+
+            for (x = pic_x; x < pic_x + sub_w; x++)
+            {
+              memcpy(mv_plane[y][x], best_mv, 2 * sizeof(short));
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Calculate Direct Motion Vectors  *****
+ ************************************************************************
+ */
+void Get_Direct_Motion_Vectors ()
+{
+
+  int   block_x, block_y, pic_block_x, pic_block_y, opic_block_x, opic_block_y;
+  short ****all_mvs;
+  int   mv_scale;
+  int refList;
+  int ref_idx;
+
+  byte  **   moving_block;
+  short ****   co_located_mv;
+  char  ***    co_located_ref_idx;
+  int64 ***    co_located_ref_id;
+  char  **     ref_pic_l0 = enc_picture->ref_idx[LIST_0];
+  char  **     ref_pic_l1 = enc_picture->ref_idx[LIST_1];
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  if (currMB->list_offset)
+  {
+    if(img->current_mb_nr%2)
+    {
+      moving_block = Co_located->bottom_moving_block;
+      co_located_mv = Co_located->bottom_mv;
+      co_located_ref_idx = Co_located->bottom_ref_idx;
+      co_located_ref_id = Co_located->bottom_ref_pic_id;
+    }
+    else
+    {
+      moving_block = Co_located->top_moving_block;
+      co_located_mv = Co_located->top_mv;
+      co_located_ref_idx = Co_located->top_ref_idx;
+      co_located_ref_id = Co_located->top_ref_pic_id;
+    }
+  }
+  else
+  {
+    moving_block = Co_located->moving_block;
+    co_located_mv = Co_located->mv;
+    co_located_ref_idx = Co_located->ref_idx;
+    co_located_ref_id = Co_located->ref_pic_id;
+  }
+
+  if (img->direct_spatial_mv_pred_flag)  //spatial direct mode copy from decoder
+  {
+
+    short l0_refA, l0_refB, l0_refD, l0_refC;
+    short l1_refA, l1_refB, l1_refD, l1_refC;
+    short l0_refX,l1_refX;
+    short pmvfw[2]={0,0},pmvbw[2]={0,0};
+
+    PixelPos mb_a, mb_b, mb_d, mb_c;
+
+    getLuma4x4Neighbour(img->current_mb_nr, -1,  0,&mb_a);
+    getLuma4x4Neighbour(img->current_mb_nr,  0, -1,&mb_b);
+    getLuma4x4Neighbour(img->current_mb_nr, 16, -1,&mb_c);
+    getLuma4x4Neighbour(img->current_mb_nr, -1, -1,&mb_d);
+
+    if (!img->MbaffFrameFlag)
+    {
+      l0_refA = mb_a.available ? ref_pic_l0[mb_a.pos_y][mb_a.pos_x] : -1;
+      l0_refB = mb_b.available ? ref_pic_l0[mb_b.pos_y][mb_b.pos_x] : -1;
+      l0_refD = mb_d.available ? ref_pic_l0[mb_d.pos_y][mb_d.pos_x] : -1;
+      l0_refC = mb_c.available ? ref_pic_l0[mb_c.pos_y][mb_c.pos_x] : l0_refD;
+
+      l1_refA = mb_a.available ? ref_pic_l1[mb_a.pos_y][mb_a.pos_x] : -1;
+      l1_refB = mb_b.available ? ref_pic_l1[mb_b.pos_y][mb_b.pos_x] : -1;
+      l1_refD = mb_d.available ? ref_pic_l1[mb_d.pos_y][mb_d.pos_x] : -1;
+      l1_refC = mb_c.available ? ref_pic_l1[mb_c.pos_y][mb_c.pos_x] : l1_refD;
+    }
+    else
+    {
+      if (currMB->mb_field)
+      {
+        l0_refA = mb_a.available
+          ? (img->mb_data[mb_a.mb_addr].mb_field  || ref_pic_l0[mb_a.pos_y][mb_a.pos_x] < 0
+          ?  ref_pic_l0[mb_a.pos_y][mb_a.pos_x]
+          :  ref_pic_l0[mb_a.pos_y][mb_a.pos_x] * 2) : -1;
+
+        l0_refB = mb_b.available
+          ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l0[mb_b.pos_y][mb_b.pos_x] < 0
+          ?  ref_pic_l0[mb_b.pos_y][mb_b.pos_x]
+          :  ref_pic_l0[mb_b.pos_y][mb_b.pos_x] * 2) : -1;
+
+        l0_refD = mb_d.available
+          ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l0[mb_d.pos_y][mb_d.pos_x] < 0
+          ?  ref_pic_l0[mb_d.pos_y][mb_d.pos_x]
+          :  ref_pic_l0[mb_d.pos_y][mb_d.pos_x] * 2) : -1;
+
+        l0_refC = mb_c.available
+          ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l0[mb_c.pos_y][mb_c.pos_x] < 0
+          ?  ref_pic_l0[mb_c.pos_y][mb_c.pos_x]
+          :  ref_pic_l0[mb_c.pos_y][mb_c.pos_x] * 2) : l0_refD;
+
+        l1_refA = mb_a.available
+          ? (img->mb_data[mb_a.mb_addr].mb_field || ref_pic_l1[mb_a.pos_y][mb_a.pos_x] < 0
+          ?  ref_pic_l1[mb_a.pos_y][mb_a.pos_x]
+          :  ref_pic_l1[mb_a.pos_y][mb_a.pos_x] * 2) : -1;
+
+        l1_refB = mb_b.available
+          ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l1[mb_b.pos_y][mb_b.pos_x] < 0
+          ?  ref_pic_l1[mb_b.pos_y][mb_b.pos_x]
+          :  ref_pic_l1[mb_b.pos_y][mb_b.pos_x] * 2) : -1;
+
+        l1_refD = mb_d.available
+          ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l1[mb_d.pos_y][mb_d.pos_x] < 0
+          ?  ref_pic_l1[mb_d.pos_y][mb_d.pos_x]
+          :  ref_pic_l1[mb_d.pos_y][mb_d.pos_x] * 2) : -1;
+
+        l1_refC = mb_c.available
+          ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l1[mb_c.pos_y][mb_c.pos_x] < 0
+          ?  ref_pic_l1[mb_c.pos_y][mb_c.pos_x]
+          :  ref_pic_l1[mb_c.pos_y][mb_c.pos_x] * 2) : l1_refD;
+      }
+      else
+      {
+        l0_refA = mb_a.available
+          ? (img->mb_data[mb_a.mb_addr].mb_field || ref_pic_l0[mb_a.pos_y][mb_a.pos_x]  < 0
+          ?  ref_pic_l0[mb_a.pos_y][mb_a.pos_x] >> 1
+          :  ref_pic_l0[mb_a.pos_y][mb_a.pos_x]) : -1;
+
+        l0_refB = mb_b.available
+          ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l0[mb_b.pos_y][mb_b.pos_x] < 0
+          ?  ref_pic_l0[mb_b.pos_y][mb_b.pos_x] >> 1
+          :  ref_pic_l0[mb_b.pos_y][mb_b.pos_x]) : -1;
+
+        l0_refD = mb_d.available
+          ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l0[mb_d.pos_y][mb_d.pos_x] < 0
+          ?  ref_pic_l0[mb_d.pos_y][mb_d.pos_x] >> 1
+          :  ref_pic_l0[mb_d.pos_y][mb_d.pos_x]) : -1;
+
+        l0_refC = mb_c.available
+          ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l0[mb_c.pos_y][mb_c.pos_x] < 0
+          ?  ref_pic_l0[mb_c.pos_y][mb_c.pos_x] >> 1
+          :  ref_pic_l0[mb_c.pos_y][mb_c.pos_x]) : l0_refD;
+
+        l1_refA = mb_a.available
+          ? (img->mb_data[mb_a.mb_addr].mb_field || ref_pic_l1[mb_a.pos_y][mb_a.pos_x] < 0
+          ?  ref_pic_l1[mb_a.pos_y][mb_a.pos_x] >> 1
+          :  ref_pic_l1[mb_a.pos_y][mb_a.pos_x]) : -1;
+
+        l1_refB = mb_b.available
+          ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l1[mb_b.pos_y][mb_b.pos_x] < 0
+          ?  ref_pic_l1[mb_b.pos_y][mb_b.pos_x] >> 1
+          :  ref_pic_l1[mb_b.pos_y][mb_b.pos_x]) : -1;
+
+        l1_refD = mb_d.available
+          ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l1[mb_d.pos_y][mb_d.pos_x] < 0
+          ?  ref_pic_l1[mb_d.pos_y][mb_d.pos_x] >> 1
+          :  ref_pic_l1[mb_d.pos_y][mb_d.pos_x]) : -1;
+
+        l1_refC = mb_c.available
+          ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l1[mb_c.pos_y][mb_c.pos_x] < 0
+          ?  ref_pic_l1[mb_c.pos_y][mb_c.pos_x] >> 1
+          :  ref_pic_l1[mb_c.pos_y][mb_c.pos_x]) : l1_refD;
+      }
+    }
+
+    l0_refX = (l0_refA >= 0 && l0_refB >= 0) ? imin(l0_refA,l0_refB): imax(l0_refA,l0_refB);
+    l0_refX = (l0_refX >= 0 && l0_refC >= 0) ? imin(l0_refX,l0_refC): imax(l0_refX,l0_refC);
+
+    l1_refX = (l1_refA >= 0 && l1_refB >= 0) ? imin(l1_refA,l1_refB): imax(l1_refA,l1_refB);
+    l1_refX = (l1_refX >= 0 && l1_refC >= 0) ? imin(l1_refX,l1_refC): imax(l1_refX,l1_refC);
+
+    if (l0_refX >=0)
+      SetMotionVectorPredictor (pmvfw, enc_picture->ref_idx[LIST_0], enc_picture->mv[LIST_0], l0_refX, LIST_0, 0, 0, 16, 16);
+
+    if (l1_refX >=0)
+      SetMotionVectorPredictor (pmvbw, enc_picture->ref_idx[LIST_1], enc_picture->mv[LIST_1], l1_refX, LIST_1, 0, 0, 16, 16);
+
+    for (block_y=0; block_y<4; block_y++)
+    {
+      pic_block_y  = (img->pix_y  >> 2) + block_y;
+      opic_block_y = (img->opix_y >> 2) + block_y;
+
+      for (block_x=0; block_x<4; block_x++)
+      {
+        pic_block_x  = (img->pix_x  >> 2) + block_x;
+        opic_block_x = (img->opix_x >> 2) + block_x;
+
+        all_mvs = img->all_mv[block_y][block_x];
+
+        if (l0_refX >=0)
+        {
+          if (!l0_refX  && !moving_block[opic_block_y][opic_block_x])
+          {
+
+            memset(all_mvs[LIST_0][0][0], 0, 2* sizeof(short));
+            direct_ref_idx[LIST_0][pic_block_y][pic_block_x]=0;
+          }
+          else
+          {
+            all_mvs[LIST_0][l0_refX][0][0] = pmvfw[0];
+            all_mvs[LIST_0][l0_refX][0][1] = pmvfw[1];
+            direct_ref_idx[LIST_0][pic_block_y][pic_block_x]= (char)l0_refX;
+          }
+        }
+        else
+        {
+          all_mvs[LIST_0][0][0][0] = 0;
+          all_mvs[LIST_0][0][0][1] = 0;
+          direct_ref_idx[LIST_0][pic_block_y][pic_block_x]=-1;
+        }
+
+        if (l1_refX >=0)
+        {
+          if(l1_refX==0 && !moving_block[opic_block_y][opic_block_x])
+          {
+            all_mvs[LIST_1][0][0][0] = 0;
+            all_mvs[LIST_1][0][0][1] = 0;
+            direct_ref_idx[LIST_1][pic_block_y][pic_block_x]= (char)l1_refX;
+          }
+          else
+          {
+            all_mvs[LIST_1][l1_refX][0][0] = pmvbw[0];
+            all_mvs[LIST_1][l1_refX][0][1] = pmvbw[1];
+            direct_ref_idx[LIST_1][pic_block_y][pic_block_x]= (char)l1_refX;
+          }
+        }
+        else
+        {
+          direct_ref_idx[LIST_1][pic_block_y][pic_block_x]=-1;
+
+          all_mvs[LIST_1][0][0][0] = 0;
+          all_mvs[LIST_1][0][0][1] = 0;
+        }
+
+        // Test Level Limits if satisfied.
+        if (img->MbaffFrameFlag
+          && (all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][0] < -8192
+          ||  all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][0] >  8191
+          ||  all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][1] < LEVELMVLIMIT[img->LevelIndex][4]
+          ||  all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][1] > LEVELMVLIMIT[img->LevelIndex][5]
+          ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][0] < -8192
+          ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][0] > 8191
+          ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][1] < LEVELMVLIMIT[img->LevelIndex][4]
+          ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][1] > LEVELMVLIMIT[img->LevelIndex][5]))
+        {
+          direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1;
+          direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1;
+          direct_pdir           [pic_block_y][pic_block_x] = -1;
+        }
+        else
+        {
+          if (l0_refX < 0 && l1_refX < 0)
+          {
+            direct_ref_idx[LIST_0][pic_block_y][pic_block_x] =
+              direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = 0;
+            l0_refX = 0;
+            l1_refX = 0;
+          }
+          if      (direct_ref_idx[LIST_1][pic_block_y][pic_block_x] == -1)
+            direct_pdir[pic_block_y][pic_block_x] = 0;
+          else if (direct_ref_idx[LIST_0][pic_block_y][pic_block_x] == -1)
+            direct_pdir[pic_block_y][pic_block_x] = 1;
+          else if (active_pps->weighted_bipred_idc == 1)
+          {
+            int weight_sum, i;
+            Boolean invalid_wp = FALSE;
+            for (i=0;i< (active_sps->chroma_format_idc == YUV400 ? 1 : 3); i++)
+            {
+              weight_sum = wbp_weight[0][l0_refX][l1_refX][i] + wbp_weight[1][l0_refX][l1_refX][i];
+              if (weight_sum < -128 ||  weight_sum > 127)
+              {
+                invalid_wp = TRUE;
+                break;
+              }
+            }
+            if (invalid_wp == FALSE)
+              direct_pdir[pic_block_y][pic_block_x] = 2;
+            else
+            {
+              direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1;
+              direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1;
+              direct_pdir           [pic_block_y][pic_block_x] = -1;
+            }
+          }
+          else
+            direct_pdir[pic_block_y][pic_block_x] = 2;
+        }
+      }
+    }
+  }
+  else
+  {
+    int64 *refpic = enc_picture->ref_pic_num[LIST_0 +currMB->list_offset];
+
+    //temporal direct mode copy from decoder
+    for (block_y = 0; block_y < 4; block_y++)
+    {
+      pic_block_y  = (img->pix_y  >> 2) + block_y;
+      opic_block_y = (img->opix_y >> 2) + block_y;
+
+      for (block_x = 0; block_x < 4; block_x++)
+      {
+        pic_block_x  = (img->pix_x>>2) + block_x;
+        opic_block_x = (img->opix_x>>2) + block_x;
+        all_mvs = img->all_mv[block_y][block_x];
+
+        refList = (co_located_ref_idx[LIST_0][opic_block_y][opic_block_x]== -1 ? LIST_1 : LIST_0);
+        ref_idx = co_located_ref_idx[refList][opic_block_y][opic_block_x];
+
+        // next P is intra mode
+        if (ref_idx==-1)
+        {
+          all_mvs[LIST_0][0][0][0] = 0;
+          all_mvs[LIST_0][0][0][1] = 0;
+          all_mvs[LIST_1][0][0][0] = 0;
+          all_mvs[LIST_1][0][0][1] = 0;
+          direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = 0;
+          direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = 0;
+          direct_pdir[pic_block_y][pic_block_x] = 2;
+        }
+        // next P is skip or inter mode
+        else
+        {
+          int mapped_idx=INVALIDINDEX;
+          int iref;
+
+          for (iref=0;iref<imin(img->num_ref_idx_l0_active,listXsize[LIST_0+currMB->list_offset]);iref++)
+          {
+            if (refpic[iref]==co_located_ref_id[refList ][opic_block_y][opic_block_x])
+            {
+              mapped_idx=iref;
+              break;
+            }
+            else //! invalid index. Default to zero even though this case should not happen
+            {
+              mapped_idx=INVALIDINDEX;
+            }
+          }
+
+          if (mapped_idx !=INVALIDINDEX)
+          {
+            mv_scale = img->mvscale[LIST_0+currMB->list_offset][mapped_idx];
+
+            if (mv_scale==9999)
+            {
+              // forward
+              all_mvs[LIST_0][0][0][0] = co_located_mv[refList][opic_block_y][opic_block_x][0];
+              all_mvs[LIST_0][0][0][1] = co_located_mv[refList][opic_block_y][opic_block_x][1];
+              // backward
+              all_mvs[LIST_1][0][0][0] = 0;
+              all_mvs[LIST_1][0][0][1] = 0;
+            }
+            else
+            {
+              // forward
+              all_mvs[LIST_0][mapped_idx][0][0] = (mv_scale * co_located_mv[refList][opic_block_y][opic_block_x][0] + 128) >> 8;
+              all_mvs[LIST_0][mapped_idx][0][1] = (mv_scale * co_located_mv[refList][opic_block_y][opic_block_x][1] + 128) >> 8;
+              // backward
+              all_mvs[LIST_1][         0][0][0] = ((mv_scale - 256)* co_located_mv[refList][opic_block_y][opic_block_x][0] + 128) >> 8;
+              all_mvs[LIST_1][         0][0][1] = ((mv_scale - 256)* co_located_mv[refList][opic_block_y][opic_block_x][1] + 128) >> 8;
+            }
+
+            // Test Level Limits if satisfied.
+            if ( all_mvs[LIST_0][mapped_idx][0][0] < -8192
+              || all_mvs[LIST_0][mapped_idx][0][0] >  8191
+              || all_mvs[LIST_0][mapped_idx][0][1] < LEVELMVLIMIT[img->LevelIndex][4]
+              || all_mvs[LIST_0][mapped_idx][0][1] > LEVELMVLIMIT[img->LevelIndex][5]
+              || all_mvs[LIST_1][0][0][0] < -8192
+              || all_mvs[LIST_1][0][0][0] > 8191
+              || all_mvs[LIST_1][0][0][1] < LEVELMVLIMIT[img->LevelIndex][4]
+              || all_mvs[LIST_1][0][0][1] > LEVELMVLIMIT[img->LevelIndex][5])
+            {
+              direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1;
+              direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1;
+              direct_pdir[pic_block_y][pic_block_x] = -1;
+            }
+            else
+            {
+              direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = mapped_idx;
+              direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = 0;
+              direct_pdir[pic_block_y][pic_block_x] = 2;
+            }
+          }
+          else
+          {
+            direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1;
+            direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1;
+            direct_pdir[pic_block_y][pic_block_x] = -1;
+          }
+        }
+        if (active_pps->weighted_bipred_idc == 1 && direct_pdir[pic_block_y][pic_block_x] == 2)
+        {
+          int weight_sum, i;
+          short l0_refX = direct_ref_idx[LIST_0][pic_block_y][pic_block_x];
+          short l1_refX = direct_ref_idx[LIST_1][pic_block_y][pic_block_x];
+          for (i=0;i< (active_sps->chroma_format_idc == YUV400 ? 1 : 3); i++)
+          {
+            weight_sum = wbp_weight[0][l0_refX][l1_refX][i] + wbp_weight[1][l0_refX][l1_refX][i];
+            if (weight_sum < -128 ||  weight_sum > 127)
+            {
+              direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1;
+              direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1;
+              direct_pdir           [pic_block_y][pic_block_x] = -1;
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/mv-search.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/mv-search.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/mv-search.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,77 @@
+
+/*!
+ ************************************************************************
+ * \file mv-search.h
+ *
+ * \brief
+ *   array definition for motion search
+ *
+ * \author
+ *    Inge Lille-Langoy               <inge.lille-langoy at telenor.com>   \n
+ *    Alexis Michael Tourapis         <alexis.tourapis at dolby.com>       \n
+ *    Copyright (C) 1999  Telenor Satellite Services, Norway
+ *
+ ************************************************************************
+ */
+
+#ifndef _MV_SEARCH_H_
+#define _MV_SEARCH_H_
+
+//! 40-entry lookup table; original note: convert from H.263 QP to H.264 quant given by: quant=pow(2,QP/6). Entries double every 6 positions (index 24 -> 16, 30 -> 32, 36 -> 64).
+const int QP2QUANT[40]=   // NOTE(review): this is a definition (not a declaration) living in a header -- confirm it is included from one .c file only
+{
+   1, 1, 1, 1, 2, 2, 2, 2,   // indices  0.. 7
+   3, 3, 3, 4, 4, 4, 5, 6,   // indices  8..15
+   6, 7, 8, 9,10,11,13,14,   // indices 16..23
+  16,18,20,23,25,29,32,36,   // indices 24..31
+  40,45,51,57,64,72,81,91    // indices 32..39
+};
+
+// Vertical MV limits, one row per level index (rows are selected with img->LevelIndex).
+// Each row is three {low, high} pairs, for integer-, half- and quarter-pel units in that order.
+// Original note: currently only Integer Pel restrictions are used, since the way values are specified
+// (i.e. mvlowbound = (levelmvlowbound + 1)) and the way Subpel ME is performed, subpel will always be
+// within range. (The direct-mode MV checks additionally compare against columns 4 and 5.)
+
+const int LEVELMVLIMIT[17][6] =
+{
+  {  -63,  63,  -128,  127,  -256,  255},
+  {  -63,  63,  -128,  127,  -256,  255},
+  { -127, 127,  -256,  255,  -512,  511},
+  { -127, 127,  -256,  255,  -512,  511},
+  { -127, 127,  -256,  255,  -512,  511},
+  { -127, 127,  -256,  255,  -512,  511},
+  { -255, 255,  -512,  511, -1024, 1023},
+  { -255, 255,  -512,  511, -1024, 1023},
+  { -255, 255,  -512,  511, -1024, 1023},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047},
+  { -511, 511, -1024, 1023, -2048, 2047}
+  // Disabled alternative kept for reference: identical to the table above except that every integer-pel low bound (column 0) is one smaller.
+  /*
+  {  -64,  63,  -128,  127,  -256,  255},
+  {  -64,  63,  -128,  127,  -256,  255},
+  { -128, 127,  -256,  255,  -512,  511},
+  { -128, 127,  -256,  255,  -512,  511},
+  { -128, 127,  -256,  255,  -512,  511},
+  { -128, 127,  -256,  255,  -512,  511},
+  { -256, 255,  -512,  511, -1024, 1023},
+  { -256, 255,  -512,  511, -1024, 1023},
+  { -256, 255,  -512,  511, -1024, 1023},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047},
+  { -512, 511, -1024, 1023, -2048, 2047}
+  */
+};
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/nal.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/nal.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/nal.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,147 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    nal.c
+ * \brief
+ *    Handles the conversion of a String of Data Bits (SODB)
+ *    to a Raw Byte Sequence Payload (RBSP), and from there
+ *    to an Encapsulated Byte Sequence Payload (EBSP).
+ *  \date 14 June 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Shankar Regunathan                  <shanre at microsoft.de>
+ *      - Stephan Wenger                      <stewe at cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+#include "global.h"
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Converts String Of Data Bits (SODB) to Raw Byte Sequence
+ *    Payload (RBSP)
+ * \param currStream
+ *        Bitstream which contains data bits.
+ * \return None
+ * \note currStream is byte-aligned at the end of this function
+ *
+ ************************************************************************
+*/
+
+static byte *NAL_Payload_buffer;
+
+void SODBtoRBSP(Bitstream *currStream)
+{
+  // Append a single '1' bit behind the data bits that are already buffered ...
+  currStream->byte_buf = (currStream->byte_buf << 1) | 1;
+  // ... then left-align the partial byte, so the remaining low bits are zero padding.
+  currStream->byte_buf <<= --currStream->bits_to_go;
+  currStream->streamBuffer[currStream->byte_pos] = currStream->byte_buf;
+  currStream->byte_pos++;
+  currStream->bits_to_go = 8;  currStream->byte_buf = 0;   // start over with an empty byte
+}
+
+
+/*!
+************************************************************************
+*  \brief
+*     This function converts a RBSP payload to an EBSP payload
+*
+*  \param streamBuffer
+*       pointer to data bits
+*  \param begin_bytepos
+*            The byte position after start-code, after which stuffing to
+*            prevent start-code emulation begins.
+*  \param end_bytepos
+*           Size of streamBuffer in bytes.
+*  \param min_num_bytes
+*           Minimum number of bytes in payload. Should be 0 for VLC entropy
+*           coding mode. Determines number of stuffed words for CABAC mode.
+*  \return
+*           Size of streamBuffer after stuffing.
+*  \note
+*      NAL_Payload_buffer is used as temporary buffer to store data.
+*
+*
+************************************************************************
+*/
+
+int RBSPtoEBSP(byte *streamBuffer, int begin_bytepos, int end_bytepos, int min_num_bytes)
+{
+  const int payload_len = end_bytepos - begin_bytepos;
+  int rd;                      // read position in the snapshot copy
+  int wr = begin_bytepos;      // write position in streamBuffer
+  int zero_run = 0;            // 0x00 bytes seen in a row since the last reset
+  int pad;
+
+  // Work from a snapshot: escaping inserts bytes, so the output overtakes the input.
+  memcpy(&NAL_Payload_buffer[begin_bytepos], &streamBuffer[begin_bytepos], payload_len * sizeof(unsigned char));
+
+  for (rd = begin_bytepos; rd < end_bytepos; rd++)
+  {
+    const byte cur = NAL_Payload_buffer[rd];
+
+    // After ZEROBYTES_SHORTSTARTCODE zero bytes, a byte in 0x00..0x03 would emulate
+    // a start code, so a 0x03 byte is emitted in front of it.
+    if (zero_run == ZEROBYTES_SHORTSTARTCODE && (cur & 0xFC) == 0)
+    {
+      streamBuffer[wr++] = 0x03;
+      zero_run = 0;
+    }
+    streamBuffer[wr++] = cur;
+    zero_run = (cur == 0x00) ? zero_run + 1 : 0;
+  }
+
+  // Stuffing towards min_num_bytes, three bytes per step: a CABAC zero word (0x0000)
+  // followed by 0x03; each step is booked as 16 stuffing bits in the statistics.
+  for (pad = 0; pad < (min_num_bytes - end_bytepos); pad += 3)
+  {
+    streamBuffer[wr++] = 0x00;  streamBuffer[wr++] = 0x00;  streamBuffer[wr++] = 0x03;
+    stats->bit_use_stuffingBits[img->type] += 16;
+  }
+  return wr;
+}
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Initializes NAL module: releases any previous NAL_Payload_buffer and
+ *    allocates a new zeroed one; ends via no_mem_exit() if allocation fails.
+ ************************************************************************
+*/
+void AllocNalPayloadBuffer()
+{
+  // AH 190202: There can be data expansion with low QP values. So, we make sure that the buffer
+  // does not overflow: 5 bytes per pixel of the (width+auto_crop_right) x (height+auto_crop_bottom) picture.
+  const int buffer_size = ((input->img_width+img->auto_crop_right) * (input->img_height+img->auto_crop_bottom) * 5);
+  FreeNalPayloadBuffer();
+  NAL_Payload_buffer = (byte *) calloc(buffer_size, sizeof(byte));
+  if (NAL_Payload_buffer == NULL)   // explicit check: an assert() would vanish when NDEBUG is defined
+    no_mem_exit("AllocNalPayloadBuffer: NAL_Payload_buffer");
+}
+
+
+ /*!
+ ************************************************************************
+ * \brief
+ *   Shuts down the NAL module: releases NAL_Payload_buffer and resets the
+ *   pointer to NULL. Does nothing when no buffer is currently allocated.
+ ************************************************************************
+*/
+
+void FreeNalPayloadBuffer()
+{
+  if (NAL_Payload_buffer == NULL)
+    return;                       // nothing allocated (or already released)
+  free(NAL_Payload_buffer);
+  NAL_Payload_buffer = NULL;
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/nalu.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalu.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/nalu.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,78 @@
+
+/*!
+ ************************************************************************
+ * \file  nalu.c
+ *
+ * \brief
+ *    Common NALU support functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Stephan Wenger   <stewe at cs.tu-berlin.de>
+ ************************************************************************
+ */
+
+#include <assert.h>
+#include <memory.h>
+
+#include "global.h"
+#include "nalu.h"
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Converts an RBSP to a NALU
+ *
+ * \param rbsp
+ *    byte buffer with the rbsp
+ * \param nalu
+ *    nalu structure to be filled
+ * \param rbsp_size
+ *    size of the rbsp in bytes
+ * \param nal_unit_type
+ *    as in JVT doc
+ * \param nal_reference_idc
+ *    as in JVT doc
+ * \param min_num_bytes
+ *    minimum payload size in bytes, passed on to RBSPtoEBSP (0 for VLC; sets the number of stuffed CABAC zero words)
+ * \param UseAnnexbLongStartcode
+ *    when 1 and when using AnnexB bytestreams, then use a long startcode prefix
+ *
+ * \return
+ *    length of the NALU in bytes
+ *************************************************************************************
+ */
+
+int RBSPtoNALU (unsigned char *rbsp, NALU_t *nalu, int rbsp_size, int nal_unit_type, int nal_reference_idc,
+                int min_num_bytes, int UseAnnexbLongStartcode)
+{
+  int ebsp_len;
+
+  // Argument sanity checks.
+  assert (nalu != NULL);
+  assert (nal_reference_idc <=3 && nal_reference_idc >=0);
+  assert (nal_unit_type > 0 && nal_unit_type <= 10);
+  assert (rbsp_size < MAXRBSPSIZE);
+
+  // Record the header fields in the NALU descriptor.
+  if (UseAnnexbLongStartcode)
+    nalu->startcodeprefix_len = 4;
+  else
+    nalu->startcodeprefix_len = 3;
+  nalu->forbidden_bit     = 0;
+  nalu->nal_reference_idc = nal_reference_idc;
+  nalu->nal_unit_type     = nal_unit_type;
+
+  // First byte: forbidden bit at bit 7, nal_reference_idc from bit 5 upwards, nal_unit_type in the low bits.
+  nalu->buf[0] = (nalu->forbidden_bit << 7) | (nalu->nal_reference_idc << 5) | nalu->nal_unit_type;
+
+  // The payload is copied right behind that byte and converted in place; RBSPtoEBSP
+  // reports the size counted from buf[1], hence the extra 1 for the first byte.
+  memcpy (&nalu->buf[1], rbsp, rbsp_size);
+  ebsp_len = RBSPtoEBSP (&nalu->buf[1], 0, rbsp_size, min_num_bytes);
+
+  nalu->len = 1 + ebsp_len;
+  return 1 + ebsp_len;
+}
+
+


Index: llvm-test/MultiSource/Applications/JM/lencod/nalu.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalu.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/nalu.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,28 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    nalu.h
+ * \brief
+ *    NALU support functions, encoder operations
+ *    This code reflects JVT version xxx
+ *  \date 25 November 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+#ifndef _NALU_H_
+#define _NALU_H_
+
+#include "nalucommon.h"
+
+int RBSPtoNALU (unsigned char *rbsp, NALU_t *nalu, int rbsp_size, int nal_unit_type, int nal_reference_idc,
+                int min_num_bytes, int UseAnnexbLongStartcode);   //!< fills *nalu from an RBSP; returns the NALU length in bytes
+// NOTE(review): WriteNALU below is an object definition in a header (no extern); it relies on tentative definitions being merged across .c files -- confirm.
+int (*WriteNALU)(NALU_t *n);     //! Hides the write function in Annex B or RTP
+
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/nalucommon.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalucommon.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/nalucommon.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,73 @@
+
+/*!
+ ************************************************************************
+ * \file  nalucommon.c
+ *
+ * \brief
+ *    Common NALU support functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Stephan Wenger   <stewe at cs.tu-berlin.de>
+ ************************************************************************
+ */
+
+#include <stdlib.h>
+
+#include "global.h"
+#include "nalu.h"
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates memory for a NALU
+ *
+ * \param buffersize
+ *     size of NALU buffer
+ *
+ * \return
+ *    pointer to a NALU
+ *************************************************************************************
+ */
+NALU_t *AllocNALU(int buffersize)
+{
+  // Zero-initialised descriptor first, then its payload buffer.
+  NALU_t *nalu = (NALU_t*)calloc (1, sizeof (NALU_t));
+
+  if (nalu == NULL)
+    no_mem_exit ("AllocNALU: n");
+
+  nalu->max_size = buffersize;
+  nalu->buf      = (byte*)calloc (buffersize, sizeof (byte));
+  if (nalu->buf == NULL)
+  {
+    free (nalu);                        // release the descriptor before reporting the failure
+    no_mem_exit ("AllocNALU: n->buf");
+  }
+  return nalu;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Frees a NALU
+ *
+ * \param n
+ *    NALU to be freed
+ *
+ *************************************************************************************
+ */
+void FreeNALU(NALU_t *n)
+{
+  // Accepts a NULL descriptor, and a descriptor that has no payload buffer.
+  if (n == NULL)
+    return;
+
+  if (n->buf != NULL)
+    free(n->buf);
+  n->buf = NULL;
+
+  free (n);
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/nalucommon.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalucommon.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/nalucommon.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,62 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    nalucommon.h
+ * \brief
+ *    NALU handling common to encoder and decoder
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ *      - Karsten Suehring      <suehring at hhi.de>
+ ***************************************************************************************
+ */
+
+#ifndef _NALUCOMMON_H_
+#define _NALUCOMMON_H_
+
+#define MAXRBSPSIZE 64000
+
+//! values for nal_unit_type (meanings per H.264 Table 7-1); note that RBSPtoNALU asserts 0 < type <= 10
+typedef enum {
+ NALU_TYPE_SLICE    = 1,   //!< coded slice of a non-IDR picture
+ NALU_TYPE_DPA      = 2,   //!< coded slice data partition A
+ NALU_TYPE_DPB      = 3,   //!< coded slice data partition B
+ NALU_TYPE_DPC      = 4,   //!< coded slice data partition C
+ NALU_TYPE_IDR      = 5,   //!< coded slice of an IDR picture
+ NALU_TYPE_SEI      = 6,   //!< supplemental enhancement information
+ NALU_TYPE_SPS      = 7,   //!< sequence parameter set
+ NALU_TYPE_PPS      = 8,   //!< picture parameter set
+ NALU_TYPE_AUD      = 9,   //!< access unit delimiter
+ NALU_TYPE_EOSEQ    = 10,  //!< end of sequence
+ NALU_TYPE_EOSTREAM = 11,  //!< end of stream
+ NALU_TYPE_FILL     = 12   //!< filler data
+} NaluType;
+
+//! values for nal_ref_idc (RBSPtoNALU asserts the value lies in 0..3)
+typedef enum {
+ NALU_PRIORITY_HIGHEST     = 3,
+ NALU_PRIORITY_HIGH        = 2,
+ NALU_PRIRITY_LOW          = 1,   // NOTE(review): identifier is spelled "PRIRITY" (sic); presumably referenced elsewhere under this name -- check before renaming
+ NALU_PRIORITY_DISPOSABLE  = 0
+} NalRefIdc;
+
+//! NAL unit structure (allocated by AllocNALU, released by FreeNALU, filled by RBSPtoNALU)
+typedef struct
+{
+  int       startcodeprefix_len;   //!< 4 for parameter sets and first slice in picture, 3 for everything else (suggested)
+  unsigned  len;                   //!< Length of the NAL unit (Excluding the start code, which does not belong to the NALU)
+  unsigned  max_size;              //!< NAL Unit Buffer size (the buffersize given to AllocNALU)
+  NaluType  nal_unit_type;         //!< NALU_TYPE_xxxx
+  NalRefIdc nal_reference_idc;     //!< NALU_PRIORITY_xxxx
+  int       forbidden_bit;         //!< should be always FALSE
+  byte     *buf;                   //!< contains the first byte followed by the EBSP
+} NALU_t;
+
+//! allocate one NAL Unit whose payload buffer holds the given number of bytes (calls no_mem_exit on allocation failure)
+NALU_t *AllocNALU(int);
+
+//! free one NAL Unit together with its payload buffer; a NULL pointer is accepted
+void FreeNALU(NALU_t *n);
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/output.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/output.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/output.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,498 @@
+
+/*!
+ ************************************************************************
+ * \file output.c
+ *
+ * \brief
+ *    Output an image and Trace support
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Karsten Suehring               <suehring at hhi.de>
+ ************************************************************************
+ */
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+#include "global.h"
+#include "image.h"
+
+void write_out_picture(StorablePicture *p, int p_out);
+
+FrameStore* out_buffer;
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *      checks if the System is big- or little-endian
+ * \return
+ *      0, little-endian (e.g. Intel architectures)
+ *      1, big-endian (e.g. SPARC, MIPS, PowerPC)
+ ************************************************************************
+ */
+int testEndian()
+{
+  // Store 1 in a multi-byte integer and inspect the byte at its lowest address:
+  // it holds the 1 on a little-endian machine and 0 on a big-endian one.
+  short probe = 1;
+  const byte *first_byte = (const byte*)&probe;
+
+  if (*first_byte == 0)
+    return 1;   // big-endian
+  return 0;     // little-endian
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Convert image plane to temporary buffer for file writing
+ * \param imgX
+ *    Pointer to image plane
+ * \param buf
+ *    Buffer for file output
+ * \param size_x
+ *    horizontal size
+ * \param size_y
+ *    vertical size
+ * \param symbol_size_in_bytes
+ *    number of bytes used per pel
+ * \param crop_left
+ *    pixels to crop from left
+ * \param crop_right
+ *    pixels to crop from right
+ * \param crop_top
+ *    pixels to crop from top
+ * \param crop_bottom
+ *    pixels to crop from bottom
+ ************************************************************************
+ */
+void img2buf (imgpel** imgX, unsigned char* buf, int size_x, int size_y, int symbol_size_in_bytes, int crop_left, int crop_right, int crop_top, int crop_bottom)
+{
+  int i,j;
+  // dimensions of the cropped window; buf receives twidth*theight samples, tightly packed from offset 0
+  int twidth  = size_x - crop_left - crop_right;
+  int theight = size_y - crop_top - crop_bottom;
+
+  int size = 0;
+
+  unsigned char  ui8;
+  unsigned short tmp16, ui16;
+  unsigned long  tmp32, ui32;
+
+  if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes))
+  {
+    // imgpel == pixel_in_file == 1 byte -> simple copy, one cropped row at a time (destination rows start at i*twidth, with no crop_left offset, like every other path below)
+    for(i=0;i<theight;i++)
+      memcpy(buf+(i*twidth),&(imgX[i+crop_top][crop_left]), twidth);
+  }
+  else
+  {
+    // sizeof (imgpel) > sizeof(char)
+    if (testEndian())
+    {
+      // big endian: samples are byte-swapped before being stored, so the bytes land in the same order the little-endian path produces
+      switch (symbol_size_in_bytes)
+      {
+      case 1:
+        {
+          for(i=crop_top;i<size_y-crop_bottom;i++)
+            for(j=crop_left;j<size_x-crop_right;j++)
+            {
+              ui8 = (unsigned char) (imgX[i][j]);
+              buf[(j-crop_left+((i-crop_top)*(twidth)))] = ui8;
+            }
+          break;
+        }
+      case 2:
+        {
+          for(i=crop_top;i<size_y-crop_bottom;i++)
+            for(j=crop_left;j<size_x-crop_right;j++)
+            {
+              tmp16 = (unsigned short) (imgX[i][j]);
+              ui16  = (tmp16 >> 8) | ((tmp16&0xFF)<<8);
+              memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*2),&(ui16), 2);
+            }
+          break;
+        }
+      case 4:
+        {
+          for(i=crop_top;i<size_y-crop_bottom;i++)
+            for(j=crop_left;j<size_x-crop_right;j++)
+            {
+              tmp32 = (unsigned long) (imgX[i][j]);
+              ui32  = ((tmp32&0xFF00)<<8) | ((tmp32&0xFF)<<24) | ((tmp32&0xFF0000)>>8) | ((tmp32&0xFF000000)>>24);
+              memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*4),&(ui32), 4);   // NOTE(review): copies the first 4 bytes of an unsigned long; verify on targets where long is wider than 32 bits
+            }
+          break;
+        }
+      default:
+        {
+           error ("writing only to formats of 8, 16 or 32 bit allowed on big endian architecture", 500);
+           break;
+        }
+      }
+
+    }
+    else
+    {
+      // little endian
+      if (sizeof (imgpel) < symbol_size_in_bytes)
+      {
+        // this should not happen. we should not have smaller imgpel than our source material.
+        size = sizeof (imgpel);
+        // clear buffer so the bytes of each file symbol that are not copied below stay zero
+        memset (buf, 0, (twidth*theight*symbol_size_in_bytes));
+      }
+      else
+      {
+        size = symbol_size_in_bytes;
+      }
+
+      for(i=crop_top;i<size_y-crop_bottom;i++)
+        for(j=crop_left;j<size_x-crop_right;j++)
+        {
+          memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*symbol_size_in_bytes),&(imgX[i][j]), size);
+        }
+
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes out a storable picture without doing any output modifications
+ * \param p
+ *    Picture to be written
+ * \param p_out
+ *    Output file
+ * \param real_structure
+ *    real picture structure
+ ************************************************************************
+ */
+void write_picture(StorablePicture *p, int p_out, int real_structure)
+{
+  write_out_picture(p, p_out);   // real_structure is not used here; the picture is forwarded unchanged
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes out a storable picture
+ * \param p
+ *    Picture to be written
+ * \param p_out
+ *    Output file
+ ************************************************************************
+ */
+void write_out_picture(StorablePicture *p, int p_out)
+{
+  int SubWidthC  [4]= { 1, 2, 2, 1};   // horizontal crop-unit multiplier, indexed by p->chroma_format_idc
+  int SubHeightC [4]= { 1, 2, 1, 1};   // vertical crop-unit multiplier, indexed by p->chroma_format_idc
+  // Plane order written below: imgY, imgUV[0], imgUV[1]; with rgb_output: imgUV[1], imgY, imgUV[0].
+  int crop_left, crop_right, crop_top, crop_bottom;
+  int symbol_size_in_bytes = img->pic_unit_size_on_disk/8;
+  Boolean rgb_output = (Boolean) (input->rgb_input_flag != 0 && input->yuv_format==3);
+  unsigned char *buf;
+
+  if (p->non_existing)
+    return;
+  if (p_out == -1)   // -1 is treated as "no output file": nothing is written
+    return;
+
+  if (p->frame_cropping_flag)
+  {
+    // luma cropping: offsets scaled by the multipliers above (vertically doubled when frame_mbs_only_flag is 0)
+    crop_left   = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset;
+    crop_right  = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset;
+    crop_top    = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+    crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+  }
+  else
+  {
+    crop_left = crop_right = crop_top = crop_bottom = 0;
+  }
+
+  //printf ("write frame size: %dx%d\n", p->size_x-crop_left-crop_right,p->size_y-crop_top-crop_bottom );
+
+  // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version
+  buf = malloc (p->size_x*p->size_y*symbol_size_in_bytes);   // sized for the full (uncropped) luma plane and reused for every plane
+  if (NULL==buf)
+  {
+    no_mem_exit("write_out_picture: buf");
+  }
+
+  if(rgb_output)
+  {
+    // NOTE(review): these offsets are used without looking at frame_cropping_flag -- presumably they are 0 when the flag is off; confirm
+    crop_left   = p->frame_cropping_rect_left_offset;
+    crop_right  = p->frame_cropping_rect_right_offset;
+    crop_top    = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+    crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+
+    img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+    write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes);   // NOTE(review): write() result is ignored here and below
+
+    // restore the luma cropping values for the imgY plane written next
+    if (p->frame_cropping_flag)
+    {
+      crop_left   = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset;
+      crop_right  = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset;
+      crop_top    = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+      crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+    }
+    else
+    {
+      crop_left = crop_right = crop_top = crop_bottom = 0;
+    }
+  }
+
+  img2buf (p->imgY, buf, p->size_x, p->size_y, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+  write(p_out, buf, (p->size_y-crop_bottom-crop_top)*(p->size_x-crop_right-crop_left)*symbol_size_in_bytes);
+
+  if (p->chroma_format_idc!=YUV400)
+  {
+    // chroma planes: the raw offsets are applied to the chroma dimensions (no SubWidthC/SubHeightC scaling)
+    crop_left   = p->frame_cropping_rect_left_offset;
+    crop_right  = p->frame_cropping_rect_right_offset;
+    crop_top    = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+    crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+
+    img2buf (p->imgUV[0], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+    write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)* symbol_size_in_bytes);
+
+    if (!rgb_output)   // with rgb_output, imgUV[1] has already been written as the first plane
+    {
+      img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+      write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes);
+    }
+  }
+
+  free(buf);
+
+//  fsync(p_out);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initialize output buffer for direct output
+ ************************************************************************
+ */
+void init_out_buffer()
+{
+  out_buffer = alloc_frame_store();   // file-level FrameStore that flush_direct_output() later writes out and empties
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Uninitialize output buffer for direct output
+ ************************************************************************
+ */
+void uninit_out_buffer()
+{
+  free_frame_store(out_buffer);
+  out_buffer=NULL;   // cleared so the stale pointer cannot be used after the store is freed
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initialize picture memory: every luma sample is set to img->dc_pred_value_luma and every sample of both chroma planes to img->dc_pred_value_chroma
+ ************************************************************************
+ */
+void clear_picture(StorablePicture *p)
+{
+  int i, j;
+  // Assign sample by sample: memset() replicates one byte, which produces wrong sample values when imgpel is wider than a byte.
+  for(i=0;i<p->size_y;i++)
+    for(j=0;j<p->size_x;j++)
+      p->imgY[i][j] = (imgpel) img->dc_pred_value_luma;
+  for(i=0;i<p->size_y_cr;i++)
+    for(j=0;j<p->size_x_cr;j++)
+      p->imgUV[0][i][j] = p->imgUV[1][i][j] = (imgpel) img->dc_pred_value_chroma;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Write out not paired direct output fields. A second empty field is generated
+ *    and combined into the frame buffer.
+ * \param fs
+ *    FrameStore that contains a single field
+ * \param p_out
+ *    Output file
+ ************************************************************************
+ */
+void write_unpaired_field(FrameStore* fs, int p_out)
+{
+  StorablePicture *p;
+  assert (fs->is_used<3);   // bit 0: top field present, bit 1: bottom field present; a complete pair (3) must not get here
+  if(fs->is_used &1)
+  {
+    // we have a top field
+    // construct an empty bottom field
+    p = fs->top_field;
+    fs->bottom_field = alloc_storable_picture(BOTTOM_FIELD, p->size_x, p->size_y, p->size_x_cr, p->size_y_cr);
+    fs->bottom_field->chroma_format_idc = p->chroma_format_idc;
+    clear_picture(fs->bottom_field);
+    dpb_combine_field_yuv(fs);
+    write_picture (fs->frame, p_out, TOP_FIELD);
+  }
+
+  if(fs->is_used &2)
+  {
+    // we have a bottom field
+    // construct an empty top field
+    p = fs->bottom_field;
+    fs->top_field = alloc_storable_picture(TOP_FIELD, p->size_x, p->size_y, p->size_x_cr, p->size_y_cr);
+    // set the chroma format of the new field, then fill it exactly once (a single clear_picture call is enough)
+    fs->top_field->chroma_format_idc = p->chroma_format_idc;
+    clear_picture(fs->top_field);
+    fs ->top_field->frame_cropping_flag = fs->bottom_field->frame_cropping_flag;
+    if(fs ->top_field->frame_cropping_flag)
+    {
+      fs ->top_field->frame_cropping_rect_top_offset = fs->bottom_field->frame_cropping_rect_top_offset;
+      fs ->top_field->frame_cropping_rect_bottom_offset = fs->bottom_field->frame_cropping_rect_bottom_offset;
+      fs ->top_field->frame_cropping_rect_left_offset = fs->bottom_field->frame_cropping_rect_left_offset;
+      fs ->top_field->frame_cropping_rect_right_offset = fs->bottom_field->frame_cropping_rect_right_offset;
+    }
+    dpb_combine_field_yuv(fs);
+    write_picture (fs->frame, p_out, BOTTOM_FIELD);
+  }
+
+  fs->is_used=3;   // the store is marked as holding both fields from here on
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Write whatever is pending in the direct output buffer (out_buffer)
+ *    as an unpaired field, then release its pictures and mark the
+ *    buffer as empty.
+ * \param p_out
+ *    Output file
+ ************************************************************************
+ */
+void flush_direct_output(int p_out)
+{
+  FrameStore *pending = out_buffer;
+
+  write_unpaired_field(pending, p_out);
+
+  // release frame, top field and bottom field and forget about them
+  free_storable_picture(pending->frame);
+  pending->frame = NULL;
+
+  free_storable_picture(pending->top_field);
+  pending->top_field = NULL;
+
+  free_storable_picture(pending->bottom_field);
+  pending->bottom_field = NULL;
+
+  pending->is_used = 0;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Write the content of a FrameStore and flag it as output
+ *    (fs->is_output = 1). The direct output buffer is flushed first.
+ * \param fs
+ *    FrameStore to write; with fs->is_used below 3 it is handled as an
+ *    unpaired field, otherwise fs->frame is written as a frame
+ * \param p_out
+ *    Output file
+ ************************************************************************
+ */
+void write_stored_frame(FrameStore *fs, int p_out)
+{
+  // a direct output field may still be waiting; it has to go out first
+  flush_direct_output(p_out);
+
+  if (fs->is_used >= 3)
+    write_picture(fs->frame, p_out, FRAME);
+  else
+    write_unpaired_field(fs, p_out);
+
+  fs->is_output = 1;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Output a picture directly, without storing it in the DPB.
+ *    A frame is written (and freed) immediately. A field is parked in
+ *    out_buffer until the field of the other parity arrives; the pair
+ *    is then combined, written as a frame and released.
+ * \param p
+ *    Picture for output
+ * \param p_out
+ *    Output file
+ ************************************************************************
+ */
+void direct_output(StorablePicture *p, int p_out)
+{
+  int parity_bit = 0;   // 1: top field, 2: bottom field, 0: neither
+
+  if (p->structure == FRAME)
+  {
+    // a frame (or complementary field pair) goes out right away
+    flush_direct_output(p_out);
+    write_picture (p, p_out, FRAME);
+    free_storable_picture(p);
+    return;
+  }
+
+  if (p->structure == TOP_FIELD)
+    parity_bit = 1;
+  else if (p->structure == BOTTOM_FIELD)
+    parity_bit = 2;
+
+  if (parity_bit != 0)
+  {
+    // the slot for this parity is occupied: write the old content first
+    if (out_buffer->is_used & parity_bit)
+      flush_direct_output(p_out);
+
+    if (parity_bit == 1)
+      out_buffer->top_field = p;
+    else
+      out_buffer->bottom_field = p;
+
+    out_buffer->is_used |= parity_bit;
+  }
+
+  if (out_buffer->is_used != 3)
+    return;
+
+  // both fields are present, so combine and output them
+  dpb_combine_field_yuv(out_buffer);
+  write_picture (out_buffer->frame, p_out, FRAME);
+
+  free_storable_picture(out_buffer->frame);
+  out_buffer->frame = NULL;
+  free_storable_picture(out_buffer->top_field);
+  out_buffer->top_field = NULL;
+  free_storable_picture(out_buffer->bottom_field);
+  out_buffer->bottom_field = NULL;
+  out_buffer->is_used = 0;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Direct output variant for adaptive frame/field coding: the pictures
+*    pending in out_buffer are released WITHOUT being written, a warning
+*    is printed, and the picture is then passed on to direct_output().
+* \param p
+*    Picture for output
+* \param p_out
+*    Output file
+************************************************************************
+*/
+void direct_output_paff(StorablePicture *p, int p_out)
+{
+  FrameStore *stale = out_buffer;
+
+  printf("Warning!!! Frame can't fit in DPB. Displayed out of sequence.\n");
+
+  // drop whatever is buffered
+  free_storable_picture(stale->frame);
+  stale->frame = NULL;
+
+  free_storable_picture(stale->top_field);
+  stale->top_field = NULL;
+
+  free_storable_picture(stale->bottom_field);
+  stale->bottom_field = NULL;
+
+  stale->is_used = 0;
+
+  direct_output(p, p_out);
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/output.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/output.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/output.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,28 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    output.h
+ * \brief
+ *    Picture writing routine headers
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Karsten Suehring        <suehring at hhi.de>
+ ***************************************************************************************
+ */
+
+#ifndef _OUTPUT_H_
+#define _OUTPUT_H_
+
+int testEndian(void);
+
+// output of pictures without storing them in the DPB (see output.c)
+void direct_output(StorablePicture *p, int p_out);
+void direct_output_paff(StorablePicture *p, int p_out);
+void flush_direct_output(int p_out);
+
+// writing of pictures and frame stores
+void write_out_picture(StorablePicture *p, int p_out);
+void write_stored_frame(FrameStore *fs, int p_out);
+
+void init_out_buffer(void);
+void uninit_out_buffer(void);
+
+#endif //_OUTPUT_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/parset.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/parset.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/parset.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1048 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    parset.c
+ * \brief
+ *    Picture and Sequence Parameter set generation and handling
+ *  \date 25 November 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ *
+ **************************************************************************************
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <time.h>
+#include <sys/timeb.h>
+
+#include "global.h"
+
+#include "contributors.h"
+#include "mbuffer.h"
+#include "parset.h"
+#include "vlc.h"
+
+// Local helpers (file scope only, defined outside this part of the file)
+static int IdentifyProfile(void);
+static int IdentifyLevel(void);
+static int GenerateVUISequenceParameters(Bitstream *bitstream);
+  // Co_located is defined elsewhere; GenerateSequenceParameterSet() assigns it
+extern ColocatedParams *Co_located;
+  // picture parameter sets; unused slots are NULL (see GenerateParameterSets / FreeParameterSets)
+pic_parameter_set_rbsp_t *PicParSet[MAXPPS];
+  // scan table for lists of 16 entries: Scaling_List() handles list position ZZ_SCAN[j] in step j
+static const byte ZZ_SCAN[16]  =
+{  0,  1,  4,  8,  5,  2,  3,  6,  9, 12, 13, 10,  7, 11, 14, 15
+};
+  // scan table for lists of 64 entries, used by Scaling_List() for every size other than 16
+static const byte ZZ_SCAN8[64] =
+{  0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates and fills one sequence parameter set and either one or
+ *    (with input->GenerateMultiplePPS) three picture parameter sets.
+ *    The results are published through the globals active_sps, PicParSet[]
+ *    and active_pps (= PicParSet[0]).
+ *
+ * \note
+ *    With multiple PPS the sets 0, 1, 2 use the (weighted_pred_flag,
+ *    weighted_bipred_idc) pairs (0,0), (1,1) and (1,2). For profiles below
+ *    FREXT_HP the single input->chroma_qp_index_offset is passed as cb
+ *    offset and 0 as cr offset.
+ *
+ *************************************************************************************
+*/
+void GenerateParameterSets (void)
+{
+  seq_parameter_set_rbsp_t *sps = AllocSPS();
+  int cb_offset, cr_offset;
+  int k;
+
+  for (k=0; k<MAXPPS; k++)
+    PicParSet[k] = NULL;
+
+  GenerateSequenceParameterSet(sps, 0);
+
+  // sps->profile_idc is only valid after the SPS has been generated
+  if (sps->profile_idc >= FREXT_HP)
+  {
+    cb_offset = input->cb_qp_index_offset;
+    cr_offset = input->cr_qp_index_offset;
+  }
+  else
+  {
+    cb_offset = input->chroma_qp_index_offset;
+    cr_offset = 0;
+  }
+
+  PicParSet[0] = AllocPPS();
+
+  if (input->GenerateMultiplePPS)
+  {
+    PicParSet[1] = AllocPPS();
+    PicParSet[2] = AllocPPS();
+
+    GeneratePictureParameterSet( PicParSet[0], sps, 0, 0, 0, cb_offset, cr_offset);
+    GeneratePictureParameterSet( PicParSet[1], sps, 1, 1, 1, cb_offset, cr_offset);
+    GeneratePictureParameterSet( PicParSet[2], sps, 2, 1, 2, cb_offset, cr_offset);
+  }
+  else
+  {
+    GeneratePictureParameterSet( PicParSet[0], sps, 0, input->WeightedPrediction, input->WeightedBiprediction,
+                                 cb_offset, cr_offset);
+  }
+
+  active_sps = sps;
+  active_pps = PicParSet[0];
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    frees all picture parameter sets stored in PicParSet[] and the sequence
+*    parameter set referenced by active_sps
+*
+* \note
+*    active_sps and active_pps are reset to NULL so that no dangling pointers
+*    to the freed parameter sets remain. Calling the function again without
+*    a new GenerateParameterSets() is harmless.
+*
+*************************************************************************************
+*/
+void FreeParameterSets (void)
+{
+  int i;
+  for (i=0; i<MAXPPS; i++)
+  {
+    if ( NULL != PicParSet[i])
+    {
+      FreePPS(PicParSet[i]);
+      PicParSet[i] = NULL;
+    }
+  }
+  // GenerateParameterSets() points active_pps at PicParSet[0], which is gone now
+  active_pps = NULL;
+
+  if ( NULL != active_sps)
+  {
+    FreeSPS (active_sps);
+    active_sps = NULL;
+  }
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    NALU_t *GenerateSeq_parameter_set_NALU (void);
+*    Writes the RBSP of the global active_sps and wraps it into a NALU of
+*    type NALU_TYPE_SPS with priority NALU_PRIORITY_HIGHEST.
+*
+* \note
+*    active_sps has to be set (see GenerateParameterSets()).
+*    The NALU is obtained from AllocNALU() and is not released here.
+*
+* \return
+*    A NALU containing the Sequence ParameterSet, startcodeprefix_len set to 4
+*
+*************************************************************************************
+*/
+
+NALU_t *GenerateSeq_parameter_set_NALU (void)
+{
+  NALU_t *n = AllocNALU(64000);
+  int RBSPlen;
+  byte rbsp[MAXRBSPSIZE];
+
+  assert (active_sps != NULL);
+
+  RBSPlen = GenerateSeq_parameter_set_rbsp (active_sps, rbsp);
+  // the length returned by RBSPtoNALU() was never used, so it is not stored
+  RBSPtoNALU (rbsp, n, RBSPlen, NALU_TYPE_SPS, NALU_PRIORITY_HIGHEST, 0, 1);
+  n->startcodeprefix_len = 4;
+
+  return n;
+}
+
+
+/*!
+*************************************************************************************
+* \brief
+*    NALU_t *GeneratePic_parameter_set_NALU (int PPS_id);
+*    Writes the RBSP of PicParSet[PPS_id] and wraps it into a NALU of
+*    type NALU_TYPE_PPS with priority NALU_PRIORITY_HIGHEST.
+*
+* \param PPS_id
+*    index into PicParSet[]; has to be in the range 0..MAXPPS-1 and refer
+*    to an allocated picture parameter set
+*
+* \note
+*    The NALU is obtained from AllocNALU() and is not released here.
+*
+* \return
+*    A NALU containing the Picture Parameter Set, startcodeprefix_len set to 4
+*
+*************************************************************************************
+*/
+
+NALU_t *GeneratePic_parameter_set_NALU(int PPS_id)
+{
+  NALU_t *n = AllocNALU(64000);
+  int RBSPlen;
+  byte rbsp[MAXRBSPSIZE];
+
+  // catch a bad index before it is used to read outside of PicParSet[]
+  assert (PPS_id >= 0 && PPS_id < MAXPPS);
+  assert (PicParSet[PPS_id] != NULL);
+
+  RBSPlen = GeneratePic_parameter_set_rbsp (PicParSet[PPS_id], rbsp);
+  // the length returned by RBSPtoNALU() was never used, so it is not stored
+  RBSPtoNALU (rbsp, n, RBSPlen, NALU_TYPE_PPS, NALU_PRIORITY_HIGHEST, 0, 1);
+  n->startcodeprefix_len = 4;
+
+  return n;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    GenerateSequenceParameterSet: extracts info from global variables and
+ *    generates sequence parameter set structure
+ *
+ * \par
+ *    Function reads all kinds of values from several global variables,
+ *    including input-> and img-> and fills in the sps.  Many
+ *    values are currently hard-coded to defaults.
+ *
+ * \par Side effects
+ *    - img->lossless_qpprime_flag is written.
+ *    - The global Co_located is assigned a buffer from alloc_colocated(),
+ *      sized from the picture dimensions stored in the sps.
+ *    - error(..., 500) is called when the automatic crop amounts are not
+ *      a multiple of the cropping unit.
+ *
+ * \note
+ *    SPS_id is not used: seq_parameter_set_id is always set to 0.
+ *    The frame_cropping_rect_* offsets are only written when cropping is
+ *    enabled.
+ *
+ ************************************************************************
+ */
+
+void GenerateSequenceParameterSet( seq_parameter_set_rbsp_t *sps, //!< Sequence Parameter Set to be filled
+                                   int SPS_id                     //!< SPS ID (currently ignored, see note above)
+                                   )
+{
+  unsigned i;
+  int SubWidthC  [4]= { 1, 2, 2, 1};   // horizontal cropping unit, indexed by sps->chroma_format_idc
+  int SubHeightC [4]= { 1, 2, 1, 1};   // vertical cropping unit, indexed by sps->chroma_format_idc
+
+  int frext_profile = ((IdentifyProfile()==FREXT_HP) ||
+                      (IdentifyProfile()==FREXT_Hi10P) ||
+                      (IdentifyProfile()==FREXT_Hi422) ||
+                      (IdentifyProfile()==FREXT_Hi444));
+
+  // *************************************************************************
+  // Sequence Parameter Set
+  // *************************************************************************
+  assert (sps != NULL);
+  // Profile and Level should be calculated using the info from the config
+  // file.  Calculation is hidden in IdentifyProfile() and IdentifyLevel()
+  sps->profile_idc = IdentifyProfile();
+  sps->level_idc = IdentifyLevel();
+
+  // needs to be set according to profile
+  sps->constrained_set0_flag = FALSE;
+  sps->constrained_set1_flag = FALSE;
+  sps->constrained_set2_flag = FALSE;
+
+  if ( (sps->level_idc == 9) && (sps->profile_idc < FREXT_HP) ) // Level 1.b
+  {
+    sps->constrained_set3_flag = TRUE;   // level_idc 9 is replaced by 11 plus constrained_set3_flag
+    sps->level_idc = 11;
+  }
+  else
+  {
+    sps->constrained_set3_flag = FALSE;
+  }
+
+  // Parameter Set ID hard coded to zero
+  sps->seq_parameter_set_id = 0;
+
+  // Fidelity Range Extensions stuff
+  sps->bit_depth_luma_minus8   = input->BitDepthLuma - 8;
+  sps->bit_depth_chroma_minus8 = input->BitDepthChroma - 8;
+  img->lossless_qpprime_flag = input->lossless_qpprime_y_zero_flag & (sps->profile_idc==FREXT_Hi444);   // only kept for profile FREXT_Hi444
+
+  //! POC stuff:
+  //! The following values are hard-coded in init_poc().  Apparently,
+  //! the poc implementation covers only a subset of the poc functionality.
+  //! Here, the same subset is implemented.  Changes in the POC stuff have
+  //! also to be reflected here
+  sps->log2_max_frame_num_minus4 = log2_max_frame_num_minus4;
+  sps->log2_max_pic_order_cnt_lsb_minus4 = log2_max_pic_order_cnt_lsb_minus4;
+
+  sps->pic_order_cnt_type = input->pic_order_cnt_type;
+  sps->num_ref_frames_in_pic_order_cnt_cycle = img->num_ref_frames_in_pic_order_cnt_cycle;
+  sps->delta_pic_order_always_zero_flag = img->delta_pic_order_always_zero_flag;
+  sps->offset_for_non_ref_pic = img->offset_for_non_ref_pic;
+  sps->offset_for_top_to_bottom_field = img->offset_for_top_to_bottom_field;
+
+  for (i=0; i<img->num_ref_frames_in_pic_order_cnt_cycle; i++)
+  {
+    sps->offset_for_ref_frame[i] = img->offset_for_ref_frame[i];
+  }
+  // End of POC stuff
+
+  // Number of Reference Frames
+  sps->num_ref_frames = input->num_ref_frames;
+
+  //required_frame_num_update_behaviour_flag hardcoded to zero
+  sps->gaps_in_frame_num_value_allowed_flag = FALSE;    // double check
+
+  sps->frame_mbs_only_flag = (Boolean) !(input->PicInterlace || input->MbInterlace);   // set only when neither picture nor MB interlace is requested
+
+  // Picture size, finally a simple one :-)
+  sps->pic_width_in_mbs_minus1 = ((input->img_width+img->auto_crop_right)/16) -1;
+  sps->pic_height_in_map_units_minus1 = (((input->img_height+img->auto_crop_bottom)/16)/ (2 - sps->frame_mbs_only_flag)) - 1;   // height in MBs, halved when frame_mbs_only_flag is 0
+
+  // a couple of flags, simple
+  sps->mb_adaptive_frame_field_flag = (Boolean) (FRAME_CODING != input->MbInterlace);
+  sps->direct_8x8_inference_flag = (Boolean) input->directInferenceFlag;
+
+  // VUI is signalled for RGB input with yuv_format 3 or when Generate_SEIVUI is set
+  sps->vui_parameters_present_flag = (Boolean) ((input->rgb_input_flag && input->yuv_format==3)|| input->Generate_SEIVUI);
+
+  sps->chroma_format_idc = input->yuv_format;
+
+  // This should be moved somewhere else.
+  {
+    int PicWidthInMbs, PicHeightInMapUnits, FrameHeightInMbs;
+    int width, height;
+    PicWidthInMbs = (sps->pic_width_in_mbs_minus1 +1);
+    PicHeightInMapUnits = (sps->pic_height_in_map_units_minus1 +1);
+    FrameHeightInMbs = ( 2 - sps->frame_mbs_only_flag ) * PicHeightInMapUnits;
+
+    width = PicWidthInMbs * MB_BLOCK_SIZE;
+    height = FrameHeightInMbs * MB_BLOCK_SIZE;
+
+    Co_located = alloc_colocated (width, height,sps->mb_adaptive_frame_field_flag);   // NOTE(review): a previous Co_located value is overwritten without being freed here
+
+  }
+
+  // Fidelity Range Extensions stuff
+  if(frext_profile)
+  {
+    // bit 0 of the input flags selects signalling in the SPS
+    sps->seq_scaling_matrix_present_flag = (Boolean) (input->ScalingMatrixPresentFlag&1);
+    for(i=0; i<8; i++)
+    {
+      if(i<6)
+        sps->seq_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&1);
+      else
+      {
+        if(input->Transform8x8Mode)   // lists 6 and 7 are only signalled with Transform8x8Mode
+          sps->seq_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&1);
+        else
+          sps->seq_scaling_list_present_flag[i] = 0;
+      }
+    }
+  }
+  else
+  {
+    sps->seq_scaling_matrix_present_flag = FALSE;
+    for(i=0; i<8; i++)
+      sps->seq_scaling_list_present_flag[i] = 0;
+
+  }
+
+  // cropping is signalled whenever the picture was padded (auto_crop_right / auto_crop_bottom non-zero)
+  if (img->auto_crop_right || img->auto_crop_bottom)
+  {
+    sps->frame_cropping_flag = TRUE;
+    sps->frame_cropping_rect_left_offset=0;
+    sps->frame_cropping_rect_top_offset=0;
+    sps->frame_cropping_rect_right_offset=  (img->auto_crop_right / SubWidthC[sps->chroma_format_idc]);
+    sps->frame_cropping_rect_bottom_offset= (img->auto_crop_bottom / (SubHeightC[sps->chroma_format_idc] * (2 - sps->frame_mbs_only_flag)));
+    if (img->auto_crop_right % SubWidthC[sps->chroma_format_idc])   // crop amount must be a multiple of the cropping unit
+    {
+      error("automatic frame cropping (width) not possible",500);
+    }
+    if (img->auto_crop_bottom % (SubHeightC[sps->chroma_format_idc] * (2 - sps->frame_mbs_only_flag)))
+    {
+      error("automatic frame cropping (height) not possible",500);
+    }
+  }
+  else
+  {
+    sps->frame_cropping_flag = FALSE;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    GeneratePictureParameterSet:
+ *    Generates a Picture Parameter Set structure
+ *
+ * \par
+ *    Regarding the QP
+ *    The previous software versions coded the absolute QP only in the
+ *    slice header.  This is kept, and the offset in the PPS is coded
+ *    even if we could save bits by intelligently using this field.
+ *
+ * \par Side effects
+ *    - For profiles other than FREXT_HP, FREXT_Hi10P, FREXT_Hi422 and
+ *      FREXT_Hi444 the global input->Transform8x8Mode is reset to 0.
+ *    - With more than one slice group a buffer for pps->slice_group_id is
+ *      allocated with calloc(); no_mem_exit() is called on failure.
+ *
+ * \note
+ *    For the non-FRExt profiles cr_qp_index_offset is ignored: both
+ *    pps->cb_qp_index_offset and pps->cr_qp_index_offset are set to
+ *    cb_qp_index_offset.
+ *
+ ************************************************************************
+ */
+
+void GeneratePictureParameterSet( pic_parameter_set_rbsp_t *pps, //!< Picture Parameter Set to be filled
+                                  seq_parameter_set_rbsp_t *sps, //!< used Sequence Parameter Set
+                                  int PPS_id,                    //!< PPS ID
+                                  int WeightedPrediction,        //!< value of weighted_pred_flag
+                                  int WeightedBiprediction,      //!< value of weighted_bipred_idc
+                                  int cb_qp_index_offset,        //!< value of cb_qp_index_offset
+                                  int cr_qp_index_offset         //!< value of cr_qp_index_offset
+                                  )
+{
+  unsigned i;
+
+  int frext_profile = ((IdentifyProfile()==FREXT_HP) ||
+                      (IdentifyProfile()==FREXT_Hi10P) ||
+                      (IdentifyProfile()==FREXT_Hi422) ||
+                      (IdentifyProfile()==FREXT_Hi444));
+
+  // *************************************************************************
+  // Picture Parameter Set
+  // *************************************************************************
+
+  pps->seq_parameter_set_id = sps->seq_parameter_set_id;
+  pps->pic_parameter_set_id = PPS_id;
+  pps->entropy_coding_mode_flag = (input->symbol_mode==UVLC ? FALSE : TRUE);
+
+  // Fidelity Range Extensions stuff
+  if(frext_profile)
+  {
+    pps->transform_8x8_mode_flag = (input->Transform8x8Mode ? TRUE:FALSE);
+    pps->pic_scaling_matrix_present_flag = (Boolean) ((input->ScalingMatrixPresentFlag&2)>>1);   // bit 1 of the input flags selects signalling in the PPS
+    for(i=0; i<8; i++)
+    {
+      if(i<6)
+        pps->pic_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&2)>>1;
+      else
+      {
+        if(pps->transform_8x8_mode_flag)   // lists 6 and 7 are only signalled with the 8x8 transform
+          pps->pic_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&2)>>1;
+        else
+          pps->pic_scaling_list_present_flag[i] = 0;
+      }
+    }
+  }
+  else
+  {
+    pps->pic_scaling_matrix_present_flag = FALSE;
+    for(i=0; i<8; i++)
+      pps->pic_scaling_list_present_flag[i] = 0;
+
+    pps->transform_8x8_mode_flag = FALSE;
+    input->Transform8x8Mode = 0;   // modifies the global encoder configuration
+  }
+
+  // JVT-Fxxx (by Stephan Wenger, make this flag unconditional
+  pps->pic_order_present_flag = img->pic_order_present_flag;
+
+
+  // Begin FMO stuff
+  pps->num_slice_groups_minus1 = input->num_slice_groups_minus1;
+
+
+  //! Following set the parameter for different slice group types
+  if (pps->num_slice_groups_minus1 > 0)
+  {
+     if ((pps->slice_group_id = calloc ((sps->pic_height_in_map_units_minus1+1)*(sps->pic_width_in_mbs_minus1+1), sizeof(byte))) == NULL)   // NOTE(review): overwrites any pointer already held in pps->slice_group_id - confirm AllocPPS() does not allocate one
+       no_mem_exit ("GeneratePictureParameterSet: slice_group_id");
+
+    switch (input->slice_group_map_type)
+    {
+    case 0:
+      pps->slice_group_map_type = 0;
+      for(i=0; i<=pps->num_slice_groups_minus1; i++)   // one run length per slice group
+      {
+        pps->run_length_minus1[i]=input->run_length_minus1[i];
+      }
+      break;
+    case 1:
+      pps->slice_group_map_type = 1;
+      break;
+    case 2:
+      // i loops from 0 to num_slice_groups_minus1-1, because no info for background needed
+      pps->slice_group_map_type = 2;
+      for(i=0; i<pps->num_slice_groups_minus1; i++)
+      {
+        pps->top_left[i] = input->top_left[i];
+        pps->bottom_right[i] = input->bottom_right[i];
+      }
+     break;
+    case 3:
+    case 4:
+    case 5:
+      pps->slice_group_map_type = input->slice_group_map_type;
+      pps->slice_group_change_direction_flag = (Boolean) input->slice_group_change_direction_flag;
+      pps->slice_group_change_rate_minus1 = input->slice_group_change_rate_minus1;
+      break;
+    case 6:
+      pps->slice_group_map_type = 6;
+      pps->pic_size_in_map_units_minus1 =
+        (((input->img_height+img->auto_crop_bottom)/MB_BLOCK_SIZE)/(2-sps->frame_mbs_only_flag))
+        *((input->img_width+img->auto_crop_right)/MB_BLOCK_SIZE) -1;
+      // explicit map: one slice group id per map unit, copied from the configuration
+      for (i=0;i<=pps->pic_size_in_map_units_minus1; i++)
+        pps->slice_group_id[i] = input->slice_group_id[i];
+
+      break;
+    default:
+      printf ("Parset.c: slice_group_map_type invalid, default\n");
+      assert (0==1);   // with NDEBUG, pps->slice_group_map_type is left unset here
+    }
+  }
+// End FMO stuff
+
+  pps->num_ref_idx_l0_active_minus1 = sps->frame_mbs_only_flag ? (sps->num_ref_frames-1) : (2 * sps->num_ref_frames - 1) ;   // set defaults
+  pps->num_ref_idx_l1_active_minus1 = sps->frame_mbs_only_flag ? (sps->num_ref_frames-1) : (2 * sps->num_ref_frames - 1) ;   // set defaults
+
+  pps->weighted_pred_flag = (Boolean) WeightedPrediction;
+  pps->weighted_bipred_idc = WeightedBiprediction;
+
+  pps->pic_init_qp_minus26 = 0;         // hard coded to zero, QP lives in the slice header
+  pps->pic_init_qs_minus26 = 0;
+
+  pps->chroma_qp_index_offset = cb_qp_index_offset;
+  if (frext_profile)
+  {
+    pps->cb_qp_index_offset     = cb_qp_index_offset;
+    pps->cr_qp_index_offset     = cr_qp_index_offset;
+  }
+  else
+    pps->cb_qp_index_offset = pps->cr_qp_index_offset = pps->chroma_qp_index_offset;   // cr_qp_index_offset argument is not used on this path
+
+  pps->deblocking_filter_control_present_flag = (Boolean) input->LFSendParameters;
+  pps->constrained_intra_pred_flag = (Boolean) input->UseConstrainedIntraPred;
+
+  // if redundant slice is in use.
+  pps->redundant_pic_cnt_present_flag = (Boolean) input->redundant_pic_flag;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Writes one scaling list as a sequence of delta_sl elements and
+ *    reconstructs the list that results from it.
+ *
+ * \param scalingListinput
+ *    input scaling list (values as read from the input file)
+ * \param scalingList
+ *    receives the scaling list to be used; once a coded value of 0 has been
+ *    met, the remaining entries repeat the last value
+ * \param sizeOfScalingList
+ *    size of the scaling list; 16 selects ZZ_SCAN, anything else ZZ_SCAN8
+ * \param UseDefaultScalingMatrix
+ *    bit 0 is set when the value coded for list position 0 is zero;
+ *    the flag is never cleared here
+ * \param bitstream
+ *    target bitstream for writing syntax
+ *
+ * \return
+ *    sum of the values returned by se_v() for the written elements
+ *
+ *************************************************************************************
+ */
+int Scaling_List(short *scalingListinput, short *scalingList, int sizeOfScalingList, short *UseDefaultScalingMatrix, Bitstream *bitstream)
+{
+  const byte *scan_table = (sizeOfScalingList==16) ? ZZ_SCAN : ZZ_SCAN8;
+  int written = 0;
+  int previous = 8;
+  int upcoming = 8;
+  int step;
+
+  for (step = 0; step < sizeOfScalingList; step++)
+  {
+    const int pos = scan_table[step];
+
+    // nothing more is coded after a value of zero has been written
+    if (upcoming != 0)
+    {
+      int delta = scalingListinput[pos] - previous;
+
+      // wrap the difference into the range -128..127
+      if (delta > 127)
+        delta -= 256;
+      else if (delta < -128)
+        delta += 256;
+
+      written += se_v ("   : delta_sl   ", delta, bitstream);
+      upcoming = scalingListinput[pos];
+
+      // a zero at list position 0 requests the default matrix
+      if (pos == 0 && upcoming == 0)
+        *UseDefaultScalingMatrix |= 1;
+    }
+
+    // a zero keeps repeating the previous value
+    scalingList[pos] = (short) ((upcoming != 0) ? upcoming : previous);
+    previous = scalingList[pos];
+  }
+
+  return written;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    int GenerateSeq_parameter_set_rbsp (seq_parameter_set_rbsp_t *sps, byte *rbsp);
+ *    Writes the syntax elements of the sequence parameter set into rbsp.
+ *
+ * \param sps
+ *    sequence parameter structure
+ * \param rbsp
+ *    buffer to be filled with the rbsp, must not be NULL; the callers in this
+ *    file pass a buffer of MAXRBSPSIZE bytes
+ *
+ * \return
+ *    size of the RBSP in bytes (bitstream->byte_pos after SODBtoRBSP())
+ *
+ * \note
+ *    The values returned by the write functions are accumulated in len,
+ *    but len is not part of the result.
+ *    The lossless_qpprime_y_zero_flag is taken from img->lossless_qpprime_flag,
+ *    not from the sps structure.
+ *    When vui_parameters_present_flag is set, GenerateVUISequenceParameters()
+ *    is called to write the VUI part.
+ *************************************************************************************
+ */
+int GenerateSeq_parameter_set_rbsp (seq_parameter_set_rbsp_t *sps, byte *rbsp)
+{
+  Bitstream *bitstream;
+  int len = 0, LenInBytes;
+  unsigned i;
+
+  assert (rbsp != NULL);
+  // temporary Bitstream wrapper around the caller's buffer, freed before returning
+  if ((bitstream=calloc(1, sizeof(Bitstream)))==NULL) no_mem_exit("SeqParameterSet:bitstream");
+
+  // .. and use the rbsp provided (or allocated above) for the data
+  bitstream->streamBuffer = rbsp;
+  bitstream->bits_to_go = 8;
+
+  len+=u_v  (8, "SPS: profile_idc",                             sps->profile_idc,                               bitstream);
+
+  len+=u_1  ("SPS: constrained_set0_flag",                      sps->constrained_set0_flag,    bitstream);
+  len+=u_1  ("SPS: constrained_set1_flag",                      sps->constrained_set1_flag,    bitstream);
+  len+=u_1  ("SPS: constrained_set2_flag",                      sps->constrained_set2_flag,    bitstream);
+  len+=u_1  ("SPS: constrained_set3_flag",                      sps->constrained_set3_flag,    bitstream);
+  len+=u_v  (4, "SPS: reserved_zero_4bits",                     0,                             bitstream);
+
+  len+=u_v  (8, "SPS: level_idc",                               sps->level_idc,                                 bitstream);
+
+  len+=ue_v ("SPS: seq_parameter_set_id",                    sps->seq_parameter_set_id,                      bitstream);
+
+  // Fidelity Range Extensions stuff
+  if((sps->profile_idc==FREXT_HP) ||
+     (sps->profile_idc==FREXT_Hi10P) ||
+     (sps->profile_idc==FREXT_Hi422) ||
+     (sps->profile_idc==FREXT_Hi444))
+  {
+    len+=ue_v ("SPS: chroma_format_idc",                        sps->chroma_format_idc,                          bitstream);
+    if(img->yuv_format == 3)   // NOTE(review): tests img->yuv_format although sps->chroma_format_idc was just written - confirm both always agree
+      len+=u_1  ("SPS: residue_transform_flag",                 0,                                               bitstream);
+    len+=ue_v ("SPS: bit_depth_luma_minus8",                    sps->bit_depth_luma_minus8,                      bitstream);
+    len+=ue_v ("SPS: bit_depth_chroma_minus8",                  sps->bit_depth_chroma_minus8,                    bitstream);
+    len+=u_1  ("SPS: lossless_qpprime_y_zero_flag",             img->lossless_qpprime_flag,                      bitstream);
+    //other chroma info to be added in the future
+
+    len+=u_1 ("SPS: seq_scaling_matrix_present_flag",           sps->seq_scaling_matrix_present_flag,            bitstream);
+
+    if(sps->seq_scaling_matrix_present_flag)
+    {
+      for(i=0; i<8; i++)
+      {
+        len+=u_1 ("SPS: seq_scaling_list_present_flag",         sps->seq_scaling_list_present_flag[i],           bitstream);
+        if(sps->seq_scaling_list_present_flag[i])
+        {
+          if(i<6)   // lists 0..5 have 16 entries, lists 6 and 7 have 64 entries
+            len+=Scaling_List(ScalingList4x4input[i], ScalingList4x4[i], 16, &UseDefaultScalingMatrix4x4Flag[i], bitstream);
+          else
+            len+=Scaling_List(ScalingList8x8input[i-6], ScalingList8x8[i-6], 64, &UseDefaultScalingMatrix8x8Flag[i-6], bitstream);
+        }
+      }
+    }
+  }
+
+  len+=ue_v ("SPS: log2_max_frame_num_minus4",               sps->log2_max_frame_num_minus4,                 bitstream);
+  len+=ue_v ("SPS: pic_order_cnt_type",                      sps->pic_order_cnt_type,                        bitstream);
+  // POC type 0 and type 1 carry extra elements, any other type carries none
+  if (sps->pic_order_cnt_type == 0)
+    len+=ue_v ("SPS: log2_max_pic_order_cnt_lsb_minus4",     sps->log2_max_pic_order_cnt_lsb_minus4,         bitstream);
+  else if (sps->pic_order_cnt_type == 1)
+  {
+    len+=u_1  ("SPS: delta_pic_order_always_zero_flag",        sps->delta_pic_order_always_zero_flag,          bitstream);
+    len+=se_v ("SPS: offset_for_non_ref_pic",                  sps->offset_for_non_ref_pic,                    bitstream);
+    len+=se_v ("SPS: offset_for_top_to_bottom_field",          sps->offset_for_top_to_bottom_field,            bitstream);
+    len+=ue_v ("SPS: num_ref_frames_in_pic_order_cnt_cycle",   sps->num_ref_frames_in_pic_order_cnt_cycle,     bitstream);
+    for (i=0; i<sps->num_ref_frames_in_pic_order_cnt_cycle; i++)
+      len+=se_v ("SPS: offset_for_ref_frame",                  sps->offset_for_ref_frame[i],                      bitstream);
+  }
+  len+=ue_v ("SPS: num_ref_frames",                          sps->num_ref_frames,                            bitstream);
+  len+=u_1  ("SPS: gaps_in_frame_num_value_allowed_flag",    sps->gaps_in_frame_num_value_allowed_flag,      bitstream);
+  len+=ue_v ("SPS: pic_width_in_mbs_minus1",                 sps->pic_width_in_mbs_minus1,                   bitstream);
+  len+=ue_v ("SPS: pic_height_in_map_units_minus1",          sps->pic_height_in_map_units_minus1,            bitstream);
+  len+=u_1  ("SPS: frame_mbs_only_flag",                     sps->frame_mbs_only_flag,                       bitstream);
+  if (!sps->frame_mbs_only_flag)
+  {
+    len+=u_1  ("SPS: mb_adaptive_frame_field_flag",            sps->mb_adaptive_frame_field_flag,              bitstream);
+  }
+  len+=u_1  ("SPS: direct_8x8_inference_flag",               sps->direct_8x8_inference_flag,                 bitstream);
+
+  len+=u_1  ("SPS: frame_cropping_flag",                      sps->frame_cropping_flag,                       bitstream);
+  if (sps->frame_cropping_flag)
+  {
+    len+=ue_v ("SPS: frame_cropping_rect_left_offset",          sps->frame_cropping_rect_left_offset,           bitstream);
+    len+=ue_v ("SPS: frame_cropping_rect_right_offset",         sps->frame_cropping_rect_right_offset,          bitstream);
+    len+=ue_v ("SPS: frame_cropping_rect_top_offset",           sps->frame_cropping_rect_top_offset,            bitstream);
+    len+=ue_v ("SPS: frame_cropping_rect_bottom_offset",        sps->frame_cropping_rect_bottom_offset,         bitstream);
+  }
+
+  len+=u_1  ("SPS: vui_parameters_present_flag",             sps->vui_parameters_present_flag,               bitstream);
+
+  if (sps->vui_parameters_present_flag)
+    len+=GenerateVUISequenceParameters(bitstream);    // currently a dummy, asserting
+
+  SODBtoRBSP(bitstream);     // copies the last couple of bits into the byte buffer
+
+  LenInBytes=bitstream->byte_pos;
+  // only the wrapper is released, the rbsp buffer belongs to the caller
+  free (bitstream);
+
+  return LenInBytes;
+}
+
+
+/*!
+ ***********************************************************************************************
+ * \brief
+ *    int GeneratePic_parameter_set_rbsp (pic_parameter_set_rbsp_t *pps, byte *rbsp);
+ *
+ *    Writes the syntax elements of a picture parameter set into the caller-supplied
+ *    buffer, using a temporary Bitstream that is allocated and freed inside the function.
+ *    Side effect: pps->pic_order_present_flag is overwritten with
+ *    img->pic_order_present_flag before it is written.
+ *
+ * \param pps
+ *    picture parameter structure
+ * \param rbsp
+ *    buffer to be filled with the rbsp, size should be at least MAXIMUMPARSETRBSPSIZE
+ *
+ * \return
+ *    size of the RBSP in bytes (bitstream->byte_pos after SODBtoRBSP)
+ *
+ * \note
+ *    transform_8x8_mode_flag, the picture scaling lists and
+ *    second_chroma_qp_index_offset are only written when IdentifyProfile() returns
+ *    FREXT_HP, FREXT_Hi10P, FREXT_Hi422 or FREXT_Hi444.
+ ************************************************************************************************
+ */
+
+int GeneratePic_parameter_set_rbsp (pic_parameter_set_rbsp_t *pps, byte *rbsp)
+{
+  Bitstream *bitstream;
+  int len = 0, LenInBytes;
+  unsigned i;
+  unsigned NumberBitsPerSliceGroupId;
+  int profile_idc;
+
+  assert (rbsp != NULL);
+
+  if ((bitstream=calloc(1, sizeof(Bitstream)))==NULL) no_mem_exit("PicParameterSet:bitstream");
+
+  // .. and use the rbsp provided (or allocated above) for the data
+  bitstream->streamBuffer = rbsp;
+  bitstream->bits_to_go = 8;
+
+  // the flag is refreshed from the global img structure, whatever the caller stored in pps
+  pps->pic_order_present_flag = img->pic_order_present_flag;
+
+  len+=ue_v ("PPS: pic_parameter_set_id",                    pps->pic_parameter_set_id,                      bitstream);
+  len+=ue_v ("PPS: seq_parameter_set_id",                    pps->seq_parameter_set_id,                      bitstream);
+  len+=u_1  ("PPS: entropy_coding_mode_flag",                pps->entropy_coding_mode_flag,                  bitstream);
+  len+=u_1  ("PPS: pic_order_present_flag",                  pps->pic_order_present_flag,                    bitstream);
+  len+=ue_v ("PPS: num_slice_groups_minus1",                 pps->num_slice_groups_minus1,                   bitstream);
+
+  // FMO stuff
+  // (only present with more than one slice group; map types other than 0, 2..6 write nothing extra)
+  if(pps->num_slice_groups_minus1 > 0 )
+  {
+    len+=ue_v ("PPS: slice_group_map_type",                 pps->slice_group_map_type,                   bitstream);
+    if (pps->slice_group_map_type == 0)
+      // one run length per slice group, hence '<='
+      for (i=0; i<=pps->num_slice_groups_minus1; i++)
+        len+=ue_v ("PPS: run_length_minus1[i]",                           pps->run_length_minus1[i],                             bitstream);
+    else if (pps->slice_group_map_type==2)
+      // one rectangle fewer than there are slice groups, hence '<'
+      for (i=0; i<pps->num_slice_groups_minus1; i++)
+      {
+
+        len+=ue_v ("PPS: top_left[i]",                          pps->top_left[i],                           bitstream);
+        len+=ue_v ("PPS: bottom_right[i]",                      pps->bottom_right[i],                       bitstream);
+      }
+    else if (pps->slice_group_map_type == 3 ||
+             pps->slice_group_map_type == 4 ||
+             pps->slice_group_map_type == 5)
+    {
+      len+=u_1  ("PPS: slice_group_change_direction_flag",         pps->slice_group_change_direction_flag,         bitstream);
+      len+=ue_v ("PPS: slice_group_change_rate_minus1",            pps->slice_group_change_rate_minus1,            bitstream);
+    }
+    else if (pps->slice_group_map_type == 6)
+    {
+      // width of each slice_group_id entry: 3 bits for 5 or more groups, 2 bits for 3..4, 1 bit for 2
+      if (pps->num_slice_groups_minus1>=4)
+        NumberBitsPerSliceGroupId=3;
+      else if (pps->num_slice_groups_minus1>=2)
+        NumberBitsPerSliceGroupId=2;
+      else if (pps->num_slice_groups_minus1>=1)
+        NumberBitsPerSliceGroupId=1;
+      else
+        NumberBitsPerSliceGroupId=0;
+
+      len+=ue_v ("PPS: pic_size_in_map_units_minus1",                       pps->pic_size_in_map_units_minus1,             bitstream);
+      for(i=0; i<=pps->pic_size_in_map_units_minus1; i++)
+        len+= u_v  (NumberBitsPerSliceGroupId, "PPS: >slice_group_id[i]",   pps->slice_group_id[i],                        bitstream);
+    }
+  }
+  // End of FMO stuff
+
+  len+=ue_v ("PPS: num_ref_idx_l0_active_minus1",             pps->num_ref_idx_l0_active_minus1,              bitstream);
+  len+=ue_v ("PPS: num_ref_idx_l1_active_minus1",             pps->num_ref_idx_l1_active_minus1,              bitstream);
+  len+=u_1  ("PPS: weighted_pred_flag",                       pps->weighted_pred_flag,                        bitstream);
+  len+=u_v  (2, "PPS: weighted_bipred_idc",                   pps->weighted_bipred_idc,                       bitstream);
+  len+=se_v ("PPS: pic_init_qp_minus26",                      pps->pic_init_qp_minus26,                       bitstream);
+  len+=se_v ("PPS: pic_init_qs_minus26",                      pps->pic_init_qs_minus26,                       bitstream);
+
+  // for the FRExt profiles the same syntax element carries cb_qp_index_offset,
+  // otherwise it carries chroma_qp_index_offset
+  profile_idc = IdentifyProfile();
+  if((profile_idc==FREXT_HP) ||
+     (profile_idc==FREXT_Hi10P) ||
+     (profile_idc==FREXT_Hi422) ||
+     (profile_idc==FREXT_Hi444))
+    len+=se_v ("PPS: chroma_qp_index_offset",                 pps->cb_qp_index_offset,                        bitstream);
+  else
+    len+=se_v ("PPS: chroma_qp_index_offset",                 pps->chroma_qp_index_offset,                    bitstream);
+
+  len+=u_1  ("PPS: deblocking_filter_control_present_flag",   pps->deblocking_filter_control_present_flag,    bitstream);
+  len+=u_1  ("PPS: constrained_intra_pred_flag",              pps->constrained_intra_pred_flag,               bitstream);
+  len+=u_1  ("PPS: redundant_pic_cnt_present_flag",           pps->redundant_pic_cnt_present_flag,            bitstream);
+
+  // Fidelity Range Extensions stuff
+  if((profile_idc==FREXT_HP) ||
+     (profile_idc==FREXT_Hi10P) ||
+     (profile_idc==FREXT_Hi422) ||
+     (profile_idc==FREXT_Hi444))
+  {
+    len+=u_1  ("PPS: transform_8x8_mode_flag",                pps->transform_8x8_mode_flag,                   bitstream);
+
+    len+=u_1  ("PPS: pic_scaling_matrix_present_flag",        pps->pic_scaling_matrix_present_flag,           bitstream);
+
+    if(pps->pic_scaling_matrix_present_flag)
+    {
+      // 6 lists of 16 entries, plus 2 lists of 64 entries when transform_8x8_mode_flag is set
+      for(i=0; i<(6+((unsigned)pps->transform_8x8_mode_flag<<1)); i++)
+      {
+        len+=u_1  ("PPS: pic_scaling_list_present_flag",      pps->pic_scaling_list_present_flag[i],          bitstream);
+
+        if(pps->pic_scaling_list_present_flag[i])
+        {
+          if(i<6)
+            len+=Scaling_List(ScalingList4x4input[i], ScalingList4x4[i], 16, &UseDefaultScalingMatrix4x4Flag[i], bitstream);
+          else
+            len+=Scaling_List(ScalingList8x8input[i-6], ScalingList8x8[i-6], 64, &UseDefaultScalingMatrix8x8Flag[i-6], bitstream);
+        }
+      }
+    }
+    len+=se_v ("PPS: second_chroma_qp_index_offset",          pps->cr_qp_index_offset,                        bitstream);
+  }
+
+  SODBtoRBSP(bitstream);     // copies the last couple of bits into the byte buffer
+
+  // len is only an accumulator; the value handed back is the byte position of the bitstream
+  LenInBytes=bitstream->byte_pos;
+
+  // Get rid of the helper structures
+  free (bitstream);
+
+  return LenInBytes;
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Reports the profile the encoder is configured for
+ *
+ * \return
+ *    the ProfileIDC member of the global input parameters, unmodified
+ *
+ * \note
+ *    Nothing is derived here; the configured value is passed through as is.
+ *    A real implementation should "calculate" the profile from the config file
+ *    parameters.  E.g.
+ *
+ *    Profile = Baseline;
+ *    if (CABAC Used || Interlace used) Profile=Main;
+ *    if (!Cabac Used) && (Bframes | SPframes) Profile = Streaming;
+ *
+ *************************************************************************************
+ */
+int IdentifyProfile(void)
+{
+  int configured_profile = input->ProfileIDC;
+
+  return configured_profile;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Reports the level the encoder is configured for
+ *
+ * \return
+ *    the LevelIDC member of the global input parameters, unmodified
+ *
+ * \note
+ *    Nothing is derived here; a real implementation should calculate the level out
+ *    of the config file parameters (primarily the picture size)
+ *************************************************************************************
+ */
+int IdentifyLevel(void)
+{
+  int configured_level = input->LevelIDC;
+
+  return configured_level;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Writes the VUI parameters of the sequence parameter set to the bitstream
+ *
+ *    Two variants exist: a fixed VUI that signals RGB input (rgb_input_flag set and
+ *    yuv_format == 3), and a minimal VUI with every optional part switched off when
+ *    input->Generate_SEIVUI is set.
+ *
+ * \param bitstream
+ *    bitstream the syntax elements are appended to
+ *
+ * \return
+ *    sum of the values returned by the u_1/u_v/ue_v calls (presumably the number of
+ *    bits written), so the caller can add it to its own running length.
+ *    If neither variant applies the function prints an error and exits.
+ *************************************************************************************
+ */
+static int GenerateVUISequenceParameters(Bitstream *bitstream)
+{
+  int len=0;
+
+  // special case to signal the RGB format
+  if(input->rgb_input_flag && input->yuv_format==3)
+  {
+    //still pretty much a dummy VUI
+    printf   ("VUI: writing Sequence Parameter VUI to signal RGB format\n");
+    len+=u_1 ("VUI: aspect_ratio_info_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: overscan_info_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: video_signal_type_present_flag", 1, bitstream);
+    len+=u_v (3, "VUI: video format", 2, bitstream);
+    len+=u_1 ("VUI: video_full_range_flag", 1, bitstream);
+    len+=u_1 ("VUI: color_description_present_flag", 1, bitstream);
+    len+=u_v (8, "VUI: colour primaries", 2, bitstream);
+    len+=u_v (8, "VUI: transfer characteristics", 2, bitstream);
+    len+=u_v (8, "VUI: matrix coefficients", 0, bitstream);
+    len+=u_1 ("VUI: chroma_loc_info_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: timing_info_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: nal_hrd_parameters_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: vcl_hrd_parameters_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: pic_struc_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: bitstream_restriction_flag", 0, bitstream);
+  }
+  else if (input->Generate_SEIVUI)
+  {
+    // every optional sub-structure is currently switched off; the flags are kept as
+    // variables so that the written flag and the conditional payload cannot disagree
+    int bitstream_restriction_flag = 0;
+    int timing_info_present_flag = 0;
+    int aspect_ratio_info_present_flag = 0;
+
+    len+=u_1 ("VUI: aspect_ratio_info_present_flag", aspect_ratio_info_present_flag, bitstream);
+    if (aspect_ratio_info_present_flag)
+    {
+      len+=u_v (8,"VUI: aspect_ratio_idc", 1, bitstream);
+    }
+    len+=u_1 ("VUI: overscan_info_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: video_signal_type_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: chroma_loc_info_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: timing_info_present_flag", timing_info_present_flag, bitstream);
+    // timing parameters
+    if (timing_info_present_flag)
+    {
+      len+=u_v (32,"VUI: num_units_in_tick", 416667, bitstream);
+      len+=u_v (32,"VUI: time_scale", 20000000, bitstream);
+      len+=u_1 ("VUI: fixed_frame_rate_flag", 1, bitstream);
+    }
+    // end of timing parameters
+    len+=u_1 ("VUI: nal_hrd_parameters_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: vcl_hrd_parameters_present_flag", 0, bitstream);
+    len+=u_1 ("VUI: pic_struc_present_flag", 0, bitstream);
+
+    len+=u_1 ("VUI: bitstream_restriction_flag", bitstream_restriction_flag, bitstream);
+    if (bitstream_restriction_flag)
+    {
+      len+=u_1 ("VUI: motion_vectors_over_pic_boundaries_flag", 1, bitstream);
+      len+=ue_v ("VUI: max_bytes_per_pic_denom", 0, bitstream);
+      len+=ue_v ("VUI: max_bits_per_mb_denom", 0, bitstream);
+      len+=ue_v ("VUI: log2_max_mv_length_horizontal", 11, bitstream);
+      len+=ue_v ("VUI: log2_max_mv_length_vertical", 11, bitstream);
+      len+=ue_v ("VUI: num_reorder_frames", 3, bitstream);
+      len+=ue_v ("VUI: max_dec_frame_buffering", 4, bitstream);
+    }
+  }
+  else
+  {
+    printf ("Sequence Parameter VUI not yet implemented, this should never happen, exit\n");
+    exit (-1);
+  }
+
+  // both variants report what they actually wrote (the SEIVUI variant used to return 1)
+  return len;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Builds the SEI message RBSP and wraps it into a newly allocated NALU
+ *
+ * \return
+ *    the NALU obtained from AllocNALU(), filled by RBSPtoNALU() and marked with a
+ *    4 byte start code prefix
+ *
+ *************************************************************************************
+ */
+NALU_t *GenerateSEImessage_NALU()
+{
+  byte rbsp[MAXRBSPSIZE];
+  NALU_t *nalu = AllocNALU(64000);
+  int rbsp_size = GenerateSEImessage_rbsp (NORMAL_SEI, rbsp);
+
+  // the length reported by RBSPtoNALU is not needed here
+  (void) RBSPtoNALU (rbsp, nalu, rbsp_size, NALU_TYPE_SEI, NALU_PRIORITY_DISPOSABLE, 0, 1);
+  nalu->startcodeprefix_len = 4;
+
+  return nalu;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    int GenerateSEImessage_rbsp (int id, byte *rbsp)
+ *
+ *    Writes one SEI message of payload type 5 that carries a 16 byte uuid, the text
+ *    from input->SEIMessageText ("Empty Message" if that text is empty) and a
+ *    terminating zero byte.
+ *
+ * \param id
+ *    not used by the current implementation
+ * \param rbsp
+ *    buffer to be filled with the rbsp
+ *
+ * \return
+ *    size of the RBSP in bytes (bitstream->byte_pos after SODBtoRBSP)
+ *
+ * \note
+ *    The text is limited to what fits into the local 500 byte buffer; longer texts
+ *    are truncated.
+ *************************************************************************************
+ */
+int GenerateSEImessage_rbsp (int id, byte *rbsp)
+{
+  Bitstream *bitstream;
+
+  int len = 0, LenInBytes;
+  assert (rbsp != NULL);
+
+  if ((bitstream=calloc(1, sizeof(Bitstream)))==NULL) no_mem_exit("SEImessage:bitstream");
+
+  // .. and use the rbsp provided for the data
+  bitstream->streamBuffer = rbsp;
+  bitstream->bits_to_go = 8;
+
+  {
+    char sei_message[500];
+    char uuid_message[9] = "RandomMSG"; // This is supposed to be Random
+    unsigned int i, payload_size;
+    unsigned int message_size = strlen(input->SEIMessageText);
+    struct TIMEB tstruct;
+    ftime( &tstruct);    // start time ms
+
+    if (message_size == 0)
+    {
+      message_size = 13;
+      strncpy(sei_message,"Empty Message",message_size);
+    }
+    else
+    {
+      // never copy more than the local buffer holds; one byte stays free for the terminator
+      if (message_size > sizeof(sei_message) - 1)
+        message_size = sizeof(sei_message) - 1;
+      strncpy(sei_message,input->SEIMessageText,message_size);
+    }
+    // strncpy with an exact count does not terminate the destination
+    sei_message[message_size] = '\0';
+
+    len+=u_v (8,"SEI: last_payload_type_byte", 5, bitstream);
+
+    // payload = 16 uuid bytes + text + 1 terminating zero byte;
+    // the size is coded as a run of 255 bytes followed by the remainder
+    payload_size = message_size + 17;
+    while (payload_size > 254)
+    {
+      len+=u_v (8,"SEI: ff_byte",255, bitstream);
+      payload_size -= 255;
+    }
+    len+=u_v (8,"SEI: last_payload_size_byte",payload_size, bitstream);
+
+    // Lets randomize uuid based on time
+    len+=u_v (32,"SEI: uuid_iso_iec_11578",(int) tstruct.timezone, bitstream);
+    len+=u_v (32,"SEI: uuid_iso_iec_11578",(int) tstruct.time*1000+tstruct.millitm, bitstream);
+    len+=u_v (32,"SEI: uuid_iso_iec_11578",(int) (uuid_message[0] << 24) + (uuid_message[1] << 16)  + (uuid_message[2] << 8) + (uuid_message[3] << 0), bitstream);
+    len+=u_v (32,"SEI: uuid_iso_iec_11578",(int) (uuid_message[4] << 24) + (uuid_message[5] << 16)  + (uuid_message[6] << 8) + (uuid_message[7] << 0), bitstream);
+
+    // exactly message_size text bytes, matching the payload size signalled above
+    for (i = 0; i < message_size; i++)
+      len+=u_v (8,"SEI: user_data_payload_byte",sei_message[i], bitstream);
+
+    len+=u_v (8,"SEI: user_data_payload_byte",   0, bitstream);
+  }
+  SODBtoRBSP(bitstream);     // copies the last couple of bits into the byte buffer
+
+  LenInBytes=bitstream->byte_pos;
+
+  free(bitstream);
+  return LenInBytes;
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/parset.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/parset.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/parset.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,48 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    parset.h
+ * \brief
+ *    Picture and Sequence Parameter Sets, encoder operations
+ *    This code reflects JVT version xxx
+ *  \date 25 November 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+#ifndef _PARSET_H_
+#define _PARSET_H_
+
+#include "parsetcommon.h"
+#include "nalu.h"
+#include "sei.h"
+
+// Parameter set life cycle (presumably creates / releases the sets used by the
+// encoder -- the definitions are not part of this header, verify in parset.c)
+void GenerateParameterSets (void);
+void FreeParameterSets (void);
+
+// NALU level generators, each returns a NALU_t *.
+// GenerateSEImessage_NALU obtains its NALU from AllocNALU(); who releases it is
+// not visible here -- TODO confirm at the call sites.
+NALU_t *GenerateSeq_parameter_set_NALU (void);
+NALU_t *GeneratePic_parameter_set_NALU (int);
+NALU_t *GenerateSEImessage_NALU();
+
+// The following are local helpers, but may come handy in the future, hence public
+void GenerateSequenceParameterSet(seq_parameter_set_rbsp_t *sps, int SPS_id);
+void GeneratePictureParameterSet( pic_parameter_set_rbsp_t *pps, seq_parameter_set_rbsp_t *sps, int PPS_id,
+                                 int WeightedPrediction, int WeightedBiprediction,
+                                 int cb_qp_index_offset, int cr_qp_index_offset);
+
+// RBSP level writers: fill buf and return the size of the RBSP in bytes.
+// Scaling_List is called by the PPS writer once per scaling list that is present.
+int Scaling_List(short *scalingListinput, short *scalingList, int sizeOfScalingList, short *UseDefaultScalingMatrix, Bitstream *bitstream);
+int GenerateSeq_parameter_set_rbsp (seq_parameter_set_rbsp_t *sps, byte *buf);
+int GeneratePic_parameter_set_rbsp (pic_parameter_set_rbsp_t *pps, byte *buf);
+int GenerateSEImessage_rbsp (int id, byte *buf);
+
+// Allocation helpers; these four are also declared in parsetcommon.h
+void FreeSPS (seq_parameter_set_rbsp_t *sps);
+void FreePPS (pic_parameter_set_rbsp_t *pps);
+
+pic_parameter_set_rbsp_t *AllocPPS (void);
+seq_parameter_set_rbsp_t *AllocSPS (void);
+
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,100 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    parsetcommon.c
+ * \brief
+ *    Picture and Sequence Parameter set generation and handling
+ *  \date 25 November 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ *
+ **************************************************************************************
+ */
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+#include "global.h"
+#include "memalloc.h"
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates a zero-initialised picture parameter set
+ *
+ * \return
+ *    pointer to the new pps; its slice_group_id member is NULL.
+ *    no_mem_exit() is called if the allocation fails.
+ *************************************************************************************
+ */
+pic_parameter_set_rbsp_t *AllocPPS ()
+{
+  pic_parameter_set_rbsp_t *pps = calloc (1, sizeof (pic_parameter_set_rbsp_t));
+
+  if (pps == NULL)
+    no_mem_exit ("AllocPPS: PPS");
+
+  // no slice group map attached yet; FreePPS() frees whatever this points to
+  pps->slice_group_id = NULL;
+  return pps;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Allocates a zero-initialised sequence parameter set
+ *
+ * \return
+ *    pointer to the new sps; no_mem_exit() is called if the allocation fails
+ *************************************************************************************
+ */
+seq_parameter_set_rbsp_t *AllocSPS ()
+{
+  seq_parameter_set_rbsp_t *sps = calloc (1, sizeof (seq_parameter_set_rbsp_t));
+
+  if (sps == NULL)
+    no_mem_exit ("AllocSPS: SPS");
+
+  return sps;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Releases a picture parameter set together with its slice group map
+ *
+ * \param pps
+ *     pps to be freed, must not be NULL (asserted)
+ *
+ * \return
+ *    none
+ *************************************************************************************
+ */
+
+void FreePPS (pic_parameter_set_rbsp_t *pps)
+{
+  assert (pps != NULL);
+
+  // free() accepts NULL, so an absent slice group map needs no special case
+  free (pps->slice_group_id);
+  free (pps);
+}
+
+
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Releases a sequence parameter set
+ *
+ * \param sps
+ *     sps to be freed, must not be NULL (asserted)
+ *
+ * \return
+ *    none
+ *************************************************************************************
+ */
+
+void FreeSPS (seq_parameter_set_rbsp_t *sps)
+{
+  assert (sps != NULL);
+
+  free (sps);
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,198 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    parsetcommon.h
+ * \brief
+ *    Picture and Sequence Parameter Sets, structures common to encoder and decoder
+ *    This code reflects JVT version xxx
+ *  \date 25 November 2002
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+ ***************************************************************************************
+ */
+
+
+
+// In the JVT syntax, frequently flags are used that indicate the presence of
+// certain pieces of information in the NALU.  Here, these flags are also
+// present.  In the encoder, those bits indicate that the values signalled to
+// be present are meaningful and that this part of the syntax should be
+// written to the NALU.  In the decoder, the flag indicates that information
+// was received from the decoded NALU and should be used henceforth.
+// The structure names were chosen as indicated in the JVT syntax
+
+#ifndef _PARSETCOMMON_H_
+#define _PARSETCOMMON_H_
+
+#define MAXIMUMPARSETRBSPSIZE   1500
+#define MAXIMUMPARSETNALUSIZE   1500
+
+#define MAXSPS  32
+#define MAXPPS  256
+
+//! Boolean Type
+//  If FALSE is already defined as a macro, Boolean is just a macro alias for int
+//  (TRUE is then presumably provided by the same source as FALSE -- verify);
+//  otherwise Boolean is an enum with FALSE = 0 and TRUE = 1.
+#ifdef FALSE
+#  define Boolean int
+#else
+typedef enum {
+  FALSE,
+  TRUE
+} Boolean;
+#endif
+
+//! HRD parameters; embedded twice in vui_seq_parameters_t (nal_hrd_parameters and
+//! vcl_hrd_parameters).  The three arrays hold one entry per CPB and are sized by
+//! MAXIMUMVALUEOFcpb_cnt.  The trailing comments give the syntax descriptor of
+//! each element; extra indentation marks elements that depend on the one above.
+#define MAXIMUMVALUEOFcpb_cnt   32
+typedef struct
+{
+  unsigned int  cpb_cnt;                                          // ue(v)
+  unsigned int  bit_rate_scale;                                   // u(4)
+  unsigned int  cpb_size_scale;                                   // u(4)
+    unsigned int  bit_rate_value [MAXIMUMVALUEOFcpb_cnt];         // ue(v)
+    unsigned int  cpb_size_value[MAXIMUMVALUEOFcpb_cnt];          // ue(v)
+    unsigned int  vbr_cbr_flag[MAXIMUMVALUEOFcpb_cnt];            // u(1)
+  unsigned int  initial_cpb_removal_delay_length_minus1;          // u(5)
+  unsigned int  cpb_removal_delay_length_minus1;                  // u(5)
+  unsigned int  dpb_output_delay_length_minus1;                   // u(5)
+  unsigned int  time_offset_length;                               // u(5)
+} hrd_parameters_t;
+
+
+//! VUI parameters of a sequence parameter set; stored in seq_parameter_set_rbsp_t
+//! as vui_seq_parameters.  Each *_present_flag is followed, with extra indentation,
+//! by the elements it guards.  The trailing comments give the syntax descriptor.
+//! NOTE(review): GenerateVUISequenceParameters() in parset.c writes constants and
+//! does not read this structure -- confirm whether any other code fills it.
+typedef struct
+{
+  Boolean      aspect_ratio_info_present_flag;                   // u(1)
+    unsigned int  aspect_ratio_idc;                               // u(8)
+      unsigned int  sar_width;                                    // u(16)
+      unsigned int  sar_height;                                   // u(16)
+  Boolean      overscan_info_present_flag;                       // u(1)
+    Boolean      overscan_appropriate_flag;                      // u(1)
+  Boolean      video_signal_type_present_flag;                   // u(1)
+    unsigned int  video_format;                                   // u(3)
+    Boolean      video_full_range_flag;                          // u(1)
+    Boolean      colour_description_present_flag;                // u(1)
+      unsigned int  colour_primaries;                             // u(8)
+      unsigned int  transfer_characteristics;                     // u(8)
+      unsigned int  matrix_coefficients;                          // u(8)
+  Boolean      chroma_location_info_present_flag;                // u(1)
+    unsigned int  chroma_location_frame;                          // ue(v)
+    unsigned int  chroma_location_field;                          // ue(v)
+  Boolean      timing_info_present_flag;                         // u(1)
+    unsigned int  num_units_in_tick;                              // u(32)
+    unsigned int  time_scale;                                     // u(32)
+    Boolean      fixed_frame_rate_flag;                          // u(1)
+  Boolean      nal_hrd_parameters_present_flag;                  // u(1)
+    hrd_parameters_t nal_hrd_parameters;                      // hrd_parameters_t
+  Boolean      vcl_hrd_parameters_present_flag;                  // u(1)
+    hrd_parameters_t vcl_hrd_parameters;                      // hrd_parameters_t
+  // if ((nal_hrd_parameters_present_flag || (vcl_hrd_parameters_present_flag))
+    Boolean      low_delay_hrd_flag;                             // u(1)
+  Boolean      bitstream_restriction_flag;                       // u(1)
+    Boolean      motion_vectors_over_pic_boundaries_flag;        // u(1)
+    unsigned int  max_bytes_per_pic_denom;                        // ue(v)
+    unsigned int  max_bits_per_mb_denom;                          // ue(v)
+    unsigned int  log2_max_mv_length_vertical;                    // ue(v)
+    unsigned int  log2_max_mv_length_horizontal;                  // ue(v)
+    unsigned int  max_dec_frame_reordering;                       // ue(v)
+    unsigned int  max_dec_frame_buffering;                        // ue(v)
+} vui_seq_parameters_t;
+
+
+//! Picture parameter set.  Allocated by AllocPPS(), released by FreePPS() and
+//! serialised by GeneratePic_parameter_set_rbsp() in parset.c.  The trailing
+//! comments give the syntax descriptor of each element.
+//! The per-slice-group arrays have MAXnum_slice_groups_minus1 (8) entries; the
+//! writer indexes run_length_minus1 up to and including num_slice_groups_minus1,
+//! so that value must stay below 8.
+#define MAXnum_slice_groups_minus1  8
+typedef struct
+{
+  Boolean   Valid;                  // indicates the parameter set is valid
+  unsigned int  pic_parameter_set_id;                             // ue(v)
+  unsigned int  seq_parameter_set_id;                             // ue(v)
+  Boolean   entropy_coding_mode_flag;                         // u(1)
+
+  // the next three are only written for the FRExt profiles
+  Boolean   transform_8x8_mode_flag;                          // u(1)
+  Boolean   pic_scaling_matrix_present_flag;                  // u(1)
+  int       pic_scaling_list_present_flag[8];                 // u(1)
+
+  // if( pic_order_cnt_type < 2 )  in the sequence parameter set
+  Boolean      pic_order_present_flag;                           // u(1)
+  unsigned int  num_slice_groups_minus1;                          // ue(v)
+    unsigned int  slice_group_map_type;                        // ue(v)
+    // if( slice_group_map_type == 0 )
+      unsigned int  run_length_minus1[MAXnum_slice_groups_minus1]; // ue(v)
+    // else if( slice_group_map_type == 2 )
+      unsigned int  top_left[MAXnum_slice_groups_minus1];         // ue(v)
+      unsigned int  bottom_right[MAXnum_slice_groups_minus1];     // ue(v)
+    // else if( slice_group_map_type == 3 || 4 || 5 )
+      Boolean   slice_group_change_direction_flag;            // u(1)
+      unsigned int  slice_group_change_rate_minus1;               // ue(v)
+    // else if( slice_group_map_type == 6 )
+      unsigned int  pic_size_in_map_units_minus1;                 // ue(v)
+      byte      *slice_group_id;                              // complete MBAmap u(v); freed by FreePPS()
+
+  int       num_ref_idx_l0_active_minus1;                     // ue(v)
+  int       num_ref_idx_l1_active_minus1;                     // ue(v)
+  Boolean   weighted_pred_flag;                               // u(1)
+  unsigned int  weighted_bipred_idc;                              // u(2)
+  int       pic_init_qp_minus26;                              // se(v)
+  int       pic_init_qs_minus26;                              // se(v)
+  int       chroma_qp_index_offset;                           // se(v), written for the non-FRExt profiles
+
+  int       cb_qp_index_offset;                               // se(v), written instead of chroma_qp_index_offset for the FRExt profiles
+  int       cr_qp_index_offset;                               // se(v), written as second_chroma_qp_index_offset (FRExt profiles only)
+
+  Boolean   deblocking_filter_control_present_flag;           // u(1)
+  Boolean   constrained_intra_pred_flag;                      // u(1)
+  Boolean   redundant_pic_cnt_present_flag;                   // u(1)
+  Boolean   vui_pic_parameters_flag;                          // u(1)
+} pic_parameter_set_rbsp_t;
+
+
+//! Sequence parameter set.  Allocated by AllocSPS() and released by FreeSPS().
+//! The trailing comments give the syntax descriptor of each element; extra
+//! indentation marks elements that are only meaningful under the condition
+//! named in the comment above them.
+#define MAXnum_ref_frames_in_pic_order_cnt_cycle  256
+typedef struct
+{
+  Boolean   Valid;                  // indicates the parameter set is valid
+
+  unsigned int  profile_idc;                                      // u(8)
+  Boolean   constrained_set0_flag;                            // u(1)
+  Boolean   constrained_set1_flag;                            // u(1)
+  Boolean   constrained_set2_flag;                            // u(1)
+  Boolean   constrained_set3_flag;                            // u(1)
+  unsigned int  level_idc;                                        // u(8)
+  unsigned int  seq_parameter_set_id;                             // ue(v)
+  unsigned int  chroma_format_idc;                                // ue(v)
+
+  Boolean   seq_scaling_matrix_present_flag;                  // u(1)
+  int       seq_scaling_list_present_flag[8];                 // u(1)
+
+  unsigned int  bit_depth_luma_minus8;                            // ue(v)
+  unsigned int  bit_depth_chroma_minus8;                          // ue(v)
+  unsigned int  log2_max_frame_num_minus4;                        // ue(v)
+  unsigned int pic_order_cnt_type;
+  // if( pic_order_cnt_type == 0 )
+  unsigned int log2_max_pic_order_cnt_lsb_minus4;                 // ue(v)
+  // else if( pic_order_cnt_type == 1 )
+    Boolean delta_pic_order_always_zero_flag;               // u(1)
+    int     offset_for_non_ref_pic;                         // se(v)
+    int     offset_for_top_to_bottom_field;                 // se(v)
+    unsigned int  num_ref_frames_in_pic_order_cnt_cycle;          // ue(v)
+    // for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
+      int   offset_for_ref_frame[MAXnum_ref_frames_in_pic_order_cnt_cycle];   // se(v)
+  unsigned int  num_ref_frames;                                   // ue(v)
+  Boolean   gaps_in_frame_num_value_allowed_flag;             // u(1)
+  unsigned int  pic_width_in_mbs_minus1;                          // ue(v)
+  unsigned int  pic_height_in_map_units_minus1;                   // ue(v)
+  Boolean   frame_mbs_only_flag;                              // u(1)
+  // if( !frame_mbs_only_flag )
+    Boolean   mb_adaptive_frame_field_flag;                   // u(1)
+  Boolean   direct_8x8_inference_flag;                        // u(1)
+  Boolean   frame_cropping_flag;                              // u(1)
+    // the four offsets are only written when frame_cropping_flag is set
+    unsigned int  frame_cropping_rect_left_offset;                // ue(v)
+    unsigned int  frame_cropping_rect_right_offset;               // ue(v)
+    unsigned int  frame_cropping_rect_top_offset;                 // ue(v)
+    unsigned int  frame_cropping_rect_bottom_offset;              // ue(v)
+  Boolean   vui_parameters_present_flag;                      // u(1)
+    vui_seq_parameters_t vui_seq_parameters;                  // vui_seq_parameters_t
+} seq_parameter_set_rbsp_t;
+
+// Allocation and release of parameter sets, implemented in parsetcommon.c.
+// The Alloc functions return zero-initialised structures; FreePPS also frees
+// the slice_group_id map of the pps.
+pic_parameter_set_rbsp_t *AllocPPS (void);
+seq_parameter_set_rbsp_t *AllocSPS (void);
+void FreePPS (pic_parameter_set_rbsp_t *pps);
+void FreeSPS (seq_parameter_set_rbsp_t *sps);
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/q_matrix.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_matrix.c:1.4
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/q_matrix.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,654 @@
+
+/*!
+ *************************************************************************************
+ * \file q_matrix.c
+ *
+ * \brief
+ *    read q_matrix parameters from input file: q_matrix.cfg
+ *
+ *************************************************************************************
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "global.h"
+#include "memalloc.h"
+
+extern char *GetConfigFileContent (char *Filename, int error_type);
+
+#define MAX_ITEMS_TO_PARSE  1000
+
+extern const int quant_coef[6][4][4];
+extern const int dequant_coef[6][4][4];
+
+extern const int quant_coef8[6][8][8];
+extern const int dequant_coef8[6][8][8];
+
+
+// Per-matrix "found in cfg file" flags. ParseMatrix() sets an entry to 1
+// when the corresponding matrix name is parsed; PatchMatrix() reads them to
+// decide whether a requested matrix has to be filled with defaults.
+int matrix4x4_check[6] = {0, 0, 0, 0, 0, 0};
+int matrix8x8_check[2] = {0, 0};
+
+// Parameter names accepted for 4x4 matrices. The position of a name in this
+// table is the index returned by CheckParameterName() and is used as the
+// row index into ScalingList4x4input / matrix4x4_check.
+static const char MatrixType4x4[6][20] =
+{
+  "INTRA4X4_LUMA",
+  "INTRA4X4_CHROMAU",
+  "INTRA4X4_CHROMAV",
+  "INTER4X4_LUMA",
+  "INTER4X4_CHROMAU",
+  "INTER4X4_CHROMAV"
+};
+
+// Parameter names accepted for 8x8 matrices; same indexing convention as
+// above, applied to ScalingList8x8input / matrix8x8_check.
+static const char MatrixType8x8[2][20] =
+{
+  "INTRA8X8_LUMA",
+  "INTER8X8_LUMA",
+};
+
+// Forward (quantisation) scale tables, allocated in allocate_QMatrix():
+//   luma   : [2][6][4][4] resp. [2][6][8][8]
+//   chroma : [2][2][6][4][4]
+// As filled by CalculateQuantParam()/CalculateQuant8Param(): for luma the
+// first index is 1 for the intra lists and 0 for the inter lists; for chroma
+// the first index selects the U (0) or V (1) list and the second index is
+// intra (1) / inter (0). The index of size 6 is paired with the first index
+// of quant_coef / dequant_coef.
+int ****LevelScale4x4Luma;
+int *****LevelScale4x4Chroma;
+int ****LevelScale8x8Luma;
+
+// Inverse (dequantisation) scale tables, same layout as above.
+int ****InvLevelScale4x4Luma;
+int *****InvLevelScale4x4Chroma;
+int ****InvLevelScale8x8Luma;
+
+// *input arrays receive the values read from the cfg file (ParseMatrix,
+// PatchMatrix). ScalingList4x4 / ScalingList8x8 are the lists actually read
+// by CalculateQuantParam()/CalculateQuant8Param(); they are not written in
+// this file section.
+short ScalingList4x4input[6][16];
+short ScalingList8x8input[2][64];
+short ScalingList4x4[6][16];
+short ScalingList8x8[2][64];
+
+// When non-zero, the built-in default matrix is used instead of the
+// corresponding ScalingList entry. Cleared by Init_QMatrix().
+short UseDefaultScalingMatrix4x4Flag[6];
+short UseDefaultScalingMatrix8x8Flag[2];
+
+
+// Lookup tables filled by allocate_QMatrix(): entry i holds i / 6 and i % 6.
+int *qp_per_matrix;
+int *qp_rem_matrix;
+
+// Built-in 4x4 intra matrix, stored row by row (index = (row << 2) + col as
+// used in CalculateQuantParam). Used as fallback by PatchMatrix() and
+// whenever a list is absent or flagged "use default".
+static const short Quant_intra_default[16] =
+{
+ 6,13,20,28,
+13,20,28,32,
+20,28,32,37,
+28,32,37,42
+};
+
+// Built-in 4x4 inter matrix, same layout and usage as the intra one.
+static const short Quant_inter_default[16] =
+{
+10,14,20,24,
+14,20,24,27,
+20,24,27,30,
+24,27,30,34
+};
+
+// Built-in 8x8 intra matrix, stored row by row (index = (row << 3) + col as
+// used in CalculateQuant8Param).
+static const short Quant8_intra_default[64] =
+{
+ 6,10,13,16,18,23,25,27,
+10,11,16,18,23,25,27,29,
+13,16,18,23,25,27,29,31,
+16,18,23,25,27,29,31,33,
+18,23,25,27,29,31,33,36,
+23,25,27,29,31,33,36,38,
+25,27,29,31,33,36,38,40,
+27,29,31,33,36,38,40,42
+};
+
+// Built-in 8x8 inter matrix, same layout as the 8x8 intra one.
+static const short Quant8_inter_default[64] =
+{
+ 9,13,15,17,19,21,22,24,
+13,13,17,19,21,22,24,25,
+15,17,19,21,22,24,25,27,
+17,19,21,22,24,25,27,28,
+19,21,22,24,25,27,28,30,
+21,22,24,25,27,28,30,32,
+22,24,25,27,28,30,32,33,
+24,25,27,28,30,32,33,35
+};
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Check the parameter name.
+ *
+ *    The 4x4 name table is searched first, then the 8x8 table.
+ * \param s
+ *    parameter name string (NUL terminated)
+ * \param type
+ *    output: set to 0 when s names a 4x4 matrix, 1 when it names an
+ *    8x8 matrix. It is left at 1 when the name is not found.
+ * \return
+ *    the index number if the string is a valid parameter name,         \n
+ *    -1 for error
+ ***********************************************************************
+ */
+int CheckParameterName (char *s, int *type)
+{
+  const int num4x4 = (int) (sizeof (MatrixType4x4) / sizeof (MatrixType4x4[0]));
+  const int num8x8 = (int) (sizeof (MatrixType8x8) / sizeof (MatrixType8x8[0]));
+  int i;
+
+  // The name tables are real 2-D char arrays, so a row can never compare
+  // equal to NULL; the table size alone bounds each search.
+  *type = 0;
+  for (i = 0; i < num4x4; i++)
+  {
+    if (0 == strcmp (MatrixType4x4[i], s))
+      return i;
+  }
+
+  *type = 1;
+  for (i = 0; i < num8x8; i++)
+  {
+    if (0 == strcmp (MatrixType8x8[i], s))
+      return i;
+  }
+
+  return -1;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Parse the Q matrix values read from cfg file.
+ *
+ *    The buffer is first split in place into NUL-terminated tokens
+ *    (whitespace, ',' and line ends separate tokens, '#' starts a
+ *    comment that runs to the end of the line, '"' delimits strings).
+ *    The tokens are then consumed as entries of the form
+ *    "NAME = v0, v1, ..." with 16 values for a 4x4 matrix and 64 values
+ *    for an 8x8 matrix; the values are stored in ScalingList4x4input /
+ *    ScalingList8x8input and the matching matrixNxN_check flag is set.
+ *
+ *    Errors are reported through error(errortext, 300).
+ *    NOTE(review): error() is presumably fatal; the explicit returns after
+ *    it only guard against continuing with bad state should it return.
+ * \param buf
+ *    buffer to be parsed (modified in place)
+ * \param bufsize
+ *    buffer size of buffer
+ ***********************************************************************
+ */
+void ParseMatrix (char *buf, int bufsize)
+{
+  char *items[MAX_ITEMS_TO_PARSE];
+  int MapIdx;
+  int item = 0;
+  int total;
+  int InString = 0, InItem = 0;
+  char *p = buf;
+  char *bufend = &buf[bufsize];
+  int IntContent;
+  int i, j, range, type, cnt;
+  short *ScalingList;
+
+  // Pass 1: tokenise the buffer in place
+  while (p < bufend)
+  {
+    switch (*p)
+    {
+      case 13:                  // Carriage return: ignored
+        p++;
+        break;
+      case '#':                 // Found comment
+        *p = '\0';              // Replace '#' with '\0' in case of comment immediately following integer or string
+        while (p < bufend && *p != '\n')  // Skip till EOL or EOF; bounds are tested before dereferencing
+          p++;
+        InString = 0;
+        InItem = 0;
+        break;
+      case '\n':
+        InItem = 0;
+        InString = 0;
+        *p++='\0';
+        break;
+      case ' ':
+      case '\t':              // Skip whitespace, leave state unchanged
+        if (InString)
+          p++;
+        else
+        {                     // Terminate non-strings once whitespace is found
+          *p++ = '\0';
+          InItem = 0;
+        }
+        break;
+
+      case '"':               // Begin/End of String
+        *p++ = '\0';
+        if (!InString)
+        {
+          if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+          {
+            snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d items found.", MAX_ITEMS_TO_PARSE);
+            error (errortext, 300);
+            return;
+          }
+          items[item++] = p;
+          InItem = ~InItem;
+        }
+        else
+          InItem = 0;
+        InString = ~InString; // Toggle
+        break;
+
+      case ',':
+        p++;
+        InItem = 0;
+        break;
+
+      default:
+        if (!InItem)
+        {
+          if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+          {
+            snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d items found.", MAX_ITEMS_TO_PARSE);
+            error (errortext, 300);
+            return;
+          }
+          items[item++] = p;
+          InItem = ~InItem;
+        }
+        p++;
+    }
+  }
+
+  total = item;   // number of valid entries in items[]
+  item--;         // as before: a single left-over token at the very end does not start a new entry
+
+  // Pass 2: interpret the tokens as "NAME = values..." entries
+  for (i=0; i<item; i+=cnt)
+  {
+    cnt=0;
+    if (0 > (MapIdx = CheckParameterName (items[i+cnt], &type)))
+    {
+      snprintf (errortext, ET_SIZE, " Parsing error in config file: Parameter Name '%s' not recognized.", items[i+cnt]);
+      error (errortext, 300);
+      return;
+    }
+    cnt++;
+    if (strcmp ("=", items[i+cnt]))   // i < total-1, so items[i+1] is always a valid token
+    {
+      snprintf (errortext, ET_SIZE, " Parsing error in config file: '=' expected as the second token in each item.");
+      error (errortext, 300);
+      return;
+    }
+    cnt++;
+
+    range = (!type) ? 16 : 64;        // 4x4 or 8x8 matrix
+
+    // A truncated file must not make us read token slots that were never filled
+    if (i + cnt + range > total)
+    {
+      snprintf (errortext, ET_SIZE, " Parsing error: Expected %d values for Parameter %s, found only %d.", range, items[i], total - i - cnt);
+      error (errortext, 300);
+      return;
+    }
+
+    if (!type) //4x4 Matrix
+    {
+      ScalingList = ScalingList4x4input[MapIdx];
+      matrix4x4_check[MapIdx] = 1; //to indicate matrix found in cfg file
+    }
+    else //8x8 matrix
+    {
+      ScalingList = ScalingList8x8input[MapIdx];
+      matrix8x8_check[MapIdx] = 1; //to indicate matrix found in cfg file
+    }
+
+    for(j=0; j<range; j++)
+    {
+      if (1 != sscanf (items[i+cnt+j], "%d", &IntContent))
+      {
+        snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+cnt+j]);
+        error (errortext, 300);
+        return;
+      }
+
+      ScalingList[j] = (short)IntContent; //save value in matrix
+    }
+    cnt+=j;
+    printf (".");
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Check Q Matrix values. Every matrix requested through
+ *    input->ScalingListPresentFlag that is either missing from the cfg
+ *    file or contains a value outside 0..255 is replaced as a whole by
+ *    the built-in default matrix of its kind (intra or inter).
+ *
+ *    Flags 0..5 refer to the 4x4 lists, flags 6..7 to the 8x8 lists.
+ ***********************************************************************
+ */
+void PatchMatrix(void)
+{
+  short *ScalingList;
+  int i, cnt, fail;
+
+  for(i=0; i<6; i++)
+  {
+    if(input->ScalingListPresentFlag[i])
+    {
+      // 4x4 lists 0..2 are the INTRA4X4_* entries, 3..5 the INTER4X4_* entries
+      const short *Default4x4 = (i>2) ? Quant_inter_default : Quant_intra_default;
+
+      ScalingList=ScalingList4x4input[i];
+      if(matrix4x4_check[i])
+      {
+        fail=0;
+        for(cnt=0; cnt<16; cnt++)
+        {
+          if(ScalingList[cnt]<0 || ScalingList[cnt]>255) // ScalingList[0]=0 to indicate use default matrix
+          {
+            fail=1;
+            break;
+          }
+        }
+
+        if(fail) //value of matrix exceed range
+        {
+          printf("\n%s value exceed range. (Value must be 1 to 255)\n", MatrixType4x4[i]);
+          printf("Setting default values for this matrix.");
+          memcpy(ScalingList, Default4x4, sizeof(short)*16);
+        }
+      }
+      else //matrix not found, pad with default value
+      {
+        printf("\n%s matrix definition not found. Setting default values.", MatrixType4x4[i]);
+        memcpy(ScalingList, Default4x4, sizeof(short)*16);
+      }
+    }
+
+    if((i<2) && input->ScalingListPresentFlag[i+6])
+    {
+      // 8x8 list 0 is INTRA8X8_LUMA, list 1 is INTER8X8_LUMA. The previous
+      // test "i==7" could never be true in this branch (i < 2), so the inter
+      // list was always patched with the intra default.
+      const short *Default8x8 = (i==1) ? Quant8_inter_default : Quant8_intra_default;
+
+      ScalingList=ScalingList8x8input[i];
+      if(matrix8x8_check[i])
+      {
+        fail=0;
+        for(cnt=0; cnt<64; cnt++)
+        {
+          if(ScalingList[cnt]<0 || ScalingList[cnt]>255) // ScalingList[0]=0 to indicate use default matrix
+          {
+            fail=1;
+            break;
+          }
+        }
+
+        if(fail) //value of matrix exceed range
+        {
+          printf("\n%s value exceed range. (Value must be 1 to 255)\n", MatrixType8x8[i]);
+          printf("Setting default values for this matrix.");
+          memcpy(ScalingList, Default8x8, sizeof(short)*64);
+        }
+      }
+      else //matrix not found, pad with default value
+      {
+        printf("\n%s matrix definition not found. Setting default values.", MatrixType8x8[i]);
+        memcpy(ScalingList, Default8x8, sizeof(short)*64);
+      }
+    }
+  }
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Allocate Q matrix arrays
+ *
+ *    Allocates and fills the qp_per_matrix / qp_rem_matrix lookup tables
+ *    (entry i holds i / 6 resp. i % 6) for all QP values up to
+ *    MAX_QP + 6*(BitDepthLuma - 8), and allocates the forward and
+ *    inverse level scale tables. Released again by free_QMatrix().
+ ***********************************************************************
+ */
+void allocate_QMatrix ()
+{
+  int bitdepth_qp_scale = 6*(input->BitDepthLuma - 8);
+  int num_qp = MAX_QP + 1 + bitdepth_qp_scale;   // number of table entries
+  int i;
+
+  if ((qp_per_matrix = (int*)malloc(num_qp*sizeof(int))) == NULL)
+    no_mem_exit("init_global_buffers: qp_per_matrix");
+  if ((qp_rem_matrix = (int*)malloc(num_qp*sizeof(int))) == NULL)
+    no_mem_exit("init_global_buffers: qp_rem_matrix");   // message used to name qp_per_matrix
+
+  for (i = 0; i < num_qp; i++)
+  {
+    qp_per_matrix[i] = i / 6;
+    qp_rem_matrix[i] = i % 6;
+  }
+
+  get_mem4Dint(&LevelScale4x4Luma,      2, 6, 4, 4);
+  get_mem5Dint(&LevelScale4x4Chroma, 2, 2, 6, 4, 4);
+  get_mem4Dint(&LevelScale8x8Luma,      2, 6, 8, 8);
+
+  get_mem4Dint(&InvLevelScale4x4Luma,      2, 6, 4, 4);
+  get_mem5Dint(&InvLevelScale4x4Chroma, 2, 2, 6, 4, 4);
+  get_mem4Dint(&InvLevelScale8x8Luma,      2, 6, 8, 8);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Release everything that allocate_QMatrix() obtained: the forward
+ *    and inverse level scale tables and the two QP lookup tables.
+ ***********************************************************************
+ */
+void free_QMatrix ()
+{
+  // 4x4 luma, forward and inverse
+  free_mem4Dint(LevelScale4x4Luma,    2, 6);
+  free_mem4Dint(InvLevelScale4x4Luma, 2, 6);
+
+  // 4x4 chroma, forward and inverse
+  free_mem5Dint(LevelScale4x4Chroma,    2, 2, 6);
+  free_mem5Dint(InvLevelScale4x4Chroma, 2, 2, 6);
+
+  // 8x8 luma, forward and inverse
+  free_mem4Dint(LevelScale8x8Luma,    2, 6);
+  free_mem4Dint(InvLevelScale8x8Luma, 2, 6);
+
+  // QP / 6 and QP % 6 lookup tables
+  free(qp_per_matrix);
+  free(qp_rem_matrix);
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Initialise Q matrix values.
+ *
+ *    Always allocates the Q matrix arrays. When
+ *    input->ScalingMatrixPresentFlag is set, the file named by
+ *    input->QmatrixFile is read and parsed, missing or invalid matrices
+ *    are patched with defaults, and the UseDefaultScalingMatrix flags
+ *    are cleared. If the file content cannot be obtained, a message is
+ *    printed and PatchMatrix() supplies the defaults.
+ ***********************************************************************
+ */
+void Init_QMatrix (void)
+{
+  char *content;
+
+
+  allocate_QMatrix ();
+
+  if(input->ScalingMatrixPresentFlag)
+  {
+    printf ("Parsing QMatrix file %s ", input->QmatrixFile);
+    content = GetConfigFileContent(input->QmatrixFile, 0);
+    if(content != NULL)   // compare the pointer with NULL, not with the character '\0'
+      ParseMatrix(content, (int) strlen (content));
+    else
+      printf("\nError: %s\nProceeding with default values for all matrices.", errortext);
+
+    PatchMatrix();
+    printf("\n");
+
+    memset(UseDefaultScalingMatrix4x4Flag, 0, 6 * sizeof(short));
+    UseDefaultScalingMatrix8x8Flag[0]=UseDefaultScalingMatrix8x8Flag[1]=0;
+
+    free(content);        // free(NULL) is a no-op
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fill the 4x4 forward and inverse level scale tables for the
+ *    currently active SPS/PPS.
+ *
+ *    Without any scaling matrix the tables are the plain quant_coef
+ *    values and dequant_coef << 4. Otherwise each entry is
+ *    (quant_coef << 4) / w resp. dequant_coef * w, where w is the weight
+ *    of the selected list: luma lists (0 and 3) use the built-in default
+ *    when absent or flagged "use default"; an absent chroma list copies
+ *    the result of the list before it (U from luma, V from U).
+ *
+ * \par Input:
+ *    none
+ *
+ * \par Output:
+ *    none
+ ************************************************************************
+ */
+void CalculateQuantParam(void)
+{
+  int qp_rem, row, col, pos, n;
+  int weight;
+  int present[6];
+  const int flat = !active_sps->seq_scaling_matrix_present_flag && !active_pps->pic_scaling_matrix_present_flag;
+
+  if (!flat)
+  {
+    for (n = 0; n < 6; n++)
+      present[n] = 0;
+
+    if (active_sps->seq_scaling_matrix_present_flag)
+    {
+      for (n = 0; n < 6; n++)
+        present[n] = active_sps->seq_scaling_list_present_flag[n];
+    }
+
+    if (active_pps->pic_scaling_matrix_present_flag)
+    {
+      for (n = 0; n < 6; n++)
+      {
+        // luma lists accumulate the SPS flag, chroma lists take the PPS flag only
+        if (n == 0 || n == 3)
+          present[n] |= active_pps->pic_scaling_list_present_flag[n];
+        else
+          present[n]  = active_pps->pic_scaling_list_present_flag[n];
+      }
+    }
+  }
+
+  for (qp_rem = 0; qp_rem < 6; qp_rem++)
+  {
+    for (row = 0; row < 4; row++)
+    {
+      for (col = 0; col < 4; col++)
+      {
+        const int fwd = quant_coef[qp_rem][row][col];
+        const int inv = dequant_coef[qp_rem][row][col];
+
+        if (flat)
+        {
+          // Intra
+          LevelScale4x4Luma[1][qp_rem][row][col]         = fwd;
+          InvLevelScale4x4Luma[1][qp_rem][row][col]      = inv << 4;
+          LevelScale4x4Chroma[0][1][qp_rem][row][col]    = fwd;
+          InvLevelScale4x4Chroma[0][1][qp_rem][row][col] = inv << 4;
+          LevelScale4x4Chroma[1][1][qp_rem][row][col]    = fwd;
+          InvLevelScale4x4Chroma[1][1][qp_rem][row][col] = inv << 4;
+
+          // Inter
+          LevelScale4x4Luma[0][qp_rem][row][col]         = fwd;
+          InvLevelScale4x4Luma[0][qp_rem][row][col]      = inv << 4;
+          LevelScale4x4Chroma[0][0][qp_rem][row][col]    = fwd;
+          InvLevelScale4x4Chroma[0][0][qp_rem][row][col] = inv << 4;
+          LevelScale4x4Chroma[1][0][qp_rem][row][col]    = fwd;
+          InvLevelScale4x4Chroma[1][0][qp_rem][row][col] = inv << 4;
+          continue;
+        }
+
+        pos = (row << 2) + col;
+
+        // list 0: intra luma
+        if (present[0] && !UseDefaultScalingMatrix4x4Flag[0])
+          weight = ScalingList4x4[0][pos];
+        else
+          weight = Quant_intra_default[pos];
+        LevelScale4x4Luma[1][qp_rem][row][col]    = (fwd << 4) / weight;
+        InvLevelScale4x4Luma[1][qp_rem][row][col] = inv * weight;
+
+        // list 1: intra chroma U
+        if (present[1])
+        {
+          weight = UseDefaultScalingMatrix4x4Flag[1] ? Quant_intra_default[pos] : ScalingList4x4[1][pos];
+          LevelScale4x4Chroma[0][1][qp_rem][row][col]    = (fwd << 4) / weight;
+          InvLevelScale4x4Chroma[0][1][qp_rem][row][col] = inv * weight;
+        }
+        else
+        {
+          LevelScale4x4Chroma[0][1][qp_rem][row][col]    = LevelScale4x4Luma[1][qp_rem][row][col];
+          InvLevelScale4x4Chroma[0][1][qp_rem][row][col] = InvLevelScale4x4Luma[1][qp_rem][row][col];
+        }
+
+        // list 2: intra chroma V
+        if (present[2])
+        {
+          weight = UseDefaultScalingMatrix4x4Flag[2] ? Quant_intra_default[pos] : ScalingList4x4[2][pos];
+          LevelScale4x4Chroma[1][1][qp_rem][row][col]    = (fwd << 4) / weight;
+          InvLevelScale4x4Chroma[1][1][qp_rem][row][col] = inv * weight;
+        }
+        else
+        {
+          LevelScale4x4Chroma[1][1][qp_rem][row][col]    = LevelScale4x4Chroma[0][1][qp_rem][row][col];
+          InvLevelScale4x4Chroma[1][1][qp_rem][row][col] = InvLevelScale4x4Chroma[0][1][qp_rem][row][col];
+        }
+
+        // list 3: inter luma
+        if (present[3] && !UseDefaultScalingMatrix4x4Flag[3])
+          weight = ScalingList4x4[3][pos];
+        else
+          weight = Quant_inter_default[pos];
+        LevelScale4x4Luma[0][qp_rem][row][col]    = (fwd << 4) / weight;
+        InvLevelScale4x4Luma[0][qp_rem][row][col] = inv * weight;
+
+        // list 4: inter chroma U
+        if (present[4])
+        {
+          weight = UseDefaultScalingMatrix4x4Flag[4] ? Quant_inter_default[pos] : ScalingList4x4[4][pos];
+          LevelScale4x4Chroma[0][0][qp_rem][row][col]    = (fwd << 4) / weight;
+          InvLevelScale4x4Chroma[0][0][qp_rem][row][col] = inv * weight;
+        }
+        else
+        {
+          LevelScale4x4Chroma[0][0][qp_rem][row][col]    = LevelScale4x4Luma[0][qp_rem][row][col];
+          InvLevelScale4x4Chroma[0][0][qp_rem][row][col] = InvLevelScale4x4Luma[0][qp_rem][row][col];
+        }
+
+        // list 5: inter chroma V
+        if (present[5])
+        {
+          weight = UseDefaultScalingMatrix4x4Flag[5] ? Quant_inter_default[pos] : ScalingList4x4[5][pos];
+          LevelScale4x4Chroma[1][0][qp_rem][row][col]    = (fwd << 4) / weight;
+          InvLevelScale4x4Chroma[1][0][qp_rem][row][col] = inv * weight;
+        }
+        else
+        {
+          LevelScale4x4Chroma[1][0][qp_rem][row][col]    = LevelScale4x4Chroma[0][0][qp_rem][row][col];
+          InvLevelScale4x4Chroma[1][0][qp_rem][row][col] = InvLevelScale4x4Chroma[0][0][qp_rem][row][col];
+        }
+      }
+    }
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fill the 8x8 forward and inverse level scale tables for the
+ *    currently active SPS/PPS.
+ *
+ *    Without any scaling matrix the tables are the plain quant_coef8
+ *    values and dequant_coef8 << 4. Otherwise each entry is
+ *    (quant_coef8 << 4) / w resp. dequant_coef8 * w, where w comes from
+ *    ScalingList8x8 or, when the list is absent or flagged
+ *    "use default", from the built-in default matrix.
+ *
+ ************************************************************************
+ */
+void CalculateQuant8Param()
+{
+  int qp_rem, row, col, pos, n;
+  int weight;
+  int present[2];
+  const int flat = !active_sps->seq_scaling_matrix_present_flag && !active_pps->pic_scaling_matrix_present_flag;
+
+  if (!flat)
+  {
+    present[0] = 0;
+    present[1] = 0;
+
+    // 8x8 lists occupy positions 6 and 7 of the present-flag arrays
+    if (active_sps->seq_scaling_matrix_present_flag)
+    {
+      for (n = 0; n < 2; n++)
+        present[n] = active_sps->seq_scaling_list_present_flag[n+6];
+    }
+
+    if (active_pps->pic_scaling_matrix_present_flag)
+    {
+      for (n = 0; n < 2; n++)
+        present[n] |= active_pps->pic_scaling_list_present_flag[n+6];
+    }
+  }
+
+  for (qp_rem = 0; qp_rem < 6; qp_rem++)
+  {
+    for (row = 0; row < 8; row++)
+    {
+      for (col = 0; col < 8; col++)
+      {
+        const int fwd = quant_coef8[qp_rem][row][col];
+        const int inv = dequant_coef8[qp_rem][row][col];
+
+        if (flat)
+        {
+          LevelScale8x8Luma[1][qp_rem][row][col]    = fwd;
+          InvLevelScale8x8Luma[1][qp_rem][row][col] = inv << 4;
+
+          LevelScale8x8Luma[0][qp_rem][row][col]    = fwd;
+          InvLevelScale8x8Luma[0][qp_rem][row][col] = inv << 4;
+          continue;
+        }
+
+        pos = (row << 3) + col;
+
+        // list 0: intra luma
+        if (present[0] && !UseDefaultScalingMatrix8x8Flag[0])
+          weight = ScalingList8x8[0][pos];
+        else
+          weight = Quant8_intra_default[pos];
+        LevelScale8x8Luma[1][qp_rem][row][col]    = (fwd << 4) / weight;
+        InvLevelScale8x8Luma[1][qp_rem][row][col] = inv * weight;
+
+        // list 1: inter luma
+        if (present[1] && !UseDefaultScalingMatrix8x8Flag[1])
+          weight = ScalingList8x8[1][pos];
+        else
+          weight = Quant8_inter_default[pos];
+        LevelScale8x8Luma[0][qp_rem][row][col]    = (fwd << 4) / weight;
+        InvLevelScale8x8Luma[0][qp_rem][row][col] = inv * weight;
+      }
+    }
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/q_matrix.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_matrix.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/q_matrix.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,43 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ *    q_matrix.h
+ *
+ * \brief
+ *    Headerfile for q_matrix array
+ *
+ * \date
+ *    07. Apr 2004
+ ***************************************************************************
+ */
+
+#ifndef _Q_MATRIX_H_
+#define _Q_MATRIX_H_
+
+// Forward (quantisation) level scale tables, defined in q_matrix.c.
+extern int ****LevelScale4x4Luma;
+extern int *****LevelScale4x4Chroma;
+extern int ****LevelScale8x8Luma;
+
+// Inverse (dequantisation) level scale tables, defined in q_matrix.c.
+extern int ****InvLevelScale4x4Luma;
+extern int *****InvLevelScale4x4Chroma;
+extern int ****InvLevelScale8x8Luma;
+
+// Scaling lists: the *input arrays hold the values read from the Q matrix
+// cfg file; the other two are the lists read when the level scale tables
+// are computed.
+extern short ScalingList4x4input[6][16];
+extern short ScalingList8x8input[2][64];
+extern short ScalingList4x4[6][16];
+extern short ScalingList8x8[2][64];
+
+// Non-zero entry: use the built-in default matrix for that list.
+extern short UseDefaultScalingMatrix4x4Flag[6];
+extern short UseDefaultScalingMatrix8x8Flag[2];
+
+// Lookup tables: entry i holds i / 6 resp. i % 6.
+extern int *qp_per_matrix;
+extern int *qp_rem_matrix;
+
+
+// Init_QMatrix allocates the tables (and reads the cfg file if enabled),
+// the Calculate* functions fill the level scale tables, free_QMatrix
+// releases the tables.
+void Init_QMatrix (void);
+void CalculateQuantParam(void);
+void CalculateQuant8Param(void);
+void free_QMatrix(void);
+
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/q_offsets.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_offsets.c:1.4
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/q_offsets.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,544 @@
+
+/*!
+ *************************************************************************************
+ * \file q_offsets.c
+ *
+ * \brief
+ *    read Quantization Offset matrix parameters from input file: q_OffsetMatrix.cfg
+ *
+ *************************************************************************************
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "global.h"
+#include "memalloc.h"
+
+extern char *GetConfigFileContent (char *Filename, int error_type);
+
+#define MAX_ITEMS_TO_PARSE  1000
+
+// Per-matrix "found in cfg file" flags, one entry per name in OffsetType4x4
+// (15 names) and OffsetType8x8 (5 names). ParseQOffsetMatrix() indexes these
+// arrays with the name index, so they must be as long as the name tables;
+// the former sizes of 6 and 2 allowed writes past the end.
+int offset4x4_check[15] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+int offset8x8_check[5] = { 0, 0, 0, 0, 0 };
+
+// Parameter names accepted for 4x4 offset matrices. The position of a name
+// in this table is the index returned by CheckOffsetParameterName() and the
+// row index into OffsetList4x4input.
+static const char OffsetType4x4[15][24] = {
+  "INTRA4X4_LUMA_INTRA",
+  "INTRA4X4_CHROMAU_INTRA",
+  "INTRA4X4_CHROMAV_INTRA",
+  "INTRA4X4_LUMA_INTERP",
+  "INTRA4X4_CHROMAU_INTERP",
+  "INTRA4X4_CHROMAV_INTERP",
+  "INTRA4X4_LUMA_INTERB",
+  "INTRA4X4_CHROMAU_INTERB",
+  "INTRA4X4_CHROMAV_INTERB",
+  "INTER4X4_LUMA_INTERP",
+  "INTER4X4_CHROMAU_INTERP",
+  "INTER4X4_CHROMAV_INTERP",
+  "INTER4X4_LUMA_INTERB",
+  "INTER4X4_CHROMAU_INTERB",
+  "INTER4X4_CHROMAV_INTERB"
+};
+
+// Parameter names accepted for 8x8 offset matrices; same indexing
+// convention, applied to OffsetList8x8input.
+static const char OffsetType8x8[5][24] = {
+  "INTRA8X8_LUMA_INTRA",
+  "INTRA8X8_LUMA_INTERP",
+  "INTRA8X8_LUMA_INTERB",
+  "INTER8X8_LUMA_INTERP",
+  "INTER8X8_LUMA_INTERB"
+};
+
+
+// Quantisation offset tables, allocated in allocate_QOffsets() as
+// [2][max_qp_per][4][4], [2][2][max_qp_per][4][4] and [2][max_qp_per][8][8]
+// and filled by CalculateOffsetParam().
+int ****LevelOffset4x4Luma;
+int *****LevelOffset4x4Chroma;
+int ****LevelOffset8x8Luma;
+
+// Set by CalculateOffsetParam() from input->AdaptRndWFactor /
+// input->AdaptRndCrWFactor for the current picture.
+int AdaptRndWeight;
+int AdaptRndCrWeight;
+
+// Offset lists: 15 rows of 16 values (4x4) and 5 rows of 64 values (8x8),
+// allocated in allocate_QOffsets(). The *input lists receive the cfg file
+// values; the other two are the lists in use, set up by InitOffsetParam().
+short **OffsetList4x4input;
+short **OffsetList8x8input;
+short **OffsetList4x4;
+short **OffsetList8x8;
+
+// Forward declaration; defined further down in this file.
+void InitOffsetParam ();
+
+// Offset values are shifted left by (Q_BITS + k - OffsetBits) in
+// CalculateOffsetParam(), i.e. they are expressed in units of 2^-OffsetBits.
+const int OffsetBits = 11;
+
+// Built-in offset lists used by InitOffsetParam() when no offset matrix file
+// is in use. With OffsetBits = 11 the value 682 is about 1/3 and 342 about
+// 1/6 of 2^11.
+
+// 4x4 list 0 (INTRA4X4_LUMA_INTRA)
+static const short Offset_intra_default_intra[16] = {
+  682, 682, 682, 682,
+  682, 682, 682, 682,
+  682, 682, 682, 682,
+  682, 682, 682, 682
+};
+
+// 4x4 lists 1..2 (INTRA4X4_CHROMAU_INTRA, INTRA4X4_CHROMAV_INTRA)
+static const short Offset_intra_default_chroma[16] = {
+  682, 682, 682, 682,
+  682, 682, 682, 682,
+  682, 682, 682, 682,
+  682, 682, 682, 682
+};
+
+
+// 4x4 lists 3..8 (INTRA4X4_* in INTERP / INTERB)
+static const short Offset_intra_default_inter[16] = {
+  342, 342, 342, 342,
+  342, 342, 342, 342,
+  342, 342, 342, 342,
+  342, 342, 342, 342,
+};
+
+// 4x4 lists 9..14 (INTER4X4_*)
+static const short Offset_inter_default[16] = {
+  342, 342, 342, 342,
+  342, 342, 342, 342,
+  342, 342, 342, 342,
+  342, 342, 342, 342,
+};
+
+// 8x8 list 0 (INTRA8X8_LUMA_INTRA)
+static const short Offset8_intra_default_intra[64] = {
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682,
+  682, 682, 682, 682, 682, 682, 682, 682
+};
+
+// 8x8 lists 1..2 (INTRA8X8_LUMA_INTERP, INTRA8X8_LUMA_INTERB)
+static const short Offset8_intra_default_inter[64] = {
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342
+};
+
+// 8x8 lists 3..4 (INTER8X8_LUMA_INTERP, INTER8X8_LUMA_INTERB)
+static const short Offset8_inter_default[64] = {
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342,
+  342, 342, 342, 342, 342, 342, 342, 342
+};
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Allocate the quantisation offset tables and the offset lists.
+ *
+ *    The level offset tables get one entry per QP period; the number of
+ *    periods is derived from the larger of the luma and chroma bit
+ *    depths. Released again by free_QOffsets().
+ ***********************************************************************
+ */
+void allocate_QOffsets ()
+{
+  const int luma_periods   = (3 + 6*(input->BitDepthLuma) - MIN_QP)/6 + 1;
+  const int chroma_periods = (3 + 6*(input->BitDepthChroma) - MIN_QP)/6 + 1;
+  const int periods        = imax(luma_periods,chroma_periods);
+
+  // level offset tables
+  get_mem4Dint(&LevelOffset4x4Luma, 2, periods, 4, 4);
+  get_mem5Dint(&LevelOffset4x4Chroma, 2, 2, periods, 4, 4);
+  get_mem4Dint(&LevelOffset8x8Luma, 2, periods, 8, 8);
+
+  // offset lists: 15 4x4 lists of 16 values, 5 8x8 lists of 64 values
+  get_mem2Dshort(&OffsetList4x4input, 15, 16);
+  get_mem2Dshort(&OffsetList8x8input, 5, 64);
+  get_mem2Dshort(&OffsetList4x4, 15, 16);
+  get_mem2Dshort(&OffsetList8x8, 5, 64);
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Release the tables and lists obtained by allocate_QOffsets().
+ *
+ *    The number of QP periods is recomputed with the same formula as in
+ *    allocate_QOffsets() because the free routines need the dimensions.
+ ***********************************************************************
+ */
+void free_QOffsets ()
+{
+  const int luma_periods   = (3 + 6*(input->BitDepthLuma) - MIN_QP)/6 + 1;
+  const int chroma_periods = (3 + 6*(input->BitDepthChroma) - MIN_QP)/6 + 1;
+  const int periods        = imax(luma_periods,chroma_periods);
+
+  // level offset tables
+  free_mem4Dint(LevelOffset4x4Luma, 2, periods);
+  free_mem5Dint(LevelOffset4x4Chroma, 2, 2, periods);
+  free_mem4Dint(LevelOffset8x8Luma, 2, periods);
+
+  // offset lists, in reverse order of allocation
+  free_mem2Dshort(OffsetList8x8);
+  free_mem2Dshort(OffsetList4x4);
+  free_mem2Dshort(OffsetList8x8input);
+  free_mem2Dshort(OffsetList4x4input);
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Check the parameter name.
+ *
+ *    The 4x4 name table is searched first, then the 8x8 table.
+ * \param s
+ *    parameter name string (NUL terminated)
+ * \param type
+ *    output: set to 0 when s names a 4x4 offset matrix, 1 when it names
+ *    an 8x8 offset matrix. It is left at 1 when the name is not found.
+ * \return
+ *    the index number if the string is a valid parameter name,         \n
+ *    -1 for error
+ ***********************************************************************
+ */
+
+int CheckOffsetParameterName (char *s, int *type)
+{
+  const int num4x4 = (int) (sizeof (OffsetType4x4) / sizeof (OffsetType4x4[0]));
+  const int num8x8 = (int) (sizeof (OffsetType8x8) / sizeof (OffsetType8x8[0]));
+  int i;
+
+  // The name tables are real 2-D char arrays, so a row can never compare
+  // equal to NULL; the table size alone bounds each search.
+  *type = 0;
+  for (i = 0; i < num4x4; i++)
+  {
+    if (0 == strcmp (OffsetType4x4[i], s))
+      return i;
+  }
+
+  *type = 1;
+  for (i = 0; i < num8x8; i++)
+  {
+    if (0 == strcmp (OffsetType8x8[i], s))
+      return i;
+  }
+
+  return -1;
+}
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Parse the Q Offset Matrix values read from cfg file.
+ *
+ *    The buffer is first split in place into NUL-terminated tokens
+ *    (whitespace, ',' and line ends separate tokens, '#' starts a
+ *    comment that runs to the end of the line, '"' delimits strings).
+ *    The tokens are then consumed as entries of the form
+ *    "NAME = v0, v1, ..." with 16 values for a 4x4 matrix and 64 values
+ *    for an 8x8 matrix; the values are stored in OffsetList4x4input /
+ *    OffsetList8x8input and the matching offsetNxN_check flag is set.
+ *
+ *    Errors are reported through error(errortext, 300).
+ *    NOTE(review): error() is presumably fatal; the explicit returns after
+ *    it only guard against continuing with bad state should it return.
+ * \param buf
+ *    buffer to be parsed (modified in place)
+ * \param bufsize
+ *    buffer size of buffer
+ ***********************************************************************
+ */
+void ParseQOffsetMatrix (char *buf, int bufsize)
+{
+  char *items[MAX_ITEMS_TO_PARSE];
+  int MapIdx;
+  int item = 0;
+  int total;
+  int InString = 0, InItem = 0;
+  char *p = buf;
+  char *bufend = &buf[bufsize];
+  int IntContent;
+  int i, j, range, type, cnt;
+  short *OffsetList;
+
+  // Pass 1: tokenise the buffer in place
+  while (p < bufend)
+  {
+    switch (*p)
+    {
+      case 13:                  // Carriage return: ignored
+        p++;
+        break;
+      case '#':                 // Found comment
+        *p = '\0';              // Replace '#' with '\0' in case of comment immediately following integer or string
+        while (p < bufend && *p != '\n')  // Skip till EOL or EOF; bounds are tested before dereferencing
+          p++;
+        InString = 0;
+        InItem = 0;
+        break;
+      case '\n':
+        InItem = 0;
+        InString = 0;
+        *p++ = '\0';
+        break;
+      case ' ':
+      case '\t':              // Skip whitespace, leave state unchanged
+        if (InString)
+          p++;
+        else
+        {                     // Terminate non-strings once whitespace is found
+          *p++ = '\0';
+          InItem = 0;
+        }
+        break;
+
+      case '"':               // Begin/End of String
+        *p++ = '\0';
+        if (!InString)
+        {
+          if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+          {
+            snprintf (errortext, ET_SIZE,
+              " Parsing error in config file: more than %d items found.",
+              MAX_ITEMS_TO_PARSE);
+            error (errortext, 300);
+            return;
+          }
+          items[item++] = p;
+          InItem = ~InItem;
+        }
+        else
+          InItem = 0;
+        InString = ~InString; // Toggle
+        break;
+
+      case ',':
+        p++;
+        InItem = 0;
+        break;
+
+      default:
+        if (!InItem)
+        {
+          if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+          {
+            snprintf (errortext, ET_SIZE,
+              " Parsing error in config file: more than %d items found.",
+              MAX_ITEMS_TO_PARSE);
+            error (errortext, 300);
+            return;
+          }
+          items[item++] = p;
+          InItem = ~InItem;
+        }
+        p++;
+    }
+  }
+
+  total = item;   // number of valid entries in items[]
+  item--;         // as before: a single left-over token at the very end does not start a new entry
+
+  // Pass 2: interpret the tokens as "NAME = values..." entries
+  for (i = 0; i < item; i += cnt)
+  {
+    cnt = 0;
+    if (0 > (MapIdx = CheckOffsetParameterName (items[i + cnt], &type)))
+    {
+      snprintf (errortext, ET_SIZE,
+        " Parsing error in config file: Parameter Name '%s' not recognized.",
+        items[i + cnt]);
+      error (errortext, 300);
+      return;
+    }
+    cnt++;
+    if (strcmp ("=", items[i + cnt]))   // i < total-1, so items[i+1] is always a valid token
+    {
+      snprintf (errortext, ET_SIZE,
+        " Parsing error in config file: '=' expected as the second token in each item.");
+      error (errortext, 300);
+      return;
+    }
+    cnt++;
+
+    range = (!type) ? 16 : 64;          // 4x4 or 8x8 matrix
+
+    // A truncated file must not make us read token slots that were never filled
+    if (i + cnt + range > total)
+    {
+      snprintf (errortext, ET_SIZE,
+        " Parsing error: Expected %d values for Parameter %s, found only %d.",
+        range, items[i], total - i - cnt);
+      error (errortext, 300);
+      return;
+    }
+
+    // The "found" flags are only recorded while the index fits the flag
+    // array, so a name index can never write past its end.
+    if (!type) //4x4 Matrix
+    {
+      OffsetList = OffsetList4x4input[MapIdx];
+      if (MapIdx < (int) (sizeof (offset4x4_check) / sizeof (offset4x4_check[0])))
+        offset4x4_check[MapIdx] = 1; //to indicate matrix found in cfg file
+    }
+    else //8x8 matrix
+    {
+      OffsetList = OffsetList8x8input[MapIdx];
+      if (MapIdx < (int) (sizeof (offset8x8_check) / sizeof (offset8x8_check[0])))
+        offset8x8_check[MapIdx] = 1; //to indicate matrix found in cfg file
+    }
+
+    for (j = 0; j < range; j++)
+    {
+      if (1 != sscanf (items[i + cnt + j], "%d", &IntContent))
+      {
+        snprintf (errortext, ET_SIZE,
+          " Parsing error: Expected numerical value for Parameter of %s, found '%s'.",
+          items[i], items[i + cnt + j]);
+        error (errortext, 300);
+        return;
+      }
+
+      OffsetList[j] = (short) IntContent; //save value in matrix
+    }
+    cnt += j;
+    printf (".");
+  }
+}
+
+
+/*!
+ ***********************************************************************
+ * \brief
+ *    Initialise Q offset matrix values.
+ *
+ *    Always allocates the offset tables. When
+ *    input->OffsetMatrixPresentFlag is set, the file named by
+ *    input->QOffsetMatrixFile is read and parsed; if its content cannot
+ *    be obtained, the flag is cleared so that InitOffsetParam() falls
+ *    back to the built-in defaults.
+ ***********************************************************************
+ */
+void Init_QOffsetMatrix ()
+{
+  char *content;
+
+  allocate_QOffsets ();
+
+  if (input->OffsetMatrixPresentFlag)
+  {
+    printf ("Parsing Quantization Offset Matrix file %s ",
+      input->QOffsetMatrixFile);
+    content = GetConfigFileContent (input->QOffsetMatrixFile, 0);
+    if (content != NULL)   // compare the pointer with NULL, not with the character '\0'
+      ParseQOffsetMatrix (content, (int) strlen (content));
+    else
+    {
+      printf
+        ("\nError: %s\nProceeding with default values for all matrices.",
+        errortext);
+      input->OffsetMatrixPresentFlag = 0;
+    }
+
+    printf ("\n");
+
+    free (content);        // free(NULL) is a no-op
+  }
+  //! Now set up all offset params. This process could be reused if we wish to re-init offsets
+  InitOffsetParam ();
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Set up the offset lists in use (OffsetList4x4 / OffsetList8x8).
+ *
+ *    With an offset matrix file in use the lists are copied from the
+ *    *input lists; otherwise they are filled from the built-in defaults.
+ *    NOTE(review): the bulk copies assume that the rows of the lists
+ *    returned by get_mem2Dshort are stored contiguously -- confirm
+ *    against memalloc.c.
+ *
+ * \par Input:
+ *    none
+ *
+ * \par Output:
+ *    none
+ ************************************************************************
+ */
+void InitOffsetParam ()
+{
+  const size_t bytes4x4 = 16 * sizeof(short);
+  const size_t bytes8x8 = 64 * sizeof(short);
+  int list;
+
+  if (!input->OffsetMatrixPresentFlag)
+  {
+    // 4x4: list 0 intra luma, 1..2 intra chroma, 3..8 intra blocks in
+    // inter slices, 9..14 inter blocks
+    for (list = 0; list < 15; list++)
+    {
+      const short *defaults;
+
+      if (list == 0)
+        defaults = Offset_intra_default_intra;
+      else if (list < 3)
+        defaults = Offset_intra_default_chroma;
+      else if (list < 9)
+        defaults = Offset_intra_default_inter;
+      else
+        defaults = Offset_inter_default;
+
+      memcpy(OffsetList4x4[list], defaults, bytes4x4);
+    }
+
+    // 8x8: list 0 intra, 1..2 intra blocks in inter slices, 3..4 inter blocks
+    memcpy(OffsetList8x8[0], Offset8_intra_default_intra, bytes8x8);
+    memcpy(OffsetList8x8[1], Offset8_intra_default_inter, bytes8x8);
+    memcpy(OffsetList8x8[2], Offset8_intra_default_inter, bytes8x8);
+    memcpy(OffsetList8x8[3], Offset8_inter_default, bytes8x8);
+    memcpy(OffsetList8x8[4], Offset8_inter_default, bytes8x8);
+    return;
+  }
+
+  // values from the cfg file: copy all lists in one go
+  memcpy(OffsetList4x4[0], OffsetList4x4input[0], 15 * bytes4x4);
+  memcpy(OffsetList8x8[0], OffsetList8x8input[0],  5 * bytes8x8);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Calculation of the quantization offset params at the frame level
+ *
+ *    Selects AdaptRndWeight / AdaptRndCrWeight for the current slice and
+ *    fills the 4x4 luma and chroma level offset tables for every qp_per
+ *    value. Each entry is an OffsetList4x4 value shifted left by
+ *    (Q_BITS + qp_per - OffsetBits). SI slices are treated as I slices
+ *    and SP slices as P slices.
+ *
+ * \par Input:
+ *    none
+ *
+ * \par Output:
+ *    none
+ ************************************************************************
+ */
+void CalculateOffsetParam ()
+{
+  int row, col, per, pos, shift;
+  int intra_list, inter_list;  // first OffsetList4x4 row of the luma / chroma[0] / chroma[1] triple
+  int num_per;
+  int slice_type = img->type;
+
+  const int luma_per   = qp_per_matrix[(51 + img->bitdepth_luma_qp_scale - MIN_QP)] + 1;
+  const int chroma_per = qp_per_matrix[(51 + img->bitdepth_chroma_qp_scale - MIN_QP)] + 1;
+
+  // switching slices share the settings of their non-switching counterparts
+  if (slice_type == SI_SLICE)
+    slice_type = I_SLICE;
+  else if (slice_type == SP_SLICE)
+    slice_type = P_SLICE;
+
+  AdaptRndWeight = input->AdaptRndWFactor[img->nal_reference_idc!=0][slice_type];
+  AdaptRndCrWeight = input->AdaptRndCrWFactor[img->nal_reference_idc!=0][slice_type];
+
+  // lists 0-2 / 3-5 / 6-8 feed index 1 of the level tables for I / other / B slices
+  if (slice_type == I_SLICE)
+    intra_list = 0;
+  else if (slice_type == B_SLICE)
+    intra_list = 6;
+  else
+    intra_list = 3;
+
+  // lists 9-11 / 12-14 feed index 0 of the level tables for non-B / B slices
+  inter_list = (slice_type == B_SLICE) ? 12 : 9;
+
+  num_per = imax(luma_per, chroma_per);
+  for (per = 0; per < num_per; per++)
+  {
+    shift = Q_BITS + per - OffsetBits;
+    for (row = 0; row < 4; row++)
+    {
+      for (col = 0; col < 4; col++)
+      {
+        pos = (row << 2) + col;  // raster position inside the 16-entry list
+
+        LevelOffset4x4Luma[1][per][row][col] =
+          (int) OffsetList4x4[intra_list][pos] << shift;
+        LevelOffset4x4Chroma[0][1][per][row][col] =
+          (int) OffsetList4x4[intra_list + 1][pos] << shift;
+        LevelOffset4x4Chroma[1][1][per][row][col] =
+          (int) OffsetList4x4[intra_list + 2][pos] << shift;
+
+        LevelOffset4x4Luma[0][per][row][col] =
+          (int) OffsetList4x4[inter_list][pos] << shift;
+        LevelOffset4x4Chroma[0][0][per][row][col] =
+          (int) OffsetList4x4[inter_list + 1][pos] << shift;
+        LevelOffset4x4Chroma[1][0][per][row][col] =
+          (int) OffsetList4x4[inter_list + 2][pos] << shift;
+      }
+    }
+  }
+}
+
+ /*!
+ ************************************************************************
+ * \brief
+ *    Calculate the quantisation offset parameters
+ *
+ *    Fills the 8x8 luma level offset tables for every qp_per value; each
+ *    entry is an OffsetList8x8 value shifted left by
+ *    (Q_BITS_8 + qp_per - OffsetBits). The list is chosen from img->type.
+ ************************************************************************
+ */
+void CalculateOffset8Param ()
+{
+  int row, col, per, pos, shift;
+  int intra_list, inter_list;
+  int num_per;
+
+  const int luma_per   = qp_per_matrix[(51 + img->bitdepth_luma_qp_scale - MIN_QP)] + 1;
+  const int chroma_per = qp_per_matrix[(51 + img->bitdepth_chroma_qp_scale - MIN_QP)] + 1;
+
+  // list 0 / 1 / 2 feeds index 1 of the level table for I / other / B slices
+  if (img->type == I_SLICE)
+    intra_list = 0;
+  else if (img->type == B_SLICE)
+    intra_list = 2;
+  else
+    intra_list = 1;
+
+  // list 3 / 4 feeds index 0 of the level table for non-B / B slices
+  inter_list = (img->type == B_SLICE) ? 4 : 3;
+
+  num_per = imax(luma_per, chroma_per);
+  for (per = 0; per < num_per; per++)
+  {
+    shift = Q_BITS_8 + per - OffsetBits;
+    for (row = 0; row < 8; row++)
+    {
+      for (col = 0; col < 8; col++)
+      {
+        pos = (row << 3) + col;  // raster position inside the 64-entry list
+
+        LevelOffset8x8Luma[1][per][row][col] =
+          (int) OffsetList8x8[intra_list][pos] << shift;
+        LevelOffset8x8Luma[0][per][row][col] =
+          (int) OffsetList8x8[inter_list][pos] << shift;
+      }
+    }
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/q_offsets.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_offsets.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/q_offsets.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,33 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ *    q_offsets.h
+ *
+ * \brief
+ *    Headerfile for q_offsets array
+ *
+ * \date
+ *    18. Nov 2004
+ ***************************************************************************
+ */
+
+#ifndef _Q_OFFSETS_H_
+#define _Q_OFFSETS_H_
+
+extern int ****LevelOffset4x4Luma;    // written as [0|1][qp_per][j][i] by CalculateOffsetParam(); index 1 takes the intra lists, 0 the inter lists
+extern int *****LevelOffset4x4Chroma; // written as [0|1][0|1][qp_per][j][i]; first index picks the chroma list (presumably the plane -- TODO confirm), second as for luma
+extern int ****LevelOffset8x8Luma;    // written as [0|1][qp_per][j][i] by CalculateOffset8Param()
+extern short **OffsetList4x4;         // 15 lists of 16 offsets, filled by InitOffsetParam()
+extern short **OffsetList8x8;         // 5 lists of 64 offsets, filled by InitOffsetParam()
+extern const int OffsetBits;          // subtracted from Q_BITS (or Q_BITS_8) + qp_per to form the left shift applied to each offset
+
+
+extern int AdaptRndWeight;            // set per slice by CalculateOffsetParam() from input->AdaptRndWFactor
+extern int AdaptRndCrWeight;          // set per slice by CalculateOffsetParam() from input->AdaptRndCrWFactor
+
+void Init_QOffsetMatrix (void);       // allocate, optionally parse the offset file, then call InitOffsetParam()
+void CalculateOffsetParam(void);      // fill the 4x4 level offset tables for the current slice type
+void CalculateOffset8Param(void);     // fill the 8x8 luma level offset table for the current slice type
+void free_QOffsets (void);
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/ratectl.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/ratectl.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/ratectl.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,247 @@
+
+/*!
+ ***************************************************************************
+ * \file ratectl.c
+ *
+ * \brief
+ *    Rate Control algorithm
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Siwei Ma <swma at jdl.ac.cn>
+ *     - Zhengguo LI<ezgli at lit.a-star.edu.sg>
+ *
+ * \date
+ *   16 Jan. 2003
+ **************************************************************************
+ */
+
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+#include <memory.h>
+#include <limits.h>
+
+#include "global.h"
+#include "ratectl.h"
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Update Rate Control Parameters
+ *
+ *    Records the MAD of the current macroblock. When the basic unit is smaller
+ *    than a frame it also accumulates that MAD and settles the macroblock's
+ *    delta_qp / qp / prev_cbp state. The chroma QP is set in every case.
+ *
+ * \param currMB
+ *    macroblock whose cbp / qp fields are read and updated
+ * \param best_mode
+ *    mode selected for the macroblock (compared against I16MB and IPCM)
+ *************************************************************************************
+ */
+void update_rc(Macroblock *currMB, short best_mode)
+{
+  generic_RC->MADofMB[img->current_mb_nr] = calc_MAD();
+
+  // frame-level control: nothing else to track per macroblock
+  if(input->basicunit >= img->FrameSizeInMbs)
+  {
+    set_chroma_qp(currMB);
+    return;
+  }
+
+  generic_RC->TotalMADBasicUnit += generic_RC->MADofMB[img->current_mb_nr];
+
+  // delta_qp is present only for non-skipped macroblocks
+  if ((best_mode==IPCM) || (currMB->cbp==0 && best_mode!=I16MB))
+  {
+    // no delta_qp for this macroblock: fall back to the previous QP
+    currMB->delta_qp = 0;
+    currMB->qp = currMB->prev_qp;
+    img->qp = currMB->qp;
+    currMB->prev_cbp = 0;
+  }
+  else
+    currMB->prev_cbp = 1;
+
+  if (input->MbInterlace)
+  {
+    // update rdopt buffered qps...
+    rdopt->delta_qp = currMB->delta_qp;
+    rdopt->qp = currMB->qp;
+    rdopt->prev_cbp = currMB->prev_cbp;
+
+    delta_qp_mbaff[currMB->mb_field][img->bot_MB] = currMB->delta_qp;
+    qp_mbaff      [currMB->mb_field][img->bot_MB] = currMB->qp;
+  }
+
+  set_chroma_qp(currMB);
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    map QP to Qstep
+ *
+ *    Qstep = QP2QSTEP[QP mod 6] * 2^floor(QP / 6), i.e. the step size doubles
+ *    every 6 QP. Division and remainder are floored, so a negative QP halves
+ *    the step size instead of indexing the table with a negative remainder.
+ *
+ * \param QP
+ *    quantisation parameter (may be negative)
+ *
+ * \return
+ *    quantisation step size for QP
+ *************************************************************************************
+*/
+double QP2Qstep( int QP )
+{
+  int i;
+  int q_per = QP / 6;
+  int q_rem = QP % 6;
+  double Qstep;
+  static const double QP2QSTEP[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 };
+
+  // C division truncates toward zero; convert to floored division for negative QP
+  if (q_rem < 0)
+  {
+    q_rem += 6;
+    q_per -= 1;
+  }
+
+  Qstep = QP2QSTEP[q_rem];
+  for( i=0; i<q_per; i++)
+    Qstep *= 2;
+  for( i=0; i>q_per; i--)
+    Qstep *= 0.5;
+
+  return Qstep;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    map Qstep to QP
+ *
+ *    Values below QP2Qstep(0) map to 0 and values above QP2Qstep(51) map to 51.
+ *    Otherwise Qstep is halved until it is no larger than QP2Qstep(5), counting
+ *    the halvings as q_per, and the remainder q_rem is the first of five upper
+ *    bounds that Qstep does not exceed (5 if it exceeds them all).
+ *
+ * \param Qstep
+ *    quantisation step size
+ *
+ * \return
+ *    q_per * 6 + q_rem
+ *************************************************************************************
+*/
+int Qstep2QP( double Qstep )
+{
+  // upper bounds of the Qstep intervals that round to q_rem = 0..4
+  static const double upper_bound[5] = { 0.65625, 0.75, 0.84375, 0.9375, 1.0625 };
+  int q_per = 0, q_rem = 0;
+
+  //  assert( Qstep >= QP2Qstep(0) && Qstep <= QP2Qstep(51) );
+  if( Qstep < QP2Qstep(0))
+    return 0;
+  if (Qstep > QP2Qstep(51) )
+    return 51;
+
+  for ( ; Qstep > QP2Qstep(5); q_per++ )
+    Qstep /= 2.0;
+
+  // written as !(a <= b) so that an unordered (NaN) Qstep still ends at q_rem = 5
+  while (q_rem < 5 && !(Qstep <= upper_bound[q_rem]))
+    q_rem++;
+
+  return (q_per * 6 + q_rem);
+}
+
+/*!
+ ************************************************************************************
+ * \brief
+ *    calculate MAD for the current macroblock
+ *
+ * \return
+ *    sum of the absolute values of all 16x16 entries of diffy
+ *
+ *************************************************************************************
+*/
+int calc_MAD()
+{
+  int row, col;
+  int mad = 0;
+
+  for (row = 0; row < 16; row++)
+  {
+    const int *residual = diffy[row];
+
+    for (col = 0; col < 16; col++)
+      mad += iabs(residual[col]);
+  }
+
+  return mad;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Compute Frame MAD
+ *
+ * \return
+ *    sum of generic_RC->MADofMB over all macroblocks of the frame, divided by
+ *    256 times the number of macroblocks
+ *************************************************************************************
+*/
+double ComputeFrameMAD()
+{
+  int64 mad_sum = 0;
+  unsigned int mb = 0;
+
+  while (mb < img->FrameSizeInMbs)
+  {
+    mad_sum += generic_RC->MADofMB[mb];
+    mb++;
+  }
+
+  return (double)mad_sum / (256.0 * (double)img->FrameSizeInMbs);
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copy JVT rate control objects
+ *
+ *    Copies every field of src into dst, except that dst keeps its own MADofMB
+ *    buffer, which receives a copy of the img->FrameSizeInMbs entries of
+ *    src->MADofMB.
+ *
+ * \param dst
+ *    destination object; its MADofMB buffer must already be allocated
+ * \param src
+ *    source object
+ *************************************************************************************
+*/
+void copy_rc_generic( rc_generic *dst, rc_generic *src )
+{
+  /* the struct copy below overwrites the pointer, so remember dst's own buffer */
+  int *const own_mad = dst->MADofMB;
+
+  memcpy( dst, src, sizeof(*dst) );
+  dst->MADofMB = own_mad;
+
+  /* deep copy of the per-macroblock MADs */
+  memcpy( own_mad, src->MADofMB, img->FrameSizeInMbs * sizeof(*own_mad) );
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Dynamically allocate memory needed for generic rate control
+ *
+ *    Allocates the object and a zeroed MADofMB array of img->FrameSizeInMbs
+ *    entries, and sets FieldFrame to 1. no_mem_exit() is called if either
+ *    allocation fails. The remaining fields are left uninitialised.
+ *
+ * \param prc
+ *    receives the pointer to the new object
+ *************************************************************************************
+ */
+void generic_alloc( rc_generic **prc )
+{
+  rc_generic *rc = (rc_generic *) malloc ( sizeof( rc_generic ) );
+
+  *prc = rc;
+  if (rc == NULL)
+    no_mem_exit("init_global_buffers: generic_alloc");
+
+  rc->MADofMB = (int *) calloc (img->FrameSizeInMbs, sizeof (int));
+  if (rc->MADofMB == NULL)
+    no_mem_exit("init_global_buffers: (*prc)->MADofMB");
+
+  rc->FieldFrame = 1;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Free memory needed for generic rate control
+ *
+ *    Releases the MADofMB array and the object itself and resets *prc to NULL.
+ *    Does nothing when prc or *prc is NULL.
+ *
+ * \param prc
+ *    address of the pointer to the object to free
+ *************************************************************************************
+ */
+void generic_free(rc_generic **prc)
+{
+  // the object pointer must be checked before any member is touched
+  if (NULL==prc || NULL==(*prc))
+    return;
+
+  free ((*prc)->MADofMB);  // free(NULL) is a no-op
+  (*prc)->MADofMB = NULL;
+
+  free ((*prc));
+  (*prc) = NULL;
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/ratectl.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/ratectl.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/ratectl.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,80 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ *    ratectl.h
+ *
+ * \author
+ *    Zhengguo LI
+ *
+ * \date
+ *    14 Jan 2003
+ *
+ * \brief
+ *    Headerfile for rate control
+ **************************************************************************
+ */
+
+#ifndef _RATE_CTL_H_
+#define _RATE_CTL_H_
+
+// NOTE: this header uses Macroblock, int64 and RC_MAX_TEMPORAL_LEVELS without declaring
+// them, so it has to be included after the header that provides them.
+
+/* generic rate control variables */
+typedef struct {
+  // RC flags
+  int TopFieldFlag;
+  int FieldControl;
+  int FieldFrame;          // set to 1 by generic_alloc()
+  int NoGranularFieldRC;
+  // bits stats
+  int NumberofHeaderBits;
+  int NumberofTextureBits;
+  int NumberofBasicUnitHeaderBits;
+  int NumberofBasicUnitTextureBits;
+  // frame stats
+  int NumberofCodedBFrame;
+  int NumberofCodedPFrame;
+  int NumberofGOP;
+  int TotalQpforPPicture;
+  int NumberofPPicture;
+  // MAD stats
+  int64  TotalMADBasicUnit;  // accumulated per macroblock by update_rc()
+  int   *MADofMB;            // img->FrameSizeInMbs entries, owned by the object (see generic_alloc / generic_free)
+  // buffer and budget
+  int64 CurrentBufferFullness; //LIZG 25/10/2002
+  int   RemainingBits;
+  // bit allocations for RC_MODE_3
+  int   RCPSliceBits;
+  int   RCISliceBits;
+  int   RCBSliceBits[RC_MAX_TEMPORAL_LEVELS];
+  int   temporal_levels;
+  int   hierNb[RC_MAX_TEMPORAL_LEVELS];
+  int   NPslice;
+  int   NIslice;
+} rc_generic;
+
+// NOTE(review): the objects below are defined (not declared extern) in this header, so
+// every translation unit including it carries a tentative definition; linking relies on
+// the toolchain merging them. Moving the definitions into a .c file would be cleaner.
+
+// macroblock activity
+int    diffy[16][16];        // summed in absolute value by calc_MAD()
+int    qp_mbaff[2][2];       // indexed [mb_field][bot_MB] in update_rc()
+int    delta_qp_mbaff[2][2]; // indexed [mb_field][bot_MB] in update_rc()
+
+// generic functions
+int    Qstep2QP( double Qstep );
+double QP2Qstep( int QP );
+int    calc_MAD( void );
+double ComputeFrameMAD( void );
+void   update_rc(Macroblock *currMB, short best_mode);
+
+// rate control functions
+// init/copy
+void generic_alloc( rc_generic **prc );
+void generic_free( rc_generic **prc );
+void copy_rc_generic( rc_generic *dst, rc_generic *src );
+
+// rate control CURRENT pointers
+rc_generic   *generic_RC;
+// rate control object pointers for RDPictureDecision buffering...
+rc_generic   *generic_RC_init,   *generic_RC_best;
+
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/rc_quadratic.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rc_quadratic.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/rc_quadratic.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,2355 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file rc_quadratic.c
+  *
+  * \brief
+  *    Rate Control algorithm
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *     - Siwei Ma             <swma at jdl.ac.cn>
+  *     - Zhengguo LI          <ezgli at lit.a-star.edu.sg>
+  *     - Athanasios Leontaris <aleon at dolby.com>
+  *
+  * \date
+  *   16 Jan. 2003
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <memory.h>
+ #include <limits.h>
+ 
+ #include "global.h"
+ #include "ratectl.h"
+ #include "rc_quadratic.h"
+ 
+ static const float THETA = 1.3636F;
+ static const float OMEGA = 0.9F;
+ static const float MINVALUE = 4.0F;
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Dynamically allocate memory needed for rate control
+  *
+  *    Allocates the rc_quadratic object, presets its MAD / bit / bound / QP fields
+  *    and allocates four zeroed per-basic-unit MAD buffers. no_mem_exit() is called
+  *    if any allocation fails. Fields not assigned here are left uninitialised.
+  *
+  * \param prc
+  *    receives the pointer to the new object
+  *************************************************************************************
+  */
+ void rc_alloc( rc_quadratic **prc )
+ {
+   int rcBufSize = img->FrameSizeInMbs/input->basicunit;
+   rc_quadratic *lprc;
+ 
+   // rc_init_seq() explicitly handles basicunit > FrameSizeInMbs, so presumably that can
+   // also be the case here; the count is then 0, and calloc(0, ...) is allowed to return
+   // NULL, which would be misreported as out of memory. Keep at least one entry per buffer.
+   if (rcBufSize < 1)
+     rcBufSize = 1;
+ 
+   (*prc) = (rc_quadratic *) malloc ( sizeof( rc_quadratic ) );
+   if (NULL==(*prc))
+   {
+     no_mem_exit("init_global_buffers: (*prc)");
+   }
+   lprc = *prc;
+ 
+   lprc->PreviousFrameMAD = 1.0;
+   lprc->CurrentFrameMAD = 1.0;
+   lprc->Pprev_bits = 0;
+   lprc->Iprev_bits = 0;
+   lprc->Target = 0;
+   lprc->TargetField = 0;
+   lprc->LowerBound = 0;
+   lprc->UpperBound1 = INT_MAX;
+   lprc->UpperBound2 = INT_MAX;
+   lprc->Wp = 0.0;
+   lprc->Wb = 0.0;
+   // every QP state variable starts from the configured qp0
+   lprc->PAveFrameQP   = input->qp0;
+   lprc->m_Qc          = lprc->PAveFrameQP;
+   lprc->FieldQPBuffer = lprc->PAveFrameQP;
+   lprc->FrameQPBuffer = lprc->PAveFrameQP;
+   lprc->PAverageQp    = lprc->PAveFrameQP;
+   lprc->MyInitialQp   = lprc->PAveFrameQP;
+ 
+   lprc->RC_MAX_QUANT = 51;
+   lprc->RC_MIN_QUANT = -img->bitdepth_luma_qp_scale;//clipping
+ 
+   lprc->BUPFMAD = (double*) calloc ((rcBufSize), sizeof (double));
+   if (NULL==lprc->BUPFMAD)
+   {
+     no_mem_exit("rc_alloc: lprc->BUPFMAD");
+   }
+ 
+   lprc->BUCFMAD = (double*) calloc ((rcBufSize), sizeof (double));
+   if (NULL==lprc->BUCFMAD)
+   {
+     no_mem_exit("rc_alloc: lprc->BUCFMAD");
+   }
+ 
+   lprc->FCBUCFMAD = (double*) calloc ((rcBufSize), sizeof (double));
+   if (NULL==lprc->FCBUCFMAD)
+   {
+     no_mem_exit("rc_alloc: lprc->FCBUCFMAD");
+   }
+ 
+   lprc->FCBUPFMAD = (double*) calloc ((rcBufSize), sizeof (double));
+   if (NULL==lprc->FCBUPFMAD)
+   {
+     no_mem_exit("rc_alloc: lprc->FCBUPFMAD");
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Copy JVT rate control objects
+  *
+  *    Copies every field of src into dst, except that dst keeps its own four MAD
+  *    buffers, each of which receives a copy of the first
+  *    FrameSizeInMbs / basicunit entries of the matching src buffer.
+  *
+  * \param dst
+  *    destination object; its MAD buffers must already be allocated
+  * \param src
+  *    source object
+  *************************************************************************************
+ */
+ void copy_rc_jvt( rc_quadratic *dst, rc_quadratic *src )
+ {
+   int units = img->FrameSizeInMbs/input->basicunit;
+   size_t mad_bytes = units * sizeof (double);
+   /* the struct copy below overwrites the pointers, so remember dst's own buffers */
+   double *const own_BUPFMAD   = dst->BUPFMAD;
+   double *const own_BUCFMAD   = dst->BUCFMAD;
+   double *const own_FCBUPFMAD = dst->FCBUPFMAD;
+   double *const own_FCBUCFMAD = dst->FCBUCFMAD;
+ 
+   memcpy( dst, src, sizeof(*dst) );
+ 
+   dst->BUPFMAD   = own_BUPFMAD;
+   dst->BUCFMAD   = own_BUCFMAD;
+   dst->FCBUPFMAD = own_FCBUPFMAD;
+   dst->FCBUCFMAD = own_FCBUCFMAD;
+ 
+   /* deep copy of the per-basic-unit MADs */
+   memcpy( own_BUPFMAD,   src->BUPFMAD,   mad_bytes );
+   memcpy( own_BUCFMAD,   src->BUCFMAD,   mad_bytes );
+   memcpy( own_FCBUPFMAD, src->FCBUPFMAD, mad_bytes );
+   memcpy( own_FCBUCFMAD, src->FCBUCFMAD, mad_bytes );
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Free memory needed for rate control
+  *
+  *    Releases the four MAD buffers and the object itself and resets *prc to NULL.
+  *    Does nothing when prc or *prc is NULL.
+  *
+  * \param prc
+  *    address of the pointer to the object to free
+  *************************************************************************************
+ */
+ void rc_free(rc_quadratic **prc)
+ {
+   // the object pointer must be checked before any member is touched
+   if (NULL==prc || NULL==(*prc))
+     return;
+ 
+   // free(NULL) is a no-op, so the buffers need no individual guards
+   free ((*prc)->BUPFMAD);
+   (*prc)->BUPFMAD = NULL;
+ 
+   free ((*prc)->BUCFMAD);
+   (*prc)->BUCFMAD = NULL;
+ 
+   free ((*prc)->FCBUCFMAD);
+   (*prc)->FCBUCFMAD = NULL;
+ 
+   free ((*prc)->FCBUPFMAD);
+   (*prc)->FCBUPFMAD = NULL;
+ 
+   free ((*prc));
+   (*prc) = NULL;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Initialize rate control parameters
+  *
+  *    Selects the updateQP handler from input->RCUpdateMode, resets the sequence
+  *    level state in prc and generic_RC, clamps input->basicunit to the frame size
+  *    and, when input->SeinitialQP is 0, derives an initial QP from the bits per
+  *    pixel and the picture width.
+  *
+  * \param prc
+  *    quadratic rate control object to initialise
+  *************************************************************************************
+ */
+ void rc_init_seq(rc_quadratic *prc)
+ {
+   double L1,L2,L3,bpp;
+   int qp, i;
+ 
+   switch ( input->RCUpdateMode )
+   {
+   case RC_MODE_0:
+     updateQP = updateQPRC0;
+     break;
+   case RC_MODE_1:
+     updateQP = updateQPRC1;
+     break;
+   case RC_MODE_2:
+     updateQP = updateQPRC2;
+     break;
+   case RC_MODE_3:
+     updateQP = updateQPRC3;
+     break;
+   default:
+     updateQP = updateQPRC0;
+     break;
+   }
+ 
+   prc->Xp=0;
+   prc->Xb=0;
+ 
+   prc->bit_rate = (float) input->bit_rate;
+   prc->frame_rate = (img->framerate *(float)(input->successive_Bframe + 1)) / (float) (input->jumpd + 1);
+   prc->PrevBitRate = prc->bit_rate;
+ 
+   /*compute the total number of MBs in a frame*/
+   if(input->basicunit > img->FrameSizeInMbs)
+     input->basicunit = img->FrameSizeInMbs;
+   if(input->basicunit < img->FrameSizeInMbs)
+     prc->TotalNumberofBasicUnit = img->FrameSizeInMbs/input->basicunit;
+   else
+     prc->TotalNumberofBasicUnit = 1; // whole frame is one basic unit; the field is read below for DDquant and rc_alloc() does not set it
+ 
+   /*initialize the parameters of fluid flow traffic model*/
+   generic_RC->CurrentBufferFullness = 0;
+   prc->GOPTargetBufferLevel = (double) generic_RC->CurrentBufferFullness;
+ 
+   /*initialize the previous window size*/
+   prc->m_windowSize    = 0;
+   prc->MADm_windowSize = 0;
+   generic_RC->NumberofCodedBFrame = 0;
+   generic_RC->NumberofCodedPFrame = 0;
+   generic_RC->NumberofGOP         = 0;
+   /*remaining # of bits in GOP */
+   generic_RC->RemainingBits = 0;
+   /*control parameter */
+   if(input->successive_Bframe>0)
+   {
+     prc->GAMMAP=0.25;
+     prc->BETAP=0.9;
+   }
+   else
+   {
+     prc->GAMMAP=0.5;
+     prc->BETAP=0.5;
+   }
+ 
+   /*quadratic rate-distortion model*/
+   prc->PPreHeader=0;
+ 
+   prc->Pm_X1 = prc->bit_rate * 1.0;
+   prc->Pm_X2 = 0.0;
+   /* linear prediction model for P picture*/
+   prc->PMADPictureC1 = 1.0;
+   prc->PMADPictureC2 = 0.0;
+ 
+   // Initialize values
+   for(i=0;i<21;i++)
+   {
+     prc->Pm_rgQp[i] = 0;
+     prc->Pm_rgRp[i] = 0.0;
+     prc->PPictureMAD[i] = 0.0;
+   }
+ 
+   //Define the largest variation of quantization parameters
+   prc->PDuantQp=2;
+ 
+   /*basic unit layer rate control*/
+   prc->PAveHeaderBits1 = 0;
+   prc->PAveHeaderBits3 = 0;
+   prc->DDquant = (prc->TotalNumberofBasicUnit>=9 ? 1 : 2);
+ 
+   prc->MBPerRow = img->PicWidthInMbs;
+ 
+   /*adaptive field/frame coding*/
+   generic_RC->FieldControl=0;
+ 
+   /*compute the initial QP*/
+   bpp = 1.0*prc->bit_rate /(prc->frame_rate*img->size);
+ 
+   // bits-per-pixel thresholds depend on the picture width (176, 352, anything else)
+   if (img->width == 176)
+   {
+     L1 = 0.1;
+     L2 = 0.3;
+     L3 = 0.6;
+   }
+   else if (img->width == 352)
+   {
+     L1 = 0.2;
+     L2 = 0.6;
+     L3 = 1.2;
+   }
+   else
+   {
+     L1 = 0.6;
+     L2 = 1.4;
+     L3 = 2.4;
+   }
+ 
+   // SeinitialQP == 0 means "not configured": pick a QP from the bits-per-pixel range
+   if (input->SeinitialQP==0)
+   {
+     if (bpp<= L1)
+       qp = 35;
+     else if(bpp<=L2)
+       qp = 25;
+     else if(bpp<=L3)
+       qp = 20;
+     else
+       qp = 10;
+     input->SeinitialQP = qp;
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Initialize one GOP: RC_MODE_3 bit allocation, GOP bit budget and bounds, starting QP
+  *    np and nb are stored as prc->Np / prc->Nb (presumably the P and B picture counts of the GOP -- TODO confirm)
+  *************************************************************************************
+ */
+ void rc_init_GOP(rc_quadratic *prc, int np, int nb)
+ {
+   Boolean Overum=FALSE; // NOTE(review): Overum, OverBits and OverDuantQp are assigned below but never read in this function
+   int OverBits, OverDuantQp;
+   int AllocatedBits, GOPDquant;
+ 
+   // bit allocation for RC_MODE_3
+   switch( input->RCUpdateMode )
+   {
+   case RC_MODE_3:
+     {
+       int sum = 0, tmp, level, levels = 0, num_frames[RC_MAX_TEMPORAL_LEVELS];
+       float numer, denom;
+       int gop = input->successive_Bframe + 1;
+       memset( num_frames, 0, RC_MAX_TEMPORAL_LEVELS * sizeof(int) );
+       // are there any B frames?
+       if ( input->successive_Bframe )
+       {
+         if ( input->HierarchicalCoding == 1 ) // two layers: even/odd
+         {
+           levels = 2;
+           num_frames[0] = input->successive_Bframe >> 1;
+           num_frames[1] = (input->successive_Bframe - num_frames[0]) >= 0 ? (input->successive_Bframe - num_frames[0]) : 0;
+         }
+         else if ( input->HierarchicalCoding == 2 ) // binary hierarchical structure
+         {
+           // check if gop is power of two
+           tmp = gop;
+           while ( tmp )
+           {
+             sum += tmp & 1; // counts the set bits of gop
+             tmp >>= 1;
+           }
+           assert( sum == 1 );
+ 
+           // determine number of levels
+           levels = 0;
+           tmp = gop;
+           while ( tmp > 1 )
+           {
+             tmp >>= 1; // divide by 2          
+             num_frames[levels] = 1 << levels; // NOTE(review): written before the range assert after the loop has run
+             levels++;          
+           }
+           assert( levels >= 1 && levels <= RC_MAX_TEMPORAL_LEVELS );        
+         }
+         else if ( input->HierarchicalCoding == 3 )
+         {
+           fprintf(stderr, "\n RCUpdateMode=3 and HierarchicalCoding == 3 are currently not supported"); // This error message should be moved elsewhere and have proper memory deallocation
+           exit(1);
+         }
+         else // all frames of the same priority - level
+         {
+           levels = 1;
+           num_frames[0] = input->successive_Bframe;
+         }
+         generic_RC->temporal_levels = levels;      
+       }
+       else
+       {
+         for ( level = 0; level < RC_MAX_TEMPORAL_LEVELS; level++ )
+         {
+           input->RCBSliceBitRatio[level] = 0.0F;
+         }
+         generic_RC->temporal_levels = 0;
+       }
+       // calculate allocated bits for each type of frame
+       numer = (float)(( (!input->intra_period ? 1 : input->intra_period) * gop) * ((double)input->bit_rate / input->FrameRate));
+       denom = 0.0F;
+ 
+       for ( level = 0; level < levels; level++ )
+       {
+         denom += (float)(num_frames[level] * input->RCBSliceBitRatio[level]);
+         generic_RC->hierNb[level] = num_frames[level] * np;
+       }
+       denom += 1.0F; // adds 1.0 on top of the summed B-slice ratios
+       if ( input->intra_period >= 1 )
+       {
+         denom *= (float)input->intra_period;
+         denom += (float)input->RCISliceBitRatio - 1.0F;
+       }
+ 
+       // set bit targets for each type of frame
+       generic_RC->RCPSliceBits = (int) floor( numer / denom + 0.5F );
+       generic_RC->RCISliceBits = (input->intra_period) ? (int)(input->RCISliceBitRatio * generic_RC->RCPSliceBits + 0.5) : 0;
+ 
+       for ( level = 0; level < levels; level++ )
+       {
+         generic_RC->RCBSliceBits[level] = (int)floor(input->RCBSliceBitRatio[level] * generic_RC->RCPSliceBits + 0.5);
+       }
+ 
+       generic_RC->NIslice = (input->intra_period) ? ((input->no_frames - 1) / input->intra_period) : 0;
+       generic_RC->NPslice = input->no_frames - 1 - generic_RC->NIslice;
+     }
+     break;
+   default:
+     break;
+   }
+ 
+   /* check if the last GOP over uses its budget. If yes, the initial QP of the I frame in
+   the coming  GOP will be increased.*/
+ 
+   if(generic_RC->RemainingBits<0)
+     Overum=TRUE;
+   OverBits=-generic_RC->RemainingBits;
+ 
+   /*initialize the lower bound and the upper bound for the target bits of each frame, HRD consideration*/
+   prc->LowerBound  = (int)(generic_RC->RemainingBits + prc->bit_rate / prc->frame_rate);
+   prc->UpperBound1 = (int)(generic_RC->RemainingBits + (prc->bit_rate * 2.048));
+ 
+   /*compute the total number of bits for the current GOP*/
+   AllocatedBits = (int) floor((1 + np + nb) * prc->bit_rate / prc->frame_rate + 0.5);
+   generic_RC->RemainingBits += AllocatedBits;
+   prc->Np = np;
+   prc->Nb = nb;
+ 
+   OverDuantQp=(int)(8 * OverBits/AllocatedBits+0.5); // NOTE(review): integer division happens before the +0.5, nothing guards AllocatedBits == 0, and the result is unused
+   prc->GOPOverdue=FALSE;
+ 
+   /*field coding*/
+   //generic_RC->NoGranularFieldRC = ( input->PicInterlace || !input->MbInterlace || input->basicunit != img->FrameSizeInMbs );
+   if ( !input->PicInterlace && input->MbInterlace && input->basicunit == img->FrameSizeInMbs )
+     generic_RC->NoGranularFieldRC = 0;
+   else
+     generic_RC->NoGranularFieldRC = 1;
+ 
+   /*Compute InitialQp for each GOP*/
+   prc->TotalPFrame=np;
+   generic_RC->NumberofGOP++;
+   if(generic_RC->NumberofGOP==1) // first GOP: start every QP state variable from input->SeinitialQP
+   {
+     prc->MyInitialQp = input->SeinitialQP;
+     prc->CurrLastQP = prc->MyInitialQp - 1; //recent change -0;
+     prc->QPLastGOP   = prc->MyInitialQp;
+ 
+     prc->PAveFrameQP   = prc->MyInitialQp;
+     prc->m_Qc          = prc->PAveFrameQP;
+     prc->FieldQPBuffer = prc->PAveFrameQP;
+     prc->FrameQPBuffer = prc->PAveFrameQP;
+     prc->PAverageQp    = prc->PAveFrameQP;
+   }
+   else
+   {
+     /*adaptive field/frame coding*/
+     if( input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace )
+     {
+       if (generic_RC->FieldFrame == 1)
+       {
+         generic_RC->TotalQpforPPicture += prc->FrameQPBuffer;
+         prc->QPLastPFrame = prc->FrameQPBuffer;
+       }
+       else
+       {
+         generic_RC->TotalQpforPPicture += prc->FieldQPBuffer;
+         prc->QPLastPFrame = prc->FieldQPBuffer;
+       }
+     }
+     /*compute the average QP of P frames in the previous GOP*/
+     prc->PAverageQp=(int)(1.0 * generic_RC->TotalQpforPPicture / generic_RC->NumberofPPicture+0.5); // NOTE(review): nothing here guards NumberofPPicture == 0; it is reset to 0 at the end of this function -- confirm it is incremented elsewhere
+ 
+     GOPDquant=(int)((1.0*(np+nb+1)/15.0) + 0.5);
+     if(GOPDquant>2)
+       GOPDquant=2;
+ 
+     prc->PAverageQp -= GOPDquant;
+ 
+     if (prc->PAverageQp > (prc->QPLastPFrame - 2))
+       prc->PAverageQp--;
+ 
+     // QP is constrained by QP of previous QP
+     prc->PAverageQp = iClip3(prc->QPLastGOP - 2, prc->QPLastGOP + 2, prc->PAverageQp);
+     // Also clipped within range.
+     prc->PAverageQp = iClip3(prc->RC_MIN_QUANT,  prc->RC_MAX_QUANT,  prc->PAverageQp);
+ 
+     prc->MyInitialQp = prc->PAverageQp;
+     prc->Pm_Qp       = prc->PAverageQp;
+     prc->PAveFrameQP = prc->PAverageQp;
+     prc->QPLastGOP   = prc->MyInitialQp;
+     prc->PrevLastQP = prc->CurrLastQP;
+     prc->CurrLastQP = prc->MyInitialQp - 1;
+   }
+ 
+   generic_RC->TotalQpforPPicture=0; // per-GOP P-picture statistics restart here
+   generic_RC->NumberofPPicture=0;
+   prc->NumberofBFrames=0;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Initialize one picture
+  *
+  *************************************************************************************
+ */
+ void rc_init_pict(rc_quadratic *prc, int fieldpic,int topfield,int targetcomputation, float mult)
+ {
+   int tmp_T;
+ 
+   /* compute the total number of basic units in a frame */
+   if(input->MbInterlace)
+     prc->TotalNumberofBasicUnit = img->FrameSizeInMbs / img->BasicUnit;
+ 
+   img->NumberofCodedMacroBlocks = 0;
+ 
+   /* Normally, the bandwidth for the VBR case is estimated by
+      a congestion control algorithm. A bandwidth curve can be predefined if we only want to
+      test the proposed algorithm */
+   if(input->channel_type==1)
+   {
+     if(generic_RC->NumberofCodedPFrame==58)
+       prc->bit_rate *= 1.5;
+     else if(generic_RC->NumberofCodedPFrame==59)
+       prc->PrevBitRate = prc->bit_rate;
+   }
+ 
+   /* predefine a target buffer level for each frame */
+   if((fieldpic||topfield) && targetcomputation)
+   {
+     if ( (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+     {
+       /* Since the available bandwidth may vary at any time, the total number of
+       bits is updated picture by picture*/
+       if(prc->PrevBitRate!=prc->bit_rate)
+         generic_RC->RemainingBits +=(int) floor((prc->bit_rate-prc->PrevBitRate)*(prc->Np + prc->Nb)/prc->frame_rate+0.5);
+ 
+       /* predefine the  target buffer level for each picture.
+       frame layer rate control */
+       if(img->BasicUnit==img->FrameSizeInMbs)
+       {
+         if(generic_RC->NumberofPPicture==1)
+         {
+           prc->TargetBufferLevel = (double) generic_RC->CurrentBufferFullness;
+           prc->DeltaP = (generic_RC->CurrentBufferFullness - prc->GOPTargetBufferLevel) / (prc->TotalPFrame-1);
+           prc->TargetBufferLevel -= prc->DeltaP;
+         }
+         else if(generic_RC->NumberofPPicture>1)
+           prc->TargetBufferLevel -= prc->DeltaP;
+       }
+       /* basic unit layer rate control */
+       else
+       {
+         if(generic_RC->NumberofCodedPFrame>0)
+         {
+           /* adaptive frame/field coding */
+           if(((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))&&(generic_RC->FieldControl==1))
+             memcpy((void *)prc->FCBUPFMAD,(void *)prc->FCBUCFMAD, prc->TotalNumberofBasicUnit * sizeof(double));
+           else
+             memcpy((void *)prc->BUPFMAD,(void *)prc->BUCFMAD, prc->TotalNumberofBasicUnit * sizeof(double));
+         }
+ 
+         if(generic_RC->NumberofGOP==1)
+         {
+           if(generic_RC->NumberofPPicture==1)
+           {
+             prc->TargetBufferLevel = (double) generic_RC->CurrentBufferFullness;
+             prc->DeltaP = (generic_RC->CurrentBufferFullness - prc->GOPTargetBufferLevel)/(prc->TotalPFrame - 1);
+             prc->TargetBufferLevel -= prc->DeltaP;
+           }
+           else if(generic_RC->NumberofPPicture>1)
+             prc->TargetBufferLevel -= prc->DeltaP;
+         }
+         else if(generic_RC->NumberofGOP>1)
+         {
+           if(generic_RC->NumberofPPicture==0)
+           {
+             prc->TargetBufferLevel = (double) generic_RC->CurrentBufferFullness;
+             prc->DeltaP = (generic_RC->CurrentBufferFullness - prc->GOPTargetBufferLevel) / prc->TotalPFrame;
+             prc->TargetBufferLevel -= prc->DeltaP;
+           }
+           else if(generic_RC->NumberofPPicture>0)
+             prc->TargetBufferLevel -= prc->DeltaP;
+         }
+       }
+ 
+       if(generic_RC->NumberofCodedPFrame==1)
+         prc->AveWp = prc->Wp;
+ 
+       if((generic_RC->NumberofCodedPFrame<8)&&(generic_RC->NumberofCodedPFrame>1))
+         prc->AveWp = (prc->AveWp + prc->Wp * (generic_RC->NumberofCodedPFrame-1))/generic_RC->NumberofCodedPFrame;
+       else if(generic_RC->NumberofCodedPFrame>1)
+         prc->AveWp = (prc->Wp + 7 * prc->AveWp) / 8;
+ 
+       // compute the average complexity of B frames
+       if(input->successive_Bframe>0)
+       {
+         // compute the target buffer level
+         prc->TargetBufferLevel += (prc->AveWp * (input->successive_Bframe + 1)*prc->bit_rate\
+           /(prc->frame_rate*(prc->AveWp+prc->AveWb*input->successive_Bframe))-prc->bit_rate/prc->frame_rate);
+       }
+     }
+     else if ( img->type == B_SLICE )
+     {
+       /* update the total number of bits if the bandwidth is changed*/
+       if(prc->PrevBitRate != prc->bit_rate)
+         generic_RC->RemainingBits +=(int) floor((prc->bit_rate-prc->PrevBitRate) * (prc->Np + prc->Nb) / prc->frame_rate+0.5);
+       if((generic_RC->NumberofCodedPFrame==1)&&(generic_RC->NumberofCodedBFrame==1))
+       {
+         prc->AveWp = prc->Wp;
+         prc->AveWb = prc->Wb;
+       }
+       else if(generic_RC->NumberofCodedBFrame > 1)
+       {
+         //compute the average weight
+         if(generic_RC->NumberofCodedBFrame<8)
+           prc->AveWb = (prc->AveWb + prc->Wb*(generic_RC->NumberofCodedBFrame-1)) / generic_RC->NumberofCodedBFrame;
+         else
+           prc->AveWb = (prc->Wb + 7 * prc->AveWb) / 8;
+       }
+     }
+     /* Compute the target bit for each frame */
+     if( (img->type==P_SLICE || input->RCUpdateMode == RC_MODE_1 || input->RCUpdateMode == RC_MODE_3 ) && (IMG_NUMBER) )
+     {
+       /* frame layer rate control */
+       if(img->BasicUnit==img->FrameSizeInMbs || (input->RCUpdateMode == RC_MODE_3) )
+       {
+         if(generic_RC->NumberofCodedPFrame>0)
+         {
+           if (input->RCUpdateMode == RC_MODE_3)
+           {
+             int level_idx = (img->type == B_SLICE && input->HierarchicalCoding) ? (generic_RC->temporal_levels - 1 - gop_structure[img->b_frame_to_code-1].hierarchy_layer) : 0;
+             int bitrate = (img->type == B_SLICE) ? generic_RC->RCBSliceBits[ level_idx ]
+             : ( img->type == P_SLICE ? generic_RC->RCPSliceBits : generic_RC->RCISliceBits );
+             int level, denom = generic_RC->NIslice * generic_RC->RCISliceBits + generic_RC->NPslice * generic_RC->RCPSliceBits;
+             if ( input->HierarchicalCoding )
+             {
+               for ( level = 0; level < generic_RC->temporal_levels; level++ )
+                 denom += generic_RC->hierNb[ level ] * generic_RC->RCBSliceBits[ level ];
+             }
+             else
+             {
+               denom += generic_RC->hierNb[0] * generic_RC->RCBSliceBits[0];
+             }
+             // target due to remaining bits
+             prc->Target = (int) floor( (float)(1.0 * bitrate * generic_RC->RemainingBits) / (float)denom + 0.5F );
+             // target given original target rate and buffer considerations
+             tmp_T  = imax(0, (int) floor( (double)bitrate - prc->GAMMAP * (generic_RC->CurrentBufferFullness-prc->TargetBufferLevel) + 0.5) );
+             // translate Target rate from B or I "domain" to P domain since the P RC model is going to be used to select the QP
+             // for hierarchical coding adjust the target QP to account for different temporal levels
+             switch( img->type )
+             {
+             case B_SLICE:
+               prc->Target = (int) floor( (float)prc->Target / input->RCBoverPRatio + 0.5F);
+               break;
+             case I_SLICE:
+               prc->Target = (int) floor( (float)prc->Target / (input->RCIoverPRatio * 4.0) + 0.5F); // 4x accounts for the fact that header bits reduce the percentage of texture
+               break;
+             case P_SLICE:
+             default:
+               break;
+             }
+           }
+           else
+           {
+             prc->Target = (int) floor( prc->Wp * generic_RC->RemainingBits / (prc->Np * prc->Wp + prc->Nb * prc->Wb) + 0.5);
+             tmp_T  = imax(0, (int) floor(prc->bit_rate / prc->frame_rate - prc->GAMMAP * (generic_RC->CurrentBufferFullness-prc->TargetBufferLevel) + 0.5));
+             prc->Target = (int) floor(prc->BETAP * (prc->Target - tmp_T) + tmp_T + 0.5);
+           }
+         }
+       }
+       /* basic unit layer rate control */
+       else
+       {
+         if(((generic_RC->NumberofGOP == 1)&&(generic_RC->NumberofCodedPFrame>0))
+           || (generic_RC->NumberofGOP > 1))
+         {
+           prc->Target = (int) (floor( prc->Wp * generic_RC->RemainingBits / (prc->Np * prc->Wp + prc->Nb * prc->Wb) + 0.5));
+           tmp_T  = imax(0, (int) (floor(prc->bit_rate / prc->frame_rate - prc->GAMMAP * (generic_RC->CurrentBufferFullness-prc->TargetBufferLevel) + 0.5)));
+           prc->Target = (int) (floor(prc->BETAP * (prc->Target - tmp_T) + tmp_T + 0.5));
+         }
+       }
+       prc->Target = (int)(mult * prc->Target);
+ 
+       /* reserve some bits for smoothing */
+       prc->Target = (int)((1.0 - 0.0 * input->successive_Bframe) * prc->Target);
+ 
+       /* HRD consideration */
+       if ( input->RCUpdateMode != RC_MODE_3 || img->type == P_SLICE )
+         prc->Target = iClip3(prc->LowerBound,prc->UpperBound2, prc->Target);
+       if((topfield) || (fieldpic && ((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))))
+         prc->TargetField=prc->Target;
+     }
+   }
+ 
+   if(fieldpic || topfield)
+   {
+     /* frame layer rate control */
+     generic_RC->NumberofHeaderBits  = 0;
+     generic_RC->NumberofTextureBits = 0;
+ 
+     /* basic unit layer rate control */
+     if(img->BasicUnit<img->FrameSizeInMbs)
+     {
+       prc->TotalFrameQP = 0;
+       generic_RC->NumberofBasicUnitHeaderBits  = 0;
+       generic_RC->NumberofBasicUnitTextureBits = 0;
+       generic_RC->TotalMADBasicUnit = 0;
+       if(generic_RC->FieldControl==0)
+         prc->NumberofBasicUnit = prc->TotalNumberofBasicUnit;
+       else
+         prc->NumberofBasicUnit = prc->TotalNumberofBasicUnit >> 1;
+     }
+   }
+ 
+   if( ( img->type==P_SLICE || input->RCUpdateMode == RC_MODE_1 ) && img->BasicUnit < img->FrameSizeInMbs && generic_RC->FieldControl == 1 && (IMG_NUMBER) )
+   {
+     /* top field at basic unit layer rate control */
+     if(topfield)
+     {
+       prc->bits_topfield=0;
+       prc->Target=(int)(prc->TargetField*0.6);
+     }
+     /* bottom field at basic unit layer rate control */
+     else
+     {
+       prc->Target=prc->TargetField-prc->bits_topfield;
+       generic_RC->NumberofBasicUnitHeaderBits=0;
+       generic_RC->NumberofBasicUnitTextureBits=0;
+       generic_RC->TotalMADBasicUnit=0;
+       prc->NumberofBasicUnit=prc->TotalNumberofBasicUnit >> 1;
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Book-keeping after a picture (frame or field) has been encoded: the bits
+  *    spent are removed from the remaining GOP budget, the buffer fullness is
+  *    moved by the difference to the per-picture channel budget, and the
+  *    lower/upper bounds for the next target are shifted by the same difference.
+  *
+  * \param prc
+  *    quadratic rate control state (bit rate, frame rate and target bounds)
+  * \param nbits
+  *    number of bits used for picture
+  *
+  *************************************************************************************
+ */
+ void rc_update_pict(rc_quadratic *prc, int nbits)
+ {
+   /* bit_rate / frame_rate rounded to the nearest integer: bits drained per picture */
+   const int bits_per_picture = (int)floor(prc->bit_rate / prc->frame_rate + 0.5F);
+   /* positive when the picture cost more than the channel drains in one picture time */
+   const int excess = nbits - bits_per_picture;
+ 
+   generic_RC->CurrentBufferFullness += excess;
+   generic_RC->RemainingBits         -= nbits;   /* remaining # of bits in GOP */
+ 
+   /* HRD consideration: move the bounds for the target bits of the next picture */
+   prc->UpperBound1 -= excess;
+   prc->LowerBound  -= excess;
+   prc->UpperBound2  = (int)(OMEGA * prc->UpperBound1);
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Turn the bits spent on a picture into a complexity value (bits times QP,
+  *    rounded to the nearest integer).
+  *
+  * \param prc
+  *    quadratic rate control state (m_Qc, TotalFrameQP, TotalNumberofBasicUnit)
+  * \param is_updated
+  *    non-zero when the caller treats the picture as a P-type update
+  * \param nbits
+  *    number of bits used for the picture
+  *
+  * \return
+  *    the rounded complexity, or 0 when none of the cases below applies
+  *************************************************************************************
+ */
+ int updateComplexity( rc_quadratic *prc, Boolean is_updated, int nbits )
+ {
+   /* frame layer rate control: a single QP (m_Qc) was used for the picture */
+   if (img->BasicUnit == img->FrameSizeInMbs)
+     return (int) floor(nbits * prc->m_Qc + 0.5);
+ 
+   /* basic unit layer rate control */
+   if (is_updated)
+   {
+     if (generic_RC->FieldControl == 0 ||
+         (generic_RC->FieldControl == 1 && generic_RC->NoGranularFieldRC == 0))
+     {
+       /* QP averaged over all basic units of the picture */
+       double Avem_Qc = (double)prc->TotalFrameQP / (double)prc->TotalNumberofBasicUnit;
+       return (int) floor(nbits * Avem_Qc + 0.5);
+     }
+     return 0;
+   }
+ 
+   if (img->type == B_SLICE)
+     return (int) floor(nbits * prc->m_Qc + 0.5);
+ 
+   return 0;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Record the complexity of a just-coded P picture: store it as Xp and Wp,
+  *    remember the header bits, take one picture off Np and advance the
+  *    coded-P counters in generic_RC.
+  *
+  * \param prc
+  *    quadratic rate control state
+  * \param complexity
+  *    complexity value of the picture (see updateComplexity)
+  *************************************************************************************
+ */
+ void updatePparams( rc_quadratic *prc, int complexity )
+ {
+   /* Wp takes the value Xp holds after the assignment */
+   prc->Wp    = prc->Xp = complexity;
+   prc->Np   -= 1;
+   prc->Pm_Hp = generic_RC->NumberofHeaderBits;
+ 
+   generic_RC->NumberofCodedPFrame += 1;
+   generic_RC->NumberofPPicture    += 1;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Record the complexity of a just-coded B picture: store it as Xb, derive
+  *    the weight Wb = Xb / THETA, take one picture off Nb and advance the
+  *    B picture counters.
+  *
+  * \param prc
+  *    quadratic rate control state
+  * \param complexity
+  *    complexity value of the picture (see updateComplexity)
+  *************************************************************************************
+ */
+ void updateBparams( rc_quadratic *prc, int complexity )
+ {
+   prc->Xb  = complexity;
+   prc->Wb  = prc->Xb / THETA;
+   prc->Nb -= 1;
+ 
+   prc->NumberofBFrames             += 1;
+   generic_RC->NumberofCodedBFrame  += 1;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    update after frame encoding: compute the complexity of the coded picture
+  *    and feed it into the P or B statistics, depending on input->RCUpdateMode
+  *
+  * \param prc
+  *    quadratic rate control state
+  * \param nbits
+  *    number of bits used for frame
+  *
+  *************************************************************************************
+ */
+ void rc_update_pict_frame(rc_quadratic *prc, int nbits)
+ {
+   /* complexity weight of the picture that was just coded */
+   int complexity;
+ 
+   if ( input->RCUpdateMode == RC_MODE_1 )
+   {
+     /* every picture with a non-zero IMG_NUMBER goes through the P update */
+     complexity = updateComplexity( prc, (Boolean) (IMG_NUMBER), nbits );
+     if ( (IMG_NUMBER) )
+     {
+       if( generic_RC->NoGranularFieldRC != 0 && generic_RC->FieldControl != 0 )
+         generic_RC->NoGranularFieldRC = 0;
+       else
+         updatePparams( prc, complexity );
+     }
+     return;
+   }
+ 
+   if ( input->RCUpdateMode == RC_MODE_3 )
+   {
+     complexity = updateComplexity( prc, (Boolean) (img->type == P_SLICE && (IMG_NUMBER)), nbits );
+ 
+     /* this mode also keeps per-type counters of the pictures still to be coded */
+     if (img->type == I_SLICE && IMG_NUMBER)
+       generic_RC->NIslice--;
+ 
+     if ( (img->type == P_SLICE) && (IMG_NUMBER) )
+     {
+       if( generic_RC->NoGranularFieldRC != 0 && generic_RC->FieldControl != 0 )
+         generic_RC->NoGranularFieldRC = 0;
+       else
+       {
+         updatePparams( prc, complexity );
+         generic_RC->NPslice--;
+       }
+     }
+     else if ( img->type == B_SLICE )
+     {
+       int level_idx = 0;
+ 
+       updateBparams( prc, complexity );
+       /* with hierarchical coding the counter is kept per temporal level */
+       if ( input->HierarchicalCoding )
+         level_idx = generic_RC->temporal_levels - 1 - gop_structure[img->b_frame_to_code-1].hierarchy_layer;
+       generic_RC->hierNb[ level_idx ]--;
+     }
+     return;
+   }
+ 
+   /* RC_MODE_0, RC_MODE_2 and any other value */
+   complexity = updateComplexity( prc, (Boolean) (img->type == P_SLICE && (IMG_NUMBER)), nbits );
+   if ( img->type == P_SLICE && (IMG_NUMBER) )
+   {
+     if( generic_RC->NoGranularFieldRC != 0 && generic_RC->FieldControl != 0 )
+       generic_RC->NoGranularFieldRC = 0;
+     else
+       updatePparams( prc, complexity );
+   }
+   else if ( img->type == B_SLICE )
+     updateBparams( prc, complexity );
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    update the parameters of quadratic R-D model
+  *
+  *    Does nothing unless the picture is a P slice (or RCUpdateMode is RC_MODE_1)
+  *    and IMG_NUMBER is non-zero. Otherwise, in order:
+  *      1. obtain CurrentFrameMAD (ComputeFrameMAD() at frame layer, derived from
+  *         TotalMADBasicUnit at basic unit layer, where the header bit averages
+  *         and the per-basic-unit MAD record are refreshed as well);
+  *      2. shift the sample history and store the newest sample at index 0:
+  *         QP2Qstep(m_Qc) and texture bits divided by CurrentFrameMAD;
+  *      3. derive the window size from the ratio of current and previous MAD;
+  *      4. run RCModelEstimator(), reject samples whose model error exceeds the
+  *         RMS error over the window, and run RCModelEstimator() again;
+  *      5. call updateMADModel() when more than one sample has been coded
+  *         (m_Nc > 1), otherwise only seed PPictureMAD[0].
+  *
+  *    The rejection flags and the error buffer are function-local statics.
+  *
+  * \param prc
+  *    quadratic rate control state that is read and updated
+  *
+  *************************************************************************************
+ */
+ void updateRCModel (rc_quadratic *prc)
+ {
+   int n_windowSize;
+   int i;
+   double std = 0.0, threshold; // std accumulates the sum of squared model errors
+   int m_Nc = generic_RC->NumberofCodedPFrame; // number of samples coded so far
+   Boolean MADModelFlag = FALSE;
+   static Boolean m_rgRejected[RC_MODEL_HISTORY];
+   static double  error       [RC_MODEL_HISTORY];
+ 
+   if( (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+   {
+     /*frame layer rate control*/
+     if(img->BasicUnit == img->FrameSizeInMbs)
+     {
+       prc->CurrentFrameMAD = ComputeFrameMAD();
+       m_Nc=generic_RC->NumberofCodedPFrame;
+     }
+     /*basic unit layer rate control*/
+     else
+     {
+       /*compute the MAD of the current basic unit
+         (shift and quotient are evaluated before the cast to double; this is an
+          integer division if BasicUnit is an integer type - TODO confirm)*/
+       prc->CurrentFrameMAD = (double) ((generic_RC->TotalMADBasicUnit >> 8)/img->BasicUnit);
+       generic_RC->TotalMADBasicUnit=0;
+ 
+       /* compute the average number of header bits*/
+       prc->CodedBasicUnit=prc->TotalNumberofBasicUnit-prc->NumberofBasicUnit;
+       if(prc->CodedBasicUnit > 0)
+       {
+         prc->PAveHeaderBits1=(int)((double)(prc->PAveHeaderBits1*(prc->CodedBasicUnit-1)+
+           generic_RC->NumberofBasicUnitHeaderBits)/prc->CodedBasicUnit+0.5); // running mean over the coded basic units, rounded
+         if(prc->PAveHeaderBits3 == 0)
+           prc->PAveHeaderBits2 = prc->PAveHeaderBits1;
+         else
+         {
+           prc->PAveHeaderBits2 = (int)((double)(prc->PAveHeaderBits1 * prc->CodedBasicUnit+
+             prc->PAveHeaderBits3 * prc->NumberofBasicUnit)/prc->TotalNumberofBasicUnit+0.5); // PAveHeaderBits1 and PAveHeaderBits3 weighted by coded / remaining basic units
+         }
+       }
+       /*update the record of MADs for reference*/
+       if(((input->PicInterlace == ADAPTIVE_CODING) || (input->MbInterlace)) && (generic_RC->FieldControl == 1))
+         prc->FCBUCFMAD[prc->TotalNumberofBasicUnit-1-prc->NumberofBasicUnit]=prc->CurrentFrameMAD;
+       else
+         prc->BUCFMAD[prc->TotalNumberofBasicUnit-1-prc->NumberofBasicUnit]=prc->CurrentFrameMAD;
+ 
+       if(prc->NumberofBasicUnit != 0)
+         m_Nc = generic_RC->NumberofCodedPFrame * prc->TotalNumberofBasicUnit + prc->CodedBasicUnit;
+       else
+         m_Nc = (generic_RC->NumberofCodedPFrame-1) * prc->TotalNumberofBasicUnit + prc->CodedBasicUnit; // no basic unit left: one picture less is counted as complete
+     }
+ 
+     if(m_Nc > 1)
+       MADModelFlag=TRUE;
+ 
+     prc->PPreHeader = generic_RC->NumberofHeaderBits;
+     for (i = (RC_MODEL_HISTORY-2); i > 0; i--)
+     {// update the history: move every sample one slot up; index RC_MODEL_HISTORY-1 is not written here
+       prc->Pm_rgQp[i] = prc->Pm_rgQp[i - 1];
+       prc->m_rgQp[i]  = prc->Pm_rgQp[i];
+       prc->Pm_rgRp[i] = prc->Pm_rgRp[i - 1];
+       prc->m_rgRp[i]  = prc->Pm_rgRp[i];
+     }
+     prc->Pm_rgQp[0] = QP2Qstep(prc->m_Qc); //*1.0/prc->CurrentFrameMAD;
+     /*frame layer rate control*/
+     if(img->BasicUnit==img->FrameSizeInMbs)
+       prc->Pm_rgRp[0] = generic_RC->NumberofTextureBits*1.0/prc->CurrentFrameMAD; // NOTE(review): no guard against CurrentFrameMAD == 0
+     /*basic unit layer rate control*/
+     else
+       prc->Pm_rgRp[0] = generic_RC->NumberofBasicUnitTextureBits*1.0/prc->CurrentFrameMAD; // NOTE(review): no guard against CurrentFrameMAD == 0
+ 
+     prc->m_rgQp[0] = prc->Pm_rgQp[0];
+     prc->m_rgRp[0] = prc->Pm_rgRp[0];
+     prc->m_X1 = prc->Pm_X1;
+     prc->m_X2 = prc->Pm_X2;
+ 
+     /*compute the size of window: previous/current MAD when the MAD grew,
+       current/previous otherwise, scaled by RC_MODEL_HISTORY-1 and truncated*/
+     n_windowSize = (prc->CurrentFrameMAD>prc->PreviousFrameMAD)
+       ? (int)(prc->PreviousFrameMAD/prc->CurrentFrameMAD * (RC_MODEL_HISTORY-1) )
+       : (int)(prc->CurrentFrameMAD/prc->PreviousFrameMAD *(RC_MODEL_HISTORY-1));
+     n_windowSize=iClip3(1, m_Nc, n_windowSize); // presumably clamps to [1, m_Nc] - confirm iClip3 argument order
+     n_windowSize=imin(n_windowSize,prc->m_windowSize+1); // may grow by at most one over the previous window
+     n_windowSize=imin(n_windowSize,(RC_MODEL_HISTORY-1)); // never larger than RC_MODEL_HISTORY-1
+ 
+     /*update the previous window size*/
+     prc->m_windowSize=n_windowSize;
+ 
+     for (i = 0; i < (RC_MODEL_HISTORY-1); i++)
+     {
+       m_rgRejected[i] = FALSE;
+     }
+ 
+     // initial RD model estimator
+     RCModelEstimator (prc, n_windowSize, m_rgRejected);
+ 
+     n_windowSize = prc->m_windowSize;
+     // remove outlier
+ 
+     for (i = 0; i < (int) n_windowSize; i++)
+     {
+       error[i] = prc->m_X1 / prc->m_rgQp[i] + prc->m_X2 / (prc->m_rgQp[i] * prc->m_rgQp[i]) - prc->m_rgRp[i]; // model prediction minus stored sample
+       std += error[i] * error[i];
+     }
+     threshold = (n_windowSize == 2) ? 0 : sqrt (std / n_windowSize); // RMS error; with exactly two samples any non-zero error is rejected
+     for (i = 0; i < (int) n_windowSize; i++)
+     {
+       if (fabs(error[i]) > threshold)
+         m_rgRejected[i] = TRUE;
+     }
+     // always include the last data point
+     m_rgRejected[0] = FALSE;
+ 
+     // second RD model estimator
+     RCModelEstimator (prc, n_windowSize, m_rgRejected);
+ 
+     if( MADModelFlag )
+       updateMADModel(prc);
+     else if( (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) ) // same test as the enclosing if
+       prc->PPictureMAD[0]=prc->CurrentFrameMAD;
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Model Estimator: fit m_X1 and m_X2 of the quadratic R-D model to the
+  *    samples (m_rgQp[i], m_rgRp[i]) of the window that are not rejected
+  *
+  * \param prc
+  *    quadratic rate control state; m_X1 / m_X2 (and, for P-type updates,
+  *    Pm_X1 / Pm_X2) are overwritten
+  * \param n_windowSize
+  *    number of history samples to look at, starting at index 0
+  * \param m_rgRejected
+  *    per-sample flags; a non-zero entry excludes that sample from the fit
+  *
+  *************************************************************************************
+ */
+ void RCModelEstimator (rc_quadratic *prc, int n_windowSize, Boolean *m_rgRejected)
+ {
+   int     k;
+   int     n_used      = n_windowSize;  // samples that survive the rejection flags
+   double  refQ        = 0;             // m_rgQp of the last accepted sample
+   Boolean secondOrder = FALSE;
+   // sums for the 2x2 normal equations
+   double  cnt = 0.0, sumInvQ = 0.0, sumInvQ2 = 0.0, sumQR = 0.0, sumR = 0.0;
+   double  det;
+ 
+   // one scan: count the accepted samples and remember the last accepted Q
+   for (k = 0; k < n_windowSize; k++)
+   {
+     if (m_rgRejected[k])
+       n_used--;
+     else
+       refQ = prc->m_rgQp[k];
+   }
+ 
+   // default RD model estimation results
+   prc->m_X1 = prc->m_X2 = 0.0;
+ 
+   // 1st order estimate: mean of Q*R over the accepted samples;
+   // a 2nd order fit is only attempted when the accepted Q values differ
+   for (k = 0; k < n_windowSize; k++)
+   {
+     if (m_rgRejected[k])
+       continue;
+     if (prc->m_rgQp[k] != refQ)
+       secondOrder = TRUE;
+     prc->m_X1 += (prc->m_rgQp[k] * prc->m_rgRp[k]) / n_used;
+   }
+ 
+   // take 2nd order model to estimate X1 and X2
+   if (secondOrder && (n_used >= 1))
+   {
+     for (k = 0; k < n_windowSize; k++)
+     {
+       if (m_rgRejected[k])
+         continue;
+       cnt       = cnt + 1.0;
+       sumInvQ  += 1.0 / prc->m_rgQp[k];
+       sumInvQ2 += 1.0 / (prc->m_rgQp[k] * prc->m_rgQp[k]);
+       sumQR    += prc->m_rgQp[k] * prc->m_rgRp[k];
+       sumR     += prc->m_rgRp[k];
+     }
+ 
+     // solve the equation of AX = B
+     det = cnt * sumInvQ2 - sumInvQ * sumInvQ;
+     if (fabs(det) > 0.000001)
+     {
+       prc->m_X1 = (sumQR * sumInvQ2 - sumR * sumInvQ) / det;
+       prc->m_X2 = (sumR * cnt - sumQR * sumInvQ) / det;
+     }
+     else
+     {
+       // (near) singular system: keep only the 1st order term
+       prc->m_X1 = sumQR / cnt;
+       prc->m_X2 = 0.0;
+     }
+   }
+ 
+   // keep the result as the P picture model
+   if( (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+   {
+     prc->Pm_X1 = prc->m_X1;
+     prc->Pm_X2 = prc->m_X2;
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    update the parameters of linear prediction model
+  *
+  *    Does nothing while generic_RC->NumberofCodedPFrame is 0. Otherwise the
+  *    MAD history (PPictureMAD / PictureMAD / ReferenceMAD) is shifted, the
+  *    newest pair is stored at index 0, the window size is derived from the
+  *    ratio of current and previous MAD, and MADModelEstimator() is run twice:
+  *    once on the whole window and once after rejecting the samples whose
+  *    prediction error exceeds the RMS error over the window.
+  *
+  *    The rejection flags and the error buffer are function-local statics.
+  *
+  * \param prc
+  *    quadratic rate control state that is read and updated
+  *
+  *************************************************************************************
+ */
+ void updateMADModel (rc_quadratic *prc)
+ {
+   int    n_windowSize;
+   int    i;
+   double std = 0.0, threshold; // std accumulates the sum of squared prediction errors
+   int    m_Nc = generic_RC->NumberofCodedPFrame;
+   static Boolean PictureRejected[RC_MODEL_HISTORY];
+   static double  error          [RC_MODEL_HISTORY];
+ 
+   if(generic_RC->NumberofCodedPFrame>0)
+   {
+     //assert (img->type!=P_SLICE);
+     /*frame layer rate control*/
+     if(img->BasicUnit == img->FrameSizeInMbs)
+       m_Nc=generic_RC->NumberofCodedPFrame;
+     /*basic unit layer rate control*/
+     else
+       m_Nc=generic_RC->NumberofCodedPFrame*prc->TotalNumberofBasicUnit+prc->CodedBasicUnit;
+ 
+     for (i = (RC_MODEL_HISTORY-2); i > 0; i--)
+     {// update the history: move every sample one slot up; index RC_MODEL_HISTORY-1 is not written here
+       prc->PPictureMAD[i]  = prc->PPictureMAD[i - 1];
+       prc->PictureMAD[i]   = prc->PPictureMAD[i];
+       prc->ReferenceMAD[i] = prc->ReferenceMAD[i-1];
+     }
+     prc->PPictureMAD[0] = prc->CurrentFrameMAD;
+     prc->PictureMAD[0]  = prc->PPictureMAD[0];
+ 
+     if(img->BasicUnit == img->FrameSizeInMbs)
+       prc->ReferenceMAD[0]=prc->PictureMAD[1]; // frame layer: the reference is the MAD stored one step earlier
+     else
+     {
+       if(((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace)) &&(generic_RC->FieldControl==1))
+         prc->ReferenceMAD[0]=prc->FCBUPFMAD[prc->TotalNumberofBasicUnit-1-prc->NumberofBasicUnit]; // basic unit layer: entry of the FCBUPFMAD record for this basic unit index
+       else
+         prc->ReferenceMAD[0]=prc->BUPFMAD[prc->TotalNumberofBasicUnit-1-prc->NumberofBasicUnit]; // basic unit layer: entry of the BUPFMAD record for this basic unit index
+     }
+     prc->MADPictureC1 = prc->PMADPictureC1;
+     prc->MADPictureC2 = prc->PMADPictureC2;
+ 
+     /*compute the size of window: previous/current MAD when the MAD grew,
+       current/previous otherwise, scaled by RC_MODEL_HISTORY-1 and truncated*/
+     n_windowSize = (prc->CurrentFrameMAD > prc->PreviousFrameMAD)
+       ? (int) ((float)(RC_MODEL_HISTORY-1) * prc->PreviousFrameMAD / prc->CurrentFrameMAD)
+       : (int) ((float)(RC_MODEL_HISTORY-1) * prc->CurrentFrameMAD / prc->PreviousFrameMAD);
+     n_windowSize = iClip3(1, (m_Nc-1), n_windowSize); // presumably clamps to [1, m_Nc-1] - confirm iClip3 argument order
+     n_windowSize=imin(n_windowSize, imin(20, prc->MADm_windowSize + 1)); // NOTE(review): literal 20 here, updateRCModel caps with RC_MODEL_HISTORY-1 - confirm they are meant to match
+ 
+     /*update the previous window size*/
+     prc->MADm_windowSize=n_windowSize;
+ 
+     for (i = 0; i < (RC_MODEL_HISTORY-1); i++)
+     {
+       PictureRejected[i] = FALSE;
+     }
+ 
+     //update the MAD for the previous frame (done after the window size above has read the old value)
+     if( (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+       prc->PreviousFrameMAD=prc->CurrentFrameMAD;
+ 
+     // initial MAD model estimator
+     MADModelEstimator (prc, n_windowSize, PictureRejected);
+ 
+     // remove outlier
+     for (i = 0; i < n_windowSize; i++)
+     {
+       error[i] = prc->MADPictureC1 * prc->ReferenceMAD[i] + prc->MADPictureC2 - prc->PictureMAD[i]; // linear prediction minus stored MAD
+       std += (error[i] * error[i]);
+     }
+ 
+     threshold = (n_windowSize == 2) ? 0 : sqrt (std / n_windowSize); // RMS error; with exactly two samples any non-zero error is rejected
+     for (i = 0; i < n_windowSize; i++)
+     {
+       if (fabs(error[i]) > threshold)
+         PictureRejected[i] = TRUE;
+     }
+     // always include the last data point
+     PictureRejected[0] = FALSE;
+ 
+     // second MAD model estimator
+     MADModelEstimator (prc, n_windowSize, PictureRejected);
+   }
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    MAD model estimator: fit MADPictureC1 and MADPictureC2 of the linear
+  *    prediction PictureMAD = C1 * ReferenceMAD + C2 to the samples of the
+  *    window that are not rejected
+  *
+  * \param prc
+  *    quadratic rate control state; MADPictureC1 / MADPictureC2 (and, for
+  *    P-type updates, PMADPictureC1 / PMADPictureC2) are overwritten
+  * \param n_windowSize
+  *    number of history samples to look at, starting at index 0
+  * \param PictureRejected
+  *    per-sample flags; a non-zero entry excludes that sample from the fit
+  *
+  *************************************************************************************
+ */
+ void MADModelEstimator (rc_quadratic *prc, int n_windowSize, Boolean *PictureRejected)
+ {
+   int     k;
+   int     n_used      = n_windowSize;  // samples that survive the rejection flags
+   double  refMAD      = 0.0;           // PictureMAD of the last accepted sample
+   Boolean secondOrder = FALSE;
+   // sums for the 2x2 normal equations
+   double  cnt = 0.0, sumRef = 0.0, sumRef2 = 0.0, sumPic = 0.0, sumPicRef = 0.0;
+   double  det;
+ 
+   // one scan: count the accepted samples and remember the last accepted MAD
+   for (k = 0; k < n_windowSize; k++)
+   {
+     if (PictureRejected[k])
+       n_used--;
+     else
+       refMAD = prc->PictureMAD[k];
+   }
+ 
+   // default MAD model estimation results
+   prc->MADPictureC1 = prc->MADPictureC2 = 0.0;
+ 
+   // 1st order estimate: mean of PictureMAD / ReferenceMAD over the accepted
+   // samples; a 2nd order fit is only attempted when the accepted MADs differ
+   for (k = 0; k < n_windowSize; k++)
+   {
+     if (PictureRejected[k])
+       continue;
+     if (prc->PictureMAD[k] != refMAD)
+       secondOrder = TRUE;
+     // NOTE(review): no guard against ReferenceMAD[k] == 0 here
+     prc->MADPictureC1 += prc->PictureMAD[k] / (prc->ReferenceMAD[k]*n_used);
+   }
+ 
+   // take 2nd order model to estimate C1 and C2
+   if (secondOrder && (n_used >= 1))
+   {
+     for (k = 0; k < n_windowSize; k++)
+     {
+       if (PictureRejected[k])
+         continue;
+       cnt        = cnt + 1.0;
+       sumRef    += prc->ReferenceMAD[k];
+       sumRef2   += prc->ReferenceMAD[k] * prc->ReferenceMAD[k];
+       sumPic    += prc->PictureMAD[k];
+       sumPicRef += prc->PictureMAD[k]   * prc->ReferenceMAD[k];
+     }
+ 
+     // solve the equation of AX = B
+     det = cnt * sumRef2 - sumRef * sumRef;
+     if (fabs(det) > 0.000001)
+     {
+       prc->MADPictureC2 = (sumPic * sumRef2 - sumPicRef * sumRef) / det;
+       prc->MADPictureC1 = (sumPicRef * cnt - sumPic * sumRef) / det;
+     }
+     else
+     {
+       // (near) singular system: slope only, no offset
+       // NOTE(review): sumRef may be 0 when every accepted ReferenceMAD is 0
+       prc->MADPictureC1 = sumPic/sumRef;
+       prc->MADPictureC2 = 0.0;
+     }
+   }
+ 
+   // keep the result as the P picture model
+   if( (img->type == P_SLICE || input->RCUpdateMode == RC_MODE_1) && (IMG_NUMBER) )
+   {
+     prc->PMADPictureC1 = prc->MADPictureC1;
+     prc->PMADPictureC2 = prc->MADPictureC2;
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    compute a  quantization parameter for each frame (RC_MODE_0)
+  *
+  *************************************************************************************
+ */
+ int updateQPRC0(rc_quadratic *prc, int topfield)
+ {
+   int m_Bits;
+   int BFrameNumber;
+   int StepSize;
+   int SumofBasicUnit;
+   int DuantQp, m_Qp, m_Hp;
+ 
+   /* frame layer rate control */
+   if(img->BasicUnit==img->FrameSizeInMbs )
+   {
+     /* fixed quantization parameter is used to coded I frame, the first P frame and the first B frame
+     the quantization parameter is adjusted according the available channel bandwidth and
+     the type of video */
+     /*top field*/
+     if((topfield) || (generic_RC->FieldControl==0))
+     {
+       if (img->type==I_SLICE)
+       {
+         prc->m_Qc = prc->MyInitialQp;
+         return prc->m_Qc;
+       }
+       else if(img->type == B_SLICE)
+       {
+         if(input->successive_Bframe==1)
+         {
+           if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+             updateQPInterlace( prc );
+ 
+           prc->m_Qc = imin(prc->PrevLastQP, prc->CurrLastQP) + 2;
+           prc->m_Qc = imax(prc->m_Qc, imax(prc->PrevLastQP, prc->CurrLastQP));
+           prc->m_Qc = imax(prc->m_Qc, prc->CurrLastQP + 1);
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         else
+         {
+           BFrameNumber = (prc->NumberofBFrames + 1) % input->successive_Bframe;
+           if(BFrameNumber==0)
+             BFrameNumber = input->successive_Bframe;
+ 
+           /*adaptive field/frame coding*/
+           if(BFrameNumber==1)
+           {
+             if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+               updateQPInterlace( prc );
+           }
+ 
+           if((prc->CurrLastQP-prc->PrevLastQP)<=(-2*input->successive_Bframe-3))
+             StepSize=-3;
+           else  if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe-2))
+             StepSize=-2;
+           else if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe-1))
+             StepSize=-1;
+           else if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe))
+             StepSize=0;
+           else if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe+1))
+             StepSize=1;
+           else
+             StepSize=2;
+ 
+           prc->m_Qc  = prc->PrevLastQP + StepSize;
+           prc->m_Qc += iClip3( -2 * (BFrameNumber - 1), 2*(BFrameNumber-1),
+             (BFrameNumber-1)*(prc->CurrLastQP-prc->PrevLastQP)/(input->successive_Bframe-1));
+           prc->m_Qc  = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         return prc->m_Qc;
+       }
+       else if( img->type == P_SLICE && generic_RC->NumberofPPicture == 0 && (IMG_NUMBER) )
+       {
+         prc->m_Qc=prc->MyInitialQp;
+ 
+         if(generic_RC->FieldControl==0)
+           updateQPNonPicAFF( prc );
+         return prc->m_Qc;
+       }
+       else
+       {
+         /*adaptive field/frame coding*/
+         if( ( input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace ) && generic_RC->FieldControl == 0 )
+           updateQPInterlaceBU( prc );
+ 
+         prc->m_X1 = prc->Pm_X1;
+         prc->m_X2 = prc->Pm_X2;
+         prc->MADPictureC1 = prc->PMADPictureC1;
+         prc->MADPictureC2 = prc->PMADPictureC2;
+         prc->PreviousPictureMAD = prc->PPictureMAD[0];
+ 
+         DuantQp = prc->PDuantQp;
+         m_Qp = prc->Pm_Qp;
+         m_Hp = prc->PPreHeader;
+ 
+         /* predict the MAD of current picture*/
+         prc->CurrentFrameMAD=prc->MADPictureC1*prc->PreviousPictureMAD + prc->MADPictureC2;
+ 
+         /*compute the number of bits for the texture*/
+         if(prc->Target < 0)
+         {
+           prc->m_Qc=m_Qp+DuantQp;
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         else
+         {
+           m_Bits = prc->Target-m_Hp;
+           m_Bits = imax(m_Bits, (int)(prc->bit_rate/(MINVALUE*prc->frame_rate)));
+ 
+           updateModelQPFrame( prc, m_Bits );
+ 
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // clipping
+           prc->m_Qc = iClip3(m_Qp-DuantQp, m_Qp+DuantQp, prc->m_Qc); // control variation
+         }
+ 
+         if( generic_RC->FieldControl == 0 )
+           updateQPNonPicAFF( prc );
+ 
+         return prc->m_Qc;
+       }
+     }
+     /*bottom field*/
+     else
+     {
+       if( img->type==P_SLICE && generic_RC->NoGranularFieldRC == 0 && (IMG_NUMBER) )
+         updateBottomField( prc );
+       return prc->m_Qc;
+     }
+   }
+   /*basic unit layer rate control*/
+   else
+   {
+     /*top field of I frame*/
+     if (img->type==I_SLICE || (!IMG_NUMBER))
+     {
+       prc->m_Qc = prc->MyInitialQp;
+       return prc->m_Qc;
+     }
+     else if( img->type == B_SLICE )
+     {
+       /*top field of B frame*/
+       if((topfield)||(generic_RC->FieldControl==0))
+       {
+         if(input->successive_Bframe==1)
+         {
+           /*adaptive field/frame coding*/
+           if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+             updateQPInterlace( prc );
+ 
+           if(prc->PrevLastQP==prc->CurrLastQP)
+             prc->m_Qc=prc->PrevLastQP+2;
+           else
+             prc->m_Qc=(prc->PrevLastQP+prc->CurrLastQP)/2+1;
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         else
+         {
+           BFrameNumber=(prc->NumberofBFrames+1)%input->successive_Bframe;
+           if(BFrameNumber==0)
+             BFrameNumber=input->successive_Bframe;
+ 
+           /*adaptive field/frame coding*/
+           if(BFrameNumber==1)
+           {
+             if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+               updateQPInterlace( prc );
+           }
+ 
+           if((prc->CurrLastQP-prc->PrevLastQP)<=(-2*input->successive_Bframe-3))
+             StepSize=-3;
+           else  if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe-2))
+             StepSize=-2;
+           else if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe-1))
+             StepSize=-1;
+           else if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe))
+             StepSize=0;//0
+           else if((prc->CurrLastQP-prc->PrevLastQP)==(-2*input->successive_Bframe+1))
+             StepSize=1;//1
+           else
+             StepSize=2;//2
+           prc->m_Qc=prc->PrevLastQP+StepSize;
+           prc->m_Qc +=
+             iClip3( -2*(BFrameNumber-1), 2*(BFrameNumber-1), (BFrameNumber-1)*(prc->CurrLastQP-prc->PrevLastQP)/(input->successive_Bframe-1) );
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         return prc->m_Qc;
+       }
+       /*bottom field of B frame*/
+       else
+       {
+         return prc->m_Qc;
+       }
+     }
+     else if( img->type == P_SLICE )
+     {
+       if( generic_RC->NumberofGOP == 1 && generic_RC->NumberofPPicture == 0 )
+       {
+         if((generic_RC->FieldControl==0)||((generic_RC->FieldControl==1) && (generic_RC->NoGranularFieldRC==0)))
+           return updateFirstP( prc, topfield );
+       }
+       else
+       {
+         prc->m_X1=prc->Pm_X1;
+         prc->m_X2=prc->Pm_X2;
+         prc->MADPictureC1=prc->PMADPictureC1;
+         prc->MADPictureC2=prc->PMADPictureC2;
+ 
+         m_Qp=prc->Pm_Qp;
+ 
+         if(generic_RC->FieldControl==0)
+           SumofBasicUnit=prc->TotalNumberofBasicUnit;
+         else
+           SumofBasicUnit=prc->TotalNumberofBasicUnit>>1;
+ 
+         /*the average QP of the previous frame is used to coded the first basic unit of the current frame or field*/
+         if(prc->NumberofBasicUnit==SumofBasicUnit)
+           return updateFirstBU( prc, topfield );
+         else
+         {
+           /*compute the number of remaining bits*/
+           prc->Target -= (generic_RC->NumberofBasicUnitHeaderBits + generic_RC->NumberofBasicUnitTextureBits);
+           generic_RC->NumberofBasicUnitHeaderBits  = 0;
+           generic_RC->NumberofBasicUnitTextureBits = 0;
+           if(prc->Target<0)
+             return updateNegativeTarget( prc, topfield, m_Qp );
+           else
+           {
+             /*predict the MAD of current picture*/
+             predictCurrPicMAD( prc );
+ 
+             /*compute the total number of bits for the current basic unit*/
+             updateModelQPBU( prc, topfield, m_Qp );
+ 
+             prc->TotalFrameQP +=prc->m_Qc;
+             prc->Pm_Qp=prc->m_Qc;
+             prc->NumberofBasicUnit--;
+             if( prc->NumberofBasicUnit == 0 && img->type == P_SLICE && (IMG_NUMBER) )
+               updateLastBU( prc, topfield );
+ 
+             return prc->m_Qc;
+           }
+         }
+       }
+     }
+   }
+   return prc->m_Qc;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    compute a quantization parameter for each frame, or for each basic unit
+  *    when img->BasicUnit differs from img->FrameSizeInMbs.
+  *    This variant never inspects img->type: only the very first picture
+  *    (!IMG_NUMBER) and the case generic_RC->NumberofPPicture == 0 are
+  *    special-cased; everything else goes through the quadratic model.
+  *
+  * \param prc
+  *    quadratic rate control state; prc->m_Qc and the model fields are updated
+  * \param topfield
+  *    non-zero selects the top field path, which is also taken whenever
+  *    generic_RC->FieldControl is 0
+  *
+  * \return
+  *    the selected quantization parameter (prc->m_Qc)
+  *
+  *************************************************************************************
+ */
+ int updateQPRC1(rc_quadratic *prc, int topfield)
+ {
+   int m_Bits;
+   int SumofBasicUnit;
+   int DuantQp, m_Qp, m_Hp;
+ 
+   /* frame layer rate control */
+   if(img->BasicUnit == img->FrameSizeInMbs)
+   {
+     /* fixed quantization parameter is used to code the I frame, the first P frame and the first B frame
+     the quantization parameter is adjusted according the available channel bandwidth and
+     the type of video */
+     /*top field*/
+     if((topfield) || (generic_RC->FieldControl==0))
+     {
+       if ((!IMG_NUMBER))
+       {
+         prc->m_Qc = prc->MyInitialQp;
+         return prc->m_Qc;
+       }
+       else if( generic_RC->NumberofPPicture == 0 && (IMG_NUMBER) )
+       {
+         prc->m_Qc=prc->MyInitialQp;
+ 
+         if(generic_RC->FieldControl==0)
+           updateQPNonPicAFF( prc );
+         return prc->m_Qc;
+       }
+       else
+       {
+         /*adaptive field/frame coding*/
+         if( ( input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace ) && generic_RC->FieldControl == 0 )
+           updateQPInterlaceBU( prc );
+ 
+         prc->m_X1 = prc->Pm_X1;
+         prc->m_X2 = prc->Pm_X2;
+         prc->MADPictureC1 = prc->PMADPictureC1;
+         prc->MADPictureC2 = prc->PMADPictureC2;
+         prc->PreviousPictureMAD = prc->PPictureMAD[0];
+ 
+         DuantQp = prc->PDuantQp; // maximum QP change allowed relative to m_Qp (see the clipping below)
+         m_Qp = prc->Pm_Qp;
+         m_Hp = prc->PPreHeader; // subtracted from the target below to obtain the texture bits
+ 
+         /* predict the MAD of current picture*/
+         prc->CurrentFrameMAD=prc->MADPictureC1*prc->PreviousPictureMAD + prc->MADPictureC2;
+ 
+         /*compute the number of bits for the texture*/
+         if(prc->Target < 0)
+         {
+           prc->m_Qc=m_Qp+DuantQp;
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         else
+         {
+           m_Bits = prc->Target-m_Hp;
+           m_Bits = imax(m_Bits, (int)(prc->bit_rate/(MINVALUE*prc->frame_rate))); // enforce a lower bound on the texture bits
+ 
+           updateModelQPFrame( prc, m_Bits );
+ 
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // clipping
+           prc->m_Qc = iClip3(m_Qp-DuantQp, m_Qp+DuantQp, prc->m_Qc); // control variation
+         }
+ 
+         if( generic_RC->FieldControl == 0 )
+           updateQPNonPicAFF( prc );
+ 
+         return prc->m_Qc;
+       }
+     }
+     /*bottom field*/
+     else
+     {
+       if( generic_RC->NoGranularFieldRC == 0 && (IMG_NUMBER) )
+         updateBottomField( prc );
+       return prc->m_Qc;
+     }
+   }
+   /*basic unit layer rate control*/
+   else
+   {
+     /*top field of I frame*/
+     if ((!IMG_NUMBER))
+     {
+       prc->m_Qc = prc->MyInitialQp;
+       return prc->m_Qc;
+     }
+     else
+     {
+       if((generic_RC->NumberofGOP==1)&&(generic_RC->NumberofPPicture==0))
+       {
+         if((generic_RC->FieldControl==0)||((generic_RC->FieldControl==1) && (generic_RC->NoGranularFieldRC==0)))
+           return updateFirstP( prc, topfield );
+       }
+       else
+       {
+         prc->m_X1=prc->Pm_X1;
+         prc->m_X2=prc->Pm_X2;
+         prc->MADPictureC1=prc->PMADPictureC1;
+         prc->MADPictureC2=prc->PMADPictureC2;
+ 
+         m_Qp=prc->Pm_Qp;
+ 
+         if(generic_RC->FieldControl==0)
+           SumofBasicUnit=prc->TotalNumberofBasicUnit;
+         else
+           SumofBasicUnit=prc->TotalNumberofBasicUnit>>1;
+ 
+         /*the average QP of the previous frame is used to code the first basic unit of the current frame or field*/
+         if(prc->NumberofBasicUnit==SumofBasicUnit)
+           return updateFirstBU( prc, topfield );
+         else
+         {
+           /*compute the number of remaining bits*/
+           prc->Target -= (generic_RC->NumberofBasicUnitHeaderBits + generic_RC->NumberofBasicUnitTextureBits);
+           generic_RC->NumberofBasicUnitHeaderBits  = 0;
+           generic_RC->NumberofBasicUnitTextureBits = 0;
+           if(prc->Target<0)
+             return updateNegativeTarget( prc, topfield, m_Qp );
+           else
+           {
+             /*predict the MAD of current picture*/
+             predictCurrPicMAD( prc );
+ 
+             /*compute the total number of bits for the current basic unit*/
+             updateModelQPBU( prc, topfield, m_Qp );
+ 
+             prc->TotalFrameQP +=prc->m_Qc;
+             prc->Pm_Qp=prc->m_Qc;
+             prc->NumberofBasicUnit--;
+             if((prc->NumberofBasicUnit==0) && (IMG_NUMBER) )
+               updateLastBU( prc, topfield );
+ 
+             return prc->m_Qc;
+           }
+         }
+       }
+     }
+   }
+   return prc->m_Qc; // reached when the first-P case above does not return
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    compute a quantization parameter for each frame, or for each basic unit
+  *    when img->BasicUnit differs from img->FrameSizeInMbs.
+  *    This variant dispatches on img->type: I slices after the first picture
+  *    reuse a stored QP (CurrLastQP in the frame layer, PrevLastQP in the basic
+  *    unit layer), B slices start from max(PrevLastQP, CurrLastQP) plus an
+  *    offset, and the remaining pictures use the quadratic model.
+  *
+  * \param prc
+  *    quadratic rate control state; prc->m_Qc and the model fields are updated
+  * \param topfield
+  *    non-zero selects the top field path, which is also taken whenever
+  *    generic_RC->FieldControl is 0
+  *
+  * \return
+  *    the selected quantization parameter (prc->m_Qc)
+  *
+  *************************************************************************************
+ */
+ int updateQPRC2(rc_quadratic *prc, int topfield)
+ {
+   int m_Bits;
+   int SumofBasicUnit;
+   int DuantQp, m_Qp, m_Hp;
+ 
+   /* frame layer rate control */
+   if(img->BasicUnit==img->FrameSizeInMbs )
+   {
+     /* fixed quantization parameter is used to code the I frame, the first P frame and the first B frame
+     the quantization parameter is adjusted according the available channel bandwidth and
+     the type of video */
+     /*top field*/
+     if((topfield) || (generic_RC->FieldControl==0))
+     {
+       if ((!IMG_NUMBER))
+       {
+         prc->m_Qc = prc->MyInitialQp;
+         return prc->m_Qc;
+       }
+       else if (img->type==I_SLICE)
+       {
+         if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+           updateQPInterlace( prc );
+ 
+         prc->m_Qc = prc->CurrLastQP; // Set QP to average qp of last P frame
+         return prc->m_Qc;
+       }
+       else if(img->type == B_SLICE)
+       {
+         int prevQP = imax(prc->PrevLastQP, prc->CurrLastQP); // sampled before updateQPInterlace() may shift the QP history
+         if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+           updateQPInterlace( prc );
+ 
+         if (input->HierarchicalCoding)
+         {
+           if (img->b_frame_to_code == 0)
+             prc->m_Qc = prevQP;
+           else
+             prc->m_Qc = prevQP + img->GopLevels - gop_structure[img->b_frame_to_code-1].hierarchy_layer;
+         }
+         else
+           prc->m_Qc = prevQP + 2 - img->nal_reference_idc;
+         prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+ 
+         return prc->m_Qc;
+       }
+       else if( img->type == P_SLICE && generic_RC->NumberofPPicture == 0 && (IMG_NUMBER) )
+       {
+         prc->m_Qc=prc->MyInitialQp;
+ 
+         if(generic_RC->FieldControl==0)
+           updateQPNonPicAFF( prc );
+         return prc->m_Qc;
+       }
+       else
+       {
+         /*adaptive field/frame coding*/
+         if( ( input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace ) && generic_RC->FieldControl == 0 )
+           updateQPInterlaceBU( prc );
+ 
+         prc->m_X1 = prc->Pm_X1;
+         prc->m_X2 = prc->Pm_X2;
+         prc->MADPictureC1 = prc->PMADPictureC1;
+         prc->MADPictureC2 = prc->PMADPictureC2;
+         prc->PreviousPictureMAD = prc->PPictureMAD[0];
+ 
+         DuantQp = prc->PDuantQp; // maximum QP change allowed relative to m_Qp (see the clipping below)
+         m_Qp = prc->Pm_Qp;
+         m_Hp = prc->PPreHeader; // subtracted from the target below to obtain the texture bits
+ 
+         /* predict the MAD of current picture*/
+         prc->CurrentFrameMAD=prc->MADPictureC1*prc->PreviousPictureMAD + prc->MADPictureC2;
+ 
+         /*compute the number of bits for the texture*/
+         if(prc->Target < 0)
+         {
+           prc->m_Qc=m_Qp+DuantQp;
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         else
+         {
+           m_Bits = prc->Target-m_Hp;
+           m_Bits = imax(m_Bits, (int)(prc->bit_rate/(MINVALUE*prc->frame_rate))); // enforce a lower bound on the texture bits
+ 
+           updateModelQPFrame( prc, m_Bits );
+ 
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // clipping
+           prc->m_Qc = iClip3(m_Qp-DuantQp, m_Qp+DuantQp, prc->m_Qc); // control variation
+         }
+ 
+         if( generic_RC->FieldControl == 0 )
+           updateQPNonPicAFF( prc );
+ 
+         return prc->m_Qc;
+       }
+     }
+     /*bottom field*/
+     else
+     {
+       if( img->type==P_SLICE && generic_RC->NoGranularFieldRC == 0 && (IMG_NUMBER) )
+         updateBottomField( prc );
+       return prc->m_Qc;
+     }
+   }
+   /*basic unit layer rate control*/
+   else
+   {
+     /*top field of I frame*/
+     if ((!IMG_NUMBER))
+     {
+       prc->m_Qc = prc->MyInitialQp;
+       return prc->m_Qc;
+     }
+     else if (img->type==I_SLICE)
+     {
+       /*adaptive field/frame coding*/
+       if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+         updateQPInterlace( prc );
+ 
+       prc->m_Qc = prc->PrevLastQP; // Set QP to average qp of last P frame
+       prc->PrevLastQP = prc->CurrLastQP;
+       prc->CurrLastQP = prc->PrevLastQP; // NOTE(review): PrevLastQP was overwritten on the previous line, so CurrLastQP keeps its value here - confirm intent
+       prc->PAveFrameQP = prc->CurrLastQP;
+ 
+       return prc->m_Qc;
+     }
+     else if(img->type == B_SLICE)
+     {
+       int prevQP = imax(prc->PrevLastQP, prc->CurrLastQP); // sampled before updateQPInterlace() may shift the QP history
+       if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+         updateQPInterlace( prc );
+ 
+       if (input->HierarchicalCoding)
+       {
+ 
+         if (img->b_frame_to_code == 0)
+           prc->m_Qc = prevQP;
+         else
+           prc->m_Qc = prevQP + img->GopLevels - gop_structure[img->b_frame_to_code-1].hierarchy_layer;
+       }
+       else
+         prc->m_Qc = prevQP + 2 - img->nal_reference_idc;
+       prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+ 
+       return prc->m_Qc;
+ 
+     }
+     else if( img->type == P_SLICE )
+     {
+       if((generic_RC->NumberofGOP==1)&&(generic_RC->NumberofPPicture==0))
+       {
+         if((generic_RC->FieldControl==0)||((generic_RC->FieldControl==1) && (generic_RC->NoGranularFieldRC==0)))
+           return updateFirstP( prc, topfield );
+       }
+       else
+       {
+         prc->m_X1=prc->Pm_X1;
+         prc->m_X2=prc->Pm_X2;
+         prc->MADPictureC1=prc->PMADPictureC1;
+         prc->MADPictureC2=prc->PMADPictureC2;
+ 
+         m_Qp=prc->Pm_Qp;
+ 
+         if(generic_RC->FieldControl==0)
+           SumofBasicUnit=prc->TotalNumberofBasicUnit;
+         else
+           SumofBasicUnit=prc->TotalNumberofBasicUnit>>1;
+ 
+         /*the average QP of the previous frame is used to code the first basic unit of the current frame or field*/
+         if(prc->NumberofBasicUnit==SumofBasicUnit)
+           return updateFirstBU( prc, topfield );
+         else
+         {
+           /*compute the number of remaining bits*/
+           prc->Target -= (generic_RC->NumberofBasicUnitHeaderBits + generic_RC->NumberofBasicUnitTextureBits);
+           generic_RC->NumberofBasicUnitHeaderBits  = 0;
+           generic_RC->NumberofBasicUnitTextureBits = 0;
+           if(prc->Target<0)
+             return updateNegativeTarget( prc, topfield, m_Qp );
+           else
+           {
+             /*predict the MAD of current picture*/
+             predictCurrPicMAD( prc );
+ 
+             /*compute the total number of bits for the current basic unit*/
+             updateModelQPBU( prc, topfield, m_Qp );
+ 
+             prc->TotalFrameQP +=prc->m_Qc;
+             prc->Pm_Qp=prc->m_Qc;
+             prc->NumberofBasicUnit--;
+             if((prc->NumberofBasicUnit==0) && img->type == P_SLICE && (IMG_NUMBER) )
+               updateLastBU( prc, topfield );
+ 
+             return prc->m_Qc;
+           }
+         }
+       }
+     }
+   }
+   return prc->m_Qc; // reached when no branch above returned (e.g. the first-P case without a matching field mode)
+ }
+ 
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    compute a quantization parameter for each frame, or for each basic unit
+  *    of a P slice when img->BasicUnit differs from img->FrameSizeInMbs.
+  *    In this variant every non-P slice takes the frame layer path regardless
+  *    of the basic unit size, and the quadratic model is applied to all slice
+  *    types; B slices are additionally clipped around the average of
+  *    PrevLastQP and CurrLastQP.
+  *
+  * \param prc
+  *    quadratic rate control state; prc->m_Qc and the model fields are updated
+  * \param topfield
+  *    non-zero selects the top field path, which is also taken whenever
+  *    generic_RC->FieldControl is 0
+  *
+  * \return
+  *    the selected quantization parameter (prc->m_Qc)
+  *
+  *************************************************************************************
+ */
+ int updateQPRC3(rc_quadratic *prc, int topfield)
+ {
+   int m_Bits;
+   int SumofBasicUnit;
+   int DuantQp, m_Qp, m_Hp;
+ 
+   /* frame layer rate control */
+   if(img->BasicUnit==img->FrameSizeInMbs || img->type != P_SLICE )
+   {
+     /* fixed quantization parameter is used to code the I frame, the first P frame and the first B frame
+     the quantization parameter is adjusted according the available channel bandwidth and
+     the type of video */
+     /*top field*/
+     if((topfield) || (generic_RC->FieldControl==0))
+     {
+       if ((!IMG_NUMBER))
+       {
+         prc->m_Qc = prc->MyInitialQp;
+         return prc->m_Qc;
+       }
+       else if( img->type == P_SLICE && generic_RC->NumberofPPicture == 0 && (IMG_NUMBER) )
+       {
+         prc->m_Qc=prc->MyInitialQp;
+ 
+         if(generic_RC->FieldControl==0)
+           updateQPNonPicAFF( prc );
+         return prc->m_Qc;
+       }
+       else
+       {
+         /*adaptive field/frame coding*/
+         if( img->type == P_SLICE && ( input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace ) && generic_RC->FieldControl == 0 )
+           updateQPInterlaceBU( prc );
+ 
+         prc->m_X1 = prc->Pm_X1;
+         prc->m_X2 = prc->Pm_X2;
+         prc->MADPictureC1 = prc->PMADPictureC1;
+         prc->MADPictureC2 = prc->PMADPictureC2;
+         prc->PreviousPictureMAD = prc->PPictureMAD[0];
+ 
+         DuantQp = prc->PDuantQp; // maximum QP change allowed relative to m_Qp (applied to P slices below)
+         m_Qp = prc->Pm_Qp;
+         m_Hp = prc->PPreHeader; // subtracted from the target below to obtain the texture bits
+ 
+         if ( img->BasicUnit < img->FrameSizeInMbs && img->type != P_SLICE )
+         {
+           // when RC_MODE_3 is set and basic unit is smaller than a frame, note that:
+           // the linear MAD model and the quadratic QP model operate on small units and not on a whole frame;
+           // we therefore have to account for this
+           prc->PreviousPictureMAD = prc->PreviousWholeFrameMAD;
+         }
+         if ( img->type == I_SLICE )
+           m_Hp = 0; // it is usually a very small portion of the total I_SLICE bit budget
+ 
+         /* predict the MAD of current picture*/
+         prc->CurrentFrameMAD=prc->MADPictureC1*prc->PreviousPictureMAD + prc->MADPictureC2;
+ 
+         /*compute the number of bits for the texture*/
+         if(prc->Target < 0)
+         {
+           prc->m_Qc=m_Qp+DuantQp;
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // Clipping
+         }
+         else
+         {
+           if ( img->type != P_SLICE )
+           {
+             if ( img->BasicUnit < img->FrameSizeInMbs )
+               m_Bits =(prc->Target-m_Hp)/prc->TotalNumberofBasicUnit; // scale the budget down to one basic unit, matching the per-unit model
+             else
+               m_Bits =prc->Target-m_Hp;
+           }
+           else {
+             m_Bits = prc->Target-m_Hp;
+             m_Bits = imax(m_Bits, (int)(prc->bit_rate/(MINVALUE*prc->frame_rate))); // lower bound is applied to P slices only
+           }
+           updateModelQPFrame( prc, m_Bits );
+ 
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // clipping
+           if ( img->type == P_SLICE )
+             prc->m_Qc = iClip3(m_Qp-DuantQp, m_Qp+DuantQp, prc->m_Qc); // control variation
+         }
+ 
+         if( img->type == P_SLICE && generic_RC->FieldControl == 0 )
+           updateQPNonPicAFF( prc );
+ 
+         if ( img->type == B_SLICE )
+         {
+           // hierarchical adjustment
+           int prevqp = ((prc->PrevLastQP+prc->CurrLastQP) >> 1) + 1;
+           if ( input->HierarchicalCoding && img->b_frame_to_code)
+             prc->m_Qc -= gop_structure[img->b_frame_to_code-1].hierarchy_layer;
+           // check bounds
+           prc->m_Qc = iClip3(prevqp - (input->HierarchicalCoding ? 0 : 5), prevqp + 5, prc->m_Qc); // control variation
+           prc->m_Qc = iClip3(prc->RC_MIN_QUANT, prc->RC_MAX_QUANT, prc->m_Qc); // clipping
+         }
+         return prc->m_Qc;
+       }
+     }
+     /*bottom field*/
+     else
+     {
+       if( img->type==P_SLICE && generic_RC->NoGranularFieldRC == 0 && (IMG_NUMBER) )
+         updateBottomField( prc );
+       return prc->m_Qc;
+     }
+   }
+   /*basic unit layer rate control*/
+   else
+   {
+     /*top field of I frame*/
+     if ((!IMG_NUMBER))
+     {
+       prc->m_Qc = prc->MyInitialQp;
+       return prc->m_Qc;
+     }
+     else if( img->type == P_SLICE )
+     {
+       if((generic_RC->NumberofGOP==1)&&(generic_RC->NumberofPPicture==0))
+       {
+         if((generic_RC->FieldControl==0)||((generic_RC->FieldControl==1) && (generic_RC->NoGranularFieldRC==0)))
+           return updateFirstP( prc, topfield );
+       }
+       else
+       {
+         prc->m_X1=prc->Pm_X1;
+         prc->m_X2=prc->Pm_X2;
+         prc->MADPictureC1=prc->PMADPictureC1;
+         prc->MADPictureC2=prc->PMADPictureC2;
+ 
+         m_Qp=prc->Pm_Qp;
+ 
+         if(generic_RC->FieldControl==0)
+           SumofBasicUnit=prc->TotalNumberofBasicUnit;
+         else
+           SumofBasicUnit=prc->TotalNumberofBasicUnit>>1;
+ 
+         /*the average QP of the previous frame is used to code the first basic unit of the current frame or field*/
+         if(prc->NumberofBasicUnit==SumofBasicUnit)
+           return updateFirstBU( prc, topfield );
+         else
+         {
+           /*compute the number of remaining bits*/
+           prc->Target -= (generic_RC->NumberofBasicUnitHeaderBits + generic_RC->NumberofBasicUnitTextureBits);
+           generic_RC->NumberofBasicUnitHeaderBits  = 0;
+           generic_RC->NumberofBasicUnitTextureBits = 0;
+           if(prc->Target<0)
+             return updateNegativeTarget( prc, topfield, m_Qp );
+           else
+           {
+             /*predict the MAD of current picture*/
+             predictCurrPicMAD( prc );
+ 
+             /*compute the total number of bits for the current basic unit*/
+             updateModelQPBU( prc, topfield, m_Qp );
+ 
+             prc->TotalFrameQP +=prc->m_Qc;
+             prc->Pm_Qp=prc->m_Qc;
+             prc->NumberofBasicUnit--;
+             if((prc->NumberofBasicUnit==0) && img->type == P_SLICE && (IMG_NUMBER) )
+               updateLastBU( prc, topfield );
+ 
+             return prc->m_Qc;
+           }
+         }
+       }
+     }
+   }
+   return prc->m_Qc; // reached when no branch above returned (e.g. the first-P case without a matching field mode)
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    shift the QP history (PrevLastQP <- CurrLastQP) and reload CurrLastQP from
+  *    the frame or field QP buffer, depending on generic_RC->FieldFrame.
+  *    Does nothing unless generic_RC->FieldControl is 0.
+  *************************************************************************************
+ */
+ void updateQPInterlace( rc_quadratic *prc )
+ {
+   if (generic_RC->FieldControl != 0)
+     return;
+ 
+   /* both original branches begin by shifting the history */
+   prc->PrevLastQP = prc->CurrLastQP;
+ 
+   /* FieldFrame == 1: previous choice is frame coding, otherwise field coding */
+   prc->CurrLastQP = (generic_RC->FieldFrame == 1) ? prc->FrameQPBuffer
+                                                   : prc->FieldQPBuffer;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    record the QP just chosen (prc->m_Qc). With frame_mbs_only_flag set it is
+  *    added to the running P picture total and pushed into the QP history;
+  *    otherwise it is only stored in FrameQPBuffer.
+  *************************************************************************************
+ */
+ void updateQPNonPicAFF( rc_quadratic *prc )
+ {
+   if (!active_sps->frame_mbs_only_flag)
+   {
+     /*adaptive field/frame coding*/
+     prc->FrameQPBuffer = prc->m_Qc;
+     return;
+   }
+ 
+   generic_RC->TotalQpforPPicture += prc->m_Qc;
+   prc->PrevLastQP = prc->CurrLastQP;
+   prc->CurrLastQP = prc->m_Qc;
+   prc->Pm_Qp      = prc->m_Qc;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    record the QP just chosen (prc->m_Qc) for a bottom field. In pure field
+  *    coding it is added to the running P picture total and pushed into the QP
+  *    history (PrevLastQP gets CurrLastQP + 1); otherwise it is only stored in
+  *    FieldQPBuffer.
+  *************************************************************************************
+ */
+ void updateBottomField( rc_quadratic *prc )
+ {
+   if (input->PicInterlace != FIELD_CODING)
+   {
+     /*adaptive field/frame coding*/
+     prc->FieldQPBuffer = prc->m_Qc;
+     return;
+   }
+ 
+   /*field coding*/
+   generic_RC->TotalQpforPPicture += prc->m_Qc;
+   prc->PrevLastQP = prc->CurrLastQP + 1;
+   prc->CurrLastQP = prc->m_Qc;  //+0 Recent change 13/1/2003
+   prc->Pm_Qp      = prc->m_Qc;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    basic unit update used for the first P frame: the QP is fixed to
+  *    prc->MyInitialQp, the per-unit bit counters are reset and one basic unit
+  *    is consumed. Once the last unit of a non-top field has been consumed the
+  *    QP is also published to the picture level state.
+  *
+  * \return
+  *    the selected quantization parameter (prc->m_Qc)
+  *************************************************************************************
+ */
+ int updateFirstP( rc_quadratic *prc, int topfield )
+ {
+   int lastUnitDone;
+ 
+   /*top field of the first P frame*/
+   prc->m_Qc = prc->MyInitialQp;
+   generic_RC->NumberofBasicUnitHeaderBits  = 0;
+   generic_RC->NumberofBasicUnitTextureBits = 0;
+   prc->NumberofBasicUnit--;
+ 
+   /*bottom field of the first P frame*/
+   lastUnitDone = (!topfield) && (prc->NumberofBasicUnit == 0);
+   if (lastUnitDone)
+   {
+     if (active_sps->frame_mbs_only_flag || input->PicInterlace == FIELD_CODING)
+     {
+       /*frame coding or field coding*/
+       generic_RC->TotalQpforPPicture += prc->m_Qc;
+       prc->PrevLastQP      = prc->CurrLastQP;
+       prc->CurrLastQP      = prc->m_Qc;
+       prc->PAveFrameQP     = prc->m_Qc;
+       prc->PAveHeaderBits3 = prc->PAveHeaderBits2;
+     }
+     else if (input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace)
+     {
+       /*adaptive frame/field coding*/
+       if (generic_RC->FieldControl == 0)
+       {
+         prc->FrameQPBuffer      = prc->m_Qc;
+         prc->FrameAveHeaderBits = prc->PAveHeaderBits2;
+       }
+       else
+       {
+         prc->FieldQPBuffer      = prc->m_Qc;
+         prc->FieldAveHeaderBits = prc->PAveHeaderBits2;
+       }
+     }
+   }
+ 
+   prc->Pm_Qp = prc->m_Qc;
+   prc->TotalFrameQP += prc->m_Qc;
+   return prc->m_Qc;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    basic unit update used when the remaining bit target is negative: the QP
+  *    is raised above m_Qp (by 2 when prc->GOPOverdue is TRUE, by prc->DDquant
+  *    otherwise), capped by RC_MAX_QUANT and by PAveFrameQP + 6 / + 3, and one
+  *    basic unit is consumed. After the last unit the average QP of the picture
+  *    is published to the picture level state.
+  *
+  * \param m_Qp
+  *    QP the increase is applied to
+  * \return
+  *    the selected quantization parameter (prc->m_Qc)
+  *************************************************************************************
+ */
+ int updateNegativeTarget( rc_quadratic *prc, int topfield, int m_Qp )
+ {
+   int avgQP;
+   int overdue = (prc->GOPOverdue == TRUE);
+   int qp;
+ 
+   qp = m_Qp + (overdue ? 2 : prc->DDquant);  //2
+   qp = imin(qp, prc->RC_MAX_QUANT);          // clipping
+   qp = imin(qp, prc->PAveFrameQP + ((input->basicunit >= prc->MBPerRow) ? 6 : 3));
+   prc->m_Qc = qp;
+ 
+   prc->TotalFrameQP += qp;
+   prc->NumberofBasicUnit--;
+ 
+   if (prc->NumberofBasicUnit == 0 && ((!topfield) || generic_RC->FieldControl == 0))
+   {
+     if (active_sps->frame_mbs_only_flag || input->PicInterlace == FIELD_CODING)
+     {
+       /*frame coding or field coding*/
+       avgQP = (int)((double)prc->TotalFrameQP/(double)prc->TotalNumberofBasicUnit+0.5);
+       if (generic_RC->NumberofPPicture == (input->intra_period - 2))
+         prc->QPLastPFrame = avgQP;
+ 
+       generic_RC->TotalQpforPPicture += avgQP;
+ 
+       /* an overdue GOP and any later P picture both store CurrLastQP + 1 */
+       if (overdue || generic_RC->NumberofPPicture > 0)
+       {
+         prc->PrevLastQP = prc->CurrLastQP + 1;
+         prc->CurrLastQP = avgQP;
+       }
+       else if (generic_RC->NumberofPPicture == 0 && generic_RC->NumberofGOP > 1)
+       {
+         prc->PrevLastQP = prc->CurrLastQP;
+         prc->CurrLastQP = avgQP;
+       }
+ 
+       prc->PAveFrameQP     = avgQP;
+       prc->PAveHeaderBits3 = prc->PAveHeaderBits2;
+     }
+     else if (input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace)
+     {
+       /*adaptive field/frame coding*/
+       avgQP = (int)((double)prc->TotalFrameQP/(double)prc->TotalNumberofBasicUnit+0.5);
+       if (generic_RC->FieldControl == 0)
+       {
+         prc->FrameQPBuffer      = avgQP;
+         prc->FrameAveHeaderBits = prc->PAveHeaderBits2;
+       }
+       else
+       {
+         prc->FieldQPBuffer      = avgQP;
+         prc->FieldAveHeaderBits = prc->PAveHeaderBits2;
+       }
+     }
+   }
+ 
+   /* PAveFrameQP is read here after the possible update above */
+   if (overdue)
+     prc->Pm_Qp = prc->PAveFrameQP;
+   else
+     prc->Pm_Qp = qp;
+ 
+   return qp;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    QP for the first basic unit of a frame or field: the previous average
+  *    frame QP (PAveFrameQP) is reused, raised by 2 (capped at RC_MAX_QUANT)
+  *    when the bit target is not positive. One basic unit is consumed.
+  *
+  * \return
+  *    the selected quantization parameter (prc->m_Qc)
+  *************************************************************************************
+ */
+ int updateFirstBU( rc_quadratic *prc, int topfield )
+ {
+   int adaptive = (input->PicInterlace == ADAPTIVE_CODING) || (input->MbInterlace);
+ 
+   /*adaptive field/frame coding*/
+   if (adaptive && generic_RC->FieldControl == 0)
+   {
+     /* FieldFrame == 1: previous choice is frame coding, otherwise field coding */
+     int frameChosen = (generic_RC->FieldFrame == 1);
+ 
+     if (generic_RC->NumberofPPicture > 0)
+       generic_RC->TotalQpforPPicture += frameChosen ? prc->FrameQPBuffer : prc->FieldQPBuffer;
+ 
+     prc->PAveFrameQP     = frameChosen ? prc->FrameQPBuffer      : prc->FieldQPBuffer;
+     prc->PAveHeaderBits3 = frameChosen ? prc->FrameAveHeaderBits : prc->FieldAveHeaderBits;
+   }
+ 
+   if (prc->Target > 0)
+   {
+     prc->m_Qc = prc->PAveFrameQP;
+   }
+   else
+   {
+     prc->m_Qc = prc->PAveFrameQP + 2;
+     prc->m_Qc = (prc->m_Qc > prc->RC_MAX_QUANT) ? prc->RC_MAX_QUANT : prc->m_Qc;
+ 
+     if (topfield || (generic_RC->FieldControl == 0))
+       prc->GOPOverdue = TRUE;
+   }
+ 
+   prc->TotalFrameQP += prc->m_Qc;
+   prc->NumberofBasicUnit--;
+   prc->Pm_Qp = prc->PAveFrameQP;
+ 
+   return prc->m_Qc;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    publish the average QP of the picture (TotalFrameQP divided by
+  *    TotalNumberofBasicUnit, rounded) after its last basic unit. Nothing is
+  *    done for a top field while generic_RC->FieldControl is non-zero.
+  *************************************************************************************
+ */
+ void updateLastBU( rc_quadratic *prc, int topfield )
+ {
+   int avgQP;
+ 
+   if (topfield && generic_RC->FieldControl != 0)
+     return;
+ 
+   if (active_sps->frame_mbs_only_flag || input->PicInterlace == FIELD_CODING)
+   {
+     /*frame coding or field coding*/
+     avgQP = (int)((double)prc->TotalFrameQP/(double) prc->TotalNumberofBasicUnit+0.5);
+     if (generic_RC->NumberofPPicture == (input->intra_period - 2))
+       prc->QPLastPFrame = avgQP;
+ 
+     generic_RC->TotalQpforPPicture += avgQP;
+     prc->PrevLastQP      = prc->CurrLastQP;
+     prc->CurrLastQP      = avgQP;
+     prc->PAveFrameQP     = avgQP;
+     prc->PAveHeaderBits3 = prc->PAveHeaderBits2;
+   }
+   else if (input->PicInterlace == ADAPTIVE_CODING || input->MbInterlace)
+   {
+     /* same average for both buffers; only the destination differs */
+     avgQP = (int)((double) prc->TotalFrameQP/(double)prc->TotalNumberofBasicUnit+0.5);
+     if (generic_RC->FieldControl == 0)
+     {
+       prc->FrameQPBuffer      = avgQP;
+       prc->FrameAveHeaderBits = prc->PAveHeaderBits2;
+     }
+     else
+     {
+       prc->FieldQPBuffer      = avgQP;
+       prc->FieldAveHeaderBits = prc->PAveHeaderBits2;
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    predict the MAD of the current basic unit with the linear model
+  *    (MADPictureC1 * previous MAD + MADPictureC2) and accumulate in TotalBUMAD
+  *    the squared predictions of the current and all following basic units.
+  *    FCBUPFMAD is the source when adaptive coding is active with
+  *    generic_RC->FieldControl == 1, BUPFMAD otherwise.
+  *************************************************************************************
+ */
+ void predictCurrPicMAD( rc_quadratic *prc )
+ {
+   int i;
+   /* index of the current basic unit; the loops run from the last unit down to it */
+   int first = prc->TotalNumberofBasicUnit - prc->NumberofBasicUnit;
+   int useFieldMAD = ((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+                     && (generic_RC->FieldControl==1);
+ 
+   prc->TotalBUMAD = 0;
+ 
+   if (useFieldMAD)
+   {
+     prc->CurrentFrameMAD = prc->MADPictureC1*prc->FCBUPFMAD[first]+prc->MADPictureC2;
+     i = prc->TotalNumberofBasicUnit - 1;
+     while (i >= first)
+     {
+       prc->CurrentBUMAD = prc->MADPictureC1*prc->FCBUPFMAD[i]+prc->MADPictureC2;
+       prc->TotalBUMAD  += prc->CurrentBUMAD*prc->CurrentBUMAD;
+       i--;
+     }
+   }
+   else
+   {
+     prc->CurrentFrameMAD = prc->MADPictureC1*prc->BUPFMAD[first]+prc->MADPictureC2;
+     i = prc->TotalNumberofBasicUnit - 1;
+     while (i >= first)
+     {
+       prc->CurrentBUMAD = prc->MADPictureC1*prc->BUPFMAD[i]+prc->MADPictureC2;
+       prc->TotalBUMAD  += prc->CurrentBUMAD*prc->CurrentBUMAD;
+       i--;
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    derive the QP of the current basic unit from the quadratic model and
+  *    store it in prc->m_Qc. The unit receives a share of prc->Target
+  *    proportional to its squared predicted MAD; the resulting QP is limited
+  *    to m_Qp +/- DDquant, PAveFrameQP +/- 6 (or 3) and the
+  *    RC_MIN_QUANT..RC_MAX_QUANT range.
+  *
+  * \param topfield
+  *    not used by this function
+  * \param m_Qp
+  *    reference QP for the +/- DDquant limit
+  *************************************************************************************
+ */
+ void updateModelQPBU( rc_quadratic *prc, int topfield, int m_Qp )
+ {
+   double dtmp, m_Qstep;
+   int m_Bits;
+   int firstOrder;
+   int span;
+   int qp;
+ 
+   /*compute the total number of bits for the current basic unit*/
+   m_Bits = (int)(prc->Target * prc->CurrentFrameMAD * prc->CurrentFrameMAD / prc->TotalBUMAD);
+   /*compute the number of texture bits*/
+   m_Bits -= prc->PAveHeaderBits2;
+   /* never go below the minimum per-unit budget */
+   m_Bits = imax(m_Bits,(int)(prc->bit_rate/(MINVALUE*prc->frame_rate*prc->TotalNumberofBasicUnit)));
+ 
+   dtmp = prc->CurrentFrameMAD * prc->CurrentFrameMAD * prc->m_X1 * prc->m_X1
+     + 4 * prc->m_X2 * prc->CurrentFrameMAD * m_Bits;
+ 
+   firstOrder = (prc->m_X2 == 0.0) || (dtmp < 0) || ((sqrt (dtmp) - prc->m_X1 * prc->CurrentFrameMAD) <= 0.0);
+   if (firstOrder)
+   {
+     // fall back 1st order mode
+     m_Qstep = (float)(prc->m_X1 * prc->CurrentFrameMAD / (double) m_Bits);
+   }
+   else
+   {
+     // 2nd order mode
+     m_Qstep = (float) ((2 * prc->m_X2 * prc->CurrentFrameMAD) / (sqrt (dtmp) - prc->m_X1 * prc->CurrentFrameMAD));
+   }
+ 
+   qp = Qstep2QP(m_Qstep);
+ 
+   /* allowed distance from the previous average frame QP */
+   span = (input->basicunit >= prc->MBPerRow) ? 6 : 3;
+ 
+   /* upper limits */
+   qp = imin(m_Qp + prc->DDquant, qp);       // control variation
+   qp = imin(prc->PAveFrameQP + span, qp);
+ 
+   /* lower limits */
+   qp = iClip3(m_Qp - prc->DDquant, prc->RC_MAX_QUANT, qp); // clipping
+   qp = imax(prc->PAveFrameQP - span, qp);
+ 
+   prc->m_Qc = imax(prc->RC_MIN_QUANT, qp);
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    load prc->Pm_Qp from the frame or field QP buffer, depending on
+  *    generic_RC->FieldFrame, and add that QP to the running P picture total.
+  *************************************************************************************
+ */
+ void updateQPInterlaceBU( rc_quadratic *prc )
+ {
+   /* FieldFrame == 1: previous choice is frame coding, otherwise field coding */
+   if (generic_RC->FieldFrame == 1)
+     prc->Pm_Qp = prc->FrameQPBuffer;
+   else
+     prc->Pm_Qp = prc->FieldQPBuffer;
+ 
+   generic_RC->TotalQpforPPicture += prc->Pm_Qp;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    solve the quadratic model for the quantization step that matches the
+  *    texture bit budget m_Bits and store the corresponding QP in prc->m_Qc.
+  *    The first order model is used when m_X2 is zero or the quadratic
+  *    solution is not usable.
+  *
+  * \param m_Bits
+  *    texture bit budget for the picture
+  *************************************************************************************
+ */
+ void updateModelQPFrame( rc_quadratic *prc, int m_Bits )
+ {
+   double dtmp, m_Qstep;
+   int firstOrder;
+ 
+   dtmp = prc->CurrentFrameMAD * prc->m_X1 * prc->CurrentFrameMAD * prc->m_X1
+     + 4 * prc->m_X2 * prc->CurrentFrameMAD * m_Bits;
+ 
+   firstOrder = (prc->m_X2 == 0.0) || (dtmp < 0) || ((sqrt (dtmp) - prc->m_X1 * prc->CurrentFrameMAD) <= 0.0);
+   if (firstOrder)
+   {
+     // fall back 1st order mode
+     m_Qstep = (float) (prc->m_X1 * prc->CurrentFrameMAD / (double) m_Bits);
+   }
+   else
+   {
+     // 2nd order mode
+     m_Qstep = (float) ((2 * prc->m_X2 * prc->CurrentFrameMAD) / (sqrt (dtmp) - prc->m_X1 * prc->CurrentFrameMAD));
+   }
+ 
+   prc->m_Qc = Qstep2QP(m_Qstep);
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/rc_quadratic.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rc_quadratic.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/rc_quadratic.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,164 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    rc_quadratic.h
+  *
+  * \author
+  *    Zhengguo LI
+  *
+  * \date
+  *    14 Jan 2003
+  *
+  * \brief
+  *    Headerfile for rate control
+  **************************************************************************
+  */
+ 
+ #ifndef _RC_QUADRATIC_H_
+ #define _RC_QUADRATIC_H_
+ 
+ #include "ratectl.h"
+ 
+ #define RC_MODEL_HISTORY 21
+ 
+ typedef struct // state block passed as "prc" to the rc_* / update* routines declared below
+ {
+   float  bit_rate;
+   float  frame_rate;
+   float  PrevBitRate;           //LIZG  25/10/2002
+   double GAMMAP;                //LIZG, JVT019r1
+   double BETAP;                 //LIZG, JVT019r1
+   double GOPTargetBufferLevel;
+   double TargetBufferLevel;     //LIZG 25/10/2002
+   double AveWp;
+   double AveWb;
+   int    RC_MAX_QUANT;          //LIZG 28/10/2002 -- upper clipping bound applied to m_Qc
+   int    RC_MIN_QUANT;          //LIZG 28/10/2002 -- lower bound applied to m_Qc
+   int    MyInitialQp;
+   int    PAverageQp;
+   /*LIZG JVT50V2 distortion prediction model*/
+   /*coefficients of the prediction model*/
+   double PreviousPictureMAD;
+   double MADPictureC1;
+   double MADPictureC2;
+   double PMADPictureC1;
+   double PMADPictureC2;
+   /* LIZG JVT50V2 picture layer MAD */
+   double PPictureMAD [RC_MODEL_HISTORY];
+   double PictureMAD  [RC_MODEL_HISTORY];
+   double ReferenceMAD[RC_MODEL_HISTORY];
+   double m_rgQp      [RC_MODEL_HISTORY];
+   double m_rgRp      [RC_MODEL_HISTORY];
+   double Pm_rgQp     [RC_MODEL_HISTORY];
+   double Pm_rgRp     [RC_MODEL_HISTORY];
+ 
+   double m_X1;                  // first-order model coefficient read by updateModelQPFrame
+   double m_X2;                  // second-order model coefficient read by updateModelQPFrame
+   double Pm_X1;
+   double Pm_X2;
+   int    Pm_Qp;                 // set from FrameQPBuffer or FieldQPBuffer in updateQPInterlaceBU
+   int    Pm_Hp;
+ 
+   int    MADm_windowSize;
+   int    m_windowSize;
+   int    m_Qc;                  // QP computed by the model (written by updateModelQPFrame)
+ 
+   int    PPreHeader;
+   int    PrevLastQP; // QP of the second-to-last coded frame in the primary layer
+   int    CurrLastQP; // QP of the last coded frame in the primary layer
+   int    NumberofBFrames;
+   /*basic unit layer rate control*/
+   int    TotalFrameQP;
+   int    NumberofBasicUnit;
+   int    PAveHeaderBits1;
+   int    PAveHeaderBits2;
+   int    PAveHeaderBits3;
+   int    PAveFrameQP;           // basic-unit QP update keeps m_Qc within 3 or 6 of this value
+   int    TotalNumberofBasicUnit;
+   int    CodedBasicUnit;
+   double CurrentFrameMAD;       // MAD term of the model evaluated in updateModelQPFrame
+   double CurrentBUMAD;
+   double TotalBUMAD;
+   double PreviousFrameMAD;
+   double PreviousWholeFrameMAD;
+ 
+   int    DDquant;               // m_Qc is clipped to no less than (reference QP - DDquant)
+   unsigned int    MBPerRow;     // compared with input->basicunit to pick the 6 or 3 QP bound
+   int    QPLastPFrame;
+   int    QPLastGOP;
+ 
+   /* adaptive field/frame coding*/
+   int    FieldQPBuffer;         // QP committed by updateQPInterlaceBU after field coding
+   int    FrameQPBuffer;         // QP committed by updateQPInterlaceBU after frame coding
+   int    FrameAveHeaderBits;
+   int    FieldAveHeaderBits;
+   double *BUPFMAD;
+   double *BUCFMAD;
+   double *FCBUCFMAD;
+   double *FCBUPFMAD;
+ 
+   Boolean GOPOverdue;
+   int64   Iprev_bits;
+   int64   Pprev_bits;
+ 
+   /* rate control variables */
+   int    Xp, Xb;
+   int    Target;
+   int    TargetField;
+   int    Np, Nb, bits_topfield;
+   //HRD consideration
+   int    UpperBound1, UpperBound2, LowerBound;
+   double Wp, Wb; // complexity weights
+   double DeltaP;
+   int    TotalPFrame;
+   int    PDuantQp;
+ } rc_quadratic;
+ 
+ // rate control functions
+ // init/copy (rc_alloc and rc_free take the address of the object pointer)
+ void rc_alloc   ( rc_quadratic **prc );
+ void rc_free    ( rc_quadratic **prc );
+ void copy_rc_jvt( rc_quadratic *dst, rc_quadratic *src );
+ 
+ // rate control (externally visible)
+ void rc_init_seq   (rc_quadratic *prc);
+ void rc_init_GOP   (rc_quadratic *prc, int np, int nb);
+ void rc_update_pict_frame(rc_quadratic *prc, int nbits);
+ void rc_init_pict  (rc_quadratic *prc, int fieldpic,int topfield, int targetcomputation, float mult);
+ void rc_update_pict(rc_quadratic *prc, int nbits);
+ 
+ void updateQPInterlace( rc_quadratic *prc );
+ void updateQPNonPicAFF( rc_quadratic *prc );
+ void updateBottomField( rc_quadratic *prc );
+ int  updateFirstP( rc_quadratic *prc, int topfield );
+ int  updateNegativeTarget( rc_quadratic *prc, int topfield, int m_Qp );
+ int  updateFirstBU( rc_quadratic *prc, int topfield );
+ void updateLastBU( rc_quadratic *prc, int topfield );
+ void predictCurrPicMAD( rc_quadratic *prc );
+ void updateModelQPBU( rc_quadratic *prc, int topfield, int m_Qp );
+ void updateQPInterlaceBU( rc_quadratic *prc );  // copies FrameQPBuffer or FieldQPBuffer into Pm_Qp
+ void updateModelQPFrame( rc_quadratic *prc, int m_Bits ); // solves the rate model for m_Bits and stores the QP in m_Qc
+ 
+ void updateRCModel (rc_quadratic *prc);
+ int  (*updateQP)(rc_quadratic *prc, int topfield); // NOTE(review): a function-pointer OBJECT defined in this header (not a prototype); presumably pointed at one of updateQPRC0..3 -- TODO confirm
+ int  updateQPRC0(rc_quadratic *prc, int topfield);
+ int  updateQPRC1(rc_quadratic *prc, int topfield);
+ int  updateQPRC2(rc_quadratic *prc, int topfield);
+ int  updateQPRC3(rc_quadratic *prc, int topfield);
+ 
+ 
+ // internal functions
+ void updateMADModel   (rc_quadratic *prc);
+ void RCModelEstimator (rc_quadratic *prc, int n_windowSize, Boolean *m_rgRejected);
+ void MADModelEstimator(rc_quadratic *prc, int n_windowSize, Boolean *PictureRejected);
+ int  updateComplexity( rc_quadratic *prc, Boolean is_updated, int nbits );
+ void updatePparams( rc_quadratic *prc, int complexity );
+ void updateBparams( rc_quadratic *prc, int complexity );
+ 
+ // rate control CURRENT pointer. NOTE(review): defined here without "extern", so every file that includes this header carries a tentative definition;
+ rc_quadratic *quadratic_RC; // linking then relies on common-symbol merging (breaks with -fno-common) -- TODO: declare extern here and define once in a .c file
+ // rate control object pointers for RDPictureDecision buffering (same tentative-definition caveat as above)
+ rc_quadratic *quadratic_RC_init, *quadratic_RC_best;
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/rdopt.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdopt.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/rdopt.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,3163 @@
+
+/*!
+ ***************************************************************************
+ * \file rdopt.c
+ *
+ * \brief
+ *    Rate-Distortion optimized mode decision
+ *
+ * \author
+ *    - Heiko Schwarz              <hschwarz at hhi.de>
+ *    - Valeri George              <george at hhi.de>
+ *    - Lowell Winger              <lwinger at lsil.com>
+ *    - Alexis Michael Tourapis    <alexismt at ieee.org>
+ * \date
+ *    12. April 2001
+ **************************************************************************
+ */
+
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+#include <limits.h>
+#include <memory.h>
+#include <string.h>
+
+#include "global.h"
+
+#include "rdopt_coding_state.h"
+#include "memalloc.h"
+#include "mb_access.h"
+#include "elements.h"
+#include "intrarefresh.h"
+#include "image.h"
+#include "transform8x8.h"
+#include "cabac.h"
+#include "vlc.h"
+#include "me_umhex.h"
+#include "ratectl.h"            // head file for rate control
+#include "mode_decision.h"
+#include "fmo.h"
+#include "macroblock.h"
+#include "symbol.h"
+
+
+imgpel pred[16][16];
+
+#define FASTMODE 1
+//#define RESET_STATE        // if defined, Mode_Decision_for_4x4IntraBlocks no longer calls reset_coding_state(cs_cm) after each tested mode
+
+extern const int LEVELMVLIMIT[17][6];
+extern int   QP2QUANT[40];
+
+const int AdaptRndCrPos[2][5] =
+{
+  //  P,   B,   I,  SP,  SI
+  {   4,   7,   1,   4,   1}, // Intra MB
+  {  10,  13,  10,  10,  10}  // Inter MB
+};
+
+const int AdaptRndPos[4][5] =
+{
+  //  P,   B,   I,  SP,  SI
+  {   3,   6,   0,   3,   0}, // 4x4 Intra MB
+  {   1,   2,   0,   1,   2}, // 8x8 Intra MB
+  {   9,  12,   9,   9,   9}, // 4x4 Inter MB
+  {   3,   4,   3,   3,   3}, // 8x8 Inter MB
+};
+
+imgpel   rec_mbY[16][16], rec_mbU[16][16], rec_mbV[16][16];    // reconstruction values
+
+int lrec_rec[16][16],lrec_rec_U[16][16],lrec_rec_V[16][16]; // store the transformed and quantized coefficients for SP frames
+
+static int diff[16];        // residual of one 4x4 block, filled row by row and passed to distortion4x4()
+static int diff4x4[64];
+static int diff8x8[64];
+RD_8x8DATA tr4x4, tr8x8;
+
+int   **bestInterFAdjust4x4=NULL, **bestIntraFAdjust4x4=NULL;   // the adjust tables below are allocated in init_rdopt only when input->AdaptiveRounding is set
+int   **bestInterFAdjust8x8=NULL, **bestIntraFAdjust8x8=NULL;
+int   ***bestInterFAdjust4x4Cr=NULL, ***bestIntraFAdjust4x4Cr=NULL;
+int   **fadjust8x8=NULL, **fadjust4x4=NULL, ***fadjust4x4Cr=NULL, ***fadjust8x8Cr=NULL;
+
+int   ****cofAC=NULL, ****cofAC8x8=NULL;        // [8x8block][4x4block][level/run][scan_pos]
+int   ***cofDC=NULL;                       // [yuv][level/run][scan_pos]
+int   **cofAC4x4=NULL, ****cofAC4x4intern=NULL; // [level/run][scan_pos]; cofAC4x4 is set to cofAC4x4intern[0][0] in init_rdopt
+int   cbp, cbp8x8, cnt_nonz_8x8;
+int64 cbp_blk;
+int   cbp_blk8x8;
+char  frefframe[4][4], brefframe[4][4];
+int   b8mode[4], b8pdir[4];
+short best8x8mode [4];                // [block]
+char  best8x8pdir  [MAXMODE][4];       // [mode][block]
+char  best8x8fwref [MAXMODE][4];       // [mode][block]
+char  best8x8bwref [MAXMODE][4];       // [mode][block]
+
+// coding-state snapshots, created in init_rdopt and deleted in clear_rdopt
+CSptr cs_mb=NULL, cs_b8=NULL, cs_cm=NULL, cs_imb=NULL, cs_ib8=NULL, cs_ib4=NULL, cs_pc=NULL;
+int   best_c_imode;
+int   best_i16offset;
+short best_mode;
+short  bi_pred_me;
+
+//mixed transform sizes definitions
+int   luma_transform_size_8x8_flag;
+
+short all_mv8x8[2][2][4][4][2];       //[8x8_data/temp_data][LIST][block_x][block_y][MVx/MVy]
+short pred_mv8x8[2][2][4][4][2];
+
+int   ****cofAC_8x8ts = NULL;        // [8x8block][4x4block][level/run][scan_pos]; allocated only when input->Transform8x8Mode is set
+
+int64    cbp_blk8_8x8ts;
+int      cbp8_8x8ts;
+int      cost8_8x8ts;
+int      cnt_nonz8_8x8ts;
+
+// adaptive Lagrangian parameters (given start values in init_rdopt when input->CtxAdptLagrangeMult == 1)
+double mb16x16_cost;
+double lambda_mf_factor;
+
+void StoreMV8x8(int dir);
+void RestoreMV8x8(int dir);
+// end of mixed transform sizes definitions
+
+//Adaptive Rounding update function
+void update_offset_params(int mode, int luma_transform_size_8x8_flag);
+
+char  b4_ipredmode[16], b4_intra_pred_modes[16];
+
+/*!
+ ************************************************************************
+ * \brief
+ *    delete structure for RD-optimized mode decision
+ ************************************************************************
+ */
+void clear_rdopt ()
+{
+  // coding-state snapshots, deleted at the end in exactly this order
+  CSptr *saved_states[] = { &cs_mb, &cs_b8, &cs_cm, &cs_imb, &cs_ib8, &cs_ib4, &cs_pc };
+  int    n;
+
+  // coefficient buffers that init_rdopt allocates unconditionally
+  free_mem_DCcoeff (cofDC);
+  free_mem_ACcoeff (cofAC);
+  free_mem_ACcoeff (cofAC8x8);
+  free_mem_ACcoeff (cofAC4x4intern);
+
+  // buffer that only exists when the 8x8 transform is enabled
+  if (input->Transform8x8Mode)
+    free_mem_ACcoeff (cofAC_8x8ts);
+
+  // adaptive rounding tables
+  if (input->AdaptiveRounding)
+  {
+    free_mem2Dint(bestInterFAdjust4x4);
+    free_mem2Dint(bestIntraFAdjust4x4);
+    free_mem2Dint(bestInterFAdjust8x8);
+    free_mem2Dint(bestIntraFAdjust8x8);
+    free_mem3Dint(bestInterFAdjust4x4Cr, 2);
+    free_mem3Dint(bestIntraFAdjust4x4Cr, 2);
+    free_mem2Dint(fadjust8x8);
+    free_mem2Dint(fadjust4x4);
+    free_mem3Dint(fadjust4x4Cr, 2);
+    free_mem3Dint(fadjust8x8Cr, 2);
+  }
+
+  // structures for saving the coding state
+  for (n = 0; n < 7; n++)
+    delete_coding_state (*saved_states[n]);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    create structure for RD-optimized mode decision
+ ************************************************************************
+ */
+void init_rdopt ()
+{
+  // destinations of the coding-state snapshots, created in this order
+  CSptr *saved_states[] =
+  {
+    &cs_mb, &cs_b8, &cs_cm, &cs_imb, &cs_ib8, &cs_ib4, &cs_pc
+  };
+  int    n;
+
+  rdopt = NULL;
+
+  // coefficient buffers needed regardless of configuration
+  get_mem_DCcoeff (&cofDC);
+  get_mem_ACcoeff (&cofAC);
+  get_mem_ACcoeff (&cofAC8x8);
+  get_mem_ACcoeff (&cofAC4x4intern);
+  cofAC4x4 = cofAC4x4intern[0][0];
+
+  // additional buffer for the 8x8 transform
+  if (input->Transform8x8Mode)
+  {
+    get_mem_ACcoeff (&cofAC_8x8ts);
+  }
+
+  // pick the macroblock encoder for the configured input->rdopt value;
+  // value 1 and every value not listed below use the "high" variant
+  if (input->rdopt == 0)
+    encode_one_macroblock = encode_one_macroblock_low;
+  else if (input->rdopt == 2)
+    encode_one_macroblock = encode_one_macroblock_highfast;
+  else if (input->rdopt == 3)
+    encode_one_macroblock = encode_one_macroblock_highloss;
+  else
+    encode_one_macroblock = encode_one_macroblock_high;
+
+  // adaptive rounding tables: the 2D ones are 16x16, the "Cr" ones are
+  // 2 x mb_cr_size_y x mb_cr_size_x
+  if (input->AdaptiveRounding)
+  {
+    get_mem2Dint(&bestInterFAdjust4x4, 16, 16);
+    get_mem2Dint(&bestIntraFAdjust4x4, 16, 16);
+    get_mem2Dint(&bestInterFAdjust8x8, 16, 16);
+    get_mem2Dint(&bestIntraFAdjust8x8, 16, 16);
+    get_mem3Dint(&bestInterFAdjust4x4Cr, 2, img->mb_cr_size_y, img->mb_cr_size_x);
+    get_mem3Dint(&bestIntraFAdjust4x4Cr, 2, img->mb_cr_size_y, img->mb_cr_size_x);
+    get_mem2Dint(&fadjust8x8, 16, 16);
+    get_mem2Dint(&fadjust4x4, 16, 16);
+    get_mem3Dint(&fadjust4x4Cr, 2, img->mb_cr_size_y, img->mb_cr_size_x);
+    get_mem3Dint(&fadjust8x8Cr, 2, img->mb_cr_size_y, img->mb_cr_size_x);
+  }
+
+  // structures for saving the coding state
+  for (n = 0; n < 7; n++)
+    *saved_states[n] = create_coding_state ();
+
+  // start values for the context-adaptive Lagrange multiplier
+  if (input->CtxAdptLagrangeMult == 1)
+  {
+    mb16x16_cost = CALM_MF_FACTOR_THRESHOLD;
+    lambda_mf_factor = 1.0;
+  }
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Updates the pixel map that shows which reference frames are reliable for
+ *    each MB-area of the picture.
+ *
+ * \note
+ *    The new values of the pixel_map are taken from the temporary buffer refresh_map
+ *
+ *************************************************************************************
+ */
+void UpdatePixelMap()
+{
+  int blk_x, blk_y, row, col, row_end, col_end, refreshed;
+
+  // I slice: every entry of the map is reset to 1
+  if (img->type==I_SLICE)
+  {
+    for (row=0; row<img->height; row++)
+      for (col=0; col<img->width; col++)
+        pixel_map[row][col]=1;
+    return;
+  }
+
+  // other slice types: walk the picture in 8x8 areas, one refresh_map entry each
+  for (blk_y=0; blk_y<img->height >> 3; blk_y++)
+  {
+    for (blk_x=0; blk_x<img->width >> 3; blk_x++)
+    {
+      refreshed = (refresh_map[blk_y][blk_x] != 0);
+      row_end   = blk_y*8 + 8;
+      col_end   = blk_x*8 + 8;
+
+      for (row=blk_y*8; row<row_end; row++)
+      {
+        for (col=blk_x*8; col<col_end; col++)
+        {
+          if (refreshed)   // refreshed area: start again at 1
+            pixel_map[row][col] = 1;
+          else             // otherwise count up, saturating at num_ref_frames+1
+            pixel_map[row][col] = imin(pixel_map[row][col] + 1, input->num_ref_frames+1);
+        }
+      }
+    }
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Checks if a given reference frame is reliable for the current
+ *    macroblock, given the motion vectors that the motion search has
+ *    returned.
+ *
+ * \return
+ *    If the return value is 1, the reference frame is reliable. If it
+ *    is 0, then it is not reliable.
+ *
+ * \note
+ *    A specific area in each reference frame is assumed to be unreliable
+ *    if the same area has been intra-refreshed in a subsequent frame.
+ *    The information about intra-refreshed areas is kept in the pixel_map.
+ *
+ *************************************************************************************
+ */
+int CheckReliabilityOfRef (int block, int list_idx, int ref, int mode)
+{
+  int y,x, block_y, block_x, dy, dx, y_pos, x_pos, yy, xx, pres_x, pres_y;
+  int maxold_x  = img->width-1;   // largest valid column; coordinates are clipped to [0, maxold_x]
+  int maxold_y  = img->height-1;  // largest valid row; coordinates are clipped to [0, maxold_y]
+  int ref_frame = ref+1;          // a pixel_map entry below this value makes the reference unreliable
+  // 4x4-block range [by0,by1) x [bx0,bx1) inside the macroblock that belongs to "block" for the given mode:
+  int by0 = (mode>=4?2*(block >> 1):mode==2?2*block:0);      // mode>=4: 2x2 blocks of 8x8 block "block"; mode 2: two rows per block
+  int by1 = by0 + (mode>=4||mode==2?2:4);
+  int bx0 = (mode>=4?2*(block & 0x01):mode==3?2*block:0);    // mode 3: two columns per block; otherwise starts at 0
+  int bx1 = bx0 + (mode>=4||mode==3?2:4);
+
+  for (block_y=by0; block_y<by1; block_y++)
+  {
+    for (block_x=bx0; block_x<bx1; block_x++)
+    {
+      y_pos  = img->all_mv[block_y][block_x][list_idx][ref][mode][1];   // vertical MV component ...
+      y_pos += (img->block_y + block_y) * BLOCK_SIZE * 4;               // ... plus the block position scaled by 4 (quarter-sample units, given the "& 3" / ">> 2" split below)
+      x_pos  = img->all_mv[block_y][block_x][list_idx][ref][mode][0];   // horizontal MV component
+      x_pos += (img->block_x + block_x) * BLOCK_SIZE * 4;
+
+      /* Here we specify which pixels of the reference frame influence
+      the reference values and check their reliability. This is
+      based on the function Get_Reference_Pixel */
+
+      dy = y_pos & 3;   // fractional part of the position
+      dx = x_pos & 3;
+
+      y_pos = (y_pos-dy) >> 2;   // integer sample position
+      x_pos = (x_pos-dx) >> 2;
+
+      if (dy==0 && dx==0) //full-pel
+      {
+        for (y=y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+          for (x=x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+            if (pixel_map[iClip3(0,maxold_y,y)][iClip3(0,maxold_x,x)] < ref_frame)
+              return 0;
+      }
+      else  /* other positions */
+      {
+        if (dy == 0)   // horizontal fraction only: check x-2 .. x+3 on the same row (presumably the interpolation filter support -- TODO confirm)
+        {
+          for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+          {
+            pres_y = iClip3(0,maxold_y,y);
+            for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+            {
+              for(xx = -2 ; xx < 4 ; xx++) {
+                pres_x = iClip3(0, maxold_x, x + xx);
+                if (pixel_map[pres_y][pres_x] < ref_frame)
+                  return 0;
+              }
+            }
+          }
+        }
+        else if (dx == 0)   // vertical fraction only: check y-2 .. y+3 in the same column
+        {
+          for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+            for (x=x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+            {
+              pres_x = iClip3(0,maxold_x,x);
+              for(yy=-2;yy<4;yy++) {
+                pres_y = iClip3(0,maxold_y, yy + y);
+                if (pixel_map[pres_y][pres_x] < ref_frame)
+                  return 0;
+              }
+            }
+        }
+        else if (dx == 2)   // dx == 2 with dy != 0: check the full 6x6 window around each sample
+        {
+          for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+            for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+            {
+              for(yy=-2;yy<4;yy++) {
+                pres_y = iClip3(0,maxold_y, yy + y);
+                for(xx=-2;xx<4;xx++) {
+                  pres_x = iClip3(0,maxold_x, xx + x);
+                  if (pixel_map[pres_y][pres_x] < ref_frame)
+                    return 0;
+                }
+              }
+            }
+        }
+        else if (dy == 2)   // dy == 2 with dx odd: same 6x6 window, visited column by column
+        {
+          for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+            for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+            {
+              for(xx=-2;xx<4;xx++) {
+                pres_x = iClip3(0,maxold_x, xx + x);
+                for(yy=-2;yy<4;yy++) {
+                  pres_y = iClip3(0,maxold_y, yy + y);
+                  if (pixel_map[pres_y][pres_x] < ref_frame)
+                    return 0;
+                }
+              }
+            }
+        }
+        else   // dx and dy both odd: one horizontal run and one vertical run per sample
+        {
+          for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+          {
+            for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+            {
+              pres_y = dy == 1 ? y : y + 1;          // row of the horizontal run: y for dy==1, y+1 for dy==3
+              pres_y = iClip3(0,maxold_y,pres_y);
+
+              for(xx=-2;xx<4;xx++)
+              {
+                pres_x = iClip3(0,maxold_x,xx + x);
+                if (pixel_map[pres_y][pres_x] < ref_frame)
+                  return 0;
+              }
+
+              pres_x = dx == 1 ? x : x + 1;          // column of the vertical run: x for dx==1, x+1 for dx==3
+              pres_x = iClip3(0,maxold_x,pres_x);
+
+              for(yy=-2;yy<4;yy++)
+              {
+                pres_y = iClip3(0,maxold_y, yy + y);
+                if (pixel_map[pres_y][pres_x] < ref_frame)
+                  return 0;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return 1;   // no checked sample had a pixel_map entry below ref_frame
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    R-D Cost for a 4x4 Intra block
+ *************************************************************************************
+ */
+double RDCost_for_4x4IntraBlocks (int*    nonzero,
+                                  int     b8,
+                                  int     b4,
+                                  int     ipmode,
+                                  double  lambda,
+                                  double  min_rdcost,
+                                  int mostProbableMode)
+{
+  // NOTE: min_rdcost is part of the interface but is not consulted in this function
+  int     coeff_cost  = 0;   // filled in by the transform routines, not read afterwards
+  int     xx, yy, rate_bits;
+  int64   ssd         = 0;
+  int     blk_x       = 8*(b8 & 0x01)+4*(b4 & 0x01);
+  int     blk_y       = 8*(b8 >> 1)+4*(b4 >> 1);
+  int     rec_x       = img->pix_x+blk_x;
+  int     rec_y       = img->pix_y+blk_y;
+  int     org_y       = img->opix_y+blk_y;
+  imgpel  **rec_luma  = enc_picture->imgY;
+
+  Slice          *cur_slice  =  img->currentSlice;
+  SyntaxElement  mode_se;
+  const int      *se2part    = assignSE2partition[input->partition_mode];
+  DataPartition  *part;
+
+  //===== DCT, Q, IQ, IDCT, reconstruction; the routine is chosen by slice type =====
+  if (img->type==SP_SLICE)
+  {
+    if (si_frame_indicator || sp2_frame_indicator)
+      *nonzero = dct_luma_sp2(blk_x, blk_y, &coeff_cost);
+    else
+      *nonzero = dct_luma_sp(blk_x, blk_y, &coeff_cost);
+  }
+  else
+  {
+    *nonzero = dct_luma (blk_x, blk_y, &coeff_cost, 1);
+  }
+
+  //===== distortion (SSD) of the 4x4 block, accumulated through img->quad =====
+  for (yy=0; yy<4; yy++)
+  {
+    for (xx=0; xx<4; xx++)
+    {
+      ssd += img->quad [imgY_org[org_y+yy][rec_x+xx] - rec_luma[rec_y+yy][rec_x+xx]];
+    }
+  }
+
+  //===== RATE for INTRA PREDICTION MODE  (SYMBOL MODE MUST BE SET TO UVLC) =====
+  // the most probable mode is coded as -1; modes above it are shifted down by one
+  if (ipmode == mostProbableMode)
+    mode_se.value1 = -1;
+  else if (ipmode < mostProbableMode)
+    mode_se.value1 = ipmode;
+  else
+    mode_se.value1 = ipmode-1;
+
+  //--- position and type of the syntax element ---
+  mode_se.context = 4*b8 + b4;
+  mode_se.type    = SE_INTRAPREDMODE;
+
+  //--- write through the partition mapped to this element type; its length is the mode rate ---
+  part = &(cur_slice->partArr[se2part[SE_INTRAPREDMODE]]);
+  writeIntraPredMode (&mode_se, part);
+  rate_bits = mode_se.len;
+
+  //===== RATE for LUMINANCE COEFFICIENTS =====
+  if (input->symbol_mode == UVLC)
+    rate_bits += writeCoeff4x4_CAVLC (LUMA, b8, b4, 0);
+  else
+    rate_bits += writeLumaCoeff4x4_CABAC (b8, b4, 1);
+
+  // Lagrangian cost: distortion + lambda * rate
+  return (double)ssd + lambda*(double)rate_bits;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for a 4x4 Intra block
+ *************************************************************************************
+ */
+int Mode_Decision_for_4x4IntraBlocks (int  b8,  int  b4,  double  lambda,  int*  min_cost)
+{
+  int     ipmode, best_ipmode = 0, i, j, k, y, cost, dummy = 0;  // dummy is handed to dct_luma by address, so it must start defined (RDCost_for_4x4IntraBlocks zeroes it too)
+  int     c_nz, nonzero = 0;
+  imgpel  rec4x4[4][4];
+  double  rdcost;
+  int     block_x     = 8 * (b8 & 0x01) + 4 * (b4 & 0x01);
+  int     block_y     = 8 * (b8 >> 1)   + 4 * (b4 >> 1);
+  int     pic_pix_x   = img->pix_x  + block_x;
+  int     pic_pix_y   = img->pix_y  + block_y;
+  int     pic_opix_x  = img->opix_x + block_x;
+  int     pic_opix_y  = img->opix_y + block_y;
+  int     pic_block_x = pic_pix_x >> 2;
+  int     pic_block_y = pic_pix_y >> 2;
+  double  min_rdcost  = 1e30;
+  // Tries every permitted 4x4 intra prediction mode for block (b8,b4), keeps the best one and returns its "nonzero coefficients" flag.
+  int left_available, up_available, all_available;
+
+  char   upMode;
+  char   leftMode;
+  int     mostProbableMode;
+
+  PixelPos left_block;
+  PixelPos top_block;
+
+  int  lrec4x4[4][4];
+  int  fixedcost = (int) floor(4 * lambda );   // penalty added in the non-RD path when the mode is not the most probable one
+
+#ifdef BEST_NZ_COEFF
+  int best_nz_coeff = 0;
+  int best_coded_block_flag = 0;
+  int bit_pos = 1 + ((((b8>>1)<<1)+(b4>>1))<<2) + (((b8&1)<<1)+(b4&1));
+  static int64 cbp_bits;
+  int block_x4 = block_x >> 2, block_y4 = block_y >> 2;  // were used below but never declared; assumed to be the 4x4-block column/row (consistent with bit_pos) -- confirm against the nz_coeff layout
+  if (b8==0 && b4==0)
+   cbp_bits = 0;
+#endif
+
+  getLuma4x4Neighbour(img->current_mb_nr, block_x - 1, block_y,   &left_block);
+  getLuma4x4Neighbour(img->current_mb_nr, block_x,     block_y-1, &top_block);
+
+  // constrained intra pred
+  if (input->UseConstrainedIntraPred)
+  {
+    left_block.available = left_block.available ? img->intra_block[left_block.mb_addr] : 0;
+    top_block.available  = top_block.available  ? img->intra_block[top_block.mb_addr]  : 0;
+  }
+
+  upMode            =  top_block.available ? img->ipredmode[top_block.pos_y ][top_block.pos_x ] : -1;
+  leftMode          = left_block.available ? img->ipredmode[left_block.pos_y][left_block.pos_x] : -1;
+
+  mostProbableMode  = (upMode < 0 || leftMode < 0) ? DC_PRED : upMode < leftMode ? upMode : leftMode;   // DC if a neighbour is missing, else the smaller neighbour mode
+
+  *min_cost = INT_MAX;   // only lowered in the non-RD path (!input->rdopt); stays INT_MAX otherwise
+
+  //===== INTRA PREDICTION FOR 4x4 BLOCK =====
+  intrapred_luma (pic_pix_x, pic_pix_y, &left_available, &up_available, &all_available);
+
+  //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
+  for (ipmode=0; ipmode<NO_INTRA_PMODE; ipmode++)
+  {
+    int available_mode =  (ipmode==DC_PRED) ||
+      ((ipmode==VERT_PRED||ipmode==VERT_LEFT_PRED||ipmode==DIAG_DOWN_LEFT_PRED) && up_available ) ||
+      ((ipmode==HOR_PRED||ipmode==HOR_UP_PRED) && left_available ) ||(all_available);
+
+    if (input->IntraDisableInterOnly==0 || img->type != I_SLICE)
+    {
+      if (input->Intra4x4ParDisable && (ipmode==VERT_PRED||ipmode==HOR_PRED))
+        continue;
+
+      if (input->Intra4x4DiagDisable && (ipmode==DIAG_DOWN_LEFT_PRED||ipmode==DIAG_DOWN_RIGHT_PRED))
+        continue;
+
+      if (input->Intra4x4DirDisable && ipmode>=VERT_RIGHT_PRED)
+        continue;
+    }
+
+    if( available_mode)
+    {
+      if (!input->rdopt)
+      {
+        for (k=j=0; j<4; j++)
+        {
+          int jj = pic_opix_y+j;
+          for (i=0; i<4; i++, k++)
+          {
+            diff[k] = imgY_org[jj][pic_opix_x+i] - img->mprr[ipmode][j][i];
+          }
+        }
+        cost  = (ipmode == mostProbableMode) ? 0 : fixedcost;
+        cost += distortion4x4 (diff);
+        if (cost < *min_cost)
+        {
+          best_ipmode = ipmode;
+          *min_cost   = cost;
+        }
+      }
+      else
+      {
+        // get prediction and prediction error
+        for (j=0; j<4; j++)
+        {
+          memcpy(&img->mpr[block_y+j][block_x], img->mprr[ipmode][j], BLOCK_SIZE * sizeof(imgpel));
+          for (i=0; i<4; i++)
+          {
+            img->m7[j][i] = (int) (imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr[ipmode][j][i]);
+          }
+        }
+
+        //===== store the coding state =====
+        //store_coding_state (cs_cm);
+        // get and check rate-distortion cost
+#ifdef BEST_NZ_COEFF
+        img->mb_data[img->current_mb_nr].cbp_bits = cbp_bits;
+#endif
+        if ((rdcost = RDCost_for_4x4IntraBlocks (&c_nz, b8, b4, ipmode, lambda, min_rdcost, mostProbableMode)) < min_rdcost)
+        {
+          //--- set coefficients ---
+          memcpy(cofAC4x4[0],img->cofAC[b8][b4][0], 18 * sizeof(int));
+          memcpy(cofAC4x4[1],img->cofAC[b8][b4][1], 18 * sizeof(int));
+
+          //--- set reconstruction ---
+          for (y=0; y<4; y++)
+          {
+            memcpy(rec4x4[y],&enc_picture->imgY[pic_pix_y+y][pic_pix_x], BLOCK_SIZE * sizeof(imgpel));
+            if(img->type==SP_SLICE &&(!si_frame_indicator && !sp2_frame_indicator))
+              memcpy(lrec4x4[y],&lrec[pic_pix_y+y][pic_pix_x], BLOCK_SIZE * sizeof(int));// stores the mode coefficients
+          }
+          //--- flag if dct-coefficients must be coded ---
+          nonzero = c_nz;
+
+          //--- set best mode update minimum cost ---
+          min_rdcost    = rdcost;
+          best_ipmode   = ipmode;
+#ifdef BEST_NZ_COEFF
+          best_nz_coeff = img->nz_coeff [img->current_mb_nr][block_x4][block_y4];
+          best_coded_block_flag = (int)((img->mb_data[img->current_mb_nr].cbp_bits>>bit_pos)&(int64)(1));
+#endif
+          //store_coding_state (cs_ib4);
+          if (img->AdaptiveRounding)
+          {
+            for (j=0; j<4; j++)
+              memcpy(&fadjust4x4[block_y+j][block_x],&img->fadjust4x4[1][block_y+j][block_x], BLOCK_SIZE * sizeof(int));
+          }
+        }
+
+#ifndef RESET_STATE
+        reset_coding_state (cs_cm);
+#endif
+      }
+    }
+  }
+
+#ifdef BEST_NZ_COEFF
+  img->nz_coeff [img->current_mb_nr][block_x4][block_y4] = best_nz_coeff;
+  cbp_bits &= (~(int64)(1<<bit_pos));
+  cbp_bits |= (int64)(best_coded_block_flag<<bit_pos);
+#endif
+  //===== set intra mode prediction =====
+  img->ipredmode[pic_block_y][pic_block_x] = (char) best_ipmode;
+  img->mb_data[img->current_mb_nr].intra_pred_modes[4*b8+b4] =
+    (char) (mostProbableMode == best_ipmode ? -1 : (best_ipmode < mostProbableMode ? best_ipmode : best_ipmode-1));
+
+  if (!input->rdopt)
+  {
+    // get prediction and prediction error
+    for (j=0; j<4; j++)
+    {
+      int jj = pic_opix_y+j;
+      for (i=0; i<4; i++)
+      {
+        img->mpr[block_y+j][block_x+i]  = img->mprr[best_ipmode][j][i];
+        img->m7[j][i]                   = imgY_org[jj][pic_opix_x+i] - img->mprr[best_ipmode][j][i];
+      }
+    }
+    nonzero = dct_luma (block_x, block_y, &dummy, 1);
+  }
+  else
+  {
+    //===== restore coefficients =====
+    for (j=0; j<2; j++)
+    {
+      memcpy (img->cofAC[b8][b4][j],cofAC4x4[j], 18 * sizeof(int));
+    }
+
+    //===== restore reconstruction and prediction (needed if single coeffs are removed) =====
+    for (y=0; y<BLOCK_SIZE; y++)
+    {
+      memcpy (&enc_picture->imgY[pic_pix_y+y][pic_pix_x],rec4x4[y],    BLOCK_SIZE * sizeof(imgpel));
+      memcpy (&img->mpr[block_y+y][block_x],img->mprr[best_ipmode][y], BLOCK_SIZE * sizeof(imgpel));
+      if(img->type==SP_SLICE &&(!si_frame_indicator && !sp2_frame_indicator))
+        memcpy (&lrec[pic_pix_y+y][pic_pix_x],lrec4x4[y], BLOCK_SIZE * sizeof(int));//restore coefficients when encoding primary SP frame
+    }
+
+    if (img->AdaptiveRounding)
+    {
+      for (j=0; j<BLOCK_SIZE; j++)
+        memcpy (&img->fadjust4x4[1][block_y+j][block_x],&fadjust4x4[block_y+j][block_x], BLOCK_SIZE * sizeof(int));
+    }
+
+  }
+  return nonzero;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for an 8x8 Intra block
+ *************************************************************************************
+ */
+int Mode_Decision_for_8x8IntraBlocks(int b8,double lambda,int *cost)
+{
+  int  nonzero=0, b4;
+  int  cost4x4;
+
+  // lambda-scaled starting value; the costs of the four 4x4 blocks are added on top of it
+  *cost = (int)floor(6.0 * lambda + 0.4999);
+  for (b4=0; b4<4; b4++)
+  {
+    if (Mode_Decision_for_4x4IntraBlocks (b8, b4, lambda, &cost4x4))
+      nonzero = 1;   // at least one 4x4 block of this 8x8 block has coefficients to code
+    // Mode_Decision_for_4x4IntraBlocks leaves cost4x4 at INT_MAX when input->rdopt is set, so a
+    // plain "+=" would overflow a signed int (undefined behavior); saturate at INT_MAX instead.
+    *cost = (cost4x4 > 0 && *cost > INT_MAX - cost4x4) ? INT_MAX : *cost + cost4x4;
+  }
+#ifdef RESET_STATE
+  //reset_coding_state (cs_cm);
+#endif
+
+  return nonzero;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    4x4 Intra mode decision for a macroblock
+ *************************************************************************************
+ */
+int Mode_Decision_for_Intra4x4Macroblock (double lambda,  int* cost)
+{
+  int  cbp=0, b8, cost8x8;
+
+  for (*cost=0, b8=0; b8<4; b8++)
+  {
+    if (Mode_Decision_for_8x8IntraBlocks (b8, lambda, &cost8x8))
+      cbp |= (1<<b8);   // bit b8 of the returned pattern marks an 8x8 block with coefficients
+    // The underlying 4x4 costs start out at INT_MAX (see Mode_Decision_for_4x4IntraBlocks), so the
+    // sum can leave the int range; saturate rather than overflow a signed int (undefined behavior).
+    *cost = (cost8x8 > 0 && *cost > INT_MAX - cost8x8) ? INT_MAX : *cost + cost8x8;
+  }
+
+  return cbp;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    R-D Cost for an 8x8 Partition
+ *************************************************************************************
+ */
+double RDCost_for_8x8blocks (int*    cnt_nonz,   // --> number of nonzero coefficients
+                             int64*  cbp_blk,    // --> cbp blk
+                             double  lambda,     // <-- lagrange multiplier
+                             int     block,      // <-- 8x8 block number
+                             int     mode,       // <-- partitioning mode
+                             short   pdir,       // <-- prediction direction
+                             short   l0_ref,     // <-- L0 reference picture
+                             short   l1_ref)     // <-- L1 reference picture
+{
+  int  i, j, k;
+  int  rate=0;
+  int64 distortion=0;
+  int  dummy = 0, mrate;
+  int  fw_mode, bw_mode;
+  int  cbp     = 0;
+  int  pax     = 8*(block & 0x01);
+  int  pay     = 8*(block >> 1);
+  int  i0      = pax >> 2;
+  int  j0      = pay >> 2;
+  int  bframe  = (img->type==B_SLICE);
+  int  direct  = (bframe && mode==0);
+  int  b8value = B8Mode2Value (mode, pdir);
+
+  Macroblock    *currMB    = &img->mb_data[img->current_mb_nr];
+  SyntaxElement se;
+  Slice         *currSlice = img->currentSlice;
+  DataPartition *dataPart;
+  const int     *partMap   = assignSE2partition[input->partition_mode];
+
+  EncodingEnvironmentPtr eep_dp;
+
+  //=====
+  //=====  GET COEFFICIENTS, RECONSTRUCTIONS, CBP
+  //=====
+  currMB->bi_pred_me=0;
+
+  if (direct)
+  {
+    if (direct_pdir[img->block_y+j0][img->block_x+i0]<0) // mode not allowed
+      return (1e20);
+    else
+      *cnt_nonz = LumaResidualCoding8x8 (&cbp, cbp_blk, block, direct_pdir[img->block_y+j0][img->block_x+i0], 0, 0,
+      (short)imax(0,direct_ref_idx[LIST_0][img->block_y+j0][img->block_x+i0]),
+      direct_ref_idx[LIST_1][img->block_y+j0][img->block_x+i0]);
+  }
+  else
+  {
+    if (pdir == 2 && active_pps->weighted_bipred_idc == 1)
+    {
+      int weight_sum = (active_pps->weighted_bipred_idc == 1)? wbp_weight[0][l0_ref][l1_ref][0] + wbp_weight[1][l0_ref][l1_ref][0] : 0;
+      if (weight_sum < -128 ||  weight_sum > 127)
+      {
+        return (1e20);
+      }
+    }
+
+    fw_mode   = (pdir==0||pdir==2 ? mode : 0);
+    bw_mode   = (pdir==1||pdir==2 ? mode : 0);
+    *cnt_nonz = LumaResidualCoding8x8 (&cbp, cbp_blk, block, pdir, fw_mode, bw_mode, l0_ref, l1_ref);
+  }
+
+  //===== get residue =====
+  if (input->rdopt==3 && img->type!=B_SLICE)
+  {
+    // We need the reconstructed prediction residue for the simulated decoders.
+    compute_residue_b8block (block, -1);
+  }
+
+  //=====
+  //=====   GET DISTORTION
+  //=====
+  if (input->rdopt==3 && img->type!=B_SLICE)
+  {
+    for (k=0; k<input->NoOfDecoders ;k++)
+    {
+      decode_one_b8block (k, P8x8, block, mode, l0_ref);
+      for (j=img->opix_y+pay; j<img->opix_y+pay+8; j++)
+        for (i=img->opix_x+pax; i<img->opix_x+pax+8; i++)
+        {
+          distortion += img->quad[imgY_org[j][i] - decs->decY[k][j][i]];
+        }
+    }
+    distortion /= input->NoOfDecoders;
+  }
+  else
+  {
+    for (j=pay; j<pay+8; j++)
+      for (i=img->pix_x+pax; i<img->pix_x+pax+8; i++)
+      {
+        distortion += img->quad [imgY_org[img->opix_y+j][i] - enc_picture->imgY[img->pix_y+j][i]];
+      }
+  }
+
+  //=====
+  //=====   GET RATE
+  //=====
+  //----- block 8x8 mode -----
+  if (input->symbol_mode == UVLC)
+  {
+    ue_linfo (b8value, dummy, &mrate, &dummy);
+    rate += mrate;
+  }
+  else
+  {
+    se.value1  = b8value;
+    se.type    = SE_MBTYPE;
+    dataPart = &(currSlice->partArr[partMap[se.type]]);
+    writeB8_typeInfo(&se, dataPart);
+    rate += se.len;
+  }
+
+  //----- motion information -----
+  if (!direct)
+  {
+    if ((img->num_ref_idx_l0_active > 1 ) && (pdir==0 || pdir==2))
+      rate  += writeReferenceFrame (mode, i0, j0, 1, l0_ref);
+    if(img->num_ref_idx_l1_active > 1 && img->type== B_SLICE)
+    {
+      if (pdir==1 || pdir==2)
+      {
+        rate  += writeReferenceFrame (mode, i0, j0, 0, l1_ref);
+      }
+    }
+
+    if (pdir==0 || pdir==2)
+    {
+      rate  += writeMotionVector8x8 (i0, j0, i0+2, j0+2, l0_ref,LIST_0, mode);
+    }
+    if (pdir==1 || pdir==2)
+    {
+      rate  += writeMotionVector8x8 (i0, j0, i0+2, j0+2, l1_ref, LIST_1, mode);
+    }
+  }
+
+  //----- coded block pattern (for CABAC only) -----
+  if (input->symbol_mode == CABAC)
+  {
+    dataPart = &(currSlice->partArr[partMap[SE_CBP]]);
+    eep_dp   = &(dataPart->ee_cabac);
+    mrate    = arienco_bits_written (eep_dp);
+    writeCBP_BIT_CABAC (block, ((*cnt_nonz>0)?1:0), cbp8x8, currMB, 1, eep_dp);
+    mrate    = arienco_bits_written (eep_dp) - mrate;
+    rate    += mrate;
+  }
+
+  //----- luminance coefficients -----
+  if (*cnt_nonz)
+  {
+    rate += writeLumaCoeff8x8 (block, mode, currMB->luma_transform_size_8x8_flag);
+  }
+
+  return (double)distortion + lambda * (double)rate;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Computes the mode offset for an intra 16x16 macroblock.
+ *
+ * \param cbp
+ *    coded block pattern; only bits 0-3 and bits 4-5 are examined
+ * \param i16mode
+ *    intra 16x16 prediction mode, added to the result unchanged
+ * \return
+ *    base + i16mode + 4 * (value of cbp bits 4-5), where base is 13 if any of
+ *    the four low cbp bits is set and 1 otherwise
+ *************************************************************************************
+ */
+int I16Offset (int cbp, int i16mode)
+{
+  int offset = 1;
+
+  // any of the low four cbp bits set -> start from 13 instead of 1
+  if ((cbp & 0x0F) != 0)
+    offset = 13;
+
+  offset += i16mode;
+
+  // bits 4-5 of cbp, shifted so that they contribute 0, 4, 8 or 12
+  offset += (cbp & 0x30) >> 2;
+
+  return offset;
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Sets modes and reference frames for a macroblock
+ *
+ *    Writes mb_type, bi_pred_me, b8mode[] and b8pdir[] of the current macroblock
+ *    for the given mode, then fills enc_picture->ref_idx and
+ *    enc_picture->ref_pic_id for the 4x4 grid of blocks that starts at
+ *    (img->block_x, img->block_y).
+ *
+ * \param mode
+ *    macroblock mode; a value not handled by the switch below prints an error
+ *    message and terminates the program with exit(1)
+ *************************************************************************************
+ */
+void SetModesAndRefframeForBlocks (int mode)
+{
+  // NOTE(review): l is assigned in the loops below but its value is never read.
+  int i,j,k,l;
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  int  bframe  = (img->type==B_SLICE);
+  int  block_x, block_y;
+  int  cur_ref[2];
+
+  //--- macroblock type ---
+  currMB->mb_type = mode;
+  // bi_pred_me is only taken over for mode 1; every other mode clears it
+  currMB->bi_pred_me= (mode == 1 ? img->bi_pred_me[mode] : 0);
+
+  //--- block 8x8 mode and prediction direction ---
+  switch (mode)
+  {
+  case 0:
+    // mode 0: b8mode is 0; the direction comes from direct_pdir in B slices, else 0
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i] = 0;
+      currMB->b8pdir[i] = (bframe ? direct_pdir[img->block_y + (i >> 1)*2][img->block_x + (i & 0x01)*2] : 0);
+    }
+    break;
+  case 1:
+  case 2:
+  case 3:
+    // modes 1-3: all four 8x8 blocks share the macroblock mode
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i] = mode;
+      currMB->b8pdir[i] = best8x8pdir[mode][i];
+    }
+    break;
+  case P8x8:
+    // P8x8: each 8x8 block gets its own stored best sub-mode
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i]   = best8x8mode[i];
+      currMB->b8pdir[i]   = best8x8pdir[mode][i];
+    }
+    break;
+  case I4MB:
+    // the intra cases below all set the prediction direction to -1
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i] = IBLOCK;
+      currMB->b8pdir[i] = -1;
+    }
+    break;
+  case I16MB:
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i] =  0;
+      currMB->b8pdir[i] = -1;
+    }
+    break;
+  case I8MB:
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i] = I8MB;
+      currMB->b8pdir[i] = -1;
+    }
+    //switch to 8x8 transform
+    currMB->luma_transform_size_8x8_flag = 1;
+    break;
+  case IPCM:
+    for(i=0;i<4;i++)
+    {
+      currMB->b8mode[i] = IPCM;
+      currMB->b8pdir[i] = -1;
+    }
+    // IPCM explicitly clears the 8x8 transform flag
+    currMB->luma_transform_size_8x8_flag = 0;
+    break;
+  default:
+    printf ("Unsupported mode in SetModesAndRefframeForBlocks!\n");
+    exit (1);
+  }
+
+  // Helper predicates, valid only inside this function: they read the locals
+  // mode, k and bframe. IS_FW / IS_BW tell whether 8x8 block k uses list 0 /
+  // list 1 according to best8x8pdir; a P8x8 sub-block with best8x8mode 0 is
+  // excluded (for IS_FW only in B slices).
+#define IS_FW ((best8x8pdir[mode][k]==0 || best8x8pdir[mode][k]==2) && (mode!=P8x8 || best8x8mode[k]!=0 || !bframe))
+#define IS_BW ((best8x8pdir[mode][k]==1 || best8x8pdir[mode][k]==2) && (mode!=P8x8 || best8x8mode[k]!=0))
+  //--- reference frame arrays ---
+  // NOTE(review): IPCM is handled by the switch above but is not listed here, so
+  // it takes the else branch and indexes best8x8pdir[IPCM][k] - confirm intended.
+  if (mode==0 || mode==I4MB || mode==I16MB || mode==I8MB)
+  {
+    if (bframe)
+    {
+      if (!mode)
+      {
+        // B slice, mode 0: copy both lists' reference indices from direct_ref_idx
+        for (j = img->block_y; j < img->block_y + 4; j++)
+        {
+          memcpy(&enc_picture->ref_idx[LIST_0][j][img->block_x],&direct_ref_idx[LIST_0][j][img->block_x], 4 * sizeof(char));
+          memcpy(&enc_picture->ref_idx[LIST_1][j][img->block_x],&direct_ref_idx[LIST_1][j][img->block_x], 4 * sizeof(char));
+        }
+      }
+      else
+      {
+        // B slice, intra modes: mark both lists as unused (-1)
+        for (j = img->block_y; j < img->block_y + 4; j++)
+        {
+          memset(&enc_picture->ref_idx[LIST_0][j][img->block_x],-1, 4 * sizeof(char));
+          memset(&enc_picture->ref_idx[LIST_1][j][img->block_x],-1, 4 * sizeof(char));
+        }
+      }
+    }
+    else
+    {
+      if (!mode)
+      {
+        // non-B slice, mode 0: list 0 reference index 0 for all blocks
+        for (j = img->block_y; j < img->block_y + 4; j++)
+          memset(&enc_picture->ref_idx[LIST_0][j][img->block_x],0, 4 * sizeof(char));
+      }
+      else
+      {
+        // non-B slice, intra modes: list 0 marked as unused (-1)
+        for (j = img->block_y; j < img->block_y + 4; j++)
+          memset(&enc_picture->ref_idx[LIST_0][j][img->block_x],-1, 4 * sizeof(char));
+      }
+    }
+  }
+  else
+  {
+    if (bframe)
+    {
+      for (j=0;j<4;j++)
+      {
+        block_y = img->block_y + j;
+        for (i=0;i<4;i++)
+        {
+          block_x = img->block_x + i;
+          // k: index of the 8x8 block that contains 4x4 block (i,j)
+          k = 2*(j >> 1) + (i >> 1);
+          l = 2*(j & 0x01) + (i & 0x01);
+
+          if(mode == P8x8 && best8x8mode[k]==0)
+          {
+            // P8x8 sub-block with mode 0: take the indices from direct_ref_idx
+            enc_picture->ref_idx[LIST_0][block_y][block_x] = direct_ref_idx[LIST_0][block_y][block_x];
+            enc_picture->ref_idx[LIST_1][block_y][block_x] = direct_ref_idx[LIST_1][block_y][block_x];
+          }
+          else if (mode ==1 && currMB->bi_pred_me && IS_FW && IS_BW)
+          {
+            // mode 1 with bi_pred_me set and both lists used: both indices forced to 0
+            enc_picture->ref_idx[LIST_0][block_y][block_x] = 0;
+            enc_picture->ref_idx[LIST_1][block_y][block_x] = 0;
+          }
+          else
+          {
+            enc_picture->ref_idx[LIST_0][block_y][block_x] = (IS_FW ? best8x8fwref[mode][k] : -1);
+            enc_picture->ref_idx[LIST_1][block_y][block_x] = (IS_BW ? best8x8bwref[mode][k] : -1);
+          }
+        }
+      }
+    }
+    else
+    {
+      for (j=0;j<4;j++)
+      {
+        block_y = img->block_y + j;
+        for (i=0;i<4;i++)
+        {
+          block_x = img->block_x + i;
+          k = 2*(j >> 1) + (i >> 1);
+          l = 2*(j & 0x01) + (i & 0x01);
+          enc_picture->ref_idx[LIST_0][block_y][block_x] = (IS_FW ? best8x8fwref[mode][k] : -1);
+        }
+      }
+    }
+  }
+
+  // Translate each reference index into a ref_pic_id through
+  // enc_picture->ref_pic_num (offset by currMB->list_offset); a negative index
+  // yields -1. List 1 is only processed for B slices.
+  if (bframe)
+  {
+
+    for (j = img->block_y; j < img->block_y + 4; j++)
+      for (i = img->block_x; i < img->block_x + 4;i++)
+      {
+        cur_ref[LIST_0] = (int) enc_picture->ref_idx[LIST_0][j][i];
+        cur_ref[LIST_1] = (int) enc_picture->ref_idx[LIST_1][j][i];
+
+        enc_picture->ref_pic_id [LIST_0][j][i] = (cur_ref[LIST_0]>=0
+          ? enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][cur_ref[LIST_0]]
+          : -1);
+        enc_picture->ref_pic_id [LIST_1][j][i] = (cur_ref[LIST_1]>=0
+          ? enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][cur_ref[LIST_1]]
+          : -1);
+      }
+  }
+  else
+  {
+    for (j = img->block_y; j < img->block_y + 4; j++)
+      for (i = img->block_x; i < img->block_x + 4;i++)
+      {
+        cur_ref[LIST_0] = (int) enc_picture->ref_idx[LIST_0][j][i];
+        enc_picture->ref_pic_id [LIST_0][j][i] = (cur_ref[LIST_0]>=0
+          ? enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][cur_ref[LIST_0]]
+          : -1);
+      }
+  }
+
+#undef IS_FW
+#undef IS_BW
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Intra 16x16 mode decision
+ *
+ *    Runs the three steps in a fixed order: build the intra 16x16 predictions,
+ *    pick the best mode, then call dct_luma_16x16 for that mode.
+ *
+ * \param currMB
+ *    macroblock whose cbp field receives the value returned by dct_luma_16x16
+ * \param i16mode
+ *    in/out pointer handed to find_sad_16x16; the value it points to afterwards
+ *    is the mode passed to dct_luma_16x16
+ *************************************************************************************
+ */
+void Intra16x16_Mode_Decision (Macroblock* currMB, int* i16mode)
+{
+  intrapred_luma_16x16 ();   /* make intra pred for all 4 new modes */
+
+  find_sad_16x16 (i16mode);   /* get best new intra mode */
+
+  // the return value of the 16x16 luma transform becomes the macroblock's cbp
+  currMB->cbp = dct_luma_16x16 (*i16mode);
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Sets Coefficients and reconstruction for an 8x8 block
+ *
+ *    Restores previously stored coefficients, cbp values and reconstructed luma
+ *    samples into img / enc_picture for the current macroblock. Which stored set
+ *    is used depends on currMB->luma_transform_size_8x8_flag: the tr8x8 /
+ *    *_8x8ts data when the flag is set, the tr4x4 / *8x8 data otherwise.
+ *
+ * \param currMB
+ *    current macroblock; b8mode, b8pdir (8x8 transform path only), cbp and
+ *    cbp_blk are written
+ *************************************************************************************
+ */
+void SetCoeffAndReconstruction8x8 (Macroblock* currMB)
+{
+  int block, k, j, i;
+  int cur_ref[2];
+
+  //============= MIXED TRANSFORM SIZES FOR 8x8 PARTITION ==============
+  //--------------------------------------------------------------------
+  // NOTE(review): l is assigned below but its value is never read.
+  int l;
+  int bframe = img->type==B_SLICE;
+
+  if (currMB->luma_transform_size_8x8_flag)
+  {
+
+    //============= set mode and ref. frames ==============
+    for(i = 0;i<4;i++)
+    {
+      currMB->b8mode[i]   = tr8x8.part8x8mode[i];
+      currMB->b8pdir[i]   = tr8x8.part8x8pdir[i];
+    }
+
+    if (bframe)
+    {
+      for (j = 0;j<4;j++)
+        for (i = 0;i<4;i++)
+        {
+          // k: index of the 8x8 block that contains 4x4 block (i,j)
+          k = 2*(j >> 1)+(i >> 1);
+          l = 2*(j & 0x01)+(i & 0x01);
+          // list 0 is used when the low bit of b8pdir is clear (0 or 2),
+          // list 1 when b8pdir is positive (1 or 2); otherwise the index is -1
+          enc_picture->ref_idx[LIST_0][img->block_y+j][img->block_x+i] = ((currMB->b8pdir[k] & 0x01) == 0) ? tr8x8.part8x8fwref[k] : - 1;
+          enc_picture->ref_idx[LIST_1][img->block_y+j][img->block_x+i] = (currMB->b8pdir[k] > 0) ? tr8x8.part8x8bwref[k] : - 1;
+        }
+    }
+    else
+    {
+      for (j = 0;j<4;j++)
+        for (i = 0;i<4;i++)
+        {
+          k = 2*(j >> 1)+(i >> 1);
+          l = 2*(j & 0x01)+(i & 0x01);
+          enc_picture->ref_idx[LIST_0][img->block_y+j][img->block_x+i] = tr8x8.part8x8fwref[k];
+        }
+    }
+
+
+    // map list 0 reference indices to ref_pic_id (-1 for a negative index)
+    for (j = img->block_y;j<img->block_y + BLOCK_MULTIPLE;j++)
+    {
+      for (i = img->block_x;i<img->block_x + BLOCK_MULTIPLE;i++)
+      {
+        cur_ref[LIST_0] = (int) enc_picture->ref_idx[LIST_0][j][i];
+
+        enc_picture->ref_pic_id [LIST_0][j][i] =(cur_ref[LIST_0]>=0
+        ? enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][cur_ref[LIST_0]]
+        : -1);
+      }
+    }
+
+    // same mapping for list 1, B slices only
+    if (bframe)
+    {
+      for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+      {
+        for (i = img->block_x;i<img->block_x + BLOCK_MULTIPLE;i++)
+        {
+          cur_ref[LIST_1] = (int) enc_picture->ref_idx[LIST_1][j][i];
+
+          enc_picture->ref_pic_id [LIST_1][j][i] = (cur_ref[LIST_1]>=0
+            ? enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][cur_ref[LIST_1]]
+            : -1);
+        }
+
+      }
+    }
+
+    //====== set the mv's for 8x8 partition with transform size 8x8 ======
+    //save the mv data for 4x4 transform
+
+    StoreMV8x8(1);
+    //set new mv data for 8x8 transform
+    RestoreMV8x8(0);
+
+    //============= get pre-calculated data ==============
+    //restore coefficients from 8x8 transform
+
+    // only the four luma blocks are restored here; 65 ints are copied per
+    // [block][k][j] entry
+    for (block = 0; block<4; block++)
+    {
+      for (k = 0; k<4; k++)
+        for (j = 0; j<2; j++)
+          memcpy (img->cofAC[block][k][j],cofAC_8x8ts[block][k][j], 65 * sizeof(int));
+    }
+    //restore reconstruction
+    // few non-zero coefficients (and not the lossless qp 0 case, not an SP
+    // slice): cbp is cleared and the stored prediction is used as reconstruction
+    if (cnt_nonz8_8x8ts <= _LUMA_8x8_COEFF_COST_ &&
+      ((img->qp_scaled)!=0 || img->lossless_qpprime_flag==0) &&
+      (img->type!=SP_SLICE))// modif ES added last condition (we probably never go there so is the next modification useful ? check)
+    {
+      currMB->cbp     = 0;
+      currMB->cbp_blk = 0;
+      for (j = 0; j < MB_BLOCK_SIZE; j++)
+      {
+        memcpy(&enc_picture->imgY[img->pix_y+j][img->pix_x], tr8x8.mpr8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));
+        // NOTE(review): unreachable - the enclosing condition requires
+        // img->type != SP_SLICE.
+        if(img->type==SP_SLICE &&(!si_frame_indicator && !sp2_frame_indicator ))
+          memcpy(&lrec[img->pix_y+j][img->pix_x],tr8x8.lrec[j], MB_BLOCK_SIZE * sizeof(int));
+      }
+    }
+    else
+    {
+      // otherwise restore the stored cbp values and the stored reconstruction
+      currMB->cbp     = cbp8_8x8ts;
+      currMB->cbp_blk = cbp_blk8_8x8ts;
+      for (j = 0; j < MB_BLOCK_SIZE; j++)
+      {
+        memcpy (&enc_picture->imgY[img->pix_y+j][img->pix_x],tr8x8.rec_mbY8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));
+        if(img->type==SP_SLICE &&(!si_frame_indicator && !sp2_frame_indicator))
+          memcpy (&lrec[img->pix_y+j][img->pix_x],tr8x8.lrec[j], MB_BLOCK_SIZE * sizeof(int));
+      }
+    }
+  }
+  else
+  {
+    //============= get pre-calculated data ==============
+    //---------------------------------------------------
+    //--- restore coefficients ---
+    // unlike the 8x8 transform path, this loop also covers the
+    // img->num_blk8x8_uv blocks that follow the four luma blocks
+    for (block = 0; block<4+img->num_blk8x8_uv; block++)
+    {
+      for (k = 0; k<4; k++)
+        for (j = 0; j<2; j++)
+          memcpy (img->cofAC[block][k][j],cofAC8x8[block][k][j], 65 * sizeof(int));
+    }
+
+    // same decision as in the 8x8 transform path, with a fixed threshold of 5
+    if (cnt_nonz_8x8<=5 && img->type!=SP_SLICE &&
+      ((img->qp_scaled)!=0 || img->lossless_qpprime_flag==0))
+    {
+      currMB->cbp     = 0;
+      currMB->cbp_blk = 0;
+      for (j = 0; j < MB_BLOCK_SIZE; j++)
+      {
+        memcpy (&enc_picture->imgY[img->pix_y+j][img->pix_x],tr4x4.mpr8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));
+        // NOTE(review): unreachable - the enclosing condition requires
+        // img->type != SP_SLICE.
+        if(img->type ==SP_SLICE &&(!si_frame_indicator && !sp2_frame_indicator))
+          memcpy (&lrec[img->pix_y+j][img->pix_x],tr4x4.lrec[j], MB_BLOCK_SIZE * sizeof(int)); // restore coeff. SP frame
+      }
+    }
+    else
+    {
+      currMB->cbp     = cbp8x8;
+      currMB->cbp_blk = cbp_blk8x8;
+      for (j = 0; j < MB_BLOCK_SIZE; j++)
+      {
+        memcpy (&enc_picture->imgY[img->pix_y+j][img->pix_x],tr4x4.rec_mbY8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));
+        if(img->type==SP_SLICE &&(!si_frame_indicator && !sp2_frame_indicator))
+          memcpy (&lrec[img->pix_y+j][img->pix_x],tr4x4.lrec[j], MB_BLOCK_SIZE * sizeof(int));
+      }
+    }
+  }
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Sets motion vectors for a macroblock
+ *
+ *    For each of the 16 4x4 blocks of the current macroblock, copies the motion
+ *    vector selected by the block's 8x8 mode, prediction direction and reference
+ *    index from the all_mv table into enc_picture->mv. When
+ *    img->MbaffFrameFlag is set, the all_mv / pred_mv tables are additionally
+ *    copied into rdopt.
+ *
+ * \param currMB
+ *    current macroblock; b8mode, b8pdir, bi_pred_me and mb_type are read
+ * \param bframe
+ *    non-zero selects the two-list path, zero the list-0-only path
+ *************************************************************************************
+ */
+void SetMotionVectorsMB (Macroblock* currMB, int bframe)
+{
+  // NOTE(review): l and jmod are assigned below but their values are never read.
+  int i, j, k, l, m, mode8, pdir8, ref, by, bx;
+  short ******all_mv  = img->all_mv;
+  short ******pred_mv = img->pred_mv;
+  int  bw_ref;
+  int jdiv, jmod;
+
+  if (!bframe)
+  {
+    for (j = 0; j<4; j++)
+    {
+      jmod = j & 0x01;
+      jdiv = j >>   1;
+      by    = img->block_y+j;
+      for (i = 0; i<4; i++)
+      {
+        // k: index of the 8x8 block that contains 4x4 block (i,j)
+        mode8 = currMB->b8mode[k=2*jdiv+(i>>1)];
+        l     = 2*jmod + (i & 0x01);
+
+        bx   = img->block_x+i;
+
+        pdir8 = currMB->b8pdir[k];
+        ref    = enc_picture->ref_idx[LIST_0][by][bx];
+
+        // ref is only used as an index when the block is predicted (pdir8 >= 0);
+        // otherwise the vector is zeroed
+        if (pdir8>=0)
+        {
+          enc_picture->mv[LIST_0][by][bx][0] = all_mv [j][i][LIST_0][ ref][mode8][0];
+          enc_picture->mv[LIST_0][by][bx][1] = all_mv [j][i][LIST_0][ ref][mode8][1];
+        }
+        else
+        {
+          enc_picture->mv[LIST_0][by][bx][0] = 0;
+          enc_picture->mv[LIST_0][by][bx][1] = 0;
+        }
+      }
+    }
+  }
+else
+{
+  for (j = 0; j<4; j++)
+  {
+    jmod = j & 0x01;
+    jdiv = j >>   1;
+    by    = img->block_y+j;
+    for (i = 0; i<4; i++)
+    {
+      mode8 = currMB->b8mode[k=2*jdiv+(i>>1)];
+      l     = 2*jmod + (i & 0x01);
+
+      bx    = img->block_x+i;
+
+      pdir8 = currMB->b8pdir[k];
+      ref    = enc_picture->ref_idx[LIST_0][by][bx];
+      bw_ref = enc_picture->ref_idx[LIST_1][by][bx];
+
+      // Bi-predictive 16x16 with bi_pred_me set: take vectors from one of the
+      // bipred tables and use reference 0 on both lists.
+      // NOTE(review): all_mv stays re-pointed for the remaining iterations and
+      // for the rdopt copy at the end of the function - confirm intended.
+      if (currMB->bi_pred_me && (pdir8 == 2) && currMB->mb_type==1)
+      {
+        all_mv  = currMB->bi_pred_me == 1 ? img->bipred_mv1 : img->bipred_mv2;
+        ref = 0;
+        bw_ref = 0;
+      }
+
+      if (pdir8==-1) // intra
+      {
+        enc_picture->mv[LIST_0][by][bx][0] = 0;
+        enc_picture->mv[LIST_0][by][bx][1] = 0;
+        enc_picture->mv[LIST_1][by][bx][0] = 0;
+        enc_picture->mv[LIST_1][by][bx][1] = 0;
+      }
+      else if (pdir8==0) // list 0
+      {
+        // the unused list gets a zero vector and reference index -1
+        enc_picture->mv[LIST_0][by][bx][0] = all_mv [j][i][LIST_0][ ref][mode8][0];
+        enc_picture->mv[LIST_0][by][bx][1] = all_mv [j][i][LIST_0][ ref][mode8][1];
+        enc_picture->mv[LIST_1][by][bx][0] = 0;
+        enc_picture->mv[LIST_1][by][bx][1] = 0;
+        enc_picture->ref_idx[LIST_1][by][bx] = -1;
+      }
+      else if (pdir8==1) // list 1
+      {
+        enc_picture->mv[LIST_0][by][bx][0] = 0;
+        enc_picture->mv[LIST_0][by][bx][1] = 0;
+        enc_picture->ref_idx[LIST_0][by][bx] = -1;
+        enc_picture->mv[LIST_1][by][bx][0] = all_mv [j][i][LIST_1][bw_ref][mode8][0];
+        enc_picture->mv[LIST_1][by][bx][1] = all_mv [j][i][LIST_1][bw_ref][mode8][1];
+      }
+      else if (pdir8==2) // bipredictive
+      {
+        enc_picture->mv[LIST_0][by][bx][0] = all_mv [j][i][LIST_0][ ref][mode8][0];
+        enc_picture->mv[LIST_0][by][bx][1] = all_mv [j][i][LIST_0][ ref][mode8][1];
+        enc_picture->mv[LIST_1][by][bx][0] = all_mv [j][i][LIST_1][bw_ref][mode8][0];
+        enc_picture->mv[LIST_1][by][bx][1] = all_mv [j][i][LIST_1][bw_ref][mode8][1];
+      }
+      else
+      {
+        error("invalid direction mode", 255);
+      }
+    }
+  }
+}
+
+  // copy all the motion vectors into rdopt structure
+  // Can simplify this by copying the MV's of the best mode (TBD)
+  // The copy covers 4x4 blocks, 2 lists, img->max_num_references references,
+  // 9 modes and both vector components.
+  if(img->MbaffFrameFlag)
+  {
+    for(i = 0;i<4;i++)
+    {
+      for(j = 0;j<4;j++)
+      {
+        for (k = 0;k<2;k++)
+        {
+          for(l = 0;l<img->max_num_references;l++)
+          {
+            for(m = 0;m<9;m++)
+            {
+              rdopt->all_mv [j][i][k][l][m][0]  = all_mv [j][i][k][l][m][0];
+              rdopt->pred_mv[j][i][k][l][m][0]  = pred_mv[j][i][k][l][m][0];
+
+              rdopt->all_mv [j][i][k][l][m][1]  = all_mv [j][i][k][l][m][1];
+              rdopt->pred_mv[j][i][k][l][m][1]  = pred_mv[j][i][k][l][m][1];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    R-D Cost for a macroblock
+ *
+ *    Codes the current macroblock in the given mode, measures distortion and
+ *    rate, and compares distortion + lambda * max(0.5, rate) with *min_rdcost.
+ *
+ * \return
+ *    1 if the mode is accepted as the new best (in which case *min_rdcost and
+ *    *min_rate are updated), 0 if the mode is rejected at any of the checks
+ *    below. Encoder state already written (modes, reference indices,
+ *    reconstruction) is not rolled back on a 0 return.
+ *
+ * \note
+ *    *min_rate is set to lambda * coeff_rate, where coeff_rate is the value
+ *    reported by writeMBLayer (it stays 0 when writeMBLayer is not called).
+ *************************************************************************************
+ */
+int RDCost_for_macroblocks (double   lambda,       // <-- lagrange multiplier
+                            int      mode,         // <-- modus (0-COPY/DIRECT, 1-16x16, 2-16x8, 3-8x16, 4-8x8(+), 5-Intra4x4, 6-Intra16x16)
+                            double*  min_rdcost,   // <-> minimum rate-distortion cost
+                            double*  min_rate,     // --> bitrate of mode which has minimum rate-distortion cost.
+                            int i16mode )
+{
+  int         i, j, k; //, k, ****ip4;
+  int         j1, j2;
+  int         rate = 0, coeff_rate = 0;
+  int64       distortion = 0;
+  double      rdcost;
+  int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
+  Macroblock  *currMB   = &img->mb_data[img->current_mb_nr];
+  // prevMB is NULL when there is no previous macroblock (prev_mb_nr < 0)
+  Macroblock  *prevMB   = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr] : NULL;
+  int         bframe    = (img->type==B_SLICE);
+  int         tmp_cc;
+  // the cod counter is only taken into account for non-I slices without CABAC
+  int         use_of_cc =  (img->type!=I_SLICE &&  input->symbol_mode!=CABAC);
+  int         cc_rate, dummy;
+
+  //=====
+  //=====  SET REFERENCE FRAMES AND BLOCK MODES
+  //=====
+  SetModesAndRefframeForBlocks (mode);
+
+  //=====
+  //=====  GET COEFFICIENTS, RECONSTRUCTIONS, CBP
+  //=====
+  // B slice, mode 0: reject if any 4x4 block has a negative direct_pdir entry
+  if (bframe && mode==0)
+  {
+    int block_x=img->pix_x>>2;
+    int block_y=img->pix_y>>2;
+    for (j = block_y;j< block_y + 4;j++)
+      for (i = block_x;i<block_x + 4;i++)
+        if (direct_pdir[j][i] < 0)
+          return 0;
+  }
+
+  // Test MV limits for Skip Mode. This could be necessary for MBAFF case Frame MBs.
+  if ((img->MbaffFrameFlag) && (!currMB->mb_field) && (img->type==P_SLICE) && (mode==0) )
+  {
+    if ( img->all_mv[0][0][0][0][0][0] < -8192
+      || img->all_mv[0][0][0][0][0][0] > 8191
+      || img->all_mv[0][0][0][0][0][1] < LEVELMVLIMIT[img->LevelIndex][4]
+      || img->all_mv[0][0][0][0][0][1] > LEVELMVLIMIT[img->LevelIndex][5])
+      return 0;
+  }
+
+  // clear the adaptive rounding adjustment buffers before coding this mode
+  if (img->AdaptiveRounding)
+  {
+    memset(&(img->fadjust4x4[0][0][0]), 0, MB_PIXELS * sizeof(int));
+    memset(&(img->fadjust8x8[0][0][0]), 0, MB_PIXELS * sizeof(int));
+    memset(&(img->fadjust4x4Cr[0][0][0][0]), 0, img->mb_cr_size_y * img->mb_cr_size_x * sizeof(int));
+    memset(&(img->fadjust4x4Cr[0][1][0][0]), 0, img->mb_cr_size_y * img->mb_cr_size_x  * sizeof(int));
+  }
+
+  // mode-specific luma coding / reconstruction
+  if (mode<P8x8)
+  {
+    LumaResidualCoding ();
+
+    // mode 0 with coded luma is rejected outside B slices, and in B slices
+    // when img->NoResidueDirect is 1
+    if(mode==0 && currMB->cbp!=0 && (img->type != B_SLICE || img->NoResidueDirect==1))
+      return 0;
+    if(mode==0 && currMB->cbp==0 && currMB->luma_transform_size_8x8_flag==1) //for B_skip, luma_transform_size_8x8_flag=0 only
+      return 0;
+  }
+  else if (mode==P8x8)
+  {
+    SetCoeffAndReconstruction8x8 (currMB);
+  }
+  else if (mode==I4MB)
+  {
+    currMB->cbp = Mode_Decision_for_Intra4x4Macroblock (lambda, &dummy);
+  }
+  else if (mode==I16MB)
+  {
+    // may overwrite the local copy of i16mode with the selected mode
+    Intra16x16_Mode_Decision  (currMB, &i16mode);
+  }
+  else if(mode==I8MB)
+  {
+    currMB->cbp = Mode_Decision_for_new_Intra8x8Macroblock(lambda, &dummy);
+  }
+  else if(mode==IPCM)
+  {
+    // IPCM: the reconstruction is a copy of the original samples
+    for (j = 0; j < MB_BLOCK_SIZE; j++)
+    {
+      j1 = j + img->opix_y;
+      j2 = j + img->pix_y;
+      for (i=img->opix_x; i<img->opix_x+MB_BLOCK_SIZE; i++)
+        enc_picture->imgY[j2][i] = imgY_org[j1][i];
+    }
+    if (img->yuv_format != YUV400)
+    {
+      // CHROMA
+      for (j = 0; j<img->mb_cr_size_y; j++)
+      {
+        j1 = j + img->opix_c_y;
+        j2 = j + img->pix_c_y;
+        for (i=img->opix_c_x; i<img->opix_c_x+img->mb_cr_size_x; i++)
+        {
+          enc_picture->imgUV[0][j2][i] = imgUV_org[0][j1][i];
+          enc_picture->imgUV[1][j2][i] = imgUV_org[1][j1][i];
+        }
+      }
+    }
+    // every nz_coeff entry of this macroblock is set to 16
+    for (j=0;j<4;j++)
+      for (i=0; i<(4+img->num_blk8x8_uv); i++)
+        img->nz_coeff[img->current_mb_nr][j][i] = 16;
+
+  }
+
+  if (input->rdopt==3 && img->type!=B_SLICE)
+  {
+    // We need the reconstructed prediction residue for the simulated decoders.
+    compute_residue_mb (mode==I16MB?i16mode:-1);
+  }
+
+  //Rate control
+  // keep a copy of the prediction in pred (the I16MB one comes from mprr_2)
+  if (input->RCEnable)
+  {
+    if (mode == I16MB)
+      memcpy(pred,img->mprr_2[i16mode],MB_PIXELS * sizeof(imgpel));
+    else
+      memcpy(pred,img->mpr,MB_PIXELS * sizeof(imgpel));
+  }
+
+  img->i16offset = 0;
+  dummy = 0;
+  // chroma residual coding is skipped for 4:0:0 and for IPCM
+  if ((img->yuv_format!=YUV400) && (mode != IPCM))
+    ChromaResidualCoding (&dummy);
+
+  if (mode==I16MB)
+    img->i16offset = I16Offset  (currMB->cbp, i16mode);
+
+  //=====
+  //=====   GET DISTORTION
+  //=====
+  // LUMA
+  if (input->rdopt==3 && img->type!=B_SLICE)
+  {
+    // luma distortion is averaged over input->NoOfDecoders simulated decoders
+    for (k = 0; k<input->NoOfDecoders ;k++)
+    {
+      decode_one_mb (k, currMB);
+      for (j = 0; j<MB_BLOCK_SIZE; j++)
+      {
+        for (i=img->opix_x; i<img->opix_x+MB_BLOCK_SIZE; i++)
+          distortion += img->quad [imgY_org[img->opix_y+j][i] - decs->decY[k][img->opix_y+j][i]];
+      }
+    }
+    distortion /= input->NoOfDecoders;
+
+    if (img->yuv_format != YUV400)
+    {
+      // CHROMA
+      // chroma distortion is measured against enc_picture, not the
+      // simulated decoders
+      for (j = 0; j<img->mb_cr_size_y; j++)
+      {
+        j1 = j + img->opix_c_y;
+        j2 = j + img->pix_c_y;
+        for (i=img->opix_c_x; i<img->opix_c_x+img->mb_cr_size_x; i++)
+        {
+          distortion += img->quad [imgUV_org[0][j1][i] - enc_picture->imgUV[0][j2][i]];
+          distortion += img->quad [imgUV_org[1][j1][i] - enc_picture->imgUV[1][j2][i]];
+        }
+      }
+    }
+  }
+  else
+  {
+    // LUMA
+    // img->quad is indexed with the sample difference; presumably a
+    // squared-value lookup table - TODO confirm
+    for (j = 0; j < MB_BLOCK_SIZE; j++)
+    {
+      j1 = j + img->opix_y;
+      j2 = j + img->pix_y;
+      for (i=img->opix_x; i<img->opix_x+MB_BLOCK_SIZE; i++)
+        distortion += img->quad [imgY_org[j1][i] - enc_picture->imgY[j2][i]];
+    }
+
+    if (img->yuv_format != YUV400)
+    {
+      // CHROMA
+      for (j = 0; j<img->mb_cr_size_y; j++)
+      {
+        j1 = j + img->opix_c_y;
+        j2 = j + img->pix_c_y;
+        for (i=img->opix_c_x; i<img->opix_c_x+img->mb_cr_size_x; i++)
+        {
+          distortion += img->quad [imgUV_org[0][j1][i] - enc_picture->imgUV[0][j2][i]];
+          distortion += img->quad [imgUV_org[1][j1][i] - enc_picture->imgUV[1][j2][i]];
+        }
+      }
+    }
+  }
+
+  //=====   S T O R E   C O D I N G   S T A T E   =====
+  //---------------------------------------------------
+  // the state saved here is restored after the rate measurement below
+  store_coding_state (cs_cm);
+
+  //=====
+  //=====   GET RATE
+  //=====
+  //----- macroblock header -----
+  if (use_of_cc)
+  {
+    if (currMB->mb_type!=0 || (bframe && currMB->cbp!=0))
+    {
+      // cod counter and macroblock mode are written ==> do not consider code counter
+      tmp_cc = img->cod_counter;
+      rate   = writeMBLayer (1, &coeff_rate);
+      ue_linfo (tmp_cc, dummy, &cc_rate, &dummy);
+      rate  -= cc_rate;
+      img->cod_counter = tmp_cc;
+    }
+    else
+    {
+      // cod counter is just increased  ==> get additional rate
+      ue_linfo (img->cod_counter+1, dummy, &rate,    &dummy);
+      ue_linfo (img->cod_counter,   dummy, &cc_rate, &dummy);
+      rate -= cc_rate;
+    }
+  }
+  else
+  {
+    rate = writeMBLayer (1, &coeff_rate);
+  }
+
+  //=====   R E S T O R E   C O D I N G   S T A T E   =====
+  //-------------------------------------------------------
+  reset_coding_state (cs_cm);
+
+  // the rate term is clamped to at least 0.5 before weighting with lambda
+  rdcost = (double)distortion + lambda * dmax(0.5,(double)rate);
+
+  // reject when not cheaper than the current best, or when the lossless
+  // configuration (qp_scaled 0 with lossless_qpprime_flag) has any distortion
+  if (rdcost >= *min_rdcost ||
+    ((img->qp_scaled)==0 && img->lossless_qpprime_flag==1 && distortion!=0))
+  {
+#if FASTMODE
+    // Reordering RDCost comparison order of mode 0 and mode 1 in P_SLICE
+    // if RDcost of mode 0 and mode 1 is same, we choose best_mode is 0
+    // This might not always be good since mode 0 is more biased towards rate than quality.
+    if((img->type!=P_SLICE || mode != 0 || rdcost != *min_rdcost) || input->ProfileIDC>=FREXT_HP)
+#endif
+      return 0;
+  }
+
+
+  if ((img->MbaffFrameFlag) && (mode ? 0: ((img->type == B_SLICE) ? !currMB->cbp:1)))  // AFF and current is skip
+  {
+    if (img->current_mb_nr & 0x01) //bottom
+    {
+      // NOTE(review): prevMB is dereferenced without a NULL check; confirm
+      // that FmoGetPreviousMBNr never returns a negative number for an odd
+      // macroblock number here.
+      if (prevMB->mb_type ? 0:((img->type == B_SLICE) ? !prevMB->cbp:1)) //top is skip
+      {
+        if (!(field_flag_inference() == currMB->mb_field)) //skip only allowed when correct inference
+          return 0;
+      }
+    }
+  }
+
+  //=====   U P D A T E   M I N I M U M   C O S T   =====
+  //-----------------------------------------------------
+  *min_rdcost = rdcost;
+  *min_rate = lambda * (double)coeff_rate;
+
+#ifdef BEST_NZ_COEFF
+  for (j=0;j<4;j++)
+    for (i=0; i<(4+img->num_blk8x8_uv); i++)
+      gaaiMBAFF_NZCoeff[j][i] = img->nz_coeff[img->current_mb_nr][j][i];
+#endif
+
+  return 1;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Store adaptive rounding parameters
+ *
+ *    Copies the rounding adjustment buffers produced for the given mode from
+ *    img->fadjust* into the bestInter* / bestIntra* buffers. Luma is always
+ *    stored; chroma only when input->AdaptRndChroma is set.
+ *
+ * \param mode
+ *    macroblock mode; I4MB, I16MB and I8MB are treated as intra, all other
+ *    values as inter, and P8x8 has its own source buffers
+ * \param currMB
+ *    current macroblock; only luma_transform_size_8x8_flag is read
+ *************************************************************************************
+ */
+void store_adaptive_rounding_parameters (int mode, Macroblock *currMB)
+{
+  int j;
+  int is_inter = (mode != I4MB)&&(mode != I16MB)&&(mode != I8MB);
+
+  //--- luma ---
+  if (currMB->luma_transform_size_8x8_flag)
+  {
+    if (mode == P8x8)
+      memcpy(&(bestInterFAdjust8x8[0][0]),&(img->fadjust8x8[2][0][0]),MB_PIXELS * sizeof(int));
+    else if (is_inter)
+      memcpy(&(bestInterFAdjust8x8[0][0]),&(img->fadjust8x8[0][0][0]),MB_PIXELS * sizeof(int));
+    else
+      memcpy(&(bestIntraFAdjust8x8[0][0]),&(img->fadjust8x8[1][0][0]),MB_PIXELS * sizeof(int));
+  }
+  else
+  {
+    if (mode == P8x8)
+      memcpy(&(bestInterFAdjust4x4[0][0]),&(img->fadjust4x4[3][0][0]),MB_PIXELS * sizeof(int));
+    else if (is_inter)
+      memcpy(&(bestInterFAdjust4x4[0][0]),&(img->fadjust4x4[0][0][0]),MB_PIXELS * sizeof(int));
+    else
+    {
+      // Intra source: plane 1, or plane 2 for I16MB. The comparison must be
+      // parenthesised: "1 + mode == I16MB" parses as "(1 + mode) == I16MB",
+      // which yields index 0 or 1 instead of 1 or 2.
+      memcpy(&(bestIntraFAdjust4x4[0][0]),&(img->fadjust4x4[1 + (mode == I16MB)][0][0]),MB_PIXELS * sizeof(int));
+    }
+  }
+
+  //--- chroma (both chroma planes, row by row) ---
+  if (input->AdaptRndChroma)
+  {
+    if (currMB->luma_transform_size_8x8_flag && mode == P8x8)
+    {
+      for (j = 0; j < img->mb_cr_size_y; j++)
+      {
+        memcpy(bestInterFAdjust4x4Cr[0][j],img->fadjust8x8Cr[0][0][j],img->mb_cr_size_x * sizeof(int));
+        memcpy(bestInterFAdjust4x4Cr[1][j],img->fadjust8x8Cr[0][1][j],img->mb_cr_size_x * sizeof(int));
+      }
+    }
+    else if (mode == P8x8)
+    {
+      for (j = 0; j < img->mb_cr_size_y; j++)
+      {
+        memcpy(bestInterFAdjust4x4Cr[0][j],img->fadjust4x4Cr[2][0][j],img->mb_cr_size_x * sizeof(int));
+        memcpy(bestInterFAdjust4x4Cr[1][j],img->fadjust4x4Cr[2][1][j],img->mb_cr_size_x * sizeof(int));
+      }
+    }
+    else if (is_inter)
+    {
+      for (j = 0; j < img->mb_cr_size_y; j++)
+      {
+        memcpy(bestInterFAdjust4x4Cr[0][j],img->fadjust4x4Cr[0][0][j],img->mb_cr_size_x * sizeof(int));
+        memcpy(bestInterFAdjust4x4Cr[1][j],img->fadjust4x4Cr[0][1][j],img->mb_cr_size_x * sizeof(int));
+      }
+    }
+    else
+    {
+      for (j = 0; j < img->mb_cr_size_y; j++)
+      {
+        memcpy(bestIntraFAdjust4x4Cr[0][j],img->fadjust4x4Cr[1][0][j],img->mb_cr_size_x * sizeof(int));
+        memcpy(bestIntraFAdjust4x4Cr[1][j],img->fadjust4x4Cr[1][1][j],img->mb_cr_size_x * sizeof(int));
+      }
+    }
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Store macroblock parameters
+ *
+ *    Saves the state of the current macroblock for the given mode into the
+ *    file-level "best" variables: mode information, intra prediction modes,
+ *    reconstructed luma/chroma samples, SP-slice coefficients, simulated
+ *    decoder output, coefficient buffers, cbp values, transform size flag and
+ *    reference indices.
+ *
+ * \param mode
+ *    macroblock mode being stored
+ *************************************************************************************
+ */
+void store_macroblock_parameters (int mode)
+{
+  int  col, row, dec;
+  int  ****swap_ac;
+  int  ***swap_dc;
+  int  keep_sp_coeff;
+  Macroblock *currMB     = &img->mb_data[img->current_mb_nr];
+  int        is_b_slice  = (img->type==B_SLICE);
+
+  //--- store best mode ---
+  best_mode      = mode;
+  best_c_imode   = currMB->c_ipred_mode;
+  best_i16offset = img->i16offset;
+
+  // bi_pred_me is only meaningful for mode 1; everything else stores 0
+  if (mode == 1)
+    bi_pred_me = currMB->bi_pred_me;
+  else
+    bi_pred_me = 0;
+
+  //--- 8x8 modes/directions and intra prediction modes ---
+  memcpy(b8mode, currMB->b8mode, BLOCK_MULTIPLE * sizeof(int));
+  memcpy(b8pdir, currMB->b8pdir, BLOCK_MULTIPLE * sizeof(int));
+  memcpy(b4_intra_pred_modes,currMB->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+  memcpy(b8_intra_pred_modes8x8,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+
+  for (row = 0 ; row < BLOCK_MULTIPLE; row++)
+  {
+    memcpy(&b4_ipredmode[row * BLOCK_MULTIPLE],&img->ipredmode[img->block_y + row][img->block_x],BLOCK_MULTIPLE * sizeof(char));
+    memcpy(b8_ipredmode8x8[row],&img->ipredmode8x8[img->block_y + row][img->block_x],BLOCK_MULTIPLE * sizeof(char));
+  }
+
+  //--- reconstructed luma ---
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+  {
+    memcpy(rec_mbY[row],&enc_picture->imgY[img->pix_y+row][img->pix_x], MB_BLOCK_SIZE * sizeof(imgpel));
+  }
+
+  // SP slice with neither si_frame_indicator nor sp2_frame_indicator set:
+  // the lrec coefficient buffers are stored as well
+  keep_sp_coeff = (img->type==SP_SLICE) && (si_frame_indicator==0 && sp2_frame_indicator==0);
+
+  if (keep_sp_coeff)
+  {
+    for (row = 0; row < MB_BLOCK_SIZE; row++)
+    {
+      memcpy(lrec_rec[row],&lrec[img->pix_y+row][img->pix_x], MB_BLOCK_SIZE * sizeof(int));
+    }
+  }
+
+  if (img->AdaptiveRounding)
+    store_adaptive_rounding_parameters (mode, currMB);
+
+  //--- reconstructed chroma (skipped for 4:0:0) ---
+  if (img->yuv_format != YUV400)
+  {
+    for (row = 0; row<img->mb_cr_size_y; row++)
+    {
+      memcpy(rec_mbU[row],&enc_picture->imgUV[0][img->pix_c_y+row][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+      memcpy(rec_mbV[row],&enc_picture->imgUV[1][img->pix_c_y+row][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+    }
+    if (keep_sp_coeff)
+    {
+      // chroma coefficients of the SP frame
+      for (row = 0; row<img->mb_cr_size_y; row++)
+      {
+        memcpy(lrec_rec_U[row],&lrec_uv[0][img->pix_c_y+row][img->pix_c_x], img->mb_cr_size_x * sizeof(int));
+        memcpy(lrec_rec_V[row],&lrec_uv[1][img->pix_c_y+row][img->pix_c_x], img->mb_cr_size_x * sizeof(int));
+      }
+    }
+  }
+
+  //--- store results of decoders ---
+  if (input->rdopt==3 && img->type!=B_SLICE)
+  {
+    for (dec = 0; dec<input->NoOfDecoders; dec++)
+    {
+      for (row=img->pix_y; row<img->pix_y+16; row++)
+      {
+        for (col=img->pix_x; col<img->pix_x+16; col++)
+        {
+          // decoded samples of this macroblock, kept for updating the ref frames
+          decs->decY_best[dec][row][col] = decs->decY[dec][row][col];
+        }
+      }
+    }
+  }
+
+  //--- coeff, cbp, kac ---
+  if (!mode && !is_b_slice)
+  {
+    // mode 0 outside B slices: nothing coded, both cbp values are cleared
+    cbp     = 0;
+    cbp_blk = 0;
+  }
+  else
+  {
+    // exchange the buffer pointers with img instead of copying coefficients
+    swap_ac    = cofAC;
+    cofAC      = img->cofAC;
+    img->cofAC = swap_ac;
+
+    swap_dc    = cofDC;
+    cofDC      = img->cofDC;
+    img->cofDC = swap_dc;
+
+    cbp     = currMB->cbp;
+    cbp_blk = currMB->cbp_blk;
+  }
+
+  //--- store transform size ---
+  luma_transform_size_8x8_flag = currMB->luma_transform_size_8x8_flag;
+
+  //--- reference indices: list 0 always, list 1 for B slices only ---
+  for (row = 0; row<4; row++)
+    memcpy(frefframe[row],&enc_picture->ref_idx[LIST_0][img->block_y+row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+
+  if (is_b_slice)
+  {
+    for (row = 0; row<4; row++)
+      memcpy(brefframe[row],&enc_picture->ref_idx[LIST_1][img->block_y+row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+  }
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Write the stored macroblock state (best_mode, rec_mbY/U/V, cofAC/cofDC,
+ *    frefframe/brefframe, intra prediction modes, ...) back into the current
+ *    macroblock, the encoded picture and, when MBAFF is active, into rdopt
+ *
+ * \note
+ *    The SP coefficient and MBAFF chroma copies are performed once, after the
+ *    chroma reconstruction loop; they used to be issued a second time inside
+ *    that loop with identical sources and destinations.
+ *************************************************************************************
+ */
+void set_stored_macroblock_parameters ()
+{
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  imgpel     **imgY  = enc_picture->imgY;
+  imgpel    ***imgUV = enc_picture->imgUV;
+
+  int         mode   = best_mode;
+  int         bframe = (img->type==B_SLICE);
+  int         i, j, k, ****i4p, ***i3p;
+  int         block_x, block_y;
+  char    **ipredmodes = img->ipredmode;
+  short   *cur_mv;
+
+  //===== reconstruction values =====
+  for (j = 0; j < MB_BLOCK_SIZE; j++)
+  {
+    memcpy(&imgY[img->pix_y+j][img->pix_x],rec_mbY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+  }
+  if((img->type==SP_SLICE) &&(si_frame_indicator==0 && sp2_frame_indicator==0 ))
+  {
+    for (j = 0; j < MB_BLOCK_SIZE; j++)
+      memcpy(&lrec[img->pix_y+j][img->pix_x],lrec_rec[j], MB_BLOCK_SIZE * sizeof(int)); //restore coeff SP frame
+  }
+  if(img->MbaffFrameFlag)
+  {
+    for (j = 0; j < MB_BLOCK_SIZE; j++)
+      memcpy(rdopt->rec_mbY[j],rec_mbY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+  }
+
+  if (img->AdaptiveRounding)
+  {
+    update_offset_params(mode,luma_transform_size_8x8_flag);
+  }
+
+  if (img->yuv_format != YUV400)
+  {
+    // chroma reconstruction
+    for (j = 0; j<img->mb_cr_size_y; j++)
+    {
+      memcpy(&imgUV[0][img->pix_c_y+j][img->pix_c_x],rec_mbU[j], img->mb_cr_size_x * sizeof(imgpel));
+      memcpy(&imgUV[1][img->pix_c_y+j][img->pix_c_x],rec_mbV[j], img->mb_cr_size_x * sizeof(imgpel));
+    }
+
+    // restore chroma coefficients of SP frames
+    if((img->type==SP_SLICE) &&(!si_frame_indicator && !sp2_frame_indicator))
+    {
+      for (j = 0; j<img->mb_cr_size_y; j++)
+      {
+        memcpy(&lrec_uv[0][img->pix_c_y+j][img->pix_c_x],lrec_rec_U[j], img->mb_cr_size_x * sizeof(int));
+        memcpy(&lrec_uv[1][img->pix_c_y+j][img->pix_c_x],lrec_rec_V[j], img->mb_cr_size_x * sizeof(int));
+      }
+    }
+
+    // mirror the chroma reconstruction into the MBAFF RD buffers
+    if(img->MbaffFrameFlag)
+    {
+      for (j = 0; j<img->mb_cr_size_y; j++)
+      {
+        memcpy(rdopt->rec_mbU[j],rec_mbU[j], img->mb_cr_size_x * sizeof(imgpel));
+        memcpy(rdopt->rec_mbV[j],rec_mbV[j], img->mb_cr_size_x * sizeof(imgpel));
+      }
+    }
+  }
+
+  //===== coefficients and cbp =====
+  // the coefficient buffers are exchanged by swapping pointers, not by copying
+  i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
+  i3p=cofDC; cofDC=img->cofDC; img->cofDC=i3p;
+  currMB->cbp      = cbp;
+  currMB->cbp_blk = cbp_blk;
+  //==== macroblock type ====
+  currMB->mb_type = mode;
+
+  if(img->MbaffFrameFlag)
+  {
+    rdopt->mode = mode;
+    rdopt->i16offset = img->i16offset;
+    rdopt->cbp = cbp;
+    rdopt->cbp_blk = cbp_blk;
+    rdopt->mb_type  = mode;
+
+    rdopt->prev_qp       = currMB->prev_qp;
+    rdopt->prev_delta_qp = currMB->prev_delta_qp;
+    rdopt->delta_qp      = currMB->delta_qp;
+    rdopt->qp            = currMB->qp;
+    rdopt->prev_cbp      = currMB->prev_cbp;
+
+    for(i = 0;i<4+img->num_blk8x8_uv;i++)
+    {
+      for(j = 0;j<4;j++)
+        for(k = 0;k<2;k++)
+          memcpy(rdopt->cofAC[i][j][k], img->cofAC[i][j][k], 65 * sizeof(int));
+    }
+    for(i = 0;i<3;i++)
+      for(k = 0;k<2;k++)
+        memcpy(rdopt->cofDC[i][k], img->cofDC[i][k], 18 * sizeof(int));
+  }
+
+
+  memcpy(currMB->b8mode,b8mode, BLOCK_MULTIPLE * sizeof(int));
+  memcpy(currMB->b8pdir,b8pdir, BLOCK_MULTIPLE * sizeof(int));
+  if(img->MbaffFrameFlag)
+  {
+    memcpy(rdopt->b8mode,b8mode, BLOCK_MULTIPLE * sizeof(int));
+    memcpy(rdopt->b8pdir,b8pdir, BLOCK_MULTIPLE * sizeof(int));
+  }
+
+  // the bi-predictive ME selection is only meaningful for mb_type 1
+  currMB->bi_pred_me = currMB->mb_type == 1 ? bi_pred_me : 0;
+
+
+  //if P8x8 mode and transform size 4x4 chosen, restore motion vector data for this transform size
+  if (mode == P8x8 && !luma_transform_size_8x8_flag && input->Transform8x8Mode)
+    RestoreMV8x8(1);
+
+  //==== transform size flag ====
+  // no coded luma (cbp & 15 == 0) in a non-intra-4x4/16x16/8x8 MB: flag is forced to 0
+  if (((currMB->cbp & 15) == 0) && !(IS_OLDINTRA(currMB) || currMB->mb_type == I8MB))
+    currMB->luma_transform_size_8x8_flag = 0;
+  else
+    currMB->luma_transform_size_8x8_flag = luma_transform_size_8x8_flag;
+
+  rdopt->luma_transform_size_8x8_flag  = currMB->luma_transform_size_8x8_flag;
+
+  if (input->rdopt==3 && img->type!=B_SLICE)
+  {
+    //! save the MB Mode of every macroblock
+    decs->dec_mb_mode[img->mb_y][img->mb_x] = mode;
+  }
+
+  //==== reference frames =====
+  for (j = 0; j < 4; j++)
+  {
+    block_y = img->block_y + j;
+    for (i = 0; i < 4; i++)
+    {
+      block_x = img->block_x + i;
+      k = 2*(j >> 1)+(i >> 1);   // index of the 8x8 block containing 4x4 block (j,i)
+
+      // backward prediction or intra
+      if ((currMB->b8pdir[k] == 1) || IS_INTRA(currMB))
+      {
+        enc_picture->ref_idx    [LIST_0][block_y][block_x]    = -1;
+        enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = -1;
+        enc_picture->mv         [LIST_0][block_y][block_x][0] = 0;
+        enc_picture->mv         [LIST_0][block_y][block_x][1] = 0;
+        if(img->MbaffFrameFlag)
+          rdopt->refar[LIST_0][j][i] = -1;
+      }
+      else
+      {
+        if (currMB->bi_pred_me && (currMB->b8pdir[k] == 2) && currMB->mb_type==1)
+        {
+          // bi-predictive ME result: vectors come from bipred_mv1/2, reference index 0
+          cur_mv = currMB->bi_pred_me == 1
+            ? img->bipred_mv1[j][i][LIST_0][0][currMB->b8mode[k]]
+            : img->bipred_mv2[j][i][LIST_0][0][currMB->b8mode[k]];
+
+          enc_picture->ref_idx    [LIST_0][block_y][block_x] = 0;
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x] = enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][0];
+          enc_picture->mv         [LIST_0][block_y][block_x][0] = cur_mv[0];
+          enc_picture->mv         [LIST_0][block_y][block_x][1] = cur_mv[1];
+          if(img->MbaffFrameFlag)
+            rdopt->refar[LIST_0][j][i] = 0;
+        }
+        else
+        {
+          cur_mv = img->all_mv[j][i][LIST_0][(short)frefframe[j][i]][currMB->b8mode[k]];
+
+          enc_picture->ref_idx    [LIST_0][block_y][block_x]    = frefframe[j][i];
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][(short)frefframe[j][i]];
+          enc_picture->mv         [LIST_0][block_y][block_x][0] = cur_mv[0];
+          enc_picture->mv         [LIST_0][block_y][block_x][1] = cur_mv[1];
+          if(img->MbaffFrameFlag)
+            rdopt->refar[LIST_0][j][i] = frefframe[j][i];
+        }
+      }
+
+      // forward prediction or intra
+      if ((currMB->b8pdir[k] == 0) || IS_INTRA(currMB))
+      {
+        enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+        enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+        enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+        enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+        if(img->MbaffFrameFlag)
+          rdopt->refar[LIST_1][j][i] = -1;
+      }
+    }
+  }
+
+  if (bframe)
+  {
+    for (j=0; j<4; j++)
+    {
+      block_y = img->block_y + j;
+      for (i=0; i<4; i++)
+      {
+        block_x = img->block_x + i;
+        k = 2*(j >> 1)+(i >> 1);
+
+        // forward
+        if (IS_INTRA(currMB)||(currMB->b8pdir[k] == 0))
+        {
+          enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+          enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+          enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+          enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+          if(img->MbaffFrameFlag)
+            rdopt->refar[LIST_1][j][i] = -1;
+        }
+        else
+        {
+          if (currMB->bi_pred_me && (currMB->b8pdir[k] == 2) && currMB->mb_type==1)
+          {
+            cur_mv = currMB->bi_pred_me == 1
+              ? img->bipred_mv1[j][i][LIST_1][0][currMB->b8mode[k]]
+              : img->bipred_mv2[j][i][LIST_1][0][currMB->b8mode[k]];
+
+            enc_picture->ref_idx    [LIST_1][block_y][block_x] = 0;
+            enc_picture->ref_pic_id [LIST_1][block_y][block_x] = enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][0];
+            enc_picture->mv         [LIST_1][block_y][block_x][0] = cur_mv[0];
+            enc_picture->mv         [LIST_1][block_y][block_x][1] = cur_mv[1];
+            if(img->MbaffFrameFlag)
+              rdopt->refar[LIST_1][j][i] = 0;
+          }
+          else
+          {
+            cur_mv = img->all_mv[j][i][LIST_1][(short)brefframe[j][i]][currMB->b8mode[k]];
+
+            enc_picture->ref_idx    [LIST_1][block_y][block_x] = brefframe[j][i];
+            enc_picture->ref_pic_id [LIST_1][block_y][block_x] = enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][(short)brefframe[j][i]];
+            enc_picture->mv         [LIST_1][block_y][block_x][0] = cur_mv[0];
+            enc_picture->mv         [LIST_1][block_y][block_x][1] = cur_mv[1];
+            if(img->MbaffFrameFlag)
+              rdopt->refar[LIST_1][j][i] = brefframe[j][i];
+          }
+        }
+      }
+    }
+  }
+
+  //==== intra prediction modes ====
+  currMB->c_ipred_mode = best_c_imode;
+  img->i16offset = best_i16offset;
+
+  if(currMB->mb_type == I8MB)
+  {
+    // intra 8x8: the stored 8x8 modes populate both the 8x8 and the 4x4 mode arrays
+    memcpy(currMB->intra_pred_modes8x8,b8_intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+    memcpy(currMB->intra_pred_modes,b8_intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+    for(j = 0; j < BLOCK_MULTIPLE; j++)
+    {
+      memcpy(&img->ipredmode[img->block_y+j][img->block_x],b8_ipredmode8x8[j], BLOCK_MULTIPLE * sizeof(char));
+      memcpy(&img->ipredmode8x8[img->block_y+j][img->block_x], b8_ipredmode8x8[j], BLOCK_MULTIPLE * sizeof(char));
+    }
+  }
+  else if (mode!=I4MB && mode!=I8MB)
+  {
+    // neither intra 4x4 nor intra 8x8: reset the prediction modes to DC_PRED
+    memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+    for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+      memset(&img->ipredmode[j][img->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+  }
+  else if (mode == I4MB)
+  {
+    // intra 4x4: restore the stored 4x4 prediction modes
+    memcpy(currMB->intra_pred_modes,b4_intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+    for(j = 0; j < BLOCK_MULTIPLE; j++)
+      memcpy(&img->ipredmode[img->block_y + j][img->block_x],&b4_ipredmode[BLOCK_MULTIPLE * j], BLOCK_MULTIPLE * sizeof(char));
+  }
+
+  if(img->MbaffFrameFlag)
+  {
+    rdopt->c_ipred_mode = currMB->c_ipred_mode;
+    rdopt->i16offset = img->i16offset;
+    memcpy(rdopt->intra_pred_modes,currMB->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+    memcpy(rdopt->intra_pred_modes8x8,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+    for(j = img->block_y; j < img->block_y +BLOCK_MULTIPLE; j++)
+      memcpy(&rdopt->ipredmode[j][img->block_x],&ipredmodes[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+  }
+
+  //==== motion vectors =====
+  SetMotionVectorsMB (currMB, bframe);
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Set reference frames and motion vectors
+ *
+ *    Writes reference indices, reference picture ids and motion vectors of one
+ *    partition of the current macroblock into enc_picture.
+ *
+ * \param block
+ *    8x8 block index; selects the partition origin (j0, i0) in 4x4 block units
+ * \param mode
+ *    partition mode; modes 1..3 use their own entry of input->part_size, any
+ *    other value uses entry 4. In B slices, mode 0 takes direction and
+ *    reference indices from direct_pdir / direct_ref_idx per 4x4 block
+ * \param pdir
+ *    prediction direction: < 0 marks both lists as unused, 0 = list 0 only,
+ *    1 = list 1 only, 2 = both lists (only evaluated that way for B slices)
+ * \param fwref
+ *    list 0 reference index
+ * \param bwref
+ *    list 1 reference index
+ *************************************************************************************
+ */
+void SetRefAndMotionVectors (int block, int mode, int pdir, int fwref, int bwref)
+{
+  int     i, j=0;
+  int     bslice  = (img->type==B_SLICE);
+  // part_size is only looked up for modes 1..3; everything else uses entry 4
+  int     pmode   = (mode==1||mode==2||mode==3?mode:4);
+  // partition extent in 4x4 block units, relative to the macroblock origin
+  int     j0      = ((block >> 1)<<1);
+  int     i0      = ((block & 0x01)<<1);
+  int     j1      = j0 + (input->part_size[pmode][1]);
+  int     i1      = i0 + (input->part_size[pmode][0]);
+  int     block_x, block_y;
+  short   *cur_mv;
+  Macroblock  *currMB  = &img->mb_data[img->current_mb_nr];
+
+  // no inter prediction: invalidate references and zero the vectors of both lists
+  if (pdir<0)
+  {
+    for (j = img->block_y + j0; j < img->block_y + j1; j++)
+    {
+      for (i=img->block_x + i0; i<img->block_x +i1; i++)
+      {
+        enc_picture->ref_pic_id[LIST_0][j][i] = -1;
+        enc_picture->ref_pic_id[LIST_1][j][i] = -1;
+      }
+      memset(&enc_picture->ref_idx[LIST_0][j][img->block_x + i0], -1, (input->part_size[pmode][0]) * sizeof(char));
+      memset(&enc_picture->ref_idx[LIST_1][j][img->block_x + i0], -1, (input->part_size[pmode][0]) * sizeof(char));
+      // NOTE(review): clears 2 shorts per block in one call, which assumes the mv
+      // storage of consecutive blocks in a row is contiguous -- confirm against the allocator
+      memset(enc_picture->mv[LIST_0][j][img->block_x + i0], 0, 2*(input->part_size[pmode][0]) * sizeof(short));
+      memset(enc_picture->mv[LIST_1][j][img->block_x + i0], 0, 2*(input->part_size[pmode][0]) * sizeof(short));
+    }
+    return;
+  }
+
+  // non-B slices: only list 0 is written
+  if (!bslice)
+  {
+    for (j=j0; j<j1; j++)
+    {
+      block_y = img->block_y + j;
+      memset(&enc_picture->ref_idx   [LIST_0][block_y][img->block_x + i0], fwref, (input->part_size[pmode][0]) * sizeof(char));
+      for (i=i0; i<i1; i++)
+      {
+        block_x = img->block_x + i;
+        cur_mv = img->all_mv[j][i][LIST_0][fwref][mode];
+        enc_picture->mv        [LIST_0][block_y][block_x][0] = cur_mv[0];
+        enc_picture->mv        [LIST_0][block_y][block_x][1] = cur_mv[1];
+        enc_picture->ref_pic_id[LIST_0][block_y][block_x] = enc_picture->ref_pic_num[LIST_0+currMB->list_offset][fwref];
+      }
+    }
+    return;
+  }
+  else
+  {
+    // B slices: both lists are written for every 4x4 block of the partition
+    for (j=j0; j<j1; j++)
+    {
+      block_y = img->block_y + j;
+      for (i=i0; i<i1; i++)
+      {
+        block_x = img->block_x + i;
+        // mode 0: direction and references are replaced per block by the direct-mode
+        // tables; the overwritten values persist into the following iterations
+        if (mode==0)
+        {
+          pdir  = direct_pdir[block_y][block_x];
+          fwref = direct_ref_idx[LIST_0][block_y][block_x];
+          bwref = direct_ref_idx[LIST_1][block_y][block_x];
+        }
+
+        // ----- list 0 -----
+        if ((pdir==0 || pdir==2))
+        {
+          // bi-predictive ME result (mode 1 only): bipred vectors, reference index 0
+          if (currMB->bi_pred_me && (pdir == 2) && mode == 1)
+          {
+            cur_mv = currMB->bi_pred_me == 1
+              ? img->bipred_mv1[j][i][LIST_0][0][mode]
+              : img->bipred_mv2[j][i][LIST_0][0][mode];
+
+            enc_picture->mv        [LIST_0][block_y][block_x][0] = cur_mv[0];
+            enc_picture->mv        [LIST_0][block_y][block_x][1] = cur_mv[1];
+            enc_picture->ref_idx   [LIST_0][block_y][block_x]    = 0;
+            enc_picture->ref_pic_id[LIST_0][block_y][block_x]    = enc_picture->ref_pic_num[LIST_0+currMB->list_offset][0];
+          }
+          else
+          {
+            cur_mv = img->all_mv[j][i][LIST_0][fwref][mode];
+
+            enc_picture->mv        [LIST_0][block_y][block_x][0] = cur_mv[0];
+            enc_picture->mv        [LIST_0][block_y][block_x][1] = cur_mv[1];
+            enc_picture->ref_idx   [LIST_0][block_y][block_x] = fwref;
+            // the picture id is looked up through the ref_idx value just stored
+            enc_picture->ref_pic_id[LIST_0][block_y][block_x] =
+            enc_picture->ref_pic_num[LIST_0+currMB->list_offset][(short)enc_picture->ref_idx[LIST_0][block_y][block_x]];
+          }
+        }
+        else
+        {
+          // list 0 unused for this block
+          enc_picture->mv        [LIST_0][block_y][block_x][0] = 0;
+          enc_picture->mv        [LIST_0][block_y][block_x][1] = 0;
+          enc_picture->ref_idx   [LIST_0][block_y][block_x]    = -1;
+          enc_picture->ref_pic_id[LIST_0][block_y][block_x]    = -1;
+        }
+
+        // ----- list 1 -----
+        if ((pdir==1 || pdir==2))
+        {
+          if (currMB->bi_pred_me && (pdir == 2) && mode == 1)
+          {
+            cur_mv = currMB->bi_pred_me == 1
+              ? img->bipred_mv1[j][i][LIST_1][0][mode]
+              : img->bipred_mv2[j][i][LIST_1][0][mode];
+
+            enc_picture->mv        [LIST_1][block_y][block_x][0] = cur_mv[0];
+            enc_picture->mv        [LIST_1][block_y][block_x][1] = cur_mv[1];
+            enc_picture->ref_idx   [LIST_1][block_y][block_x]    = 0;
+            enc_picture->ref_pic_id[LIST_1][block_y][block_x]    = enc_picture->ref_pic_num[LIST_1+currMB->list_offset][0];
+          }
+          else
+          {
+            cur_mv = img->all_mv[j][i][LIST_1][bwref][mode];
+
+            enc_picture->mv        [LIST_1][block_y][block_x][0] = cur_mv[0];
+            enc_picture->mv        [LIST_1][block_y][block_x][1] = cur_mv[1];
+            enc_picture->ref_idx   [LIST_1][block_y][block_x] = bwref;
+            enc_picture->ref_pic_id[LIST_1][block_y][block_x] =
+            enc_picture->ref_pic_num[LIST_1+currMB->list_offset][(short)enc_picture->ref_idx[LIST_1][block_y][block_x]];
+          }
+        }
+        else
+        {
+          // list 1 unused for this block
+          enc_picture->mv        [LIST_1][block_y][block_x][0] = 0;
+          enc_picture->mv        [LIST_1][block_y][block_x][1] = 0;
+          enc_picture->ref_idx   [LIST_1][block_y][block_x]    = -1;
+          enc_picture->ref_pic_id[LIST_1][block_y][block_x]    = -1;
+        }
+      }
+    }
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Infer the field flag of a skipped macroblock from its neighbours
+ * \return
+ *    mb_field of neighbour A if it is available, otherwise mb_field of
+ *    neighbour B (top macroblock pair) if that is available, otherwise 0
+ *************************************************************************************
+ */
+int field_flag_inference()
+{
+  Macroblock *mb = &img->mb_data[img->current_mb_nr];
+
+  // neighbour A has priority
+  if (mb->mbAvailA)
+    return img->mb_data[mb->mbAddrA].mb_field;
+
+  // otherwise check the top macroblock pair
+  if (mb->mbAvailB)
+    return img->mb_data[mb->mbAddrB].mb_field;
+
+  // no neighbour available
+  return 0;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Copy the motion vectors and predictors of one 8x8 block from
+ *    img->all_mv / img->pred_mv into all_mv8x8[dir] / pred_mv8x8[dir]
+ *
+ * \param dir       first index into all_mv8x8 / pred_mv8x8
+ * \param block8x8  index (0..3) of the 8x8 block inside the macroblock
+ * \param mode      partition mode used as vector index (B slices only;
+ *                  non-B slices always read index 4)
+ * \param ref       list 0 reference index
+ * \param bw_ref    list 1 reference index
+ * \param pdir8     prediction direction: 0 = list 0, 1 = list 1, 2 = both;
+ *                  for non-B slices any value >= 0 stores list 0
+ * \param bframe    non-zero for B slices
+ *************************************************************************************
+ */
+
+void StoreMVBlock8x8(int dir, int block8x8, int mode, int ref, int bw_ref, int pdir8, int bframe)
+{
+  const int x0 = (block8x8 & 0x01) << 1;
+  const int y0 = (block8x8 >> 1) << 1;
+  short ******mv_all  = img->all_mv;
+  short ******mv_pred = img->pred_mv;
+  short (*keep_l0)[4][2] = all_mv8x8[dir][LIST_0];
+  short (*keep_l1)[4][2] = all_mv8x8[dir][LIST_1];
+  short (*keep_pr)[4][2];
+  short *src_mv, *src_pr;
+  int   take_l0 = 0, take_l1 = 0;
+  int   x, y;
+
+  // translate the direction into "which lists have to be stored"
+  if (!bframe)
+  {
+    mode    = 4;
+    take_l0 = (pdir8 >= 0);
+  }
+  else
+  {
+    switch (pdir8)
+    {
+    case 0:   // list0
+      take_l0 = 1;
+      break;
+    case 1:   // list1
+      take_l1 = 1;
+      break;
+    case 2:   // bipred
+      take_l0 = 1;
+      take_l1 = 1;
+      break;
+    default:
+      error("invalid direction mode", 255);
+      break;
+    }
+  }
+
+  if (take_l0)
+  {
+    keep_pr = pred_mv8x8[dir][LIST_0];
+    for (y = y0; y < y0 + 2; y++)
+    {
+      for (x = x0; x < x0 + 2; x++)
+      {
+        src_mv = mv_all [y][x][LIST_0][ref][mode];
+        src_pr = mv_pred[y][x][LIST_0][ref][mode];
+        keep_l0[y][x][0] = src_mv[0];
+        keep_l0[y][x][1] = src_mv[1];
+        keep_pr[y][x][0] = src_pr[0];
+        keep_pr[y][x][1] = src_pr[1];
+      }
+    }
+  }
+
+  if (take_l1)
+  {
+    keep_pr = pred_mv8x8[dir][LIST_1];
+    for (y = y0; y < y0 + 2; y++)
+    {
+      for (x = x0; x < x0 + 2; x++)
+      {
+        src_mv = mv_all [y][x][LIST_1][bw_ref][mode];
+        src_pr = mv_pred[y][x][LIST_1][bw_ref][mode];
+        keep_l1[y][x][0] = src_mv[0];
+        keep_l1[y][x][1] = src_mv[1];
+        keep_pr[y][x][0] = src_pr[0];
+        keep_pr[y][x][1] = src_pr[1];
+      }
+    }
+  }
+}
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Store the motion vectors of all four 8x8 blocks of one macroblock,
+ *    using the per-block mode, references and direction held in tr8x8
+ *
+ * \param dir  passed through to StoreMVBlock8x8
+ *************************************************************************************
+ */
+void StoreMV8x8(int dir)
+{
+  const int is_b_slice = (img->type == B_SLICE);
+  int       blk = 0;
+
+  while (blk < 4)
+  {
+    StoreMVBlock8x8(dir, blk,
+                    tr8x8.part8x8mode [blk],
+                    tr8x8.part8x8fwref[blk],
+                    tr8x8.part8x8bwref[blk],
+                    tr8x8.part8x8pdir [blk],
+                    is_b_slice);
+    blk++;
+  }
+}
+
+/*!
+*************************************************************************************
+* \brief
+*    Copy the motion vectors and predictors of one 8x8 block back from
+*    all_mv8x8[dir] / pred_mv8x8[dir] into img->all_mv / img->pred_mv
+*
+* \param dir       first index into all_mv8x8 / pred_mv8x8
+* \param block8x8  index (0..3) of the 8x8 block inside the macroblock
+* \param tr        supplies mode, references and direction of the block
+* \param bframe    non-zero for B slices; non-B slices always write vector
+*                  index 4 and restore list 0 for any direction >= 0
+*************************************************************************************
+*/
+void RestoreMVBlock8x8(int dir, int block8x8, RD_8x8DATA tr, int bframe)
+{
+  const int x0 = (block8x8 & 0x01) << 1;
+  const int y0 = (block8x8 >> 1) << 1;
+  short ******mv_all  = img->all_mv;
+  short ******mv_pred = img->pred_mv;
+  short (*kept_l0)[4][2] = all_mv8x8[dir][LIST_0];
+  short (*kept_l1)[4][2] = all_mv8x8[dir][LIST_1];
+  short (*kept_pr)[4][2];
+  short *dst_mv, *dst_pr;
+
+  short direction = tr.part8x8pdir [block8x8];
+  short blk_mode  = tr.part8x8mode [block8x8];
+  short ref_l0    = tr.part8x8fwref[block8x8];
+  short ref_l1    = tr.part8x8bwref[block8x8];
+
+  int   put_l0 = 0, put_l1 = 0;
+  int   x, y;
+
+  // translate the direction into "which lists have to be restored"
+  if (!bframe)
+  {
+    blk_mode = 4;
+    put_l0   = (direction >= 0);
+  }
+  else
+  {
+    switch (direction)
+    {
+    case 0:   // forward
+      put_l0 = 1;
+      break;
+    case 1:   // backward
+      put_l1 = 1;
+      break;
+    case 2:   // bidir
+      put_l0 = 1;
+      put_l1 = 1;
+      break;
+    default:
+      error("invalid direction mode", 255);
+      break;
+    }
+  }
+
+  if (put_l0)
+  {
+    kept_pr = pred_mv8x8[dir][LIST_0];
+    for (y = y0; y < y0 + 2; y++)
+    {
+      for (x = x0; x < x0 + 2; x++)
+      {
+        dst_mv = mv_all [y][x][LIST_0][ref_l0][blk_mode];
+        dst_pr = mv_pred[y][x][LIST_0][ref_l0][blk_mode];
+        dst_mv[0] = kept_l0[y][x][0];
+        dst_mv[1] = kept_l0[y][x][1];
+        dst_pr[0] = kept_pr[y][x][0];
+        dst_pr[1] = kept_pr[y][x][1];
+      }
+    }
+  }
+
+  if (put_l1)
+  {
+    kept_pr = pred_mv8x8[dir][LIST_1];
+    for (y = y0; y < y0 + 2; y++)
+    {
+      for (x = x0; x < x0 + 2; x++)
+      {
+        dst_mv = mv_all [y][x][LIST_1][ref_l1][blk_mode];
+        dst_pr = mv_pred[y][x][LIST_1][ref_l1][blk_mode];
+        dst_mv[0] = kept_l1[y][x][0];
+        dst_mv[1] = kept_l1[y][x][1];
+        dst_pr[0] = kept_pr[y][x][0];
+        dst_pr[1] = kept_pr[y][x][1];
+      }
+    }
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Restore the motion vectors of all four 8x8 blocks of one macroblock,
+ *    using the block data held in tr8x8
+ *
+ * \param dir  passed through to RestoreMVBlock8x8
+ *************************************************************************************
+ */
+void RestoreMV8x8(int dir)
+{
+  const int is_b_slice = (img->type == B_SLICE);
+  int       blk = 0;
+
+  while (blk < 4)
+  {
+    RestoreMVBlock8x8(dir, blk, tr8x8, is_b_slice);
+    blk++;
+  }
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Store vectors and predictors of one 8x8 block into all_mv8x8[dir] /
+ *    pred_mv8x8[dir]; the stored vectors of every list that is not used by
+ *    the block are set to zero
+ *
+ * \param dir       first index into all_mv8x8 / pred_mv8x8
+ * \param block8x8  index (0..3) of the 8x8 block inside the macroblock
+ * \param mode      partition mode used as vector index (B slices only;
+ *                  non-B slices always read index 4)
+ * \param fw_ref    list 0 reference index
+ * \param bw_ref    list 1 reference index
+ * \param pdir8     prediction direction: < 0 clears both lists, 0 = list 0,
+ *                  1 = list 1, 2 = both; non-B slices use list 0 for any
+ *                  value >= 0
+ * \param bframe    non-zero for B slices
+ *************************************************************************************
+ */
+
+void StoreNewMotionVectorsBlock8x8(int dir, int block8x8, int mode, int fw_ref, int bw_ref, int pdir8, int bframe)
+{
+  const int x0 = (block8x8 & 0x01) << 1;
+  const int y0 = (block8x8 >> 1) << 1;
+  // which lists carry real vectors for this block
+  const int want_l0 = (pdir8 >= 0) && (!bframe || pdir8 == 0 || pdir8 == 2);
+  const int want_l1 = bframe && (pdir8 == 1 || pdir8 == 2);
+  short ******mv_all  = img->all_mv;
+  short ******mv_pred = img->pred_mv;
+  short (*keep_l0)[4][2] = all_mv8x8[dir][LIST_0];
+  short (*keep_l1)[4][2] = all_mv8x8[dir][LIST_1];
+  short (*keep_pr)[4][2];
+  short *src_mv, *src_pr;
+  int   x, y;
+
+  if (!bframe)
+    mode = 4;
+
+  //----- list 0 -----
+  if (want_l0)
+  {
+    keep_pr = pred_mv8x8[dir][LIST_0];
+    for (y = y0; y < y0 + 2; y++)
+    {
+      for (x = x0; x < x0 + 2; x++)
+      {
+        src_mv = mv_all [y][x][LIST_0][fw_ref][mode];
+        src_pr = mv_pred[y][x][LIST_0][fw_ref][mode];
+        keep_l0[y][x][0] = src_mv[0];
+        keep_l0[y][x][1] = src_mv[1];
+        keep_pr[y][x][0] = src_pr[0];
+        keep_pr[y][x][1] = src_pr[1];
+      }
+    }
+  }
+  else
+  {
+    // 4 shorts = both components of the two blocks in this row
+    for (y = y0; y < y0 + 2; y++)
+      memset(&keep_l0[y][x0], 0, 4 * sizeof(short));
+  }
+
+  //----- list 1 -----
+  if (want_l1)
+  {
+    keep_pr = pred_mv8x8[dir][LIST_1];
+    for (y = y0; y < y0 + 2; y++)
+    {
+      for (x = x0; x < x0 + 2; x++)
+      {
+        src_mv = mv_all [y][x][LIST_1][bw_ref][mode];
+        src_pr = mv_pred[y][x][LIST_1][bw_ref][mode];
+        keep_l1[y][x][0] = src_mv[0];
+        keep_l1[y][x][1] = src_mv[1];
+        keep_pr[y][x][0] = src_pr[0];
+        keep_pr[y][x][1] = src_pr[1];
+      }
+    }
+  }
+  else
+  {
+    for (y = y0; y < y0 + 2; y++)
+      memset(&keep_l1[y][x0], 0, 4 * sizeof(short));
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Makes the decision if the 8x8 transform will be used (for RD-off)
+ *
+ *    Accumulates distortion4x4() over the 4x4 residuals (prediction from
+ *    tr4x4) and distortion8x8() over the 8x8 residuals (prediction from
+ *    tr8x8) of all four 8x8 blocks and compares the two sums.
+ *
+ * \return
+ *    1 if input->Transform8x8Mode is 2 or the 8x8 cost is strictly lower
+ *    than the 4x4 cost, 0 otherwise
+ ************************************************************************
+ */
+int GetBestTransformP8x8()
+{
+  int sum4x4 = 0, sum8x8 = 0;
+  int blk8, blk4, row, col;
+
+  if (input->Transform8x8Mode == 2) //always allow 8x8 transform
+    return 1;
+
+  for (blk8 = 0; blk8 < 4; blk8++)
+  {
+    const int top8  = (blk8 >> 1) << 3;
+    const int left8 = (blk8 & 0x01) << 3;
+
+    //===== the four 4x4 blocks of this 8x8 block, in raster order =====
+    // NOTE(review): diff8x8 is filled 4x4 block by 4x4 block (16 consecutive
+    // entries per block), not in 8x8 raster order -- confirm this is the
+    // layout distortion8x8 expects
+    for (blk4 = 0; blk4 < 4; blk4++)
+    {
+      const int top4  = top8  + ((blk4 >> 1) << 2);
+      const int left4 = left8 + ((blk4 & 0x01) << 2);
+      const int org_y = img->opix_y + top4;
+      const int org_x = img->opix_x + left4;
+      const int base  = blk4 << 4;
+      int      *res4  = &diff4x4[base];
+
+      //===== displaced frame difference for both transform sizes =====
+      for (row = 0; row < 4; row++)
+      {
+        for (col = 0; col < 4; col++)
+        {
+          const int idx = (row << 2) + col;
+
+          res4[idx]           = imgY_org[org_y + row][org_x + col] - tr4x4.mpr8x8[top4 + row][left4 + col];
+          diff8x8[base + idx] = imgY_org[org_y + row][org_x + col] - tr8x8.mpr8x8[top4 + row][left4 + col];
+        }
+      }
+
+      sum4x4 += distortion4x4 (res4);
+    }
+    sum8x8 += distortion8x8 (diff8x8);
+  }
+  return (sum8x8 < sum4x4);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Sets MBAFF RD parameters: copies reconstruction, coefficients, modes,
+*    reference indices and intra prediction modes of the current
+*    macroblock into rdopt
+************************************************************************
+*/
+void set_mbaff_parameters()
+{
+  Macroblock *mb       = &img->mb_data[img->current_mb_nr];
+  const int   is_b     = (img->type == B_SLICE);
+  char      **pred_map = img->ipredmode;
+  int         blk, sub, half, row;
+
+  //===== reconstruction values =====
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+    memcpy(rdopt->rec_mbY[row], &enc_picture->imgY[img->pix_y + row][img->pix_x], MB_BLOCK_SIZE * sizeof(imgpel));
+
+  if (img->yuv_format != YUV400)
+  {
+    for (row = 0; row < img->mb_cr_size_y; row++)
+    {
+      memcpy(rdopt->rec_mbU[row], &enc_picture->imgUV[0][img->pix_c_y + row][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+      memcpy(rdopt->rec_mbV[row], &enc_picture->imgUV[1][img->pix_c_y + row][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+    }
+  }
+
+  //===== mode, cbp and transform size =====
+  rdopt->mode      = best_mode;
+  rdopt->i16offset = img->i16offset;
+  rdopt->cbp       = mb->cbp;
+  rdopt->cbp_blk   = mb->cbp_blk;
+  rdopt->mb_type   = mb->mb_type;
+
+  rdopt->luma_transform_size_8x8_flag = mb->luma_transform_size_8x8_flag;
+
+  // a macroblock of type 0 is always recorded with mode 0
+  if (rdopt->mb_type == 0 && rdopt->mode != 0)
+    rdopt->mode = 0;
+
+  //===== coefficients =====
+  for (blk = 0; blk < 4 + img->num_blk8x8_uv; blk++)
+    for (sub = 0; sub < 4; sub++)
+      for (half = 0; half < 2; half++)
+        memcpy(rdopt->cofAC[blk][sub][half], img->cofAC[blk][sub][half], 65 * sizeof(int));
+
+  for (blk = 0; blk < 3; blk++)
+    for (half = 0; half < 2; half++)
+      memcpy(rdopt->cofDC[blk][half], img->cofDC[blk][half], 18 * sizeof(int));
+
+  memcpy(rdopt->b8mode, mb->b8mode, BLOCK_MULTIPLE * sizeof(int));
+  memcpy(rdopt->b8pdir, mb->b8pdir, BLOCK_MULTIPLE * sizeof(int));
+
+  //==== reference frames: list 0 always, list 1 and bi_pred_me for B slices ====
+  for (row = 0; row < BLOCK_MULTIPLE; row++)
+    memcpy(rdopt->refar[LIST_0][row], &enc_picture->ref_idx[LIST_0][img->block_y + row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+
+  if (is_b)
+  {
+    for (row = 0; row < BLOCK_MULTIPLE; row++)
+      memcpy(rdopt->refar[LIST_1][row], &enc_picture->ref_idx[LIST_1][img->block_y + row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+
+    rdopt->bi_pred_me = mb->bi_pred_me;
+  }
+
+  //==== intra prediction modes ====
+  memcpy(rdopt->intra_pred_modes, mb->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+  memcpy(rdopt->intra_pred_modes8x8, mb->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+
+  for (row = img->block_y; row < img->block_y + 4; row++)
+    memcpy(&rdopt->ipredmode[row][img->block_x], &pred_map[row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+}
+
+/*!
+************************************************************************
+* \brief
+*    store coding state (for rd-optimized mode decision), used for 8x8 transformation
+*
+*    Thin wrapper: calls store_coding_state() on the cs_cm state object,
+*    which is declared outside this function.
+************************************************************************
+*/
+void store_coding_state_cs_cm()
+{
+  store_coding_state(cs_cm);
+}
+
+/*!
+************************************************************************
+* \brief
+*    restore coding state (for rd-optimized mode decision), used for 8x8 transformation
+*
+*    Thin wrapper: calls reset_coding_state() on the cs_cm state object,
+*    which is declared outside this function.
+************************************************************************
+*/
+void reset_coding_state_cs_cm()
+{
+  reset_coding_state(cs_cm);
+}
+
+/*!
+************************************************************************
+* \brief
+*    update rounding offsets based on JVT-N011
+*
+*    Adds the per-sample adjustments of the best mode to the luma offset
+*    list (4x4 or 8x8, selected by luma_transform_size_8x8_flag) and,
+*    when input->AdaptRndChroma is set, to the two chroma offset lists.
+*    Every updated entry is clipped to [0, 1 << (OffsetBits - 1)].
+*
+* \param mode
+*    macroblock mode; I4MB, I16MB and I8MB select the intra tables,
+*    any other value the inter tables
+* \param luma_transform_size_8x8_flag
+*    used as 0 or 1: selects the 8x8 tables when non-zero and is also
+*    used arithmetically to derive the block mask and shift
+************************************************************************
+*/
+void update_offset_params(int mode, int luma_transform_size_8x8_flag)
+{
+  const int use_8x8    = luma_transform_size_8x8_flag;
+  const int is_inter   = (mode != I4MB) && (mode != I16MB) && (mode != I8MB);
+  const int luma_pos   = AdaptRndPos[(is_inter << 1) + use_8x8][img->type];
+  const int max_offset = 1 << (OffsetBits - 1);
+  const int blk_mask   = 0x03 + (use_8x8 << 2);   // 3 for flag 0, 7 for flag 1
+  const int blk_shift  = 2 + use_8x8;             // 2 for flag 0, 3 for flag 1
+  short   **offsets;
+  int     **adjust;
+  short    *entry;
+  int       row, col, row_base;
+
+  // pick the offset list and the matching adjustment table
+  if (use_8x8)
+  {
+    offsets = OffsetList8x8;
+    adjust  = is_inter ? bestInterFAdjust8x8 : bestIntraFAdjust8x8;
+  }
+  else
+  {
+    offsets = OffsetList4x4;
+    adjust  = is_inter ? bestInterFAdjust4x4 : bestIntraFAdjust4x4;
+  }
+
+  //===== luma: fold all macroblock positions onto one transform block =====
+  for (row = 0; row < MB_BLOCK_SIZE; row++)
+  {
+    row_base = (row & blk_mask) << blk_shift;
+    for (col = 0; col < MB_BLOCK_SIZE; col++)
+    {
+      entry   = &offsets[luma_pos][row_base + (col & blk_mask)];
+      *entry += adjust[row][col];
+      *entry  = iClip3(0, max_offset, *entry);
+    }
+  }
+
+  //===== chroma: U and V lists are adjacent (v_pos == u_pos + 1) =====
+  if (input->AdaptRndChroma)
+  {
+    const int u_pos = AdaptRndCrPos[is_inter][img->type];
+    const int v_pos = u_pos + 1;
+    int    ***adjust_cr = is_inter ? bestInterFAdjust4x4Cr : bestIntraFAdjust4x4Cr;
+    int       idx;
+
+    for (row = 0; row < img->mb_cr_size_y; row++)
+    {
+      row_base = (row & 0x03) << 2;
+      for (col = 0; col < img->mb_cr_size_x; col++)
+      {
+        idx = row_base + (col & 0x03);
+
+        entry   = &OffsetList4x4[u_pos][idx];
+        *entry += adjust_cr[0][row][col];
+        *entry  = iClip3(0, max_offset, *entry);
+
+        entry   = &OffsetList4x4[v_pos][idx];
+        *entry += adjust_cr[1][row][col];
+        *entry  = iClip3(0, max_offset, *entry);
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Writes reference indices (ref_idx), reference picture ids
+*    (ref_pic_id) and motion vectors (mv) of one partition of the
+*    current macroblock into enc_picture.
+*
+*    The region written depends on mode:
+*      - mode == 1 : BLOCK_MULTIPLE rows x BLOCK_MULTIPLE columns starting
+*                    at (img->block_y + (block&2), img->block_x + (block&1)*2)
+*      - mode == 2 : 2 rows starting at img->block_y + block*2, all
+*                    BLOCK_MULTIPLE columns
+*      - otherwise : all BLOCK_MULTIPLE rows, 2 columns starting at
+*                    img->block_x + block*2
+*    (presumably the 16x16, 16x8 and 8x16 partitions — TODO confirm
+*    against the callers)
+*
+* \param mode         partition mode, see above
+* \param best_pdir    prediction direction: 1 marks LIST_0 as unused,
+*                     0 marks LIST_1 as unused (LIST_1 only if bframe)
+* \param block        partition index inside the macroblock
+* \param list_offset  added to the list index when reading ref_pic_num
+* \param best_fw_ref  reference index stored for LIST_0
+* \param best_bw_ref  reference index stored for LIST_1
+* \param bframe       non-zero: LIST_1 data is written as well
+************************************************************************
+*/
+void assign_enc_picture_params(int mode, char best_pdir, int block, int list_offset, int best_fw_ref, int best_bw_ref, int bframe)
+{
+  int i,j;
+  int block_x, block_y;
+  short *cur_mv;
+
+  if (mode==1)
+  {
+    //===== LIST_0 =====
+    if (best_pdir==1)
+    {
+      // LIST_0 unused: ref_idx = -1, mv = 0, ref_pic_id = -1
+      for (j=img->block_y+(block&2); j<img->block_y+(block&2) + BLOCK_MULTIPLE; j++)
+      {
+        block_x = img->block_x+(block&1)*2;
+
+        memset(&enc_picture->ref_idx[LIST_0][j][block_x], -1 ,     BLOCK_MULTIPLE * sizeof(char));
+        // assumes the mv pairs of one row are stored contiguously — TODO confirm
+        memset(enc_picture->mv      [LIST_0][j][block_x],  0 , 2 * BLOCK_MULTIPLE * sizeof(short));
+        for (i=block_x; i<block_x + BLOCK_MULTIPLE; i++)
+        {
+          enc_picture->ref_pic_id [LIST_0][j][i]    = -1;
+        }
+      }
+    }
+    else if (img->bi_pred_me[mode])
+    {
+      // bi-predictive ME result: reference index 0, vectors from bipred_mv1 or bipred_mv2
+      for (j=0; j<BLOCK_MULTIPLE; j++)
+      {
+        block_y = img->block_y+(block&2)+j;
+        block_x = img->block_x+(block&1)*2;
+        memset(&enc_picture->ref_idx[LIST_0][block_y][block_x], 0, BLOCK_MULTIPLE * sizeof(char));
+        for (i=0; i<BLOCK_MULTIPLE; i++)
+        {
+          // NOTE(review): indexed [i][j] here but all_mv is indexed [j][i] below — confirm intended
+          cur_mv = img->bi_pred_me[mode] == 1
+            ? img->bipred_mv1[i][j][LIST_0][0][mode]
+            : img->bipred_mv2[i][j][LIST_0][0][mode];
+
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x + i]    = enc_picture->ref_pic_num[LIST_0 + list_offset][0];
+          enc_picture->mv         [LIST_0][block_y][block_x + i][0] = cur_mv[0];
+          enc_picture->mv         [LIST_0][block_y][block_x + i][1] = cur_mv[1];
+        }
+      }
+    }
+    else
+    {
+      // regular LIST_0 prediction with best_fw_ref
+      for (j=0; j<BLOCK_MULTIPLE; j++)
+      {
+        block_y = img->block_y+(block&2)+j;
+        block_x = img->block_x+(block&1)*2;
+        memset(&enc_picture->ref_idx[LIST_0][block_y][block_x], best_fw_ref , BLOCK_MULTIPLE * sizeof(char));
+        for (i=0; i<BLOCK_MULTIPLE; i++)
+        {
+          cur_mv = img->all_mv[j][i][LIST_0][best_fw_ref][mode];
+
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x + i]    = enc_picture->ref_pic_num[LIST_0 + list_offset][best_fw_ref];
+          enc_picture->mv         [LIST_0][block_y][block_x + i][0] = cur_mv[0];
+          enc_picture->mv         [LIST_0][block_y][block_x + i][1] = cur_mv[1];
+        }
+      }
+    }
+
+    //===== LIST_1 (only written when bframe is set) =====
+    if (bframe)
+    {
+      if (best_pdir==0)
+      {
+        // LIST_1 unused: ref_idx = -1, mv = 0, ref_pic_id = -1
+        for (j=img->block_y+(block&2); j<img->block_y+(block&2) + BLOCK_MULTIPLE; j++)
+        {
+          block_x = img->block_x+(block&1)*2;
+          memset(&enc_picture->ref_idx[LIST_1][j][block_x], -1 , BLOCK_MULTIPLE * sizeof(char));
+          memset(enc_picture->mv[LIST_1][j][block_x], 0 , 2 * BLOCK_MULTIPLE * sizeof(short));
+          for (i=block_x; i<block_x + BLOCK_MULTIPLE; i++)
+          {
+            enc_picture->ref_pic_id [LIST_1][j][i] = -1;
+          }
+        }
+      }
+      else
+      {
+        if (img->bi_pred_me[mode])
+        {
+          // bi-predictive ME result: reference index 0, vectors from bipred_mv1 or bipred_mv2
+          for (j=0; j<BLOCK_MULTIPLE; j++)
+          {
+            block_y = img->block_y+(block&2)+j;
+            block_x = img->block_x+(block&1)*2;
+            memset(&enc_picture->ref_idx[LIST_1][block_y][block_x], 0, BLOCK_MULTIPLE * sizeof(char));
+            for (i=0; i<BLOCK_MULTIPLE; i++)
+            {
+              cur_mv = img->bi_pred_me[mode] == 1
+                ? img->bipred_mv1[i][j][LIST_1][0][mode]
+                : img->bipred_mv2[i][j][LIST_1][0][mode];
+
+              enc_picture->ref_pic_id [LIST_1][block_y][block_x + i] =
+                enc_picture->ref_pic_num[LIST_1 + list_offset][0];
+              enc_picture->mv         [LIST_1][block_y][block_x + i][0] = cur_mv[0];
+              enc_picture->mv         [LIST_1][block_y][block_x + i][1] = cur_mv[1];
+            }
+          }
+        }
+        else
+        {
+          // regular LIST_1 prediction with best_bw_ref
+          for (j=0; j<BLOCK_MULTIPLE; j++)
+          {
+            block_y = img->block_y+(block&2)+j;
+            block_x = img->block_x+(block&1)*2;
+            memset(&enc_picture->ref_idx[LIST_1][block_y][block_x], best_bw_ref, BLOCK_MULTIPLE * sizeof(char));
+            for (i=0; i<BLOCK_MULTIPLE; i++)
+            {
+
+              // NOTE(review): ref_pic_num is indexed with best_bw_ref before the
+              // best_bw_ref>=0 check below; the mode==2 and final branches guard
+              // this read — confirm best_bw_ref cannot be negative here
+              enc_picture->ref_pic_id [LIST_1][block_y][block_x + i] =
+                enc_picture->ref_pic_num[LIST_1 + list_offset][best_bw_ref];
+              if(best_bw_ref>=0)
+              {
+                cur_mv = img->all_mv[j][i][LIST_1][best_bw_ref][mode];
+                enc_picture->mv[LIST_1][block_y][block_x + i][0] = cur_mv[0];
+                enc_picture->mv[LIST_1][block_y][block_x + i][1] = cur_mv[1];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  else if (mode==2)
+  {
+    // two rows per partition (row offset block*2), all BLOCK_MULTIPLE columns
+    for (j=0; j<2; j++)
+    {
+      block_y = img->block_y + block * 2 + j;
+      for (i=0; i<BLOCK_MULTIPLE; i++)
+      {
+        block_x = img->block_x + i;
+        if (best_pdir==1)
+        {
+          // LIST_0 unused
+          enc_picture->ref_idx    [LIST_0][block_y][block_x]    = -1;
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = -1;
+          enc_picture->mv         [LIST_0][block_y][block_x][0] = 0;
+          enc_picture->mv         [LIST_0][block_y][block_x][1] = 0;
+        }
+        else
+        {
+          cur_mv = img->all_mv[j+block*2][i][LIST_0][best_fw_ref][mode];
+
+          enc_picture->ref_idx    [LIST_0][block_y][block_x]    = best_fw_ref;
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x]    =
+            enc_picture->ref_pic_num[LIST_0 + list_offset][best_fw_ref];
+          enc_picture->mv         [LIST_0][block_y][block_x][0] = cur_mv[0];
+          enc_picture->mv         [LIST_0][block_y][block_x][1] = cur_mv[1];
+        }
+
+        if (bframe)
+        {
+          if (best_pdir==0)
+          {
+            // LIST_1 unused
+            enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+            enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+            enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+            enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+          }
+          else
+          {
+            enc_picture->ref_idx[LIST_1][block_y][block_x] = best_bw_ref;
+            // ref_pic_id and mv are left untouched when best_bw_ref is negative
+            if(best_bw_ref>=0)
+            {
+              cur_mv = img->all_mv[j+ block*2][i][LIST_1][best_bw_ref][mode];
+
+              enc_picture->ref_pic_id [LIST_1][block_y][block_x] =
+                enc_picture->ref_pic_num[LIST_1 + list_offset][best_bw_ref];
+              enc_picture->mv[LIST_1][block_y][block_x][0] = cur_mv[0];
+              enc_picture->mv[LIST_1][block_y][block_x][1] = cur_mv[1];
+            }
+          }
+        }
+      }
+    }
+  }
+  else
+  {
+    // all BLOCK_MULTIPLE rows, two columns per partition (column offset block*2)
+    for (j=0; j<BLOCK_MULTIPLE; j++)
+    {
+      block_y = img->block_y+j;
+      for (i=0; i<2; i++)
+      {
+        block_x = img->block_x + block*2 + i;
+        if (best_pdir==1)
+        {
+          // LIST_0 unused
+          enc_picture->ref_idx    [LIST_0][block_y][block_x]    = -1;
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = -1;
+          enc_picture->mv         [LIST_0][block_y][block_x][0] = 0;
+          enc_picture->mv         [LIST_0][block_y][block_x][1] = 0;
+        }
+        else
+        {
+          cur_mv = img->all_mv[j][block*2+i][LIST_0][best_fw_ref][mode];
+
+          enc_picture->ref_idx    [LIST_0][block_y][block_x] = best_fw_ref;
+          enc_picture->ref_pic_id [LIST_0][block_y][block_x] =
+            enc_picture->ref_pic_num[LIST_0 + list_offset][best_fw_ref];
+          enc_picture->mv[LIST_0][block_y][block_x][0] = cur_mv[0];
+          enc_picture->mv[LIST_0][block_y][block_x][1] = cur_mv[1];
+        }
+
+        if (bframe)
+        {
+          if (best_pdir==0)
+          {
+            // LIST_1 unused
+            enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+            enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+            enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+            enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+          }
+          else
+          {
+            enc_picture->ref_idx[LIST_1][block_y][block_x] = best_bw_ref;
+            // ref_pic_id and mv are left untouched when best_bw_ref is negative
+            if(best_bw_ref>=0)
+            {
+              cur_mv = img->all_mv[j][block*2+i][LIST_1][best_bw_ref][mode];
+              enc_picture->ref_pic_id [LIST_1][block_y][block_x] =
+                enc_picture->ref_pic_num[LIST_1 + list_offset][best_bw_ref];
+
+              enc_picture->mv[LIST_1][block_y][block_x][0] = cur_mv[0];
+              enc_picture->mv[LIST_1][block_y][block_x][1] = cur_mv[1];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Updates the 2x2 group of refresh_map entries that belongs to the
+*    current macroblock (rows 2*img->mb_y.., columns 2*img->mb_x..).
+*
+*    All four entries receive the same 0/1 value:
+*      - RestrictRef == 1, rdopt <  2 : 1 if intra is non-zero
+*      - RestrictRef == 1, rdopt == 3 : 1 if intra1 is zero and the
+*                                       macroblock type is I16MB or I4MB
+*      - RestrictRef == 2             : 1 if the macroblock type is
+*                                       I16MB or I4MB
+*    In every other configuration the map is left untouched.
+*
+* \param intra   intra indicator used when rdopt < 2
+* \param intra1  intra indicator used when rdopt == 3
+* \param currMB  current macroblock (only mb_type is read)
+************************************************************************
+*/
+void update_refresh_map(int intra, int intra1, Macroblock *currMB)
+{
+  int mark;
+  int row, col;
+
+  if (input->RestrictRef == 1)
+  {
+    // Modified for Fast Mode Decision. Inchoon Choi, SungKyunKwan Univ.
+    if (input->rdopt < 2)
+      mark = intra ? 1 : 0;
+    else if (input->rdopt == 3)
+      mark = (intra1 == 0 && (currMB->mb_type == I16MB || currMB->mb_type == I4MB)) ? 1 : 0;
+    else
+      return;   // no update for the remaining rdopt setting
+  }
+  else if (input->RestrictRef == 2)
+  {
+    mark = (currMB->mb_type == I16MB || currMB->mb_type == I4MB) ? 1 : 0;
+  }
+  else
+  {
+    return;     // refresh map not maintained for other RestrictRef values
+  }
+
+  row = 2 * img->mb_y;
+  col = 2 * img->mb_x;
+
+  refresh_map[row    ][col    ] = mark;
+  refresh_map[row    ][col + 1] = mark;
+  refresh_map[row + 1][col    ] = mark;
+  refresh_map[row + 1][col + 1] = mark;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,195 @@
+
+/*!
+ ***************************************************************************
+ * \file rdopt_coding_state.c
+ *
+ * \brief
+ *    Storing/restoring coding state for
+ *    Rate-Distortion optimized mode decision
+ *
+ * \author
+ *    Heiko Schwarz
+ *
+ * \date
+ *    17. April 2001
+ **************************************************************************/
+
+#include <stdlib.h>
+#include <memory.h>
+
+#include "global.h"
+
+#include "rdopt_coding_state.h"
+#include "cabac.h"
+
+/*!
+ ************************************************************************
+ * \brief
+ *    delete structure for storing coding state
+ *
+ *    Releases the partition arrays, the arithmetic coding contexts and
+ *    the structure itself. A NULL argument is accepted and ignored.
+ *
+ * \param cs
+ *    coding state to release. The pointer is passed by value, so the
+ *    caller's copy is not cleared and must not be used afterwards.
+ ************************************************************************
+ */
+void
+delete_coding_state (CSptr cs)
+{
+  if (cs == NULL)
+    return;
+
+  //=== structures of data partition array ===
+  // free(NULL) is a no-op, so the members need no separate NULL checks
+  free (cs->encenv);
+  free (cs->bitstream);
+
+  //=== contexts for binary arithmetic coding ===
+  delete_contexts_MotionInfo  (cs->mot_ctx);
+  delete_contexts_TextureInfo (cs->tex_ctx);
+
+  //=== coding state structure ===
+  free (cs);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    create structure for storing coding state
+ *
+ *    Allocates a zero-initialised coding state with one entry per data
+ *    partition (1 if input->partition_mode is 0, otherwise 3). The
+ *    encoding environments and the arithmetic coding contexts are only
+ *    allocated when input->symbol_mode is CABAC; otherwise they are NULL.
+ *
+ * \return
+ *    the new coding state; no_mem_exit() is called if an allocation fails
+ ************************************************************************
+ */
+CSptr
+create_coding_state ()
+{
+  const int use_cabac = (input->symbol_mode == CABAC);
+  CSptr     state     = (CSptr) calloc (1, sizeof(CSobj));
+
+  //=== coding state structure ===
+  if (state == NULL)
+    no_mem_exit("init_coding_state: cs");
+
+  //=== important variables of data partition array ===
+  if (input->partition_mode == 0)
+    state->no_part = 1;
+  else
+    state->no_part = 3;
+
+  state->encenv = NULL;
+  if (use_cabac)
+  {
+    state->encenv = (EncodingEnvironment*) calloc (state->no_part, sizeof(EncodingEnvironment));
+    if (state->encenv == NULL)
+      no_mem_exit("init_coding_state: cs->encenv");
+  }
+
+  state->bitstream = (Bitstream*) calloc (state->no_part, sizeof(Bitstream));
+  if (state->bitstream == NULL)
+    no_mem_exit("init_coding_state: cs->bitstream");
+
+  //=== context for binary arithmetic coding ===
+  state->symbol_mode = input->symbol_mode;
+  state->mot_ctx     = use_cabac ? create_contexts_MotionInfo () : NULL;
+  state->tex_ctx     = use_cabac ? create_contexts_TextureInfo() : NULL;
+
+  return state;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    store coding state (for rd-optimized mode decision)
+ *
+ *    Copies the bitstream state of every active partition, the bit
+ *    counters, the mvd array and cbp_bits of the current macroblock into
+ *    cs. With CABAC the encoding environments and both context sets are
+ *    copied as well. Does nothing when input->rdopt is 0.
+ *
+ * \param cs
+ *    destination coding state
+ ************************************************************************
+ */
+void
+store_coding_state (CSptr cs)
+{
+  int         part;
+  int         num_parts = img->currentPicture->idr_flag ? 1 : cs->no_part;   // only one partition for IDR img
+  Slice      *slice     = img->currentSlice;
+  Macroblock *mb        = &(img->mb_data [img->current_mb_nr]);
+
+  if (!input->rdopt)
+    return;
+
+  //=== important variables of data partition array ===
+  for (part = 0; part < num_parts; part++)
+  {
+    if (cs->symbol_mode == CABAC)
+      cs->encenv[part] = slice->partArr[part].ee_cabac;
+
+    cs->bitstream[part] = *slice->partArr[part].bitstream;
+  }
+
+  //=== contexts for binary arithmetic coding ===
+  if (cs->symbol_mode == CABAC)
+  {
+    *cs->mot_ctx = *slice->mot_ctx;
+    *cs->tex_ctx = *slice->tex_ctx;
+  }
+
+  //=== syntax element number and bitcounters ===
+  memcpy (cs->bitcounter, mb->bitcounter, MAX_BITCOUNTER_MB * sizeof(int));
+
+  //=== elements of current macroblock ===
+  memcpy (cs->mvd, mb->mvd, BLOCK_CONTEXT * sizeof(int));
+  cs->cbp_bits = mb->cbp_bits;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    restore coding state (for rd-optimized mode decision)
+ *
+ *    Writes the bitstream state of every active partition, the bit
+ *    counters, the mvd array and cbp_bits stored in cs back to the
+ *    current slice and macroblock. With CABAC the encoding environments
+ *    and both context sets are restored as well. Does nothing when
+ *    input->rdopt is 0.
+ *
+ * \param cs
+ *    source coding state
+ ************************************************************************
+ */
+void reset_coding_state (CSptr cs)
+{
+  int         part;
+  int         num_parts = img->currentPicture->idr_flag ? 1 : cs->no_part;   // only one partition for IDR img
+  Slice      *slice     = img->currentSlice;
+  Macroblock *mb        = &(img->mb_data [img->current_mb_nr]);
+
+  if (!input->rdopt)
+    return;
+
+  //=== important variables of data partition array ===
+  for (part = 0; part < num_parts; part++)
+  {
+    //--- parameters of encoding environments ---
+    if (cs->symbol_mode == CABAC)
+      slice->partArr[part].ee_cabac = cs->encenv[part];
+
+    *slice->partArr[part].bitstream = cs->bitstream[part];
+  }
+
+  //=== contexts for binary arithmetic coding ===
+  if (cs->symbol_mode == CABAC)
+  {
+    *slice->mot_ctx = *cs->mot_ctx;
+    *slice->tex_ctx = *cs->tex_ctx;
+  }
+
+  //=== syntax element number and bit counters ===
+  memcpy (mb->bitcounter, cs->bitcounter, MAX_BITCOUNTER_MB * sizeof(int));
+
+  //=== elements of current macroblock ===
+  memcpy (mb->mvd, cs->mvd, BLOCK_CONTEXT * sizeof(int));
+  mb->cbp_bits = cs->cbp_bits;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,52 @@
+
+/*!
+ ***************************************************************************
+ * \file
+ *    rdopt_coding_state.h
+ *
+ * \author
+ *    Heiko Schwarz
+ *
+ * \date
+ *    17. April 2001
+ *
+ * \brief
+ *    Headerfile for storing/restoring coding state
+ *    (for rd-optimized mode decision)
+ **************************************************************************
+ */
+
+#ifndef _RD_OPT_CS_H_
+#define _RD_OPT_CS_H_
+
+typedef struct { //!< snapshot of the coding state saved/restored during RD-optimized mode decision
+
+  // important variables of data partition array
+  int                   no_part;     //!< number of entries in encenv / bitstream
+  EncodingEnvironment  *encenv;      //!< per-partition copy of partArr[i].ee_cabac
+  Bitstream            *bitstream;   //!< per-partition copy of *partArr[i].bitstream
+
+  // contexts for binary arithmetic coding
+  int                   symbol_mode; //!< entropy coding mode; compared against CABAC
+  MotionInfoContexts   *mot_ctx;     //!< copy of the slice's motion info contexts
+  TextureInfoContexts  *tex_ctx;     //!< copy of the slice's texture info contexts
+
+  // bit counter
+  int                   bitcounter[MAX_BITCOUNTER_MB]; //!< copy of the macroblock's bitcounter array
+
+  // elements of current macroblock
+  int                   mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< copy of the macroblock's mvd array
+  int64                 cbp_bits;    //!< copy of the macroblock's cbp_bits
+} CSobj;
+typedef CSobj* CSptr; //!< handle type taken by the coding state functions
+
+
+void  delete_coding_state  (CSptr);  //!< delete structure
+CSptr create_coding_state  (void);       //!< create structure
+
+void  store_coding_state   (CSptr);  //!< store parameters
+void  reset_coding_state   (CSptr);  //!< restore parameters
+
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/rdpicdecision.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdpicdecision.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/rdpicdecision.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,64 @@
+
+/*!
+*************************************************************************************
+* \file rdpicdecision.c
+*
+* \brief
+*    Perform RD optimal decisions between multiple coded versions of the same picture
+*
+* \author
+*    Main contributors (see contributors.h for copyright, address and affiliation details)
+*     - Alexis Michael Tourapis         <alexismt at ieee.org>
+*************************************************************************************
+*/
+
+#include "global.h"
+#include <math.h>
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    RD decision between possible encoding cases
+ *
+ *    The cost of each version is bits * lambda_picture + snrY.
+ *
+ * \return
+ *    0 if version 1 is kept: version 2 costs more, or both cost the same
+ *    and snrY_version2 >= snrY_version1. \n
+ *    1 otherwise (version 2 is chosen).
+ ************************************************************************
+ */
+int rd_pic_decision(double snrY_version1, double snrY_version2, int bits_version1, int bits_version2, double lambda_picture)
+{
+  const double cost1 = (double) bits_version1 * lambda_picture + snrY_version1;
+  const double cost2 = (double) bits_version2 * lambda_picture + snrY_version2;
+
+  // ties on cost are broken by the distortion term
+  const int keep_version1 = (cost2 > cost1)
+                         || (cost2 == cost1 && snrY_version2 >= snrY_version1);
+
+  return !keep_version1;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Picture Coding Decision
+ *
+ *    Derives the picture-level lambda from qp, sums the Y/U/V distortion
+ *    of both coded versions and lets rd_pic_decision() choose.
+ *
+ * \return
+ *    result of rd_pic_decision() for picture1 versus picture2
+ ************************************************************************
+ */
+int picture_coding_decision (Picture *picture1, Picture *picture2, int qp)
+{
+  const int is_sp  = (img->type == SP_SLICE);
+  const int is_b   = (img->type == B_SLICE);
+  double    lambda = 0.68 * pow (2, (qp - SHIFT_QP) / 3.0);
+  double    dist1, dist2;
+  int       bits1, bits2;
+
+  // lambda is doubled for B and SP slices when successive B frames are enabled
+  if (input->successive_Bframe && (is_b || is_sp))
+    lambda *= 2;
+
+  dist1 = picture1->distortion_y + picture1->distortion_u + picture1->distortion_v;
+  dist2 = picture2->distortion_y + picture2->distortion_u + picture2->distortion_v;
+  bits1 = picture1->bits_per_picture;
+  bits2 = picture2->bits_per_picture;
+
+  return rd_pic_decision(dist1, dist2, bits1, bits2, lambda);
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/refbuf.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/refbuf.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/refbuf.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,75 @@
+
+/*!
+ ************************************************************************
+ * \file refbuf.c
+ *
+ * \brief
+ *    Declarations of the reference frame buffer types and functions
+ ************************************************************************
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+#include "global.h"
+
+#include "refbuf.h"
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Yields a pel line _pointer_ from one of the 16 sub-images
+ *    Input does not require subpixel image indices
+ *
+ *    The two low bits of y and x select the sub-image, the remaining
+ *    bits give the sample position inside it. No range clipping is done.
+ ************************************************************************
+ */
+imgpel *FastLine4X (imgpel ****Pic, int y, int x)
+{
+  imgpel **sub_image = Pic[y & 0x03][x & 0x03];
+
+  return sub_image[y >> 2] + (x >> 2);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Yields a pel line _pointer_ from one of the 16 sub-images
+ *    Input does not require subpixel image indices
+ *
+ *    Same addressing as FastLine4X, but the sample position is clipped
+ *    to [0, width_pad] horizontally and [0, height_pad] vertically.
+ ************************************************************************
+ */
+imgpel *UMVLine4X (imgpel ****Pic, int y, int x)
+{
+  int      col       = iClip3( 0, width_pad , x >> 2);
+  int      row       = iClip3( 0, height_pad, y >> 2);
+  imgpel **sub_image = Pic[y & 0x03][x & 0x03];
+
+  return sub_image[row] + col;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Yields a pel line _pointer_ from one of the 16 (4:4:4), 32 (4:2:2),
+ *    or 64 (4:2:0) sub-images
+ *    Input does not require subpixel image indices
+ *
+ *    The sub-image is selected with chroma_mask_mv_y / chroma_mask_mv_x;
+ *    the sample position (y >> chroma_shift_y, x >> chroma_shift_x) is
+ *    clipped to [0, height_pad_cr] and [0, width_pad_cr].
+ ************************************************************************
+ */
+imgpel *UMVLine8X_chroma (imgpel ****Pic, int y, int x)
+{
+  int      col       = iClip3 (0, width_pad_cr , x >> chroma_shift_x);
+  int      row       = iClip3 (0, height_pad_cr, y >> chroma_shift_y);
+  imgpel **sub_image = Pic[y & chroma_mask_mv_y][x & chroma_mask_mv_x];
+
+  return sub_image[row] + col;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Yields a pel line _pointer_ from one of the 16 (4:4:4), 32 (4:2:2),
+ *    or 64 (4:2:0) sub-images
+ *    Input does not require subpixel image indices
+ *
+ *    Unclipped variant: the sub-image is selected with chroma_mask_mv_y /
+ *    chroma_mask_mv_x and the sample position is
+ *    (y >> chroma_shift_y, x >> chroma_shift_x).
+ ************************************************************************
+ */
+imgpel *FastLine8X_chroma (imgpel ****Pic, int y, int x)
+{
+  imgpel **sub_image = Pic[y & chroma_mask_mv_y][x & chroma_mask_mv_x];
+
+  return sub_image[y >> chroma_shift_y] + (x >> chroma_shift_x);
+}
+
+
+


Index: llvm-test/MultiSource/Applications/JM/lencod/refbuf.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/refbuf.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/refbuf.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,23 @@
+
+/*!
+ ************************************************************************
+ * \file refbuf.h
+ *
+ * \brief
+ *    Declarations of the reference frame buffer types and functions
+ ************************************************************************
+ */
+#ifndef _REBUF_H_
+#define _REBUF_H_
+
+// global sub-pel image access variables
+// (upper clipping bounds used by UMVLine4X / UMVLine8X_chroma)
+// NOTE(review): these are tentative definitions in a header rather than
+// extern declarations — confirm this is intended for every includer
+int height_pad, width_pad;
+int height_pad_cr, width_pad_cr;
+
+imgpel *UMVLine4X (imgpel ****Pic, int y, int x);
+imgpel *FastLine4X(imgpel ****Pic, int y, int x);
+imgpel *UMVLine8X_chroma (imgpel ****Pic, int y, int x);
+imgpel *FastLine8X_chroma(imgpel ****Pic, int y, int x);
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/rtp.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/rtp.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/rtp.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,629 @@
+
+/*!
+ *****************************************************************************
+ *
+ * \file rtp.c
+ *
+ * \brief
+ *    Functions to handle RTP headers and packets per RFC1889 and RTP NAL spec
+ *    Functions support little endian systems only (Intel, not Motorola/Sparc)
+ *
+ * \date
+ *    30 September 2001
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *****************************************************************************/
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+#include "global.h"
+
+#include "rtp.h"
+
+#ifdef WIN32
+#include <Winsock2.h>
+#else
+#include <netinet/in.h>
+#endif
+
+// SYMTRACESTRING(s): copies s into sym.tracestring when TRACE is set, expands to nothing otherwise
+#if TRACE
+#define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE)
+#else
+#define SYMTRACESTRING(s) // do nothing
+#endif
+
+
+int CurrentRTPTimestamp = 0;      //! The RTP timestamp of the current packet,
+                                  //! advanced by RTPUpdateTimestamp()
+int CurrentRTPSequenceNumber = 0; //! The RTP sequence number of the next packet,
+                                  //! incremented by one for each packet built in WriteRTPNALU()
+
+FILE *f;                          //! RTP output file: set by OpenRTPFile(), closed by CloseRTPFile()
+/*!
+ *****************************************************************************
+ *
+ * \brief
+ *    ComposeRTPpacket composes the complete RTP packet using the various
+ *    structure members of the RTPpacket_t structure
+ *
+ * \return
+ *    0 in case of success
+ *    negative error code in case of failure
+ *
+ * \par Parameters
+ *    Caller is responsible to allocate enough memory for the generated packet
+ *    in parameter->packet. Typically a malloc of 12+paylen bytes is sufficient
+ *
+ * \par Side effects
+ *    none
+ *
+ * \note
+ *    Function contains assert() tests for debug purposes (consistency checks
+ *    for RTP header fields)
+ *
+ * \date
+ *    30 September 2001
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *****************************************************************************/
+
+
+int ComposeRTPPacket (RTPpacket_t *p)
+
+{
+  unsigned int temp32;
+  unsigned short temp16;
+
+  // Consistency checks through assert, only used for debug purposes
+  assert (p->v == 2);
+  assert (p->p == 0);
+  assert (p->x == 0);
+  assert (p->cc == 0);    // mixer designers need to change this one
+  assert (p->m == 0 || p->m == 1);
+  assert (p->pt < 128);
+  assert (p->seq < 65536);
+  assert (p->payload != NULL);
+  assert (p->paylen < 65536 - 40);  // 2**16 -40 for IP/UDP/RTP header
+  assert (p->packet != NULL);
+
+  // Compose the 12-byte RTP header; multi-byte fields are stored in network byte order
+
+  p->packet[0] = (byte)
+    ( ((p->v  & 0x03) << 6)
+    | ((p->p  & 0x01) << 5)
+    | ((p->x  & 0x01) << 4)
+    | ((p->cc & 0x0F) << 0) );
+
+  p->packet[1] = (byte)
+    ( ((p->m  & 0x01) << 7)
+    | ((p->pt & 0x7F) << 0) );
+
+  // sequence number, msb first
+  temp16 = htons((unsigned short)p->seq);
+  memcpy (&p->packet[2], &temp16, 2);  // change to shifts for unified byte sex
+
+  // timestamp: converted to network byte order in a temporary, then copied byte-wise
+  temp32 = htonl(p->timestamp);
+  memcpy (&p->packet[4], &temp32, 4);  // change to shifts for unified byte sex
+
+  temp32 = htonl(p->ssrc);
+  memcpy (&p->packet[8], &temp32, 4);// change to shifts for unified byte sex
+
+  // Copy payload directly behind the header
+
+  memcpy (&p->packet[12], p->payload, p->paylen);
+  p->packlen = p->paylen+12;
+  return 0;   // no failure path: the assert()s above are the only checks
+}
+
+
+
+/*!
+ *****************************************************************************
+ *
+ * \brief
+ *    WriteRTPPacket writes the supplied RTP packet to the output file
+ *
+ * \return
+ *    0 in case of success
+ *    <0 in case of write failure (typically fatal)
+ *
+ * \param p
+ *    the RTP packet to be written (after ComposeRTPPacket() )
+ * \param f
+ *    output file
+ *
+ * \date
+ *    October 23, 2001
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *****************************************************************************/
+
+int WriteRTPPacket (RTPpacket_t *p, FILE *f)
+
+{
+  // record layout: 4-byte packet length, 4-byte time field (always -1), packet bytes
+  const int no_time = -1;
+  int ok;
+
+  assert (f != NULL);
+  assert (p != NULL);
+
+  ok =       (fwrite (&p->packlen, 4, 1, f) == 1);
+  ok = ok && (fwrite (&no_time, 4, 1, f) == 1);
+  ok = ok && (fwrite (p->packet, p->packlen, 1, f) == 1);
+
+  // the first failed write skips the remaining ones and is reported as -1
+  return ok ? 0 : -1;
+}
+
+
+
+
+
+/*!
+ *****************************************************************************
+ *
+ * \brief
+ *    WriteRTPNALU wraps one NALU in an RTP packet and writes it to the RTP file
+ *
+ * \return
+ *    Number of NALU payload bits handed to the output file (n->len * 8)
+ *
+ * \par Side effects
+ *    Packet written, RTPSequenceNumber and RTPTimestamp updated
+ *
+ * \date
+ *    December 13, 2002
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ *****************************************************************************/
+
+
+int WriteRTPNALU (NALU_t *n)
+{
+  RTPpacket_t *p;
+
+  assert (f != NULL);   // f is the file-level output stream
+  assert (n != NULL);
+  assert (n->len < 65000);
+
+  n->buf[0] = (byte)
+    (n->forbidden_bit << 7      |
+     n->nal_reference_idc << 5  |
+     n->nal_unit_type );
+
+  // Set RTP structure elements and malloc() memory for the buffers (freed before returning)
+  if ((p = (RTPpacket_t *) malloc (sizeof (RTPpacket_t))) == NULL)
+    no_mem_exit ("RTPWriteNALU-1");
+  if ((p->packet = malloc (MAXRTPPACKETSIZE)) == NULL)
+    no_mem_exit ("RTPWriteNALU-2");
+  if ((p->payload = malloc (MAXRTPPACKETSIZE)) == NULL)
+    no_mem_exit ("RTPWriteNALU-3");
+
+  p->v=2;
+  p->p=0;
+  p->x=0;
+  p->cc=0;
+  p->m=(n->startcodeprefix_len==4)&1;     // a long startcode of Annex B sets marker bit of RTP
+                                          // Not exactly according to the RTP payload spec, but
+                                          // good enough for now (hopefully).
+                                          //! For error resilience work, we need the correct
+                                          //! marker bit.  Introduce a nalu->marker and set it in
+                                          //! terminate_slice()?
+  p->pt=H264PAYLOADTYPE;
+  p->seq=CurrentRTPSequenceNumber++;
+  p->timestamp=CurrentRTPTimestamp;
+  p->ssrc=H264SSRC;
+  p->paylen = n->len;
+  memcpy (p->payload, n->buf, n->len);
+
+  // Generate complete RTP packet
+  if (ComposeRTPPacket (p) < 0)
+  {
+    printf ("Cannot compose RTP packet, exit\n");
+    exit (-1);
+  }
+  if (WriteRTPPacket (p, f) < 0)
+  {
+    printf ("Cannot write %d bytes of RTP packet to outfile, exit\n", p->packlen);
+    exit (-1);
+  }
+  free (p->packet);
+  free (p->payload);
+  free (p);
+  return (n->len * 8);   // NALU length in bits, not bytes
+}
+
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    RTPUpdateTimestamp: patches the RTP timestamp depending on the TR
+ *
+ * \param
+ *    tr: TR of the following NALUs
+ *
+ * \return
+ *    none.
+ *
+ ********************************************************************************************
+*/
+
+
+void RTPUpdateTimestamp (int tr)
+{
+  static int prev_tr = -1;   // -1 marks "not called yet"
+  int step;
+
+  if (prev_tr != -1)
+  {
+    /*! TR is assumed to wrap around at 256; this must change as soon as
+        that is no longer true.
+
+        B frames make tr step backwards on purpose, which is hard to tell
+        apart from a natural wrap-around.  Heuristic: a B frame is never
+        more than 10 tr ticks "older" than its predecessor, so any larger
+        backward step is treated as a wrap-around.
+    */
+    step = tr - prev_tr;
+    if (step < -10)
+      step = step + 256;
+
+    CurrentRTPTimestamp += step * RTP_TR_TIMESTAMP_MULT;
+    prev_tr = tr;
+  }
+  else
+  {
+    // First invocation: start the timeline at zero.  This violates the
+    // RTP security requirement of a random initial timestamp, but is
+    // easier to debug.
+    CurrentRTPTimestamp = 0;
+    prev_tr = 0;
+  }
+}
+
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Opens the output file for the RTP packet stream
+ *
+ * \param Filename
+ *    The filename of the file to be opened
+ *
+ * \return
+ *    none.  Function terminates the program in case of an error
+ *
+ ********************************************************************************************
+*/
+
+void OpenRTPFile (char *Filename)
+{
+  f = fopen (Filename, "wb");   // binary mode; f is the file-level RTP output stream
+  if (f != NULL)
+    return;
+  printf ("Fatal: cannot open bitstream file '%s', exit (-1)\n", Filename);
+  exit (-1);
+}
+
+
+/*!
+ ********************************************************************************************
+ * \brief
+ *    Closes the output file for the RTP packet stream
+ *
+ * \return
+ *    none.  The return value of fclose() is not checked
+ *
+ ********************************************************************************************
+*/
+
+void CloseRTPFile ()
+{
+  fclose(f);   // result ignored: a failure while closing goes unreported
+}
+
+
+
+
+
+
+
+
+#if 0
+/*!
+ *****************************************************************************
+ *
+ * \brief
+ *    int aggregationRTPWriteBits (int marker) write the Slice header for the RTP NAL
+ *
+ * \return
+ *    Number of bytes written to output file
+ *
+ * \param marker
+ *    marker bit,
+ *
+ * \par Side effects
+ *    Packet written, RTPSequenceNumber and RTPTimestamp updated
+ *
+ * \date
+ *    September 10, 2002
+ *
+ * \author
+ *    Dong Tian   tian at cs.tut.fi
+ *****************************************************************************/
+
+int aggregationRTPWriteBits (int Marker, int PacketType, int subPacketType, void * bitstream,
+                    int BitStreamLenInByte, FILE *out)
+{
+  RTPpacket_t *p;
+  int offset;
+
+//  printf( "writing aggregation packet...\n");
+  assert (out != NULL);
+  assert (BitStreamLenInByte < 65000);
+  assert (bitstream != NULL);
+  assert ((PacketType&0xf) == 4);
+
+  // Set RTP structure elements and alloca() memory for the buffers (nothing is freed below)
+  p = (RTPpacket_t *) alloca (sizeof (RTPpacket_t));
+  p->packet=alloca (MAXRTPPACKETSIZE);
+  p->payload=alloca (MAXRTPPACKETSIZE);
+  p->v=2;
+  p->p=0;
+  p->x=0;
+  p->cc=0;
+  p->m=Marker&1;
+  p->pt=H264PAYLOADTYPE;
+  p->seq=CurrentRTPSequenceNumber++;
+  p->timestamp=CurrentRTPTimestamp;
+  p->ssrc=H264SSRC;
+
+  offset = 0;
+  p->payload[offset++] = PacketType; // This is the first byte of the compound packet
+
+  // FIRST, write the sei message to aggregation packet, if it is available
+  if ( HaveAggregationSEI() )
+  {
+    p->payload[offset++] = sei_message[AGGREGATION_SEI].subPacketType; // this is the first byte of the first subpacket
+    *(short*)&(p->payload[offset]) = sei_message[AGGREGATION_SEI].payloadSize;  // NOTE(review): 16-bit length via pointer cast, host byte order
+    offset += 2;
+    memcpy (&p->payload[offset], sei_message[AGGREGATION_SEI].data, sei_message[AGGREGATION_SEI].payloadSize);
+    offset += sei_message[AGGREGATION_SEI].payloadSize;
+
+    clear_sei_message(AGGREGATION_SEI);
+  }
+
+  // SECOND, write other payload to the aggregation packet
+  // to do ...
+
+  // LAST, write the slice data to the aggregation packet
+  p->payload[offset++] = subPacketType;  // this is the first byte of the second subpacket
+  *(short*)&(p->payload[offset]) = BitStreamLenInByte;  // NOTE(review): same cast-based 16-bit store as above
+  offset += 2;
+  memcpy (&p->payload[offset], bitstream, BitStreamLenInByte);
+  offset += BitStreamLenInByte;
+
+  p->paylen = offset;  // 1 +3 +seiPayload.payloadSize +3 +BitStreamLenInByte
+
+  // Now the payload is ready, we can ...
+  // Generate complete RTP packet
+  if (ComposeRTPPacket (p) < 0)
+  {
+    printf ("Cannot compose RTP packet, exit\n");
+    exit (-1);
+  }
+  if (WriteRTPPacket (p, out) < 0)
+  {
+    printf ("Cannot write %d bytes of RTP packet to outfile, exit\n", p->packlen);
+    exit (-1);
+  }
+  return (p->packlen);  // header plus payload, in bytes
+
+}
+
+
+/*!
+ *****************************************************************************
+ * \isAggregationPacket
+ * \brief
+ *    Determine if current packet is normal packet or compound packet (aggregation
+ *    packet)
+ *
+ * \return
+ *    return TRUE, if it is compound packet.
+ *    return FALSE, otherwise.
+ *
+ * \date
+ *    September 10, 2002
+ *
+ * \author
+ *    Dong Tian   tian at cs.tut.fi
+ *****************************************************************************/
+Boolean isAggregationPacket()
+{
+  // Until Sept 2002, the JM will produce aggregation packet only for some SEI messages,
+  // so pending aggregation SEI data is the only trigger for a compound packet.
+  Boolean compound = FALSE;
+
+  if (HaveAggregationSEI())
+    compound = TRUE;
+  return compound;
+}
+
+/*!
+ *****************************************************************************
+ * \PrepareAggregationSEIMessage
+ * \brief
+ *    Prepare the aggregation sei message.
+ *
+ * \date
+ *    September 10, 2002
+ *
+ * \author
+ *    Dong Tian   tian at cs.tut.fi
+ *****************************************************************************/
+void PrepareAggregationSEIMessage()
+{
+  Boolean has_aggregation_sei_message = FALSE;
+  // each pending payload below is appended to sei_message[AGGREGATION_SEI]
+  // write the spare picture sei payload to the aggregation sei message
+  if (seiHasSparePicture && img->type != B_SLICE)
+  {
+    FinalizeSpareMBMap();
+    assert(seiSparePicturePayload.data->byte_pos == seiSparePicturePayload.payloadSize);
+    write_sei_message(AGGREGATION_SEI, seiSparePicturePayload.data->streamBuffer, seiSparePicturePayload.payloadSize, SEI_SPARE_PICTURE);
+    has_aggregation_sei_message = TRUE;
+  }
+  // write the sub sequence information sei payload to the aggregation sei message
+  if (seiHasSubseqInfo)
+  {
+    FinalizeSubseqInfo(img->layer);
+    write_sei_message(AGGREGATION_SEI, seiSubseqInfo[img->layer].data->streamBuffer, seiSubseqInfo[img->layer].payloadSize, SEI_SUBSEQ_INFORMATION);
+    ClearSubseqInfoPayload(img->layer);
+    has_aggregation_sei_message = TRUE;
+  }
+  // write the sub sequence layer information sei payload to the aggregation sei message
+  if (seiHasSubseqLayerInfo && img->number == 0)
+  {
+    FinalizeSubseqLayerInfo();
+    write_sei_message(AGGREGATION_SEI, seiSubseqLayerInfo.data, seiSubseqLayerInfo.payloadSize, SEI_SUBSEQ_LAYER_CHARACTERISTICS);
+    seiHasSubseqLayerInfo = FALSE;
+    has_aggregation_sei_message = TRUE;
+  }
+  // write the sub sequence characteristics payload to the aggregation sei message
+  if (seiHasSubseqChar)
+  {
+    FinalizeSubseqChar();
+    write_sei_message(AGGREGATION_SEI, seiSubseqChar.data->streamBuffer, seiSubseqChar.payloadSize, SEI_SUBSEQ_CHARACTERISTICS);
+    ClearSubseqCharPayload();
+    has_aggregation_sei_message = TRUE;
+  }
+  // write the pan scan rectangle info sei payload to the aggregation sei message
+  if (seiHasPanScanRectInfo)
+  {
+    FinalizePanScanRectInfo();
+    write_sei_message(AGGREGATION_SEI, seiPanScanRectInfo.data->streamBuffer, seiPanScanRectInfo.payloadSize, SEI_PANSCAN_RECT);
+    ClearPanScanRectInfoPayload();
+    has_aggregation_sei_message = TRUE;
+  }
+  // write the arbitrary (unregistered) info sei payload to the aggregation sei message
+  if (seiHasUser_data_unregistered_info)
+  {
+    FinalizeUser_data_unregistered();
+    write_sei_message(AGGREGATION_SEI, seiUser_data_unregistered.data->streamBuffer, seiUser_data_unregistered.payloadSize, SEI_USER_DATA_UNREGISTERED);
+    ClearUser_data_unregistered();
+    has_aggregation_sei_message = TRUE;
+  }
+  // write the registered (ITU-T T.35) user data sei payload to the aggregation sei message
+  if (seiHasUser_data_registered_itu_t_t35_info)
+  {
+    FinalizeUser_data_registered_itu_t_t35();
+    write_sei_message(AGGREGATION_SEI, seiUser_data_registered_itu_t_t35.data->streamBuffer, seiUser_data_registered_itu_t_t35.payloadSize, SEI_USER_DATA_REGISTERED_ITU_T_T35);
+    ClearUser_data_registered_itu_t_t35();
+    has_aggregation_sei_message = TRUE;
+  }
+  // write RandomAccess info sei payload to the aggregation sei message
+  if (seiHasRandomAccess_info)
+  {
+    FinalizeRandomAccess();
+    write_sei_message(AGGREGATION_SEI, seiRandomAccess.data->streamBuffer, seiRandomAccess.payloadSize, SEI_RANDOM_ACCESS_POINT);
+    ClearRandomAccess();
+    has_aggregation_sei_message = TRUE;
+  }
+  // more aggregation sei payload is written here...
+
+  // JVT-D099 write the scene information SEI payload
+  if (seiHasSceneInformation)
+  {
+    FinalizeSceneInformation();
+    write_sei_message(AGGREGATION_SEI, seiSceneInformation.data->streamBuffer, seiSceneInformation.payloadSize, SEI_SCENE_INFORMATION);
+    has_aggregation_sei_message = TRUE;
+  }
+  // End JVT-D099
+
+  // after all the sei payload is written: finalize only if something was added
+  if (has_aggregation_sei_message)
+    finalize_sei_message(AGGREGATION_SEI);
+}
+
+/*!
+ *****************************************************************************
+ * \begin_sub_sequence_rtp
+ * \brief
+ *    do some initialization for sub-sequence under rtp
+ *
+ * \date
+ *    September 10, 2002
+ *
+ * \author
+ *    Dong Tian   tian at cs.tut.fi
+ *****************************************************************************/
+
+void begin_sub_sequence_rtp()
+{
+  if ( input->of_mode != PAR_OF_RTP || input->NumFramesInELSubSeq == 0 )
+    return;   // only active for RTP output with a non-zero NumFramesInELSubSeq
+
+  // begin to encode the base layer subseq
+  if ( IMG_NUMBER == 0 )
+  {
+//    printf("begin to encode the base layer subseq\n");
+    InitSubseqInfo(0);
+    if (1)   // always taken
+      UpdateSubseqChar();
+  }
+  // begin to encode the enhanced layer subseq
+  if ( IMG_NUMBER % (input->NumFramesInELSubSeq+1) == 1 )
+  {
+//    printf("begin to encode the enhanced layer subseq\n");
+    InitSubseqInfo(1);  // init the sub-sequence in the enhanced layer
+//    add_dependent_subseq(1);
+    if (1)   // always taken
+      UpdateSubseqChar();
+  }
+}
+
+/*!
+ *****************************************************************************
+ * \end_sub_sequence_rtp
+ * \brief
+ *    calls CloseSubseqInfo() when a base / enhanced layer sub-sequence ends
+ *
+ * \date
+ *    September 10, 2002
+ *
+ * \author
+ *    Dong Tian   tian at cs.tut.fi
+ *****************************************************************************/
+void end_sub_sequence_rtp()
+{
+  // end of the base layer: reached on the last frame (img->number == no_frames-1)
+  if ( img->number == input->no_frames-1 )
+  {
+//    printf("end of encoding the base layer subseq\n");
+    CloseSubseqInfo(0);
+//    updateSubSequenceBox(0);
+  }
+  // end of the enhanced layer: the frame index tested depends on whether B frames are in use
+  if ( ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==0) && (input->successive_Bframe != 0) && (IMG_NUMBER>0)) || // there are B frames
+    ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==input->NumFramesInELSubSeq) && (input->successive_Bframe==0))   // there are no B frames
+    )
+  {
+//    printf("end of encoding the enhanced layer subseq\n");
+    CloseSubseqInfo(1);
+//    add_dependent_subseq(1);
+//    updateSubSequenceBox(1);
+  }
+}
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/rtp.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/rtp.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/rtp.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,72 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file rtp.h
+ *
+ * \brief
+ *    Definition of structures and functions to handle RTP headers.  For a
+ *    description of RTP see RFC1889 on http://www.ietf.org
+ *
+ * \date
+ *    30 September 2001
+ *
+ * \author
+ *    Stephan Wenger   stewe at cs.tu-berlin.de
+ **************************************************************************/
+
+#ifndef _RTP_H_
+#define _RTP_H_
+
+#include "nalu.h"
+
+#define MAXRTPPAYLOADLEN  (65536 - 40)    //!< Maximum payload size of an RTP packet
+#define MAXRTPPACKETSIZE  (65536 - 28)    //!< Maximum size of an RTP packet incl. header
+#define H264PAYLOADTYPE 105               //!< RTP payload type fixed here for simplicity
+#define H264SSRC 0x12345678               //!< SSRC, chosen to simplify debugging
+#define RTP_TR_TIMESTAMP_MULT 1000        //!< timestamp ticks per TR step; should be something like 27 MHz / 29.97 Hz
+
+typedef struct
+{
+  unsigned int v;          //!< Version, 2 bits, MUST be 0x2
+  unsigned int p;          //!< Padding bit, Padding MUST NOT be used
+  unsigned int x;          //!< Extension, MUST be zero
+  unsigned int cc;         /*!< CSRC count, normally 0 in the absence
+                                of RTP mixers */
+  unsigned int m;          //!< Marker bit
+  unsigned int pt;         //!< 7 bits, Payload Type, dynamically established
+  unsigned int seq;        /*!< RTP sequence number, incremented by one for
+                                each sent packet */
+  unsigned int timestamp;  //!< timestamp, 27 MHz for H.264
+  unsigned int ssrc;       //!< Synchronization Source (the fixed H264SSRC is defined above)
+  byte *       payload;    //!< the payload including payload headers
+  unsigned int paylen;     //!< length of payload in bytes
+  byte *       packet;     //!< complete packet including header and payload
+  unsigned int packlen;    //!< length of packet, typically paylen+12
+} RTPpacket_t;
+
+#if 0   // disabled declarations; not visible to files that include this header
+int  ComposeRTPPacket (RTPpacket_t *p);
+int  DecomposeRTPpacket (RTPpacket_t *p);
+int  WriteRTPPacket (RTPpacket_t *p, FILE *f);
+void DumpRTPHeader (RTPpacket_t *p);
+void RTPUpdateTimestamp (int tr);
+int  RTPWriteBits (int Marker, int PacketType, void * bitstream,
+                   int BitStreamLenInByte, FILE *out);
+
+Boolean isAggregationPacket();
+int aggregationRTPWriteBits (int Marker, int PacketType, int subPacketType, void * bitstream, int BitStreamLenInByte, FILE *out);
+
+void begin_sub_sequence_rtp();
+void end_sub_sequence_rtp();
+#endif
+
+void RTPUpdateTimestamp (int tr);    // advances the RTP timestamp from the TR of the following NALUs
+void OpenRTPFile (char *Filename);   // opens the RTP output file, exits the program on failure
+void CloseRTPFile ();                // closes the RTP output file
+int WriteRTPNALU (NALU_t *n);        // writes one NALU as an RTP packet, returns n->len * 8
+
+
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/sei.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/sei.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/sei.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1642 @@
+
+/*!
+ ************************************************************************
+ *  \file
+ *     sei.c
+ *  \brief
+ *     implementation of SEI related functions
+ *  \author(s)
+ *      - Dong Tian                             <tian at cs.tut.fi>
+ *
+ ************************************************************************
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+#include "global.h"
+
+#include "memalloc.h"
+#include "rtp.h"
+#include "mbuffer.h"
+#include "sei.h"
+#include "vlc.h"
+
+Boolean seiHasTemporal_reference=FALSE;   // file-level SEI flags, all start out FALSE
+Boolean seiHasClock_timestamp=FALSE;
+Boolean seiHasPanscan_rect=FALSE;
+Boolean seiHasBuffering_period=FALSE;
+Boolean seiHasHrd_picture=FALSE;
+Boolean seiHasFiller_payload=FALSE;
+Boolean seiHasUser_data_registered_itu_t_t35=FALSE;
+Boolean seiHasUser_data_unregistered=FALSE;
+Boolean seiHasRandom_access_point=FALSE;
+Boolean seiHasRef_pic_buffer_management_repetition=FALSE;
+Boolean seiHasSpare_picture=FALSE;
+
+Boolean seiHasSceneInformation=FALSE;     // tested by HaveAggregationSEI()
+
+Boolean seiHasSubseq_information=FALSE;
+Boolean seiHasSubseq_layer_characteristics=FALSE;
+Boolean seiHasSubseq_characteristics=FALSE;
+
+/*
+ ************************************************************************
+ *  \basic functions on supplemental enhancement information
+ *  \brief
+ *     The implementations are based on FCD
+ ************************************************************************
+ */
+
+//! sei_message[0]: this struct is to store the sei message packetized independently
+//! sei_message[1]: this struct is to store the sei message packetized together with slice data
+sei_struct sei_message[2];
+
+void InitSEIMessages()
+{
+  int i;
+  for (i=0; i<2; i++)   // one pass per element of sei_message[2]
+  {
+    sei_message[i].data = malloc(MAXRTPPAYLOADLEN);
+    if( sei_message[i].data == NULL ) no_mem_exit("InitSEIMessages: sei_message[i].data");
+    sei_message[i].subPacketType = SEI_PACKET_TYPE;
+    clear_sei_message(i);   // zeroes the new buffer and resets payloadSize / available
+  }
+
+  // init sei messages
+  seiSparePicturePayload.data = NULL;   // so that InitSparePicture() does not call CloseSparePicture() first
+  InitSparePicture();
+  InitSubseqChar();
+  if (input->NumFramesInELSubSeq != 0)
+    InitSubseqLayerInfo();
+  InitSceneInformation();
+  // init panscanrect sei message
+  InitPanScanRectInfo();
+  // init user_data_unregistered
+  InitUser_data_unregistered();
+  // init user_data_registered_itu_t_t35
+  InitUser_data_registered_itu_t_t35();
+  // init user_RandomAccess
+  InitRandomAccess();
+}
+
+void CloseSEIMessages()
+{
+  int i;   // releases what InitSEIMessages() set up
+
+  if (input->NumFramesInELSubSeq != 0)   // same condition as the matching init call
+    CloseSubseqLayerInfo();
+
+  CloseSubseqChar();
+  CloseSparePicture();
+  CloseSceneInformation();
+  ClosePanScanRectInfo();
+  CloseUser_data_unregistered();
+  CloseUser_data_registered_itu_t_t35();
+  CloseRandomAccess();
+  // bound the loop by the array itself (InitSEIMessages fills 2 slots), not by MAX_LAYER_NUMBER
+  for (i=0; i<(int)(sizeof(sei_message)/sizeof(sei_message[0])); i++)
+  {
+    free( sei_message[i].data );   // free(NULL) is a no-op, no guard needed
+    sei_message[i].data = NULL;
+  }
+}
+
+Boolean HaveAggregationSEI()
+{
+  // Any one pending SEI source is enough.  The tests short-circuit in the
+  // same order as a chain of early returns, so img is only dereferenced
+  // when the flag in front of it is set.
+  int pending;
+
+  pending =
+       (sei_message[AGGREGATION_SEI].available && img->type != B_SLICE)
+    || seiHasSubseqInfo
+    || (seiHasSubseqLayerInfo && img->number == 0)
+    || seiHasSubseqChar
+    || seiHasSceneInformation
+    || seiHasPanScanRectInfo
+    || seiHasUser_data_unregistered_info
+    || seiHasUser_data_registered_itu_t_t35_info
+    || seiHasRecoveryPoint_info;
+
+  if (pending)
+    return TRUE;
+  return FALSE;
+//  return input->SparePictureOption && ( seiHasSpare_picture || seiHasSubseq_information ||
+//    seiHasSubseq_layer_characteristics || seiHasSubseq_characteristics );
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *     write one sei payload to the sei message
+ *  \param id
+ *    0, if this is the normal packet\n
+ *    1, if this is a aggregation packet
+ *  \param payload
+ *    a pointer to the sei payload. Note that the bitstream
+ *    should already be byte aligned.
+ *  \param payload_size
+ *    the size of the sei payload
+ *  \param payload_type
+ *    the type of the sei payload
+ *  \par Output
+ *    the content of the sei message (sei_message[id]) is updated.
+ ************************************************************************
+ */
+void write_sei_message(int id, byte* payload, int payload_size, int payload_type)
+{
+  int offset, type, size;
+  assert(payload_type >= 0 && payload_type < SEI_MAX_ELEMENTS);
+
+  type = payload_type;
+  size = payload_size;
+  offset = sei_message[id].payloadSize;   // append behind what is already stored
+
+  while ( type >= 255 )   // the closing byte must be < 0xFF, so 255 itself is coded as FF 00
+  {
+    sei_message[id].data[offset++] = 0xFF;
+    type = type - 255;
+  }
+  sei_message[id].data[offset++] = (byte) type;   // last_payload_type_byte
+
+  while ( size >= 255 )   // same escape rule for the payload size
+  {
+    sei_message[id].data[offset++] = 0xFF;
+    size = size - 255;
+  }
+  sei_message[id].data[offset++] = (byte) size;   // last_payload_size_byte
+
+  memcpy(sei_message[id].data + offset, payload, payload_size);   // no bounds check against the buffer size
+  offset += payload_size;
+
+  sei_message[id].payloadSize = offset;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *     write rbsp_trailing_bits to the sei message
+ *  \param id
+ *    0, if this is the normal packet \n
+ *    1, if this is a aggregation packet
+ *  \par Output
+ *    the content of the sei message is updated and ready for packetisation
+ ************************************************************************
+ */
+void finalize_sei_message(int id)
+{
+  sei_struct *msg = &sei_message[id];
+
+  // trailing-bits byte 0x80 is appended at the current end of the message
+  msg->data[msg->payloadSize++] = 0x80;
+
+  msg->available = TRUE;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *     empty the sei message buffer
+ *  \param id
+ *    0, if this is the normal packet \n
+ *    1, if this is a aggregation packet
+ *  \par Output
+ *    the content of the sei message is cleared and ready for storing new
+ *      messages
+ ************************************************************************
+ */
+void clear_sei_message(int id)
+{
+  sei_message[id].available   = FALSE;   // nothing queued any more
+  sei_message[id].payloadSize = 0;       // next write starts at offset 0
+  memset( sei_message[id].data, 0, MAXRTPPAYLOADLEN);
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *     copy the bits from one bitstream buffer to another one
+ *  \param dest
+ *    pointer to the dest bitstream buffer
+ *  \param source
+ *    pointer to the source bitstream buffer
+ *  \par Output
+ *    the content of the dest bitstream is changed.
+ ************************************************************************
+ */
+void AppendTmpbits2Buf( Bitstream* dest, Bitstream* source )
+{
+  int i, j;
+  byte mask;
+  int bits_in_last_byte;
+
+  // copy the complete bytes of the source buffer, bit by bit, msb first
+  for (i=0; i<source->byte_pos; i++)
+  {
+    mask = 0x80;
+    for (j=0; j<8; j++)
+    {
+      dest->byte_buf <<= 1;
+      if (source->streamBuffer[i] & mask)
+        dest->byte_buf |= 1;
+      dest->bits_to_go--;
+      mask >>= 1;
+      if (dest->bits_to_go==0)   // dest byte is full: store it and start a new one
+      {
+        dest->bits_to_go = 8;
+        dest->streamBuffer[dest->byte_pos++]=dest->byte_buf;
+        dest->byte_buf = 0;
+      }
+    }
+  }
+  // copy the last byte, there are still (8-source->bits_to_go) bits in the source buffer
+  bits_in_last_byte = 8-source->bits_to_go;
+  if ( bits_in_last_byte > 0 )
+  {
+    mask = (byte) (1 << (bits_in_last_byte-1));   // start at the highest pending bit of source->byte_buf
+    for (j=0; j<bits_in_last_byte; j++)
+    {
+      dest->byte_buf <<= 1;
+      if (source->byte_buf & mask)
+        dest->byte_buf |= 1;
+      dest->bits_to_go--;
+      mask >>= 1;
+      if (dest->bits_to_go==0)   // same flush as in the loop above
+      {
+        dest->bits_to_go = 8;
+        dest->streamBuffer[dest->byte_pos++]=dest->byte_buf;
+        dest->byte_buf = 0;
+      }
+    }
+  }
+}
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on spare pictures
+ *  \brief
+ *     implementation of Spare Pictures related functions based on
+ *      JVT-D100
+ *  \author
+ *      Dong Tian                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+// global variables for spare pictures
+
+// In current implementation, Sept 2002, the spare picture info is
+// packetized together with the immediately following frame. Thus we
+// define one set of global variables to save the info.
+Boolean seiHasSparePicture = FALSE;
+spare_picture_struct seiSparePicturePayload;
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the global variables for spare picture information
+ ************************************************************************
+ */
+void InitSparePicture()
+{
+  Bitstream *bs;
+  if ( seiSparePicturePayload.data != NULL ) CloseSparePicture();  // drop a previous buffer first
+  bs = malloc( sizeof(Bitstream) );
+  seiSparePicturePayload.data = bs;
+  if ( bs == NULL ) no_mem_exit("InitSparePicture: seiSparePicturePayload.data");
+  bs->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if ( bs->streamBuffer == NULL ) no_mem_exit("InitSparePicture: seiSparePicturePayload.data->streamBuffer");
+  memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  bs->bits_to_go = 8;   // empty bit writer: no byte stored, no bit pending
+  bs->byte_pos   = 0;
+  bs->byte_buf   = 0;
+  seiSparePicturePayload.num_spare_pics   = 0;
+  seiSparePicturePayload.target_frame_num = 0;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Free the spare picture bitstream (safe if it was never allocated) and reset the counters
+ ************************************************************************
+ */
+void CloseSparePicture()
+{
+  if (seiSparePicturePayload.data != NULL)   // test the struct pointer before looking inside it
+  {
+    free(seiSparePicturePayload.data->streamBuffer);   // free(NULL) is a no-op
+    free(seiSparePicturePayload.data);
+  }
+  seiSparePicturePayload.data = NULL;
+  seiSparePicturePayload.num_spare_pics = 0;
+  seiSparePicturePayload.target_frame_num = 0;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *     Calculate the spare picture info, save the result in map_sp
+ *      then compose the spare picture information.
+ *  \par Output
+ *      the spare picture payload is available in *seiSparePicturePayload*
+ *      the syntax elements in the loop (see FCD), excluding the two elements
+ *      at the beginning.
+ *  \note
+ *      The complete implementation is currently commented out, so calling
+ *      this function has no effect. The disabled code is kept below for
+ *      reference.
+ ************************************************************************
+ */
+void CalculateSparePicture()
+{
+  // The entire body is disabled by the block comment that follows.
+  /*
+  int i, j, tmp, i0, j0, m;
+  byte **map_sp;
+  int delta_spare_frame_num;
+  Bitstream *tmpBitstream;
+
+  int num_of_mb=(img->height/16) * (img->width/16);
+  int threshold1 = 16*16*input->SPDetectionThreshold;
+  int threshold2 = num_of_mb * input->SPPercentageThreshold / 100;
+  int ref_area_indicator;
+  int CandidateSpareFrameNum, SpareFrameNum;
+  int possible_spare_pic_num;
+
+  // define it for debug purpose
+  #define WRITE_MAP_IMAGE
+
+#ifdef WRITE_MAP_IMAGE
+  byte **y;
+  int k;
+  FILE* fp;
+  static int first = 1;
+  char map_file_name[255]="map.yuv";
+#endif
+
+  // basic check
+  if (fb->picbuf_short[0]->used==0 || fb->picbuf_short[1]->used==0)
+  {
+#ifdef WRITE_MAP_IMAGE
+    fp = fopen( map_file_name, "wb" );
+    assert( fp != NULL );
+    // write the map image
+    for (i=0; i < img->height; i++)
+      for (j=0; j < img->width; j++)
+        fputc(0, fp);
+
+    for (k=0; k < 2; k++)
+      for (i=0; i < img->height/2; i++)
+        for (j=0; j < img->width/2; j++)
+          fputc(128, fp);
+    fclose( fp );
+#endif
+    seiHasSparePicture = FALSE;
+    return;
+  }
+  seiHasSparePicture = TRUE;
+
+  // set the global bitstream memory.
+  InitSparePicture();
+  seiSparePicturePayload.target_frame_num = img->number % MAX_FN;
+  // init the local bitstream memory.
+  tmpBitstream = malloc(sizeof(Bitstream));
+  if ( tmpBitstream == NULL ) no_mem_exit("CalculateSparePicture: tmpBitstream");
+  tmpBitstream->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if ( tmpBitstream->streamBuffer == NULL ) no_mem_exit("CalculateSparePicture: tmpBitstream->streamBuffer");
+  memset( tmpBitstream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+
+#ifdef WRITE_MAP_IMAGE
+  if ( first )
+  {
+    fp = fopen( map_file_name, "wb" );
+    first = 0;
+  }
+  else
+    fp = fopen( map_file_name, "ab" );
+  get_mem2D(&y, img->height, img->width);
+#endif
+  get_mem2D(&map_sp, img->height/16, img->width/16);
+
+  if (fb->picbuf_short[2]->used!=0) possible_spare_pic_num = 2;
+  else possible_spare_pic_num = 1;
+  // loop over the spare pictures
+  for (m=0; m<possible_spare_pic_num; m++)
+  {
+    // clear the temporal bitstream buffer
+    tmpBitstream->bits_to_go  = 8;
+    tmpBitstream->byte_pos    = 0;
+    tmpBitstream->byte_buf    = 0;
+    memset( tmpBitstream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+
+    // set delta_spare_frame_num
+    // the order of the following lines cannot be changed.
+    if (m==0)
+      CandidateSpareFrameNum = seiSparePicturePayload.target_frame_num - 1; // TargetFrameNum - 1;
+    else
+      CandidateSpareFrameNum = SpareFrameNum - 1;
+    if ( CandidateSpareFrameNum < 0 ) CandidateSpareFrameNum = MAX_FN - 1;
+    SpareFrameNum = fb->picbuf_short[m+1]->frame_num_256;
+    delta_spare_frame_num = CandidateSpareFrameNum - SpareFrameNum;
+    assert( delta_spare_frame_num == 0 );
+
+    // calculate the spare macroblock map of one spare picture
+    // the results are stored into map_sp[][]
+    for (i=0; i < img->height/16; i++)
+      for (j=0; j < img->width/16; j++)
+      {
+        tmp = 0;
+        for (i0=0; i0<16; i0++)
+          for (j0=0; j0<16; j0++)
+            tmp+=iabs(fb->picbuf_short[m+1]->Refbuf11[(i*16+i0)*img->width+j*16+j0]-
+                       fb->picbuf_short[0]->Refbuf11[(i*16+i0)*img->width+j*16+j0]);
+        tmp = (tmp<=threshold1? 255 : 0);
+        map_sp[i][j] = (tmp==0? 1 : 0);
+#ifdef WRITE_MAP_IMAGE
+//        if (m==0)
+        {
+        for (i0=0; i0<16; i0++)
+          for (j0=0; j0<16; j0++)
+            y[i*16+i0][j*16+j0]=tmp;
+        }
+#endif
+      }
+
+    // based on map_sp[][], compose the spare picture information
+    // and write the spare picture information to a temp bitstream
+    tmp = 0;
+    for (i=0; i < img->height/16; i++)
+      for (j=0; j < img->width/16; j++)
+        if (map_sp[i][j]==0) tmp++;
+    if ( tmp > threshold2 )
+      ref_area_indicator = 0;
+    else if ( !CompressSpareMBMap(map_sp, tmpBitstream) )
+      ref_area_indicator = 1;
+    else
+      ref_area_indicator = 2;
+
+//    printf( "ref_area_indicator = %d\n", ref_area_indicator );
+
+#ifdef WRITE_MAP_IMAGE
+    // write the map to a file
+//    if (m==0)
+    {
+      // write the map image
+      for (i=0; i < img->height; i++)
+        for (j=0; j < img->width; j++)
+        {
+          if ( ref_area_indicator == 0 ) fputc(255, fp);
+          else fputc(y[i][j], fp);
+        }
+
+      for (k=0; k < 2; k++)
+        for (i=0; i < img->height/2; i++)
+          for (j=0; j < img->width/2; j++)
+            fputc(128, fp);
+    }
+#endif
+
+    // Finnally, write the current spare picture information to
+    // the global variable: seiSparePicturePayload
+    ComposeSparePictureMessage(delta_spare_frame_num, ref_area_indicator, tmpBitstream);
+    seiSparePicturePayload.num_spare_pics++;
+  }  // END for (m=0; m<2; m++)
+
+  free_mem2D( map_sp );
+  free( tmpBitstream->streamBuffer );
+  free( tmpBitstream );
+
+#ifdef WRITE_MAP_IMAGE
+  free_mem2D( y );
+  fclose( fp );
+#undef WRITE_MAP_IMAGE
+#endif
+  */
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Append the information of one spare picture to the global spare
+ *      picture payload.
+ *  \param delta_spare_frame_num
+ *      see FCD
+ *  \param ref_area_indicator
+ *      Indicate how to represent the spare mb map
+ *  \param tmpBitstream
+ *      bitstream holding the bits that are appended after the two
+ *      header elements
+ *  \par Output
+ *      seiSparePicturePayload.data: receives delta_spare_frame_num,
+ *        ref_area_indicator and then the bits of tmpBitstream.
+ ************************************************************************
+ */
+void ComposeSparePictureMessage(int delta_spare_frame_num, int ref_area_indicator, Bitstream *tmpBitstream)
+{
+  SyntaxElement sym;
+  Bitstream *payload = seiSparePicturePayload.data;
+
+  sym.mapping = ue_linfo;
+  sym.type = SE_HEADER;
+
+  // the two leading elements, written with the UVLC writer in this order
+  sym.value1 = delta_spare_frame_num;
+  writeSyntaxElement2Buf_UVLC(&sym, payload);
+
+  sym.value1 = ref_area_indicator;
+  writeSyntaxElement2Buf_UVLC(&sym, payload);
+
+  // followed by the bits prepared by the caller
+  AppendTmpbits2Buf( payload, tmpBitstream );
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      test if the compressed spare mb map will occupy less mem and
+ *      fill the payload buffer.
+ *      The map is visited starting from the macroblock at
+ *      ((width/16-1)/2, (height/16-1)/2) and walking outwards; run lengths
+ *      are written with writeSyntaxElement2Buf_UVLC. If the accumulated
+ *      size is not smaller than one bit per macroblock, writing restarts
+ *      at the beginning of the buffer with the raw map, one bit per
+ *      macroblock in row order.
+ *  \param map_sp
+ *      in which the spare picture information are stored.
+ *      Indexed as map_sp[row][column] in macroblock units.
+ *  \param bitstream
+ *      pointer to a buffer to save the payload
+ *  \return
+ *      TRUE: If it is compressed version, \n
+ *             FALSE: If it is not compressed.
+ ************************************************************************
+ */
+Boolean CompressSpareMBMap(unsigned char **map_sp, Bitstream *bitstream)
+{
+  int j, k;
+  int noc, bit0, bit1, bitc;
+  SyntaxElement sym;
+  int x, y, left, right, bottom, top, directx, directy;
+
+  // this is the size of the uncompressed mb map:
+  // (one bit per macroblock, see the fallback writer at the end)
+  int size_uncompressed = (img->height/16) * (img->width/16);
+  int size_compressed   = 0;
+  Boolean ret;
+
+  // initialization
+  sym.type = SE_HEADER;
+  sym.mapping = ue_linfo;
+  noc = 0;
+  bit0 = 0;
+  // NOTE(review): bit1 is assigned here but never read in this function
+  bit1 = 1;
+  // bitc is never modified afterwards, so runs of map value 0 are counted
+  bitc = bit0;
+
+  // compress the map, the result goes to the temporal bitstream buffer
+  // (x, y) is the current macroblock; left/right/top/bottom bound the
+  // region visited so far; (directx, directy) is the current direction
+  x = ( img->width/16 - 1 ) / 2;
+  y = ( img->height/16 - 1 ) / 2;
+  left = right = x;
+  top = bottom = y;
+  directx = 0;
+  directy = 1;
+  // j and k only count one iteration per macroblock; they are not used
+  // as map coordinates inside this loop nest
+  for (j=0; j<img->height/16; j++)
+    for (k=0; k<img->width/16; k++)
+    {
+      // check current mb
+      if ( map_sp[y][x] == bitc ) noc++;
+      else
+      {
+        // current mb differs from bitc: write the run counted so far
+        // and restart the count
+        sym.value1 = noc;
+        size_compressed += writeSyntaxElement2Buf_UVLC(&sym, bitstream);    // the return value indicate the num of bits written
+        noc=0;
+      }
+      // go to the next mb:
+      // moving in -x direction
+      if ( directx == -1 && directy == 0 )
+      {
+        if (x > left) x--;
+        else if (x == 0)
+        {
+          // column 0 reached: jump to a new row below the visited region
+          // and reverse to +x
+          y = bottom + 1;
+          bottom++;
+          directx = 1;
+          directy = 0;
+        }
+        else if (x == left)
+        {
+          // left bound reached: extend it by one column and turn to +y
+          x--;
+          left--;
+          directx = 0;
+          directy = 1;
+        }
+      }
+      // moving in +x direction
+      else if ( directx == 1 && directy == 0 )
+      {
+        if (x < right) x++;
+        else if (x == img->width/16 - 1)
+        {
+          // last column reached: jump to a new row above the visited
+          // region and reverse to -x
+          y = top - 1;
+          top--;
+          directx = -1;
+          directy = 0;
+        }
+        else if (x == right)
+        {
+          // right bound reached: extend it by one column and turn to -y
+          x++;
+          right++;
+          directx = 0;
+          directy = -1;
+        }
+      }
+      // moving in -y direction
+      else if ( directx == 0 && directy == -1 )
+      {
+        if ( y > top) y--;
+        else if (y == 0)
+        {
+          // row 0 reached: jump to a new column left of the visited
+          // region and reverse to +y
+          x = left - 1;
+          left--;
+          directx = 0;
+          directy = 1;
+        }
+        else if (y == top)
+        {
+          // top bound reached: extend it by one row and turn to -x
+          y--;
+          top--;
+          directx = -1;
+          directy = 0;
+        }
+      }
+      // moving in +y direction
+      else if ( directx == 0 && directy == 1 )
+      {
+        if (y < bottom) y++;
+        else if (y == img->height/16 - 1)
+        {
+          // last row reached: jump to a new column right of the visited
+          // region and reverse to -y
+          x = right+1;
+          right++;
+          directx = 0;
+          directy = -1;
+        }
+        else if (y == bottom)
+        {
+          // bottom bound reached: extend it by one row and turn to +x
+          y++;
+          bottom++;
+          directx = 1;
+          directy = 0;
+        }
+      }
+    }
+  // write the run that is still pending after the last macroblock
+  if (noc!=0)
+  {
+    sym.value1 = noc;
+    size_compressed += writeSyntaxElement2Buf_UVLC(&sym, bitstream);
+  }
+
+  ret = (size_compressed<size_uncompressed? TRUE : FALSE);
+  if ( !ret ) // overwrite the streambuffer with the original mb map
+  {
+    // write the mb map to payload bit by bit
+    // the writer state is reset, so storing restarts at streamBuffer[0]
+    bitstream->byte_buf = 0;
+    bitstream->bits_to_go = 8;
+    bitstream->byte_pos = 0;
+    for (j=0; j<img->height/16; j++)
+    {
+      for (k=0; k<img->width/16; k++)
+      {
+        bitstream->byte_buf <<= 1;
+        if (map_sp[j][k]) bitstream->byte_buf |= 1;
+        bitstream->bits_to_go--;
+        // only complete bytes are stored here; a trailing partial byte
+        // stays in byte_buf with bits_to_go < 8
+        if (bitstream->bits_to_go==0)
+        {
+          bitstream->bits_to_go = 8;
+          bitstream->streamBuffer[bitstream->byte_pos++]=bitstream->byte_buf;
+          bitstream->byte_buf = 0;
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Finalize the spare picture SEI payload.
+ *        The spare picture payload will be ready for encapsulation, and it
+ *        should be called before current picture packetized.
+ *  \par Input
+ *      seiSparePicturePayload.data: points to the payload starting from
+ *        delta_spare_frame_num. (See FCD)
+ *  \par Output
+ *      seiSparePicturePayload.data is replaced by a newly allocated
+ *        bitstream holding the whole spare picture information:
+ *        spare_picture( PayloadSize ) (See FCD); the previous bitstream
+ *        is freed. The result is byte aligned and its length is stored
+ *        in seiSparePicturePayload.payloadSize.
+ ************************************************************************
+ */
+void FinalizeSpareMBMap()
+{
+  int CurrFrameNum = img->number % MAX_FN;
+  int delta_frame_num;
+  SyntaxElement sym;
+  Bitstream *source = seiSparePicturePayload.data;
+  Bitstream *dest;
+
+  // new, zero-filled destination bitstream
+  dest = malloc(sizeof(Bitstream));
+  if ( dest == NULL )
+    no_mem_exit("FinalizeSpareMBMap: dest");
+  dest->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if ( dest->streamBuffer == NULL )
+    no_mem_exit("FinalizeSpareMBMap: dest->streamBuffer");
+  memset( dest->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  dest->byte_buf    = 0;
+  dest->byte_pos    = 0;
+  dest->bits_to_go  = 8;
+
+  sym.mapping = ue_linfo;
+  sym.type = SE_HEADER;
+
+  // delta_frame_num: MAX_FN is added when the difference is negative
+  delta_frame_num = CurrFrameNum - seiSparePicturePayload.target_frame_num;
+  if ( delta_frame_num < 0 )
+    delta_frame_num += MAX_FN;
+  sym.value1 = delta_frame_num;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  // num_spare_pics_minus1
+  sym.value1 = seiSparePicturePayload.num_spare_pics - 1;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  // everything collected so far follows the two leading elements
+  AppendTmpbits2Buf( dest, source);
+
+  // byte alignment: a single 1 bit, then 0 bits up to the byte boundary
+  if ( dest->bits_to_go != 8 )
+  {
+    dest->byte_buf = (dest->byte_buf << 1) | 1;
+    dest->bits_to_go--;
+    if ( dest->bits_to_go != 0 )
+      dest->byte_buf <<= dest->bits_to_go;
+    dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+    dest->byte_pos++;
+    dest->bits_to_go = 8;
+    dest->byte_buf = 0;
+  }
+  seiSparePicturePayload.payloadSize = dest->byte_pos;
+
+  // publish the finished payload and release the intermediate one
+  seiSparePicturePayload.data = dest;
+  free( source->streamBuffer );
+  free( source );
+}
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on subseq information sei messages
+ *  \brief
+ *      JVT-D098
+ *  \author
+ *      Dong Tian                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+// seiHasSubseqInfo: FALSE at start-up, set to TRUE by InitSubseqInfo().
+// seiSubseqInfo   : one sub-sequence information record per layer,
+//                   indexed by the currLayer argument of the functions below.
+Boolean seiHasSubseqInfo = FALSE;
+subseq_information_struct seiSubseqInfo[MAX_LAYER_NUMBER];
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the subsequence info of one layer.
+ *      Assigns the next subseq_id from a function-local counter, resets
+ *      the record and allocates a zero-filled payload bitstream of
+ *      MAXRTPPAYLOADLEN bytes.
+ *  \param currLayer
+ *      index into seiSubseqInfo[]
+ ************************************************************************
+ */
+void InitSubseqInfo(int currLayer)
+{
+  static unsigned short id = 0;
+  subseq_information_struct *info = &seiSubseqInfo[currLayer];
+
+  seiHasSubseqInfo = TRUE;
+
+  info->subseq_layer_num = currLayer;
+  info->subseq_id = id++;
+  info->last_picture_flag = 0;
+  info->stored_frame_cnt = -1;
+  info->payloadSize = 0;
+
+  info->data = malloc( sizeof(Bitstream) );
+  if ( info->data == NULL )
+    no_mem_exit("InitSubseqInfo: seiSubseqInfo[currLayer].data");
+  info->data->streamBuffer = malloc( MAXRTPPAYLOADLEN );
+  if ( info->data->streamBuffer == NULL )
+    no_mem_exit("InitSubseqInfo: seiSubseqInfo[currLayer].data->streamBuffer");
+  memset( info->data->streamBuffer, 0, MAXRTPPAYLOADLEN );
+
+  // empty bit writer state
+  info->data->byte_buf    = 0;
+  info->data->byte_pos    = 0;
+  info->data->bits_to_go  = 8;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Update the subsequence info of one layer for the current picture.
+ *      stored_frame_cnt is advanced (modulo MAX_FN) unless the picture is
+ *      a B slice; last_picture_flag is recomputed for layers 0 and 1 and
+ *      left untouched for any other layer.
+ *  \param currLayer
+ *      index into seiSubseqInfo[]
+ ************************************************************************
+ */
+void UpdateSubseqInfo(int currLayer)
+{
+  subseq_information_struct *info = &seiSubseqInfo[currLayer];
+
+  if (img->type != B_SLICE)
+  {
+    info->stored_frame_cnt ++;
+    info->stored_frame_cnt = info->stored_frame_cnt % MAX_FN;
+  }
+
+  if ( currLayer == 0 )
+  {
+    // layer 0: flag the last frame of the sequence
+    info->last_picture_flag = ( img->number == input->no_frames-1 ) ? 1 : 0;
+  }
+  else if ( currLayer == 1 )
+  {
+    info->last_picture_flag =
+      ( ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==0) && (input->successive_Bframe != 0) && (IMG_NUMBER>0)) || // there are B frames
+        ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==input->NumFramesInELSubSeq) && (input->successive_Bframe==0))  // there are no B frames
+      ) ? 1 : 0;
+  }
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Finalize the subsequence info of one layer.
+ *      Writes subseq_layer_num, subseq_id, last_picture_flag (1 bit) and
+ *      stored_frame_cnt to the layer's payload bitstream, byte aligns it
+ *      and records the resulting size in payloadSize.
+ *  \param currLayer
+ *      index into seiSubseqInfo[]
+ ************************************************************************
+ */
+void FinalizeSubseqInfo(int currLayer)
+{
+  subseq_information_struct *info = &seiSubseqInfo[currLayer];
+  Bitstream *dest = info->data;
+  SyntaxElement sym;
+
+  sym.mapping = ue_linfo;
+  sym.type = SE_HEADER;
+
+  sym.value1 = info->subseq_layer_num;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.value1 = info->subseq_id;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.len = 1;
+  sym.bitpattern = info->last_picture_flag;
+  writeSyntaxElement2Buf_Fixed(&sym, dest);
+
+  sym.value1 = info->stored_frame_cnt;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  // byte alignment: a single 1 bit, then 0 bits up to the byte boundary
+  if ( dest->bits_to_go != 8 )
+  {
+    dest->byte_buf = (dest->byte_buf << 1) | 1;
+    dest->bits_to_go--;
+    if ( dest->bits_to_go != 0 )
+      dest->byte_buf <<= dest->bits_to_go;
+    dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+    dest->byte_pos++;
+    dest->bits_to_go = 8;
+    dest->byte_buf = 0;
+  }
+  info->payloadSize = dest->byte_pos;
+
+//  printf("layer %d, last picture %d, stored_cnt %d\n", currLayer, seiSubseqInfo[currLayer].last_picture_flag, seiSubseqInfo[currLayer].stored_frame_cnt );
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Clear the payload buffer of one layer: the bit writer state is
+ *      reset, the buffer is zero-filled and payloadSize is set to 0.
+ *  \param currLayer
+ *      index into seiSubseqInfo[]
+ ************************************************************************
+ */
+void ClearSubseqInfoPayload(int currLayer)
+{
+  Bitstream *payload = seiSubseqInfo[currLayer].data;
+
+  memset( payload->streamBuffer, 0, MAXRTPPAYLOADLEN );
+  payload->byte_buf    = 0;
+  payload->byte_pos    = 0;
+  payload->bits_to_go  = 8;
+
+  seiSubseqInfo[currLayer].payloadSize = 0;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Close the subsequence info of one layer.
+ *      Resets stored_frame_cnt and payloadSize and releases the payload
+ *      bitstream. The data pointer is reset to NULL afterwards, so the
+ *      function is safe to call for a layer that was never initialized
+ *      and safe to call more than once.
+ *  \param currLayer
+ *      index into seiSubseqInfo[]
+ ************************************************************************
+ */
+void CloseSubseqInfo(int currLayer)
+{
+  seiSubseqInfo[currLayer].stored_frame_cnt = -1;
+  seiSubseqInfo[currLayer].payloadSize = 0;
+
+  if ( seiSubseqInfo[currLayer].data != NULL )
+  {
+    free( seiSubseqInfo[currLayer].data->streamBuffer );
+    free( seiSubseqInfo[currLayer].data );
+    // do not leave a dangling pointer behind
+    seiSubseqInfo[currLayer].data = NULL;
+  }
+}
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on subseq layer characteristic sei messages
+ *  \brief
+ *      JVT-D098
+ *  \author
+ *      Dong Tian                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+// seiHasSubseqLayerInfo: FALSE at start-up, set to TRUE by
+//                        InitSubseqLayerInfo().
+// seiSubseqLayerInfo   : per-layer bit rate / frame rate values and the
+//                        payload buffer filled by FinalizeSubseqLayerInfo().
+Boolean seiHasSubseqLayerInfo = FALSE;
+subseq_layer_information_struct seiSubseqLayerInfo;
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the global variables for subsequence layer information:
+ *      bit_rate and frame_rate of every layer are set to 0 and
+ *      layer_number is counted up once per layer.
+ ************************************************************************
+ */
+void InitSubseqLayerInfo()
+{
+  int layer;
+
+  seiHasSubseqLayerInfo = TRUE;
+  seiSubseqLayerInfo.layer_number = 0;
+
+  for (layer = 0; layer < MAX_LAYER_NUMBER; layer++)
+  {
+    seiSubseqLayerInfo.layer_number++;
+    seiSubseqLayerInfo.frame_rate[layer] = 0;
+    seiSubseqLayerInfo.bit_rate[layer] = 0;
+  }
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Close the subsequence layer information.
+ *      Intentionally empty: this function performs no work.
+ ************************************************************************
+ */
+void CloseSubseqLayerInfo()
+{
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Write the data to buffer, which is byte aligned.
+ *      For every layer the bit rate and the frame rate are stored as two
+ *      consecutive unsigned short values (native byte order) in
+ *      seiSubseqLayerInfo.data; payloadSize grows by 4 per layer.
+ *  \note
+ *      The values are copied with memcpy instead of being stored through
+ *      a cast unsigned short pointer, so the write does not depend on the
+ *      alignment of the buffer and does not violate the aliasing rules.
+ ************************************************************************
+ */
+void FinalizeSubseqLayerInfo()
+{
+  int i, pos;
+  unsigned short field;
+
+  pos = 0;
+  seiSubseqLayerInfo.payloadSize = 0;
+  for (i=0; i<seiSubseqLayerInfo.layer_number; i++)
+  {
+    field = seiSubseqLayerInfo.bit_rate[i];
+    memcpy( &(seiSubseqLayerInfo.data[pos]), &field, sizeof(field) );
+    pos += 2;
+    field = seiSubseqLayerInfo.frame_rate[i];
+    memcpy( &(seiSubseqLayerInfo.data[pos]), &field, sizeof(field) );
+    pos += 2;
+    seiSubseqLayerInfo.payloadSize += 4;
+  }
+}
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on subseq characteristic sei messages
+ *  \brief
+ *      JVT-D098
+ *  \author
+ *      Dong Tian                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+// seiHasSubseqChar: FALSE at start-up and after ClearSubseqCharPayload(),
+//                   TRUE after UpdateSubseqChar().
+// seiSubseqChar   : sub-sequence characteristics record and its payload
+//                   bitstream.
+Boolean seiHasSubseqChar = FALSE;
+subseq_char_information_struct seiSubseqChar;
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the sub-sequence characteristics: allocate and clear the
+ *      payload bitstream, then take layer number and subseq_id from the
+ *      current image layer and reset the optional fields.
+ ************************************************************************
+ */
+void InitSubseqChar()
+{
+  Bitstream *payload;
+
+  seiSubseqChar.data = malloc( sizeof(Bitstream) );
+  if( seiSubseqChar.data == NULL )
+    no_mem_exit("InitSubseqChar: seiSubseqChar.data");
+  payload = seiSubseqChar.data;
+
+  payload->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if( payload->streamBuffer == NULL )
+    no_mem_exit("InitSubseqChar: seiSubseqChar.data->streamBuffer");
+
+  // zero the buffer and reset the writer state
+  ClearSubseqCharPayload();
+
+  seiSubseqChar.subseq_layer_num = img->layer;
+  seiSubseqChar.subseq_id = seiSubseqInfo[img->layer].subseq_id;
+  seiSubseqChar.num_referenced_subseqs = 0;
+  seiSubseqChar.average_rate_flag = 0;
+  seiSubseqChar.duration_flag = 0;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Clear the sub-sequence characteristics payload: zero the buffer,
+ *      reset the bit writer state and payloadSize, and set
+ *      seiHasSubseqChar to FALSE.
+ ************************************************************************
+ */
+void ClearSubseqCharPayload()
+{
+  Bitstream *payload = seiSubseqChar.data;
+
+  memset( payload->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  payload->byte_buf    = 0;
+  payload->byte_pos    = 0;
+  payload->bits_to_go  = 8;
+
+  seiSubseqChar.payloadSize = 0;
+  seiHasSubseqChar = FALSE;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Update the sub-sequence characteristics for the current image
+ *      layer and set seiHasSubseqChar to TRUE.
+ *      Apart from the layer number and subseq_id, all values written
+ *      here are fixed constants.
+ ************************************************************************
+ */
+void UpdateSubseqChar()
+{
+  subseq_char_information_struct *sc = &seiSubseqChar;
+
+  sc->subseq_layer_num = img->layer;
+  sc->subseq_id = seiSubseqInfo[img->layer].subseq_id;
+
+  // optional parts are switched off ...
+  sc->duration_flag = 0;
+  sc->average_rate_flag = 0;
+  sc->num_referenced_subseqs = 0;
+
+  // ... but their fields still receive fixed values
+  sc->average_bit_rate = 100;
+  sc->average_frame_rate = 30;
+  sc->ref_subseq_layer_num[0] = 1;
+  sc->ref_subseq_id[0] = 2;
+  sc->ref_subseq_layer_num[1] = 3;
+  sc->ref_subseq_id[1] = 4;
+
+  seiHasSubseqChar = TRUE;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Finalize the sub-sequence characteristics payload.
+ *      Writes layer number, subseq_id, the duration and average rate
+ *      flags (each followed by its fields when set) and the list of
+ *      referenced sub-sequences, byte aligns the bitstream and records
+ *      its size in payloadSize.
+ ************************************************************************
+ */
+void FinalizeSubseqChar()
+{
+  Bitstream *dest = seiSubseqChar.data;
+  SyntaxElement sym;
+  int ref;
+
+  sym.mapping = ue_linfo;
+  sym.type = SE_HEADER;
+
+  sym.value1 = seiSubseqChar.subseq_layer_num;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.value1 = seiSubseqChar.subseq_id;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  // duration_flag (1 bit), then the 32 bit duration when the flag is set
+  sym.len = 1;
+  sym.bitpattern = seiSubseqChar.duration_flag;
+  writeSyntaxElement2Buf_Fixed(&sym, dest);
+  if ( seiSubseqChar.duration_flag )
+  {
+    sym.len = 32;
+    sym.bitpattern = seiSubseqChar.subseq_duration;
+    writeSyntaxElement2Buf_Fixed(&sym, dest);
+  }
+
+  // average_rate_flag (1 bit), then two 16 bit rates when the flag is set
+  sym.len = 1;
+  sym.bitpattern = seiSubseqChar.average_rate_flag;
+  writeSyntaxElement2Buf_Fixed(&sym, dest);
+  if ( seiSubseqChar.average_rate_flag )
+  {
+    sym.len = 16;
+    sym.bitpattern = seiSubseqChar.average_bit_rate;
+    writeSyntaxElement2Buf_Fixed(&sym, dest);
+
+    sym.len = 16;
+    sym.bitpattern = seiSubseqChar.average_frame_rate;
+    writeSyntaxElement2Buf_Fixed(&sym, dest);
+  }
+
+  // number of referenced sub-sequences, then one pair per reference
+  sym.value1 = seiSubseqChar.num_referenced_subseqs;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+  for (ref = 0; ref < seiSubseqChar.num_referenced_subseqs; ref++)
+  {
+    sym.value1 = seiSubseqChar.ref_subseq_layer_num[ref];
+    writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+    sym.value1 = seiSubseqChar.ref_subseq_id[ref];
+    writeSyntaxElement2Buf_UVLC(&sym, dest);
+  }
+
+  // byte alignment: a single 1 bit, then 0 bits up to the byte boundary
+  if ( dest->bits_to_go != 8 )
+  {
+    dest->byte_buf = (dest->byte_buf << 1) | 1;
+    dest->bits_to_go--;
+    if ( dest->bits_to_go != 0 )
+      dest->byte_buf <<= dest->bits_to_go;
+    dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+    dest->byte_pos++;
+    dest->bits_to_go = 8;
+    dest->byte_buf = 0;
+  }
+  seiSubseqChar.payloadSize = dest->byte_pos;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Release the sub-sequence characteristics payload bitstream, if
+ *      one is allocated, and leave the data pointer at NULL.
+ ************************************************************************
+ */
+void CloseSubseqChar()
+{
+  Bitstream *payload = seiSubseqChar.data;
+
+  if (payload != NULL)
+  {
+    free(payload->streamBuffer);
+    free(payload);
+    seiSubseqChar.data = NULL;
+  }
+}
+
+
+// JVT-D099
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on scene information SEI message
+ *  \brief
+ *      JVT-D099
+ *  \author
+ *      Ye-Kui Wang                 <wyk at ieee.org>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+// Scene information record and its payload bitstream; the bitstream is
+// allocated by InitSceneInformation() and released by
+// CloseSceneInformation().
+scene_information_struct seiSceneInformation;
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the scene information: set seiHasSceneInformation to TRUE,
+ *      reset the scene fields and allocate a zero-filled payload
+ *      bitstream of MAXRTPPAYLOADLEN bytes.
+ ************************************************************************
+ */
+void InitSceneInformation()
+{
+  Bitstream *payload;
+
+  seiHasSceneInformation = TRUE;
+
+  seiSceneInformation.scene_id = 0;
+  seiSceneInformation.scene_transition_type = 0;
+  seiSceneInformation.second_scene_id = -1;
+
+  seiSceneInformation.data = malloc( sizeof(Bitstream) );
+  if( seiSceneInformation.data == NULL )
+    no_mem_exit("InitSceneInformation: seiSceneInformation.data");
+  payload = seiSceneInformation.data;
+
+  payload->streamBuffer = malloc( MAXRTPPAYLOADLEN );
+  if( payload->streamBuffer == NULL )
+    no_mem_exit("InitSceneInformation: seiSceneInformation.data->streamBuffer");
+  memset( payload->streamBuffer, 0, MAXRTPPAYLOADLEN );
+
+  // empty bit writer state
+  payload->byte_buf    = 0;
+  payload->byte_pos    = 0;
+  payload->bits_to_go  = 8;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Release the scene information payload bitstream, if one is
+ *      allocated, and leave the data pointer at NULL.
+ ************************************************************************
+ */
+void CloseSceneInformation()
+{
+  Bitstream *payload = seiSceneInformation.data;
+
+  if (payload != NULL)
+  {
+    free(payload->streamBuffer);
+    free(payload);
+    seiSceneInformation.data = NULL;
+  }
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Finalize the scene information payload.
+ *      Writes scene_id (8 bits), scene_transition_type and, for
+ *      transition types above 3, second_scene_id (8 bits); then byte
+ *      aligns the bitstream and records its size in payloadSize.
+ ************************************************************************
+ */
+void FinalizeSceneInformation()
+{
+  Bitstream *dest = seiSceneInformation.data;
+  SyntaxElement sym;
+
+  sym.mapping = ue_linfo;
+  sym.type = SE_HEADER;
+
+  sym.len = 8;
+  sym.bitpattern = seiSceneInformation.scene_id;
+  writeSyntaxElement2Buf_Fixed(&sym, dest);
+
+  sym.value1 = seiSceneInformation.scene_transition_type;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  // the second scene id is only present for transition types above 3
+  if(seiSceneInformation.scene_transition_type > 3)
+  {
+    sym.len = 8;
+    sym.bitpattern = seiSceneInformation.second_scene_id;
+    writeSyntaxElement2Buf_Fixed(&sym, dest);
+  }
+
+  // byte alignment: a single 1 bit, then 0 bits up to the byte boundary
+  if ( dest->bits_to_go != 8 )
+  {
+    dest->byte_buf = (dest->byte_buf << 1) | 1;
+    dest->bits_to_go--;
+    if ( dest->bits_to_go != 0 )
+      dest->byte_buf <<= dest->bits_to_go;
+    dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+    dest->byte_pos++;
+    dest->bits_to_go = 8;
+    dest->byte_buf = 0;
+  }
+  seiSceneInformation.payloadSize = dest->byte_pos;
+}
+
+// UpdateSceneInformation: store the scene parameters for the next SEI.
+//   HasSceneInformation: pass TRUE to include a scene information SEI into
+//      the next slice/DP, FALSE otherwise.
+//   sceneID:        must be below 256 (asserted).
+//   sceneTransType: must not exceed 6 (asserted).
+//   secondSceneID:  only checked (below 256) and stored when
+//      sceneTransType is above 3.
+void UpdateSceneInformation(Boolean HasSceneInformation, int sceneID, int sceneTransType, int secondSceneID)
+{
+  seiHasSceneInformation = HasSceneInformation;
+
+  assert (sceneID < 256);
+  seiSceneInformation.scene_id = sceneID;
+
+  assert (sceneTransType <= 6 );
+  seiSceneInformation.scene_transition_type = sceneTransType;
+
+  if(sceneTransType <= 3)
+    return;
+
+  assert (secondSceneID < 256);
+  seiSceneInformation.second_scene_id = secondSceneID;
+}
+// End JVT-D099
+
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on Pan Scan messages
+ *  \brief
+ *      Based on FCD
+ *  \author
+ *      Shankar Regunathan                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+
+// seiHasPanScanRectInfo: FALSE at start-up; set to TRUE by
+//                        ClearPanScanRectInfoPayload() and
+//                        UpdatePanScanRectInfo().
+// seiPanScanRectInfo   : pan-scan rectangle record and its payload
+//                        bitstream.
+Boolean seiHasPanScanRectInfo = FALSE;
+panscanrect_information_struct seiPanScanRectInfo;
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the pan-scan rectangle info: allocate and clear the payload
+ *      bitstream and set all four rectangle offsets to 0.
+ ************************************************************************
+ */
+void InitPanScanRectInfo()
+{
+  Bitstream *payload;
+
+  seiPanScanRectInfo.data = malloc( sizeof(Bitstream) );
+  if( seiPanScanRectInfo.data == NULL )
+    no_mem_exit("InitPanScanRectInfo: seiPanScanRectInfo.data");
+  payload = seiPanScanRectInfo.data;
+
+  payload->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if( payload->streamBuffer == NULL )
+    no_mem_exit("InitPanScanRectInfo: seiPanScanRectInfo.data->streamBuffer");
+
+  // zero the buffer and reset the writer state
+  ClearPanScanRectInfoPayload();
+
+  seiPanScanRectInfo.pan_scan_rect_bottom_offset = 0;
+  seiPanScanRectInfo.pan_scan_rect_top_offset = 0;
+  seiPanScanRectInfo.pan_scan_rect_right_offset = 0;
+  seiPanScanRectInfo.pan_scan_rect_left_offset = 0;
+}
+
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Clear the pan-scan rectangle payload: zero the buffer, reset the
+ *      bit writer state and payloadSize.
+ *  \note
+ *      NOTE(review): this sets seiHasPanScanRectInfo to TRUE, whereas
+ *      ClearSubseqCharPayload() sets its flag to FALSE - confirm that
+ *      this is intended.
+ ************************************************************************
+ */
+void ClearPanScanRectInfoPayload()
+{
+  Bitstream *payload = seiPanScanRectInfo.data;
+
+  memset( payload->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  payload->byte_buf    = 0;
+  payload->byte_pos    = 0;
+  payload->bits_to_go  = 8;
+
+  seiPanScanRectInfo.payloadSize = 0;
+  seiHasPanScanRectInfo = TRUE;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Fill the pan-scan rectangle record with fixed values (id 3,
+ *      offsets left 10, right 40, top 20, bottom 32) and set
+ *      seiHasPanScanRectInfo to TRUE.
+ ************************************************************************
+ */
+void UpdatePanScanRectInfo()
+{
+  panscanrect_information_struct *rect = &seiPanScanRectInfo;
+
+  rect->pan_scan_rect_id = 3;
+
+  rect->pan_scan_rect_left_offset = 10;
+  rect->pan_scan_rect_right_offset = 40;
+  rect->pan_scan_rect_top_offset = 20;
+  rect->pan_scan_rect_bottom_offset = 32;
+
+  seiHasPanScanRectInfo = TRUE;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Finalize the pan-scan rectangle payload.
+ *      Writes the rectangle id and the left, right, top and bottom
+ *      offsets (in this order) with the UVLC writer, byte aligns the
+ *      bitstream and records its size in payloadSize.
+ ************************************************************************
+ */
+void FinalizePanScanRectInfo()
+{
+  Bitstream *dest = seiPanScanRectInfo.data;
+  SyntaxElement sym;
+
+  sym.mapping = ue_linfo;
+  sym.type = SE_HEADER;
+
+  sym.value1 = seiPanScanRectInfo.pan_scan_rect_id;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.value1 = seiPanScanRectInfo.pan_scan_rect_left_offset;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.value1 = seiPanScanRectInfo.pan_scan_rect_right_offset;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.value1 = seiPanScanRectInfo.pan_scan_rect_top_offset;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+  sym.value1 = seiPanScanRectInfo.pan_scan_rect_bottom_offset;
+  writeSyntaxElement2Buf_UVLC(&sym, dest);
+
+// debug trace; enable by uncommenting the define
+// #define PRINT_PAN_SCAN_RECT
+#ifdef PRINT_PAN_SCAN_RECT
+  printf("Pan Scan Id %d Left %d Right %d Top %d Bottom %d \n", seiPanScanRectInfo.pan_scan_rect_id, seiPanScanRectInfo.pan_scan_rect_left_offset, seiPanScanRectInfo.pan_scan_rect_right_offset, seiPanScanRectInfo.pan_scan_rect_top_offset, seiPanScanRectInfo.pan_scan_rect_bottom_offset);
+#undef PRINT_PAN_SCAN_RECT
+#endif
+
+  // byte alignment: a single 1 bit, then 0 bits up to the byte boundary
+  if ( dest->bits_to_go != 8 )
+  {
+    dest->byte_buf = (dest->byte_buf << 1) | 1;
+    dest->bits_to_go--;
+    if ( dest->bits_to_go != 0 )
+      dest->byte_buf <<= dest->bits_to_go;
+    dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+    dest->byte_pos++;
+    dest->bits_to_go = 8;
+    dest->byte_buf = 0;
+  }
+  seiPanScanRectInfo.payloadSize = dest->byte_pos;
+}
+
+
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Release the pan-scan rectangle payload bitstream, if one is
+ *      allocated, and leave the data pointer at NULL.
+ ************************************************************************
+ */
+void ClosePanScanRectInfo()
+{
+  Bitstream *payload = seiPanScanRectInfo.data;
+
+  if (payload != NULL)
+  {
+    free(payload->streamBuffer);
+    free(payload);
+    seiPanScanRectInfo.data = NULL;
+  }
+}
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on arbitrary (unregistered) data
+ *  \brief
+ *      Based on FCD
+ *  \author
+ *      Shankar Regunathan                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+// seiHasUser_data_unregistered_info: has no explicit initializer; set to
+//                                    TRUE by ClearUser_data_unregistered().
+// seiUser_data_unregistered        : unregistered user data bytes (.byte,
+//                                    .total_byte) and the payload bitstream.
+Boolean seiHasUser_data_unregistered_info;
+user_data_unregistered_information_struct seiUser_data_unregistered;
+/*!
+ ************************************************************************
+ *  \brief
+ *      Init the unregistered user data: allocate the payload bitstream,
+ *      its buffer and the user data byte array (MAXRTPPAYLOADLEN bytes
+ *      each), then clear them.
+ ************************************************************************
+ */
+void InitUser_data_unregistered()
+{
+  Bitstream *payload;
+
+  seiUser_data_unregistered.data = malloc( sizeof(Bitstream) );
+  if( seiUser_data_unregistered.data == NULL )
+    no_mem_exit("InitUser_data_unregistered: seiUser_data_unregistered.data");
+  payload = seiUser_data_unregistered.data;
+
+  payload->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if( payload->streamBuffer == NULL )
+    no_mem_exit("InitUser_data_unregistered: seiUser_data_unregistered.data->streamBuffer");
+
+  seiUser_data_unregistered.byte = malloc(MAXRTPPAYLOADLEN);
+  if( seiUser_data_unregistered.byte == NULL )
+    no_mem_exit("InitUser_data_unregistered: seiUser_data_unregistered.byte");
+
+  // zero both buffers and reset the writer state
+  ClearUser_data_unregistered();
+}
+
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Clear the unregistered user data: zero the payload buffer and the
+ *      user data byte array, reset the bit writer state, payloadSize and
+ *      total_byte.
+ *  \note
+ *      NOTE(review): this sets seiHasUser_data_unregistered_info to TRUE
+ *      even though the data has just been cleared - confirm that this is
+ *      intended.
+ ************************************************************************
+ */
+void ClearUser_data_unregistered()
+{
+  Bitstream *payload = seiUser_data_unregistered.data;
+
+  memset( payload->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  payload->byte_buf    = 0;
+  payload->byte_pos    = 0;
+  payload->bits_to_go  = 8;
+  seiUser_data_unregistered.payloadSize = 0;
+
+  memset( seiUser_data_unregistered.byte, 0, MAXRTPPAYLOADLEN);
+  seiUser_data_unregistered.total_byte = 0;
+
+  seiHasUser_data_unregistered_info = TRUE;
+}
+
+/*!
+ ************************************************************************
+ *  \brief
+ *      Fill the unregistered user data with 7 fixed bytes: byte i is
+ *      i * 4 passed through iClip3(0, 255, ...). total_byte is set to 7.
+ ************************************************************************
+ */
+void UpdateUser_data_unregistered()
+{
+  int total_byte = 7;
+  int idx;
+
+  for (idx = 0; idx < total_byte; idx++)
+  {
+    int temp_data = idx * 4;
+
+    seiUser_data_unregistered.byte[idx] = (char) iClip3(0, 255, temp_data);
+  }
+
+  seiUser_data_unregistered.total_byte = total_byte;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Serialize the collected unregistered user data bytes into the SEI
+ *    payload bitstream (8 bits per byte), byte-align the bitstream with
+ *    a 1 bit followed by 0 bits if needed, and store the resulting byte
+ *    position as payloadSize.
+ ************************************************************************
+ */
+void FinalizeUser_data_unregistered()
+{
+  Bitstream *bs = seiUser_data_unregistered.data;
+  SyntaxElement se;
+  int idx;
+
+  se.mapping = ue_linfo;
+  se.type    = SE_HEADER;
+
+// #define PRINT_USER_DATA_UNREGISTERED_INFO
+  idx = 0;
+  while (idx < seiUser_data_unregistered.total_byte)
+  {
+    se.len = 8; // b (8)
+    se.bitpattern = seiUser_data_unregistered.byte[idx];
+    writeSyntaxElement2Buf_Fixed(&se, bs);
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+    printf("Unreg data payload_byte = %d\n", seiUser_data_unregistered.byte[idx]);
+#endif
+    idx++;
+  }
+#ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+#undef PRINT_USER_DATA_UNREGISTERED_INFO
+#endif
+
+  // a partially filled byte is completed with stuffing bits 10..0 and flushed
+  if ( bs->bits_to_go != 8 )
+  {
+    bs->byte_buf = (bs->byte_buf << 1) | 1;
+    bs->bits_to_go--;
+    if ( bs->bits_to_go != 0 )
+      bs->byte_buf <<= bs->bits_to_go;
+
+    bs->streamBuffer[bs->byte_pos] = bs->byte_buf;
+    bs->byte_pos++;
+
+    bs->byte_buf   = 0;
+    bs->bits_to_go = 8;
+  }
+
+  seiUser_data_unregistered.payloadSize = bs->byte_pos;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free all storage owned by the unregistered user data SEI (the
+ *    Bitstream, its stream buffer and the raw byte array) and reset
+ *    both pointers to NULL, so that calling this function again is
+ *    harmless.
+ ************************************************************************
+ */
+void CloseUser_data_unregistered()
+{
+  if (seiUser_data_unregistered.data)
+  {
+    free(seiUser_data_unregistered.data->streamBuffer);
+    free(seiUser_data_unregistered.data);
+  }
+  seiUser_data_unregistered.data = NULL;
+
+  // free(NULL) is a no-op, so no guard is needed; clear the pointer so
+  // it does not dangle (the data pointer above is already reset this way)
+  free(seiUser_data_unregistered.byte);
+  seiUser_data_unregistered.byte = NULL;
+}
+
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on registered ITU_T_T35 user data
+ *  \brief
+ *      Based on FCD
+ *  \author
+ *      Shankar Regunathan                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+Boolean seiHasUser_data_registered_itu_t_t35_info;
+user_data_registered_itu_t_t35_information_struct seiUser_data_registered_itu_t_t35;
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate the storage used by the registered (ITU-T T.35) user data
+ *    SEI: the Bitstream, its MAXRTPPAYLOADLEN-byte stream buffer and the
+ *    MAXRTPPAYLOADLEN-byte raw byte array. no_mem_exit() is called if
+ *    any allocation fails. Ends by calling
+ *    ClearUser_data_registered_itu_t_t35().
+ ************************************************************************
+ */
+void InitUser_data_registered_itu_t_t35()
+{
+
+  seiUser_data_registered_itu_t_t35.data = malloc( sizeof(Bitstream) );
+  if( seiUser_data_registered_itu_t_t35.data == NULL ) no_mem_exit("InitUser_data_registered_itu_t_t35: seiUser_data_registered_itu_t_t35.data");
+  seiUser_data_registered_itu_t_t35.data->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if( seiUser_data_registered_itu_t_t35.data->streamBuffer == NULL ) no_mem_exit("InitUser_data_registered_itu_t_t35: seiUser_data_registered_itu_t_t35.data->streamBuffer");
+  seiUser_data_registered_itu_t_t35.byte = malloc(MAXRTPPAYLOADLEN);
+  // the byte array itself must be validated: the Clear call below memsets it
+  if( seiUser_data_registered_itu_t_t35.byte == NULL ) no_mem_exit("InitUser_data_registered_itu_t_t35: seiUser_data_registered_itu_t_t35.byte");
+  ClearUser_data_registered_itu_t_t35();
+
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reset the registered (ITU-T T.35) user data SEI: zero both buffers,
+ *    rewind the bitstream write position, clear the byte count, payload
+ *    size and both country code fields, and set
+ *    seiHasUser_data_registered_itu_t_t35_info to TRUE.
+ ************************************************************************
+ */
+void ClearUser_data_registered_itu_t_t35()
+{
+  Bitstream *bs = seiUser_data_registered_itu_t_t35.data;
+
+  // empty bitstream: no bytes written, a full byte (8 bits) still free
+  memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  bs->byte_buf   = 0;
+  bs->byte_pos   = 0;
+  bs->bits_to_go = 8;
+  seiUser_data_registered_itu_t_t35.payloadSize = 0;
+
+  // no raw payload bytes and no country code yet
+  memset( seiUser_data_registered_itu_t_t35.byte, 0, MAXRTPPAYLOADLEN);
+  seiUser_data_registered_itu_t_t35.total_byte = 0;
+  seiUser_data_registered_itu_t_t35.itu_t_t35_country_code_extension_byte = 0;
+  seiUser_data_registered_itu_t_t35.itu_t_t35_country_code = 0;
+
+  seiHasUser_data_registered_itu_t_t35_info = TRUE;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fill the registered (ITU-T T.35) user data SEI with fixed content:
+ *    a hard-coded country code (values of 0xFF and above are stored as
+ *    0xFF plus an extension byte holding the excess) and 7 payload bytes
+ *    where entry i holds i*3 clipped to [0,255].
+ ************************************************************************
+ */
+void UpdateUser_data_registered_itu_t_t35()
+{
+  int idx;
+  int value;
+  int num_bytes;
+  int country_code;
+
+  country_code = 82; // Country_code for India
+
+  if (country_code >= 0xFF)
+  {
+    // does not fit the single code byte: escape value plus extension byte
+    seiUser_data_registered_itu_t_t35.itu_t_t35_country_code = 0xFF;
+    seiUser_data_registered_itu_t_t35.itu_t_t35_country_code_extension_byte = country_code - 0xFF;
+  }
+  else
+  {
+    seiUser_data_registered_itu_t_t35.itu_t_t35_country_code = country_code;
+  }
+
+  num_bytes = 7;
+  idx = 0;
+  while (idx < num_bytes)
+  {
+    value = idx * 3;
+    seiUser_data_registered_itu_t_t35.byte[idx] = (char) iClip3(0, 255, value);
+    idx++;
+  }
+  seiUser_data_registered_itu_t_t35.total_byte = num_bytes;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Serialize the registered (ITU-T T.35) user data SEI into its
+ *    payload bitstream: the country code (8 bits), the extension byte
+ *    (8 bits, only when the country code is 0xFF), then every stored
+ *    payload byte (8 bits each). The bitstream is byte-aligned with a
+ *    1 bit followed by 0 bits if needed and the resulting byte position
+ *    is stored as payloadSize.
+ ************************************************************************
+ */
+void FinalizeUser_data_registered_itu_t_t35()
+{
+  Bitstream *bs = seiUser_data_registered_itu_t_t35.data;
+  SyntaxElement se;
+  int idx;
+
+  se.mapping = ue_linfo;
+  se.type    = SE_HEADER;
+
+  // country code
+  se.len = 8;
+  se.bitpattern = seiUser_data_registered_itu_t_t35.itu_t_t35_country_code;
+  writeSyntaxElement2Buf_Fixed(&se, bs);
+
+// #define PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+  printf(" ITU_T_T35_COUNTRTY_CODE %d \n", seiUser_data_registered_itu_t_t35.itu_t_t35_country_code);
+#endif
+
+  // extension byte is only present behind the 0xFF escape value
+  if (seiUser_data_registered_itu_t_t35.itu_t_t35_country_code == 0xFF)
+  {
+    se.len = 8;
+    se.bitpattern = seiUser_data_registered_itu_t_t35.itu_t_t35_country_code_extension_byte;
+    writeSyntaxElement2Buf_Fixed(&se, bs);
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+    printf(" ITU_T_T35_COUNTRTY_CODE_EXTENSION_BYTE %d \n", seiUser_data_registered_itu_t_t35.itu_t_t35_country_code_extension_byte);
+#endif
+  }
+
+  // payload bytes
+  idx = 0;
+  while (idx < seiUser_data_registered_itu_t_t35.total_byte)
+  {
+    se.len = 8; // b (8)
+    se.bitpattern = seiUser_data_registered_itu_t_t35.byte[idx];
+    writeSyntaxElement2Buf_Fixed(&se, bs);
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+    printf("itu_t_t35 payload_byte = %d\n", seiUser_data_registered_itu_t_t35.byte[idx]);
+#endif
+    idx++;
+  }
+#ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+#undef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+#endif
+
+  // a partially filled byte is completed with stuffing bits 10..0 and flushed
+  if ( bs->bits_to_go != 8 )
+  {
+    bs->byte_buf = (bs->byte_buf << 1) | 1;
+    bs->bits_to_go--;
+    if ( bs->bits_to_go != 0 )
+      bs->byte_buf <<= bs->bits_to_go;
+
+    bs->streamBuffer[bs->byte_pos] = bs->byte_buf;
+    bs->byte_pos++;
+
+    bs->byte_buf   = 0;
+    bs->bits_to_go = 8;
+  }
+
+  seiUser_data_registered_itu_t_t35.payloadSize = bs->byte_pos;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Free all storage owned by the registered (ITU-T T.35) user data SEI
+ *    (the Bitstream, its stream buffer and the raw byte array) and reset
+ *    both pointers to NULL, so that calling this function again is
+ *    harmless.
+ ************************************************************************
+ */
+void CloseUser_data_registered_itu_t_t35()
+{
+  if (seiUser_data_registered_itu_t_t35.data)
+  {
+    free(seiUser_data_registered_itu_t_t35.data->streamBuffer);
+    free(seiUser_data_registered_itu_t_t35.data);
+  }
+  seiUser_data_registered_itu_t_t35.data = NULL;
+
+  // free(NULL) is a no-op, so no guard is needed; clear the pointer so
+  // it does not dangle (the data pointer above is already reset this way)
+  free(seiUser_data_registered_itu_t_t35.byte);
+  seiUser_data_registered_itu_t_t35.byte = NULL;
+}
+
+/*
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *  \functions on random access message
+ *  \brief
+ *      Based on FCD
+ *  \author
+ *      Shankar Regunathan                 <tian at cs.tut.fi>
+ **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+Boolean seiHasRecoveryPoint_info;
+recovery_point_information_struct seiRecoveryPoint;
+/*!
+ ************************************************************************
+ * \brief
+ *    Allocate the Bitstream and its MAXRTPPAYLOADLEN-byte stream buffer
+ *    for the recovery point SEI. no_mem_exit() is called if either
+ *    allocation fails. Ends by calling ClearRandomAccess().
+ ************************************************************************
+ */
+void InitRandomAccess()
+{
+  recovery_point_information_struct *rp = &seiRecoveryPoint;
+
+  rp->data = malloc( sizeof(Bitstream) );
+  if (rp->data == NULL)
+    no_mem_exit("InitRandomAccess: seiRandomAccess.data");
+
+  rp->data->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+  if (rp->data->streamBuffer == NULL)
+    no_mem_exit("InitRandomAccess: seiRandomAccess.data->streamBuffer");
+
+  ClearRandomAccess();
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reset the recovery point SEI: zero the stream buffer, rewind the
+ *    bitstream write position, clear payloadSize, recovery_frame_cnt,
+ *    broken_link_flag and exact_match_flag, and set
+ *    seiHasRecoveryPoint_info to FALSE.
+ *    changing_slice_group_idc is not touched here.
+ ************************************************************************
+ */
+void ClearRandomAccess()
+{
+  Bitstream *bs = seiRecoveryPoint.data;
+
+  // empty bitstream: no bytes written, a full byte (8 bits) still free
+  memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN);
+  bs->byte_buf   = 0;
+  bs->byte_pos   = 0;
+  bs->bits_to_go = 8;
+  seiRecoveryPoint.payloadSize = 0;
+
+  seiRecoveryPoint.exact_match_flag   = 0;
+  seiRecoveryPoint.broken_link_flag   = 0;
+  seiRecoveryPoint.recovery_frame_cnt = 0;
+
+  seiHasRecoveryPoint_info = FALSE;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Decide whether a recovery point SEI accompanies the current picture.
+ *    For an I slice the message is enabled with recovery_frame_cnt = 0,
+ *    exact_match_flag = 1 and broken_link_flag = 0; for every other
+ *    slice type it is disabled.
+ ************************************************************************
+ */
+void UpdateRandomAccess()
+{
+  if (img->type != I_SLICE)
+  {
+    seiHasRecoveryPoint_info = FALSE;
+    return;
+  }
+
+  seiRecoveryPoint.recovery_frame_cnt = 0;
+  seiRecoveryPoint.exact_match_flag   = 1;
+  seiRecoveryPoint.broken_link_flag   = 0;
+  seiHasRecoveryPoint_info = TRUE;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Serialize the recovery point SEI into its payload bitstream:
+ *    recovery_frame_cnt via ue_v, exact_match_flag and broken_link_flag
+ *    via u_1, changing_slice_group_idc via u_v with length 2. The
+ *    bitstream is then byte-aligned with a 1 bit followed by 0 bits if
+ *    needed and the resulting byte position is stored as payloadSize.
+ ************************************************************************
+ */
+void FinalizeRandomAccess()
+{
+  Bitstream *bs = seiRecoveryPoint.data;
+
+  ue_v(   "SEI: recovery_frame_cnt",       seiRecoveryPoint.recovery_frame_cnt,       bs);
+  u_1 (   "SEI: exact_match_flag",         seiRecoveryPoint.exact_match_flag,         bs);
+  u_1 (   "SEI: broken_link_flag",         seiRecoveryPoint.broken_link_flag,         bs);
+  u_v (2, "SEI: changing_slice_group_idc", seiRecoveryPoint.changing_slice_group_idc, bs);
+
+
+// #define PRINT_RECOVERY_POINT
+#ifdef PRINT_RECOVERY_POINT
+  printf(" recovery_frame_cnt %d \n",       seiRecoveryPoint.recovery_frame_cnt);
+  printf(" exact_match_flag %d \n",         seiRecoveryPoint.exact_match_flag);
+  printf(" broken_link_flag %d \n",         seiRecoveryPoint.broken_link_flag);
+  printf(" changing_slice_group_idc %d \n", seiRecoveryPoint.changing_slice_group_idc);
+  printf(" %d %d \n", bs->byte_pos, bs->bits_to_go);
+
+#undef PRINT_RECOVERY_POINT
+#endif
+
+  // a partially filled byte is completed with stuffing bits 10..0 and flushed
+  if ( bs->bits_to_go != 8 )
+  {
+    bs->byte_buf = (bs->byte_buf << 1) | 1;
+    bs->bits_to_go--;
+    if ( bs->bits_to_go != 0 )
+      bs->byte_buf <<= bs->bits_to_go;
+
+    bs->streamBuffer[bs->byte_pos] = bs->byte_buf;
+    bs->byte_pos++;
+
+    bs->byte_buf   = 0;
+    bs->bits_to_go = 8;
+  }
+
+  seiRecoveryPoint.payloadSize = bs->byte_pos;
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Release the recovery point SEI bitstream together with its stream
+ *    buffer (when one is attached) and reset the data pointer to NULL.
+ ************************************************************************
+ */
+void CloseRandomAccess()
+{
+  Bitstream *bs = seiRecoveryPoint.data;
+
+  if (bs != NULL)
+  {
+    // inner buffer first, then the Bitstream that owns it
+    free(bs->streamBuffer);
+    free(bs);
+  }
+  seiRecoveryPoint.data = NULL;
+}


Index: llvm-test/MultiSource/Applications/JM/lencod/sei.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/sei.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/sei.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,325 @@
+
+/*!
+ ************************************************************************
+ *  \file
+ *     sei.h
+ *  \brief
+ *     definitions for Supplemental Enhancement Information
+ *  \author(s)
+ *      - Dong Tian                             <tian at cs.tut.fi>
+ *      - TBD
+ *
+ * ************************************************************************
+ */
+
+#ifndef SEI_H
+#define SEI_H
+
+#define MAX_LAYER_NUMBER 2
+#define MAX_DEPENDENT_SUBSEQ 5
+
+
+//! definition of SEI payload type
+//! Only the first enumerator has an explicit value (0); the rest follow
+//! consecutively, so SEI_MAX_ELEMENTS equals the number of entries before it.
+//! NOTE(review): the order looks like it mirrors the payloadType numbering of
+//! the H.264 SEI syntax -- confirm before inserting or reordering entries.
+typedef enum {
+  SEI_BUFFERING_PERIOD = 0,
+  SEI_PIC_TIMING,
+  SEI_PAN_SCAN_RECT,
+  SEI_FILLER_PAYLOAD,
+  SEI_USER_DATA_REGISTERED_ITU_T_T35,
+  SEI_USER_DATA_UNREGISTERED,
+  SEI_RECOVERY_POINT,
+  SEI_DEC_REF_PIC_MARKING_REPETITION,
+  SEI_SPARE_PIC,
+  SEI_SCENE_INFO,
+  SEI_SUB_SEQ_INFO,
+  SEI_SUB_SEQ_LAYER_CHARACTERISTICS,
+  SEI_SUB_SEQ_CHARACTERISTICS,
+  SEI_FULL_FRAME_FREEZE,
+  SEI_FULL_FRAME_FREEZE_RELEASE,
+  SEI_FULL_FRAME_SNAPSHOT,
+  SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START,
+  SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END,
+  SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET,
+  SEI_FILM_GRAIN_CHARACTERISTICS,
+  SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE,
+  SEI_STEREO_VIDEO_INFO,
+
+  SEI_MAX_ELEMENTS  //!< number of maximum syntax elements
+} SEI_type;
+
+#define MAX_FN 256
+
+#define AGGREGATION_PACKET_TYPE 4
+#define SEI_PACKET_TYPE 5  // Tian Dong: See VCEG-N72, it need updates
+
+#define NORMAL_SEI 0
+#define AGGREGATION_SEI 1
+
+//! SEI structure
+//! Container for one assembled SEI message (see sei_message[] below).
+typedef struct
+{
+  Boolean available;            //!< NOTE(review): presumably TRUE once a payload has been written -- confirm in sei.c
+  int payloadSize;              //!< NOTE(review): presumably number of valid bytes in data -- confirm in sei.c
+  unsigned char subPacketType;  //!< NOTE(review): presumably NORMAL_SEI or AGGREGATION_SEI -- confirm in sei.c
+  byte* data;                   //!< message bytes; allocation and ownership are not visible in this header
+} sei_struct;
+
+//!< sei_message[0]: this struct is to store the sei message packetized independently
+//!< sei_message[1]: this struct is to store the sei message packetized together with slice data
+extern sei_struct sei_message[2];
+
+void InitSEIMessages(void);
+void CloseSEIMessages(void);
+Boolean HaveAggregationSEI(void);
+void write_sei_message(int id, byte* payload, int payload_size, int payload_type);
+void finalize_sei_message(int id);
+void clear_sei_message(int id);
+void AppendTmpbits2Buf( Bitstream* dest, Bitstream* source );
+
+void PrepareAggregationSEIMessage(void);
+
+
+//! Spare Picture
+//! State of the spare picture SEI payload (global instance: seiSparePicturePayload).
+typedef struct
+{
+  int target_frame_num;
+  int num_spare_pics;
+  int payloadSize;    //!< NOTE(review): presumably the byte length of the finalized payload, as for the other SEI structs -- confirm
+  Bitstream* data;    //!< bitstream holding the payload
+} spare_picture_struct;
+
+extern Boolean seiHasSparePicture;
+//extern Boolean sei_has_sp;
+extern spare_picture_struct seiSparePicturePayload;
+
+void InitSparePicture();
+void CloseSparePicture();
+void CalculateSparePicture();
+void ComposeSparePictureMessage(int delta_spare_frame_num, int ref_area_indicator, Bitstream *tmpBitstream);
+Boolean CompressSpareMBMap(unsigned char **map_sp, Bitstream *bitstream);
+void FinalizeSpareMBMap();
+
+//! Subseq Information
+//! State of the sub-sequence information SEI; one instance exists per layer
+//! (seiSubseqInfo[MAX_LAYER_NUMBER]).
+typedef struct
+{
+  int subseq_layer_num;
+  int subseq_id;
+  unsigned int last_picture_flag;
+  unsigned int stored_frame_cnt;
+
+  int payloadSize;    //!< NOTE(review): presumably the byte length of the finalized payload -- confirm
+  Bitstream* data;    //!< bitstream holding the payload
+} subseq_information_struct;
+
+extern Boolean seiHasSubseqInfo;
+extern subseq_information_struct seiSubseqInfo[MAX_LAYER_NUMBER];
+
+void InitSubseqInfo(int currLayer);
+void UpdateSubseqInfo(int currLayer);
+void FinalizeSubseqInfo(int currLayer);
+void ClearSubseqInfoPayload(int currLayer);
+void CloseSubseqInfo(int currLayer);
+
+//! Subseq Layer Information
+//! State of the sub-sequence layer characteristics SEI. Unlike the other SEI
+//! structs in this header, the payload lives in a fixed in-struct byte array
+//! rather than in a Bitstream.
+typedef struct
+{
+  unsigned short bit_rate[MAX_LAYER_NUMBER];    //!< one entry per layer
+  unsigned short frame_rate[MAX_LAYER_NUMBER];  //!< one entry per layer
+  byte data[4*MAX_LAYER_NUMBER];                //!< payload buffer sized at 4 bytes per layer
+  int layer_number;
+  int payloadSize;
+} subseq_layer_information_struct;
+
+extern Boolean seiHasSubseqLayerInfo;
+extern subseq_layer_information_struct seiSubseqLayerInfo;
+
+void InitSubseqLayerInfo();
+void CloseSubseqLayerInfo();
+void FinalizeSubseqLayerInfo();
+
+//! Subseq Characteristics
+//! State of the sub-sequence characteristics SEI (global instance: seiSubseqChar).
+typedef struct
+{
+  int subseq_layer_num;
+  int subseq_id;
+  int duration_flag;
+  unsigned int subseq_duration;
+  unsigned int average_rate_flag;
+  unsigned int average_bit_rate;
+  unsigned int average_frame_rate;
+  int num_referenced_subseqs;                        //!< NOTE(review): presumably the number of valid entries in the two arrays below (capacity MAX_DEPENDENT_SUBSEQ) -- confirm
+  int ref_subseq_layer_num[MAX_DEPENDENT_SUBSEQ];
+  int ref_subseq_id[MAX_DEPENDENT_SUBSEQ];
+
+  Bitstream* data;    //!< bitstream holding the payload
+  int payloadSize;    //!< NOTE(review): presumably the byte length of the finalized payload -- confirm
+} subseq_char_information_struct;
+
+extern Boolean seiHasSubseqChar;
+extern subseq_char_information_struct seiSubseqChar;
+
+void InitSubseqChar();
+void ClearSubseqCharPayload();
+void UpdateSubseqChar();
+void FinalizeSubseqChar();
+void CloseSubseqChar();
+
+
+//! Scene Information
+//! State of the scene information SEI (global instance: seiSceneInformation).
+typedef struct
+{
+  int scene_id;
+  int scene_transition_type;
+  int second_scene_id;
+
+  Bitstream* data;    //!< bitstream holding the payload
+  int payloadSize;    //!< NOTE(review): presumably the byte length of the finalized payload -- confirm
+} scene_information_struct;
+
+extern Boolean seiHasSceneInformation;
+extern scene_information_struct seiSceneInformation;
+
+void InitSceneInformation();
+void CloseSceneInformation();
+void UpdateSceneInformation(Boolean HasSceneInformation, int sceneID, int sceneTransType, int secondSceneID);
+void FinalizeSceneInformation();
+
+//! PanScanRect Information
+//! State of the pan-scan rectangle SEI (global instance: seiPanScanRectInfo).
+typedef struct
+{
+  int pan_scan_rect_id;
+  int pan_scan_rect_left_offset;
+  int pan_scan_rect_right_offset;
+  int pan_scan_rect_top_offset;
+  int pan_scan_rect_bottom_offset;
+
+  Bitstream *data;    //!< payload bitstream; freed (with its streamBuffer) by ClosePanScanRectInfo()
+  int payloadSize;    //!< set by FinalizePanScanRectInfo() to the bitstream's byte position
+} panscanrect_information_struct;
+
+extern Boolean seiHasPanScanRectInfo;
+extern panscanrect_information_struct seiPanScanRectInfo;
+
+void InitPanScanRectInfo();
+void ClearPanScanRectInfoPayload();
+void UpdatePanScanRectInfo();
+void FinalizePanScanRectInfo();
+void ClosePanScanRectInfo();
+
+//! User_data_unregistered Information
+//! State of the unregistered user data SEI (global instance: seiUser_data_unregistered).
+typedef struct
+{
+  char *byte;         //!< raw payload bytes; MAXRTPPAYLOADLEN-byte buffer allocated by InitUser_data_unregistered()
+  int total_byte;     //!< number of entries of byte[] that Finalize writes to the bitstream
+  Bitstream *data;    //!< payload bitstream with a MAXRTPPAYLOADLEN-byte streamBuffer
+  int payloadSize;    //!< set by FinalizeUser_data_unregistered() to the bitstream's byte position
+} user_data_unregistered_information_struct;
+
+extern Boolean seiHasUser_data_unregistered_info;   //!< set to TRUE by ClearUser_data_unregistered()
+extern user_data_unregistered_information_struct seiUser_data_unregistered;
+
+void InitUser_data_unregistered();       //!< allocate buffers (no_mem_exit on failure), then Clear
+void ClearUser_data_unregistered();      //!< zero buffers and counters, rewind the bitstream
+void UpdateUser_data_unregistered();     //!< fill byte[] with a fixed 7-byte pattern
+void FinalizeUser_data_unregistered();   //!< write byte[] into the bitstream, byte-align, set payloadSize
+void CloseUser_data_unregistered();      //!< free the buffers
+
+//! User_data_registered_itu_t_t35 Information
+//! State of the registered (ITU-T T.35) user data SEI
+//! (global instance: seiUser_data_registered_itu_t_t35).
+typedef struct
+{
+  char *byte;                                   //!< raw payload bytes; MAXRTPPAYLOADLEN-byte buffer allocated by Init
+  int total_byte;                               //!< number of entries of byte[] that Finalize writes to the bitstream
+  int itu_t_t35_country_code;                   //!< written as 8 bits; 0xFF means the extension byte follows
+  int itu_t_t35_country_code_extension_byte;    //!< written as 8 bits, only when the country code is 0xFF
+  Bitstream *data;                              //!< payload bitstream with a MAXRTPPAYLOADLEN-byte streamBuffer
+  int payloadSize;                              //!< set by Finalize to the bitstream's byte position
+} user_data_registered_itu_t_t35_information_struct;
+
+extern Boolean seiHasUser_data_registered_itu_t_t35_info;   //!< set to TRUE by ClearUser_data_registered_itu_t_t35()
+extern user_data_registered_itu_t_t35_information_struct seiUser_data_registered_itu_t_t35;
+
+void InitUser_data_registered_itu_t_t35();       //!< allocate buffers (no_mem_exit on failure), then Clear
+void ClearUser_data_registered_itu_t_t35();      //!< zero buffers, counters and country code fields
+void UpdateUser_data_registered_itu_t_t35();     //!< set a fixed country code and a fixed 7-byte pattern
+void FinalizeUser_data_registered_itu_t_t35();   //!< write country code and byte[] into the bitstream, byte-align, set payloadSize
+void CloseUser_data_registered_itu_t_t35();      //!< free the buffers
+
+//! Recovery Point Information
+//! State of the recovery point SEI (global instance: seiRecoveryPoint).
+typedef struct
+{
+  unsigned int  recovery_frame_cnt;         //!< written with ue_v by FinalizeRandomAccess()
+  unsigned char exact_match_flag;           //!< written as 1 bit
+  unsigned char broken_link_flag;           //!< written as 1 bit
+  unsigned char changing_slice_group_idc;   //!< written as 2 bits; not assigned by the Clear/Update functions in sei.c
+
+  Bitstream *data;    //!< payload bitstream with a MAXRTPPAYLOADLEN-byte streamBuffer
+  int payloadSize;    //!< set by FinalizeRandomAccess() to the bitstream's byte position
+} recovery_point_information_struct;
+
+extern Boolean seiHasRecoveryPoint_info;   //!< TRUE only after UpdateRandomAccess() saw an I slice
+extern recovery_point_information_struct seiRecoveryPoint;
+
+void InitRandomAccess();       //!< allocate the bitstream (no_mem_exit on failure), then Clear
+void ClearRandomAccess();      //!< zero the buffer and fields, set seiHasRecoveryPoint_info to FALSE
+void UpdateRandomAccess();     //!< enable the message for I slices, disable it otherwise
+void FinalizeRandomAccess();   //!< write the fields into the bitstream, byte-align, set payloadSize
+void CloseRandomAccess();      //!< free the bitstream
+
+
+// This is only temp
+//! Buffering Period Information
+#define MAX_CPB_CNT_MINUS1 31
+#define MAX_PIC_STRUCT_VALUE 16
+//! State of the buffering period SEI. Both delay arrays hold
+//! MAX_CPB_CNT_MINUS1+1 (= 32) entries.
+typedef struct
+{
+  int seq_parameter_set_id;
+  int initial_cpb_removal_delay[MAX_CPB_CNT_MINUS1+1];
+  int initial_cpb_removal_delay_offset[MAX_CPB_CNT_MINUS1+1];
+
+  Bitstream *data;    //!< bitstream holding the payload
+  int payloadSize;    //!< NOTE(review): presumably the byte length of the finalized payload -- confirm
+} bufferingperiod_information_struct;
+// NOTE(review): unlike the other SEI globals in this header, the two objects
+// below are not declared 'extern', so every translation unit that includes
+// sei.h gets its own tentative definition. That only links when the toolchain
+// merges common symbols (it fails e.g. with -fno-common). Consider 'extern'
+// here plus a single definition in sei.c.
+Boolean seiHasBufferingPeriod_info;
+bufferingperiod_information_struct seiBufferingPeriod;
+
+void InitBufferingPeriod();
+void ClearBufferingPeriod();
+void CloseBufferingPeriod();
+void UpdateBufferingPeriod();
+void FinalizeBufferingPeriod();
+
+//! Picture timing Information
+//! State of the picture timing SEI. clock_timestamp_flag holds
+//! MAX_PIC_STRUCT_VALUE (= 16) entries.
+typedef struct
+{
+  int cpb_removal_delay;
+  int dpb_output_delay;
+  int pic_struct;
+  Boolean clock_timestamp_flag[MAX_PIC_STRUCT_VALUE];
+  int ct_type;
+  Boolean nuit_field_based_flag;
+  int counting_type;
+  Boolean full_timestamp_flag;
+  Boolean discontinuity_flag;
+  Boolean cnt_dropped_flag;
+  int n_frames;
+  int seconds_value;
+  int minutes_value;
+  int hours_value;
+  Boolean seconds_flag;
+  Boolean minutes_flag;
+  Boolean hours_flag;
+  int time_offset;
+
+  Bitstream *data;    //!< bitstream holding the payload
+  int payloadSize;    //!< NOTE(review): presumably the byte length of the finalized payload -- confirm
+} pictiming_information_struct;
+// NOTE(review): unlike the other SEI globals in this header, the two objects
+// below are not declared 'extern', so every translation unit that includes
+// sei.h gets its own tentative definition. That only links when the toolchain
+// merges common symbols (it fails e.g. with -fno-common). Consider 'extern'
+// here plus a single definition in sei.c.
+Boolean seiHasPicTiming_info;
+pictiming_information_struct seiPicTiming;
+
+void InitPicTiming();
+void ClearPicTiming();
+void ClosePicTiming();
+void UpdatePicTiming();
+void FinalizePicTiming();
+
+// end of temp additions
+#endif


Index: llvm-test/MultiSource/Applications/JM/lencod/slice.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/slice.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/slice.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1344 @@
+
+/*!
+ **************************************************************************************
+ * \file
+ *    slice.c
+ * \brief
+ *    generate the slice header, setup the bit buffer for slices,
+ *    and generates the slice NALU(s)
+
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Thomas Stockhammer            <stockhammer at ei.tum.de>
+ *      - Detlev Marpe                  <marpe at hhi.de>
+ *      - Stephan Wenger                <stewe at cs.tu-berlin.de>
+ *      - Alexis Michael Tourapis       <alexismt at ieee.org>
+ ***************************************************************************************
+ */
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+
+#include "global.h"
+#include "header.h"
+#include "rtp.h"
+#include "fmo.h"
+#include "vlc.h"
+#include "image.h"
+#include "cabac.h"
+#include "elements.h"
+#include "me_epzs.h"
+#include "ratectl.h"
+#include "rc_quadratic.h"
+#include "macroblock.h"
+#include "symbol.h"
+
+// Local declarations
+static Slice *malloc_slice();
+static void  free_slice(Slice *slice);
+static void  init_slice(int start_mb_addr);
+static void set_ref_pic_num();
+extern ColocatedParams *Co_located;
+extern StorablePicture **listX[6];
+void poc_ref_pic_reorder(StorablePicture **list, unsigned num_ref_idx_lX_active, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx, int list_no);
+void SetLagrangianMultipliers();
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Reset the reference picture list reordering flags of the current
+ *    slice: both the list 0 and the list 1 flag are cleared.
+ *    Further reordering initializations should go here.
+ ************************************************************************
+ */
+void init_ref_pic_list_reordering()
+{
+  Slice *slice = img->currentSlice;
+
+  slice->ref_pic_list_reordering_flag_l1 = 0;
+  slice->ref_pic_list_reordering_flag_l0 = 0;
+}
+
+
+/*!
+ ************************************************************************
+ *  \brief
+ *     This function generates the slice (and partition) header(s) and
+ *     prepares the entropy coder of every partition.
+ *
+ *  \return number of bits used for the slice (and partition) header(s);
+ *     in CABAC mode this includes the bits skipped to byte-align each
+ *     partition after its header
+ *
+ *  \par Side effects:
+ *      Clears write_flag of each partition bitstream, writes the
+ *      header(s) through SliceHeader() / Partition_BC_Header(), and
+ *      either byte-aligns the stream and starts the arithmetic coder
+ *      (CABAC) or calls CAVLC_init(). In CABAC mode init_contexts()
+ *      is called once after all partitions are set up.
+ ************************************************************************
+*/
+int start_slice()
+{
+  Slice *slice = img->currentSlice;
+  int num_partitions = (input->partition_mode == PAR_DP_1) ? 1 : 3;
+  int header_bits = 0;
+  int part;
+
+  // an IDR picture always uses a single partition
+  if (img->currentPicture->idr_flag)
+    num_partitions = 1;
+
+  RTPUpdateTimestamp (img->tr);   // per the original author: no side effects, done for all NALs
+
+  for (part = 0; part < num_partitions; part++)
+  {
+    Bitstream *stream = slice->partArr[part].bitstream;
+
+    stream->write_flag = 0;
+
+    if (part != 0)
+      header_bits += Partition_BC_Header(part);   // second/third partition
+    else
+      header_bits += SliceHeader (0);             // first partition
+
+    if (input->symbol_mode != CABAC)
+    {
+      // Initialize CA-VLC
+      CAVLC_init();
+      continue;
+    }
+
+    //! Initialize CABAC: the alignment bits count towards the header
+    if (stream->bits_to_go != 8)
+      header_bits += stream->bits_to_go;
+    writeVlcByteAlign(stream);
+    arienco_start_encoding(&(slice->partArr[part].ee_cabac), stream->streamBuffer, &(stream->byte_pos));
+    cabac_new_slice();
+  }
+
+  if (input->symbol_mode == CABAC)
+    init_contexts();
+
+  return header_bits;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    This function terminates a slice (but doesn't write it out),
+ *    the old terminate_slice (0)
+ *
+ *    For every partition of the current slice the bitstream is closed
+ *    (SODBtoRBSP for UVLC, arienco_done_encoding for CABAC) and passed
+ *    through RBSPtoEBSP; the bytes added by that step are accumulated,
+ *    in bits, into *stats->em_prev_bits.
+ * \param lastslice
+ *    non-zero for the last slice of the picture; only then (CABAC, last
+ *    partition) is the amount of CABAC stuffing computed
+ * \return
+ *    always 0 in the current implementation (no path returns 1)
+ *
+ ************************************************************************
+ */
+int terminate_slice(int lastslice)
+{
+  // chroma macroblock width/height, indexed by active_sps->chroma_format_idc
+  static int MbWidthC  [4]= { 0, 8, 8,  16};
+  static int MbHeightC [4]= { 0, 8, 16, 16};
+
+  int bytes_written;        // assigned in the CABAC branch but never read in this function
+  Bitstream *currStream;
+  Slice *currSlice = img->currentSlice;
+  EncodingEnvironmentPtr eep;
+  int i;
+  int byte_pos_before_startcode_emu_prevention;
+  int min_num_bytes=0;
+  int stuffing_bytes=0;     // stays 0 unless the last-slice/last-partition branch below sets it
+  int RawMbBits;
+
+  if (input->symbol_mode == CABAC)
+    write_terminating_bit (1);      // only once, not for all partitions
+
+  for (i=0; i<currSlice->max_part_nr; i++)
+  {
+    currStream = (currSlice->partArr[i]).bitstream;
+    if (input->symbol_mode == UVLC)
+    {
+      SODBtoRBSP(currStream);
+      byte_pos_before_startcode_emu_prevention = currStream->byte_pos;
+      // RBSPtoEBSP returns the new length, which becomes the stream's byte position
+      currStream->byte_pos = RBSPtoEBSP(currStream->streamBuffer, 0 , currStream->byte_pos, 0);
+      *(stats->em_prev_bits) += (currStream->byte_pos - byte_pos_before_startcode_emu_prevention) * 8;
+    }
+    else     // CABAC
+    {
+      eep = &((currSlice->partArr[i]).ee_cabac);
+      // terminate the arithmetic code
+      arienco_done_encoding(eep);
+      currStream->bits_to_go = eep->Ebits_to_go;
+      currStream->byte_buf = 0;
+      bytes_written = currStream->byte_pos;
+      img->bytes_in_picture += currStream->byte_pos;
+
+      byte_pos_before_startcode_emu_prevention= currStream->byte_pos;
+      // stuffing is only evaluated for the last partition of the last slice
+      if (lastslice && i==((currSlice->max_part_nr-1)))
+      {
+        RawMbBits = 256 * img->bitdepth_luma + 2 * MbWidthC[active_sps->chroma_format_idc] * MbHeightC[active_sps->chroma_format_idc] * img->bitdepth_chroma;
+        min_num_bytes = ((96 * get_pic_bin_count()) - (RawMbBits * (int)img->PicSizeInMbs *3) + 1023) / 1024;
+        if (min_num_bytes>img->bytes_in_picture)
+        {
+          stuffing_bytes = min_num_bytes - img->bytes_in_picture;
+          printf ("CABAC stuffing words = %6d\n", stuffing_bytes/3);
+        }
+      }
+
+//      printf ("bytepos: %d\n", currStream->byte_pos);
+      // NOTE(review): the last argument is presumably the minimum output size,
+      // i.e. how the stuffing bytes get appended -- confirm in RBSPtoEBSP
+      currStream->byte_pos = RBSPtoEBSP(currStream->streamBuffer, 0, currStream->byte_pos, currStream->byte_pos + stuffing_bytes);
+      *(stats->em_prev_bits) += (currStream->byte_pos - byte_pos_before_startcode_emu_prevention) * 8;
+    }           // CABAC
+  }           // partition loop
+  if( input->symbol_mode == CABAC )
+  {
+    store_contexts();
+  }
+
+  // NOTE(review): this condition is always true (a value cannot equal both
+  // I_SLICE and SI_SLICE), so the buffer is freed for every slice type.
+  // '&&' was probably intended; left unchanged because whether the buffers
+  // are allocated for I/SI slices is not visible here -- check the
+  // allocation side before changing it.
+  if (img->type != I_SLICE || img->type != SI_SLICE)
+    free_ref_pic_list_reordering_buffer (currSlice);
+  return 0;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Encodes one slice
+* \par
+*   returns the number of coded MBs in the SLice
+************************************************************************
+*/
+int encode_one_slice (int SliceGroupId, Picture *pic, int TotalCodedMBs)
+{
+  Boolean end_of_slice = FALSE;
+  Boolean recode_macroblock;
+  int len;
+  int NumberOfCodedMBs = 0;
+  int CurrentMbAddr;
+  double FrameRDCost = DBL_MAX, FieldRDCost = DBL_MAX;
+
+  img->cod_counter = 0;
+
+  CurrentMbAddr = FmoGetFirstMacroblockInSlice (SliceGroupId);
+// printf ("\n\nEncode_one_slice: PictureID %d SliceGroupId %d  SliceID %d  FirstMB %d \n", img->tr, SliceGroupId, img->current_slice_nr, CurrentMbInScanOrder);
+
+  init_slice (CurrentMbAddr);
+  Bytes_After_Header = img->currentSlice->partArr[0].bitstream->byte_pos;
+
+  SetLagrangianMultipliers();
+
+  if (input->symbol_mode==CABAC)
+  {
+    SetCtxModelNumber ();
+  }
+
+  img->checkref = (input->rdopt && input->RestrictRef && (img->type==P_SLICE || img->type==SP_SLICE));
+
+/*
+  // Tian Dong: June 7, 2002 JVT-B042
+  // When the pictures are put into different layers and subseq, not all the reference frames
+  // in multi-frame buffer are valid for prediction. The acutual number of the valid reference
+  // frames, fb->num_short_used, will be given by start_slice(sym).
+  // Save the fb->short_used.
+  if (input->NumFramesInELSubSeq)
+    {
+      short_used = fb->short_used;
+    }
+*/
+
+  len = start_slice ();
+
+  // Rate control
+  if (input->RCEnable)
+  {
+    generic_RC->NumberofHeaderBits +=len;
+
+    // basic unit layer rate control
+    if(img->BasicUnit < img->FrameSizeInMbs)
+      generic_RC->NumberofBasicUnitHeaderBits +=len;
+  }
+//  printf("short size, used, num-used: (%d,%d,%d)\n", fb->short_size, fb->short_used, fb->num_short_used);
+
+/*
+  // Tian Dong: June 7, 2002 JVT-B042
+  if (input->NumFramesInELSubSeq)
+    {
+      fb->short_used = fb->num_short_used;
+    }
+*/
+  // Update statistics
+  stats->bit_slice += len;
+  stats->bit_use_header[img->type] += len;
+// printf ("\n\n");
+
+  while (end_of_slice == FALSE) // loop over macroblocks
+  {
+    if (img->AdaptiveRounding && input->AdaptRndPeriod && (img->current_mb_nr % input->AdaptRndPeriod == 0))
+    {
+      CalculateOffsetParam();
+
+      if(input->Transform8x8Mode)
+      {
+        CalculateOffset8Param();
+      }
+    }
+
+    //sw paff
+    if (!img->MbaffFrameFlag)
+    {
+      recode_macroblock = FALSE;
+      rdopt = &rddata_top_frame_mb;   // store data in top frame MB
+
+      start_macroblock (CurrentMbAddr, FALSE);
+      encode_one_macroblock ();
+
+      write_one_macroblock (1);
+
+      terminate_macroblock (&end_of_slice, &recode_macroblock);
+
+//       printf ("encode_one_slice: mb %d,  slice %d,   bitbuf bytepos %d EOS %d\n",
+//       img->current_mb_nr, img->current_slice_nr,
+//       img->currentSlice->partArr[0].bitstream->byte_pos, end_of_slice);
+
+      if (recode_macroblock == FALSE)       // The final processing of the macroblock has been done
+      {
+        CurrentMbAddr = FmoGetNextMBNr (CurrentMbAddr);
+        if (CurrentMbAddr == -1)   // end of slice
+        {
+//          printf ("FMO End of Slice Group detected, current MBs %d, force end of slice\n", NumberOfCodedMBs+1);
+          end_of_slice = TRUE;
+        }
+        NumberOfCodedMBs++;       // only here we are sure that the coded MB is actually included in the slice
+        proceed2nextMacroblock ();
+      }
+      else
+      {
+        //!Go back to the previous MB to recode it
+        img->current_mb_nr = FmoGetPreviousMBNr(img->current_mb_nr);
+        if(img->current_mb_nr == -1 )   // The first MB of the slice group  is too big,
+                                        // which means it's impossible to encode picture using current slice bits restriction
+        {
+          snprintf (errortext, ET_SIZE, "Error encoding first MB with specified parameter, bits of current MB may be too big");
+          error (errortext, 300);
+        }
+      }
+    }
+    else                      // TBD -- Addition of FMO
+    {
+
+//! This following ugly code breaks slices, at least for a slice mode that accumulates a certain
+//! number of bits into one slice.
+//! The suggested algorithm is as follows:
+//!
+//! SaveState (Bitstream, stats,  etc. etc.);
+//! BitsForThisMBPairInFrameMode = CodeMB (Upper, FRAME_MODE) + CodeMB (Lower, FRAME_MODE);
+//! DistortionForThisMBPairInFrameMode = CalculateDistortion(Upper) + CalculateDistortion (Lower);
+//! RestoreState();
+//! BitsForThisMBPairInFieldMode = CodeMB (Upper, FIELD_MODE) + CodeMB (Lower, FIELD_MODE);
+//! DistortionForThisMBPairInFrameMode = CalculateDistortion(Upper) + CalculateDistortion (Lower);
+//! FrameFieldMode = Decision (...)
+//! RestoreState()
+//! if (FrameFieldMode == FRAME) {
+//!   CodeMB (Upper, FRAME); CodeMB (Lower, FRAME);
+//! } else {
+//!   CodeMB (Upper FIELD); CodeMB (Lower, FIELD);
+//! }
+//!
+//! Open questions/issues:
+//!   1. CABAC/CA-VLC state:  It seems that the CABAC/CA_VLC states are changed during the
+//!      dummy encoding processes (for the R-D based selection), but that they are never
+//!      reset, once the selection is made.  I believe that this breaks the MB-adaptive
+//!      frame/field coding.  The necessary code for the state saves is readily available
+//!      in macroblock.c, start_macroblock() and terminate_macroblock() (this code needs
+//!      to be double checked that it works with CA-VLC as well
+//!   2. would it be an option to allocate Bitstreams with zero data in them (or copy the
+//!      already generated bitstream) for the "test coding"?
+
+      img->write_macroblock = 0;
+      if (input->MbInterlace == ADAPTIVE_CODING || input->MbInterlace == FRAME_MB_PAIR_CODING)
+      {
+        //================ code MB pair as frame MB ================
+        //----------------------------------------------------------
+        recode_macroblock = FALSE;
+
+        img->field_mode = 0;  // MB coded as frame
+        img->top_field = 0;   // Set top field to 0
+
+        //Rate control
+        img->write_macroblock = 0;
+        img->bot_MB = 0;
+
+        // save RC state only when it is going to change
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING
+          && img->NumberofCodedMacroBlocks > 0 && (img->NumberofCodedMacroBlocks % img->BasicUnit) == 0 )
+          copy_rc_jvt( quadratic_RC_init, quadratic_RC ); // save initial RC status
+
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING )
+          copy_rc_generic( generic_RC_init, generic_RC ); // save initial RC status
+
+        start_macroblock (CurrentMbAddr, FALSE);
+
+        rdopt = &rddata_top_frame_mb; // store data in top frame MB
+        encode_one_macroblock ();         // code the MB as frame
+
+        FrameRDCost = rdopt->min_rdcost;
+        //***   Top MB coded as frame MB ***//
+
+        //Rate control
+        img->bot_MB = 1; //for Rate control
+
+        // go to the bottom MB in the MB pair
+        img->field_mode = 0;  // MB coded as frame  //GB
+
+        start_macroblock (CurrentMbAddr+1, FALSE);
+        rdopt = &rddata_bot_frame_mb; // store data in top frame MB
+        encode_one_macroblock ();         // code the MB as frame
+
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING
+          && img->NumberofCodedMacroBlocks > 0 && (img->NumberofCodedMacroBlocks % img->BasicUnit) == 0 )
+          copy_rc_jvt( quadratic_RC_best, quadratic_RC ); // restore initial RC status
+
+        if (input->RCEnable && input->MbInterlace == ADAPTIVE_CODING )
+          copy_rc_generic( generic_RC_best, generic_RC ); // save frame RC stats
+
+        FrameRDCost += rdopt->min_rdcost;
+
+        //***   Bottom MB coded as frame MB ***//
+      }
+
+      if ((input->MbInterlace == ADAPTIVE_CODING) || (input->MbInterlace == FIELD_CODING))
+      {
+        //Rate control
+        img->bot_MB = 0;
+
+        //=========== start coding the MB pair as a field MB pair =============
+        //---------------------------------------------------------------------
+        img->field_mode = 1;  // MB coded as field
+        img->top_field = 1;   // Set top field to 1
+        img->buf_cycle <<= 1;
+        input->num_ref_frames <<= 1;
+        img->num_ref_idx_l0_active <<= 1;
+        img->num_ref_idx_l0_active += 1;
+
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING
+          && img->NumberofCodedMacroBlocks > 0 && (img->NumberofCodedMacroBlocks % img->BasicUnit) == 0 )
+          copy_rc_jvt( quadratic_RC, quadratic_RC_init ); // restore initial RC status
+
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING )
+          copy_rc_generic( generic_RC, generic_RC_init ); // reset RC stats
+
+        start_macroblock (CurrentMbAddr, TRUE);
+
+        rdopt = &rddata_top_field_mb; // store data in top frame MB
+//        TopFieldIsSkipped = 0;        // set the top field MB skipped flag to 0
+        encode_one_macroblock ();         // code the MB as field
+
+        FieldRDCost = rdopt->min_rdcost;
+        //***   Top MB coded as field MB ***//
+        //Rate control
+        img->bot_MB = 1;//for Rate control
+
+        img->top_field = 0;   // Set top field to 0
+        start_macroblock (CurrentMbAddr+1, TRUE);
+        rdopt = &rddata_bot_field_mb; // store data in top frame MB
+        encode_one_macroblock ();         // code the MB as field
+
+        FieldRDCost += rdopt->min_rdcost;
+        //***   Bottom MB coded as field MB ***//
+      }
+
+      //Rate control
+      img->write_mbaff_frame = 0;  //Rate control
+
+      //=========== decide between frame/field MB pair ============
+      //-----------------------------------------------------------
+      if ( ((input->MbInterlace == ADAPTIVE_CODING) && (FrameRDCost < FieldRDCost)) || input->MbInterlace == FRAME_MB_PAIR_CODING )
+      {
+        img->field_mode = 0;
+        MBPairIsField = 0;
+        if ( input->MbInterlace != FRAME_MB_PAIR_CODING )
+        {
+          img->buf_cycle >>= 1;
+          input->num_ref_frames >>= 1;
+          img->num_ref_idx_l0_active -= 1;
+          img->num_ref_idx_l0_active >>= 1;
+        }
+
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING
+          && img->NumberofCodedMacroBlocks > 0 && (img->NumberofCodedMacroBlocks % img->BasicUnit) == 0 )
+          copy_rc_jvt( quadratic_RC, quadratic_RC_best ); // restore initial RC status
+
+        if ( input->RCEnable && input->MbInterlace == ADAPTIVE_CODING )
+          copy_rc_generic( generic_RC, generic_RC_best ); // restore frame RC stats
+
+        //Rate control
+        img->write_mbaff_frame = 1;  //for Rate control
+      }
+      else
+      {
+        img->field_mode = 1;
+        MBPairIsField = 1;
+      }
+
+      //Rate control
+      img->write_macroblock = 1;//Rate control
+
+      if (MBPairIsField)
+        img->top_field = 1;
+      else
+        img->top_field = 0;
+
+      //Rate control
+      img->bot_MB = 0;// for Rate control
+
+      // go back to the Top MB in the MB pair
+      start_macroblock (CurrentMbAddr, img->field_mode);
+
+      rdopt =  img->field_mode ? &rddata_top_field_mb : &rddata_top_frame_mb;
+      copy_rdopt_data (0);  // copy the MB data for Top MB from the temp buffers
+      write_one_macroblock (1);     // write the Top MB data to the bitstream
+      terminate_macroblock (&end_of_slice, &recode_macroblock);     // done coding the Top MB
+
+      if (recode_macroblock == FALSE)       // The final processing of the macroblock has been done
+      {
+        CurrentMbAddr = FmoGetNextMBNr (CurrentMbAddr);
+        if (CurrentMbAddr == -1)   // end of slice
+        {
+          end_of_slice = TRUE;
+        }
+        NumberOfCodedMBs++;       // only here we are sure that the coded MB is actually included in the slice
+        proceed2nextMacroblock ();
+
+
+        //Rate control
+        img->bot_MB = 1;//for Rate control
+        // go to the Bottom MB in the MB pair
+        img->top_field = 0;
+        start_macroblock (CurrentMbAddr, img->field_mode);
+
+        rdopt = img->field_mode ? &rddata_bot_field_mb : &rddata_bot_frame_mb;
+        copy_rdopt_data (1);  // copy the MB data for Bottom MB from the temp buffers
+
+        write_one_macroblock (0);     // write the Bottom MB data to the bitstream
+        terminate_macroblock (&end_of_slice, &recode_macroblock);     // done coding the Top MB
+        if (recode_macroblock == FALSE)       // The final processing of the macroblock has been done
+        {
+          CurrentMbAddr = FmoGetNextMBNr (CurrentMbAddr);
+          if (CurrentMbAddr == -1)   // end of slice
+          {
+            end_of_slice = TRUE;
+          }
+          NumberOfCodedMBs++;       // only here we are sure that the coded MB is actually included in the slice
+          proceed2nextMacroblock ();
+        }
+        else
+        {
+          //Go back to the beginning of the macroblock pair to recode it
+          img->current_mb_nr = FmoGetPreviousMBNr(img->current_mb_nr);
+          img->current_mb_nr = FmoGetPreviousMBNr(img->current_mb_nr);
+          if(img->current_mb_nr == -1 )   // The first MB of the slice group  is too big,
+            // which means it's impossible to encode picture using current slice bits restriction
+          {
+            snprintf (errortext, ET_SIZE, "Error encoding first MB with specified parameter, bits of current MB may be too big");
+            error (errortext, 300);
+          }
+        }
+      }
+      else
+      {
+        //!Go back to the previous MB to recode it
+        img->current_mb_nr = FmoGetPreviousMBNr(img->current_mb_nr);
+        if(img->current_mb_nr == -1 )   // The first MB of the slice group  is too big,
+                                        // which means it's impossible to encode picture using current slice bits restriction
+        {
+          snprintf (errortext, ET_SIZE, "Error encoding first MB with specified parameter, bits of current MB may be too big");
+          error (errortext, 300);
+        }
+      }
+
+      if (MBPairIsField)    // if MB Pair was coded as field the buffer size variables back to frame mode
+      {
+        img->buf_cycle >>= 1;
+        input->num_ref_frames >>= 1;
+        img->num_ref_idx_l0_active -= 1;
+        img->num_ref_idx_l0_active >>= 1;
+      }
+
+      img->field_mode = img->top_field = 0; // reset to frame mode
+
+      if ( !end_of_slice )
+      {
+        assert( CurrentMbAddr < (int)img->PicSizeInMbs );
+        assert( CurrentMbAddr >= 0 );
+        if (CurrentMbAddr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (CurrentMbAddr)))
+          end_of_slice = TRUE;        // just in case it doesn't get set in terminate_macroblock
+      }
+    }
+  }
+/*
+  // Tian Dong: June 7, 2002 JVT-B042
+  // Restore the short_used
+  if (input->NumFramesInELSubSeq)
+    {
+      fb->short_used = short_used;
+    }
+*/
+  terminate_slice ( (NumberOfCodedMBs+TotalCodedMBs >= (int)img->PicSizeInMbs) );
+  return NumberOfCodedMBs;
+}
+
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Initializes the parameters for a new slice and
+ *     allocates the memory for the coded slice in the Picture structure
+ *  \param start_mb_addr
+ *      Address of the first macroblock of the new slice; stored in
+ *      img->current_mb_nr and in the slice's start_mb_nr
+ *  \par Side effects:
+ *      Adds slice/partition header symbols to the symbol buffer
+ *      increments Picture->no_slices, allocates memory for the
+ *      slice, sets img->currentSlice
+ *  \par Order of work:
+ *      1. append a slice from malloc_slice() to currPic->slices and reset
+ *         the bitstream position of each of its data partitions
+ *      2. derive img->num_ref_idx_l0_active / l1_active, build the
+ *         reference lists and optionally truncate or reorder them
+ *      3. estimate weighted-prediction factors when the active PPS asks
+ *         for them, then call set_ref_pic_num()
+ *      4. point the write* function pointers at the UVLC or the CABAC
+ *         syntax-element writers, depending on input->symbol_mode
+ ************************************************************************
+ */
+static void init_slice (int start_mb_addr)
+{
+  int i;
+  Picture *currPic = img->currentPicture;
+  DataPartition *dataPart;
+  Bitstream *currStream;
+  Slice *currSlice;
+
+  img->current_mb_nr = start_mb_addr;
+
+  // Allocate new Slice in the current Picture, and set img->currentSlice
+  assert (currPic != NULL);
+  currPic->no_slices++; // counted before the limit check below; the new slice lives at index no_slices-1
+
+  if (currPic->no_slices >= MAXSLICEPERPICTURE)
+    error ("Too many slices per picture, increase MAXSLICEPERPICTURE in global.h.", -1);
+
+  currPic->slices[currPic->no_slices-1] = malloc_slice();
+  currSlice = currPic->slices[currPic->no_slices-1];
+
+  img->currentSlice = currSlice;
+
+  currSlice->picture_id = img->tr % 256;
+  currSlice->qp = img->qp;
+  currSlice->start_mb_nr = start_mb_addr;
+  currSlice->slice_too_big = dummy_slice_too_big;
+
+  for (i = 0; i < currSlice->max_part_nr; i++) // rewind every partition's bitstream to an empty state
+  {
+    dataPart = &(currSlice->partArr[i]);
+
+    currStream = dataPart->bitstream;
+    currStream->bits_to_go = 8;
+    currStream->byte_pos = 0;
+    currStream->byte_buf = 0;
+  }
+
+  img->num_ref_idx_l0_active = active_pps->num_ref_idx_l0_active_minus1 + 1;
+  img->num_ref_idx_l1_active = active_pps->num_ref_idx_l1_active_minus1 + 1;
+
+  // primary and redundant slices: number of references overriding.
+  if(input->redundant_pic_flag)
+  {
+    if(!redundant_coding)
+    {
+      img->num_ref_idx_l0_active = imin(img->number,input->NumRefPrimary);
+    }
+    else
+    {
+      // 1 reference picture for redundant slices
+      img->num_ref_idx_l0_active = 1;
+    }
+  }
+
+  // code now also considers fields. Issue whether we should account this within the appropriate input params directly
+  if ((img->type == P_SLICE || img->type == SP_SLICE) && input->P_List0_refs)
+  {
+    img->num_ref_idx_l0_active = imin(img->num_ref_idx_l0_active, input->P_List0_refs * ((img->structure !=0) + 1)); // limit is doubled when img->structure is non-zero
+  }
+  if (img->type == B_SLICE )
+  {
+    if (input->B_List0_refs)
+    {
+      img->num_ref_idx_l0_active = imin(img->num_ref_idx_l0_active, input->B_List0_refs * ((img->structure !=0) + 1));
+    }
+    if (input->B_List1_refs)
+    {
+      img->num_ref_idx_l1_active = imin(img->num_ref_idx_l1_active, input->B_List1_refs * ((img->structure !=0) + 1));
+    }
+  }
+  // generate reference picture lists
+  init_lists(img->type, (PictureStructure) img->structure);
+
+  // assign list 0 size from list size
+  img->num_ref_idx_l0_active = listXsize[0]; // replaces the counts derived above with the list sizes now in listXsize
+  img->num_ref_idx_l1_active = listXsize[1];
+
+  //Perform memory management based on poc distances
+  //if (img->nal_reference_idc  && input->HierarchicalCoding && input->PocMemoryManagement && dpb.ref_frames_in_buffer==active_sps->num_ref_frames)
+  if (img->nal_reference_idc  && input->PocMemoryManagement && dpb.ref_frames_in_buffer==active_sps->num_ref_frames)
+  {
+    poc_based_ref_management(img->frame_num);
+  }
+
+  if (input->EnableOpenGOP)
+  {
+    for (i = 0; i<listXsize[0]; i++) // cut list 0 at the first entry older than last_valid_reference, keeping at least one entry
+    {
+      if (listX[0][i]->poc < img->last_valid_reference && img->ThisPOC > img->last_valid_reference)
+      {
+        listXsize[0] = img->num_ref_idx_l0_active = imax(1,i);
+        break;
+      }
+    }
+
+    for (i = 0; i<listXsize[1]; i++) // same truncation for list 1
+    {
+      if (listX[1][i]->poc < img->last_valid_reference && img->ThisPOC > img->last_valid_reference)
+      {
+        listXsize[1] = img->num_ref_idx_l1_active = imax(1,i);
+        break;
+      }
+    }
+  }
+
+  init_ref_pic_list_reordering();
+
+  //Perform reordering based on poc distances for HierarchicalCoding
+  //if (img->type==P_SLICE && input->HierarchicalCoding && input->ReferenceReorder)
+  if (img->type==P_SLICE && input->ReferenceReorder)
+  {
+
+    int i, num_ref; // this i shadows the function-level i for the rest of this block
+
+    alloc_ref_pic_list_reordering_buffer(currSlice);
+
+    if ((img->type != I_SLICE) && (img->type !=SI_SLICE)) // NOTE(review): the enclosing if already requires P_SLICE, so this test looks redundant and the B_SLICE branches below look unreachable - confirm
+    {
+      for (i=0; i<img->num_ref_idx_l0_active + 1; i++) // one slot more than the active count is initialised
+      {
+        currSlice->reordering_of_pic_nums_idc_l0[i] = 3;
+        currSlice->abs_diff_pic_num_minus1_l0[i] = 0;
+        currSlice->long_term_pic_idx_l0[i] = 0;
+      }
+
+      if (img->type == B_SLICE)
+      {
+        for (i=0; i<img->num_ref_idx_l1_active + 1; i++)
+        {
+          currSlice->reordering_of_pic_nums_idc_l1[i] = 3;
+          currSlice->abs_diff_pic_num_minus1_l1[i] = 0;
+          currSlice->long_term_pic_idx_l1[i] = 0;
+        }
+      }
+    }
+
+    if ((img->type != I_SLICE) && (img->type !=SI_SLICE))
+    {
+      num_ref = img->num_ref_idx_l0_active;
+      poc_ref_pic_reorder(listX[LIST_0],
+                          num_ref,
+                          currSlice->reordering_of_pic_nums_idc_l0,
+                          currSlice->abs_diff_pic_num_minus1_l0,
+                          currSlice->long_term_pic_idx_l0, LIST_0);
+
+      //reference picture reordering
+      reorder_ref_pic_list(listX[LIST_0], &listXsize[LIST_0],
+                           img->num_ref_idx_l0_active - 1,
+                           currSlice->reordering_of_pic_nums_idc_l0,
+                           currSlice->abs_diff_pic_num_minus1_l0,
+                           currSlice->long_term_pic_idx_l0);
+
+    // This is not necessary since order is already poc based...
+      if (img->type == B_SLICE)
+      {
+        num_ref = img->num_ref_idx_l1_active;
+        poc_ref_pic_reorder(listX[LIST_1],
+                            num_ref,
+                            currSlice->reordering_of_pic_nums_idc_l1,
+                            currSlice->abs_diff_pic_num_minus1_l1,
+                            currSlice->long_term_pic_idx_l1, LIST_1);
+
+        //reference picture reordering
+        reorder_ref_pic_list(listX[LIST_1], &listXsize[LIST_1],
+                             img->num_ref_idx_l1_active - 1,
+                             currSlice->reordering_of_pic_nums_idc_l1,
+                             currSlice->abs_diff_pic_num_minus1_l1,
+                             currSlice->long_term_pic_idx_l1);
+      }
+    }
+  }
+
+
+  //if (img->MbaffFrameFlag)
+  if (img->structure==FRAME)
+    init_mbaff_lists();
+
+  if (img->type != I_SLICE && (active_pps->weighted_pred_flag == 1 || (active_pps->weighted_bipred_idc > 0 && (img->type == B_SLICE))))
+  {
+    if (img->type==P_SLICE || img->type==SP_SLICE)
+    {
+      if (input->GenerateMultiplePPS && input->RDPictureDecision)
+      {
+        if (enc_picture==enc_frame_picture2)
+          estimate_weighting_factor_P_slice (0);
+        else
+          estimate_weighting_factor_P_slice (1);
+      }
+      else
+        estimate_weighting_factor_P_slice (0);
+
+    }
+    else
+       estimate_weighting_factor_B_slice ();
+  }
+
+  set_ref_pic_num();
+
+  if (img->type == B_SLICE)
+    compute_colocated(Co_located, listX);
+  if (img->type != I_SLICE && input->SearchMode == EPZS)
+    EPZSSliceInit(EPZSCo_located, listX);
+
+  if (input->symbol_mode == UVLC)
+  {
+    writeMB_typeInfo       = writeSE_UVLC;
+    writeIntraPredMode     = writeIntraPredMode_CAVLC;
+    writeB8_typeInfo       = writeSE_UVLC;
+    for (i=0; i<6; i++) // one reference-index writer per list, picked from that list's size
+    {
+      switch (listXsize[i])
+      {
+      case 0:
+        writeRefFrame[i]     = NULL;
+        break;
+      case 1:
+        writeRefFrame[i]     = writeSE_Dummy;
+        break;
+      case 2:
+        writeRefFrame[i]     = writeSE_invFlag;
+        break;
+      default:
+        writeRefFrame[i]     = writeSE_UVLC;
+        break;
+      }
+    }
+    writeMVD               = writeSE_SVLC;
+    writeCBP               = writeCBP_VLC;
+    writeDquant            = writeSE_SVLC;
+    writeCIPredMode        = writeSE_UVLC;
+    writeFieldModeInfo     = writeSE_Flag;
+    writeMB_transform_size = writeSE_Flag;
+  }
+  else
+  {
+    writeMB_typeInfo       = writeMB_typeInfo_CABAC;
+    writeIntraPredMode     = writeIntraPredMode_CABAC;
+    writeB8_typeInfo       = writeB8_typeInfo_CABAC;
+    for (i=0; i<6; i++)
+    {
+      switch (listXsize[i])
+      {
+      case 0:
+        writeRefFrame[i]     = NULL; // NOTE(review): no break, so case 1 immediately overwrites this with writeSE_Dummy; the UVLC branch above does break - confirm which is intended
+      case 1:
+        writeRefFrame[i]     = writeSE_Dummy;
+        break;
+      default:
+        writeRefFrame[i]     = writeRefFrame_CABAC;
+      }
+    }
+    writeMVD               = writeMVD_CABAC;
+    writeCBP               = writeCBP_CABAC;
+    writeDquant            = writeDquant_CABAC;
+    writeCIPredMode        = writeCIPredMode_CABAC;
+    writeFieldModeInfo     = writeFieldModeInfo_CABAC;
+    writeMB_transform_size = writeMB_transform_size_CABAC;
+  }
+
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Builds a zero-initialised Slice with its data partitions, one
+ *    Bitstream per partition and one stream buffer per Bitstream.
+ *    In CABAC mode the motion and texture context models are created too.
+ * \par Side effects:
+ *    Sets assignSE2partition[0] and assignSE2partition[1]
+ * \return
+ *    Pointer to the new Slice; no_mem_exit() is called when an
+ *    allocation fails
+ ************************************************************************
+ */
+static Slice *malloc_slice()
+{
+  Slice *new_slice;
+  Bitstream *stream;
+  int part;
+  int bytes_per_buffer;
+
+  // Size of each partition's stream buffer, chosen by the slice mode
+  // (KS: this is approx. max. allowed code picture size)
+  if (input->slice_mode == 2)
+  {
+    bytes_per_buffer = 2 * input->slice_argument;
+  }
+  else if (input->slice_mode == 1)
+  {
+    bytes_per_buffer = 500 + input->slice_argument * (128 + 256 * img->bitdepth_luma + 512 * img->bitdepth_chroma);
+  }
+  else
+  {
+    bytes_per_buffer = 500 + img->FrameSizeInMbs * (128 + 256 * img->bitdepth_luma + 512 * img->bitdepth_chroma);
+  }
+
+  new_slice = calloc(1, sizeof(Slice));
+  if (new_slice == NULL)
+    no_mem_exit ("malloc_slice: slice structure");
+
+  if (input->symbol_mode == CABAC)
+  {
+    // create all context models
+    new_slice->mot_ctx = create_contexts_MotionInfo();
+    new_slice->tex_ctx = create_contexts_TextureInfo();
+  }
+
+  // IDR pictures and the non-partitioned mode use a single partition, otherwise three
+  if (img->currentPicture->idr_flag || input->partition_mode == 0)
+    new_slice->max_part_nr = 1;
+  else
+    new_slice->max_part_nr = 3;
+
+  // for IDR pictures all syntax elements are mapped to one partition
+  assignSE2partition[0] = assignSE2partition_NoDP;
+  if (img->currentPicture->idr_flag || input->partition_mode != 1)
+    assignSE2partition[1] = assignSE2partition_NoDP;
+  else
+    assignSE2partition[1] = assignSE2partition_DP;
+
+  new_slice->num_mb = 0;          // no coded MBs so far
+
+  new_slice->partArr = calloc(new_slice->max_part_nr, sizeof(DataPartition));
+  if (new_slice->partArr == NULL)
+    no_mem_exit ("malloc_slice: partArr");
+
+  // give every data partition its own Bitstream and stream buffer
+  for (part = 0; part < new_slice->max_part_nr; part++)
+  {
+    stream = calloc(1, sizeof(Bitstream));
+    new_slice->partArr[part].bitstream = stream;
+    if (stream == NULL)
+      no_mem_exit ("malloc_slice: Bitstream");
+
+    stream->streamBuffer = calloc(bytes_per_buffer, sizeof(byte));
+    if (stream->streamBuffer == NULL)
+      no_mem_exit ("malloc_slice: StreamBuffer");
+  }
+  return new_slice;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Releases every slice stored in a picture via free_slice() and
+ *    clears the corresponding entries of the slice table
+ * \param currPic
+ *    Picture whose first no_slices entries of slices[] are freed;
+ *    no_slices itself is left unchanged
+ ************************************************************************
+ */
+void free_slice_list(Picture *currPic)
+{
+  int idx = 0;
+
+  while (idx < currPic->no_slices)
+  {
+    free_slice (currPic->slices[idx]);
+    currPic->slices[idx] = NULL;   // avoid keeping a dangling pointer in the table
+    ++idx;
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Memory frees of the Slice structure and of its dependent
+ *    data structures: the stream buffer and Bitstream of every data
+ *    partition, the partition array, the CABAC context models (when
+ *    input->symbol_mode is CABAC) and finally the Slice itself
+ * \param slice:
+ *    Slice to be freed; NULL is accepted and ignored
+ ************************************************************************
+ */
+static void free_slice(Slice *slice)
+{
+  int i;
+  Bitstream *stream;
+
+  if (slice == NULL)
+    return;
+
+  // The partition array must be checked before it is indexed
+  if (slice->partArr != NULL)
+  {
+    for (i=0; i<slice->max_part_nr; i++) // loop over all data partitions
+    {
+      stream = slice->partArr[i].bitstream;
+      // Test the Bitstream pointer before reaching through it for the buffer
+      if (stream != NULL)
+      {
+        free(stream->streamBuffer);   // free(NULL) is a no-op, no guard needed
+        free(stream);
+      }
+    }
+    free(slice->partArr);
+  }
+
+  if (input->symbol_mode == CABAC)
+  {
+    delete_contexts_MotionInfo(slice->mot_ctx);
+    delete_contexts_TextureInfo(slice->tex_ctx);
+  }
+
+  free(slice);
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Fills the four reference-number tables of enc_picture for one
+ *    reference list, using the poc values of the pictures in listX[list]
+ * \param list
+ *    Index into listX / listXsize and into the enc_picture tables
+ ************************************************************************
+ */
+static void set_ref_pic_num_for_list(int list)
+{
+  int idx;
+  StorablePicture *ref;
+
+  //! need to add field ref_pic_num that handles field pair.
+  for (idx = 0; idx < listXsize[list]; idx++)
+  {
+    ref = listX[list][idx];
+    // twice the poc, plus one for a bottom field
+    enc_picture->ref_pic_num        [list][idx] = ref->poc * 2 + ((ref->structure==BOTTOM_FIELD)?1:0);
+    enc_picture->frm_ref_pic_num    [list][idx] = ref->frame_poc * 2;
+    enc_picture->top_ref_pic_num    [list][idx] = ref->top_poc * 2;
+    enc_picture->bottom_ref_pic_num [list][idx] = ref->bottom_poc * 2 + 1;
+  }
+}
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Writes the reference-number tables of enc_picture for LIST_0 and
+ *    LIST_1, and additionally for lists 2..5 when the sequence is not
+ *    frame_mbs_only and the current picture structure is FRAME
+ ************************************************************************
+ */
+void set_ref_pic_num()
+{
+  int list;
+
+  set_ref_pic_num_for_list(LIST_0);
+  set_ref_pic_num_for_list(LIST_1);
+
+  if (active_sps->frame_mbs_only_flag || img->structure != FRAME)
+    return;
+
+  for (list = 2; list < 6; list++)
+    set_ref_pic_num_for_list(list);
+}
+
+/*!
+************************************************************************
+* \brief
+*    decide reference picture reordering, Frame only
+*
+*    Sorts the pic_num values of the frames in dpb.fs_ref by absolute poc
+*    distance to enc_picture. If that order differs from the default order
+*    of the list, reordering commands are written to the output arrays and
+*    the slice's ref_pic_list_reordering_flag for that list is set to 1.
+* \param list
+*    reference list; pic_num of its first num_ref_idx_lX_active entries
+*    is taken as the default order
+* \param num_ref_idx_lX_active
+*    number of list entries considered
+* \param reordering_of_pic_nums_idc
+*    output: 0 or 1 per command depending on the sign of the pic_num
+*    difference, followed by a 3 after the last command
+* \param abs_diff_pic_num_minus1
+*    output: absolute pic_num difference minus one per command
+* \param long_term_pic_idx
+*    not referenced by this function
+* \param list_no
+*    LIST_0 or another list; selects the tie-break sign and which
+*    reordering flag of img->currentSlice is set
+************************************************************************
+*/
+void poc_ref_pic_reorder(StorablePicture **list, unsigned num_ref_idx_lX_active, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx, int list_no)
+{
+  unsigned i,j,k;
+
+  int currPicNum, picNumLXPred;
+
+  int default_order[32];
+  int re_order[32];
+  int tmp_reorder[32];
+  int list_sign[32];
+  int reorder_stop, no_reorder;
+  int poc_diff[32];
+  int tmp_value, diff;
+
+  int abs_poc_dist;
+  int maxPicNum, MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+
+  if (img->structure==FRAME)
+  {
+    maxPicNum  = MaxFrameNum;
+    currPicNum = img->frame_num;
+  }
+  else
+  {
+    maxPicNum  = 2 * MaxFrameNum;
+    currPicNum = 2 * img->frame_num + 1;
+  }
+
+  picNumLXPred = currPicNum;
+
+  // First assign default list order.
+  for (i=0; i<num_ref_idx_lX_active; i++)
+  {
+    default_order[i] = list[i]->pic_num;
+  }
+
+  // Now access all references in buffer and assign them
+  // to a potential reordering list. For each one of these
+  // references compute the poc distance compared to current
+  // frame.
+  for (i=0; i<dpb.ref_frames_in_buffer; i++)
+  {
+    re_order[i] = dpb.fs_ref[i]->frame->pic_num;
+
+    if (dpb.fs_ref[i]->is_used==3 && (dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term)) // NOTE(review): entries failing this test leave poc_diff[i] and list_sign[i] unset, yet the sort below reads them - confirm every buffered frame passes
+    {
+      abs_poc_dist = iabs(dpb.fs_ref[i]->frame->poc - enc_picture->poc) ;
+      poc_diff[i] = abs_poc_dist;
+      if (list_no == LIST_0)
+      {
+        list_sign[i] = (enc_picture->poc < dpb.fs_ref[i]->frame->poc) ? +1 : -1;
+      }
+      else
+      {
+        list_sign[i] = (enc_picture->poc > dpb.fs_ref[i]->frame->poc) ? +1 : -1;
+      }
+    }
+  }
+
+
+  // now sort these references based on poc (temporal) distance
+  for (i=0; i< dpb.ref_frames_in_buffer-1; i++) // NOTE(review): if ref_frames_in_buffer is unsigned and 0 the bound wraps around - confirm callers guarantee at least one reference
+  {
+    for (j=i+1; j< dpb.ref_frames_in_buffer; j++)
+    {
+      if (poc_diff[i]>poc_diff[j] || (poc_diff[i] == poc_diff[j] && list_sign[j] > list_sign[i])) // smaller distance first; on a tie the entry with the larger sign goes first
+      {
+
+        tmp_value = poc_diff[i];
+        poc_diff[i] = poc_diff[j];
+        poc_diff[j] = tmp_value;
+        tmp_value  = re_order[i];
+        re_order[i] = re_order[j];
+        re_order[j] = tmp_value ;
+        tmp_value  = list_sign[i];
+        list_sign[i] = list_sign[j];
+        list_sign[j] = tmp_value ;
+      }
+    }
+  }
+
+  // Check versus default list to see if any
+  // change has happened
+  no_reorder = 1;
+  for (i=0; i<num_ref_idx_lX_active; i++)
+  {
+    if (default_order[i] != re_order[i])
+    {
+      no_reorder = 0;
+    }
+  }
+
+  // If different, then signal reordering
+  if (no_reorder==0)
+  {
+    for (i=0; i<num_ref_idx_lX_active; i++)
+    {
+      diff = re_order[i]-picNumLXPred; // difference to the previously signalled pic_num (initially currPicNum)
+      if (diff <= 0)
+      {
+        reordering_of_pic_nums_idc[i] = 0;
+        abs_diff_pic_num_minus1[i] = iabs(diff)-1;
+        if (abs_diff_pic_num_minus1[i] < 0) // only reached when diff is 0
+          abs_diff_pic_num_minus1[i] = maxPicNum -1;
+      }
+      else
+      {
+        reordering_of_pic_nums_idc[i] = 1;
+        abs_diff_pic_num_minus1[i] = iabs(diff)-1;
+      }
+      picNumLXPred = re_order[i];
+
+      tmp_reorder[i] = re_order[i];
+
+      k = i;
+      for (j=i; j<num_ref_idx_lX_active; j++) // rebuild the list as it looks after this command: the moved entry first, the remaining defaults behind it
+      {
+        if (default_order[j] != re_order[i])
+        {
+          ++k;
+          tmp_reorder[k] = default_order[j];
+        }
+      }
+      reorder_stop = 1;
+      for(j=i+1; j<num_ref_idx_lX_active; j++) // stop early once the rest of the list already matches the target order
+      {
+        if (tmp_reorder[j] != re_order[j])
+        {
+          reorder_stop = 0;
+          break;
+        }
+      }
+
+      if (reorder_stop==1)
+      {
+        ++i;
+        break;
+      }
+
+      for(j=0; j<num_ref_idx_lX_active; j++)
+      {
+        default_order[j] = tmp_reorder[j];
+      }
+
+    }
+    reordering_of_pic_nums_idc[i] = 3; // written after the last command; i can equal num_ref_idx_lX_active here, so the caller's array needs one extra slot
+
+    for(j=0; j<num_ref_idx_lX_active; j++) // default_order is local and is not read again after this copy
+    {
+      default_order[j] = tmp_reorder[j];
+    }
+
+    if (list_no==0)
+    {
+      img->currentSlice->ref_pic_list_reordering_flag_l0=1;
+    }
+    else
+    {
+      img->currentSlice->ref_pic_list_reordering_flag_l1=1;
+    }
+  }
+}
+
+extern int QP2QUANT[40];
+
+void SetLagrangianMultipliers() // Fills img->lambda_md, img->lambda_me and img->lambda_mf (and lambda_mf_factor when input->CtxAdptLagrangeMult is 1) for every table row j and every qp
+{
+  int qp, j, k; // j: table row, compared against slice types below; k: F_PEL..Q_PEL index into input->MEErrorMetric
+  double qp_temp;
+  double lambda_scale = 1.0 - dClip3(0.0,0.5,0.05 * (double) input->jumpd);; // presumably dClip3(lo, hi, x) clamps x to [lo, hi], giving a scale in [0.5, 1.0] - confirm
+
+  if (input->rdopt) // RDOPT on computation of Lagrangian multipliers
+  {
+    for (j = 0; j < 5; j++) // row 5 is filled inside the j == B_SLICE branches below
+    {
+      for (qp = -img->bitdepth_luma_qp_scale; qp < 52; qp++) // NOTE(review): qp can start negative and is used directly as a table index - confirm the lambda tables are allocated with a matching offset
+      {
+        qp_temp = (double)qp + img->bitdepth_luma_qp_scale - SHIFT_QP;
+
+        if (input->UseExplicitLambdaParams == 1) // consideration of explicit lambda weights.
+        {
+          img->lambda_md[j][qp] = input->LambdaWeight[j] * pow (2, qp_temp/3.0);
+          // Scale lambda due to hadamard qpel only consideration
+          img->lambda_md[j][qp] = ( (input->MEErrorMetric[H_PEL] == ERROR_SATD && input->MEErrorMetric[Q_PEL] == ERROR_SATD) ? 1.00 : 0.95) * img->lambda_md[j][qp];
+
+          for (k = F_PEL; k <= Q_PEL; k++) // lambda_me is lambda_md itself for the SSE metric and its square root otherwise
+          {
+            img->lambda_me[j][qp][k] = input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_md[j][qp] : sqrt(img->lambda_md[j][qp]);
+            img->lambda_mf[j][qp][k] = LAMBDA_FACTOR (img->lambda_me[j][qp][k]);
+          }
+
+          if (j == B_SLICE)
+          {
+            img->lambda_md[5][qp] = input->LambdaWeight[5] * pow (2, qp_temp/3.0);
+            img->lambda_md[5][qp] = ((input->MEErrorMetric[H_PEL] == ERROR_SATD && input->MEErrorMetric[Q_PEL] == ERROR_SATD) ? 1.00 : 0.95) * img->lambda_md[5][qp];
+
+            for (k = F_PEL; k <= Q_PEL; k++)
+            {
+              img->lambda_me[5][qp][k] = input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_md[5][qp] : sqrt(img->lambda_md[5][qp]);
+              img->lambda_mf[5][qp][k] = LAMBDA_FACTOR (img->lambda_me[5][qp][k]);
+            }
+          }
+        }
+        else if (input->UseExplicitLambdaParams == 2) // consideration of fixed lambda values.
+        {
+          img->lambda_md[j][qp] = input->FixedLambda[j];
+          // Scale lambda due to hadamard qpel only consideration
+          img->lambda_md[j][qp] = ( (input->MEErrorMetric[H_PEL] == ERROR_SATD && input->MEErrorMetric[Q_PEL] == ERROR_SATD) ? 1.00 : 0.95) * img->lambda_md[j][qp];
+
+          for (k = F_PEL; k <= Q_PEL; k++)
+          {
+            img->lambda_me[j][qp][k] = input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_md[j][qp] : sqrt(img->lambda_md[j][qp]);
+            img->lambda_mf[j][qp][k] = LAMBDA_FACTOR (img->lambda_me[j][qp][k]);
+          }
+
+          if (j == B_SLICE)
+          {
+            img->lambda_md[5][qp] = input->FixedLambda[5];
+            img->lambda_md[5][qp] = ((input->MEErrorMetric[H_PEL] == ERROR_SATD && input->MEErrorMetric[Q_PEL] == ERROR_SATD) ? 1.00 : 0.95) * img->lambda_md[5][qp];
+
+            for (k = F_PEL; k <= Q_PEL; k++)
+            {
+              img->lambda_me[5][qp][k] = input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_md[5][qp] : sqrt(img->lambda_md[5][qp]);
+              img->lambda_mf[5][qp][k] = LAMBDA_FACTOR (img->lambda_me[5][qp][k]);
+            }
+          }
+        }
+        else
+        {
+          if (input->successive_Bframe>0)
+            img->lambda_md[j][qp] = 0.68 * pow (2, qp_temp/3.0)
+            * (j == B_SLICE ? dClip3(2.00,4.00,(qp_temp / 6.0)) : (j == SP_SLICE) ? dClip3(1.4,3.0,(qp_temp / 12.0)) : 1.0);
+          else
+            img->lambda_md[j][qp] = 0.85 * pow (2, qp_temp/3.0)
+            * ( (j == B_SLICE) ? 4.0 : (j == SP_SLICE) ? dClip3(1.4,3.0,(qp_temp / 12.0)) : 1.0);
+          // Scale lambda due to hadamard qpel only consideration
+          img->lambda_md[j][qp] = ((input->MEErrorMetric[H_PEL] == ERROR_SATD && input->MEErrorMetric[Q_PEL] == ERROR_SATD) ? 1.00 : 0.95) * img->lambda_md[j][qp];
+          img->lambda_md[j][qp] = (j == B_SLICE && input->BRefPictures == 2 && img->b_frame_to_code == 0 ? 0.50 : 1.00) * img->lambda_md[j][qp];
+
+          if (j == B_SLICE)
+          {
+            img->lambda_md[5][qp] = img->lambda_md[j][qp];
+
+            if (input->HierarchicalCoding == 2)
+              img->lambda_md[5][qp] *= (1.0 - dmin(0.4,0.2 * (double) gop_structure[img->b_frame_to_code-1].hierarchy_layer)) ; // NOTE(review): index is -1 when img->b_frame_to_code is 0 - confirm that cannot happen on this path
+            else
+              img->lambda_md[5][qp] *= 0.80;
+
+            img->lambda_md[5][qp] *= lambda_scale;
+
+            for (k = F_PEL; k <= Q_PEL; k++)
+            {
+              img->lambda_me[5][qp][k] = input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_md[5][qp] : sqrt(img->lambda_md[5][qp]);
+              img->lambda_mf[5][qp][k] = LAMBDA_FACTOR (img->lambda_me[5][qp][k]);
+            }
+          }
+          else
+            img->lambda_md[j][qp] *= lambda_scale; // lambda_scale is applied to row 5 above, and to row j only when j is not B_SLICE
+
+          for (k = F_PEL; k <= Q_PEL; k++)
+          {
+            img->lambda_me[j][qp][k] = input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_md[j][qp] : sqrt(img->lambda_md[j][qp]);
+            img->lambda_mf[j][qp][k] = LAMBDA_FACTOR (img->lambda_me[j][qp][k]);
+          }
+
+          if (input->CtxAdptLagrangeMult == 1) // only this default branch fills lambda_mf_factor when rdopt is on; the explicit/fixed lambda branches above do not
+          {
+            int lambda_qp = (qp >= 32 && !input->RCEnable) ? imax(0, qp - 4) : imax(0, qp - 6); // NOTE(review): for negative qp this is 0, an entry this loop has not written yet in the current pass - confirm
+            img->lambda_mf_factor[j][qp] = log (img->lambda_me[j][lambda_qp][Q_PEL] + 1.0) / log (2.0); // base-2 logarithm of (lambda_me + 1)
+          }
+        }
+      }
+    }
+  }
+  else // RDOPT off computation of Lagrangian multipliers
+  {
+    for (j = 0; j < 6; j++) // all six rows are filled directly here
+    {
+      for (qp = -img->bitdepth_luma_qp_scale; qp < 52; qp++)
+      {
+        img->lambda_md[j][qp] = QP2QUANT[imax(0,qp - SHIFT_QP)];
+
+        for (k = F_PEL; k <= Q_PEL; k++)
+        {
+          img->lambda_me[j][qp][k]  = img->lambda_md[j][qp];
+          img->lambda_me[j][qp][k] *= input->MEErrorMetric[k] == ERROR_SSE ? img->lambda_me[j][qp][k] : 1; // squares the value for the SSE metric
+          img->lambda_mf[j][qp][k]  = LAMBDA_FACTOR (img->lambda_me[j][qp][k]);
+        }
+
+        if (input->CtxAdptLagrangeMult == 1)
+        {
+          int lambda_qp = (qp >= 32 && !input->RCEnable) ? imax(0, qp-4) : imax(0, qp-6);
+          img->lambda_mf_factor[j][qp] = log (img->lambda_me[j][lambda_qp][Q_PEL] + 1.0) / log (2.0);
+        }
+      }
+    }
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/symbol.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/symbol.c:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/symbol.c	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,29 ----
+ 
+ /*!
+ ***************************************************************************
+ *
+ * \file symbol.c
+ *
+ * \brief
+ *    Generic Symbol writing interface
+ *
+ * \date
+ *    18 Jan 2006
+ *
+ * \author
+ *    Karsten Suehring   suehring at hhi.de
+ **************************************************************************/
+ 
+ #include "global.h"
+ #include "symbol.h"
+ 
+ void (*writeMB_typeInfo)      (SyntaxElement *se, DataPartition *dP);   // definitions of the symbol-writer hooks; each is a pointer to a void function taking (SyntaxElement *, DataPartition *)
+ void (*writeIntraPredMode)    (SyntaxElement *se, DataPartition *dP);   // no initialisers: file-scope pointers start out null until assigned (assignment site not visible in this file)
+ void (*writeB8_typeInfo)      (SyntaxElement *se, DataPartition *dP);
+ void (*writeRefFrame[6])      (SyntaxElement *se, DataPartition *dP);   // array of six hooks; NOTE(review): meaning of the index is not visible here -- confirm
+ void (*writeMVD)              (SyntaxElement *se, DataPartition *dP);
+ void (*writeCBP)              (SyntaxElement *se, DataPartition *dP);
+ void (*writeDquant)           (SyntaxElement *se, DataPartition *dP);
+ void (*writeCIPredMode)       (SyntaxElement *se, DataPartition *dP);
+ void (*writeFieldModeInfo)    (SyntaxElement *se, DataPartition *dP);
+ void (*writeMB_transform_size)(SyntaxElement *se, DataPartition *dP);


Index: llvm-test/MultiSource/Applications/JM/lencod/symbol.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/symbol.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/symbol.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,26 ----
+ 
+ /*!
+ ***************************************************************************
+ *
+ * \file symbol.h
+ *
+ * \brief
+ *    Generic Symbol writing interface
+ *
+ * \date
+ *    18 Jan 2006
+ *
+ * \author
+ *    Karsten Suehring   suehring at hhi.de
+ **************************************************************************/
+ 
+ extern void (*writeMB_typeInfo)      (SyntaxElement *se, DataPartition *dP);   // extern declarations of the symbol-writer hooks defined in symbol.c
+ extern void (*writeIntraPredMode)    (SyntaxElement *se, DataPartition *dP);   // this header includes nothing itself; symbol.c includes global.h before it, presumably for SyntaxElement/DataPartition -- confirm
+ extern void (*writeB8_typeInfo)      (SyntaxElement *se, DataPartition *dP);
+ extern void (*writeRefFrame[6])      (SyntaxElement *se, DataPartition *dP);   // array of six hooks; NOTE(review): meaning of the index is not visible here -- confirm
+ extern void (*writeMVD)              (SyntaxElement *se, DataPartition *dP);
+ extern void (*writeCBP)              (SyntaxElement *se, DataPartition *dP);
+ extern void (*writeDquant)           (SyntaxElement *se, DataPartition *dP);
+ extern void (*writeCIPredMode)       (SyntaxElement *se, DataPartition *dP);
+ extern void (*writeFieldModeInfo)    (SyntaxElement *se, DataPartition *dP);
+ extern void (*writeMB_transform_size)(SyntaxElement *se, DataPartition *dP);


Index: llvm-test/MultiSource/Applications/JM/lencod/transform8x8.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/transform8x8.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/transform8x8.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1579 @@
+/*!
+ ***************************************************************************
+ * \file transform8x8.c
+ *
+ * \brief
+ *    8x8 transform functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Yuri Vatis                      <vatis at hhi.de>
+ *    - Jan Muenster                    <muenster at hhi.de>
+ *    - Lowell Winger                   <lwinger at lsil.com>
+ * \date
+ *    12. October 2003
+ **************************************************************************
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <limits.h>
+
+#include "global.h"
+
+#include "image.h"
+#include "mb_access.h"
+#include "elements.h"
+#include "cabac.h"
+#include "vlc.h"
+#include "minmax.h"
+#include "transform8x8.h"
+#include "macroblock.h"
+#include "symbol.h"
+
+int   cofAC8x8_chroma[2][4][2][18];   // externally visible buffer; NOTE(review): its users are not visible in this part of the file -- confirm purpose
+static int diff64[64];                // file-local scratch: 64 prediction differences of one 8x8 block, filled row by row and handed to distortion8x8()
+
+
+const int quant_coef8[6][8][8] =   // six constant 8x8 integer tables; NOTE(review): presumably forward-quantisation scales selected by (qp % 6) -- users not visible here, confirm
+{
+  {
+    {13107, 12222,  16777,  12222,  13107,  12222,  16777,  12222},
+    {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428},
+    {16777, 15481,  20972,  15481,  16777,  15481,  20972,  15481},
+    {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428},
+    {13107, 12222,  16777,  12222,  13107,  12222,  16777,  12222},
+    {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428},
+    {16777, 15481,  20972,  15481,  16777,  15481,  20972,  15481},
+    {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428}
+  },
+  {
+    {11916, 11058,  14980,  11058,  11916,  11058,  14980,  11058},
+    {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826},
+    {14980, 14290,  19174,  14290,  14980,  14290,  19174,  14290},
+    {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826},
+    {11916, 11058,  14980,  11058,  11916,  11058,  14980,  11058},
+    {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826},
+    {14980, 14290,  19174,  14290,  14980,  14290,  19174,  14290},
+    {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826}
+  },
+  {
+    {10082, 9675,   12710,  9675,   10082,  9675, 12710,  9675},
+    {9675,  8943,   11985,  8943,   9675,   8943, 11985,  8943},
+    {12710, 11985,  15978,  11985,  12710,  11985,  15978,  11985},
+    {9675,  8943,   11985,  8943,   9675,   8943, 11985,  8943},
+    {10082, 9675,   12710,  9675,   10082,  9675, 12710,  9675},
+    {9675,  8943,   11985,  8943,   9675, 8943, 11985,  8943},
+    {12710, 11985,  15978,  11985,  12710,  11985,  15978,  11985},
+    {9675,  8943,   11985,  8943,   9675, 8943, 11985,  8943}
+  },
+  {
+    {9362,  8931, 11984,  8931, 9362, 8931, 11984,  8931},
+    {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228},
+    {11984, 11259,  14913,  11259,  11984,  11259,  14913,  11259},
+    {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228},
+    {9362,  8931, 11984,  8931, 9362, 8931, 11984,  8931},
+    {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228},
+    {11984, 11259,  14913,  11259,  11984,  11259,  14913,  11259},
+    {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228}
+  },
+  {
+    {8192,  7740, 10486,  7740, 8192, 7740, 10486,  7740},
+    {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346},
+    {10486, 9777, 13159,  9777, 10486,  9777, 13159,  9777},
+    {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346},
+    {8192,  7740, 10486,  7740, 8192, 7740, 10486,  7740},
+    {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346},
+    {10486, 9777, 13159,  9777, 10486,  9777, 13159,  9777},
+    {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346}
+  },
+  {
+    {7282,  6830, 9118, 6830, 7282, 6830, 9118, 6830},
+    {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428},
+    {9118,  8640, 11570,  8640, 9118, 8640, 11570,  8640},
+    {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428},
+    {7282,  6830, 9118, 6830, 7282, 6830, 9118, 6830},
+    {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428},
+    {9118,  8640, 11570,  8640, 9118, 8640, 11570,  8640},
+    {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428}
+  }
+};
+
+
+const int dequant_coef8[6][8][8] =   // six constant 8x8 integer tables; NOTE(review): presumably inverse-quantisation scales selected by (qp % 6) -- users not visible here, confirm
+{
+  {
+    {20,  19, 25, 19, 20, 19, 25, 19},
+    {19,  18, 24, 18, 19, 18, 24, 18},
+    {25,  24, 32, 24, 25, 24, 32, 24},
+    {19,  18, 24, 18, 19, 18, 24, 18},
+    {20,  19, 25, 19, 20, 19, 25, 19},
+    {19,  18, 24, 18, 19, 18, 24, 18},
+    {25,  24, 32, 24, 25, 24, 32, 24},
+    {19,  18, 24, 18, 19, 18, 24, 18}
+  },
+  {
+    {22,  21, 28, 21, 22, 21, 28, 21},
+    {21,  19, 26, 19, 21, 19, 26, 19},
+    {28,  26, 35, 26, 28, 26, 35, 26},
+    {21,  19, 26, 19, 21, 19, 26, 19},
+    {22,  21, 28, 21, 22, 21, 28, 21},
+    {21,  19, 26, 19, 21, 19, 26, 19},
+    {28,  26, 35, 26, 28, 26, 35, 26},
+    {21,  19, 26, 19, 21, 19, 26, 19}
+  },
+  {
+    {26,  24, 33, 24, 26, 24, 33, 24},
+    {24,  23, 31, 23, 24, 23, 31, 23},
+    {33,  31, 42, 31, 33, 31, 42, 31},
+    {24,  23, 31, 23, 24, 23, 31, 23},
+    {26,  24, 33, 24, 26, 24, 33, 24},
+    {24,  23, 31, 23, 24, 23, 31, 23},
+    {33,  31, 42, 31, 33, 31, 42, 31},
+    {24,  23, 31, 23, 24, 23, 31, 23}
+  },
+  {
+    {28,  26, 35, 26, 28, 26, 35, 26},
+    {26,  25, 33, 25, 26, 25, 33, 25},
+    {35,  33, 45, 33, 35, 33, 45, 33},
+    {26,  25, 33, 25, 26, 25, 33, 25},
+    {28,  26, 35, 26, 28, 26, 35, 26},
+    {26,  25, 33, 25, 26, 25, 33, 25},
+    {35,  33, 45, 33, 35, 33, 45, 33},
+    {26,  25, 33, 25, 26, 25, 33, 25}
+  },
+  {
+    {32,  30, 40, 30, 32, 30, 40, 30},
+    {30,  28, 38, 28, 30, 28, 38, 28},
+    {40,  38, 51, 38, 40, 38, 51, 38},
+    {30,  28, 38, 28, 30, 28, 38, 28},
+    {32,  30, 40, 30, 32, 30, 40, 30},
+    {30,  28, 38, 28, 30, 28, 38, 28},
+    {40,  38, 51, 38, 40, 38, 51, 38},
+    {30,  28, 38, 28, 30, 28, 38, 28}
+  },
+  {
+    {36,  34, 46, 34, 36, 34, 46, 34},
+    {34,  32, 43, 32, 34, 32, 43, 32},
+    {46,  43, 58, 43, 46, 43, 58, 43},
+    {34,  32, 43, 32, 34, 32, 43, 32},
+    {36,  34, 46, 34, 36, 34, 46, 34},
+    {34,  32, 43, 32, 34, 32, 43, 32},
+    {46,  43, 58, 43, 46, 43, 58, 43},
+    {34,  32, 43, 32, 34, 32, 43, 32}
+  }
+
+};
+
+
+//! single scan pattern
+const byte SNGL_SCAN8x8[64][2] = {   // 64 coordinate pairs (each component 0..7) in zig-zag order; NOTE(review): pair order looks like {column,row} -- confirm against users
+  {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1},
+  {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0},
+  {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4},
+  {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3},
+  {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4},
+  {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6},
+  {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5},
+  {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7}
+};
+
+
+//! field scan pattern
+const byte FIELD_SCAN8x8[64][2] = {   // 8x8: 64 coordinate pairs (each component 0..7); NOTE(review): pair order presumably matches SNGL_SCAN8x8 -- confirm against users
+  {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2},
+  {2,0}, {1,3}, {0,5}, {0,6}, {0,7}, {1,4}, {2,1}, {3,0},
+  {2,2}, {1,5}, {1,6}, {1,7}, {2,3}, {3,1}, {4,0}, {3,2},
+  {2,4}, {2,5}, {2,6}, {2,7}, {3,3}, {4,1}, {5,0}, {4,2},
+  {3,4}, {3,5}, {3,6}, {3,7}, {4,3}, {5,1}, {6,0}, {5,2},
+  {4,4}, {4,5}, {4,6}, {4,7}, {5,3}, {6,1}, {6,2}, {5,4},
+  {5,5}, {5,6}, {5,7}, {6,3}, {7,0}, {7,1}, {6,4}, {6,5},
+  {6,6}, {6,7}, {7,2}, {7,3}, {7,4}, {7,5}, {7,6}, {7,7}
+};
+
+
+//! array used to find expensive coefficients
+const byte COEFF_COST8x8[2][64] =   // two 64-entry cost rows; NOTE(review): what selects the row and what indexes a row is not visible here -- confirm
+{
+  {3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,   // row 0: cost falls 3 -> 2 -> 1 -> 0 as the index grows
+  1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+  {9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,   // row 1: flat cost of 9 for every index
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
+};
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Intra 8x8 mode decision for a whole macroblock: runs the per-block decision on
+ *    the four 8x8 blocks, sums their costs into *min_cost and returns a 4-bit mask.
+ *************************************************************************************
+ */
+int Mode_Decision_for_new_Intra8x8Macroblock (double lambda, int *min_cost)
+{
+  int coded_mask = 0;   // bit n is set when 8x8 block n reported a nonzero result
+  int blk, blk_cost;
+
+  // start from a lambda-derived base cost, rounded to an integer
+  *min_cost = (int)floor(6.0 * lambda + 0.4999);
+
+  for (blk = 0; blk < 4; ++blk)
+  {
+    int coded = Mode_Decision_for_new_8x8IntraBlocks (blk, lambda, &blk_cost);
+
+    coded_mask |= (coded != 0) << blk;
+    *min_cost  += blk_cost;   // added for every block; NOTE(review): the callee leaves INT_MAX here on its RD path, so this sum can overflow -- confirm callers ignore it then
+  }
+  return coded_mask;
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Intra mode decision for one 8x8 luma block (index b8) of the current macroblock.
+ * \return coefficient flag of the selected mode; *min_cost is lowered on the non-RD path only
+ *************************************************************************************
+ */
+int Mode_Decision_for_new_8x8IntraBlocks (int b8, double lambda, int *min_cost)
+{
+  int     ipmode, best_ipmode = 0, i, j, k, y, cost, dummy;
+  int     c_nz, nonzero = 0;
+  imgpel  rec8x8[8][8];                   // reconstruction saved for the best RD mode found so far
+  double  rdcost = 0.0;
+  int     block_x     = 8*(b8 & 0x01);    // horizontal pixel offset of block b8 inside the macroblock (0 or 8)
+  int     block_y     = 8*(b8 >> 1);      // vertical pixel offset of block b8 inside the macroblock
+  int     pic_pix_x   = img->pix_x+block_x;
+  int     pic_pix_y   = img->pix_y+block_y;
+  int     pic_opix_x   = img->opix_x+block_x;
+  int     pic_opix_y   = img->opix_y+block_y;
+  int     pic_block_x = pic_pix_x/4;
+  int     pic_block_y = pic_pix_y/4;
+  double  min_rdcost  = 1e30;
+  imgpel    **imgY_orig  = imgY_org;
+  extern  int ****cofAC8x8;
+  int fadjust8x8[2][16][16];              // local backup of img->fadjust8x8; only plane [1] is copied below
+  int left_available, up_available, all_available;
+
+  signed char upMode;     // explicitly signed: -1 marks "not available" and must survive the "< 0" test below
+  signed char leftMode;   // (plain char is unsigned on some targets, which would turn -1 into 255)
+  int    mostProbableMode;
+
+  PixelPos left_block;
+  PixelPos top_block;
+
+  Macroblock     *currMB       = &img->mb_data[img->current_mb_nr];
+
+  getLuma4x4Neighbour(img->current_mb_nr, block_x - 1, block_y,     &left_block);
+  getLuma4x4Neighbour(img->current_mb_nr, block_x,     block_y - 1, &top_block);
+
+  if (input->UseConstrainedIntraPred)
+  {
+    top_block.available  = top_block.available ? img->intra_block [top_block.mb_addr] : 0;
+    left_block.available = left_block.available ? img->intra_block [left_block.mb_addr] : 0;
+  }
+
+  if(b8 >> 1)             // lower block row: the neighbour above is read from ipredmode8x8
+    upMode    =  top_block.available ? img->ipredmode8x8[top_block.pos_y ][top_block.pos_x ] : -1;
+  else
+    upMode    =  top_block.available ? img->ipredmode   [top_block.pos_y ][top_block.pos_x ] : -1;
+  if(b8 & 0x01)           // right block column: the neighbour to the left is read from ipredmode8x8
+    leftMode  = left_block.available ? img->ipredmode8x8[left_block.pos_y][left_block.pos_x] : -1;
+  else
+    leftMode  = left_block.available ? img->ipredmode[left_block.pos_y][left_block.pos_x] : -1;
+
+  mostProbableMode  = (upMode < 0 || leftMode < 0) ? DC_PRED : upMode < leftMode ? upMode : leftMode;
+
+  *min_cost = INT_MAX;    // only the non-RD branch below ever lowers this value
+
+  //===== INTRA PREDICTION FOR 8x8 BLOCK =====
+  intrapred_luma8x8 (pic_pix_x, pic_pix_y, &left_available, &up_available, &all_available);
+
+  //===== LOOP OVER ALL 8x8 INTRA PREDICTION MODES =====
+  for (ipmode=0; ipmode<NO_INTRA_PMODE; ipmode++)
+  {
+    if( (ipmode==DC_PRED) ||
+        ((ipmode==VERT_PRED||ipmode==VERT_LEFT_PRED||ipmode==DIAG_DOWN_LEFT_PRED) && up_available ) ||
+        ((ipmode==HOR_PRED||ipmode==HOR_UP_PRED) && left_available ) ||
+        (all_available) )
+    {
+      if (!input->rdopt)
+      {
+        for (k=j=0; j<8; j++)
+          for (i=0; i<8; i++, k++)
+          {
+            diff64[k] = imgY_orig[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[ipmode][j][i];
+          }
+        cost  = (ipmode == mostProbableMode) ? 0 : (int)floor(4 * lambda );
+        cost += distortion8x8 (diff64);
+        if (cost < *min_cost)
+        {
+          best_ipmode = ipmode;
+          *min_cost   = cost;
+        }
+      }
+      else
+      {
+        // get prediction and prediction error
+        for (j=0; j<8; j++)
+        {
+          memcpy(&img->mpr[block_y+j][block_x],img->mprr_3[ipmode][j], 8 * sizeof(imgpel));
+          for (i=0; i<8; i++)
+          {
+            img->m7[j][i] = imgY_orig[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[ipmode][j][i];
+          }
+        }
+        //===== store the coding state =====
+        //store_coding_state_cs_cm();
+        // get and check rate-distortion cost
+
+        if ((rdcost = RDCost_for_8x8IntraBlocks (&c_nz, b8, ipmode, lambda, min_rdcost, mostProbableMode)) < min_rdcost)
+        {
+          //--- set coefficients ---
+          for(k=0; k<4; k++) // do 4x now
+          {
+            for (j=0; j<2; j++)
+              memcpy(cofAC8x8[b8][k][j],img->cofAC[b8][k][j], 65 * sizeof(int));
+          }
+
+          //--- set reconstruction ---
+          for (y=0; y<8; y++)
+          {
+            memcpy(rec8x8[y],&enc_picture->imgY[pic_pix_y+y][pic_pix_x], 8 * sizeof(imgpel));
+          }
+
+          if (img->AdaptiveRounding)
+          {
+            for (j=block_y; j<block_y + 8; j++)
+              memcpy(&fadjust8x8[1][j][block_x],&img->fadjust8x8[1][j][block_x], 8 * sizeof(int));
+          }
+
+          //--- flag if dct-coefficients must be coded ---
+          nonzero = c_nz;
+
+          //--- set best mode update minimum cost ---
+          min_rdcost  = rdcost;
+          best_ipmode = ipmode;
+        }
+        reset_coding_state_cs_cm();
+      }
+    }
+  }
+
+  //===== set intra mode prediction =====
+  img->ipredmode8x8[pic_block_y][pic_block_x] = (char) best_ipmode;
+  currMB->intra_pred_modes8x8[4*b8] = (mostProbableMode == best_ipmode)
+    ? -1
+    : (best_ipmode < mostProbableMode ? best_ipmode : best_ipmode-1);
+
+  for(j = img->mb_y*4+(b8 >> 1)*2; j < img->mb_y*4+(b8 >> 1)*2 + 2; j++)   //loop 4x4s in the subblock for 8x8 prediction setting
+   memset(&img->ipredmode8x8[j][img->mb_x*4+(b8 & 0x01)*2], best_ipmode, 2 * sizeof(char));
+
+
+  if (!input->rdopt)
+  {
+    // get prediction and prediction error
+    for (j=0; j<8; j++)
+    {
+      memcpy(&img->mpr[block_y+j][block_x],img->mprr_3[best_ipmode][j], 8 * sizeof(imgpel));
+      for (i=0; i<8; i++)
+      {
+        img->m7[j][i] = imgY_orig[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[best_ipmode][j][i];
+      }
+    }
+    nonzero = dct_luma8x8 (b8, &dummy, 1);
+  }
+  else
+  {
+    //===== restore coefficients =====
+    for(k=0; k<4; k++) // do 4x now
+    {
+      for (j=0; j<2; j++)
+        memcpy(img->cofAC[b8][k][j],cofAC8x8[b8][k][j], 65 * sizeof(int));
+    }
+
+    if (img->AdaptiveRounding)
+    {
+      for (j=0; j<8; j++)
+        memcpy(&img->fadjust8x8[1][block_y+j][block_x], &fadjust8x8[1][block_y+j][block_x], 8 * sizeof(int));
+    }
+
+    //===== restore reconstruction and prediction (needed if single coeffs are removed) =====
+    for (y=0; y<8; y++)
+    {
+      memcpy(&enc_picture->imgY[pic_pix_y+y][pic_pix_x], rec8x8[y], 8 * sizeof(imgpel));
+      memcpy(&img->mpr[block_y+y][block_x], img->mprr_3[best_ipmode][y], 8 * sizeof(imgpel));
+    }
+  }
+
+  return nonzero;
+}
+
+
+
+// Notation for comments regarding prediction and predictors.
+// The pels of the 8x8 block are labelled a1..h8. The predictor pels above
+// are labelled A..P, from the left Q..X, and from above left Z, as follows:
+//
+//  Z  A  B  C  D  E  F  G  H  I  J  K  L  M   N  O  P
+//  Q  a1 b1 c1 d1 e1 f1 g1 h1
+//  R  a2 b2 c2 d2 e2 f2 g2 h2
+//  S  a3 b3 c3 d3 e3 f3 g3 h3
+//  T  a4 b4 c4 d4 e4 f4 g4 h4
+//  U  a5 b5 c5 d5 e5 f5 g5 h5
+//  V  a6 b6 c6 d6 e6 f6 g6 h6
+//  W  a7 b7 c7 d7 e7 f7 g7 h7
+//  X  a8 b8 c8 d8 e8 f8 g8 h8
+
+
+// Predictor array index definitions
+#define P_Z (PredPel[0])    // above-left corner sample; all P_* names expand to elements of an array called PredPel that must be in scope
+#define P_A (PredPel[1])    // P_A..P_H: the eight samples of the row above the block
+#define P_B (PredPel[2])
+#define P_C (PredPel[3])
+#define P_D (PredPel[4])
+#define P_E (PredPel[5])
+#define P_F (PredPel[6])
+#define P_G (PredPel[7])
+#define P_H (PredPel[8])
+#define P_I (PredPel[9])    // P_I..P_P: the eight samples above and to the right of the block
+#define P_J (PredPel[10])
+#define P_K (PredPel[11])
+#define P_L (PredPel[12])
+#define P_M (PredPel[13])
+#define P_N (PredPel[14])
+#define P_O (PredPel[15])
+#define P_P (PredPel[16])
+#define P_Q (PredPel[17])   // P_Q..P_X: the eight samples of the column left of the block, top to bottom
+#define P_R (PredPel[18])
+#define P_S (PredPel[19])
+#define P_T (PredPel[20])
+#define P_U (PredPel[21])
+#define P_V (PredPel[22])
+#define P_W (PredPel[23])
+#define P_X (PredPel[24])
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Make intra 8x8 prediction according to all 9 prediction modes.
+ *    The routine uses left and upper neighbouring points from
+ *    previous coded blocks to do this (if available). Notice that
+ *    inaccessible neighbouring points are signalled with a negative
+ *    value in the predmode array .
+ *
+ *  \par Input:
+ *     Starting point of current 8x8 block image position
+ *
+ *  \par Output:
+ *      none
+ ************************************************************************
+ */
+void intrapred_luma8x8(int img_x,int img_y, int *left_available, int *up_available, int *all_available)
+{
+  int i,j;
+  int s0;
+  static imgpel PredPel[25];  // array of predictor pels
+  imgpel **imgY = enc_picture->imgY;  // For MB level frame/field coding tools -- set default to imgY
+  imgpel *imgYpel;
+  imgpel (*cur_pred)[8];
+
+  int ioff = (img_x & 15);
+  int joff = (img_y & 15);
+  int mb_nr=img->current_mb_nr;
+
+  PixelPos pix_a[8];
+  PixelPos pix_b, pix_c, pix_d;
+
+  int block_available_up;
+  int block_available_left;
+  int block_available_up_left;
+  int block_available_up_right;
+
+  for (i=0;i<8;i++)
+  {
+    getNeighbour(mb_nr, ioff -1 , joff +i , IS_LUMA, &pix_a[i]);
+  }
+
+  getNeighbour(mb_nr, ioff    , joff -1 , IS_LUMA, &pix_b);
+  getNeighbour(mb_nr, ioff +8 , joff -1 , IS_LUMA, &pix_c);
+  getNeighbour(mb_nr, ioff -1 , joff -1 , IS_LUMA, &pix_d);
+
+  pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8);
+
+  if (input->UseConstrainedIntraPred)
+  {
+    for (i=0, block_available_left=1; i<8;i++)
+      block_available_left  &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0;
+    block_available_up       = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0;
+    block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0;
+    block_available_up_left  = pix_d.available ? img->intra_block [pix_d.mb_addr] : 0;
+  }
+  else
+  {
+    block_available_left     = pix_a[0].available;
+    block_available_up       = pix_b.available;
+    block_available_up_right = pix_c.available;
+    block_available_up_left  = pix_d.available;
+  }
+
+  *left_available = block_available_left;
+  *up_available   = block_available_up;
+  *all_available  = block_available_up && block_available_left && block_available_up_left;
+
+  i = (img_x & 15);
+  j = (img_y & 15);
+
+  // form predictor pels
+  // form predictor pels
+  if (block_available_up)
+  {
+    imgYpel = &imgY[pix_b.pos_y][pix_b.pos_x];
+    P_A = *(imgYpel++);
+    P_B = *(imgYpel++);
+    P_C = *(imgYpel++);
+    P_D = *(imgYpel++);
+    P_E = *(imgYpel++);
+    P_F = *(imgYpel++);
+    P_G = *(imgYpel++);
+    P_H = *(imgYpel);
+  }
+  else
+  {
+    P_A = P_B = P_C = P_D = P_E = P_F = P_G = P_H = img->dc_pred_value_luma;
+  }
+
+  if (block_available_up_right)
+  {
+    imgYpel = &imgY[pix_c.pos_y][pix_c.pos_x];
+    P_I = *(imgYpel++);
+    P_J = *(imgYpel++);
+    P_K = *(imgYpel++);
+    P_L = *(imgYpel++);
+    P_M = *(imgYpel++);
+    P_N = *(imgYpel++);
+    P_O = *(imgYpel++);
+    P_P = *(imgYpel);
+
+  }
+  else
+  {
+    P_I = P_J = P_K = P_L = P_M = P_N = P_O = P_P = P_H;
+  }
+
+  if (block_available_left)
+  {
+    P_Q = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+    P_R = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+    P_S = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+    P_T = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+    P_U = imgY[pix_a[4].pos_y][pix_a[4].pos_x];
+    P_V = imgY[pix_a[5].pos_y][pix_a[5].pos_x];
+    P_W = imgY[pix_a[6].pos_y][pix_a[6].pos_x];
+    P_X = imgY[pix_a[7].pos_y][pix_a[7].pos_x];
+  }
+  else
+  {
+    P_Q = P_R = P_S = P_T = P_U = P_V = P_W = P_X = img->dc_pred_value_luma;
+  }
+
+  if (block_available_up_left)
+  {
+    P_Z = imgY[pix_d.pos_y][pix_d.pos_x];
+  }
+  else
+  {
+    P_Z = img->dc_pred_value_luma;
+  }
+
+  for(i=0;i<9;i++)
+    img->mprr_3[i][0][0]=-1;
+
+  LowPassForIntra8x8Pred(&(P_Z), block_available_up_left, block_available_up, block_available_left);
+
+  ///////////////////////////////
+  // make DC prediction
+  ///////////////////////////////
+  s0 = 0;
+  if (block_available_up && block_available_left)
+  {
+    // no edge
+    s0 = rshift_rnd_sf((P_A + P_B + P_C + P_D + P_E + P_F + P_G + P_H + P_Q + P_R + P_S + P_T + P_U + P_V + P_W + P_X), 4);
+  }
+  else if (!block_available_up && block_available_left)
+  {
+    // upper edge
+    s0 = rshift_rnd_sf((P_Q + P_R + P_S + P_T + P_U + P_V + P_W + P_X), 3);
+  }
+  else if (block_available_up && !block_available_left)
+  {
+    // left edge
+    s0 = rshift_rnd_sf((P_A + P_B + P_C + P_D + P_E + P_F + P_G + P_H), 3);
+  }
+  else //if (!block_available_up && !block_available_left)
+  {
+    // top left corner, nothing to predict from
+    s0 = img->dc_pred_value_luma;
+  }
+
+  // store DC prediction
+  cur_pred = img->mprr_3[DC_PRED];
+  for (j=0; j < 2*BLOCK_SIZE; j++)
+  {
+    for (i=0; i < 2*BLOCK_SIZE; i++)
+    {
+      cur_pred[i][j] = (imgpel) s0;
+    }
+  }
+
+
+  ///////////////////////////////
+  // make horiz and vert prediction
+  ///////////////////////////////
+  cur_pred = img->mprr_3[VERT_PRED];
+  for (i=0; i < 2*BLOCK_SIZE; i++)
+  {
+    cur_pred[0][i] =
+    cur_pred[1][i] =
+    cur_pred[2][i] =
+    cur_pred[3][i] =
+    cur_pred[4][i] =
+    cur_pred[5][i] =
+    cur_pred[6][i] =
+    cur_pred[7][i] = (imgpel)(&P_A)[i];
+  }
+  if(!block_available_up)
+    cur_pred[0][0]=-1;
+
+  cur_pred = img->mprr_3[HOR_PRED];
+  for (i=0; i < 2*BLOCK_SIZE; i++)
+  {
+    cur_pred[i][0]  =
+    cur_pred[i][1]  =
+    cur_pred[i][2]  =
+    cur_pred[i][3]  =
+    cur_pred[i][4]  =
+    cur_pred[i][5]  =
+    cur_pred[i][6]  =
+    cur_pred[i][7]  = (imgpel) (&P_Q)[i];
+  }
+  if(!block_available_left)
+    cur_pred[0][0]=-1;
+
+  ///////////////////////////////////
+  // make diagonal down left prediction
+  ///////////////////////////////////
+  if (block_available_up)
+  {
+    // Mode DIAG_DOWN_LEFT_PRED
+    cur_pred = img->mprr_3[DIAG_DOWN_LEFT_PRED];
+    cur_pred[0][0] = (imgpel) ((P_A + P_C + 2*(P_B) + 2) >> 2);
+    cur_pred[0][1] =
+    cur_pred[1][0] = (imgpel) ((P_B + P_D + 2*(P_C) + 2) >> 2);
+    cur_pred[0][2] =
+    cur_pred[1][1] =
+    cur_pred[2][0] = (imgpel) ((P_C + P_E + 2*(P_D) + 2) >> 2);
+    cur_pred[0][3] =
+    cur_pred[1][2] =
+    cur_pred[2][1] =
+    cur_pred[3][0] = (imgpel) ((P_D + P_F + 2*(P_E) + 2) >> 2);
+    cur_pred[0][4] =
+    cur_pred[1][3] =
+    cur_pred[2][2] =
+    cur_pred[3][1] =
+    cur_pred[4][0] = (imgpel) ((P_E + P_G + 2*(P_F) + 2) >> 2);
+    cur_pred[0][5] =
+    cur_pred[1][4] =
+    cur_pred[2][3] =
+    cur_pred[3][2] =
+    cur_pred[4][1] =
+    cur_pred[5][0] = (imgpel) ((P_F + P_H + 2*(P_G) + 2) >> 2);
+    cur_pred[0][6] =
+    cur_pred[1][5] =
+    cur_pred[2][4] =
+    cur_pred[3][3] =
+    cur_pred[4][2] =
+    cur_pred[5][1] =
+    cur_pred[6][0] = (imgpel) ((P_G + P_I + 2*(P_H) + 2) >> 2);
+    cur_pred[0][7] =
+    cur_pred[1][6] =
+    cur_pred[2][5] =
+    cur_pred[3][4] =
+    cur_pred[4][3] =
+    cur_pred[5][2] =
+    cur_pred[6][1] =
+    cur_pred[7][0] = (imgpel) ((P_H + P_J + 2*(P_I) + 2) >> 2);
+    cur_pred[1][7] =
+    cur_pred[2][6] =
+    cur_pred[3][5] =
+    cur_pred[4][4] =
+    cur_pred[5][3] =
+    cur_pred[6][2] =
+    cur_pred[7][1] = (imgpel) ((P_I + P_K + 2*(P_J) + 2) >> 2);
+    cur_pred[2][7] =
+    cur_pred[3][6] =
+    cur_pred[4][5] =
+    cur_pred[5][4] =
+    cur_pred[6][3] =
+    cur_pred[7][2] = (imgpel) ((P_J + P_L + 2*(P_K) + 2) >> 2);
+    cur_pred[3][7] =
+    cur_pred[4][6] =
+    cur_pred[5][5] =
+    cur_pred[6][4] =
+    cur_pred[7][3] = (imgpel) ((P_K + P_M + 2*(P_L) + 2) >> 2);
+    cur_pred[4][7] =
+    cur_pred[5][6] =
+    cur_pred[6][5] =
+    cur_pred[7][4] = (imgpel) ((P_L + P_N + 2*(P_M) + 2) >> 2);
+    cur_pred[5][7] =
+    cur_pred[6][6] =
+    cur_pred[7][5] = (imgpel) ((P_M + P_O + 2*(P_N) + 2) >> 2);
+    cur_pred[6][7] =
+    cur_pred[7][6] = (imgpel) ((P_N + P_P + 2*(P_O) + 2) >> 2);
+    cur_pred[7][7] = (imgpel) ((P_O + 3*(P_P) + 2) >> 2);
+
+    ///////////////////////////////////
+    // make vertical left prediction
+    ///////////////////////////////////
+    cur_pred = img->mprr_3[VERT_LEFT_PRED];
+    cur_pred[0][0] = (imgpel) ((P_A + P_B + 1) >> 1);
+    cur_pred[0][1] =
+    cur_pred[2][0] = (imgpel) ((P_B + P_C + 1) >> 1);
+    cur_pred[0][2] =
+    cur_pred[2][1] =
+    cur_pred[4][0] = (imgpel) ((P_C + P_D + 1) >> 1);
+    cur_pred[0][3] =
+    cur_pred[2][2] =
+    cur_pred[4][1] =
+    cur_pred[6][0] = (imgpel) ((P_D + P_E + 1) >> 1);
+    cur_pred[0][4] =
+    cur_pred[2][3] =
+    cur_pred[4][2] =
+    cur_pred[6][1] = (imgpel) ((P_E + P_F + 1) >> 1);
+    cur_pred[0][5] =
+    cur_pred[2][4] =
+    cur_pred[4][3] =
+    cur_pred[6][2] = (imgpel) ((P_F + P_G + 1) >> 1);
+    cur_pred[0][6] =
+    cur_pred[2][5] =
+    cur_pred[4][4] =
+    cur_pred[6][3] = (imgpel) ((P_G + P_H + 1) >> 1);
+    cur_pred[0][7] =
+    cur_pred[2][6] =
+    cur_pred[4][5] =
+    cur_pred[6][4] = (imgpel) ((P_H + P_I + 1) >> 1);
+    cur_pred[2][7] =
+    cur_pred[4][6] =
+    cur_pred[6][5] = (imgpel) ((P_I + P_J + 1) >> 1);
+    cur_pred[4][7] =
+    cur_pred[6][6] = (imgpel) ((P_J + P_K + 1) >> 1);
+    cur_pred[6][7] = (imgpel) ((P_K + P_L + 1) >> 1);
+    cur_pred[1][0] = (imgpel) ((P_A + P_C + 2*P_B + 2) >> 2);
+    cur_pred[1][1] =
+    cur_pred[3][0] = (imgpel) ((P_B + P_D + 2*P_C + 2) >> 2);
+    cur_pred[1][2] =
+    cur_pred[3][1] =
+    cur_pred[5][0] = (imgpel) ((P_C + P_E + 2*P_D + 2) >> 2);
+    cur_pred[1][3] =
+    cur_pred[3][2] =
+    cur_pred[5][1] =
+    cur_pred[7][0] = (imgpel) ((P_D + P_F + 2*P_E + 2) >> 2);
+    cur_pred[1][4] =
+    cur_pred[3][3] =
+    cur_pred[5][2] =
+    cur_pred[7][1] = (imgpel) ((P_E + P_G + 2*P_F + 2) >> 2);
+    cur_pred[1][5] =
+    cur_pred[3][4] =
+    cur_pred[5][3] =
+    cur_pred[7][2] = (imgpel) ((P_F + P_H + 2*P_G + 2) >> 2);
+    cur_pred[1][6] =
+    cur_pred[3][5] =
+    cur_pred[5][4] =
+    cur_pred[7][3] = (imgpel) ((P_G + P_I + 2*P_H + 2) >> 2);
+    cur_pred[1][7] =
+    cur_pred[3][6] =
+    cur_pred[5][5] =
+    cur_pred[7][4] = (imgpel) ((P_H + P_J + 2*P_I + 2) >> 2);
+    cur_pred[3][7] =
+    cur_pred[5][6] =
+    cur_pred[7][5] = (imgpel) ((P_I + P_K + 2*P_J + 2) >> 2);
+    cur_pred[5][7] =
+    cur_pred[7][6] = (imgpel) ((P_J + P_L + 2*P_K + 2) >> 2);
+    cur_pred[7][7] = (imgpel) ((P_K + P_M + 2*P_L + 2) >> 2);
+  }
+
+  ///////////////////////////////////
+  // make diagonal down right prediction
+  ///////////////////////////////////
+  if (block_available_up && block_available_left && block_available_up_left)
+  {
+    // Mode DIAG_DOWN_RIGHT_PRED
+    cur_pred = img->mprr_3[DIAG_DOWN_RIGHT_PRED];
+    cur_pred[7][0] = (imgpel) ((P_X + P_V + 2*(P_W) + 2) >> 2);
+    cur_pred[6][0] =
+    cur_pred[7][1] = (imgpel) ((P_W + P_U + 2*(P_V) + 2) >> 2);
+    cur_pred[5][0] =
+    cur_pred[6][1] =
+    cur_pred[7][2] = (imgpel) ((P_V + P_T + 2*(P_U) + 2) >> 2);
+    cur_pred[4][0] =
+    cur_pred[5][1] =
+    cur_pred[6][2] =
+    cur_pred[7][3] = (imgpel) ((P_U + P_S + 2*(P_T) + 2) >> 2);
+    cur_pred[3][0] =
+    cur_pred[4][1] =
+    cur_pred[5][2] =
+    cur_pred[6][3] =
+    cur_pred[7][4] = (imgpel) ((P_T + P_R + 2*(P_S) + 2) >> 2);
+    cur_pred[2][0] =
+    cur_pred[3][1] =
+    cur_pred[4][2] =
+    cur_pred[5][3] =
+    cur_pred[6][4] =
+    cur_pred[7][5] = (imgpel) ((P_S + P_Q + 2*(P_R) + 2) >> 2);
+    cur_pred[1][0] =
+    cur_pred[2][1] =
+    cur_pred[3][2] =
+    cur_pred[4][3] =
+    cur_pred[5][4] =
+    cur_pred[6][5] =
+    cur_pred[7][6] = (imgpel) ((P_R + P_Z + 2*(P_Q) + 2) >> 2);
+    cur_pred[0][0] =
+    cur_pred[1][1] =
+    cur_pred[2][2] =
+    cur_pred[3][3] =
+    cur_pred[4][4] =
+    cur_pred[5][5] =
+    cur_pred[6][6] =
+    cur_pred[7][7] = (imgpel) ((P_Q + P_A + 2*(P_Z) + 2) >> 2);
+    cur_pred[0][1] =
+    cur_pred[1][2] =
+    cur_pred[2][3] =
+    cur_pred[3][4] =
+    cur_pred[4][5] =
+    cur_pred[5][6] =
+    cur_pred[6][7] = (imgpel) ((P_Z + P_B + 2*(P_A) + 2) >> 2);
+    cur_pred[0][2] =
+    cur_pred[1][3] =
+    cur_pred[2][4] =
+    cur_pred[3][5] =
+    cur_pred[4][6] =
+    cur_pred[5][7] = (imgpel) ((P_A + P_C + 2*(P_B) + 2) >> 2);
+    cur_pred[0][3] =
+    cur_pred[1][4] =
+    cur_pred[2][5] =
+    cur_pred[3][6] =
+    cur_pred[4][7] = (imgpel) ((P_B + P_D + 2*(P_C) + 2) >> 2);
+    cur_pred[0][4] =
+    cur_pred[1][5] =
+    cur_pred[2][6] =
+    cur_pred[3][7] = (imgpel) ((P_C + P_E + 2*(P_D) + 2) >> 2);
+    cur_pred[0][5] =
+    cur_pred[1][6] =
+    cur_pred[2][7] = (imgpel) ((P_D + P_F + 2*(P_E) + 2) >> 2);
+    cur_pred[0][6] =
+    cur_pred[1][7] = (imgpel) ((P_E + P_G + 2*(P_F) + 2) >> 2);
+    cur_pred[0][7] = (imgpel) ((P_F + P_H + 2*(P_G) + 2) >> 2);
+
+  ///////////////////////////////////
+  // make vertical right prediction
+  ///////////////////////////////////
+    cur_pred = img->mprr_3[VERT_RIGHT_PRED];
+    cur_pred[0][0] =
+    cur_pred[2][1] =
+    cur_pred[4][2] =
+    cur_pred[6][3] = (imgpel) ((P_Z + P_A + 1) >> 1);
+    cur_pred[0][1] =
+    cur_pred[2][2] =
+    cur_pred[4][3] =
+    cur_pred[6][4] = (imgpel) ((P_A + P_B + 1) >> 1);
+    cur_pred[0][2] =
+    cur_pred[2][3] =
+    cur_pred[4][4] =
+    cur_pred[6][5] = (imgpel) ((P_B + P_C + 1) >> 1);
+    cur_pred[0][3] =
+    cur_pred[2][4] =
+    cur_pred[4][5] =
+    cur_pred[6][6] = (imgpel) ((P_C + P_D + 1) >> 1);
+    cur_pred[0][4] =
+    cur_pred[2][5] =
+    cur_pred[4][6] =
+    cur_pred[6][7] = (imgpel) ((P_D + P_E + 1) >> 1);
+    cur_pred[0][5] =
+    cur_pred[2][6] =
+    cur_pred[4][7] = (imgpel) ((P_E + P_F + 1) >> 1);
+    cur_pred[0][6] =
+    cur_pred[2][7] = (imgpel) ((P_F + P_G + 1) >> 1);
+    cur_pred[0][7] = (imgpel) ((P_G + P_H + 1) >> 1);
+    cur_pred[1][0] =
+    cur_pred[3][1] =
+    cur_pred[5][2] =
+    cur_pred[7][3] = (imgpel) ((P_Q + P_A + 2*P_Z + 2) >> 2);
+    cur_pred[1][1] =
+    cur_pred[3][2] =
+    cur_pred[5][3] =
+    cur_pred[7][4] = (imgpel) ((P_Z + P_B + 2*P_A + 2) >> 2);
+    cur_pred[1][2] =
+    cur_pred[3][3] =
+    cur_pred[5][4] =
+    cur_pred[7][5] = (imgpel) ((P_A + P_C + 2*P_B + 2) >> 2);
+    cur_pred[1][3] =
+    cur_pred[3][4] =
+    cur_pred[5][5] =
+    cur_pred[7][6] = (imgpel) ((P_B + P_D + 2*P_C + 2) >> 2);
+    cur_pred[1][4] =
+    cur_pred[3][5] =
+    cur_pred[5][6] =
+    cur_pred[7][7] = (imgpel) ((P_C + P_E + 2*P_D + 2) >> 2);
+    cur_pred[1][5] =
+    cur_pred[3][6] =
+    cur_pred[5][7] = (imgpel) ((P_D + P_F + 2*P_E + 2) >> 2);
+    cur_pred[1][6] =
+    cur_pred[3][7] = (imgpel) ((P_E + P_G + 2*P_F + 2) >> 2);
+    cur_pred[1][7] = (imgpel) ((P_F + P_H + 2*P_G + 2) >> 2);
+    cur_pred[2][0] =
+    cur_pred[4][1] =
+    cur_pred[6][2] = (imgpel) ((P_R + P_Z + 2*P_Q + 2) >> 2);
+    cur_pred[3][0] =
+    cur_pred[5][1] =
+    cur_pred[7][2] = (imgpel) ((P_S + P_Q + 2*P_R + 2) >> 2);
+    cur_pred[4][0] =
+    cur_pred[6][1] = (imgpel) ((P_T + P_R + 2*P_S + 2) >> 2);
+    cur_pred[5][0] =
+    cur_pred[7][1] = (imgpel) ((P_U + P_S + 2*P_T + 2) >> 2);
+    cur_pred[6][0] = (imgpel) ((P_V + P_T + 2*P_U + 2) >> 2);
+    cur_pred[7][0] = (imgpel) ((P_W + P_U + 2*P_V + 2) >> 2);
+
+  ///////////////////////////////////
+  // make horizontal down prediction
+  ///////////////////////////////////
+    cur_pred = img->mprr_3[HOR_DOWN_PRED];
+    cur_pred[0][0] =
+    cur_pred[1][2] =
+    cur_pred[2][4] =
+    cur_pred[3][6] = (imgpel) ((P_Q + P_Z + 1) >> 1);
+    cur_pred[1][0] =
+    cur_pred[2][2] =
+    cur_pred[3][4] =
+    cur_pred[4][6] = (imgpel) ((P_R + P_Q + 1) >> 1);
+    cur_pred[2][0] =
+    cur_pred[3][2] =
+    cur_pred[4][4] =
+    cur_pred[5][6] = (imgpel) ((P_S + P_R + 1) >> 1);
+    cur_pred[3][0] =
+    cur_pred[4][2] =
+    cur_pred[5][4] =
+    cur_pred[6][6] = (imgpel) ((P_T + P_S + 1) >> 1);
+    cur_pred[4][0] =
+    cur_pred[5][2] =
+    cur_pred[6][4] =
+    cur_pred[7][6] = (imgpel) ((P_U + P_T + 1) >> 1);
+    cur_pred[5][0] =
+    cur_pred[6][2] =
+    cur_pred[7][4] = (imgpel) ((P_V + P_U + 1) >> 1);
+    cur_pred[6][0] =
+    cur_pred[7][2] = (imgpel) ((P_W + P_V + 1) >> 1);
+    cur_pred[7][0] = (imgpel) ((P_X + P_W + 1) >> 1);
+    cur_pred[0][1] =
+    cur_pred[1][3] =
+    cur_pred[2][5] =
+    cur_pred[3][7] = (imgpel) ((P_Q + P_A + 2*P_Z + 2) >> 2);
+    cur_pred[1][1] =
+    cur_pred[2][3] =
+    cur_pred[3][5] =
+    cur_pred[4][7] = (imgpel) ((P_Z + P_R + 2*P_Q + 2) >> 2);
+    cur_pred[2][1] =
+    cur_pred[3][3] =
+    cur_pred[4][5] =
+    cur_pred[5][7] = (imgpel) ((P_Q + P_S + 2*P_R + 2) >> 2);
+    cur_pred[3][1] =
+    cur_pred[4][3] =
+    cur_pred[5][5] =
+    cur_pred[6][7] = (imgpel) ((P_R + P_T + 2*P_S + 2) >> 2);
+    cur_pred[4][1] =
+    cur_pred[5][3] =
+    cur_pred[6][5] =
+    cur_pred[7][7] = (imgpel) ((P_S + P_U + 2*P_T + 2) >> 2);
+    cur_pred[5][1] =
+    cur_pred[6][3] =
+    cur_pred[7][5] = (imgpel) ((P_T + P_V + 2*P_U + 2) >> 2);
+    cur_pred[6][1] =
+    cur_pred[7][3] = (imgpel) ((P_U + P_W + 2*P_V + 2) >> 2);
+    cur_pred[7][1] = (imgpel) ((P_V + P_X + 2*P_W + 2) >> 2);
+    cur_pred[0][2] =
+    cur_pred[1][4] =
+    cur_pred[2][6] = (imgpel) ((P_Z + P_B + 2*P_A + 2) >> 2);
+    cur_pred[0][3] =
+    cur_pred[1][5] =
+    cur_pred[2][7] = (imgpel) ((P_A + P_C + 2*P_B + 2) >> 2);
+    cur_pred[0][4] =
+    cur_pred[1][6] = (imgpel) ((P_B + P_D + 2*P_C + 2) >> 2);
+    cur_pred[0][5] =
+    cur_pred[1][7] = (imgpel) ((P_C + P_E + 2*P_D + 2) >> 2);
+    cur_pred[0][6] = (imgpel) ((P_D + P_F + 2*P_E + 2) >> 2);
+    cur_pred[0][7] = (imgpel) ((P_E + P_G + 2*P_F + 2) >> 2);
+  }
+
+  ///////////////////////////////////
+  // make horizontal up prediction
+  ///////////////////////////////////
+  if (block_available_left)
+  {
+    cur_pred = img->mprr_3[HOR_UP_PRED];
+    cur_pred[0][0] = (imgpel) ((P_Q + P_R + 1) >> 1);
+    cur_pred[1][0] =
+    cur_pred[0][2] = (imgpel) ((P_R + P_S + 1) >> 1);
+    cur_pred[2][0] =
+    cur_pred[1][2] =
+    cur_pred[0][4] = (imgpel) ((P_S + P_T + 1) >> 1);
+    cur_pred[3][0] =
+    cur_pred[2][2] =
+    cur_pred[1][4] =
+    cur_pred[0][6] = (imgpel) ((P_T + P_U + 1) >> 1);
+    cur_pred[4][0] =
+    cur_pred[3][2] =
+    cur_pred[2][4] =
+    cur_pred[1][6] = (imgpel) ((P_U + P_V + 1) >> 1);
+    cur_pred[5][0] =
+    cur_pred[4][2] =
+    cur_pred[3][4] =
+    cur_pred[2][6] = (imgpel) ((P_V + P_W + 1) >> 1);
+    cur_pred[6][0] =
+    cur_pred[5][2] =
+    cur_pred[4][4] =
+    cur_pred[3][6] = (imgpel) ((P_W + P_X + 1) >> 1);
+    cur_pred[4][6] =
+    cur_pred[4][7] =
+    cur_pred[5][4] =
+    cur_pred[5][5] =
+    cur_pred[5][6] =
+    cur_pred[5][7] =
+    cur_pred[6][2] =
+    cur_pred[6][3] =
+    cur_pred[6][4] =
+    cur_pred[6][5] =
+    cur_pred[6][6] =
+    cur_pred[6][7] =
+    cur_pred[7][0] =
+    cur_pred[7][1] =
+    cur_pred[7][2] =
+    cur_pred[7][3] =
+    cur_pred[7][4] =
+    cur_pred[7][5] =
+    cur_pred[7][6] =
+    cur_pred[7][7] = (imgpel) P_X;
+    cur_pred[6][1] =
+    cur_pred[5][3] =
+    cur_pred[4][5] =
+    cur_pred[3][7] = (imgpel) ((P_W + 3*P_X + 2) >> 2);
+    cur_pred[5][1] =
+    cur_pred[4][3] =
+    cur_pred[3][5] =
+    cur_pred[2][7] = (imgpel) ((P_X + P_V + 2*P_W + 2) >> 2);
+    cur_pred[4][1] =
+    cur_pred[3][3] =
+    cur_pred[2][5] =
+    cur_pred[1][7] = (imgpel) ((P_W + P_U + 2*P_V + 2) >> 2);
+    cur_pred[3][1] =
+    cur_pred[2][3] =
+    cur_pred[1][5] =
+    cur_pred[0][7] = (imgpel) ((P_V + P_T + 2*P_U + 2) >> 2);
+    cur_pred[2][1] =
+    cur_pred[1][3] =
+    cur_pred[0][5] = (imgpel) ((P_U + P_S + 2*P_T + 2) >> 2);
+    cur_pred[1][1] =
+    cur_pred[0][3] = (imgpel) ((P_T + P_R + 2*P_S + 2) >> 2);
+    cur_pred[0][1] = (imgpel) ((P_S + P_Q + 2*P_R + 2) >> 2);
+  }
+}
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    Prefiltering for Intra8x8 prediction: smooths the 25 reference samples in
+ *    PredPel in place with a rounded 3-tap filter, (left + 2*centre + right + 2) >> 2.
+ *
+ * \param PredPel
+ *    array of 25 reference samples; overwritten with the filtered values.
+ *    The P_x macros used below are defined outside this function; they are
+ *    presumably aliases for PredPel[0..24] (P_Z = [0], P_A..P_P = [1..16],
+ *    P_Q..P_X = [17..24]) -- confirm against their definition.
+ * \param block_up_left
+ *    nonzero if the corner sample may be used; entry 0 is only filtered when
+ *    this is set together with block_up and/or block_left
+ * \param block_up
+ *    nonzero if entries 1..16 are to be filtered
+ * \param block_left
+ *    nonzero if entries 17..24 are to be filtered
+ *
+ * \note
+ *    The results are collected in a local copy (LoopArray) and copied back at
+ *    the end; assuming the P_x macros read PredPel, every output is therefore
+ *    computed from unfiltered neighbours. Entries whose group is not enabled
+ *    are left unchanged.
+ *************************************************************************************
+ */
+void LowPassForIntra8x8Pred(imgpel *PredPel, int block_up_left, int block_up, int block_left)
+{
+  int i;
+  imgpel LoopArray[25];
+
+  memcpy(LoopArray,PredPel, 25 * sizeof(imgpel));
+
+  if(block_up)
+  {
+    if(block_up_left)
+    {
+      LoopArray[1] = (((&P_Z)[0] + ((&P_Z)[1]<<1) + (&P_Z)[2] + 2)>>2);
+    }
+    else
+      LoopArray[1] = (((&P_Z)[1] + ((&P_Z)[1]<<1) + (&P_Z)[2] + 2)>>2); // corner not usable: sample 1 takes its place
+
+
+    for(i = 2; i <16; i++)
+    {
+      LoopArray[i] = (((&P_Z)[i-1] + ((&P_Z)[i]<<1) + (&P_Z)[i+1] + 2)>>2);
+    }
+    LoopArray[16] = ((P_P + (P_P<<1) + P_O + 2)>>2); // end of the run: P_P is counted in place of a further neighbour
+  }
+
+  if(block_up_left)
+  {
+    if(block_up && block_left)
+    {
+      LoopArray[0] = ((P_Q + (P_Z<<1) + P_A +2)>>2);
+    }
+    else
+    {
+      if(block_up)
+        LoopArray[0] = ((P_Z + (P_Z<<1) + P_A +2)>>2);
+      else
+        if(block_left)
+          LoopArray[0] = ((P_Z + (P_Z<<1) + P_Q +2)>>2);
+    }
+  }
+
+  if(block_left)
+  {
+    if(block_up_left)
+      LoopArray[17] = ((P_Z + (P_Q<<1) + P_R + 2)>>2);
+    else
+      LoopArray[17] = ((P_Q + (P_Q<<1) + P_R + 2)>>2); // corner not usable: P_Q takes its place
+
+    for(i = 18; i <24; i++)
+    {
+      LoopArray[i] = (((&P_Z)[i-1] + ((&P_Z)[i]<<1) + (&P_Z)[i+1] + 2)>>2);
+    }
+    LoopArray[24] = ((P_W + (P_X<<1) + P_X + 2)>>2); // end of the run: P_X is counted in place of a further neighbour
+  }
+
+  memcpy(PredPel, LoopArray, 25 * sizeof(imgpel));
+}
+
+
+
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    R-D Cost for an 8x8 Intra block: transforms/quantizes/reconstructs the block
+ *    via dct_luma8x8(), measures the distortion between the original and the
+ *    reconstructed samples, counts the bits for the prediction mode and the luma
+ *    coefficients and returns distortion + lambda * rate.
+ *
+ * \param nonzero
+ *    output: receives the value returned by dct_luma8x8()
+ * \param b8
+ *    index of the 8x8 block; the block offset is (8*(b8 & 1), 8*(b8 >> 1))
+ * \param ipmode
+ *    intra prediction mode whose cost is evaluated
+ * \param lambda
+ *    weight applied to the rate
+ * \param min_rdcost
+ *    not used by this function
+ * \param mostProbableMode
+ *    predicted mode; the coded value is -1 if ipmode equals it, ipmode if
+ *    ipmode is smaller, and ipmode-1 otherwise
+ *
+ * \return
+ *    (double)distortion + lambda * (double)rate
+ *
+ * \note
+ *    The call to dct_luma8x8() writes the reconstructed block to
+ *    enc_picture->imgY, and the write functions called here are handed the
+ *    data partition of the current slice.
+ *************************************************************************************
+ */
+
+double RDCost_for_8x8IntraBlocks(int *nonzero, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode)
+{
+  double  rdcost = 0.0;
+  int     dummy, x, y, rate;
+  int64   distortion  = 0;
+  int     block_x     = 8*(b8 & 0x01);
+  int     block_y     = 8*(b8 >> 1);
+  int     pic_pix_x   = img->pix_x+block_x;
+  int     pic_pix_y   = img->pix_y+block_y;
+  int     pic_opix_y  = img->opix_y+block_y;
+  imgpel    **imgY_orig  = imgY_org;
+  imgpel    **imgY       = enc_picture->imgY;
+
+  Slice          *currSlice    =  img->currentSlice;
+  SyntaxElement  se;
+  const int      *partMap      = assignSE2partition[input->partition_mode];
+  DataPartition  *dataPart;
+
+  //===== perform DCT, Q, IQ, IDCT, Reconstruction =====
+  dummy = 0; // coefficient-cost accumulator demanded by dct_luma8x8(); never read afterwards
+
+  *nonzero = dct_luma8x8 (b8, &dummy, 1);
+
+  //===== get distortion (SSD) of 8x8 block =====
+  for (y=0; y<8; y++)
+    for (x=pic_pix_x; x<pic_pix_x+8; x++)
+      distortion += img->quad [imgY_orig[pic_opix_y+y][x] - imgY[pic_pix_y+y][x]]; // img->quad is indexed by the (possibly negative) sample difference
+
+  //===== RATE for INTRA PREDICTION MODE  (SYMBOL MODE MUST BE SET TO UVLC) =====
+  se.value1 = (mostProbableMode == ipmode) ? -1 : ipmode < mostProbableMode ? ipmode : ipmode-1;
+
+  //--- set position and type ---
+  se.context = b8;
+  se.type    = SE_INTRAPREDMODE;
+
+  //--- choose data partition ---
+  if (img->type!=B_SLICE)
+    dataPart = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]);
+  else
+    dataPart = &(currSlice->partArr[partMap[SE_BFRAME]]);
+
+  //--- encode and update rate ---
+  writeIntraPredMode (&se, dataPart);
+  /*
+  if (input->symbol_mode == UVLC)
+    writeIntraPredMode_CAVLC(currSE, dataPart);
+  else
+  {
+    currSE->writing = writeIntraPredMode_CABAC;
+    dataPart->writeSyntaxElement (currSE, dataPart);
+  }
+  */
+
+  rate = se.len; // bit count left in se by writeIntraPredMode()
+
+  //===== RATE for LUMINANCE COEFFICIENTS =====
+
+  if (input->symbol_mode == UVLC)
+  {
+    int b4;
+    for(b4=0; b4<4; b4++)
+      rate  += writeCoeff4x4_CAVLC (LUMA, b8, b4, 0);
+  }
+  else
+  {
+    rate  += writeLumaCoeff8x8_CABAC (b8, 1);
+  }
+
+  rdcost = (double)distortion + lambda*(double)rate;
+
+  return rdcost;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    The routine performs transform, quantization and inverse transform on the
+ *    8x8 block held in img->m7, adds the result to the prediction (img->mpr)
+ *    and writes it to the decoded luma frame (enc_picture->imgY). Includes the
+ *    RD constrained quantization also.
+ *    When img->qp_scaled is 0 and img->lossless_qpprime_flag is 1, the
+ *    transforms and the quantization are skipped: the levels are the values
+ *    of img->m7 themselves and img->m7 is added to the prediction unchanged.
+ *
+ * \par Input:
+ *    b8: Block position inside a macro block (0,1,2,3).
+ *    intra: first index used for LevelScale8x8Luma, LevelOffset8x8Luma,
+ *           InvLevelScale8x8Luma and img->fadjust8x8.
+ *
+ * \par Output:
+ *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.
+ *             This is the return value of the function.
+ *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+ *                Increased by MAX_VALUE for every level above 1 (and for every
+ *                nonzero level in the lossless case), otherwise by an entry of
+ *                COEFF_COST8x8.
+ *
+ * \note
+ *    Level/run pairs are stored in img->cofAC[b8]. If the macroblock has
+ *    luma_transform_size_8x8_flag set and the symbol mode is UVLC, they are
+ *    spread over four lists selected by (scan position & 3); otherwise a single
+ *    list img->cofAC[b8][0] is used. Each list is terminated by a zero level.
+ ************************************************************************
+ */
+
+#define MC(coeff) ((coeff)&3)
+
+int dct_luma8x8(int b8,int *coeff_cost, int intra)
+{
+  int i,j,ilev,coeff_ctr;
+  int level,scan_pos,run;
+  int nonzero;
+  int qp_per,qp_rem,q_bits;
+
+  int block_x = 8*(b8 & 0x01);
+  int block_y = 8*(b8 >> 1);
+  int*  ACLevel = img->cofAC[b8][0][0];
+  int*  ACRun   = img->cofAC[b8][0][1];
+  int m6[8][8];
+  int a[8], b[8];
+  int scan_poss[4],runs[4];
+  int pix_x, pix_y, ipix_y;
+  int **levelscale,**leveloffset;
+  int **invlevelscale;
+  int MCcoeff;
+  Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+
+  Boolean lossless_qpprime = (Boolean) ((img->qp_scaled)==0 && img->lossless_qpprime_flag==1);
+  const byte (*pos_scan)[2] = currMB->is_field_mode ? FIELD_SCAN8x8 : SNGL_SCAN8x8;
+
+  qp_per    = qp_per_matrix[(img->qp_scaled - MIN_QP)];
+  qp_rem    = qp_rem_matrix[(img->qp_scaled - MIN_QP)];
+  q_bits    = Q_BITS_8+qp_per;
+  levelscale    = LevelScale8x8Luma[intra][qp_rem];
+  leveloffset   = LevelOffset8x8Luma[intra][qp_per];
+  invlevelscale = InvLevelScale8x8Luma[intra][qp_rem];
+
+  // horizontal transform
+  if (!lossless_qpprime)
+  {
+    for( i=0; i<8; i++)
+    {
+      a[0] = img->m7[i][0] + img->m7[i][7];
+      a[1] = img->m7[i][1] + img->m7[i][6];
+      a[2] = img->m7[i][2] + img->m7[i][5];
+      a[3] = img->m7[i][3] + img->m7[i][4];
+
+      b[0] = a[0] + a[3];
+      b[1] = a[1] + a[2];
+      b[2] = a[0] - a[3];
+      b[3] = a[1] - a[2];
+
+      a[4] = img->m7[i][0] - img->m7[i][7];
+      a[5] = img->m7[i][1] - img->m7[i][6];
+      a[6] = img->m7[i][2] - img->m7[i][5];
+      a[7] = img->m7[i][3] - img->m7[i][4];
+
+      b[4]= a[5] + a[6] + ((a[4]>>1) + a[4]);
+      b[5]= a[4] - a[7] - ((a[6]>>1) + a[6]);
+      b[6]= a[4] + a[7] - ((a[5]>>1) + a[5]);
+      b[7]= a[5] - a[6] + ((a[7]>>1) + a[7]);
+
+      m6[0][i] = b[0] + b[1]; // results are stored transposed in the scratch block m6
+      m6[2][i] = b[2] + (b[3]>>1);
+      m6[4][i] = b[0] - b[1];
+      m6[6][i] = (b[2]>>1) - b[3];
+      m6[1][i] =   b[4] + (b[7]>>2);
+      m6[3][i] =   b[5] + (b[6]>>2);
+      m6[5][i] =   b[6] - (b[5]>>2);
+      m6[7][i] = - b[7] + (b[4]>>2);
+    }
+    // vertical transform
+    for( i=0; i<8; i++)
+    {
+      a[0] = m6[i][0] + m6[i][7];
+      a[1] = m6[i][1] + m6[i][6];
+      a[2] = m6[i][2] + m6[i][5];
+      a[3] = m6[i][3] + m6[i][4];
+
+      b[0] = a[0] + a[3];
+      b[1] = a[1] + a[2];
+      b[2] = a[0] - a[3];
+      b[3] = a[1] - a[2];
+
+      a[4] = m6[i][0] - m6[i][7];
+      a[5] = m6[i][1] - m6[i][6];
+      a[6] = m6[i][2] - m6[i][5];
+      a[7] = m6[i][3] - m6[i][4];
+
+      b[4]= a[5] + a[6] + ((a[4]>>1) + a[4]);
+      b[5]= a[4] - a[7] - ((a[6]>>1) + a[6]);
+      b[6]= a[4] + a[7] - ((a[5]>>1) + a[5]);
+      b[7]= a[5] - a[6] + ((a[7]>>1) + a[7]);
+
+      img->m7[0][i] = b[0] + b[1]; // transposed again on the way back into img->m7
+      img->m7[2][i] = b[2] + (b[3]>>1);
+      img->m7[4][i] = b[0] - b[1];
+      img->m7[6][i] = (b[2]>>1) - b[3];
+      img->m7[1][i] =   b[4] + (b[7]>>2);
+      img->m7[3][i] =   b[5] + (b[6]>>2);
+      img->m7[5][i] =   b[6] - (b[5]>>2);
+      img->m7[7][i] = - b[7] + (b[4]>>2);
+    }
+
+    // Quant
+    nonzero=FALSE;
+
+    run=-1;
+    scan_pos=0;
+
+    runs[0] = runs[1] = runs[2] = runs[3] = -1;
+    scan_poss[0] = scan_poss[1] = scan_poss[2] = scan_poss[3] = 0;
+
+    for (coeff_ctr = 0; coeff_ctr < 64; coeff_ctr++)
+    {
+
+      i=pos_scan[coeff_ctr][0];
+      j=pos_scan[coeff_ctr][1];
+
+      MCcoeff = MC(coeff_ctr); // which of the four interleaved lists this scan position belongs to
+      run++;
+      ilev=0;
+
+      runs[MCcoeff]++;
+
+      level = (iabs (img->m7[j][i]) * levelscale[j][i] + leveloffset[j][i]) >> q_bits;
+
+
+      if (img->AdaptiveRounding)
+      {
+        img->fadjust8x8[intra][block_y + j][block_x + i] = ( level == 0 )
+          ? 0
+          : rshift_rnd_sf((AdaptRndWeight * (iabs (img->m7[j][i]) * levelscale[j][i] - (level << q_bits))), (q_bits + 1));
+      }
+
+      if (level != 0)
+      {
+        nonzero=TRUE;
+
+        if (currMB->luma_transform_size_8x8_flag && input->symbol_mode == UVLC)
+        {
+          *coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST8x8[input->disthres][runs[MCcoeff]];
+
+          img->cofAC[b8][MCcoeff][0][scan_poss[MCcoeff]] = isignab(level,img->m7[j][i]);
+          img->cofAC[b8][MCcoeff][1][scan_poss[MCcoeff]] = runs[MCcoeff];
+          ++scan_poss[MCcoeff];
+          runs[MCcoeff]=-1;
+        }
+        else
+        {
+          *coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST8x8[input->disthres][run];
+          ACLevel[scan_pos] = isignab(level,img->m7[j][i]);
+          ACRun  [scan_pos] = run;
+          ++scan_pos;
+          run=-1;                     // reset zero level counter
+        }
+
+        level = isignab(level, img->m7[j][i]);
+
+        ilev = rshift_rnd_sf(level*invlevelscale[j][i]<<qp_per, 6); // dequantization
+      }
+      img->m7[j][i] = ilev; // dequantized coefficient (0 if the level is 0) feeds the inverse transform
+    }
+  }
+  else
+  {
+    // Quant
+    nonzero=FALSE;
+
+    run=-1;
+    scan_pos=0;
+
+    runs[0]=runs[1]=runs[2]=runs[3]=-1;
+    scan_poss[0]=scan_poss[1]=scan_poss[2]=scan_poss[3]=0;
+
+    for (coeff_ctr=0; coeff_ctr < 64; coeff_ctr++)
+    {
+      i=pos_scan[coeff_ctr][0];
+      j=pos_scan[coeff_ctr][1];
+
+      MCcoeff = MC(coeff_ctr);
+      run++;
+      ilev=0;
+
+      runs[MCcoeff]++;
+
+      level = iabs (img->m7[j][i]); // lossless: the level is the magnitude of the untransformed value
+
+      if (img->AdaptiveRounding)
+      {
+        img->fadjust8x8[intra][block_y+j][block_x+i] = 0;
+      }
+
+      if (level != 0)
+      {
+        nonzero=TRUE;
+
+        if (currMB->luma_transform_size_8x8_flag && input->symbol_mode == UVLC)
+        {
+          *coeff_cost += MAX_VALUE;
+
+          img->cofAC[b8][MCcoeff][0][scan_poss[MCcoeff]] = isignab(level,img->m7[j][i]);
+          img->cofAC[b8][MCcoeff][1][scan_poss[MCcoeff]] = runs[MCcoeff];
+          ++scan_poss[MCcoeff];
+          runs[MCcoeff]=-1;
+        }
+        else
+        {
+          *coeff_cost += MAX_VALUE;
+          ACLevel[scan_pos] = isignab(level,img->m7[j][i]);
+          ACRun  [scan_pos] = run;
+          ++scan_pos;
+          run=-1;                     // reset zero level counter
+        }
+
+        level = isignab(level, img->m7[j][i]);
+        ilev = level; // not stored: img->m7 is left untouched in this branch
+      }
+    }
+  }
+  if (!currMB->luma_transform_size_8x8_flag || input->symbol_mode != UVLC) // terminate the level list(s) with a zero entry
+    ACLevel[scan_pos] = 0;
+  else
+  {
+    for(i=0; i<4; i++)
+      img->cofAC[b8][i][0][scan_poss[i]] = 0;
+  }
+
+
+  //    Inverse Transform
+  // horizontal inverse transform
+  if (!lossless_qpprime)
+  {
+    for( i=0; i<8; i++)
+    {
+      a[0] = img->m7[i][0] + img->m7[i][4];
+      a[4] = img->m7[i][0] - img->m7[i][4];
+      a[2] = (img->m7[i][2]>>1) - img->m7[i][6];
+      a[6] = img->m7[i][2] + (img->m7[i][6]>>1);
+
+      b[0] = a[0] + a[6];
+      b[2] = a[4] + a[2];
+      b[4] = a[4] - a[2];
+      b[6] = a[0] - a[6];
+
+      a[1] = -img->m7[i][3] + img->m7[i][5] - img->m7[i][7] - (img->m7[i][7]>>1);
+      a[3] =  img->m7[i][1] + img->m7[i][7] - img->m7[i][3] - (img->m7[i][3]>>1);
+      a[5] = -img->m7[i][1] + img->m7[i][7] + img->m7[i][5] + (img->m7[i][5]>>1);
+      a[7] =  img->m7[i][3] + img->m7[i][5] + img->m7[i][1] + (img->m7[i][1]>>1);
+
+      b[1] = a[1] + (a[7]>>2);
+      b[7] = -(a[1]>>2) + a[7];
+      b[3] = a[3] + (a[5]>>2);
+      b[5] = (a[3]>>2) - a[5];
+
+      m6[0][i] = b[0] + b[7];
+      m6[1][i] = b[2] + b[5];
+      m6[2][i] = b[4] + b[3];
+      m6[3][i] = b[6] + b[1];
+      m6[4][i] = b[6] - b[1];
+      m6[5][i] = b[4] - b[3];
+      m6[6][i] = b[2] - b[5];
+      m6[7][i] = b[0] - b[7];
+    }
+
+    // vertical inverse transform
+    for( i=0; i<8; i++)
+    {
+      a[0] =  m6[i][0] + m6[i][4];
+      a[4] =  m6[i][0] - m6[i][4];
+      a[2] = (m6[i][2]>>1) - m6[i][6];
+      a[6] =  m6[i][2] + (m6[i][6]>>1);
+
+      b[0] = a[0] + a[6];
+      b[2] = a[4] + a[2];
+      b[4] = a[4] - a[2];
+      b[6] = a[0] - a[6];
+
+      a[1] = -m6[i][3] + m6[i][5] - m6[i][7] - (m6[i][7]>>1);
+      a[3] =  m6[i][1] + m6[i][7] - m6[i][3] - (m6[i][3]>>1);
+      a[5] = -m6[i][1] + m6[i][7] + m6[i][5] + (m6[i][5]>>1);
+      a[7] =  m6[i][3] + m6[i][5] + m6[i][1] + (m6[i][1]>>1);
+
+      b[1] =   a[1] + (a[7]>>2);
+      b[7] = -(a[1]>>2) + a[7];
+      b[3] =   a[3] + (a[5]>>2);
+      b[5] =  (a[3]>>2) - a[5];
+
+      img->m7[0][i] = b[0] + b[7];
+      img->m7[1][i] = b[2] + b[5];
+      img->m7[2][i] = b[4] + b[3];
+      img->m7[3][i] = b[6] + b[1];
+      img->m7[4][i] = b[6] - b[1];
+      img->m7[5][i] = b[4] - b[3];
+      img->m7[6][i] = b[2] - b[5];
+      img->m7[7][i] = b[0] - b[7];
+    }
+
+    for( j=0; j<2*BLOCK_SIZE; j++)
+    {
+      pix_y = block_y + j;
+      ipix_y = img->pix_y + pix_y;
+      for( i=0; i<2*BLOCK_SIZE; i++)
+      {
+        pix_x = block_x + i;
+        img->m7[j][i] = iClip1( img->max_imgpel_value, rshift_rnd_sf((img->m7[j][i]+((long)img->mpr[pix_y][pix_x] << DQ_BITS_8)),DQ_BITS_8)); // add the prediction, scale back with rounding, clip against max_imgpel_value
+        enc_picture->imgY[ipix_y][img->pix_x + pix_x]= (imgpel) img->m7[j][i];
+      }
+    }
+  }
+  else
+  {
+    for( j=0; j<2*BLOCK_SIZE; j++)
+    {
+      pix_y = block_y + j;
+      ipix_y = img->pix_y + pix_y;
+      for( i=0; i<2*BLOCK_SIZE; i++)
+      {
+        pix_x = block_x + i;
+        img->m7[j][i] = img->m7[j][i] + img->mpr[pix_y][block_x+i]; // lossless: plain sum, no scaling or clipping
+        enc_picture->imgY[ipix_y][img->pix_x + pix_x]= (imgpel) img->m7[j][i];
+      }
+    }
+  }
+
+  //  Decoded block moved to frame memory
+  return nonzero;
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/transform8x8.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/transform8x8.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/transform8x8.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,32 @@
+
+/*!
+ ***************************************************************************
+ *
+ * \file transform8x8.h
+ *
+ * \brief
+ *    prototypes of 8x8 transform functions
+ *
+ * \date
+ *    9. October 2003
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    Yuri Vatis  vatis at hhi.de
+ **************************************************************************/
+
+#ifndef _TRANSFORM8X8_H_
+#define _TRANSFORM8X8_H_
+
+int    Mode_Decision_for_new_Intra8x8Macroblock (double lambda, int *min_cost);
+int    Mode_Decision_for_new_8x8IntraBlocks (int b8, double lambda, int *min_cost);
+
+void   intrapred_luma8x8(int img_x,int img_y, int *left_available, int *up_available, int *all_available);
+
+double RDCost_for_8x8IntraBlocks(int *c_nz, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode); // c_nz receives the nonzero flag returned by dct_luma8x8(); the definition names it 'nonzero'
+
+int    dct_luma8x8(int b8,int *coeff_cost, int intra); // returns 1 if any quantized level is nonzero, else 0
+
+void   LowPassForIntra8x8Pred(imgpel *PredPel, int block_up_left, int block_up, int block_left); // filters the 25 entries of PredPel in place
+
+#endif //_TRANSFORM8X8_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/vlc.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/vlc.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/vlc.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,1445 @@
+
+/*!
+ ***************************************************************************
+ * \file vlc.c
+ *
+ * \brief
+ *    (CA)VLC coding functions
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *    - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+ *    - Detlev Marpe                    <marpe at hhi.de>
+ *    - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+ ***************************************************************************
+ */
+
+#include "contributors.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+
+#include "global.h"
+
+#include "vlc.h"
+
+#if TRACE // SYMTRACESTRING(s): copy a trace label into the tracestring of a SyntaxElement named 'sym' in the caller's scope
+#define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE)
+#else // NOTE(review): strncpy leaves the destination unterminated when s has TRACESTRING_SIZE or more characters -- confirm callers pass short labels
+#define SYMTRACESTRING(s) // expands to nothing when tracing is disabled
+#endif
+
+//! gives codeword number from CBP value, both for intra and inter; indexed as NCBP[img->yuv_format ? 1 : 0][cbp][0 = intra, 1 = inter]
+static const unsigned char NCBP[2][48][2]=
+{
+  {  // 0      1        2       3       4       5       6       7       8       9      10      11
+    { 1, 0},{10, 1},{11, 2},{ 6, 5},{12, 3},{ 7, 6},{14,14},{ 2,10},{13, 4},{15,15},{ 8, 7},{ 3,11},
+    { 9, 8},{ 4,12},{ 5,13},{ 0, 9},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},
+    { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},
+    { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0} // entries 16..47 of this first table are all zero
+  },
+  { // table selected when img->yuv_format is nonzero; all 48 entries are populated
+    { 3, 0},{29, 2},{30, 3},{17, 7},{31, 4},{18, 8},{37,17},{ 8,13},{32, 5},{38,18},{19, 9},{ 9,14},
+    {20,10},{10,15},{11,16},{ 2,11},{16, 1},{33,32},{34,33},{21,36},{35,34},{22,37},{39,44},{ 4,40},
+    {36,35},{40,45},{23,38},{ 5,41},{24,39},{ 6,42},{ 7,43},{ 1,19},{41, 6},{42,24},{43,25},{25,20},
+    {44,26},{26,21},{46,46},{12,28},{45,27},{47,47},{27,22},{13,29},{28,23},{14,30},{15,31},{ 0,12}
+  }
+};
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    ue_v, writes an ue(v) syntax element, returns the length in bits.
+ *    The length and info word are obtained from ue_linfo(), converted with
+ *    symbol2uvlc() and appended to the bitstream with writeUVLC2buffer().
+ *
+ * \param tracestring
+ *    the string for the trace file (only used when TRACE is enabled)
+ * \param value
+ *    the value to be coded
+ *  \param bitstream
+ *    the target bitstream the value should be coded into; its streamBuffer
+ *    must not be NULL (checked with assert)
+ *
+ * \return
+ *    Number of bits used by the coded syntax element
+ *
+ * \note
+ *    This function always writes to the bit buffer used for the progressive
+ *    scan case and should not be used (or should be modified appropriately)
+ *    for interlaced coding. When used in the context of the Parameter Sets,
+ *    this is not a problem.
+ *
+ *************************************************************************************
+ */
+int ue_v (char *tracestring, int value, Bitstream *bitstream)
+{
+  SyntaxElement symbol, *sym=&symbol;
+  sym->value1 = value;
+  sym->value2 = 0;
+
+  assert (bitstream->streamBuffer != NULL);
+
+  ue_linfo(sym->value1,sym->value2,&(sym->len),&(sym->inf));
+  symbol2uvlc(sym);
+
+  writeUVLC2buffer (sym, bitstream);
+
+#if TRACE
+  strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE); // NOTE(review): no NUL terminator is written if tracestring has TRACESTRING_SIZE or more characters
+  trace2out (sym);
+#endif
+
+  return (sym->len);
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    se_v, writes an se(v) syntax element, returns the length in bits.
+ *    The length and info word are obtained from se_linfo(), converted with
+ *    symbol2uvlc() and appended to the bitstream with writeUVLC2buffer().
+ *
+ * \param tracestring
+ *    the string for the trace file (only used when TRACE is enabled)
+ * \param value
+ *    the value to be coded
+ *  \param bitstream
+ *    the target bitstream the value should be coded into; its streamBuffer
+ *    must not be NULL (checked with assert)
+ *
+ * \return
+ *    Number of bits used by the coded syntax element
+ *
+ * \note
+ *    This function always writes to the bit buffer used for the progressive
+ *    scan case and should not be used (or should be modified appropriately)
+ *    for interlaced coding. When used in the context of the Parameter Sets,
+ *    this is not a problem.
+ *
+ *************************************************************************************
+ */
+int se_v (char *tracestring, int value, Bitstream *bitstream)
+{
+  SyntaxElement symbol, *sym=&symbol;
+  sym->value1 = value;
+  sym->value2 = 0;
+
+  assert (bitstream->streamBuffer != NULL);
+
+  se_linfo(sym->value1,sym->value2,&(sym->len),&(sym->inf));
+  symbol2uvlc(sym);
+
+  writeUVLC2buffer (sym, bitstream);
+
+#if TRACE
+  strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE); // NOTE(review): no NUL terminator is written if tracestring has TRACESTRING_SIZE or more characters
+  trace2out (sym);
+#endif
+
+  return (sym->len);
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    u_1, writes a flag (u(1) syntax element), returns the length in bits,
+ *    always 1. The value is used directly as the bit pattern and appended to
+ *    the bitstream with writeUVLC2buffer().
+ *
+ * \param tracestring
+ *    the string for the trace file (only used when TRACE is enabled)
+ * \param value
+ *    the value to be coded
+ *  \param bitstream
+ *    the target bitstream the value should be coded into; its streamBuffer
+ *    must not be NULL (checked with assert)
+ *
+ * \return
+ *    Number of bits used by the coded syntax element (always 1), cast to Boolean
+ *
+ * \note
+ *    This function always writes to the bit buffer used for the progressive
+ *    scan case and should not be used (or should be modified appropriately)
+ *    for interlaced coding. When used in the context of the Parameter Sets,
+ *    this is not a problem.
+ *
+ *************************************************************************************
+ */
+Boolean u_1 (char *tracestring, int value, Bitstream *bitstream)
+{
+  SyntaxElement symbol, *sym=&symbol;
+
+  sym->bitpattern = value;
+  sym->len = 1;
+  sym->value1 = value;
+
+  assert (bitstream->streamBuffer != NULL);
+
+  writeUVLC2buffer(sym, bitstream);
+
+#if TRACE
+  strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE); // NOTE(review): no NUL terminator is written if tracestring has TRACESTRING_SIZE or more characters
+  trace2out (sym);
+#endif
+
+  return ((Boolean) sym->len);
+}
+
+
+/*!
+ *************************************************************************************
+ * \brief
+ *    u_v, writes a n bit fixed length syntax element, returns the length in bits.
+ *    The value is used directly as the bit pattern and appended to the
+ *    bitstream with writeUVLC2buffer().
+ *
+ * \param n
+ *    length in bits
+ * \param tracestring
+ *    the string for the trace file (only used when TRACE is enabled)
+ * \param value
+ *    the value to be coded
+ *  \param bitstream
+ *    the target bitstream the value should be coded into; its streamBuffer
+ *    must not be NULL (checked with assert)
+ *
+ * \return
+ *    Number of bits used by the coded syntax element (n)
+ *
+ * \note
+ *    This function always writes to the bit buffer used for the progressive
+ *    scan case and should not be used (or should be modified appropriately)
+ *    for interlaced coding. When used in the context of the Parameter Sets,
+ *    this is not a problem.
+ *
+ *************************************************************************************
+ */
+
+int u_v (int n, char *tracestring, int value, Bitstream *bitstream)
+{
+  SyntaxElement symbol, *sym=&symbol;
+
+  sym->bitpattern = value;
+  sym->len = n;
+  sym->value1 = value;
+
+  assert (bitstream->streamBuffer != NULL);
+
+  writeUVLC2buffer(sym, bitstream);
+
+#if TRACE
+  strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE); // NOTE(review): no NUL terminator is written if tracestring has TRACESTRING_SIZE or more characters
+  trace2out (sym);
+#endif
+
+  return (sym->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mapping for ue(v) syntax elements: computes the code length 2*i+1 and
+ *    the info word ue+1-2^i, where i is the number of times (ue+1)/2 can be
+ *    halved before reaching zero (at most 16).
+ * \param ue
+ *    value to be mapped
+ * \param dummy
+ *    dummy parameter (not used)
+ * \param len
+ *    returns mapped value length
+ * \param info
+ *    returns mapped value
+ ************************************************************************
+ */
+void ue_linfo(int ue, int dummy, int *len,int *info)
+{
+  int i,nn;
+
+  nn=(ue+1)/2;
+
+  for (i=0; i < 16 && nn != 0; i++) // count the halvings of nn, capped at 16
+  {
+    nn /= 2;
+  }
+  *len= 2*i + 1;
+  *info=ue+1-(1<<i);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    mapping for se(v) syntax elements: with n = 2*|se| and i the number of
+ *    times |se| can be halved before reaching zero (at most 16), the code
+ *    length is 2*i+1 and the info word is n - 2^i + sign, where sign is 1
+ *    for se <= 0 and 0 otherwise.
+ * \param se
+ *    value to be mapped
+ * \param dummy
+ *    dummy parameter (not used)
+ * \param len
+ *    returns mapped value length
+ * \param info
+ *    returns mapped value
+ ************************************************************************
+ */
+void se_linfo(int se, int dummy, int *len,int *info)
+{
+
+  int i,n,sign,nn;
+
+  sign=0;
+
+  if (se <= 0)
+  {
+    sign=1;
+  }
+  n=iabs(se) << 1;
+
+  //  n+1 is the number in the code table.  Based on this we find length and info
+
+  nn=n/2;
+  for (i=0; i < 16 && nn != 0; i++) // count the halvings of nn, capped at 16
+  {
+    nn /= 2;
+  }
+  *len=i*2 + 1;
+  *info=n - (1 << i) + sign;
+}
+
+
+/*!
+ ************************************************************************
+ * \par Input:
+ *    Number in the code table
+ * \par Output:
+ *    length and info
+ ************************************************************************
+ */
+void cbp_linfo_intra(int cbp, int dummy, int *len,int *info)
+{
+  ue_linfo(NCBP[img->yuv_format?1:0][cbp][0], dummy, len, info);
+}
+
+
+/*!
+ ************************************************************************
+ * \par Input:
+ *    Number in the code table
+ * \par Output:
+ *    length and info
+ ************************************************************************
+ */
+void cbp_linfo_inter(int cbp, int dummy, int *len,int *info)
+{
+  ue_linfo(NCBP[img->yuv_format?1:0][cbp][1], dummy, len, info);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    2x2 transform of chroma DC
+ * \par Input:
+ *    level and run for coefficients
+ * \par Output:
+ *    length and info
+ * \note
+ *    see ITU document for bit assignment
+ ************************************************************************
+ */
+void levrun_linfo_c2x2(int level,int run,int *len,int *info)
+{
+  const int NTAB[2][2]=
+  {
+    {1,5},
+    {3,0}
+  };
+  const int LEVRUN[4]=
+  {
+    2,1,0,0
+  };
+
+  int levabs,i,n,sign,nn;
+
+  if (level == 0) //  check if the coefficient sign EOB (level=0)
+  {
+    *len=1;
+    return;
+  }
+  sign=0;
+  if (level <= 0)
+  {
+    sign=1;
+  }
+  levabs=iabs(level);
+  if (levabs <= LEVRUN[run])
+  {
+    n=NTAB[levabs-1][run]+1;
+  }
+  else
+  {
+    n=(levabs-LEVRUN[run])*8 + run*2;
+  }
+
+  nn=n/2;
+
+  for (i=0; i < 16 && nn != 0; i++)
+  {
+    nn /= 2;
+  }
+  *len= 2*i + 1;
+  *info=n-(1 << i)+sign;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Single scan coefficients
+ * \par Input:
+ *    level and run for coefficients
+ * \par Output:
+ *    length and info
+ * \note
+ *    see ITU document for bit assignment
+ ************************************************************************
+ */
+void levrun_linfo_inter(int level,int run,int *len,int *info)
+{
+  const byte LEVRUN[16]=
+  {
+    4,2,2,1,1,1,1,1,1,1,0,0,0,0,0,0
+  };
+  const byte NTAB[4][10]=
+  {
+    { 1, 3, 5, 9,11,13,21,23,25,27},
+    { 7,17,19, 0, 0, 0, 0, 0, 0, 0},
+    {15, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {29, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+  };
+
+  int levabs,i,n,sign,nn;
+
+  if (level == 0)           //  check for EOB
+  {
+    *len=1;
+    return;
+  }
+
+  if (level <= 0)
+    sign=1;
+  else
+    sign=0;
+
+  levabs=iabs(level);
+  if (levabs <= LEVRUN[run])
+  {
+    n=NTAB[levabs-1][run]+1;
+  }
+  else
+  {
+    n=(levabs-LEVRUN[run])*32 + run*2;
+  }
+
+  nn=n/2;
+
+  for (i=0; i < 16 && nn != 0; i++)
+  {
+    nn /= 2;
+  }
+  *len= 2*i + 1;
+  *info=n-(1 << i)+sign;
+
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Makes code word and passes it back
+ *    A code word has the following format: 0 0 0 ... 1 Xn ...X2 X1 X0.
+ *    The result is stored in sym->bitpattern; the function always returns 0.
+ * \par Input:
+ *    Info   : Xn..X2 X1 X0                                             \n
+ *    Length : Total number of bits in the codeword
+ ************************************************************************
+ */
+ // NOTE this function may be called with sym->inf > (1<<(sym->len/2)); the upper bits of inf are junk and are masked off
+int symbol2uvlc(SyntaxElement *sym)
+{
+  int suffix_len=sym->len/2;   // number of info bits that follow the '1' marker
+  assert (suffix_len<32);
+  sym->bitpattern = (1<<suffix_len)|(sym->inf&((1<<suffix_len)-1));
+  return 0;
+}
+
+/*!
+************************************************************************
+* \brief
+*    generates UVLC code and passes the codeword to the buffer
+************************************************************************
+*/
+void writeSE_UVLC(SyntaxElement *se, DataPartition *dp)
+{
+  ue_linfo (se->value1,se->value2,&(se->len),&(se->inf));
+  symbol2uvlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    maps se->value1 with se_linfo (signed mapping) and passes the UVLC codeword to the buffer
+************************************************************************
+*/
+void writeSE_SVLC(SyntaxElement *se, DataPartition *dp)
+{
+  se_linfo (se->value1,se->value2,&(se->len),&(se->inf));   // fills se->len and se->inf
+  symbol2uvlc(se);                                          // builds se->bitpattern from them
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    maps the CBP in se->value1 with the intra or inter table (chosen by the current macroblock type) and passes the UVLC codeword to the buffer
+************************************************************************
+*/
+void writeCBP_VLC(SyntaxElement *se, DataPartition *dp)
+{
+  Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+  if (IS_OLDINTRA (currMB) || currMB->mb_type == SI4MB ||  currMB->mb_type == I8MB)
+  {
+    cbp_linfo_intra (se->value1,se->value2,&(se->len),&(se->inf));
+  }
+  else
+  {
+    cbp_linfo_inter (se->value1,se->value2,&(se->len),&(se->inf));
+  }
+  symbol2uvlc(se);   // builds se->bitpattern from se->len and se->inf
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    writes a single '1' bit when se->value1 is -1, otherwise se->value1 as a 4-bit code
+ ************************************************************************
+ */
+void writeIntraPredMode_CAVLC(SyntaxElement *se, DataPartition *dp)
+{
+
+  if (se->value1 == -1)
+  {
+    se->len = 1;
+    se->inf = 1;
+  }
+  else
+  {
+    se->len = 4;
+    se->inf = se->value1;
+  }
+
+  se->bitpattern = se->inf;   // no mapping: the info bits are written as they are
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return;
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    generates UVLC code and passes the codeword to the buffer
+ * \author
+ *  Tian Dong
+ ************************************************************************
+ */
+int writeSyntaxElement2Buf_UVLC(SyntaxElement *se, Bitstream* this_streamBuffer )
+{
+
+  se->mapping(se->value1,se->value2,&(se->len),&(se->inf));
+
+  symbol2uvlc(se);
+
+  writeUVLC2buffer(se, this_streamBuffer );
+
+#if TRACE
+  if(se->type <= 1)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    writes the lowest se->len bits of se->bitpattern, most significant bit first, to currStream (nothing is written for se->len == 0)
+ ************************************************************************
+ */
+void  writeUVLC2buffer(SyntaxElement *se, Bitstream *currStream)
+{
+  int i;
+  unsigned int mask;
+  assert (se->len >= 0 && se->len <= 32);
+  // shift an unsigned 1, and only after the range check; an empty code needs no mask
+  mask = (se->len > 0) ? (1u << (se->len-1)) : 0u;
+
+  // Add the new bits to the bitstream; write out a byte whenever it is full
+  for (i=0; i<se->len; i++)
+  {
+    currStream->byte_buf <<= 1;
+    if (se->bitpattern & mask)
+      currStream->byte_buf |= 1;
+    currStream->bits_to_go--;
+    mask >>= 1;
+    if (currStream->bits_to_go==0)
+    {
+      currStream->bits_to_go = 8;
+      currStream->streamBuffer[currStream->byte_pos++]=currStream->byte_buf;
+      currStream->byte_buf = 0;
+    }
+  }
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    writes the prepared se->bitpattern (se->len bits) to the buffer without any mapping; returns se->len
+ * \author
+ *  Tian Dong
+ ************************************************************************
+ */
+int writeSyntaxElement2Buf_Fixed(SyntaxElement *se, Bitstream* this_streamBuffer )
+{
+  writeUVLC2buffer(se, this_streamBuffer );
+
+#if TRACE
+  if(se->type <= 1)
+    trace2out (se);
+#endif
+  return (se->len);
+}
+
+/*!
+************************************************************************
+* \brief
+*    writes the least significant bit of se->value1 as a one-bit flag to the buffer
+* \author
+*  Tian Dong
+************************************************************************
+*/
+void writeSE_Flag(SyntaxElement *se, DataPartition *dp )
+{
+  se->len        = 1;
+  se->bitpattern = (se->value1 & 1);
+
+  writeUVLC2buffer(se, dp->bitstream );
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    writes the inverted least significant bit of se->value1 as a one-bit flag to the buffer
+* \author
+*  Tian Dong
+************************************************************************
+*/
+void writeSE_invFlag(SyntaxElement *se, DataPartition *dp )
+{
+  se->len        = 1;
+  se->bitpattern = 1-(se->value1 & 1);
+
+  writeUVLC2buffer(se, dp->bitstream );
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+}
+
+/*!
+************************************************************************
+* \brief
+*    writes nothing; only sets the length of the syntax element to zero
+* \author
+*  Tian Dong
+************************************************************************
+*/
+void writeSE_Dummy(SyntaxElement *se, DataPartition *dp )
+{
+  se->len = 0;
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    writes the prepared se->bitpattern (se->len bits) to the buffer without any mapping
+* \author
+*  Tian Dong
+************************************************************************
+*/
+void writeSE_Fix(SyntaxElement *se, DataPartition *dp )
+{
+  writeUVLC2buffer(se, dp->bitstream );
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Makes code word and passes it back in sym->bitpattern: the lowest
+ *    sym->len bits of sym->inf are copied, most significant bit first.
+ * \par Input:
+ *    Info   : Xn..X2 X1 X0                                             \n
+ *    Length : Total number of bits in the codeword
+ ************************************************************************
+ */
+
+int symbol2vlc(SyntaxElement *sym)
+{
+  int info_len = sym->len;
+
+  // Convert info into a bitpattern int
+  sym->bitpattern = 0;
+
+  // vlc coding
+  while(--info_len >= 0)
+  {
+    sym->bitpattern <<= 1;
+    sym->bitpattern |= (0x01 & (sym->inf >> info_len));   // append bit number info_len of inf
+  }
+  return 0;   // always 0
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    generates VLC code and passes the codeword to the buffer
+ ************************************************************************
+ */
+int writeSyntaxElement_VLC(SyntaxElement *se, DataPartition *dp)
+{
+
+  se->inf = se->value1;
+  se->len = se->value2;
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for NumCoeff and TrailingOnes
+ ************************************************************************
+ */
+
+int writeSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *se, DataPartition *dp)
+{
+  static const int lentab[3][4][17] =
+  {
+    {   // 0702
+      { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+      { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16},
+      { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16},
+      { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16},
+    },
+    {
+      { 2, 6, 6, 7, 8, 8, 9,11,11,12,12,12,13,13,13,14,14},
+      { 0, 2, 5, 6, 6, 7, 8, 9,11,11,12,12,13,13,14,14,14},
+      { 0, 0, 3, 6, 6, 7, 8, 9,11,11,12,12,13,13,13,14,14},
+      { 0, 0, 0, 4, 4, 5, 6, 6, 7, 9,11,11,12,13,13,13,14},
+    },
+    {
+      { 4, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,10},
+      { 0, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9,10,10,10},
+      { 0, 0, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,10},
+      { 0, 0, 0, 4, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9,10,10,10},
+    },
+
+  };
+
+  static const int codtab[3][4][17] =
+  {
+    {
+      { 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7,4},
+      { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10,6},
+      { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9,5},
+      { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12,8},
+    },
+    {
+      { 3,11, 7, 7, 7, 4, 7,15,11,15,11, 8,15,11, 7, 9,7},
+      { 0, 2, 7,10, 6, 6, 6, 6,14,10,14,10,14,10,11, 8,6},
+      { 0, 0, 3, 9, 5, 5, 5, 5,13, 9,13, 9,13, 9, 6,10,5},
+      { 0, 0, 0, 5, 4, 6, 8, 4, 4, 4,12, 8,12,12, 8, 1,4},
+    },
+    {
+      {15,15,11, 8,15,11, 9, 8,15,11,15,11, 8,13, 9, 5,1},
+      { 0,14,15,12,10, 8,14,10,14,14,10,14,10, 7,12, 8,4},
+      { 0, 0,13,14,11, 9,13, 9,13,10,13, 9,13, 9,11, 7,3},
+      { 0, 0, 0,12,11,10, 9, 8,13,12,12,12, 8,12,10, 6,2},
+    },
+  };
+  int vlcnum;
+
+  vlcnum = se->len;
+
+  // se->value1 : numcoeff
+  // se->value2 : numtrailingones
+
+  if (vlcnum == 3)
+  {
+    se->len = 6;  // 4 + 2 bit FLC
+    if (se->value1 > 0)
+    {
+      se->inf = ((se->value1-1) << 2) | se->value2;
+    }
+    else
+    {
+      se->inf = 3;
+    }
+  }
+  else
+  {
+    se->len = lentab[vlcnum][se->value2][se->value1];
+    se->inf = codtab[vlcnum][se->value2][se->value1];
+  }
+  //se->inf = 0;
+
+  if (se->len == 0)
+  {
+    printf("ERROR: (numcoeff,trailingones) not valid: vlc=%d (%d, %d)\n",
+      vlcnum, se->value1, se->value2);
+    exit(-1);
+  }
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for NumCoeff and TrailingOnes for Chroma DC
+ ************************************************************************
+ */
+int writeSyntaxElement_NumCoeffTrailingOnesChromaDC(SyntaxElement *se, DataPartition *dp)
+{
+  static const int lentab[3][4][17] =
+  {
+    //YUV420
+   {{ 2, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 1, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 3, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+    //YUV422
+   {{ 1, 7, 7, 9, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 2, 7, 7, 9,10,11,12,12, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 3, 7, 7, 9,10,11,12, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 0, 5, 6, 7, 7,10,11, 0, 0, 0, 0, 0, 0, 0, 0}},
+    //YUV444
+   {{ 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+    { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16},
+    { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16},
+    { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16}}
+  };
+
+  static const int codtab[3][4][17] =
+  {
+    //YUV420
+   {{ 1, 7, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 1, 6, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+    //YUV422
+   {{ 1,15,14, 7, 6, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 1,13,12, 5, 6, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 1,11,10, 4, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0},
+    { 0, 0, 0, 1, 1, 9, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}},
+    //YUV444
+   {{ 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7, 4},
+    { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10, 6},
+    { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9, 5},
+    { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12, 8}}
+
+  };
+  int yuv = img->yuv_format - 1;
+
+  // se->value1 : numcoeff
+  // se->value2 : numtrailingones
+  se->len = lentab[yuv][se->value2][se->value1];
+  se->inf = codtab[yuv][se->value2][se->value1];
+
+  if (se->len == 0)
+  {
+    printf("ERROR: (numcoeff,trailingones) not valid: (%d, %d)\n",
+      se->value1, se->value2);
+    exit(-1);
+  }
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for TotalZeros
+ ************************************************************************
+ */
+int writeSyntaxElement_TotalZeros(SyntaxElement *se, DataPartition *dp)
+{
+  static const int lentab[TOTRUN_NUM][16] =
+  {
+    { 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+    { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+    { 4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+    { 5,3,4,4,3,3,3,4,3,4,5,5,5},
+    { 4,4,4,3,3,3,3,3,4,5,4,5},
+    { 6,5,3,3,3,3,3,3,4,3,6},
+    { 6,5,3,3,3,2,3,4,3,6},
+    { 6,4,5,3,2,2,3,3,6},
+    { 6,6,4,2,2,3,2,5},
+    { 5,5,3,2,2,2,4},
+    { 4,4,3,3,1,3},
+    { 4,4,2,1,3},
+    { 3,3,1,2},
+    { 2,2,1},
+    { 1,1},
+  };
+
+  static const int codtab[TOTRUN_NUM][16] =
+  {
+    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+    {3,7,5,4,6,5,4,3,3,2,2,1,0},
+    {5,4,3,7,6,5,4,3,2,1,1,0},
+    {1,1,7,6,5,4,3,2,1,1,0},
+    {1,1,5,4,3,3,2,1,1,0},
+    {1,1,1,3,3,2,2,1,0},
+    {1,0,1,3,2,1,1,1,},
+    {1,0,1,3,2,1,1,},
+    {0,1,1,2,1,3},
+    {0,1,1,1,1},
+    {0,1,1,1},
+    {0,1,1},
+    {0,1},
+  };
+  int vlcnum;
+
+  vlcnum = se->len;
+
+  // se->value1 : TotalZeros
+  se->len = lentab[vlcnum][se->value1];
+  se->inf = codtab[vlcnum][se->value1];
+
+  if (se->len == 0)
+  {
+    printf("ERROR: (TotalZeros) not valid: (%d)\n",se->value1);
+    exit(-1);
+  }
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for TotalZeros for Chroma DC
+ ************************************************************************
+ */
+int writeSyntaxElement_TotalZerosChromaDC(SyntaxElement *se, DataPartition *dp)
+{
+  static const int lentab[3][TOTRUN_NUM][16] =
+  {
+    //YUV420
+   {{ 1,2,3,3},
+    { 1,2,2},
+    { 1,1}},
+    //YUV422
+   {{ 1,3,3,4,4,4,5,5},
+    { 3,2,3,3,3,3,3},
+    { 3,3,2,2,3,3},
+    { 3,2,2,2,3},
+    { 2,2,2,2},
+    { 2,2,1},
+    { 1,1}},
+    //YUV444
+   {{ 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+    { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+    { 4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+    { 5,3,4,4,3,3,3,4,3,4,5,5,5},
+    { 4,4,4,3,3,3,3,3,4,5,4,5},
+    { 6,5,3,3,3,3,3,3,4,3,6},
+    { 6,5,3,3,3,2,3,4,3,6},
+    { 6,4,5,3,2,2,3,3,6},
+    { 6,6,4,2,2,3,2,5},
+    { 5,5,3,2,2,2,4},
+    { 4,4,3,3,1,3},
+    { 4,4,2,1,3},
+    { 3,3,1,2},
+    { 2,2,1},
+    { 1,1}}
+  };
+
+  static const int codtab[3][TOTRUN_NUM][16] =
+  {
+    //YUV420
+   {{ 1,1,1,0},
+    { 1,1,0},
+    { 1,0}},
+    //YUV422
+   {{ 1,2,3,2,3,1,1,0},
+    { 0,1,1,4,5,6,7},
+    { 0,1,1,2,6,7},
+    { 6,0,1,2,7},
+    { 0,1,2,3},
+    { 0,1,1},
+    { 0,1}},
+    //YUV444
+   {{1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+    {3,7,5,4,6,5,4,3,3,2,2,1,0},
+    {5,4,3,7,6,5,4,3,2,1,1,0},
+    {1,1,7,6,5,4,3,2,1,1,0},
+    {1,1,5,4,3,3,2,1,1,0},
+    {1,1,1,3,3,2,2,1,0},
+    {1,0,1,3,2,1,1,1,},
+    {1,0,1,3,2,1,1,},
+    {0,1,1,2,1,3},
+    {0,1,1,1,1},
+    {0,1,1,1},
+    {0,1,1},
+    {0,1}}
+  };
+  int vlcnum;
+  int yuv = img->yuv_format - 1;
+
+  vlcnum = se->len;
+
+  // se->value1 : TotalZeros
+  se->len = lentab[yuv][vlcnum][se->value1];
+  se->inf = codtab[yuv][vlcnum][se->value1];
+
+  if (se->len == 0)
+  {
+    printf("ERROR: (TotalZeros) not valid: (%d)\n",se->value1);
+    exit(-1);
+  }
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for Run Before Next Coefficient, VLC0
+ ************************************************************************
+ */
+int writeSyntaxElement_Run(SyntaxElement *se, DataPartition *dp)
+{
+  static const int lentab[TOTRUN_NUM][16] =
+  {
+    {1,1},
+    {1,2,2},
+    {2,2,2,2},
+    {2,2,2,3,3},
+    {2,2,3,3,3,3},
+    {2,3,3,3,3,3,3},
+    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
+  };
+
+  static const int codtab[TOTRUN_NUM][16] =
+  {
+    {1,0},
+    {1,1,0},
+    {3,2,1,0},
+    {3,2,1,1,0},
+    {3,2,3,2,1,0},
+    {3,0,1,3,2,5,4},
+    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
+  };
+  int vlcnum;
+
+  vlcnum = se->len;
+
+  // se->value1 : run
+  se->len = lentab[vlcnum][se->value1];
+  se->inf = codtab[vlcnum][se->value1];
+
+  if (se->len == 0)
+  {
+    printf("ERROR: (run) not valid: (%d)\n",se->value1);
+    exit(-1);
+  }
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for Coeff Level (VLC1)
+ * \return
+ *    code length in bits; 0x0000FFFF (and nothing is written) when the level
+ *    needs level_prefix > 15 while profile_idc < 100
+ ************************************************************************
+ */
+int writeSyntaxElement_Level_VLC1(SyntaxElement *se, DataPartition *dp, int profile_idc)
+{
+  int level = se->value1;
+  int levabs = iabs(level);
+  int sign = (level < 0 ? 1 : 0);
+
+  if (levabs < 8)
+  {
+    se->len = levabs * 2 + sign - 1;
+    se->inf = 1;
+  }
+  else if (levabs < 16) //8+8)
+  {
+    // escape code1: 14 zeros, the '1' marker and a 4-bit suffix
+    //se->len = 14 + 1 + 4;
+    se->len = 19;
+    se->inf = (1 << 4) | ((levabs - 8) << 1) | sign;
+  }
+  else
+  {
+    int iLength = 28, numPrefix = 15;
+    int iCodeword, addbit, offset;
+    int levabsm16 = levabs-16;
+
+    // escape code2: prefix 15+k covers levabsm16 in [(2048<<k)-2048, (4096<<k)-2048)
+    if ((levabsm16) >= (1<<11))
+    {
+      numPrefix++;
+      while ((levabsm16) >= (4096<<(numPrefix-15))-2048)
+      {
+        numPrefix++;
+      }
+    }
+    addbit  = numPrefix - 15;
+    iLength += (addbit<<1);
+    offset = (2048<<addbit)-2048;
+    // the suffix counts from the first level of the chosen prefix, hence "- offset"
+    iCodeword = (1<<(12+addbit))|((levabsm16-offset)<<1)|sign;
+    /* Assert to make sure that the code fits in the VLC */
+    /* make sure that we are in High Profile to represent level_prefix > 15 */
+    if (numPrefix > 15 && profile_idc < 100)
+    {
+      //error( "level_prefix must be <= 15 except in High Profile\n",  1000 );
+      se->len = 0x0000FFFF; // This can be some other big number
+      se->inf = iCodeword;
+      return (se->len);
+    }
+    se->len = iLength;
+    se->inf = iCodeword;
+  }
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    write VLC for Coeff Level
+ * \param vlc
+ *    suffix length of the level code; presumably always >= 1 (vlc-1 is a shift count)
+ * \return
+ *    code length in bits; 0x0000FFFF (and nothing is written) when the level
+ *    needs level_prefix > 15 while profile_idc < 100
+ ************************************************************************
+ */
+int writeSyntaxElement_Level_VLCN(SyntaxElement *se, int vlc, DataPartition *dp, int profile_idc)
+{
+  int addbit, offset;
+  int iCodeword;
+  int iLength;
+  int level = se->value1;
+  int levabs = iabs(level);
+  int sign = (level < 0 ? 1 : 0);
+
+  int shift = vlc-1;
+  int escape = (15<<shift)+1;   // smallest |level| whose prefix would reach 15
+  int numPrefix = (levabs-1)>>shift;
+  int sufmask = ~((0xffffffff)<<shift);
+  int suffix = (levabs-1)&sufmask;
+
+  if (levabs < escape)
+  {
+    iLength = numPrefix + vlc + 1;
+    iCodeword = (1<<(shift+1))|(suffix<<1)|sign;
+  }
+  else
+  {
+    int levabsesc = levabs-escape;
+    iLength = 28;
+    numPrefix = 15;
+
+    // prefix 15+k covers levabsesc in [(2048<<k)-2048, (4096<<k)-2048)
+    if ((levabsesc) >= (1<<11))
+    {
+      numPrefix++;
+      while ((levabsesc) >= (4096<<(numPrefix-15))-2048)
+      {
+        numPrefix++;
+      }
+    }
+
+    addbit  = numPrefix - 15;
+    iLength += (addbit<<1);
+    offset = (2048<<addbit)-2048;
+    iCodeword = (1<<(12+addbit))|((levabsesc-offset)<<1)|sign;
+    /* Assert to make sure that the code fits in the VLC */
+    /* make sure that we are in High Profile to represent level_prefix > 15 */
+    if (numPrefix > 15 &&  profile_idc < 100)
+    {
+      //error( "level_prefix must be <= 15 except in High Profile\n",  1000 );
+      se->len = 0x0000FFFF; // This can be some other big number
+      se->inf = iCodeword;
+      return (se->len);
+    }
+  }
+  se->len = iLength;
+  se->inf = iCodeword;
+
+  symbol2vlc(se);
+
+  writeUVLC2buffer(se, dp->bitstream);
+
+  if(se->type != SE_HEADER)
+    dp->bitstream->write_flag = 1;
+
+#if TRACE
+  if(dp->bitstream->trace_enabled)
+    trace2out (se);
+#endif
+
+  return (se->len);
+}
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    Write out a trace string on the trace file
+ ************************************************************************
+ */
+#if TRACE
+int bitcounter = 0;
+
+void trace2out(SyntaxElement *sym)
+{
+  static
+  int i, chars;
+
+  if (p_trace != NULL)
+  {
+    putc('@', p_trace);
+    chars = fprintf(p_trace, "%i", bitcounter);
+    while(chars++ < 6)
+      putc(' ',p_trace);
+
+    chars += fprintf(p_trace, "%s", sym->tracestring);
+    while(chars++ < 55)
+      putc(' ',p_trace);
+
+    // align bit pattern
+    if(sym->len<15)
+    {
+      for(i=0 ; i<15-sym->len ; i++)
+        fputc(' ', p_trace);
+    }
+
+    // print bit pattern
+    bitcounter += sym->len;
+    for(i=1 ; i<=sym->len ; i++)
+    {
+      if((sym->bitpattern >> (sym->len-i)) & 0x1)
+        fputc('1', p_trace);
+      else
+        fputc('0', p_trace);
+    }
+    fprintf(p_trace, " (%3d) \n",sym->value1);
+  }
+  fflush (p_trace);
+}
+
+void trace2out_cabac(SyntaxElement *sym)
+{
+  int chars;
+
+  if (p_trace != NULL)
+  {
+    putc('@', p_trace);
+    chars = fprintf(p_trace, "%i", bitcounter);
+    while(chars++ < 6)
+      putc(' ',p_trace);
+
+    chars += fprintf(p_trace, "%s", sym->tracestring);
+    while(chars++ < 70)
+      putc(' ',p_trace);
+
+    fprintf(p_trace, " (%3d) \n",sym->value1);
+  }
+  fflush (p_trace);
+  bitcounter += sym->len;
+}
+#endif
+
+
+/*!
+ ************************************************************************
+ * \brief
+ *    pads a partially filled byte buffer of the Bitstream with '1' bits and
+ *    moves it into the streamBuffer; does nothing unless bits_to_go < 8.
+ *
+ * \param currStream
+ *    the Bitstream in which the byte alignment should be established
+ *
+ ************************************************************************
+ */
+void writeVlcByteAlign(Bitstream* currStream)
+{
+  if (currStream->bits_to_go < 8)
+  { // trailing bits to process
+    currStream->byte_buf = (currStream->byte_buf <<currStream->bits_to_go) | (0xff >> (8 - currStream->bits_to_go));
+    stats->bit_use_stuffingBits[img->type]+=currStream->bits_to_go;   // count the padding bits for the current picture type
+    currStream->streamBuffer[currStream->byte_pos++]=currStream->byte_buf;
+    currStream->bits_to_go = 8;
+  }
+}
+


Index: llvm-test/MultiSource/Applications/JM/lencod/vlc.h
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/vlc.h:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/vlc.h	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,57 @@
+
+/*!
+ *************************************************************************************
+ * \file vlc.h
+ *
+ * \brief
+ *    Prototypes for VLC coding functions
+ * \author
+ *     Karsten Suehring
+ *************************************************************************************
+ */
+
+#ifndef _VLC_H_
+#define _VLC_H_
+
+Boolean u_1  (char *tracestring, int value, Bitstream *bitstream);
+int se_v (char *tracestring, int value, Bitstream *bitstream);
+int ue_v (char *tracestring, int value, Bitstream *bitstream);
+int u_v  (int n, char *tracestring, int value, Bitstream *bitstream);
+
+
+void levrun_linfo_c2x2(int level,int run,int *len,int *info);
+void levrun_linfo_inter(int level,int run,int *len,int *info);
+
+void writeSE_UVLC   (SyntaxElement *se, DataPartition *dp);
+void writeSE_SVLC   (SyntaxElement *se, DataPartition *dp);
+void writeSE_Fix    (SyntaxElement *se, DataPartition *dp);
+void writeSE_Flag   (SyntaxElement *se, DataPartition *dp);
+void writeSE_invFlag(SyntaxElement *se, DataPartition *dp);
+void writeSE_Dummy  (SyntaxElement *se, DataPartition *dp);
+
+void writeCBP_VLC   (SyntaxElement *se, DataPartition *dp);
+void writeIntraPredMode_CAVLC(SyntaxElement *se, DataPartition *dp);
+
+int   writeSyntaxElement2Buf_UVLC(SyntaxElement *se, Bitstream* this_streamBuffer );
+void  writeUVLC2buffer(SyntaxElement *se, Bitstream *currStream);
+int   writeSyntaxElement2Buf_Fixed(SyntaxElement *se, Bitstream* this_streamBuffer );
+int   symbol2uvlc(SyntaxElement *se);
+void  ue_linfo(int n, int dummy, int *len,int *info);
+void  se_linfo(int mvd, int dummy, int *len,int *info);
+void  cbp_linfo_intra(int cbp, int dummy, int *len,int *info);
+void  cbp_linfo_inter(int cbp, int dummy, int *len,int *info);
+
+// CAVLC
+void  CAVLC_init(void);
+
+int   writeSyntaxElement_VLC(SyntaxElement *se, DataPartition *this_dataPart);
+int   writeSyntaxElement_TotalZeros(SyntaxElement *se, DataPartition *this_dataPart);
+int   writeSyntaxElement_TotalZerosChromaDC(SyntaxElement *se, DataPartition *this_dataPart);
+int   writeSyntaxElement_Run(SyntaxElement *se, DataPartition *this_dataPart);
+int   writeSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *se, DataPartition *this_dataPart);
+int   writeSyntaxElement_NumCoeffTrailingOnesChromaDC(SyntaxElement *se, DataPartition *this_dataPart);
+int   writeSyntaxElement_Level_VLC1(SyntaxElement *se, DataPartition *this_dataPart, int profile_idc);
+int   writeSyntaxElement_Level_VLCN(SyntaxElement *se, int vlc, DataPartition *this_dataPart, int profile_idc);
+
+#endif
+


Index: llvm-test/MultiSource/Applications/JM/lencod/weighted_prediction.c
diff -u /dev/null llvm-test/MultiSource/Applications/JM/lencod/weighted_prediction.c:1.3
--- /dev/null	Sun Feb  4 08:38:55 2007
+++ llvm-test/MultiSource/Applications/JM/lencod/weighted_prediction.c	Sun Feb  4 08:38:32 2007
@@ -0,0 +1,737 @@
+
+/*!
+*************************************************************************************
+* \file weighted_prediction.c
+*
+* \brief
+*    Estimate weights for WP
+*
+* \author
+*    Main contributors (see contributors.h for copyright, address and affiliation details)
+*     - Alexis Michael Tourapis         <alexismt at ieee.org>
+*     - Athanasios Leontaris            <aleon at dolby.com>
+*************************************************************************************
+*/
+#include <stdlib.h>
+#include "contributors.h"
+
+#include "global.h"
+#include <memory.h>
+#include "image.h"
+
+
+static SubImageContainer ref_pic_sub;
+static SubImageContainer ref_qpic_sub;
+
+
+/*!
+************************************************************************
+* \brief
+*    Estimates explicit weighted prediction parameters for a P slice
+*
+*    The sum of the original luma samples (imgY_org) is compared with the
+*    luma sum of every reference picture in each active list. Depending
+*    on select_offset either a luma weight (ratio of the two sums) or a
+*    luma offset (difference of the sums divided by img->size) is derived.
+*    Chroma always keeps the default weight and a zero offset.
+*
+*    Side effects: sets luma_log_weight_denom, chroma_log_weight_denom,
+*    wp_luma_round, wp_chroma_round and fills wp_weight / wp_offset.
+*
+* \param select_offset
+*    0: estimate luma weights (offsets stay 0) \n
+*    otherwise: estimate luma offsets (weights stay at the default)
+************************************************************************
+*/
+
+void estimate_weighting_factor_P_slice(int select_offset)
+{
+  int i, j, n;
+
+  double dc_org = 0.0;
+  int index;
+  int comp;
+  double dc_ref[MAX_REFERENCE_PICTURES];
+
+  int default_weight[3];
+
+  int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+  // list_offset is 0, 2 or 4 and every loop below runs up to list 1 + list_offset,
+  // so the scratch tables need 6 list entries (only 2 entries overflow as soon as list_offset != 0)
+  int weight[6][MAX_REFERENCE_PICTURES][3];
+  int offset[6][MAX_REFERENCE_PICTURES][3];
+  int clist;
+
+  imgpel **tmpPtr;
+
+  luma_log_weight_denom = 5;
+  chroma_log_weight_denom = 5;
+  wp_luma_round = 1 << (luma_log_weight_denom - 1);
+  wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+  // a weight of 1 << denom leaves the prediction unscaled
+  default_weight[0] = 1<<luma_log_weight_denom;
+  default_weight[1] = default_weight[2] = 1<<chroma_log_weight_denom;
+
+  /* set all values to defaults */
+  for (i = 0; i < 2 + list_offset; i++)
+  {
+    for (j = 0; j < listXsize[i]; j++)
+    {
+      for (n = 0; n < 3; n++)
+      {
+        weight[i][j][n] = default_weight[n];
+        wp_weight[i][j][n] = default_weight[n];
+        wp_offset[i][j][n] = 0;
+        offset[i][j][n] = 0;
+      }
+    }
+  }
+
+  // sum of the original luma picture
+  for (i = 0; i < img->height; i++)
+  {
+    for (j = 0; j < img->width; j++)
+    {
+      dc_org += (double) imgY_org[i][j];
+    }
+  }
+
+  for (clist=0; clist<2 + list_offset; clist++)
+  {
+    for (n = 0; n < listXsize[clist]; n++)
+    {
+      dc_ref[n] = 0.0;
+
+      ref_pic_sub.luma       = listX[clist][n]->imgY_sub;
+      // plane [0][0] of the sub-sampled reference (presumably the integer-pel plane - TODO confirm)
+      tmpPtr = ref_pic_sub.luma[0][0];
+
+      // Y: sum of the reference luma, skipping the IMG_PAD_SIZE border
+      for (j = IMG_PAD_SIZE; j < img->height + IMG_PAD_SIZE; j++)
+      {
+        for ( i = IMG_PAD_SIZE; i < img->width + IMG_PAD_SIZE; i++ )
+        {
+          dc_ref[n] += (double) tmpPtr[j][i];
+        }
+      }
+
+      if (select_offset == 0)
+      {
+        // weight mode: scale the default weight by the ratio of the sums (rounded)
+        if (dc_ref[n] != 0)
+          weight[clist][n][0] = (int) (default_weight[0] * dc_org / dc_ref[n] + 0.5);
+        else
+          weight[clist][n][0] = default_weight[0];  // only used when reference picture is black
+        // fall back to the default when the estimate leaves the accepted range
+        if (weight[clist][n][0] < -64 || weight[clist][n][0] >127)
+          weight[clist][n][0] = default_weight[0];
+      }
+      else
+      {
+        // offset mode: difference of the sums divided by img->size, clipped to [-128, 127]
+        offset[clist][n][0] = (int) ((dc_org-dc_ref[n])/(img->size)+0.5);
+        offset[clist][n][0] = (offset[clist][n][0]<-128) ? -128: (offset[clist][n][0]>127) ? 127:offset[clist][n][0];
+        weight[clist][n][0] = default_weight[0];
+      }
+
+
+      /* for now always use default weight for chroma weight */
+      weight[clist][n][1] = default_weight[1];
+      weight[clist][n][2] = default_weight[2];
+    }
+  }
+
+  // publish the estimates in the global weighted prediction tables
+  for (clist=0; clist<2 + list_offset; clist++)
+  {
+    for (index = 0; index < listXsize[clist]; index++)
+    {
+      for (comp=0; comp < 3; comp ++)
+      {
+        wp_weight[clist][index][comp] = weight[clist][index][comp];
+        wp_offset[clist][index][comp] = offset[clist][index][comp];
+        // printf("index %d component %d weight %d offset %d\n",index,comp,weight[0][index][comp],offset[0][index][comp]);
+      }
+    }
+  }
+
+}
+
+/*!
+************************************************************************
+* \brief
+*    Estimates reference picture weighting factors for a B slice
+*
+*    active_pps->weighted_bipred_idc == 2 (implicit mode): the
+*    bi-predictive weights wbp_weight are derived from the POC distances
+*    between the current picture and every (list 0, list 1) reference
+*    pair; wp_weight / wp_offset keep their defaults.
+*
+*    Otherwise: a luma weight is estimated per reference picture from the
+*    ratio of the luma sums. It is copied to wp_weight only when
+*    weighted_bipred_idc == 1; wbp_weight is then filled from wp_weight.
+*
+*    Side effects: sets luma_log_weight_denom, chroma_log_weight_denom,
+*    wp_luma_round, wp_chroma_round, wp_weight, wp_offset and wbp_weight.
+************************************************************************
+*/
+void estimate_weighting_factor_B_slice(void)
+{
+  int i, j, n;
+
+  int tx,DistScaleFactor;
+  double dc_org = 0.0;
+  int index;
+  int comp;
+  double dc_ref[6][MAX_REFERENCE_PICTURES];
+
+  int default_weight[3];
+  int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+  int weight[6][MAX_REFERENCE_PICTURES][3];
+  int offset[6][MAX_REFERENCE_PICTURES][3];
+  int clist;
+  int wf_weight;
+  imgpel **tmpPtr;
+
+  // implicit and explicit mode both use a denominator of 2^5 here
+  luma_log_weight_denom = 5;
+  chroma_log_weight_denom = 5;
+
+  wp_luma_round = 1 << (luma_log_weight_denom - 1);
+  wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+  default_weight[0] = 1<<luma_log_weight_denom;
+  default_weight[1] = 1<<chroma_log_weight_denom;
+  default_weight[2] = 1<<chroma_log_weight_denom;
+
+  /* set all values to defaults */
+  for (i = 0; i < 2 + list_offset; i++)
+  {
+    for (j = 0; j < listXsize[i]; j++)
+    {
+      for (n = 0; n < 3; n++)
+      {
+        wp_weight[i][j][n] = default_weight[n];
+        wp_offset[i][j][n] = 0;
+        offset   [i][j][n] = 0;
+        weight   [i][j][n] = default_weight[n];
+      }
+    }
+  }
+
+  if (active_pps->weighted_bipred_idc == 2) //! implicit mode
+  {
+    for (i = 0; i < listXsize[LIST_0]; i++)
+    {
+      for (j = 0; j < listXsize[LIST_1]; j++)
+      {
+        int td, tb;
+        int w0, w1;
+
+        // td: POC distance between the two references, tb: between current picture and the list 0 reference
+        td = iClip3(-128,127,(listX[LIST_1][j]->poc - listX[LIST_0][i]->poc));
+        tb = iClip3(-128,127,(enc_picture->poc - listX[LIST_0][i]->poc));
+
+        if (td != 0)
+        {
+          tx = (16384 + iabs(td/2))/td;
+          DistScaleFactor = iClip3(-1024, 1023, (tx*tb + 32 )>>6);
+        }
+
+        for (comp = 0; comp < 3; comp++)
+        {
+          // implicit weights
+          if (td == 0)
+          {
+            // both references share the same POC: no distance scaling possible
+            w1 = default_weight[comp];
+            w0 = default_weight[comp];
+          }
+          else
+          {
+            w1 = DistScaleFactor>>2;
+            if (w1 < -64 || w1 > 128)
+              w1 = default_weight[comp];
+            // the weights of a reference pair always add up to 64
+            w0 = 64 - w1;
+          }
+          wbp_weight[1][i][j][comp] = w1;
+          wbp_weight[0][i][j][comp] = w0;
+        }
+        /*
+        printf ("%d imp weight[%d][%d] = %d  , %d (%d %d %d) (%d %d) (%d %d)\n",enc_picture->poc, i, j,  wbp_weight[0][i][j][0], wbp_weight[1][i][j][0],
+          enc_picture->poc,listX[LIST_0][i]->poc, listX[LIST_1][j]->poc,
+          DistScaleFactor ,tx,td,tb);
+        */
+      }
+    }
+
+    // single list prediction keeps the default weights in implicit mode
+    for (clist=0; clist<2 + list_offset; clist++)
+    {
+      for (index = 0; index < listXsize[clist]; index++)
+      {
+        wp_weight[clist][index][0] = default_weight[0];
+        wp_weight[clist][index][1] = default_weight[1];
+        wp_weight[clist][index][2] = default_weight[2];
+        wp_offset[clist][index][0] = 0;
+        wp_offset[clist][index][1] = 0;
+        wp_offset[clist][index][2] = 0;
+      }
+    }
+  }
+  else
+  {
+    // sum of the original luma picture
+    for (i = 0; i < img->height; i++)
+    {
+      for (j = 0; j < img->width; j++)
+      {
+        dc_org += (double) imgY_org[i][j];
+      }
+    }
+
+    for (clist=0; clist<2 + list_offset; clist++)
+    {
+      for (n = 0; n < listXsize[clist]; n++)
+      {
+        dc_ref[clist][n] = 0;
+
+        ref_qpic_sub.luma      = listX[clist][n]->imgY_sub;
+        tmpPtr = ref_qpic_sub.luma[0][0];
+        // Y: sum of the reference luma, skipping the IMG_PAD_SIZE border
+        for (j = IMG_PAD_SIZE; j < (img->height + IMG_PAD_SIZE); j++)
+        {
+          for (i = IMG_PAD_SIZE; i < img->width + IMG_PAD_SIZE; i++)
+          {
+            dc_ref[clist][n] += (double) tmpPtr[j][i];
+          }
+        }
+        if (dc_ref[clist][n] != 0.0)
+          wf_weight = (int) (default_weight[0] * dc_org / dc_ref[clist][n] + 0.5);
+        else
+          wf_weight = default_weight[0];  // only used when reference picture is black
+
+        // fall back to the default when the estimate leaves the accepted range
+        if ( (wf_weight<-128) || (wf_weight>127) )
+        {
+          wf_weight = default_weight[0];
+        }
+
+        //    printf("dc_org = %d, dc_ref = %d, weight[%d] = %d\n",dc_org, dc_ref[n],n,weight[n][0]);
+
+        // only the luma weight is estimated; chroma weights and all offsets stay at their defaults
+        weight[clist][n][0] = wf_weight;
+        weight[clist][n][1] = default_weight[1];
+        weight[clist][n][2] = default_weight[2];
+        offset[clist][n][0] = 0;
+        offset[clist][n][1] = 0;
+        offset[clist][n][2] = 0;
+
+      }
+    }
+
+    if (active_pps->weighted_bipred_idc == 1)
+    {
+      // explicit mode: publish the estimated weights
+      for (clist=0; clist<2 + list_offset; clist++)
+      {
+        for (index = 0; index < listXsize[clist]; index++)
+        {
+          for (comp = 0; comp < 3; comp++)
+          {
+            wp_weight[clist][index][comp] = weight[clist][index][comp];
+            wp_offset[clist][index][comp] = offset[clist][index][comp];
+            //printf("%d %d\n",wp_weight[clist][index][comp],wp_offset[clist][index][comp]);
+          }
+        }
+      }
+    }
+    else
+    {
+      // any other mode: keep the default weights
+      for (clist=0; clist<2 + list_offset; clist++)
+      {
+        for (index = 0; index < listXsize[clist]; index++)
+        {
+          wp_weight[clist][index][0] = default_weight[0];
+          wp_weight[clist][index][1] = default_weight[1];
+          wp_weight[clist][index][2] = default_weight[2];
+          wp_offset[clist][index][0] = 0;
+          wp_offset[clist][index][1] = 0;
+          wp_offset[clist][index][2] = 0;
+        }
+      }
+    }
+
+    // bi-predictive weights are the per-list weights of the two references
+    for (i = 0; i < listXsize[LIST_0]; i++)
+    {
+      for (j = 0; j < listXsize[LIST_1]; j++)
+      {
+        for (comp = 0; comp < 3; comp++)
+        {
+          wbp_weight[0][i][j][comp] = wp_weight[0][i][comp];
+          wbp_weight[1][i][j][comp] = wp_weight[1][j][comp];
+        }
+        /*
+        printf ("bpw weight[%d][%d] = %d  , %d (%d %d %d) (%d %d) (%d %d)\n", i, j, wbp_weight[0][i][j][0], wbp_weight[1][i][j][0],
+          enc_picture->poc,listX[LIST_0][i]->poc, listX[LIST_1][j]->poc,
+          DistScaleFactor ,tx,tx,tx);
+        */
+      }
+    }
+  }
+}
+
+
+/*!
+************************************************************************
+* \brief
+*    Tests P slice weighting factors to perform or not WP RD decision
+*
+*    Runs the same luma weight / offset estimation as the P slice
+*    estimator, but only into local tables: wp_weight and wp_offset are
+*    reset to their defaults and are not updated with the estimates.
+*
+*    Side effects: sets luma_log_weight_denom, chroma_log_weight_denom,
+*    wp_luma_round, wp_chroma_round, wp_weight and wp_offset.
+*
+* \param select_offset
+*    0: test estimated luma weights \n
+*    otherwise: test estimated luma offsets
+*
+* \return
+*    1 if any estimated weight differs from its default or any offset
+*    passes the offset test, 0 otherwise
+************************************************************************
+*/
+
+int test_wp_P_slice(int select_offset)
+{
+  int i, j, n;
+
+  double dc_org = 0.0;
+  int index;
+  int comp;
+  double dc_ref[MAX_REFERENCE_PICTURES];
+
+  int default_weight;
+  int default_weight_chroma;
+  int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+  // list_offset is 0, 2 or 4 and every loop below runs up to list 1 + list_offset,
+  // so the scratch tables need 6 list entries (only 2 entries overflow as soon as list_offset != 0)
+  int weight[6][MAX_REFERENCE_PICTURES][3];
+  int offset[6][MAX_REFERENCE_PICTURES][3];
+  int clist;
+  int perform_wp = 0;
+  imgpel **tmpPtr;
+
+
+  luma_log_weight_denom = 5;
+  chroma_log_weight_denom = 5;
+  wp_luma_round = 1 << (luma_log_weight_denom - 1);
+  wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+  default_weight = 1<<luma_log_weight_denom;
+  default_weight_chroma = 1<<chroma_log_weight_denom;
+
+  /* set all values to defaults */
+  for (i = 0; i < 2 + list_offset; i++)
+  {
+    for (j = 0; j < listXsize[i]; j++)
+    {
+      for (n = 0; n < 3; n++)
+      {
+        // component 0 is luma, components 1 and 2 are chroma
+        int comp_default = (n == 0) ? default_weight : default_weight_chroma;
+
+        weight[i][j][n] = comp_default;
+        wp_weight[i][j][n] = comp_default;
+        wp_offset[i][j][n] = 0;
+        offset[i][j][n] = 0;
+      }
+    }
+  }
+
+  // sum of the original luma picture
+  for (i = 0; i < img->height; i++)
+  {
+    for (j = 0; j < img->width; j++)
+    {
+      dc_org += (double) imgY_org[i][j];
+    }
+  }
+
+  for (clist=0; clist<2 + list_offset; clist++)
+  {
+    for (n = 0; n < listXsize[clist]; n++)
+    {
+      dc_ref[n] = 0.0;
+
+      ref_pic_sub.luma       = listX[clist][n]->imgY_sub;
+      tmpPtr = ref_pic_sub.luma[0][0];
+      // Y: sum of the reference luma, skipping the IMG_PAD_SIZE border
+      for (j = IMG_PAD_SIZE; j < img->height + IMG_PAD_SIZE; j++)
+      {
+        for (i = IMG_PAD_SIZE; i < img->width + IMG_PAD_SIZE; i++)
+        {
+          dc_ref[n] += (double) tmpPtr[j][i];
+        }
+      }
+
+      if (select_offset==0)
+      {
+        // weight mode: scale the default weight by the ratio of the sums (rounded)
+        if (dc_ref[n] != 0.0)
+          weight[clist][n][0] = (int) (default_weight * dc_org / dc_ref[n] + 0.5);
+        else
+          weight[clist][n][0] = default_weight;  // only used when reference picture is black
+        // fall back to the default when the estimate leaves the accepted range
+        if (weight[clist][n][0] < -64 || weight[clist][n][0] >127)
+          weight[clist][n][0] = default_weight;
+      }
+      else
+      {
+        // offset mode: difference of the sums divided by img->size, clipped to [-128, 127]
+        offset[clist][n][0] = (int) ((dc_org-dc_ref[n])/(img->size)+0.5);
+        offset[clist][n][0] = (offset[clist][n][0]<-128) ? -128: (offset[clist][n][0]>127) ? 127:offset[clist][n][0];
+        weight[clist][n][0] = default_weight;
+      }
+
+      /* for now always use default weight for chroma weight */
+      weight[clist][n][1] = default_weight_chroma;
+      weight[clist][n][2] = default_weight_chroma;
+
+    }
+  }
+
+  // stop at the first reference / component that is not at its default
+  for (clist=0; clist<2 + list_offset; clist++)
+  {
+    for (index = 0; index < listXsize[clist]; index++)
+    {
+      for (comp=0; comp < 3; comp ++)
+      {
+        int comp_default = (comp == 0) ? default_weight : default_weight_chroma;
+        // with RDPSliceBTest set and a profile_idc other than 66 only offsets larger than 2 count
+        int offset_test = input->RDPSliceBTest && active_sps->profile_idc != 66
+          ? iabs(offset[clist][index][comp]) > 2
+          : offset[clist][index][comp] != 0;
+
+        if (weight[clist][index][comp] != comp_default ||  offset_test)
+        {
+          perform_wp = 1;
+          break;
+        }
+      }
+      if (perform_wp == 1) break;
+    }
+    if (perform_wp == 1) break;
+  }
+
+  return perform_wp;
+}
+
+/*!
+************************************************************************
+* \brief
+*    test_wp_B_slice:
+*    Tests B slice weighting prediction
+*
+*    select_method == 1 (implicit mode): wbp_weight is derived from the
+*    POC distances of every (list 0, list 1) reference pair and
+*    wp_weight / wp_offset keep their defaults.
+*
+*    Otherwise a luma weight is estimated per reference picture from the
+*    ratio of the luma sums; it is copied to wp_weight only for
+*    select_method == 0 (explicit mode), and wbp_weight is filled from
+*    wp_weight.
+*
+*    Side effects: sets luma_log_weight_denom, chroma_log_weight_denom,
+*    wp_luma_round, wp_chroma_round, wp_weight, wp_offset and wbp_weight.
+*
+* \param select_method
+*    0: explicit mode, 1: implicit mode
+*
+* \return
+*    1 if explicit mode produced a weight that differs from its default
+*    for any tested reference, 0 otherwise (always 0 for other methods)
+************************************************************************
+*/
+int test_wp_B_slice(int select_method)
+{
+  int i, j, n;
+
+  int tx,DistScaleFactor;
+  double dc_org = 0.0;
+  int index;
+  int comp;
+  double dc_ref[6][MAX_REFERENCE_PICTURES];
+
+  int default_weight[3];
+  // this needs to be fixed.
+  int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+  int weight[6][MAX_REFERENCE_PICTURES][3];
+  int offset[6][MAX_REFERENCE_PICTURES][3];
+  int clist;
+  int wf_weight;
+  int perform_wp = 0;
+  imgpel **tmpPtr;
+
+  if (select_method == 1) //! implicit mode
+  {
+    luma_log_weight_denom = 5;
+    chroma_log_weight_denom = 5;
+  }
+  else
+  {
+    luma_log_weight_denom = 6;
+    chroma_log_weight_denom = 6;
+  }
+
+  wp_luma_round = 1 << (luma_log_weight_denom - 1);
+  wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+  default_weight[0] = 1<<luma_log_weight_denom;
+  default_weight[1] = 1<<chroma_log_weight_denom;
+  default_weight[2] = 1<<chroma_log_weight_denom;
+
+  /* set all values to defaults */
+  for (i = 0; i < 2 + list_offset; i++)
+  {
+    for (j = 0; j < listXsize[i]; j++)
+    {
+      for (n = 0; n < 3; n++)
+      {
+        wp_weight[i][j][n] = default_weight[n];
+        wp_offset[i][j][n] = 0;
+        offset   [i][j][n] = 0;
+        weight   [i][j][n] = default_weight[n];
+      }
+    }
+  }
+
+  if (select_method == 1) //! implicit mode
+  {
+    for (i = 0; i < listXsize[LIST_0]; i++)
+    {
+      for (j = 0; j < listXsize[LIST_1]; j++)
+      {
+        int td, tb;
+        int w0, w1;
+
+        // td: POC distance between the two references, tb: between current picture and the list 0 reference
+        td = iClip3(-128,127,(listX[LIST_1][j]->poc - listX[LIST_0][i]->poc));
+        tb = iClip3(-128,127,(enc_picture->poc - listX[LIST_0][i]->poc));
+
+        if (td != 0)
+        {
+          tx = (16384 + iabs(td/2))/td;
+          DistScaleFactor = iClip3(-1024, 1023, (tx*tb + 32 )>>6);
+        }
+
+        for (comp = 0; comp < 3; comp++)
+        {
+          // implicit weights
+          if (td == 0)
+          {
+            // both references share the same POC: no distance scaling possible
+            w1 = default_weight[comp];
+            w0 = default_weight[comp];
+          }
+          else
+          {
+            w1 = DistScaleFactor>>2;
+            if (w1 < -64 || w1 > 128)
+              w1 = 32;
+            // the weights of a reference pair always add up to 64
+            w0 = 64 - w1;
+          }
+          wbp_weight[1][i][j][comp] = w1;
+          wbp_weight[0][i][j][comp] = w0;
+        }
+      }
+    }
+
+    // single list prediction keeps the default weights in implicit mode
+    for (clist=0; clist<2 + list_offset; clist++)
+    {
+      for (index = 0; index < listXsize[clist]; index++)
+      {
+        wp_weight[clist][index][0] = default_weight[0];
+        wp_weight[clist][index][1] = default_weight[1];
+        wp_weight[clist][index][2] = default_weight[2];
+        wp_offset[clist][index][0] = 0;
+        wp_offset[clist][index][1] = 0;
+        wp_offset[clist][index][2] = 0;
+      }
+    }
+  }
+  else
+  {
+    // sum of the original luma picture
+    for (i = 0; i < img->height; i++)
+    {
+      for (j = 0; j < img->width; j++)
+      {
+        dc_org += (double) imgY_org[i][j];
+      }
+    }
+
+    for (clist=0; clist<2 + list_offset; clist++)
+    {
+      for (n = 0; n < listXsize[clist]; n++)
+      {
+        dc_ref[clist][n] = 0;
+        ref_pic_sub.luma       = listX[clist][n]->imgY_sub;
+        tmpPtr = ref_pic_sub.luma[0][0];
+
+        // Y: sum of the reference luma, skipping the IMG_PAD_SIZE border
+        for (j = IMG_PAD_SIZE; j < img->height + IMG_PAD_SIZE; j++)
+        {
+          for (i = IMG_PAD_SIZE; i < img->width + IMG_PAD_SIZE; i++)
+          {
+            dc_ref[clist][n] += (double) tmpPtr[j][i];
+          }
+        }
+
+        if (dc_ref[clist][n] != 0.0)
+          wf_weight = (int) (default_weight[0] * dc_org / dc_ref[clist][n] + 0.5);
+        else
+          wf_weight = default_weight[0];  // only used when reference picture is black
+
+        // fall back to the default when the estimate leaves the accepted range
+        if ( (wf_weight<-64) || (wf_weight>127) )
+        {
+          wf_weight = default_weight[0];
+        }
+
+        // only the luma weight is estimated; chroma weights and all offsets stay at their defaults
+        weight[clist][n][0] = wf_weight;
+        weight[clist][n][1] = default_weight[1];
+        weight[clist][n][2] = default_weight[2];
+        offset[clist][n][0] = 0;
+        offset[clist][n][1] = 0;
+        offset[clist][n][2] = 0;
+
+      }
+    }
+
+    if (select_method == 0) //! explicit mode
+    {
+      for (clist=0; clist<2 + list_offset; clist++)
+      {
+        for (index = 0; index < listXsize[clist]; index++)
+        {
+          for (comp = 0; comp < 3; comp++)
+          {
+            wp_weight[clist][index][comp] = weight[clist][index][comp];
+            wp_offset[clist][index][comp] = offset[clist][index][comp];
+          }
+        }
+      }
+    }
+    else
+    {
+      // any other method: keep the default weights
+      for (clist=0; clist<2 + list_offset; clist++)
+      {
+        for (index = 0; index < listXsize[clist]; index++)
+        {
+          wp_weight[clist][index][0] = default_weight[0];
+          wp_weight[clist][index][1] = default_weight[1];
+          wp_weight[clist][index][2] = default_weight[2];
+          wp_offset[clist][index][0] = 0;
+          wp_offset[clist][index][1] = 0;
+          wp_offset[clist][index][2] = 0;
+        }
+      }
+    }
+
+    // bi-predictive weights are the per-list weights of the two references
+    for (i = 0; i < listXsize[LIST_0]; i++)
+    {
+      for (j = 0; j < listXsize[LIST_1]; j++)
+      {
+        for (comp = 0; comp < 3; comp++)
+        {
+          wbp_weight[0][i][j][comp] = wp_weight[0][i][comp];
+          wbp_weight[1][i][j][comp] = wp_weight[1][j][comp];
+        }
+        /*
+        printf ("bpw weight[%d][%d] = %d  , %d (%d %d %d) (%d %d) (%d %d)\n", i, j, wbp_weight[0][i][j][0], wbp_weight[1][i][j][0],
+          enc_picture->poc,listX[LIST_0][i]->poc, listX[LIST_1][j]->poc,
+          DistScaleFactor ,tx,tx,tx);
+        */
+      }
+    }
+  }
+
+  if (select_method == 0) //! explicit mode
+  {
+    int active_refs[2];
+    int num_refs;
+
+    // a configured limit of 0 means "use every reference of the list"
+    active_refs[0]=input->B_List0_refs == 0 ? listXsize[0] : imin(input->B_List0_refs,listXsize[0]);
+    active_refs[1]=input->B_List1_refs == 0 ? listXsize[1] : imin(input->B_List1_refs,listXsize[1]);
+
+    for (clist=0; clist<2 + list_offset; clist++)
+    {
+      // only lists 0 and 1 have a configured limit; the additional lists visited when
+      // list_offset != 0 are checked over their full size (NOTE(review): confirm the intended limit)
+      num_refs = (clist < 2) ? active_refs[clist] : listXsize[clist];
+
+      for (index = 0; index < num_refs; index++)
+      {
+        for (comp=0; comp < 3; comp ++)
+        {
+          if (wp_weight[clist][index][comp] != default_weight[comp])
+          {
+            perform_wp = 1;
+            break;
+          }
+        }
+        if (perform_wp == 1) break;
+      }
+      if (perform_wp == 1) break;
+    }
+  }
+  return perform_wp;
+}
+
+


Index: llvm-test/MultiSource/Applications/JM/lencod/win32.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/win32.h:1.1
*** /dev/null	Sun Feb  4 08:38:55 2007
--- llvm-test/MultiSource/Applications/JM/lencod/win32.h	Sun Feb  4 08:38:32 2007
***************
*** 0 ****
--- 1,69 ----
+ 
+ /*!
+  ************************************************************************
+  *  \file
+  *     win32.h
+  *
+  *  \brief
+  *     win32 definitions for H.264 encoder: maps the POSIX style I/O names,
+  *     open() flag sets, "inline" and a 64 bit integer type per platform.
+  *  \author
+  *
+  ************************************************************************
+  */
+ #ifndef _WIN32_H_
+ #define _WIN32_H_
+ 
+ # include <fcntl.h>
+ # include <stdio.h>
+ # include <sys/types.h>
+ # include <sys/stat.h>  /* S_IRUSR / S_IWUSR resp. _S_IREAD / _S_IWRITE */
+ 
+ #if defined(WIN32)
+ # include <io.h>
+ # define strcasecmp _strcmpi
+ 
+ # define  snprintf _snprintf
+ # define  open     _open
+ # define  close    _close
+ # define  read     _read
+ # define  write    _write
+ # define  lseek    _lseeki64
+ # define  fsync    _commit
+ # define  tell     _tell
+ # define  TIMEB    _timeb
+ # define  ftime    _ftime
+ # define  OPENFLAGS_WRITE (_O_WRONLY|_O_CREAT|_O_BINARY|_O_TRUNC)  /* parenthesized: safe inside larger expressions */
+ # define  OPEN_PERMISSIONS (_S_IREAD | _S_IWRITE)
+ # define  OPENFLAGS_READ  (_O_RDONLY|_O_BINARY)
+ # define  inline   _inline
+ #else
+ # include <unistd.h>
+ # include <strings.h>
+ # define  TIMEB    timeb
+ # define  OPENFLAGS_WRITE (O_WRONLY|O_CREAT|O_TRUNC)  /* parenthesized: safe inside larger expressions */
+ # define  OPENFLAGS_READ  (O_RDONLY)
+ # define  OPEN_PERMISSIONS (S_IRUSR | S_IWUSR)
+ 
+ # if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    /* "inline" is a keyword */
+ # else
+ #  define inline /* nothing */
+ # endif
+ #endif
+ 
+ #if defined(WIN32) && !defined(__GNUC__)
+ typedef __int64   int64;
+ # define FORMAT_OFF_T "I64d"
+ # ifndef INT64_MIN
+ #  define INT64_MIN        (-9223372036854775807i64 - 1i64)
+ # endif
+ #else
+ typedef long long int64;
+ # define FORMAT_OFF_T "lld"
+ # ifndef INT64_MIN
+ #  define INT64_MIN        (-9223372036854775807LL - 1LL)
+ # endif
+ #endif
+ 
+ #endif






More information about the llvm-commits mailing list