[llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/lencod/Makefile annexb.c annexb.h biariencode.c biariencode.h block.c block.h cabac.c cabac.h configfile.c configfile.h context_ini.c context_ini.h contributors.h ctx_tables.h decoder.c defines.h elements.h epzs.c epzs.h explicit_gop.c explicit_gop.h fast_me.c fast_me.h filehandle.c fmo.c fmo.h global.h header.c header.h image.c image.h intrarefresh.c intrarefresh.h leaky_bucket.c leaky_bucket.h lencod.c loopFilter.c macroblock.c macroblock.h mb_access.c mb_access.h mbuffer.c mbuffer.h memalloc.c memalloc.h minmax.h mode_decision.c mode_decision.h mv-search.c mv-search.h nal.c nalu.c nalu.h nalucommon.c nalucommon.h output.c output.h parset.c parset.h parsetcommon.c parsetcommon.h q_matrix.c q_matrix.h q_offsets.c q_offsets.h ratectl.c ratectl.h rdopt.c rdopt_coding_state.c rdopt_coding_state.h rdpicdecision.c refbuf.c refbuf.h rtp.c rtp.h sei.c sei.h simplified_fast_me.c simplified_fast_me.h slice.c transform8x8.c transform8x8.h vlc.c vlc.h weighted_prediction.c

Evan Cheng evan.cheng at apple.com
Sat Feb 11 02:33:43 PST 2006



Changes in directory llvm-test/MultiSource/Applications/JM/lencod:

Makefile added (r1.1)
annexb.c added (r1.1)
annexb.h added (r1.1)
biariencode.c added (r1.1)
biariencode.h added (r1.1)
block.c added (r1.1)
block.h added (r1.1)
cabac.c added (r1.1)
cabac.h added (r1.1)
configfile.c added (r1.1)
configfile.h added (r1.1)
context_ini.c added (r1.1)
context_ini.h added (r1.1)
contributors.h added (r1.1)
ctx_tables.h added (r1.1)
decoder.c added (r1.1)
defines.h added (r1.1)
elements.h added (r1.1)
epzs.c added (r1.1)
epzs.h added (r1.1)
explicit_gop.c added (r1.1)
explicit_gop.h added (r1.1)
fast_me.c added (r1.1)
fast_me.h added (r1.1)
filehandle.c added (r1.1)
fmo.c added (r1.1)
fmo.h added (r1.1)
global.h added (r1.1)
header.c added (r1.1)
header.h added (r1.1)
image.c added (r1.1)
image.h added (r1.1)
intrarefresh.c added (r1.1)
intrarefresh.h added (r1.1)
leaky_bucket.c added (r1.1)
leaky_bucket.h added (r1.1)
lencod.c added (r1.1)
loopFilter.c added (r1.1)
macroblock.c added (r1.1)
macroblock.h added (r1.1)
mb_access.c added (r1.1)
mb_access.h added (r1.1)
mbuffer.c added (r1.1)
mbuffer.h added (r1.1)
memalloc.c added (r1.1)
memalloc.h added (r1.1)
minmax.h added (r1.1)
mode_decision.c added (r1.1)
mode_decision.h added (r1.1)
mv-search.c added (r1.1)
mv-search.h added (r1.1)
nal.c added (r1.1)
nalu.c added (r1.1)
nalu.h added (r1.1)
nalucommon.c added (r1.1)
nalucommon.h added (r1.1)
output.c added (r1.1)
output.h added (r1.1)
parset.c added (r1.1)
parset.h added (r1.1)
parsetcommon.c added (r1.1)
parsetcommon.h added (r1.1)
q_matrix.c added (r1.1)
q_matrix.h added (r1.1)
q_offsets.c added (r1.1)
q_offsets.h added (r1.1)
ratectl.c added (r1.1)
ratectl.h added (r1.1)
rdopt.c added (r1.1)
rdopt_coding_state.c added (r1.1)
rdopt_coding_state.h added (r1.1)
rdpicdecision.c added (r1.1)
refbuf.c added (r1.1)
refbuf.h added (r1.1)
rtp.c added (r1.1)
rtp.h added (r1.1)
sei.c added (r1.1)
sei.h added (r1.1)
simplified_fast_me.c added (r1.1)
simplified_fast_me.h added (r1.1)
slice.c added (r1.1)
transform8x8.c added (r1.1)
transform8x8.h added (r1.1)
vlc.c added (r1.1)
vlc.h added (r1.1)
weighted_prediction.c added (r1.1)
---
Log message:

Added H.264 reference encoder / decoder from ITU to LLVM test suite.

---
Diffs of the changes:  (+55883 -0)

 Makefile              |    8 
 annexb.c              |  117 +
 annexb.h              |   25 
 biariencode.c         |  342 +++
 biariencode.h         |  138 +
 block.c               | 2800 +++++++++++++++++++++++++++++++
 block.h               |  179 ++
 cabac.c               | 1503 ++++++++++++++++
 cabac.h               |   65 
 configfile.c          | 1162 +++++++++++++
 configfile.h          |  271 +++
 context_ini.c         |  365 ++++
 context_ini.h         |   32 
 contributors.h        |  212 ++
 ctx_tables.h          |  729 ++++++++
 decoder.c             |  655 +++++++
 defines.h             |  207 ++
 elements.h            |  109 +
 epzs.c                | 2321 ++++++++++++++++++++++++++
 epzs.h                |   71 
 explicit_gop.c        |  472 +++++
 explicit_gop.h        |   25 
 fast_me.c             |  914 ++++++++++
 fast_me.h             |  168 +
 filehandle.c          |  140 +
 fmo.c                 |  730 ++++++++
 fmo.h                 |   39 
 global.h              | 1430 ++++++++++++++++
 header.c              |  564 ++++++
 header.h              |   22 
 image.c               | 2640 +++++++++++++++++++++++++++++
 image.h               |   34 
 intrarefresh.c        |  136 +
 intrarefresh.h        |   26 
 leaky_bucket.c        |  296 +++
 leaky_bucket.h        |   29 
 lencod.c              | 2421 +++++++++++++++++++++++++++
 loopFilter.c          |  482 +++++
 macroblock.c          | 4435 ++++++++++++++++++++++++++++++++++++++++++++++++++
 macroblock.h          |  112 +
 mb_access.c           |  683 +++++++
 mb_access.h           |   30 
 mbuffer.c             | 3865 +++++++++++++++++++++++++++++++++++++++++++
 mbuffer.h             |  193 ++
 memalloc.c            |  763 ++++++++
 memalloc.h            |   63 
 minmax.h              |   19 
 mode_decision.c       | 1844 ++++++++++++++++++++
 mode_decision.h       |   87 
 mv-search.c           | 3873 +++++++++++++++++++++++++++++++++++++++++++
 mv-search.h           |   76 
 nal.c                 |  147 +
 nalu.c                |   78 
 nalu.h                |   28 
 nalucommon.c          |   72 
 nalucommon.h          |   55 
 output.c              |  468 +++++
 output.h              |   24 
 parset.c              |  908 ++++++++++
 parset.h              |   45 
 parsetcommon.c        |  100 +
 parsetcommon.h        |  194 ++
 q_matrix.c            |  633 +++++++
 q_matrix.h            |   40 
 q_offsets.c           |  550 ++++++
 q_offsets.h           |   28 
 ratectl.c             | 1803 ++++++++++++++++++++
 ratectl.h             |  146 +
 rdopt.c               | 3568 ++++++++++++++++++++++++++++++++++++++++
 rdopt_coding_state.c  |  203 ++
 rdopt_coding_state.h  |   53 
 rdpicdecision.c       |   64 
 refbuf.c              |  167 +
 refbuf.h              |   28 
 rtp.c                 |  613 ++++++
 rtp.h                 |   72 
 sei.c                 | 1644 ++++++++++++++++++
 sei.h                 |  267 +++
 simplified_fast_me.c  |  825 +++++++++
 simplified_fast_me.h  |   90 +
 slice.c               | 1134 ++++++++++++
 transform8x8.c        | 1801 ++++++++++++++++++++
 transform8x8.h        |   32 
 vlc.c                 | 1265 ++++++++++++++
 vlc.h                 |   52 
 weighted_prediction.c |  764 ++++++++
 86 files changed, 55883 insertions(+)


Index: llvm-test/MultiSource/Applications/JM/lencod/Makefile
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/Makefile:1.1
*** /dev/null	Sat Feb 11 04:33:32 2006
--- llvm-test/MultiSource/Applications/JM/lencod/Makefile	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,8 ----
+ LEVEL = ../../../..
+ PROG     = lencod
+ CPPFLAGS = -D __USE_LARGEFILE64 -D _FILE_OFFSET_BITS=64
+ LDFLAGS  = -lm $(TOOLLINKOPTS)
+ 
+ RUN_OPTIONS = -d $(PROJ_SRC_DIR)/data/encoder.cfg -p InputFile=$(PROJ_SRC_DIR)/data/foreman_part_qcif.yuv
+ 
+ include ../../../Makefile.multisrc


Index: llvm-test/MultiSource/Applications/JM/lencod/annexb.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/annexb.c:1.1
*** /dev/null	Sat Feb 11 04:33:41 2006
--- llvm-test/MultiSource/Applications/JM/lencod/annexb.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,117 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file annexb.c
+  *
+  * \brief
+  *    Annex B Byte Stream format NAL Unit writing routines
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ 
+ #include "global.h"
+ #include "nalucommon.h"
+ 
+ static FILE *f = NULL;    // the output file
+ 
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    Writes a NALU to the Annex B Byte Stream
+  *
+  * \return
+  *    number of bits written
+  *
+  ********************************************************************************************
+ */
+ int WriteAnnexbNALU (NALU_t *n)
+ {
+   int BitsWritten = 0;
+ 
+   assert (n != NULL);
+   assert (n->forbidden_bit == 0);
+   assert (f != NULL);
+   assert (n->startcodeprefix_len == 3 || n->startcodeprefix_len == 4);
+ 
+ // printf ("WriteAnnexbNALU: writing %d bytes w/ startcode_len %d\n", n->len+1, n->startcodeprefix_len); 
+   if (n->startcodeprefix_len > 3)
+   {
+     putc (0, f);
+     BitsWritten += 8;  // extra leading zero byte of the 4-byte start code prefix
+   }
+   putc (0, f);
+   putc (0, f);
+   putc (1, f);
+   BitsWritten += 24;
+ 
+   n->buf[0] =
+     n->forbidden_bit << 7      |
+     n->nal_reference_idc << 5  |
+     n->nal_unit_type;
+ 
+ // printf ("First Byte %x, nal_ref_idc %x, nal_unit_type %d\n", n->buf[0], n->nal_reference_idc, n->nal_unit_type);
+ 
+   if (n->len != fwrite (n->buf, 1, n->len, f))
+   {
+     printf ("Fatal: cannot write %d bytes to bitstream file, exit (-1)\n", n->len);
+     exit (-1);
+   }
+   BitsWritten += n->len * 8;
+ 
+   fflush (f);
+ #if TRACE
+   fprintf (p_trace, "\n\nAnnex B NALU w/ %s startcode, len %d, forbidden_bit %d, nal_reference_idc %d, nal_unit_type %d\n\n",
+     n->startcodeprefix_len == 4?"long":"short", n->len, n->forbidden_bit, n->nal_reference_idc, n->nal_unit_type);
+   fflush (p_trace);
+ #endif
+   return BitsWritten;
+ }
+ 
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    Opens the output file for the bytestream    
+  *
+  * \param Filename
+  *    The filename of the file to be opened
+  *
+  * \return
+  *    none.  Function terminates the program in case of an error
+  *
+  ********************************************************************************************
+ */
+ void OpenAnnexbFile (char *Filename)
+ {
+   if ((f = fopen (Filename, "wb")) == NULL)
+   {
+     printf ("Fatal: cannot open Annex B bytestream file '%s', exit (-1)\n", Filename);
+     exit (-1);
+   }
+ }
+ 
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    Closes the output bit stream file
+  *
+  * \return
+  *    none.  Function terminates the program in case of an error
+  ********************************************************************************************
+ */
+ void CloseAnnexbFile() {
+   if (fclose (f))
+   {
+     printf ("Fatal: cannot close Annex B bytestream file, exit (-1)\n");
+     exit (-1);
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/annexb.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/annexb.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/annexb.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,25 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    annexb.h
+  * \brief
+  *    Byte stream operations support
+  *    This code reflects JVT version xxx
+  *  \date 7 December 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  ***************************************************************************************
+  */
+ 
+ #ifndef _ANNEXB_H_
+ #define _ANNEXB_H_
+ 
+ #include "nalucommon.h"
+ 
+ int WriteAnnexbNALU (NALU_t *n);
+ void CloseAnnexbFile();
+ void OpenAnnexbFile (char *Filename);
+ 
+ #endif //_ANNEXB_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/biariencode.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/biariencode.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/biariencode.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,342 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file biariencode.c
+  *
+  * \brief
+  *    Routines for binary arithmetic encoding
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Gabi Blaettermann               <blaetter at hhi.de>
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <stdio.h>
+ 
+ #include "global.h"
+ #include "biariencode.h"
+ 
+ int binCount = 0;
+ 
+ /*!
+  ************************************************************************
+  * Macro for writing bytes of code
+  ***********************************************************************
+  */
+ 
+ #define put_byte() { \
+                      Ecodestrm[(*Ecodestrm_len)++] = Ebuffer; \
+                      Ebits_to_go = 8; \
+                      while (eep->C > 7) { \
+                        eep->C-=8; \
+                        eep->E++; \
+                      } \
+                     } 
+ 
+ #define put_one_bit(b) { \
+                          Ebuffer <<= 1; Ebuffer |= (b); \
+                          if (--Ebits_to_go == 0) \
+                            put_byte(); \
+                        }
+ 
+ #define put_one_bit_plus_outstanding(b) { \
+                                           put_one_bit(b); \
+                                           while (Ebits_to_follow > 0) \
+                                           { \
+                                             Ebits_to_follow--; \
+                                             put_one_bit(!(b)); \
+                                           } \
+                                          }
+ 
+ int pic_bin_count;
+ 
+ void reset_pic_bin_count()
+ {
+   pic_bin_count = 0;
+ }
+ 
+ int get_pic_bin_count()
+ {
+   return pic_bin_count;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates memory for the EncodingEnvironment struct
+  ************************************************************************
+  */
+ EncodingEnvironmentPtr arienco_create_encoding_environment()
+ {
+   EncodingEnvironmentPtr eep;
+ 
+   if ( (eep = (EncodingEnvironmentPtr) calloc(1,sizeof(EncodingEnvironment))) == NULL)
+     no_mem_exit("arienco_create_encoding_environment: eep");
+ 
+   return eep;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Frees memory of the EncodingEnvironment struct
+  ************************************************************************
+  */
+ void arienco_delete_encoding_environment(EncodingEnvironmentPtr eep)
+ {
+   if (eep == NULL)
+   {
+     snprintf(errortext, ET_SIZE, "Error freeing eep (NULL pointer)");
+     error (errortext, 200);
+   }
+   else
+     free(eep);
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initializes the EncodingEnvironment for the arithmetic coder
+  ************************************************************************
+  */
+ void arienco_start_encoding(EncodingEnvironmentPtr eep,
+                             unsigned char *code_buffer,
+                             int *code_len )
+ {
+   Elow = 0;
+   Ebits_to_follow = 0;
+   Ebuffer = 0;
+   Ebits_to_go = 9; // to swallow first redundant bit
+ 
+   Ecodestrm = code_buffer;
+   Ecodestrm_len = code_len;
+ 
+   Erange = HALF-2;
+ 
+   eep->C = 0;
+   eep->E = 0;
+ 
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Returns the number of currently written bits
+  ************************************************************************
+  */
+ int arienco_bits_written(EncodingEnvironmentPtr eep)
+ {
+    return (8 * (*Ecodestrm_len) + Ebits_to_follow + 8  - Ebits_to_go);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Terminates the arithmetic codeword, writes stop bit and stuffing bytes (if any)
+  ************************************************************************
+  */
+ void arienco_done_encoding(EncodingEnvironmentPtr eep)
+ {
+   put_one_bit_plus_outstanding((Elow >> (B_BITS-1)) & 1);
+   put_one_bit((Elow >> (B_BITS-2))&1);
+   put_one_bit(1);
+ 
+   stats->bit_use_stuffingBits[img->type]+=(8-Ebits_to_go);
+ 
+   while (Ebits_to_go != 8)
+     put_one_bit(0);
+ 
+   pic_bin_count += eep->E*8 + eep->C; // no of processed bins
+ }
+ 
+ extern int cabac_encoding;
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Actual arithmetic encoding of one binary symbol by using
+  *    the probability estimate of its associated context model
+  ************************************************************************
+  */
+ void biari_encode_symbol(EncodingEnvironmentPtr eep, signed short symbol, BiContextTypePtr bi_ct )
+ {
+   register unsigned int range = Erange;
+   register unsigned int low = Elow;
+   unsigned int rLPS = rLPS_table_64x4[bi_ct->state][(range>>6) & 3];
+   
+ #if (2==TRACE)
+   if (cabac_encoding)
+     fprintf(p_trace, "%d  0x%04x  %d  %d\n", binCount++, Erange , bi_ct->state, bi_ct->MPS );
+ #endif
+   
+   range -= rLPS;  
+   bi_ct->count += cabac_encoding;
+ 
+   /* covers all cases where code does not bother to shift down symbol to be 
+    * either 0 or 1, e.g. in some cases for cbp, mb_Type etc the code simply 
+    * masks off the bit position and passes in the resulting value */
+   symbol = (symbol != 0);
+ 
+   if (symbol != bi_ct->MPS) 
+   {
+     low += range;
+     range = rLPS;
+     
+     if (!bi_ct->state)
+       bi_ct->MPS = bi_ct->MPS ^ 1;               // switch LPS if necessary
+     bi_ct->state = AC_next_state_LPS_64[bi_ct->state]; // next state
+   } 
+   else 
+     bi_ct->state = AC_next_state_MPS_64[bi_ct->state]; // next state
+  
+   /* renormalisation */    
+   while (range < QUARTER)
+   {
+     if (low >= HALF)
+     {
+       put_one_bit_plus_outstanding(1);
+       low -= HALF;
+     }
+     else if (low < QUARTER)
+     {
+       put_one_bit_plus_outstanding(0);
+     }
+     else
+     {
+       Ebits_to_follow++;
+       low -= QUARTER;
+     }
+     low <<= 1;
+     range <<= 1;
+   }
+   Erange = range;
+   Elow = low;
+   eep->C++;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Arithmetic encoding of one binary symbol assuming 
+  *    a fixed prob. distribution with p(symbol) = 0.5
+  ************************************************************************
+  */
+ void biari_encode_symbol_eq_prob(EncodingEnvironmentPtr eep, signed short symbol)
+ {
+   register unsigned int low = (Elow<<1);
+   
+ #if (2==TRACE)
+   extern int cabac_encoding;
+   if (cabac_encoding)
+     fprintf(p_trace, "%d  0x%04x\n", binCount++, Erange );
+ #endif
+   
+   if (symbol != 0)
+     low += Erange;
+ 
+   /* renormalisation as for biari_encode_symbol; 
+      note that low has already been doubled */ 
+   if (low >= ONE)
+   {
+     put_one_bit_plus_outstanding(1);
+     low -= ONE;
+   }
+   else 
+     if (low < HALF)
+     {
+       put_one_bit_plus_outstanding(0);
+     }
+     else
+     {
+       Ebits_to_follow++;
+       low -= HALF;
+     }
+     Elow = low;
+     eep->C++;    
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Arithmetic encoding for last symbol before termination
+  ************************************************************************
+  */
+ void biari_encode_symbol_final(EncodingEnvironmentPtr eep, signed short symbol)
+ {
+   register unsigned int range = Erange-2;
+   register unsigned int low = Elow;
+   
+ #if (2==TRACE)
+   extern int cabac_encoding;
+   if (cabac_encoding)
+     fprintf(p_trace, "%d  0x%04x\n", binCount++, Erange);
+ #endif
+   
+   if (symbol) {
+     low += range;
+     range = 2;
+   }
+   
+   while (range < QUARTER)
+   {
+     if (low >= HALF)
+     {
+       put_one_bit_plus_outstanding(1);
+       low -= HALF;
+     }
+     else 
+       if (low < QUARTER)
+       {
+         put_one_bit_plus_outstanding(0);
+       }
+       else
+       {
+         Ebits_to_follow++;
+         low -= QUARTER;
+       }
+       low <<= 1;
+       range <<= 1;
+   }
+   Erange = range;
+   Elow = low;
+   eep->C++;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initializes a given context with some pre-defined probability state
+  ************************************************************************
+  */
+ void biari_init_context (BiContextTypePtr ctx, const int* ini)
+ {
+   int pstate;
+ 
+   pstate = ((ini[0]* max(0, img->qp)) >> 4) + ini[1];
+   pstate = min (max ( 1, pstate), 126);
+ 
+   if ( pstate >= 64 )
+   {
+     ctx->state  = pstate - 64;
+     ctx->MPS    = 1;
+   }
+   else
+   {
+     ctx->state  = 63 - pstate;
+     ctx->MPS    = 0;
+   }
+   
+   ctx->count = 0;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/biariencode.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/biariencode.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/biariencode.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,138 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    biariencode.h
+  *
+  * \brief
+  *    Headerfile for binary arithmetic encoding routines
+  *
+  * \author
+  *    Detlev Marpe,
+  *    Gabi Blaettermann
+  *    Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+  *
+  * \date
+  *    21. Oct 2000
+  **************************************************************************
+  */
+ 
+ 
+ #ifndef _BIARIENCOD_H_
+ #define _BIARIENCOD_H_
+ 
+ 
+ /************************************************************************
+  * D e f i n i t i o n s
+  ***********************************************************************
+  */
+ 
+ // some definitions to increase the readability of the source code
+ 
+ #define Elow                      (eep->Elow)
+ #define Erange                    (eep->Erange)
+ #define Ebits_to_follow           (eep->Ebits_to_follow)
+ #define Ebuffer                   (eep->Ebuffer)
+ #define Ebits_to_go               (eep->Ebits_to_go)
+ #define Ecodestrm                 (eep->Ecodestrm)
+ #define Ecodestrm_len             (eep->Ecodestrm_len)
+ #define Ecodestrm_laststartcode   (eep->Ecodestrm_laststartcode)
+ #define B_BITS                    10 // Number of bits to represent the whole coding interval
+ #define ONE                       (1 << B_BITS)
+ #define HALF                      (1 << (B_BITS-1))
+ #define QUARTER                   (1 << (B_BITS-2))
+ 
+ /* Range table for LPS */ 
+ const byte rLPS_table_64x4[64][4]=
+ {
+         { 128, 176, 208, 240},
+         { 128, 167, 197, 227},
+         { 128, 158, 187, 216},
+         { 123, 150, 178, 205},
+         { 116, 142, 169, 195},
+         { 111, 135, 160, 185},
+         { 105, 128, 152, 175},
+         { 100, 122, 144, 166},
+         {  95, 116, 137, 158},
+         {  90, 110, 130, 150},
+         {  85, 104, 123, 142},
+         {  81,  99, 117, 135},
+         {  77,  94, 111, 128},
+         {  73,  89, 105, 122},
+         {  69,  85, 100, 116},
+         {  66,  80,  95, 110},
+         {  62,  76,  90, 104},
+         {  59,  72,  86,  99},
+         {  56,  69,  81,  94},
+         {  53,  65,  77,  89},
+         {  51,  62,  73,  85},
+         {  48,  59,  69,  80},
+         {  46,  56,  66,  76},
+         {  43,  53,  63,  72},
+         {  41,  50,  59,  69},
+         {  39,  48,  56,  65},
+         {  37,  45,  54,  62},
+         {  35,  43,  51,  59},
+         {  33,  41,  48,  56},
+         {  32,  39,  46,  53},
+         {  30,  37,  43,  50},
+         {  29,  35,  41,  48},
+         {  27,  33,  39,  45},
+         {  26,  31,  37,  43},
+         {  24,  30,  35,  41},
+         {  23,  28,  33,  39},
+         {  22,  27,  32,  37},
+         {  21,  26,  30,  35},
+         {  20,  24,  29,  33},
+         {  19,  23,  27,  31},
+         {  18,  22,  26,  30},
+         {  17,  21,  25,  28},
+         {  16,  20,  23,  27},
+         {  15,  19,  22,  25},
+         {  14,  18,  21,  24},
+         {  14,  17,  20,  23},
+         {  13,  16,  19,  22},
+         {  12,  15,  18,  21},
+         {  12,  14,  17,  20},
+         {  11,  14,  16,  19},
+         {  11,  13,  15,  18},
+         {  10,  12,  15,  17},
+         {  10,  12,  14,  16},
+         {   9,  11,  13,  15},
+         {   9,  11,  12,  14},
+         {   8,  10,  12,  14},
+         {   8,   9,  11,  13},
+         {   7,   9,  11,  12},
+         {   7,   9,  10,  12},
+         {   7,   8,  10,  11},
+         {   6,   8,   9,  11},
+         {   6,   7,   9,  10},
+         {   6,   7,   8,   9},
+         {   2,   2,   2,   2}
+ };
+ 
+ const unsigned short AC_next_state_MPS_64[64] =    
+ {
+                 1,2,3,4,5,6,7,8,9,10,
+                 11,12,13,14,15,16,17,18,19,20,
+                 21,22,23,24,25,26,27,28,29,30,
+                 31,32,33,34,35,36,37,38,39,40,
+                 41,42,43,44,45,46,47,48,49,50,
+                 51,52,53,54,55,56,57,58,59,60,
+                 61,62,62,63
+ };      
+ 
+ const unsigned short AC_next_state_LPS_64[64] =    
+ {
+                  0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
+                  8, 9, 9,11,11,12,13,13,15,15, 
+                  16,16,18,18,19,19,21,21,22,22,
+                  23,24,24,25,26,26,27,27,28,29,
+                  29,30,30,30,31,32,32,33,33,33,
+                  34,34,35,35,35,36,36,36,37,37, 
+                  37,38,38,63 
+ };
+ 
+ 
+ #endif  // BIARIENCOD_H
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/block.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/block.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/block.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,2800 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file block.c
+  *
+  * \brief
+  *    Process one block
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *    - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+  *    - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  *    - Jani Lainema                    <jani.lainema at nokia.com>
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+  *    - Ragip Kurceren                  <ragip.kurceren at nokia.com>
+  *    - Greg Conklin                    <gregc at real.com>
+  *************************************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ 
+ #include <stdlib.h>
+ #include <stdio.h>
+ #include <memory.h>
+ #include <string.h>
+ #include <math.h>
+ 
+ #include "global.h"
+ 
+ #include "image.h"
+ #include "mb_access.h"
+ #include "block.h"
+ #include "vlc.h"
+ 
+ 
+ const int quant_coef[6][4][4] = {
+   {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}},
+   {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}},
+   {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}},
+   {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}},
+   {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}},
+   {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}}
+ };
+ 
+ const int dequant_coef[6][4][4] = {
+   {{10, 13, 10, 13},{ 13, 16, 13, 16},{10, 13, 10, 13},{ 13, 16, 13, 16}},
+   {{11, 14, 11, 14},{ 14, 18, 14, 18},{11, 14, 11, 14},{ 14, 18, 14, 18}},
+   {{13, 16, 13, 16},{ 16, 20, 16, 20},{13, 16, 13, 16},{ 16, 20, 16, 20}},
+   {{14, 18, 14, 18},{ 18, 23, 18, 23},{14, 18, 14, 18},{ 18, 23, 18, 23}},
+   {{16, 20, 16, 20},{ 20, 25, 20, 25},{16, 20, 16, 20},{ 20, 25, 20, 25}},
+   {{18, 23, 18, 23},{ 23, 29, 23, 29},{18, 23, 18, 23},{ 23, 29, 23, 29}}
+ };
+ static const int A[4][4] = {
+   { 16, 20, 16, 20},
+   { 20, 25, 20, 25},
+   { 16, 20, 16, 20},
+   { 20, 25, 20, 25}
+ };
+ 
+ 
+ // Notation for comments regarding prediction and predictors.
+ // The pels of the 4x4 block are labelled a..p. The predictor pels above
+ // are labelled A..H, from the left I..P, and from above left X, as follows:
+ //
+ //  X A B C D E F G H
+ //  I a b c d
+ //  J e f g h
+ //  K i j k l
+ //  L m n o p
+ //
+ 
+ // Predictor array index definitions
+ #define P_X (PredPel[0])
+ #define P_A (PredPel[1])
+ #define P_B (PredPel[2])
+ #define P_C (PredPel[3])
+ #define P_D (PredPel[4])
+ #define P_E (PredPel[5])
+ #define P_F (PredPel[6])
+ #define P_G (PredPel[7])
+ #define P_H (PredPel[8])
+ #define P_I (PredPel[9])
+ #define P_J (PredPel[10])
+ #define P_K (PredPel[11])
+ #define P_L (PredPel[12])
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Make intra 4x4 prediction according to all 9 prediction modes.
+  *    The routine uses left and upper neighbouring points from
+  *    previous coded blocks to do this (if available). Notice that
+  *    inaccessible neighbouring points are signalled with a negative
+  *    value in the predmode array .
+  *
+  *  \par Input:
+  *     Starting point of current 4x4 block image position
+  *
+  *  \par Output:
+  *      none
+  ************************************************************************
+  */
+ void intrapred_luma(int img_x,int img_y, int *left_available, int *up_available, int *all_available)
+ { /* Fills img->mprr[mode][row][col] with the 4x4 intra luma predictions for
+      the block whose top-left sample is (img_x, img_y), and reports neighbour
+      availability to the caller:
+        *left_available - left column usable
+        *up_available   - row above usable
+        *all_available  - up, left and up-left all usable
+      Modes whose neighbours are missing keep -1 in mprr[mode][0][0].
+      P_A..P_L and P_X are macros defined outside this function; the use of
+      (&P_A)[i] and (&P_I)[i] below implies they name consecutive lvalues,
+      presumably slots of PredPel[] -- confirm against the macro definitions. */
+   int i,j;
+   int s0;           // DC predictor value
+   int PredPel[13];  // array of predictor pels
+   imgpel **imgY = enc_picture->imgY;  // For MB level frame/field coding tools -- set default to imgY
+ 
+   int ioff = (img_x & 15);  // horizontal offset of the block inside its 16x16 macroblock
+   int joff = (img_y & 15);  // vertical offset of the block inside its 16x16 macroblock
+   int mb_nr=img->current_mb_nr;
+ 
+   PixelPos pix_a[4];             // left neighbours, one per block row
+   PixelPos pix_b, pix_c, pix_d;  // up, up-right and up-left neighbours
+ 
+   int block_available_up;
+   int block_available_left;
+   int block_available_up_left;
+   int block_available_up_right;
+ 
+   for (i=0;i<4;i++)
+   {
+     getNeighbour(mb_nr, ioff -1 , joff +i , 1, &pix_a[i]);
+   }
+     
+   getNeighbour(mb_nr, ioff    , joff -1 , 1, &pix_b);
+   getNeighbour(mb_nr, ioff +4 , joff -1 , 1, &pix_c);
+   getNeighbour(mb_nr, ioff -1 , joff -1 , 1, &pix_d);
+ 
+   pix_c.available = pix_c.available && !(((ioff==4)||(ioff==12)) && ((joff==4)||(joff==12)));  // up-right is forced unavailable at these four block positions
+ 
+   if (input->UseConstrainedIntraPred)
+   {
+     // constrained mode: a neighbour only counts when its macroblock is flagged in img->intra_block
+     for (i=0, block_available_left=1; i<4;i++)
+       block_available_left  &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0;
+     block_available_up       = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0;
+     block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0;
+     block_available_up_left  = pix_d.available ? img->intra_block [pix_d.mb_addr] : 0;
+   }
+   else
+   {
+     block_available_left     = pix_a[0].available;  // only the first left neighbour is consulted here
+     block_available_up       = pix_b.available;
+     block_available_up_right = pix_c.available;
+     block_available_up_left  = pix_d.available;
+   }
+   
+   *left_available = block_available_left;
+   *up_available   = block_available_up;
+   *all_available  = block_available_up && block_available_left && block_available_up_left;
+ 
+   i = (img_x & 15);  // NOTE(review): i and j are reassigned by the loops below before any visible read;
+   j = (img_y & 15);  // these stores look dead unless the P_* macros reference i/j -- confirm
+ 
+   // form predictor pels (img->dc_pred_value stands in for missing neighbours)
+   if (block_available_up)
+   {
+     P_A = imgY[pix_b.pos_y][pix_b.pos_x+0];
+     P_B = imgY[pix_b.pos_y][pix_b.pos_x+1];
+     P_C = imgY[pix_b.pos_y][pix_b.pos_x+2];
+     P_D = imgY[pix_b.pos_y][pix_b.pos_x+3];
+ 
+   }
+   else
+   {
+     P_A = P_B = P_C = P_D = img->dc_pred_value;
+   }
+ 
+   if (block_available_up_right)
+   {
+     P_E = imgY[pix_c.pos_y][pix_c.pos_x+0];
+     P_F = imgY[pix_c.pos_y][pix_c.pos_x+1];
+     P_G = imgY[pix_c.pos_y][pix_c.pos_x+2];
+     P_H = imgY[pix_c.pos_y][pix_c.pos_x+3];
+   }
+   else
+   {
+     P_E = P_F = P_G = P_H = P_D;  // replicate the last pel of the upper row
+   }
+ 
+   if (block_available_left)
+   {
+     P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+     P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+     P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+     P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+   }
+   else
+   {
+     P_I = P_J = P_K = P_L = img->dc_pred_value;
+   }
+ 
+   if (block_available_up_left)
+   {
+     P_X = imgY[pix_d.pos_y][pix_d.pos_x];
+   }
+   else
+   {
+     P_X = img->dc_pred_value;
+   }
+ 
+   for(i=0;i<9;i++)
+     img->mprr[i][0][0]=-1;  // start with all nine modes marked as not computed
+ 
+   ///////////////////////////////
+   // make DC prediction
+   ///////////////////////////////
+   s0 = 0;
+   if (block_available_up && block_available_left)
+   {   
+     // no edge
+     s0 = (P_A + P_B + P_C + P_D + P_I + P_J + P_K + P_L + 4) >> (BLOCK_SHIFT + 1);
+   }
+   else if (!block_available_up && block_available_left)
+   {
+     // upper edge
+     s0 = (P_I + P_J + P_K + P_L + 2) >> BLOCK_SHIFT;;             
+   }
+   else if (block_available_up && !block_available_left)
+   {
+     // left edge
+     s0 = (P_A + P_B + P_C + P_D + 2) >> BLOCK_SHIFT;             
+   }
+   else //if (!block_available_up && !block_available_left)
+   {
+     // top left corner, nothing to predict from
+     s0 = img->dc_pred_value;                           
+   }
+ 
+   // store DC prediction
+   for (j=0; j < BLOCK_SIZE; j++)
+   {
+     for (i=0; i < BLOCK_SIZE; i++)
+       img->mprr[DC_PRED][j][i] = s0;
+   }
+ 
+   ///////////////////////////////
+   // make horiz and vert prediction
+   ///////////////////////////////
+ 
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     img->mprr[VERT_PRED][0][i] = 
+     img->mprr[VERT_PRED][1][i] = 
+     img->mprr[VERT_PRED][2][i] = 
+     img->mprr[VERT_PRED][3][i] = (&P_A)[i];  // column i repeats the i-th pel after P_A
+     img->mprr[HOR_PRED][i][0]  = 
+     img->mprr[HOR_PRED][i][1]  = 
+     img->mprr[HOR_PRED][i][2]  = 
+     img->mprr[HOR_PRED][i][3]  = (&P_I)[i];  // row i repeats the i-th pel after P_I
+   }
+ 
+   if(!block_available_up)
+     img->mprr[VERT_PRED][0][0]=-1;  // re-mark as not computed
+   if(!block_available_left)
+     img->mprr[HOR_PRED][0][0]=-1;   // re-mark as not computed
+ 
+   if (block_available_up) 
+   {
+     // Mode DIAG_DOWN_LEFT_PRED
+     img->mprr[DIAG_DOWN_LEFT_PRED][0][0] = (P_A + P_C + 2*(P_B) + 2) >> 2;
+     img->mprr[DIAG_DOWN_LEFT_PRED][0][1] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][1][0] = (P_B + P_D + 2*(P_C) + 2) >> 2;
+     img->mprr[DIAG_DOWN_LEFT_PRED][0][2] =
+     img->mprr[DIAG_DOWN_LEFT_PRED][1][1] =
+     img->mprr[DIAG_DOWN_LEFT_PRED][2][0] = (P_C + P_E + 2*(P_D) + 2) >> 2;
+     img->mprr[DIAG_DOWN_LEFT_PRED][0][3] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][1][2] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][2][1] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][3][0] = (P_D + P_F + 2*(P_E) + 2) >> 2;
+     img->mprr[DIAG_DOWN_LEFT_PRED][1][3] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][2][2] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][3][1] = (P_E + P_G + 2*(P_F) + 2) >> 2;
+     img->mprr[DIAG_DOWN_LEFT_PRED][2][3] = 
+     img->mprr[DIAG_DOWN_LEFT_PRED][3][2] = (P_F + P_H + 2*(P_G) + 2) >> 2;
+     img->mprr[DIAG_DOWN_LEFT_PRED][3][3] = (P_G + 3*(P_H) + 2) >> 2;
+ 
+     // Mode VERT_LEFT_PRED
+     img->mprr[VERT_LEFT_PRED][0][0] = (P_A + P_B + 1) >> 1;
+     img->mprr[VERT_LEFT_PRED][0][1] = 
+     img->mprr[VERT_LEFT_PRED][2][0] = (P_B + P_C + 1) >> 1;
+     img->mprr[VERT_LEFT_PRED][0][2] = 
+     img->mprr[VERT_LEFT_PRED][2][1] = (P_C + P_D + 1) >> 1;
+     img->mprr[VERT_LEFT_PRED][0][3] = 
+     img->mprr[VERT_LEFT_PRED][2][2] = (P_D + P_E + 1) >> 1;
+     img->mprr[VERT_LEFT_PRED][2][3] = (P_E + P_F + 1) >> 1;
+     img->mprr[VERT_LEFT_PRED][1][0] = (P_A + 2*P_B + P_C + 2) >> 2;
+     img->mprr[VERT_LEFT_PRED][1][1] = 
+     img->mprr[VERT_LEFT_PRED][3][0] = (P_B + 2*P_C + P_D + 2) >> 2;
+     img->mprr[VERT_LEFT_PRED][1][2] = 
+     img->mprr[VERT_LEFT_PRED][3][1] = (P_C + 2*P_D + P_E + 2) >> 2;
+     img->mprr[VERT_LEFT_PRED][1][3] = 
+     img->mprr[VERT_LEFT_PRED][3][2] = (P_D + 2*P_E + P_F + 2) >> 2;
+     img->mprr[VERT_LEFT_PRED][3][3] = (P_E + 2*P_F + P_G + 2) >> 2;
+ 
+   }
+ 
+   /*  Prediction that needs only the left column */
+   if (block_available_left) 
+   {
+     // Mode HOR_UP_PRED
+     img->mprr[HOR_UP_PRED][0][0] = (P_I + P_J + 1) >> 1;
+     img->mprr[HOR_UP_PRED][0][1] = (P_I + 2*P_J + P_K + 2) >> 2;
+     img->mprr[HOR_UP_PRED][0][2] = 
+     img->mprr[HOR_UP_PRED][1][0] = (P_J + P_K + 1) >> 1;
+     img->mprr[HOR_UP_PRED][0][3] = 
+     img->mprr[HOR_UP_PRED][1][1] = (P_J + 2*P_K + P_L + 2) >> 2;
+     img->mprr[HOR_UP_PRED][1][2] = 
+     img->mprr[HOR_UP_PRED][2][0] = (P_K + P_L + 1) >> 1;
+     img->mprr[HOR_UP_PRED][1][3] = 
+     img->mprr[HOR_UP_PRED][2][1] = (P_K + 2*P_L + P_L + 2) >> 2;
+     img->mprr[HOR_UP_PRED][3][0] = 
+     img->mprr[HOR_UP_PRED][2][2] = 
+     img->mprr[HOR_UP_PRED][2][3] = 
+     img->mprr[HOR_UP_PRED][3][1] = 
+     img->mprr[HOR_UP_PRED][3][2] = 
+     img->mprr[HOR_UP_PRED][3][3] = P_L;
+   }
+ 
+   /*  Prediction according to 'diagonal' modes that need up, left and up-left */
+   if (block_available_up && block_available_left && block_available_up_left) 
+   {
+     // Mode DIAG_DOWN_RIGHT_PRED
+     img->mprr[DIAG_DOWN_RIGHT_PRED][3][0] = (P_L + 2*P_K + P_J + 2) >> 2; 
+     img->mprr[DIAG_DOWN_RIGHT_PRED][2][0] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][3][1] = (P_K + 2*P_J + P_I + 2) >> 2; 
+     img->mprr[DIAG_DOWN_RIGHT_PRED][1][0] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][2][1] = 
+     img->mprr[DIAG_DOWN_RIGHT_PRED][3][2] = (P_J + 2*P_I + P_X + 2) >> 2; 
+     img->mprr[DIAG_DOWN_RIGHT_PRED][0][0] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][1][1] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][2][2] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][3][3] = (P_I + 2*P_X + P_A + 2) >> 2; 
+     img->mprr[DIAG_DOWN_RIGHT_PRED][0][1] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][1][2] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][2][3] = (P_X + 2*P_A + P_B + 2) >> 2;
+     img->mprr[DIAG_DOWN_RIGHT_PRED][0][2] =
+     img->mprr[DIAG_DOWN_RIGHT_PRED][1][3] = (P_A + 2*P_B + P_C + 2) >> 2;
+     img->mprr[DIAG_DOWN_RIGHT_PRED][0][3] = (P_B + 2*P_C + P_D + 2) >> 2;
+ 
+      // Mode VERT_RIGHT_PRED
+     img->mprr[VERT_RIGHT_PRED][0][0] = 
+     img->mprr[VERT_RIGHT_PRED][2][1] = (P_X + P_A + 1) >> 1;
+     img->mprr[VERT_RIGHT_PRED][0][1] = 
+     img->mprr[VERT_RIGHT_PRED][2][2] = (P_A + P_B + 1) >> 1;
+     img->mprr[VERT_RIGHT_PRED][0][2] = 
+     img->mprr[VERT_RIGHT_PRED][2][3] = (P_B + P_C + 1) >> 1;
+     img->mprr[VERT_RIGHT_PRED][0][3] = (P_C + P_D + 1) >> 1;
+     img->mprr[VERT_RIGHT_PRED][1][0] = 
+     img->mprr[VERT_RIGHT_PRED][3][1] = (P_I + 2*P_X + P_A + 2) >> 2;
+     img->mprr[VERT_RIGHT_PRED][1][1] = 
+     img->mprr[VERT_RIGHT_PRED][3][2] = (P_X + 2*P_A + P_B + 2) >> 2;
+     img->mprr[VERT_RIGHT_PRED][1][2] = 
+     img->mprr[VERT_RIGHT_PRED][3][3] = (P_A + 2*P_B + P_C + 2) >> 2;
+     img->mprr[VERT_RIGHT_PRED][1][3] = (P_B + 2*P_C + P_D + 2) >> 2;
+     img->mprr[VERT_RIGHT_PRED][2][0] = (P_X + 2*P_I + P_J + 2) >> 2;
+     img->mprr[VERT_RIGHT_PRED][3][0] = (P_I + 2*P_J + P_K + 2) >> 2;
+ 
+     // Mode HOR_DOWN_PRED
+     img->mprr[HOR_DOWN_PRED][0][0] = 
+     img->mprr[HOR_DOWN_PRED][1][2] = (P_X + P_I + 1) >> 1;
+     img->mprr[HOR_DOWN_PRED][0][1] = 
+     img->mprr[HOR_DOWN_PRED][1][3] = (P_I + 2*P_X + P_A + 2) >> 2;
+     img->mprr[HOR_DOWN_PRED][0][2] = (P_X + 2*P_A + P_B + 2) >> 2;
+     img->mprr[HOR_DOWN_PRED][0][3] = (P_A + 2*P_B + P_C + 2) >> 2;
+     img->mprr[HOR_DOWN_PRED][1][0] = 
+     img->mprr[HOR_DOWN_PRED][2][2] = (P_I + P_J + 1) >> 1;
+     img->mprr[HOR_DOWN_PRED][1][1] = 
+     img->mprr[HOR_DOWN_PRED][2][3] = (P_X + 2*P_I + P_J + 2) >> 2;
+     img->mprr[HOR_DOWN_PRED][2][0] = 
+     img->mprr[HOR_DOWN_PRED][3][2] = (P_J + P_K + 1) >> 1;
+     img->mprr[HOR_DOWN_PRED][2][1] = 
+     img->mprr[HOR_DOWN_PRED][3][3] = (P_I + 2*P_J + P_K + 2) >> 2;
+     img->mprr[HOR_DOWN_PRED][3][0] = (P_K + P_L + 1) >> 1;
+     img->mprr[HOR_DOWN_PRED][3][1] = (P_J + 2*P_K + P_L + 2) >> 2;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    16x16 based luma prediction
+  *
+  *    Fills img->mprr_2[mode][row][col] for the current macroblock
+  *    (img->current_mb_nr) with the vertical, horizontal and DC
+  *    predictions, and with the plane prediction when the upper, left
+  *    and upper-left neighbours are all available.
+  *
+  *    When the upper (resp. left) neighbour is unavailable the vertical
+  *    (resp. horizontal) prediction buffer is filled with zeros instead
+  *    of reconstructed pixels.
+  *
+  * \par Input:
+  *    Image parameters
+  *
+  * \par Output:
+  *    none
+  ************************************************************************
+  */
+ void intrapred_luma_16x16()
+ {
+   int s0=0,s1,s2;
+   // s[0] holds the row above, s[1] the column to the left.  Zero-filled so
+   // that an unavailable neighbour never leaks indeterminate stack contents
+   // into img->mprr_2 through the unconditional stores further down.
+   imgpel s[2][16] = {{0}};
+   int i,j;
+ 
+   int ih,iv;
+   int ib,ic,iaa;
+ 
+   imgpel   **imgY_pred = enc_picture->imgY;  // For Mb level field/frame coding tools -- default to frame pred
+   int          mb_nr = img->current_mb_nr;
+ 
+   PixelPos up;          //!< pixel position p(0,-1)
+   PixelPos left[17];    //!< pixel positions p(-1, -1..15)
+ 
+   int up_avail, left_avail, left_up_avail;
+ 
+   for (i=0;i<17;i++)
+   {
+     getNeighbour(mb_nr, -1,  i-1, 1, &left[i]);
+   }
+   
+   getNeighbour(mb_nr,    0,   -1, 1, &up);
+ 
+   if (!(input->UseConstrainedIntraPred))
+   {
+     up_avail      = up.available;
+     left_avail    = left[1].available;
+     left_up_avail = left[0].available;
+   }
+   else
+   {
+     // constrained mode: a neighbour only counts when its macroblock is flagged in img->intra_block
+     up_avail      = up.available ? img->intra_block[up.mb_addr] : 0;
+     for (i=1, left_avail=1; i<17;i++)
+       left_avail  &= left[i].available ? img->intra_block[left[i].mb_addr]: 0;
+     left_up_avail = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+   }
+ 
+   s1=s2=0;
+   // make DC prediction
+   if (up_avail)
+   {
+     for (i=0; i < MB_BLOCK_SIZE; i++)
+       s1 += imgY_pred[up.pos_y][up.pos_x+i];    // sum hor pix
+   }
+ 
+   if (left_avail)
+   {
+     for (i=0; i < MB_BLOCK_SIZE; i++)      
+       s2 += imgY_pred[left[i+1].pos_y][left[i+1].pos_x];    // sum vert pix
+   }
+ 
+   if (up_avail && left_avail)
+     s0=(s1+s2+16)/(2*MB_BLOCK_SIZE);             // no edge
+   else if (left_avail)
+     s0=(s2+8)/MB_BLOCK_SIZE;                     // upper edge
+   else if (up_avail)
+     s0=(s1+8)/MB_BLOCK_SIZE;                     // left edge
+   else
+     s0=img->dc_pred_value;                       // top left corner, nothing to predict from
+ 
+   // vertical prediction
+   if (up_avail)
+     memcpy(s[0], &imgY_pred[up.pos_y][up.pos_x], MB_BLOCK_SIZE * sizeof(imgpel));
+   
+   // horizontal prediction
+   if (left_avail)
+   {
+     for (i=0; i < MB_BLOCK_SIZE; i++)
+       s[1][i]=imgY_pred[left[i+1].pos_y][left[i+1].pos_x];
+   }
+ 
+   for (j=0; j < MB_BLOCK_SIZE; j++)
+   {
+     memcpy(img->mprr_2[VERT_PRED_16][j], s[0], MB_BLOCK_SIZE * sizeof(imgpel)); // store vertical prediction
+     for (i=0; i < MB_BLOCK_SIZE; i++)
+     {
+       img->mprr_2[HOR_PRED_16 ][j][i]=s[1][j]; // store horizontal prediction
+       img->mprr_2[DC_PRED_16  ][j][i]=s0;      // store DC prediction
+     }
+   }
+   if (!up_avail || !left_avail || !left_up_avail) // edge: plane prediction is not computed
+     return;
+ 
+   // 16 bit integer plane pred
+ 
+   ih=0;
+   iv=0;
+   for (i=1;i<9;i++)
+   {
+     if (i<8)
+       ih += i*(imgY_pred[up.pos_y][up.pos_x+7+i] - imgY_pred[up.pos_y][up.pos_x+7-i]);
+     else
+       ih += i*(imgY_pred[up.pos_y][up.pos_x+7+i] - imgY_pred[left[0].pos_y][left[0].pos_x]); // column -1 is the upper-left pel
+     
+     iv += i*(imgY_pred[left[8+i].pos_y][left[8+i].pos_x] - imgY_pred[left[8-i].pos_y][left[8-i].pos_x]);
+   }
+   ib=(5*ih+32)>>6;
+   ic=(5*iv+32)>>6;
+   
+   iaa=16*(imgY_pred[up.pos_y][up.pos_x+15]+imgY_pred[left[16].pos_y][left[16].pos_x]);
+ 
+   for (j=0;j< MB_BLOCK_SIZE;j++)
+   {
+     for (i=0;i< MB_BLOCK_SIZE;i++)
+     {
+       // clip the plane value to [0, img->max_imgpel_value]
+       img->mprr_2[PLANE_16][j][i]=max(0,min((int)img->max_imgpel_value,(iaa+(i-7)*ib +(j-7)*ic + 16)/32));// store plane prediction
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Transform, quantise and reconstruct one Intra 16x16 luma macroblock.
+  *
+  *    Steps, in order:
+  *      - residual = original luma - img->mprr_2[new_intra_mode] (or the
+  *        contents of img->m7 when img->residue_transform_flag is set),
+  *        regrouped into sixteen 4x4 blocks
+  *      - forward 4x4 transform of every block, then a second 4x4
+  *        transform of the sixteen DC terms (both skipped when
+  *        lossless_qpprime is true)
+  *      - DC levels/runs are stored in img->cofDC[0], the AC levels/runs
+  *        of each 4x4 block in img->cofAC[b8][b4]
+  *      - dequantisation and inverse transforms; the result is written to
+  *        enc_picture->imgY, or to img->m7 when residue_transform_flag
+  *        is set
+  *
+  * \par Input:
+  *    new_intra_mode: index into img->mprr_2 selecting the 16x16 prediction
+  *
+  * \par Output:
+  *    Return value is 15 if any AC coefficient was quantised to a
+  *    nonzero level, otherwise 0.
+  ************************************************************************
+  */
+ int dct_luma_16x16(int new_intra_mode)
+ {
+   //int qp_const;
+   int i,j;
+   int ii,jj;
+   int jdiv, jmod;
+   int M1[16][16];        // residual / reconstructed residual in raster order
+   int M4[4][4];          // scratch 4x4 block
+   int M5[4],M6[4];       // butterfly temporaries (forward / inverse)
+   int M0[4][4][4][4];    // [block row][block col][row in block][col in block]
+   int run,scan_pos,coeff_ctr,level;
+   int qp_per,qp_rem,q_bits;
+   int ac_coef = 0;
+ 
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+ 
+   int   b8, b4;
+   int*  DCLevel = img->cofDC[0][0];
+   int*  DCRun   = img->cofDC[0][1];
+   int*  ACLevel;
+   int*  ACRun;
+   int **levelscale,**leveloffset;
+   int **invlevelscale;
+   Boolean lossless_qpprime = ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+ 
+   qp_per    = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6;
+   qp_rem    = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6;
+   q_bits    = Q_BITS+qp_per;
+   levelscale    = LevelScale4x4Luma[1][qp_rem];    // first index fixed to 1 -- presumably the intra tables, confirm
+   leveloffset   = LevelOffset4x4Luma[1][qp_per];
+   invlevelscale = InvLevelScale4x4Luma[1][qp_rem];
+ 
+ 
+   for (j=0;j<16;j++)
+   {
+     jdiv = j >> 2;
+     jmod = j & 0x03;
+     jj = img->opix_y+j;
+     for (i=0;i<16;i++)
+     {
+       // Residue Color Transform
+       if(!img->residue_transform_flag)
+         M1[j][i]=imgY_org[jj][img->opix_x+i]-img->mprr_2[new_intra_mode][j][i];
+       else
+         M1[j][i]=img->m7[j][i];
+ 
+       M0[jdiv][i >> 2][jmod][i & 0x03]=M1[j][i];
+     }
+   }
+ 
+   // forward 4x4 transform of each block (skipped entirely in lossless mode)
+   for (jj=0;jj<4 && !lossless_qpprime;jj++)
+   {
+     for (ii=0;ii<4;ii++)
+     {
+       for (j=0;j<4;j++)
+       {
+         M5[0] = M0[jj][ii][j][0] + M0[jj][ii][j][3];
+         M5[1] = M0[jj][ii][j][1] + M0[jj][ii][j][2];
+         M5[2] = M0[jj][ii][j][1] - M0[jj][ii][j][2];
+         M5[3] = M0[jj][ii][j][0] - M0[jj][ii][j][3];
+ 
+         M4[j][0] = M5[0]   + M5[1];
+         M4[j][2] = M5[0]   - M5[1];
+         M4[j][1] = M5[3]*2 + M5[2];
+         M4[j][3] = M5[3]   - M5[2]*2;
+       }
+       // vertical
+       for (i=0;i<4;i++)
+       {
+         M5[0] = M4[0][i] + M4[3][i];
+         M5[1] = M4[1][i] + M4[2][i];
+         M5[2] = M4[1][i] - M4[2][i];
+         M5[3] = M4[0][i] - M4[3][i];
+         
+         M0[jj][ii][0][i] = M5[0]   + M5[1];
+         M0[jj][ii][2][i] = M5[0]   - M5[1];
+         M0[jj][ii][1][i] = M5[3]*2 + M5[2];
+         M0[jj][ii][3][i] = M5[3]   - M5[2]*2;
+       }
+     }
+   }
+ 
+   // pick out DC coeff
+ 
+   for (j=0;j<4;j++)
+   {
+     for (i=0;i<4;i++)
+       M4[j][i]= M0[j][i][0][0];
+   }
+ 
+   if (!lossless_qpprime)
+   {
+     // second-level transform of the 16 DC terms, horizontal pass
+     for (j=0;j<4;j++)
+     {
+       M5[0] = M4[j][0]+M4[j][3];
+       M5[1] = M4[j][1]+M4[j][2];
+       M5[2] = M4[j][1]-M4[j][2];
+       M5[3] = M4[j][0]-M4[j][3];
+       
+       M4[j][0] = M5[0]+M5[1];
+       M4[j][2] = M5[0]-M5[1];
+       M4[j][1] = M5[3]+M5[2];
+       M4[j][3] = M5[3]-M5[2];
+     }
+     
+     // vertical
+     
+     for (i=0;i<4;i++)
+     {    
+       M5[0] = M4[0][i]+M4[3][i];
+       M5[1] = M4[1][i]+M4[2][i];
+       M5[2] = M4[1][i]-M4[2][i];
+       M5[3] = M4[0][i]-M4[3][i];
+       
+       M4[0][i]=(M5[0]+M5[1])>>1;
+       M4[2][i]=(M5[0]-M5[1])>>1;
+       M4[1][i]=(M5[3]+M5[2])>>1;
+       M4[3][i]=(M5[3]-M5[2])>>1;
+     }
+   }
+   // quant
+ 
+   run=-1;
+   scan_pos=0;
+ 
+   for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++)
+   {
+     if (is_field_mode) 
+     {  // Alternate scan for field coding
+         i=FIELD_SCAN[coeff_ctr][0];
+         j=FIELD_SCAN[coeff_ctr][1];
+     }
+     else 
+     {
+         i=SNGL_SCAN[coeff_ctr][0];
+         j=SNGL_SCAN[coeff_ctr][1];
+     }
+ 
+     run++;
+ 
+     if(lossless_qpprime)
+       level= absm(M4[j][i]);
+     else
+       level= (absm(M4[j][i]) * levelscale[0][0] + (leveloffset[0][0]<<1)) >> (q_bits+1);  // every DC term uses the [0][0] scale and offset
+ 
+     if (input->symbol_mode == UVLC && img->qp < 10) 
+     {
+       if (level > CAVLC_LEVEL_LIMIT)   // cap the level when coding with UVLC at low QP
+         level = CAVLC_LEVEL_LIMIT;
+     }
+ 
+     if (level != 0)
+     {
+       DCLevel[scan_pos] = sign(level,M4[j][i]);
+       DCRun  [scan_pos] = run;
+       ++scan_pos;
+       run=-1;
+     }
+     if(!lossless_qpprime)
+       M4[j][i]=sign(level,M4[j][i]);  // keep the signed level for the inverse DC transform
+   }
+   DCLevel[scan_pos]=0;  // zero level terminates the list
+ 
+   // inverse DC transform
+   for (j=0;j<4 && !lossless_qpprime;j++)
+   {
+     M6[0]=M4[j][0]+M4[j][2];
+     M6[1]=M4[j][0]-M4[j][2];
+     M6[2]=M4[j][1]-M4[j][3];
+     M6[3]=M4[j][1]+M4[j][3];
+ 
+     M4[j][0] = M6[0]+M6[3];
+     M4[j][1] = M6[1]+M6[2];
+     M4[j][2] = M6[1]-M6[2];
+     M4[j][3] = M6[0]-M6[3];
+   }
+ 
+   for (i=0;i<4 && !lossless_qpprime;i++)
+   {
+     
+     M6[0]=M4[0][i]+M4[2][i];
+     M6[1]=M4[0][i]-M4[2][i];
+     M6[2]=M4[1][i]-M4[3][i];
+     M6[3]=M4[1][i]+M4[3][i];
+     
+     if(qp_per<6)
+     {
+       // dequantised DC goes back into position [0][0] of each 4x4 block; rounded right shift for small qp_per
+       M0[0][i][0][0] = ((M6[0]+M6[3])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per);
+       M0[1][i][0][0] = ((M6[1]+M6[2])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per);
+       M0[2][i][0][0] = ((M6[1]-M6[2])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per);
+       M0[3][i][0][0] = ((M6[0]-M6[3])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per);
+     }
+     else
+     {
+       M0[0][i][0][0] = ((M6[0]+M6[3])*invlevelscale[0][0])<<(qp_per-6);
+       M0[1][i][0][0] = ((M6[1]+M6[2])*invlevelscale[0][0])<<(qp_per-6);
+       M0[2][i][0][0] = ((M6[1]-M6[2])*invlevelscale[0][0])<<(qp_per-6);
+       M0[3][i][0][0] = ((M6[0]-M6[3])*invlevelscale[0][0])<<(qp_per-6);
+     }   
+   }
+ 
+   // AC quant, inverse quant and inverse transform for each 4x4 block of the MB
+   for (jj=0;jj<4;jj++)
+   {
+     for (ii=0;ii<4;ii++)
+     {
+       for (j=0;j<4;j++)
+       {
+         memcpy(M4[j],M0[jj][ii][j], BLOCK_SIZE * sizeof(int));
+       }
+ 
+       run      = -1;
+       scan_pos =  0;
+       b8       = 2*(jj >> 1) + (ii >> 1);        // 8x8 block index inside the macroblock
+       b4       = 2*(jj & 0x01) + (ii & 0x01);    // 4x4 block index inside that 8x8 block
+       ACLevel  = img->cofAC [b8][b4][0];
+       ACRun    = img->cofAC [b8][b4][1];
+ 
+       for (coeff_ctr=1;coeff_ctr<16;coeff_ctr++) // set in AC coeff (scan position 0 is skipped here)
+       {
+ 
+         if (is_field_mode) 
+         {  // Alternate scan for field coding
+           i=FIELD_SCAN[coeff_ctr][0];
+           j=FIELD_SCAN[coeff_ctr][1];
+         }
+         else 
+         {
+           i=SNGL_SCAN[coeff_ctr][0];
+           j=SNGL_SCAN[coeff_ctr][1];
+         }
+         run++;
+ 
+         if(lossless_qpprime)
+           level= absm( M4[j][i]);
+         else          
+           level= ( absm( M4[j][i]) * levelscale[i][j] + leveloffset[i][j]) >> q_bits;
+ 
+         if (img->AdaptiveRounding)
+         {
+           // record the per-coefficient rounding adjustment in img->fadjust4x4[2]
+           if (lossless_qpprime || level == 0 )
+           {
+             img->fadjust4x4[2][jj*BLOCK_SIZE+j][ii*BLOCK_SIZE+i] = 0;
+           }
+           else
+           {
+             img->fadjust4x4[2][jj*BLOCK_SIZE+j][ii*BLOCK_SIZE+i] = 
+               (AdaptRndWeight * (absm(M4[j][i]) * levelscale[i][j] - (level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1);
+           }            
+         }
+ 
+         if (level != 0)
+         {
+           ac_coef = 15;  // any nonzero AC level sets the return value to 15
+           ACLevel[scan_pos] = sign(level,M4[j][i]);
+           ACRun  [scan_pos] = run;
+           ++scan_pos;
+           run=-1;
+         }
+         
+         if(!lossless_qpprime)
+         {
+           // dequantise in place so M4 can be fed to the inverse transform
+           level=sign(level, M4[j][i]);
+           if(qp_per<4)
+             M4[j][i]=(level*invlevelscale[i][j]+(1<<(3-qp_per)))>>(4-qp_per);
+           else
+             M4[j][i]=(level*invlevelscale[i][j])<<(qp_per-4);
+         }
+       }
+       ACLevel[scan_pos] = 0;  // zero level terminates the list
+ 
+ 
+       // IDCT horizontal
+       for (j=0;j<4 && !lossless_qpprime;j++)
+       {
+         M6[0] = M4[j][0]     +  M4[j][2];
+         M6[1] = M4[j][0]     -  M4[j][2];
+         M6[2] =(M4[j][1]>>1) -  M4[j][3];
+         M6[3] = M4[j][1]     + (M4[j][3]>>1);
+         
+         M4[j][0] = M6[0] + M6[3];
+         M4[j][1] = M6[1] + M6[2];
+         M4[j][2] = M6[1] - M6[2];
+         M4[j][3] = M6[0] - M6[3];
+       }
+ 
+       // vert
+       for (i=0;i<4 && !lossless_qpprime;i++)
+       {
+         M6[0]= M4[0][i]     +  M4[2][i];
+         M6[1]= M4[0][i]     -  M4[2][i];
+         M6[2]=(M4[1][i]>>1) -  M4[3][i];
+         M6[3]= M4[1][i]     + (M4[3][i]>>1);
+         
+         M0[jj][ii][0][i] = M6[0] + M6[3];
+         M0[jj][ii][1][i] = M6[1] + M6[2];
+         M0[jj][ii][2][i] = M6[1] - M6[2];
+         M0[jj][ii][3][i] = M6[0] - M6[3];
+       }
+     }
+   }
+ 
+   // Residue Color Transform
+   if(!img->residue_transform_flag)
+   {
+     // back from 4x4-block layout to raster order
+     for (jj=0;jj<BLOCK_MULTIPLE; jj++)
+       for (ii=0;ii<BLOCK_MULTIPLE; ii++)
+         for (j=0;j<BLOCK_SIZE;j++)
+         {
+           memcpy(&M1[jj*BLOCK_SIZE + j][ii*BLOCK_SIZE], M0[jj][ii][j], BLOCK_SIZE * sizeof(int));
+         }
+   }
+   else
+   {
+     if(lossless_qpprime)
+     {
+       for (j=0;j<MB_BLOCK_SIZE;j++)    
+       {
+         jdiv = j >> 2;
+         jmod = j & 0x03;
+         for (i=0;i<MB_BLOCK_SIZE;i++)
+           img->m7[j][i]=M0[jdiv][i >> 2][jmod][i & 0x03];
+       }
+     }
+     else
+     {
+       for (j=0;j<MB_BLOCK_SIZE;j++)    
+       {
+         jdiv = j >> 2;
+         jmod = j & 0x03;
+         for (i=0;i<MB_BLOCK_SIZE;i++)
+           img->m7[j][i]=((M0[jdiv][i >> 2][jmod][i & 0x03]+DQ_ROUND)>>DQ_BITS);
+       }
+     }
+   }
+ 
+   // write the reconstructed macroblock into the encoded picture
+   if(!img->residue_transform_flag)
+   {
+     if(lossless_qpprime)
+     {
+       for (j=0;j<16;j++)
+       {
+         jj = img->pix_y+j;
+         for (i=0;i<16;i++)
+             enc_picture->imgY[jj][img->pix_x+i]=(imgpel)(M1[j][i]+img->mprr_2[new_intra_mode][j][i]);
+       }
+     }
+     else
+     {
+       for (j=0;j<16;j++)
+       {
+         jj = img->pix_y+j;
+         for (i=0;i<16;i++)
+           enc_picture->imgY[jj][img->pix_x+i]=(imgpel)clip1a((M1[j][i]+((long)img->mprr_2[new_intra_mode][j][i]<<DQ_BITS)+DQ_ROUND)>>DQ_BITS);
+       }
+     }
+   }
+   return ac_coef;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    The routine performs transform,quantization,inverse transform, adds the diff.
+ *    to the prediction and writes the result to the decoded luma frame. Includes the
+ *    RD constrained quantization also.
+ *
+ *    The 4x4 residual is read from img->m7[0..3][0..3]; run/level pairs are
+ *    stored in img->cofAC[b8][b4].  When lossless_qpprime is true the
+ *    transforms and the quantiser are bypassed and img->m7 is coded directly.
+ *
+ * \par Input:
+ *    block_x,block_y: Block position inside a macro block (0,4,8,12).        \n
+ *    coeff_cost: accumulator, incremented for every nonzero level.           \n
+ *    intra: first index into the LevelScale4x4Luma, LevelOffset4x4Luma and
+ *           InvLevelScale4x4Luma tables and into img->fadjust4x4.
+ *
+ * \par Output:
+ *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.             \n
+ *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+ ************************************************************************
+ */
+ int dct_luma(int block_x,int block_y,int *coeff_cost, int intra)
+ {
+   int sign(int a,int b);
+ 
+   int i,j,ilev, m4[4][4], m5[4],m6[4],coeff_ctr;
+   int ii;
+   //int qp_const;
+   int level,scan_pos,run;
+   int nonzero;
+   int qp_per,qp_rem,q_bits;
+ 
+   int   pos_x   = block_x >> BLOCK_SHIFT;
+   int   pos_y   = block_y >> BLOCK_SHIFT;
+   int   b8      = 2*(pos_y >> 1) + (pos_x >> 1);       // 8x8 block index inside the macroblock
+   int   b4      = 2*(pos_y & 0x01) + (pos_x & 0x01);   // 4x4 block index inside that 8x8 block
+   int*  ACLevel = img->cofAC[b8][b4][0];
+   int*  ACRun   = img->cofAC[b8][b4][1];
+   short pix_y;
+ 
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+ 
+   Boolean lossless_qpprime = ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+   int **levelscale,**leveloffset;
+   int **invlevelscale;
+ 
+   qp_per    = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; 
+   qp_rem    = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; 
+   q_bits    = Q_BITS+qp_per;
+ 
+   levelscale    = LevelScale4x4Luma[intra][qp_rem];
+   leveloffset   = LevelOffset4x4Luma[intra][qp_per];
+   invlevelscale = InvLevelScale4x4Luma[intra][qp_rem];
+ 
+   //  Horizontal transform (m4 stays unwritten in lossless mode and must not be read there)
+   if (!lossless_qpprime)
+   {
+     for (j=0; j < BLOCK_SIZE; j++)
+     {
+       m5[0] = img->m7[j][0]+img->m7[j][3];
+       m5[1] = img->m7[j][1]+img->m7[j][2];
+       m5[2] = img->m7[j][1]-img->m7[j][2];
+       m5[3] = img->m7[j][0]-img->m7[j][3];
+       
+       m4[j][0] = m5[0]   + m5[1];
+       m4[j][2] = m5[0]   - m5[1];
+       m4[j][1] = m5[3]*2 + m5[2];
+       m4[j][3] = m5[3]   - m5[2]*2;
+     }
+     
+     //  Vertical transform
+     for (i=0; i < BLOCK_SIZE; i++)
+     {    
+       m5[0] = m4[0][i] + m4[3][i];
+       m5[1] = m4[1][i] + m4[2][i];
+       m5[2] = m4[1][i] - m4[2][i];
+       m5[3] = m4[0][i] - m4[3][i];
+       
+       m4[0][i] = m5[0]   + m5[1];
+       m4[2][i] = m5[0]   - m5[1];
+       m4[1][i] = m5[3]*2 + m5[2];
+       m4[3][i] = m5[3]   - m5[2]*2;
+     }
+   }
+   // Quant
+ 
+   nonzero=FALSE;
+ 
+   run=-1;
+   scan_pos=0;
+   
+   for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++)
+   {
+ 
+     if (is_field_mode) 
+     {  
+       // Alternate scan for field coding
+       i=FIELD_SCAN[coeff_ctr][0];
+       j=FIELD_SCAN[coeff_ctr][1];
+     }
+     else 
+     {
+       i=SNGL_SCAN[coeff_ctr][0];
+       j=SNGL_SCAN[coeff_ctr][1];
+     }
+     
+     run++;
+     ilev=0;
+     
+     if(lossless_qpprime)
+       level = absm (img->m7[j][i]);
+     else
+       level = (absm (m4[j][i]) * levelscale[i][j] + leveloffset[i][j]) >> q_bits;
+ 
+     if (img->AdaptiveRounding)
+     {
+       // record the per-coefficient rounding adjustment
+       if (lossless_qpprime || level == 0 )
+       {
+         img->fadjust4x4[intra][block_y+j][block_x+i] = 0;
+       }
+       else 
+       {
+         img->fadjust4x4[intra][block_y+j][block_x+i] = 
+           (AdaptRndWeight * (absm(m4[j][i]) * levelscale[i][j] - (level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1);         
+       }
+     }
+ 
+     if (level != 0)
+     {
+       nonzero=TRUE;
+ 
+       *coeff_cost += (level > 1 || lossless_qpprime) ? MAX_VALUE : COEFF_COST[input->disthres][run];
+ 
+       if(lossless_qpprime)
+         ACLevel[scan_pos] = sign(level,img->m7[j][i]);
+       else
+         ACLevel[scan_pos] = sign(level,m4[j][i]);
+ 
+       ACRun  [scan_pos] = run;
+       ++scan_pos;
+       run=-1;                     // reset zero level counter
+ 
+       // Dequantise only when the transform path is active: in lossless mode
+       // m4 was never written and ilev is not consumed below.
+       if(!lossless_qpprime)
+       {
+         level=sign(level, m4[j][i]);
+ 
+         if(qp_per<4)
+         {
+           ilev=(level*invlevelscale[i][j]+(1<<(3-qp_per)))>>(4-qp_per);
+         }
+         else
+         {
+           ilev=(level*invlevelscale[i][j])<<(qp_per-4);
+         }
+       }
+     }
+     if(!lossless_qpprime)
+       m4[j][i]=ilev;
+   }
+ 
+   ACLevel[scan_pos] = 0;  // zero level terminates the list
+   
+   //     IDCT.
+   //     horizontal
+ 
+   if (!lossless_qpprime)
+   {
+     for (j=0; j < BLOCK_SIZE; j++)
+     {
+       m6[0]=(m4[j][0]     +  m4[j][2]);
+       m6[1]=(m4[j][0]     -  m4[j][2]);
+       m6[2]=(m4[j][1]>>1) -  m4[j][3];
+       m6[3]= m4[j][1]     + (m4[j][3]>>1);
+       
+       m4[j][0] = m6[0] + m6[3];
+       m4[j][1] = m6[1] + m6[2];
+       m4[j][2] = m6[1] - m6[2];
+       m4[j][3] = m6[0] - m6[3];
+     }
+     
+     //  vertical
+     for (i=0; i < BLOCK_SIZE; i++)
+     {
+       
+       m6[0]=(m4[0][i]     +  m4[2][i]);
+       m6[1]=(m4[0][i]     -  m4[2][i]);
+       m6[2]=(m4[1][i]>>1) -  m4[3][i];
+       m6[3]= m4[1][i]     + (m4[3][i]>>1);
+       
+       ii = i + block_x;
+       
+       if (!img->residue_transform_flag)
+       {
+         // add the prediction, round, and clip to [0, img->max_imgpel_value]
+         img->m7[0][i] = min(img->max_imgpel_value,max(0,(m6[0]+m6[3]+((long)img->mpr[0 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+         img->m7[1][i] = min(img->max_imgpel_value,max(0,(m6[1]+m6[2]+((long)img->mpr[1 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+         img->m7[2][i] = min(img->max_imgpel_value,max(0,(m6[1]-m6[2]+((long)img->mpr[2 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+         img->m7[3][i] = min(img->max_imgpel_value,max(0,(m6[0]-m6[3]+((long)img->mpr[3 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+       } 
+       else 
+       {
+         // residue transform: keep the rounded residual, no prediction added here
+         img->m7[0][i] =(m6[0]+m6[3]+DQ_ROUND)>>DQ_BITS;
+         img->m7[1][i] =(m6[1]+m6[2]+DQ_ROUND)>>DQ_BITS;
+         img->m7[2][i] =(m6[1]-m6[2]+DQ_ROUND)>>DQ_BITS;
+         img->m7[3][i] =(m6[0]-m6[3]+DQ_ROUND)>>DQ_BITS;
+       }
+     }
+   }
+   //  Decoded block moved to frame memory
+   if (!img->residue_transform_flag)
+   {
+     if(lossless_qpprime)
+     {
+       // lossless: img->m7 still holds the untouched residual, so add the prediction here
+       for (j=0; j < BLOCK_SIZE; j++)
+       {
+         pix_y = img->pix_y+block_y+j;
+         for (i=0; i < BLOCK_SIZE; i++)
+         {
+          enc_picture->imgY[pix_y][img->pix_x+block_x+i]=img->m7[j][i]+img->mpr[j+block_y][i+block_x];
+         }
+       }
+     }
+     else
+     {
+       for (j=0; j < BLOCK_SIZE; j++)
+       {
+         pix_y = img->pix_y+block_y+j;
+         for (i=0; i < BLOCK_SIZE; i++)
+         {
+           enc_picture->imgY[pix_y][img->pix_x+block_x+i]=img->m7[j][i];
+         }
+       }
+     }
+     
+   }
+   return nonzero;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Transform,quantization,inverse transform for chroma.
+  *    The main reason why this is done in a separate routine is the
+  *    additional 2x2 transform of DC-coeffs. This routine is called
+  *    once for each of the chroma components.
+  *
+  * \par Input:
+  *    uv    : Make difference between the U and V chroma component  \n
+  *    cr_cbp: chroma coded block pattern
+  *
+  * \par Output:
+  *    cr_cbp: Updated chroma coded block pattern.
+  ************************************************************************
+  */
+ int dct_chroma(int uv,int cr_cbp)
+ {
+   // Overview of the processing order in this routine:
+   //   1. forward 4x4 transform of every chroma block held in img->m7
+   //   2. chroma-format specific DC transform, quantization and inverse DC
+   //      transform (2x2 for YUV420, 2x4 for YUV422, 4x4 for YUV444)
+   //   3. quantization / dequantization of the AC coefficients
+   //   4. optional discarding of all AC coefficients when their cost is low
+   //   5. inverse 4x4 transform, reconstruction and copy to enc_picture->imgUV
+   // When lossless_qpprime is set all transform steps are skipped and the
+   // residual values are coded directly.
+   //
+   // uv     : chroma component selector (used as index and as shift factor)
+   // cr_cbp : incoming chroma coded block pattern
+   // return : updated cr_cbp (raised to 1 when a DC level is coded and set to
+   //          2 when AC levels are kept)
+   int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,level ,scan_pos,run;
+   int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
+   int coeff_cost;
+   int cr_cbp_tmp;
+   int DCcoded=0 ;
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+  
+   int qp_per,qp_rem,q_bits;
+   int qp_c;
+ 
+   int   b4;
+   int*  DCLevel = img->cofDC[uv+1][0];
+   int*  DCRun   = img->cofDC[uv+1][1];
+   int*  ACLevel;
+   int*  ACRun;
+   int   intra = IS_INTRA (currMB);
+   int   uv_scale = uv*(img->num_blk8x8_uv >> 1);
+ 
+   //FRExt
+   // Per chroma format mask of the cbp_blk bits owned by the first chroma
+   // component; indexed by img->yuv_format when the bits are reset below.
+   int64 cbpblk_pattern[4]={0, 0xf0000, 0xff0000, 0xffff0000};
+   int yuv = img->yuv_format;
+   int b8;
+   int m3[4][4];
+   int m4[4][4];
+   int qp_per_dc = 0;
+   int qp_rem_dc = 0;
+   int q_bits_422 = 0;
+   int ***levelscale, ***leveloffset;
+   int ***invlevelscale;
+   short pix_c_x, pix_c_y;
+   short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+ 
+   Boolean lossless_qpprime = ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+ 
+   // Derive the chroma QP: add the per-component offset, clip it to the range
+   // accepted by the QP_SCALE_CR lookup, then map non-negative values through it.
+   qp_c      = currMB->qp + img->chroma_qp_offset[uv];
+   qp_c      = Clip3(-img->bitdepth_chroma_qp_scale,51,qp_c);
+   qp_c      = (qp_c < 0)? qp_c : QP_SCALE_CR[qp_c - MIN_QP];
+ 
+   qp_per    = (qp_c + img->bitdepth_chroma_qp_scale)/6;
+   qp_rem    = (qp_c + img->bitdepth_chroma_qp_scale)%6;
+   q_bits    = Q_BITS+qp_per;
+ 
+   levelscale = LevelScale4x4Chroma[uv][intra];
+   leveloffset = LevelOffset4x4Chroma[uv][intra];
+   invlevelscale = InvLevelScale4x4Chroma[uv][intra];
+ 
+   if (img->yuv_format == YUV422)
+   {
+     //for YUV422 only: the DC coefficients use a QP that is 3 higher
+     qp_per_dc = (qp_c + 3 + img->bitdepth_chroma_qp_scale)/6;
+     qp_rem_dc = (qp_c + 3 + img->bitdepth_chroma_qp_scale)%6;
+     
+     q_bits_422 = Q_BITS+qp_per_dc;
+   }
+ 
+   
+   //============= dct transform ===============
+   for (n2=0; n2 < img->mb_cr_size_y; n2 += BLOCK_SIZE)
+   {
+     for (n1=0; n1 < img->mb_cr_size_x; n1 += BLOCK_SIZE)
+     {
+ 
+       //  Horizontal transform (skipped entirely in lossless mode).
+       for (j=0; j < BLOCK_SIZE && !lossless_qpprime; j++)
+       {
+         mb_y=n2+j;
+         
+         m5[0]=img->m7[mb_y][n1  ]+img->m7[mb_y][n1+3];
+         m5[1]=img->m7[mb_y][n1+1]+img->m7[mb_y][n1+2];
+         m5[2]=img->m7[mb_y][n1+1]-img->m7[mb_y][n1+2];
+         m5[3]=img->m7[mb_y][n1  ]-img->m7[mb_y][n1+3];
+         
+         img->m7[mb_y][n1  ] = (m5[0]   + m5[1]);
+         img->m7[mb_y][n1+2] = (m5[0]   - m5[1]);
+         img->m7[mb_y][n1+1] =  m5[3]*2 + m5[2];
+         img->m7[mb_y][n1+3] =  m5[3]   - m5[2]*2;
+       }
+ 
+       //  Vertical transform (skipped entirely in lossless mode).
+ 
+       for (i=0; i < BLOCK_SIZE && !lossless_qpprime; i++)
+       {
+         j1=n1+i;
+         m5[0] = img->m7[n2  ][j1] + img->m7[n2+3][j1];
+         m5[1] = img->m7[n2+1][j1] + img->m7[n2+2][j1];
+         m5[2] = img->m7[n2+1][j1] - img->m7[n2+2][j1];
+         m5[3] = img->m7[n2  ][j1] - img->m7[n2+3][j1];
+ 
+         img->m7[n2+0][j1] = (m5[0]   + m5[1]);
+         img->m7[n2+2][j1] = (m5[0]   - m5[1]);
+         img->m7[n2+1][j1] =  m5[3]*2 + m5[2];
+         img->m7[n2+3][j1] =  m5[3]   - m5[2]*2;
+       }
+     }
+   }
+   
+   if (yuv == YUV420)
+   {
+     //================== CHROMA DC YUV420 ===================
+     //     2X2 transform of DC coeffs.
+     if(lossless_qpprime)
+     {
+       m1[0]=img->m7[0][0];
+       m1[1]=img->m7[0][4];
+       m1[2]=img->m7[4][0];
+       m1[3]=img->m7[4][4];
+     }
+     else 
+     {
+       m1[0]=(img->m7[0][0] + img->m7[0][4] + img->m7[4][0] + img->m7[4][4]);
+       m1[1]=(img->m7[0][0] - img->m7[0][4] + img->m7[4][0] - img->m7[4][4]);
+       m1[2]=(img->m7[0][0] + img->m7[0][4] - img->m7[4][0] - img->m7[4][4]);
+       m1[3]=(img->m7[0][0] - img->m7[0][4] - img->m7[4][0] + img->m7[4][4]);
+     }
+     
+     //     Quant of chroma 2X2 coeffs.
+     run=-1;
+     scan_pos=0;
+     
+     for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
+     {
+       run++;
+       ilev=0;
+       
+       if(lossless_qpprime)
+         level =absm(m1[coeff_ctr]);
+       else 
+         level =(absm(m1[coeff_ctr]) * levelscale[qp_rem][0][0] + (leveloffset[qp_per][0][0]<<1)) >> (q_bits+1);
+       
+       // Levels are capped at CAVLC_LEVEL_LIMIT for UVLC coding at very low QP.
+       if (input->symbol_mode == UVLC && img->qp < 4) 
+       {
+         if (level > CAVLC_LEVEL_LIMIT) 
+           level = CAVLC_LEVEL_LIMIT;
+       }
+       
+       if (level  != 0)
+       {
+         currMB->cbp_blk |= ((int64)0xf0000) << (uv << 2) ;    // if one of the 2x2-DC levels is != 0 set the
+         cr_cbp=max(1,cr_cbp);                     // coded-bit all 4 4x4 blocks (bit 16-19 or 20-23)
+         DCcoded = 1 ;
+         DCLevel[scan_pos] = sign(level ,m1[coeff_ctr]);
+         DCRun  [scan_pos] = run;
+         scan_pos++;
+         run=-1;
+         
+         ilev=sign(level, m1[coeff_ctr]);
+       }
+       if(!lossless_qpprime)
+         m1[coeff_ctr]=ilev;
+     }
+     DCLevel[scan_pos] = 0;   // terminate the level list
+     
+     //  Inverse transform of 2x2 DC levels
+     if(!lossless_qpprime)
+     {
+       m5[0]=(m1[0] + m1[1] + m1[2] + m1[3]);
+       m5[1]=(m1[0] - m1[1] + m1[2] - m1[3]);
+       m5[2]=(m1[0] + m1[1] - m1[2] - m1[3]);
+       m5[3]=(m1[0] - m1[1] - m1[2] + m1[3]);
+       if(qp_per<5)
+       {
+         for(i=0; i<4; i++)
+           m1[i]=(m5[i] * invlevelscale[qp_rem][0][0])>>(5-qp_per);
+       }
+       else
+       {
+         for(i=0; i<4; i++)
+           m1[i]=(m5[i] * invlevelscale[qp_rem][0][0])<<(qp_per-5);
+       }
+ 
+       img->m7[0][0] = m1[0];
+       img->m7[0][4] = m1[1];
+       img->m7[4][0] = m1[2];
+       img->m7[4][4] = m1[3];
+     }
+   }
+   else if(yuv == YUV422)
+   {
+     //================== CHROMA DC YUV422 ===================
+     //transform DC coeff
+     //horizontal
+     
+     //pick out DC coeff
+     for (j=0; j < img->mb_cr_size_y; j+=BLOCK_SIZE)
+     {
+       for (i=0; i < img->mb_cr_size_x; i+=BLOCK_SIZE)
+         m3[i>>2][j>>2]= img->m7[j][i];
+     } 
+     //horizontal
+     if(!lossless_qpprime)
+     {
+       m4[0][0] = m3[0][0] + m3[1][0];
+       m4[0][1] = m3[0][1] + m3[1][1];
+       m4[0][2] = m3[0][2] + m3[1][2];
+       m4[0][3] = m3[0][3] + m3[1][3];
+       
+       m4[1][0] = m3[0][0] - m3[1][0];
+       m4[1][1] = m3[0][1] - m3[1][1];
+       m4[1][2] = m3[0][2] - m3[1][2];
+       m4[1][3] = m3[0][3] - m3[1][3];
+       
+       // vertical
+       for (i=0;i<2;i++)
+       {
+         m5[0] = m4[i][0] + m4[i][3];
+         m5[1] = m4[i][1] + m4[i][2];
+         m5[2] = m4[i][1] - m4[i][2];
+         m5[3] = m4[i][0] - m4[i][3];
+         
+         m4[i][0] = (m5[0] + m5[1]);
+         m4[i][2] = (m5[0] - m5[1]);
+         m4[i][1] = (m5[3] + m5[2]);
+         m4[i][3] = (m5[3] - m5[2]);
+       }
+     }
+     
+     run=-1;
+     scan_pos=0;
+     
+     //quant of chroma DC-coeffs
+     for (coeff_ctr=0;coeff_ctr<8;coeff_ctr++)
+     {
+       i=SCAN_YUV422[coeff_ctr][0];
+       j=SCAN_YUV422[coeff_ctr][1];
+       
+       run++;
+ 
+       if(lossless_qpprime)
+       {
+         level = absm(m3[i][j]);
+         m4[i][j]=m3[i][j];
+       }
+       else 
+         level =(absm(m4[i][j]) * levelscale[qp_rem_dc][0][0] + (leveloffset[qp_per_dc][0][0]*2)) >> (q_bits_422+1);
+ 
+       if (level != 0)
+       {
+         //YUV422
+         // The mask must be widened to int64 before shifting: for uv==1 the
+         // value 0xff0000<<8 does not fit a signed int, and the overflowed
+         // result would be sign-extended into the upper bits of cbp_blk.
+         currMB->cbp_blk |= ((int64)0xff0000) << (uv << 3) ;   // if one of the DC levels is != 0 set the
+         cr_cbp=max(1,cr_cbp);                                  // coded-bit of all 8 4x4 blocks (bit 16-23 or 24-31)
+         DCcoded = 1 ;
+         
+         DCLevel[scan_pos] = sign(level,m4[i][j]);
+         DCRun  [scan_pos] = run;
+         ++scan_pos;
+         run=-1;
+       }
+       if(!lossless_qpprime)
+         m3[i][j]=sign(level,m4[i][j]);
+     }
+     DCLevel[scan_pos]=0;   // terminate the level list
+ 
+     //inverse DC transform
+     //horizontal
+     if(!lossless_qpprime)
+     {
+       m4[0][0] = m3[0][0] + m3[1][0];
+       m4[0][1] = m3[0][1] + m3[1][1];
+       m4[0][2] = m3[0][2] + m3[1][2];
+       m4[0][3] = m3[0][3] + m3[1][3];
+       
+       m4[1][0] = m3[0][0] - m3[1][0];
+       m4[1][1] = m3[0][1] - m3[1][1];
+       m4[1][2] = m3[0][2] - m3[1][2];
+       m4[1][3] = m3[0][3] - m3[1][3];
+       
+       // vertical
+       for (i=0;i<2;i++)
+       {
+         m6[0]=m4[i][0]+m4[i][2];
+         m6[1]=m4[i][0]-m4[i][2];
+         m6[2]=m4[i][1]-m4[i][3];
+         m6[3]=m4[i][1]+m4[i][3];
+         
+         if(qp_per_dc<4)
+         {
+           img->m7[0 ][i*4]=((((m6[0]+m6[3])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+           img->m7[4 ][i*4]=((((m6[1]+m6[2])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+           img->m7[8 ][i*4]=((((m6[1]-m6[2])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+           img->m7[12][i*4]=((((m6[0]-m6[3])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2;
+         }
+         else
+         {
+           img->m7[0 ][i*4]=((((m6[0]+m6[3])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+           img->m7[4 ][i*4]=((((m6[1]+m6[2])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+           img->m7[8 ][i*4]=((((m6[1]-m6[2])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+           img->m7[12][i*4]=((((m6[0]-m6[3])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2;
+         }
+       }//for (i=0;i<2;i++)
+     }
+   }
+   else if(yuv == YUV444)
+   {
+     //================== CHROMA DC YUV444 ===================
+     //transform DC coeff
+     //pick out DC coeff
+     for (j=0; j < img->mb_cr_size_y; j+=BLOCK_SIZE)
+     {
+       for (i=0; i < img->mb_cr_size_x; i+=BLOCK_SIZE)
+         m4[i>>2][j>>2]= img->m7[j][i];
+     }
+     
+     //horizontal
+     for (j=0;j<4 && !lossless_qpprime;j++)
+     {
+       m5[0] = m4[0][j] + m4[3][j];
+       m5[1] = m4[1][j] + m4[2][j];
+       m5[2] = m4[1][j] - m4[2][j];
+       m5[3] = m4[0][j] - m4[3][j];
+       
+       m4[0][j]=m5[0]+m5[1];
+       m4[2][j]=m5[0]-m5[1];
+       m4[1][j]=m5[3]+m5[2];
+       m4[3][j]=m5[3]-m5[2];
+     }
+     // vertical
+     for (i=0;i<4 && !lossless_qpprime;i++)
+     {
+       m5[0] = m4[i][0] + m4[i][3];
+       m5[1] = m4[i][1] + m4[i][2];
+       m5[2] = m4[i][1] - m4[i][2];
+       m5[3] = m4[i][0] - m4[i][3];
+ 
+       m4[i][0]=(m5[0]+m5[1])>>1;
+       m4[i][2]=(m5[0]-m5[1])>>1;
+       m4[i][1]=(m5[3]+m5[2])>>1;
+       m4[i][3]=(m5[3]-m5[2])>>1;
+     }
+ 
+     run=-1;
+     scan_pos=0;
+     
+     //quant of chroma DC-coeffs
+     for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++)
+     {
+       i=SNGL_SCAN[coeff_ctr][0];
+       j=SNGL_SCAN[coeff_ctr][1];
+       
+       run++;
+       
+       if(lossless_qpprime)
+         level = absm(m4[i][j]);
+       else 
+         level =(absm(m4[i][j]) * levelscale[qp_rem][0][0] + (leveloffset[qp_per][0][0]*2)) >> (q_bits+1);
+       
+       if (level != 0)
+       {
+         //YUV444
+         currMB->cbp_blk |= ((int64)0xffff0000) << (uv << 4) ;   // if one of the DC levels is != 0 set the
+         cr_cbp=max(1,cr_cbp);                                    // coded-bit of all 16 4x4 blocks (bit 16-31 or 32-47)
+         DCcoded = 1 ;
+         
+         DCLevel[scan_pos] = sign(level,m4[i][j]);
+         DCRun  [scan_pos] = run;
+         ++scan_pos;
+         run=-1;
+       }
+       if(!lossless_qpprime)
+         m4[i][j]=sign(level,m4[i][j]);
+     }
+     DCLevel[scan_pos]=0;   // terminate the level list
+ 
+     // inverse DC transform
+     //horizontal
+     for (j=0;j<4 && !lossless_qpprime;j++)
+     {
+       m6[0] = m4[0][j] + m4[2][j];
+       m6[1] = m4[0][j] - m4[2][j];
+       m6[2] = m4[1][j] - m4[3][j];
+       m6[3] = m4[1][j] + m4[3][j];
+       
+       m4[0][j] = m6[0] + m6[3];
+       m4[1][j] = m6[1] + m6[2];
+       m4[2][j] = m6[1] - m6[2];
+       m4[3][j] = m6[0] - m6[3];
+     }
+     
+     //vertical
+     for (i=0;i<4 && !lossless_qpprime;i++)
+     {
+       m6[0]=m4[i][0]+m4[i][2];
+       m6[1]=m4[i][0]-m4[i][2];
+       m6[2]=m4[i][1]-m4[i][3];
+       m6[3]=m4[i][1]+m4[i][3];
+ 
+       if(qp_per<4)
+       {
+         img->m7[0 ][i*4] = ((((m6[0] + m6[3])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+         img->m7[4 ][i*4] = ((((m6[1] + m6[2])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+         img->m7[8 ][i*4] = ((((m6[1] - m6[2])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+         img->m7[12][i*4] = ((((m6[0] - m6[3])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2;
+       }
+       else
+       {
+         img->m7[0 ][i*4] = ((((m6[0]+m6[3])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+         img->m7[4 ][i*4] = ((((m6[1]+m6[2])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+         img->m7[8 ][i*4] = ((((m6[1]-m6[2])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+         img->m7[12][i*4] = ((((m6[0]-m6[3])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2;
+       }
+     }
+   }
+ 
+   //     Quant of chroma AC-coeffs.
+   coeff_cost=0;
+   cr_cbp_tmp=0;
+ 
+   for (b8=0; b8 < (img->num_blk8x8_uv >> 1); b8++)
+   {
+     for (b4=0; b4 < 4; b4++)
+     {
+       n1 = hor_offset[yuv][b8][b4];
+       n2 = ver_offset[yuv][b8][b4];
+       ACLevel = img->cofAC[4+b8+uv_scale][b4][0];
+       ACRun   = img->cofAC[4+b8+uv_scale][b4][1];
+       run=-1;
+       scan_pos=0;
+ 
+       // coeff_ctr starts at 1: scan position 0 is the DC coefficient handled above
+       for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant
+       {
+ 
+         if (is_field_mode) 
+         {  // Alternate scan for field coding
+           i=FIELD_SCAN[coeff_ctr][0];
+           j=FIELD_SCAN[coeff_ctr][1];
+         }
+         else 
+         {
+           i=SNGL_SCAN[coeff_ctr][0];
+           j=SNGL_SCAN[coeff_ctr][1];
+         }
+         ++run;
+         ilev=0;
+ 
+         if(lossless_qpprime)
+           level = absm(img->m7[n2+j][n1+i]);
+         else 
+           level=(absm(img->m7[n2+j][n1+i])*levelscale[qp_rem][i][j]+leveloffset[qp_per][i][j])>>q_bits;
+ 
+         if (img->AdaptiveRounding)
+         {
+           if (lossless_qpprime || level == 0 )
+           {
+             img->fadjust4x4Cr[intra][uv][n2+j][n1+i] = 0;
+           }
+           else
+           {
+             img->fadjust4x4Cr[intra][uv][n2+j][n1+i] = 
+               (AdaptRndWeight * (absm(img->m7[n2+j][n1+i]) * levelscale[qp_rem][i][j] - (level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1); 
+           }
+         }
+ 
+         if (level  != 0)
+         {
+           currMB->cbp_blk |= ((int64)1) << cbp_blk_chroma[b8 + uv_scale][b4];
+           if (level > 1 || lossless_qpprime)
+             coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+           else
+             coeff_cost += COEFF_COST[input->disthres][run];
+ 
+           cr_cbp_tmp=2;
+           ACLevel[scan_pos] = sign(level,img->m7[n2+j][n1+i]);
+           ACRun  [scan_pos] = run;
+           ++scan_pos;
+           run=-1;
+ 
+           level=sign(level, img->m7[n2+j][n1+i]);
+           if(lossless_qpprime)
+           {
+             ilev = level;
+           }
+           else if(qp_per<4)
+           {
+             ilev=(level*invlevelscale[qp_rem][i][j]+(1<<(3-qp_per)))>>(4-qp_per);
+           }
+           else
+           {
+             ilev=(level*invlevelscale[qp_rem][i][j])<<(qp_per-4);
+           }
+         }
+         if(!lossless_qpprime)
+           img->m7[n2+j][n1+i]=ilev;
+       }
+       ACLevel[scan_pos] = 0;   // terminate the level list
+     }
+   }
+ 
+   // * reset chroma coeffs
+   // All AC levels of this component are dropped when their accumulated cost
+   // stays below _CHROMA_COEFF_COST_ (never in lossless mode).
+   if(coeff_cost < _CHROMA_COEFF_COST_ && !lossless_qpprime)
+   {
+     cr_cbp_tmp = 0 ;
+     
+     for (b8=0; b8 < (img->num_blk8x8_uv >> 1); b8++)
+     {
+       for (b4=0; b4 < 4; b4++)
+       {
+         n1 = hor_offset[yuv][b8][b4];
+         n2 = ver_offset[yuv][b8][b4];
+         ACLevel = img->cofAC[4+b8+uv_scale][b4][0];
+         ACRun   = img->cofAC[4+b8+uv_scale][b4][1];
+         if( DCcoded == 0) 
+           currMB->cbp_blk &= ~((int64)cbpblk_pattern[yuv] << (uv << (1+yuv)));  // if no chroma DC's: then reset coded-bits of this chroma subblock
+         
+         ACLevel[0] = 0;
+         for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// ac coeff
+         {
+ 
+           if (is_field_mode) 
+           {  // Alternate scan for field coding
+             i=FIELD_SCAN[coeff_ctr][0];
+             j=FIELD_SCAN[coeff_ctr][1];
+           }
+           else 
+           {
+             i=SNGL_SCAN[coeff_ctr][0];
+             j=SNGL_SCAN[coeff_ctr][1];
+           }
+           img->m7[n2+j][n1+i]=0;
+           ACLevel[coeff_ctr] = 0;
+         }
+       }
+     }
+   }
+ 
+   if(cr_cbp_tmp==2)
+     cr_cbp = 2;
+   
+   //     IDCT.
+   //     Horizontal.
+   for (n2=0; n2 < img->mb_cr_size_y && !lossless_qpprime; n2 += BLOCK_SIZE)
+   {
+     for (n1=0; n1 < img->mb_cr_size_x; n1 += BLOCK_SIZE)
+     {
+       for (j=0; j < BLOCK_SIZE; j++)
+       {
+         j2 = n2 + j;
+         for (i=0; i < BLOCK_SIZE; i++)
+         {
+           m5[i]=img->m7[j2][n1+i];
+         }
+ 
+         m6[0] = (m5[0]     +  m5[2]);
+         m6[1] = (m5[0]     -  m5[2]);
+         m6[2] = (m5[1]>>1) -  m5[3];
+         m6[3] =  m5[1]     + (m5[3]>>1);
+ 
+         img->m7[j2][n1  ] = m6[0] + m6[3];
+         img->m7[j2][n1+1] = m6[1] + m6[2];
+         img->m7[j2][n1+2] = m6[1] - m6[2];
+         img->m7[j2][n1+3] = m6[0] - m6[3];
+       }
+ 
+       //     Vertical.
+       for (i=0; i < BLOCK_SIZE && !lossless_qpprime; i++)
+       {
+         i1 = n1 + i;
+         for (j=0; j < BLOCK_SIZE; j++)
+         {
+           m5[j]=img->m7[n2+j][i1];
+         }
+         m6[0]=(m5[0]+m5[2]);
+         m6[1]=(m5[0]-m5[2]);
+         m6[2]=(m5[1]>>1)-m5[3];
+         m6[3]=m5[1]+(m5[3]>>1);
+ 
+           // Residue Color Transform
+         if (!img->residue_transform_flag)
+         {
+           // add the prediction, round, and clip to the valid chroma sample range
+           img->m7[n2  ][i1] = min(img->max_imgpel_value_uv,max(0,(m6[0]+m6[3]+((long)img->mpr[n2  ][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+           img->m7[n2+1][i1] = min(img->max_imgpel_value_uv,max(0,(m6[1]+m6[2]+((long)img->mpr[n2+1][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+           img->m7[n2+2][i1] = min(img->max_imgpel_value_uv,max(0,(m6[1]-m6[2]+((long)img->mpr[n2+2][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+           img->m7[n2+3][i1] = min(img->max_imgpel_value_uv,max(0,(m6[0]-m6[3]+((long)img->mpr[n2+3][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS));
+         } 
+         else 
+         {
+           // NOTE: the lossless branch below cannot be taken here because the
+           // enclosing loops are skipped when lossless_qpprime is set.
+           if(lossless_qpprime)
+           {
+             img->m7[n2  ][i1] = m6[0]+m6[3];
+             img->m7[n2+1][i1] = m6[1]+m6[2];
+             img->m7[n2+2][i1] = m6[1]-m6[2];
+             img->m7[n2+3][i1] = m6[0]-m6[3];
+           }
+           else
+           {
+             img->m7[n2  ][i1] = (m6[0]+m6[3]+DQ_ROUND)>>DQ_BITS;
+             img->m7[n2+1][i1] = (m6[1]+m6[2]+DQ_ROUND)>>DQ_BITS;
+             img->m7[n2+2][i1] = (m6[1]-m6[2]+DQ_ROUND)>>DQ_BITS;
+             img->m7[n2+3][i1] = (m6[0]-m6[3]+DQ_ROUND)>>DQ_BITS;
+           }
+         }
+       }
+     }
+   }
+ 
+   //  Decoded block moved to memory
+   if (!img->residue_transform_flag)
+   {
+     for (j=0; j < img->mb_cr_size_y; j++)
+     {
+       pix_c_y = img->pix_c_y+j;
+       for (i=0; i < img->mb_cr_size_x; i++)
+       {
+         pix_c_x = img->pix_c_x+i;
+         // in lossless mode m7 still holds the residual, so the prediction is added here
+         if(lossless_qpprime)
+           enc_picture->imgUV[uv][pix_c_y][pix_c_x]= img->m7[j][i]+img->mpr[j][i];
+         else
+           enc_picture->imgUV[uv][pix_c_y][pix_c_x]= img->m7[j][i];
+       }
+     }
+   }
+   return cr_cbp;
+ }
+ 
+ 
+ // Residue Color Transform
+ int dct_chroma4x4(int uv, int b8, int b4)
+ {
+   // Transforms, quantizes, dequantizes and inverse transforms the single 4x4
+   // chroma block held in img->m7[0..3][0..3].
+   //
+   // uv     : chroma component selector
+   // b8, b4 : block indices; img->cofAC[b8][b4] receives the AC levels/runs
+   // return : TRUE if at least one AC coefficient was quantized to a non-zero
+   //          level, FALSE otherwise
+   //
+   // The quantized DC level is not written to the coefficient buffers here; it
+   // is stored in dc_level_temp[uv][..][..] instead.
+   int sign(int a,int b);
+ 
+   int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr;
+   int level,scan_pos,run;
+   int nonzeroAC;
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   int   intra = IS_INTRA (currMB);
+ 
+   int qp_per,qp_rem,q_bits;
+   int qp_c;
+ 
+   int*  ACLevel = img->cofAC[b8][b4][0];
+   int*  ACRun   = img->cofAC[b8][b4][1];
+ 
+   int **levelscale, **leveloffset;
+   int **invlevelscale;
+ 
+   // NOTE(review): dct_chroma() tests currMB->qp here while this routine tests
+   // img->qp -- confirm that the two are always equal at this point.
+   Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+ 
+   // Clip the offset chroma QP exactly as dct_chroma() does, so that the
+   // QP_SCALE_CR lookup below is never indexed beyond 51 and qp_per cannot
+   // become negative.
+   qp_c      = currMB->qp + img->chroma_qp_offset[uv];
+   qp_c      = Clip3(-img->bitdepth_chroma_qp_scale,51,qp_c);
+   qp_c      = (qp_c < 0)? qp_c : QP_SCALE_CR[qp_c - MIN_QP];
+ 
+   qp_per    = (qp_c + img->bitdepth_chroma_qp_scale)/6;
+   qp_rem    = (qp_c + img->bitdepth_chroma_qp_scale)%6;
+   q_bits    = Q_BITS+qp_per;
+ 
+   levelscale = LevelScale4x4Chroma[uv][intra][qp_rem];
+   leveloffset = LevelOffset4x4Chroma[uv][intra][qp_per];
+   invlevelscale = InvLevelScale4x4Chroma[uv][intra][qp_rem];
+ 
+   //  Horizontal transform (skipped in lossless mode)
+   if(!lossless_qpprime)
+   for (j=0; j < BLOCK_SIZE; j++)
+   {
+     for (i=0; i < 2; i++)
+     {
+       i1=3-i;
+       m5[i]=img->m7[j][i]+img->m7[j][i1];
+       m5[i1]=img->m7[j][i]-img->m7[j][i1];
+     }
+     img->m7[j][0]=(m5[0]+m5[1]);
+     img->m7[j][2]=(m5[0]-m5[1]);
+     img->m7[j][1]=m5[3]*2+m5[2];
+     img->m7[j][3]=m5[3]-m5[2]*2;
+   }
+ 
+   //  Vertical transform (skipped in lossless mode)
+   if(!lossless_qpprime)
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     for (j=0; j < 2; j++)
+     {
+       j1=3-j;
+       m5[j]=img->m7[j][i]+img->m7[j1][i];
+       m5[j1]=img->m7[j][i]-img->m7[j1][i];
+     }
+     img->m7[0][i]=(m5[0]+m5[1]);
+     img->m7[2][i]=(m5[0]-m5[1]);
+     img->m7[1][i]=m5[3]*2+m5[2];
+     img->m7[3][i]=m5[3]-m5[2]*2;
+   }
+ 
+   // Quant
+ 
+   nonzeroAC=FALSE;
+ 
+   run=-1;
+   scan_pos=0;
+ 
+   // DC coefficient: quantized separately from the AC scan below
+   if(lossless_qpprime)
+     level = absm(img->m7[0][0]);
+   else 
+     level =(absm(img->m7[0][0]) * levelscale[0][0] + leveloffset[0][0]) >> q_bits;
+ 
+   // b8 was already used to select ACLevel/ACRun above; from here on it is
+   // rebased so that it can index the per-component DC level table.
+   b8 -= 4*(uv+1);
+   dc_level_temp[uv][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = sign(level, img->m7[0][0]);
+ 
+   /* Inverse Quantization */
+   if(lossless_qpprime)
+   {
+     img->m7[0][0] = sign( level, img->m7[0][0]);
+   }
+   else
+   {
+     if(qp_per<4)
+     {
+       img->m7[0][0] = sign( ((level*invlevelscale[0][0]+(1<<(3-qp_per)))>>(4-qp_per)), img->m7[0][0]);
+     }
+     else
+     {
+       img->m7[0][0] = sign( ((level*invlevelscale[0][0])<<(qp_per-4)), img->m7[0][0]);
+     }
+   }
+ 
+   // AC coefficients in SNGL_SCAN order (scan position 0 is the DC handled above)
+   for (coeff_ctr=1;coeff_ctr < 16;coeff_ctr++)
+   {
+     i=SNGL_SCAN[coeff_ctr][0];
+     j=SNGL_SCAN[coeff_ctr][1];
+ 
+     run++;
+     ilev=0;
+ 
+     if(lossless_qpprime)
+       level = absm (img->m7[j][i]);
+     else 
+       level = (absm(img->m7[j][i])*levelscale[i][j]+leveloffset[i][j])>>q_bits;
+     
+     if (level != 0)
+     {
+       if(i||j) nonzeroAC=TRUE;
+       
+       ACLevel[scan_pos] = sign(level,img->m7[j][i]);
+       ACRun  [scan_pos] = run;
+       ++scan_pos;
+       run=-1;                     // reset zero level counter
+       
+       level=sign(level, img->m7[j][i]);
+       if(lossless_qpprime)
+       {
+         ilev=level;
+       }
+       else if(qp_per<4)
+       {
+         ilev=(level*invlevelscale[i][j]+(1<<(3-qp_per)))>>(4-qp_per);
+       }
+       else
+       {
+         ilev=(level*invlevelscale[i][j])<<(qp_per-4);
+       }
+     }
+     if(!lossless_qpprime)
+       img->m7[j][i]=ilev;
+   }
+   ACLevel[scan_pos] = 0;   // terminate the level list
+ 
+   
+   //     IDCT.
+   //     horizontal
+   if(!lossless_qpprime)
+   for (j=0; j < BLOCK_SIZE; j++)
+   {
+     for (i=0; i < BLOCK_SIZE; i++)
+     {
+       m5[i]=img->m7[j][i];
+     }
+     m6[0]=(m5[0]+m5[2]);
+     m6[1]=(m5[0]-m5[2]);
+     m6[2]=(m5[1]>>1)-m5[3];
+     m6[3]=m5[1]+(m5[3]>>1);
+ 
+     for (i=0; i < 2; i++)
+     {
+       i1=3-i;
+       img->m7[j][i]=m6[i]+m6[i1];
+       img->m7[j][i1]=m6[i]-m6[i1];
+     }
+   }
+ 
+   //  vertical (includes the final rounding shift by DQ_BITS)
+   if(!lossless_qpprime)
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     for (j=0; j < BLOCK_SIZE; j++)
+     {
+       m5[j]=img->m7[j][i];
+     }
+     m6[0]=(m5[0]+m5[2]);
+     m6[1]=(m5[0]-m5[2]);
+     m6[2]=(m5[1]>>1)-m5[3];
+     m6[3]=m5[1]+(m5[3]>>1);
+ 
+     for (j=0; j < 2; j++)
+     {
+       j1=3-j;
+       img->m7[j][i] =(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS;
+       img->m7[j1][i]=(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS;
+     }
+   }
+ 
+   return nonzeroAC;
+ }
+ 
+ // Residue Color Transform
+ int dct_chroma_DC(int uv, int cr_cbp)
+ {
+   // Run-length codes the 16 values of dc_level[uv] in SNGL_SCAN order into
+   // img->cofDC[uv+1] (levels in [0], runs in [1]) and returns cr_cbp, raised
+   // to at least 1 when any value is non-zero.
+   int *out_level = img->cofDC[uv+1][0];
+   int *out_run   = img->cofDC[uv+1][1];
+   int  n_coded   = 0;   // number of level/run pairs written so far
+   int  zero_run  = 0;   // zeros seen since the last coded level
+   int  k;
+ 
+   for (k = 0; k < 16; k++)
+   {
+     int col = SNGL_SCAN[k][0];
+     int row = SNGL_SCAN[k][1];
+     int magnitude = absm(dc_level[uv][col][row]);
+ 
+     if (magnitude == 0)
+     {
+       zero_run++;
+       continue;
+     }
+ 
+     cr_cbp = max(1,cr_cbp);
+     out_level[n_coded] = sign(magnitude, dc_level[uv][col][row]);
+     out_run  [n_coded] = zero_run;
+     n_coded++;
+     zero_run = 0;
+   }
+ 
+   // a zero level terminates the list
+   out_level[n_coded] = 0;
+ 
+   return cr_cbp;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    The routine performs transform, quantization and inverse transform of
+  *    one 4x4 luma block, and writes the reconstructed block to the decoded
+  *    luma frame (enc_picture->imgY). Includes the RD constrained
+  *    quantization also.
+  *
+  *    The prediction (img->mpr) is first added to img->m7, and both the sum
+  *    and the prediction itself are transformed. For every coefficient two
+  *    candidate levels are computed and one of them is selected; the
+  *    reconstructed coefficient is then requantized using the quantizer
+  *    derived from currMB->qpsp.
+  *
+  * \par Input:
+  *    block_x,block_y: Block position inside a macro block (0,4,8,12).     \n
+  *    coeff_cost: accumulator that is increased for every nonzero level.
+  *
+  * \par Output:
+  *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.              \n
+  *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+  *    Levels and runs are stored in img->cofAC[b8][b4]; img->m7[0..3][0..3]
+  *    holds the reconstructed samples on return.
+  *
+  ************************************************************************
+  */
+ int dct_luma_sp(int block_x,int block_y,int *coeff_cost)
+ {
+   int sign(int a,int b);
+ 
+   int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr;
+   int qp_const,level,scan_pos,run;
+   int nonzero;
+ 
+   int predicted_block[BLOCK_SIZE][BLOCK_SIZE],c_err,qp_const2;
+   int qp_per,qp_rem,q_bits;
+   int qp_per_sp,qp_rem_sp,q_bits_sp;
+ 
+   int   pos_x   = block_x >> BLOCK_SHIFT;
+   int   pos_y   = block_y >> BLOCK_SHIFT;
+   int   b8      = 2*(pos_y >> 1) + (pos_x >> 1);
+   int   b4      = 2*(pos_y & 0x01) + (pos_x & 0x01);
+   int*  ACLevel = img->cofAC[b8][b4][0];
+   int*  ACRun   = img->cofAC[b8][b4][1];
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+ 
+   // For encoding optimization
+   int c_err1, c_err2, level1, level2;
+   double D_dis1, D_dis2;
+   int len, info;
+   double lambda_mode   = 0.85 * pow (2, (currMB->qp - SHIFT_QP)/3.0) * 4; 
+ 
+   qp_per    = (currMB->qp-MIN_QP)/6;
+   qp_rem    = (currMB->qp-MIN_QP)%6;
+   q_bits    = Q_BITS+qp_per;
+   qp_per_sp    = (currMB->qpsp-MIN_QP)/6;
+   qp_rem_sp    = (currMB->qpsp-MIN_QP)%6;
+   q_bits_sp    = Q_BITS+qp_per_sp;
+ 
+   qp_const=(1<<q_bits)/6;    // inter
+   qp_const2=(1<<q_bits_sp)/2;  //sp_pred
+ 
+   //  Horizontal transform
+   for (j=0; j< BLOCK_SIZE; j++)
+     for (i=0; i< BLOCK_SIZE; i++)
+     {
+       img->m7[j][i]+=img->mpr[j+block_y][i+block_x];   // add the prediction to the values in m7
+       predicted_block[i][j]=img->mpr[j+block_y][i+block_x];   // note the [i][j] index order
+     }
+   // Forward transform of img->m7, rows first
+   for (j=0; j < BLOCK_SIZE; j++)
+   {
+     for (i=0; i < 2; i++)
+     {
+       i1=3-i;
+       m5[i]=img->m7[j][i]+img->m7[j][i1];
+       m5[i1]=img->m7[j][i]-img->m7[j][i1];
+     }
+     img->m7[j][0]=(m5[0]+m5[1]);
+     img->m7[j][2]=(m5[0]-m5[1]);
+     img->m7[j][1]=m5[3]*2+m5[2];
+     img->m7[j][3]=m5[3]-m5[2]*2;
+   }
+ 
+   //  Vertical transform
+ 
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     for (j=0; j < 2; j++)
+     {
+       j1=3-j;
+       m5[j]=img->m7[j][i]+img->m7[j1][i];
+       m5[j1]=img->m7[j][i]-img->m7[j1][i];
+     }
+     img->m7[0][i]=(m5[0]+m5[1]);
+     img->m7[2][i]=(m5[0]-m5[1]);
+     img->m7[1][i]=m5[3]*2+m5[2];
+     img->m7[3][i]=m5[3]-m5[2]*2;
+   }
+   // Same forward transform applied to the prediction block, first pass
+   for (j=0; j < BLOCK_SIZE; j++)
+   {
+     for (i=0; i < 2; i++)
+     {
+       i1=3-i;
+       m5[i]=predicted_block[i][j]+predicted_block[i1][j];
+       m5[i1]=predicted_block[i][j]-predicted_block[i1][j];
+     }
+     predicted_block[0][j]=(m5[0]+m5[1]);
+     predicted_block[2][j]=(m5[0]-m5[1]);
+     predicted_block[1][j]=m5[3]*2+m5[2];
+     predicted_block[3][j]=m5[3]-m5[2]*2;
+   }
+ 
+   //  Vertical transform
+ 
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     for (j=0; j < 2; j++)
+     {
+       j1=3-j;
+       m5[j]=predicted_block[i][j]+predicted_block[i][j1];
+       m5[j1]=predicted_block[i][j]-predicted_block[i][j1];
+     }
+     predicted_block[i][0]=(m5[0]+m5[1]);
+     predicted_block[i][2]=(m5[0]-m5[1]);
+     predicted_block[i][1]=m5[3]*2+m5[2];
+     predicted_block[i][3]=m5[3]-m5[2]*2;
+   }
+ 
+   // Quant
+   nonzero=FALSE;
+ 
+   run=-1;
+   scan_pos=0;
+   
+   for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++)     // 8 times if double scan, 16 normal scan
+   {
+ 
+     if (is_field_mode) 
+     {  // Alternate scan for field coding
+         i=FIELD_SCAN[coeff_ctr][0];
+         j=FIELD_SCAN[coeff_ctr][1];
+     }
+     else 
+     {
+         i=SNGL_SCAN[coeff_ctr][0];
+         j=SNGL_SCAN[coeff_ctr][1];
+     }
+     
+     run++;
+     ilev=0;
+     
+     // decide prediction
+     
+     // case 1: error against the prediction coefficient after quantizing it with the SP quantizer
+     level1 = (absm (predicted_block[i][j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp; 
+     level1 = (level1 << q_bits_sp) / quant_coef[qp_rem_sp][i][j];                 
+     c_err1 = img->m7[j][i]-sign(level1, predicted_block[i][j]);                   
+     level1 = (absm (c_err1) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;
+     
+     // case 2: error against the unquantized prediction coefficient
+     c_err2=img->m7[j][i]-predicted_block[i][j];
+     level2 = (absm (c_err2) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;
+     
+     // select prediction
+     if ((level1 != level2) && (level1 != 0) && (level2 != 0))
+     {
+       // both candidates are nonzero and differ: compare squared error + lambda_mode * len
+       D_dis1 = img->m7[j][i] - ((sign(level1,c_err1)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_block[i][j]; 
+       levrun_linfo_inter(level1, run, &len, &info);
+       D_dis1 = D_dis1*D_dis1 + lambda_mode * len;
+       
+       D_dis2 = img->m7[j][i] - ((sign(level2,c_err2)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_block[i][j]; 
+       levrun_linfo_inter(level2, run, &len, &info);
+       D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;
+       
+       if (D_dis1 == D_dis2)
+         level = (absm(level1) < absm(level2)) ? level1 : level2;   // tie: take the smaller level
+       else
+       {
+         if (D_dis1 < D_dis2)
+           level = level1;
+         else
+           level = level2;
+       }
+       c_err = (level == level1) ? c_err1 : c_err2;
+     }
+     else if (level1 == level2)
+     {
+       level = level1;
+       c_err = c_err1;
+     }
+     else
+     {
+       // exactly one candidate is zero here, so the zero level is the one selected
+       level = (level1 == 0) ? level1 : level2;
+       c_err = (level1 == 0) ? c_err1 : c_err2;
+     }
+     
+     if (level != 0)
+     {
+       nonzero=TRUE;
+       if (level > 1)
+         *coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+       else
+         *coeff_cost += COEFF_COST[input->disthres][run];
+       ACLevel[scan_pos] = sign(level,c_err);
+       ACRun  [scan_pos] = run;
+       ++scan_pos;
+       run=-1;                     // reset zero level counter
+       ilev=((sign(level,c_err)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6);
+     }
+     ilev+=predicted_block[i][j] ; 
+     // requantize and dequantize the reconstructed coefficient with the SP quantizer (qp_rem_sp / qp_per_sp)
+     img->m7[j][i] = sign((absm(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2)>> q_bits_sp, ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp;
+   }
+   ACLevel[scan_pos] = 0;   // terminate the level list
+   
+     
+   //     IDCT.
+   //     horizontal
+ 
+   for (j=0; j < BLOCK_SIZE; j++)
+   {
+     for (i=0; i < BLOCK_SIZE; i++)
+     {
+       m5[i]=img->m7[j][i];
+     }
+     m6[0]=(m5[0]+m5[2]);
+     m6[1]=(m5[0]-m5[2]);
+     m6[2]=(m5[1]>>1)-m5[3];
+     m6[3]=m5[1]+(m5[3]>>1);
+ 
+     for (i=0; i < 2; i++)
+     {
+       i1=3-i;
+       img->m7[j][i]=m6[i]+m6[i1];
+       img->m7[j][i1]=m6[i]-m6[i1];
+     }
+   }
+ 
+   //  vertical
+   // (the results are rounded, shifted by DQ_BITS and clipped to [0, img->max_imgpel_value])
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     for (j=0; j < BLOCK_SIZE; j++)
+     {
+       m5[j]=img->m7[j][i];
+     }
+     m6[0]=(m5[0]+m5[2]);
+     m6[1]=(m5[0]-m5[2]);
+     m6[2]=(m5[1]>>1)-m5[3];
+     m6[3]=m5[1]+(m5[3]>>1);
+ 
+     for (j=0; j < 2; j++)
+     {
+       j1=3-j;
+       img->m7[j][i] =min(img->max_imgpel_value,max(0,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS));
+       img->m7[j1][i]=min(img->max_imgpel_value,max(0,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS));
+     }
+   }
+ 
+   //  Decoded block moved to frame memory
+ 
+   for (j=0; j < BLOCK_SIZE; j++)
+   for (i=0; i < BLOCK_SIZE; i++)
+     enc_picture->imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[j][i];
+ 
+   return nonzero;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Transform,quantization,inverse transform for chroma.
+  *    The main reason why this is done in a separate routine is the
+  *    additional 2x2 transform of DC-coeffs. This routine is called
+  *    once for each of the chroma components.
+  *
+  * \par Input:
+  *    uv    : Make difference between the U and V chroma component               \n
+  *    cr_cbp: chroma coded block pattern
+  *
+  * \par Output:
+  *    cr_cbp: Updated chroma coded block pattern.
+  ************************************************************************
+  */
+ int dct_chroma_sp(int uv,int cr_cbp)
+ {
+   int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,qp_const,c_err,level ,scan_pos,run;
+   int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
+   int coeff_cost;
+   int cr_cbp_tmp;
+   int predicted_chroma_block[MB_BLOCK_SIZE>>1][MB_BLOCK_SIZE>>1],qp_const2,mp1[BLOCK_SIZE];
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+ 
+   int qp_per,qp_rem,q_bits;
+   int qp_per_sp,qp_rem_sp,q_bits_sp;
+ 
+   int   b4;
+   int*  DCLevel = img->cofDC[uv+1][0];
+   int*  DCRun   = img->cofDC[uv+1][1];
+   int*  ACLevel;
+   int*  ACRun;
+ 
+   int c_err1, c_err2, level1, level2;
+   int len, info;
+   double D_dis1, D_dis2;
+   double lambda_mode   = 0.85 * pow (2, (currMB->qp -SHIFT_QP)/3.0) * 4; 
+ 
+ 
+   int qpChroma=Clip3(0, 51, currMB->qp + active_pps->chroma_qp_index_offset);
+   int qpChromaSP=Clip3(0, 51, currMB->qpsp + active_pps->chroma_qp_index_offset);
+ 
+   qp_per    = ((qpChroma<0?qpChroma:QP_SCALE_CR[qpChroma])-MIN_QP)/6;
+   qp_rem    = ((qpChroma<0?qpChroma:QP_SCALE_CR[qpChroma])-MIN_QP)%6;
+   q_bits    = Q_BITS+qp_per;
+   qp_const=(1<<q_bits)/6;    // inter
+   qp_per_sp    = ((qpChromaSP<0?currMB->qpsp:QP_SCALE_CR[qpChromaSP])-MIN_QP)/6;
+   qp_rem_sp    = ((qpChromaSP<0?currMB->qpsp:QP_SCALE_CR[qpChromaSP])-MIN_QP)%6;
+   q_bits_sp    = Q_BITS+qp_per_sp;
+   qp_const2=(1<<q_bits_sp)/2;  //sp_pred
+ 
+ 
+   for (j=0; j < MB_BLOCK_SIZE>>1; j++)
+     for (i=0; i < MB_BLOCK_SIZE>>1; i++)
+     {
+       img->m7[j][i]+=img->mpr[j][i];
+       predicted_chroma_block[i][j]=img->mpr[j][i];
+     }
+ 
+   for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+   {
+     for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+     {
+ 
+       //  Horizontal transform.
+       for (j=0; j < BLOCK_SIZE; j++)
+       {
+         mb_y=n2+j;
+         for (i=0; i < 2; i++)
+         {
+           i1=3-i;
+           m5[i]=img->m7[mb_y][i+n1]+img->m7[mb_y][i1+n1];
+           m5[i1]=img->m7[mb_y][i+n1]-img->m7[mb_y][i1+n1];
+         }
+         img->m7[mb_y][n1]  =(m5[0]+m5[1]);
+         img->m7[mb_y][n1+2]=(m5[0]-m5[1]);
+         img->m7[mb_y][n1+1]=m5[3]*2+m5[2];
+         img->m7[mb_y][n1+3]=m5[3]-m5[2]*2;
+       }
+ 
+       //  Vertical transform.
+ 
+       for (i=0; i < BLOCK_SIZE; i++)
+       {
+         j1=n1+i;
+         for (j=0; j < 2; j++)
+         {
+           j2=3-j;
+           m5[j]=img->m7[n2+j][j1]+img->m7[n2+j2][j1];
+           m5[j2]=img->m7[n2+j][j1]-img->m7[n2+j2][j1];
+         }
+         img->m7[n2+0][j1]=(m5[0]+m5[1]);
+         img->m7[n2+2][j1]=(m5[0]-m5[1]);
+         img->m7[n2+1][j1]=m5[3]*2+m5[2];
+         img->m7[n2+3][j1]=m5[3]-m5[2]*2;
+       }
+     }
+   }
+   for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+   {
+     for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+     {
+ 
+       //  Horizontal transform.
+       for (j=0; j < BLOCK_SIZE; j++)
+       {
+         mb_y=n2+j;
+         for (i=0; i < 2; i++)
+         {
+           i1=3-i;
+           m5[i]=predicted_chroma_block[i+n1][mb_y]+predicted_chroma_block[i1+n1][mb_y];
+           m5[i1]=predicted_chroma_block[i+n1][mb_y]-predicted_chroma_block[i1+n1][mb_y];
+         }
+         predicted_chroma_block[n1][mb_y]  =(m5[0]+m5[1]);
+         predicted_chroma_block[n1+2][mb_y]=(m5[0]-m5[1]);
+         predicted_chroma_block[n1+1][mb_y]=m5[3]*2+m5[2];
+         predicted_chroma_block[n1+3][mb_y]=m5[3]-m5[2]*2;
+       }
+ 
+       //  Vertical transform.
+ 
+       for (i=0; i < BLOCK_SIZE; i++)
+       {
+         j1=n1+i;
+         for (j=0; j < 2; j++)
+         {
+           j2=3-j;
+           m5[j]=predicted_chroma_block[j1][n2+j]+predicted_chroma_block[j1][n2+j2];
+           m5[j2]=predicted_chroma_block[j1][n2+j]-predicted_chroma_block[j1][n2+j2];
+         }
+         predicted_chroma_block[j1][n2+0]=(m5[0]+m5[1]);
+         predicted_chroma_block[j1][n2+2]=(m5[0]-m5[1]);
+         predicted_chroma_block[j1][n2+1]=m5[3]*2+m5[2];
+         predicted_chroma_block[j1][n2+3]=m5[3]-m5[2]*2;
+       }
+     }
+   }
+ 
+   //     2X2 transform of DC coeffs.
+   m1[0]=(img->m7[0][0]+img->m7[0][4]+img->m7[4][0]+img->m7[4][4]);
+   m1[1]=(img->m7[0][0]-img->m7[0][4]+img->m7[4][0]-img->m7[4][4]);
+   m1[2]=(img->m7[0][0]+img->m7[0][4]-img->m7[4][0]-img->m7[4][4]);
+   m1[3]=(img->m7[0][0]-img->m7[0][4]-img->m7[4][0]+img->m7[4][4]);
+ 
+   //     2X2 transform of DC coeffs.
+   mp1[0]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]+predicted_chroma_block[0][4]+predicted_chroma_block[4][4]);
+   mp1[1]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]+predicted_chroma_block[0][4]-predicted_chroma_block[4][4]);
+   mp1[2]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]-predicted_chroma_block[0][4]-predicted_chroma_block[4][4]);
+   mp1[3]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]-predicted_chroma_block[0][4]+predicted_chroma_block[4][4]);
+ 
+   run=-1;
+   scan_pos=0;
+ 
+   for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
+   {
+     run++;
+     ilev=0;
+ 
+   // case 1
+     c_err1 = (absm (mp1[coeff_ctr]) * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp + 1);
+     c_err1 = (c_err1 << (q_bits_sp + 1)) / quant_coef[qp_rem_sp][0][0];
+     c_err1 = m1[coeff_ctr] - sign(c_err1, mp1[coeff_ctr]);
+     level1 = (absm(c_err1) * quant_coef[qp_rem][0][0] + 2 * qp_const) >> (q_bits+1);
+ 
+   // case 2
+     c_err2 = m1[coeff_ctr] - mp1[coeff_ctr];
+     level2 = (absm(c_err2) * quant_coef[qp_rem][0][0] + 2 * qp_const) >> (q_bits+1);
+ 
+     if (level1 != level2 && level1 != 0 && level2 != 0)
+     {
+       D_dis1 = m1[coeff_ctr] - ((sign(level1,c_err1)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)- mp1[coeff_ctr];
+       levrun_linfo_c2x2(level1, run, &len, &info);
+       D_dis1 = D_dis1 * D_dis1 + lambda_mode * len;
+       
+       D_dis2 = m1[coeff_ctr] - ((sign(level2,c_err2)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)- mp1[coeff_ctr];
+       levrun_linfo_c2x2(level2, run, &len, &info);
+       D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;
+       
+       if (D_dis1 == D_dis2)
+         level = (absm(level1) < absm(level2)) ? level1 : level2;
+       else
+       {
+         if (D_dis1 < D_dis2)
+           level = level1;
+         else
+           level = level2;
+       }
+       c_err = (level == level1) ? c_err1 : c_err2;
+     }
+     else if (level1 == level2)
+     {
+       level = level1;
+       c_err = c_err1;
+     }
+     else
+     {
+       level = (level1 == 0) ? level1 : level2;
+       c_err = (level1 == 0) ? c_err1 : c_err2;
+     }
+     
+     if (input->symbol_mode == UVLC && img->qp < 4) 
+     {
+       if (level > CAVLC_LEVEL_LIMIT) 
+       {
+         level = CAVLC_LEVEL_LIMIT;
+       }
+     }
+ 
+     if (level  != 0)
+     {
+       currMB->cbp_blk |= 0xf0000 << (uv << 2) ;  // if one of the 2x2-DC levels is != 0 the coded-bit
+       cr_cbp=max(1,cr_cbp);
+       DCLevel[scan_pos] = sign(level ,c_err);
+       DCRun  [scan_pos] = run;
+       scan_pos++;
+       run=-1;
+       ilev=((sign(level,c_err)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5);
+     }
+     ilev+= mp1[coeff_ctr];
+     m1[coeff_ctr]=sign((absm(ilev)  * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp+1), ilev) * dequant_coef[qp_rem_sp][0][0] << qp_per_sp;
+   }
+   DCLevel[scan_pos] = 0;
+ 
+   //  Invers transform of 2x2 DC levels
+ 
+   img->m7[0][0]=(m1[0]+m1[1]+m1[2]+m1[3])/2;
+   img->m7[0][4]=(m1[0]-m1[1]+m1[2]-m1[3])/2;
+   img->m7[4][0]=(m1[0]+m1[1]-m1[2]-m1[3])/2;
+   img->m7[4][4]=(m1[0]-m1[1]-m1[2]+m1[3])/2;
+ 
+   //     Quant of chroma AC-coeffs.
+   coeff_cost=0;
+   cr_cbp_tmp=0;
+ 
+   for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+   {
+     for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+     {
+       b4      = 2*(n2 >> 2) + (n1 >> 2);
+       ACLevel = img->cofAC[uv+4][b4][0];
+       ACRun   = img->cofAC[uv+4][b4][1];
+ 
+       run      = -1;
+       scan_pos =  0;
+ 
+       for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant
+       {
+ 
+         if (is_field_mode) 
+         {  // Alternate scan for field coding
+           i=FIELD_SCAN[coeff_ctr][0];
+           j=FIELD_SCAN[coeff_ctr][1];
+         }
+         else 
+         {
+           i=SNGL_SCAN[coeff_ctr][0];
+           j=SNGL_SCAN[coeff_ctr][1];
+         }
+         ++run;
+         ilev=0;
+ 
+     // quantization on prediction
+     c_err1 = (absm(predicted_chroma_block[n1+i][n2+j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp;
+     c_err1 = (c_err1 << q_bits_sp) / quant_coef[qp_rem_sp][i][j];
+     c_err1 = img->m7[n2+j][n1+i] - sign(c_err1, predicted_chroma_block[n1+i][n2+j]);
+     level1 = (absm(c_err1) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;
+ 
+     // no quantization on prediction
+     c_err2 = img->m7[n2+j][n1+i] - predicted_chroma_block[n1+i][n2+j];
+     level2 = (absm(c_err2) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;
+ 
+     if (level1 != level2 && level1 != 0 && level2 != 0)
+     {
+       D_dis1 = img->m7[n2+j][n1+i] - ((sign(level1,c_err1)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_chroma_block[n1+i][n2+j]; 
+ 
+       levrun_linfo_inter(level1, run, &len, &info);
+       D_dis1 = D_dis1 * D_dis1 + lambda_mode * len;
+ 
+       D_dis2 = img->m7[n2+j][n1+i] - ((sign(level2,c_err2)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_chroma_block[n1+i][n2+j]; 
+       levrun_linfo_inter(level2, run, &len, &info);
+       D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;
+       
+       if (D_dis1 == D_dis2)
+         level = (absm(level1) < absm(level2)) ? level1 : level2;
+       else
+       {
+         if (D_dis1 < D_dis2)
+           level = level1;
+         else
+           level = level2;
+       }
+       c_err = (level == level1) ? c_err1 : c_err2;
+     }
+     else if (level1 == level2)
+     {
+       level = level1;
+       c_err = c_err1;
+     }
+     else
+     {
+       level = (level1 == 0) ? level1 : level2;
+       c_err = (level1 == 0) ? c_err1 : c_err2;
+     }
+ 
+         if (level  != 0)
+         {
+           currMB->cbp_blk |=  1 << (16 + (uv << 2) + ((n2 >> 1) + (n1 >> 2))) ;
+           if (level > 1)
+             coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
+           else
+             coeff_cost += COEFF_COST[input->disthres][run];
+ 
+           cr_cbp_tmp=2;
+           ACLevel[scan_pos] = sign(level,c_err);
+           ACRun  [scan_pos] = run;
+           ++scan_pos;
+           run=-1;
+           ilev=((sign(level,c_err)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6);
+         }
+         ilev+=predicted_chroma_block[n1+i][n2+j];
+         img->m7[n2+j][n1+i] = sign((absm(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp,ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp;
+       }
+       ACLevel[scan_pos] = 0;
+     }
+   }
+ 
+   // * reset chroma coeffs
+ 
+   if(cr_cbp_tmp==2)
+       cr_cbp=2;
+   //     IDCT.
+ 
+       //     Horizontal.
+   for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
+   {
+     for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
+     {
+       for (j=0; j < BLOCK_SIZE; j++)
+       {
+         for (i=0; i < BLOCK_SIZE; i++)
+         {
+           m5[i]=img->m7[n2+j][n1+i];
+         }
+         m6[0]=(m5[0]+m5[2]);
+         m6[1]=(m5[0]-m5[2]);
+         m6[2]=(m5[1]>>1)-m5[3];
+         m6[3]=m5[1]+(m5[3]>>1);
+ 
+         for (i=0; i < 2; i++)
+         {
+           i1=3-i;
+           img->m7[n2+j][n1+i]=m6[i]+m6[i1];
+           img->m7[n2+j][n1+i1]=m6[i]-m6[i1];
+         }
+       }
+ 
+       //     Vertical.
+       for (i=0; i < BLOCK_SIZE; i++)
+       {
+         for (j=0; j < BLOCK_SIZE; j++)
+         {
+           m5[j]=img->m7[n2+j][n1+i];
+         }
+         m6[0]=(m5[0]+m5[2]);
+         m6[1]=(m5[0]-m5[2]);
+         m6[2]=(m5[1]>>1)-m5[3];
+         m6[3]=m5[1]+(m5[3]>>1);
+ 
+         for (j=0; j < 2; j++)
+         {
+           j2=3-j;
+           img->m7[n2+j][n1+i] =min(img->max_imgpel_value_uv,max(0,(m6[j]+m6[j2]+DQ_ROUND)>>DQ_BITS));
+           img->m7[n2+j2][n1+i]=min(img->max_imgpel_value_uv,max(0,(m6[j]-m6[j2]+DQ_ROUND)>>DQ_BITS));
+         }
+       }
+     }
+   }
+ 
+   //  Decoded block moved to memory
+   for (j=0; j < BLOCK_SIZE*2; j++)
+     for (i=0; i < BLOCK_SIZE*2; i++)
+     {
+       enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x+i]= img->m7[j][i];
+     }
+ 
+   return cr_cbp;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Rebuilds one 4x4 luma block from the prediction alone: img->mpr is
+  *    forward transformed, quantized and dequantized with the quantizer
+  *    derived from currMB->qpsp, inverse transformed, clipped to
+  *    [0, img->max_imgpel_value] and written to the decoded luma frame.
+  *    No residual is read and no coefficient levels are produced.
+  *
+  * \par Input:
+  *    block_x,block_y: Block position inside a macro block (0,4,8,12).
+  *
+  * \par Output:
+  *    none (the function returns void); img->m7[0..3][0..3] and the
+  *    corresponding 4x4 area of enc_picture->imgY are overwritten.
+  ************************************************************************
+  */
+ void copyblock_sp(int block_x,int block_y)
+ {
+   int sign(int a,int b);   // local prototype for the helper used in the quantization step
+ 
+   int i,j,i1,j1,m5[4],m6[4];
+ 
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+ 
+   int predicted_block[BLOCK_SIZE][BLOCK_SIZE];   // indexed [x][y]
+   int qp_per = (currMB->qpsp-MIN_QP)/6;
+   int qp_rem = (currMB->qpsp-MIN_QP)%6;
+   int q_bits    = Q_BITS+qp_per;
+   int qp_const2=(1<<q_bits)/2;  //sp_pred
+ 
+   //  Load the prediction (stored transposed relative to img->mpr)
+   for (j=0; j< BLOCK_SIZE; j++)
+     for (i=0; i< BLOCK_SIZE; i++)
+     {
+       predicted_block[i][j]=img->mpr[j+block_y][i+block_x];
+     }
+ 
+   for (j=0; j < BLOCK_SIZE; j++)   //  Horizontal transform
+   {
+     for (i=0; i < 2; i++)
+     {
+       i1=3-i;
+       m5[i]=predicted_block[i][j]+predicted_block[i1][j];
+       m5[i1]=predicted_block[i][j]-predicted_block[i1][j];
+     }
+     predicted_block[0][j]=(m5[0]+m5[1]);
+     predicted_block[2][j]=(m5[0]-m5[1]);
+     predicted_block[1][j]=m5[3]*2+m5[2];
+     predicted_block[3][j]=m5[3]-m5[2]*2;
+   }
+ 
+   //  Vertical transform
+ 
+   for (i=0; i < BLOCK_SIZE; i++)
+   {
+     for (j=0; j < 2; j++)
+     {
+       j1=3-j;
+       m5[j]=predicted_block[i][j]+predicted_block[i][j1];
+       m5[j1]=predicted_block[i][j]-predicted_block[i][j1];
+     }
+     predicted_block[i][0]=(m5[0]+m5[1]);
+     predicted_block[i][2]=(m5[0]-m5[1]);
+     predicted_block[i][1]=m5[3]*2+m5[2];
+     predicted_block[i][3]=m5[3]-m5[2]*2;
+   }
+ 
+   // Quant (followed immediately by dequantization; result goes to img->m7[y][x])
+   for (j=0;j < BLOCK_SIZE; j++)
+     for (i=0; i < BLOCK_SIZE; i++)
+        img->m7[j][i]=sign((absm(predicted_block[i][j])* quant_coef[qp_rem][i][j]+qp_const2)>> q_bits,predicted_block[i][j])*dequant_coef[qp_rem][i][j]<<qp_per;
+ 
+   //     IDCT.
+   //     horizontal
+ 
+   for (j=0;j<BLOCK_SIZE;j++)
+   {
+     for (i=0;i<BLOCK_SIZE;i++)
+     {
+       m5[i]=img->m7[j][i];
+     }
+     m6[0]=(m5[0]+m5[2]);
+     m6[1]=(m5[0]-m5[2]);
+     m6[2]=(m5[1]>>1)-m5[3];
+     m6[3]=m5[1]+(m5[3]>>1);
+ 
+     for (i=0;i<2;i++)
+     {
+       i1=3-i;
+       img->m7[j][i]=m6[i]+m6[i1];
+       img->m7[j][i1]=m6[i]-m6[i1];
+     }
+   }
+   // vertical
+   for (i=0;i<BLOCK_SIZE;i++)
+   {
+     for (j=0;j<BLOCK_SIZE;j++)
+       m5[j]=img->m7[j][i];
+ 
+     m6[0]=(m5[0]+m5[2]);
+     m6[1]=(m5[0]-m5[2]);
+     m6[2]=(m5[1]>>1)-m5[3];
+     m6[3]=m5[1]+(m5[3]>>1);
+ 
+     for (j=0;j<2;j++)
+     {
+       j1=3-j;
+       img->m7[j][i] =min(img->max_imgpel_value,max(0,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS));   // round, scale down and clip to the sample range
+       img->m7[j1][i]=min(img->max_imgpel_value,max(0,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS));
+     }
+   }
+ 
+   //  Decoded block moved to frame memory
+ 
+   for (j=0; j < BLOCK_SIZE; j++)
+     for (i=0; i < BLOCK_SIZE; i++)
+       enc_picture->imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[j][i];
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Emits the reconstructed samples of the current macroblock as
+  *    fixed-length codes: first the 16x16 luma samples (img->bitdepth_luma
+  *    bits each), then both chroma planes (img->mb_cr_size_x by
+  *    img->mb_cr_size_y samples, img->bitdepth_chroma bits each).
+  *
+  * \param currStream
+  *    bitstream handed to writeSyntaxElement2Buf_Fixed for every sample
+  *
+  * \return
+  *    sum of the code lengths of all emitted samples, in bits
+  ************************************************************************
+  */
+ int writeIPCMBytes(Bitstream *currStream)
+ {
+   Macroblock     *currMB = &img->mb_data[img->current_mb_nr];
+   SyntaxElement  *currSE = &img->MB_SyntaxElements[currMB->currSEnr];
+   int total_bits = 0;
+   int row, col, plane, y;
+ 
+   // luma samples, row by row
+   for (row = 0; row < 16; row++)
+   {
+     y = img->pix_y + row;
+     for (col = 0; col < 16; col++)
+     {
+       currSE->len        = img->bitdepth_luma;
+       currSE->bitpattern = enc_picture->imgY[y][img->pix_x + col];
+       total_bits        += currSE->len;
+       writeSyntaxElement2Buf_Fixed(currSE, currStream);
+     }
+   }
+ 
+   // chroma samples: plane 0 completely, then plane 1
+   for (plane = 0; plane < 2; plane++)
+   {
+     for (row = 0; row < img->mb_cr_size_y; row++)
+     {
+       y = img->pix_c_y + row;
+       for (col = 0; col < img->mb_cr_size_x; col++)
+       {
+         currSE->len        = img->bitdepth_chroma;
+         currSE->bitpattern = enc_picture->imgUV[plane][y][img->pix_c_x + col];
+         total_bits        += currSE->len;
+         writeSyntaxElement2Buf_Fixed(currSE, currStream);
+       }
+     }
+   }
+ 
+   return total_bits;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Completes a partially filled byte of the bitstream: the free low bits
+  *    of byte_buf are filled with ones, the byte is appended to
+  *    streamBuffer and bits_to_go is reset to 8. Does nothing when
+  *    bits_to_go is not below 8.
+  *
+  * \return
+  *    0 when nothing was flushed, otherwise 8 minus the bits_to_go value
+  *    found on entry
+  ************************************************************************
+  */
+ int writePCMByteAlign(Bitstream *currStream)
+ {
+   int len;
+ 
+   if (currStream->bits_to_go >= 8)
+     return 0;                       // nothing pending in byte_buf
+ 
+   len = 8 - currStream->bits_to_go;
+ 
+   // shift the pending bits up and fill the remaining low bits with ones
+   currStream->byte_buf = (currStream->byte_buf <<currStream->bits_to_go) | (0xff >> (8 - currStream->bits_to_go));
+   stats->bit_use_stuffingBits[img->type] += currStream->bits_to_go;
+ 
+   currStream->streamBuffer[currStream->byte_pos++] = currStream->byte_buf;
+   currStream->bits_to_go = 8;
+ 
+   return len;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/block.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/block.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/block.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,179 ----
+ 
+ /*!
+  ************************************************************************
+  * \file block.h
+  *
+  * \brief 
+  *    constant arrays for single block processing
+  *
+  * \author
+  *    Inge Lille-Langoy               <inge.lille-langoy at telenor.com>    \n
+  *    Telenor Satellite Services                                         \n
+  *    P.O.Box 6914 St.Olavs plass                                        \n
+  *    N-0130 Oslo, Norway
+  *
+  ************************************************************************
+  */
+ 
+ #ifndef _BLOCK_H_
+ #define _BLOCK_H_
+ 
+ /*! make chroma QP from quant: QP_SCALE_CR[q] is the chroma QP for index q
+  *  (0..51). The mapping is the identity for q <= 29, then rises more slowly
+  *  and stays at 39 for q >= 48. */
+ const byte QP_SCALE_CR[52]=
+ {
+     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,
+    12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
+    28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,
+    37,38,38,38,39,39,39,39
+ };
+ 
+ 
+ /*! single (zig-zag) scan pattern: entry k holds the {i,j} position of the
+  *  k-th coefficient of a 4x4 block; block.c reads [k][0] as the column i
+  *  and [k][1] as the row j (coefficients are addressed as m7[j][i]). */
+ const byte SNGL_SCAN[16][2] =
+ {
+   {0,0},{1,0},{0,1},{0,2},
+   {1,1},{2,0},{3,0},{2,1},
+   {1,2},{0,3},{1,3},{2,2},
+   {3,1},{3,2},{2,3},{3,3}
+ };
+ 
+ /*! field scan pattern: same {i,j} layout as SNGL_SCAN, but ordered column
+  *  by column; block.c selects it instead of SNGL_SCAN for field-coded
+  *  macroblocks/pictures. */
+ const byte FIELD_SCAN[16][2] =
+ {
+   {0,0},{0,1},{1,0},{0,2},
+   {0,3},{1,1},{1,2},{1,3},
+   {2,0},{2,1},{2,2},{2,3},
+   {3,0},{3,1},{3,2},{3,3}
+ };
+ 
+ 
+ /*! array used to find expensive coefficients: indexed as
+  *  COEFF_COST[input->disthres][run] for coefficients whose level is not
+  *  above 1. Row 0 decreases with the run length; row 1 is a constant 9. */
+ const byte COEFF_COST[2][16] =
+ {
+   {3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0},
+   {9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
+ };
+ 
+ 
+ 
+ /*! bit cost for coefficients: three 16x16 sub-tables (2x2 scan, double
+  *  scan, single scan). The meaning of the two inner indices is not visible
+  *  in this header -- TODO confirm against the code that reads the table. */
+ const byte COEFF_BIT_COST[3][16][16]=
+ {
+   { // 2x2 scan (corrected per Gisle's Email 11/23/2000 by StW)
+     { 3, 5, 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13},
+     { 5, 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13},
+     { 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13,15},
+     { 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13,15},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+     { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13},
+   },
+   {  // double scan -- only 14 of the 16 rows are listed; the last two rows are implicitly zero
+     { 3, 5, 7, 7, 7, 9, 9, 9, 9,11,11,13,13,13,13,15},
+     { 5, 9, 9,11,11,13,13,13,13,15,15,15,15,15,15,15},
+     { 7,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17},
+     { 9,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17},
+     { 9,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+     {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17},
+   },
+   {    // single scan
+     { 3, 7, 9, 9,11,13,13,15,15,15,15,17,17,17,17,17},
+     { 5, 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17},
+     { 5, 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17},
+     { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17},
+     {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+     {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+     {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+     {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+     {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+     {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19},
+   },
+ };
+ 
+ /*! 8-entry scan pattern of position pairs. Judging by the name it serves
+  *  YUV 4:2:2 chroma; its use is not visible in this header -- TODO confirm
+  *  which component is the row and which the column. */
+ const byte SCAN_YUV422  [8][2] =
+ {
+   {0,0},{0,1},
+   {1,0},{0,2},
+   {0,3},{1,1},
+   {1,2},{1,3}
+ };
+ 
+ /*! look up tables for FRExt-chroma support: horizontal sample offsets
+  *  (multiples of 4). The first index selects one of four 4x4 tables; how the
+  *  three indices are chosen is not visible here -- presumably chroma format
+  *  first, TODO confirm against the callers. */
+ const unsigned char hor_offset[4][4][4] =
+   {{{0, 0, 0, 0}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}},
+   
+   {{0, 4, 0, 4}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}},
+   
+   {{0, 4, 0, 4}, 
+   {0, 4, 0, 4}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}},
+   
+   {{0, 4, 0, 4}, 
+   {8,12, 8,12},
+   {0, 4, 0, 4},
+   {8,12, 8,12}}};
+   
+ const unsigned char ver_offset[4][4][4] =   // vertical counterpart of hor_offset: sample offsets in multiples of 4, same index layout
+   { {{0, 0, 0, 0}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}},             
+   
+   {{0, 0, 4, 4}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}},
+   
+   {{0, 0, 4, 4}, 
+   {8, 8,12,12}, 
+   {0, 0, 0, 0}, 
+   {0, 0, 0, 0}},
+   
+   {{0, 0, 4, 4},
+   {0, 0, 4, 4},
+   {8, 8,12,12},
+   {8, 8,12,12}}};
+ 
+ static unsigned char cbp_blk_chroma[8][4] =   // consecutive values 16..47, four per row; presumably bit positions of chroma 4x4 blocks in cbp_blk -- TODO confirm
+ 	{ {16, 17, 18, 19},
+ 		{20, 21, 22, 23},
+ 		{24, 25, 26, 27},
+ 		{28, 29, 30, 31},
+ 		{32, 33, 34, 35},
+ 		{36, 37, 38, 39},
+ 		{40, 41, 42, 43},
+ 		{44, 45, 46, 47} };
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/cabac.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/cabac.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/cabac.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1503 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file cabac.c
+  *
+  * \brief
+  *    CABAC entropy coding routines
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  **************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <memory.h>
+ #include "global.h"
+ 
+ #include "cabac.h"
+ #include "image.h"
+ #include "mb_access.h"
+ 
+ int last_dquant = 0;   // reset to 0 by cabac_new_slice(); presumably the previously coded delta QP -- TODO confirm against its readers
+ 
+ /***********************************************************************
+  * L O C A L L Y   D E F I N E D   F U N C T I O N   P R O T O T Y P E S
+  ***********************************************************************
+  */
+ 
+ 
+ void unary_bin_encode(EncodingEnvironmentPtr eep_frame,
+                       unsigned int symbol,
+                       BiContextTypePtr ctx,
+                       int ctx_offset);
+ 
+ void unary_bin_max_encode(EncodingEnvironmentPtr eep_frame,
+                           unsigned int symbol,
+                           BiContextTypePtr ctx,
+                           int ctx_offset,
+                           unsigned int max_symbol);
+ 
+ void unary_exp_golomb_level_encode( EncodingEnvironmentPtr eep_dp,
+                                    unsigned int symbol,
+                                    BiContextTypePtr ctx);
+ 
+ void unary_exp_golomb_mv_encode(EncodingEnvironmentPtr eep_dp,
+                                 unsigned int symbol,
+                                 BiContextTypePtr ctx,
+                                 unsigned int max_bin);
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Resets the file-level CABAC state (last_dquant) to 0; intended to be
+  *    called when a new slice starts.
+  ************************************************************************
+  */
+ void cabac_new_slice(void)
+ {
+   last_dquant = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Check for available neighbouring blocks
+  *    and set pointers in current macroblock.
+  *
+  *    The left (-1, 0) and upper (0, -1) neighbours of the current
+  *    macroblock are looked up with getNeighbour(); mb_available_left and
+  *    mb_available_up are set to the neighbouring macroblock, or to NULL
+  *    when that neighbour is reported as not available.
+  ************************************************************************
+  */
+ void CheckAvailabilityOfNeighborsCABAC(void)
+ {
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   PixelPos left, up;
+ 
+   getNeighbour(img->current_mb_nr, -1,  0, 1, &left);
+   getNeighbour(img->current_mb_nr,  0, -1, 1, &up);
+ 
+   currMB->mb_available_up   = up.available   ? &img->mb_data[up.mb_addr]   : NULL;
+   currMB->mb_available_left = left.available ? &img->mb_data[left.mb_addr] : NULL;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates one zero-initialized set of context models for the
+  *    motion info used in arithmetic encoding.
+  *
+  * \return
+  *    the new context set; no_mem_exit() is called when the allocation
+  *    fails
+  ************************************************************************
+  */
+ MotionInfoContexts* create_contexts_MotionInfo(void)
+ {
+   MotionInfoContexts *ctx = calloc(1, sizeof *ctx);
+ 
+   if (!ctx)
+     no_mem_exit("create_contexts_MotionInfo: enco_ctx");
+ 
+   return ctx;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates one zero-initialized set of context models for the
+  *    texture info used in arithmetic encoding.
+  *
+  * \return
+  *    the new context set; no_mem_exit() is called when the allocation
+  *    fails
+  ************************************************************************
+  */
+ TextureInfoContexts* create_contexts_TextureInfo(void)
+ {
+   TextureInfoContexts *ctx = calloc(1, sizeof *ctx);
+ 
+   if (!ctx)
+     no_mem_exit("create_contexts_TextureInfo: enco_ctx");
+ 
+   return ctx;
+ }
+ 
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases a motion-info context set obtained from
+  *    create_contexts_MotionInfo(). Passing NULL is allowed and does
+  *    nothing.
+  ************************************************************************
+  */
+ void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx)
+ {
+   free( enco_ctx );   // free(NULL) is a no-op, so no explicit guard is needed
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases a texture-info context set obtained from
+  *    create_contexts_TextureInfo(). Passing NULL is allowed and does
+  *    nothing.
+  ************************************************************************
+  */
+ void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx)
+ {
+   free( enco_ctx );   // free(NULL) is a no-op, so no explicit guard is needed
+ }
+ 
+ 
+ /*!
+  **************************************************************************
+  * \brief
+  *    Arithmetic-codes one syntax element into the given data partition by
+  *    invoking the element's own writing callback.
+  *
+  * \return
+  *    number of bits the arithmetic coder produced for this element; the
+  *    same value is stored in se->len
+  **************************************************************************
+  */
+ int writeSyntaxElement_CABAC(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   EncodingEnvironmentPtr eep_dp = &(this_dataPart->ee_cabac);
+   int bits_before = arienco_bits_written(eep_dp);
+ 
+   // the element knows which coding routine applies to it
+   se->writing(se, eep_dp);
+ 
+   // anything other than header data marks the bitstream as written to
+   if (se->type != SE_HEADER)
+     this_dataPart->bitstream->write_flag = 1;
+ 
+   se->len = arienco_bits_written(eep_dp) - bits_before;
+   return se->len;
+ }
+ 
+ /*!
+  ***************************************************************************
+  * \brief
+  *    Arithmetically encodes the field mode flag (se->value1 != 0) of the
+  *    current MB for mb-based frame/field decision. The context index is
+  *    the sum of the mb_field values of the available neighbours A and B
+  *    and is also stored in se->context.
+  ***************************************************************************
+  */
+ void writeFieldModeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts *ctx    = (img->currentSlice)->mot_ctx;
+   Macroblock         *currMB = &img->mb_data[img->current_mb_nr];
+   int ctx_idx = 0;
+ 
+   // an unavailable neighbour contributes 0
+   if (currMB->mbAvailA)
+     ctx_idx += img->mb_data[currMB->mbAddrA].mb_field;
+   if (currMB->mbAvailB)
+     ctx_idx += img->mb_data[currMB->mbAddrB].mb_field;
+ 
+   biari_encode_symbol(eep_dp, (signed short) (se->value1 != 0), &ctx->mb_aff_contexts[ctx_idx]);
+ 
+   se->context = ctx_idx;
+ }
+ 
+ /*!
+ ***************************************************************************
+ * \brief
+ *    Arithmetically encodes the mb_skip_flag of the current macroblock.
+ *
+ *    Each available neighbour (up, left) whose skip_flag is 0 adds one to
+ *    the context index. In B slices the index is offset by 7, context set 2
+ *    is used and the MB counts as skipped when se->value1 and se->value2
+ *    are both 0; otherwise context set 1 is used and the MB counts as
+ *    skipped when se->value1 is 0. The decision is stored in
+ *    currMB->skip_flag and the context index in se->context.
+ ***************************************************************************
+ */
+ void writeMB_skip_flagInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts *ctx    = (img->currentSlice)->mot_ctx;
+   Macroblock         *currMB = &img->mb_data[img->current_mb_nr];
+   int up_not_skipped   = 0;
+   int left_not_skipped = 0;
+   int is_skip;
+   int act_ctx;
+ 
+   // neighbour contributions are the same for every slice type
+   if (currMB->mb_available_up != NULL && currMB->mb_available_up->skip_flag == 0)
+     up_not_skipped = 1;
+   if (currMB->mb_available_left != NULL && currMB->mb_available_left->skip_flag == 0)
+     left_not_skipped = 1;
+ 
+   if (img->type == B_SLICE)
+   {
+     is_skip = (se->value1 == 0 && se->value2 == 0); // DIRECT mode, no coefficients
+     act_ctx = 7 + left_not_skipped + up_not_skipped;
+     biari_encode_symbol (eep_dp, is_skip ? 1 : 0, &ctx->mb_type_contexts[2][act_ctx]);
+   }
+   else
+   {
+     is_skip = (se->value1 == 0); // SKIP
+     act_ctx = left_not_skipped + up_not_skipped;
+     biari_encode_symbol(eep_dp, is_skip ? 1 : 0, &ctx->mb_type_contexts[1][act_ctx]);
+   }
+ 
+   currMB->skip_flag = is_skip;
+   se->context = act_ctx;
+ }
+ 
+ /*!
+ ***************************************************************************
+ * \brief
+ *    Arithmetically encodes the luma_transform_size_8x8_flag of the
+ *    current macroblock. The context index is the sum of the same flag in
+ *    the available upper and left neighbours and is stored in se->context.
+ ***************************************************************************
+ */
+ 
+ void writeMB_transform_size_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts *ctx    = (img->currentSlice)->mot_ctx;
+   Macroblock         *currMB = &img->mb_data[img->current_mb_nr];
+   int ctx_idx = 0;
+ 
+   // an unavailable neighbour contributes 0
+   if (currMB->mb_available_up != NULL)
+     ctx_idx += currMB->mb_available_up->luma_transform_size_8x8_flag;
+   if (currMB->mb_available_left != NULL)
+     ctx_idx += currMB->mb_available_left->luma_transform_size_8x8_flag;
+ 
+   se->context = ctx_idx; // store context
+   biari_encode_symbol(eep_dp, (signed short) (currMB->luma_transform_size_8x8_flag != 0), &ctx->transform_size_contexts[ctx_idx]);
+ }
+ 
+ /*!
+  ***************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode the macroblock
+  *    type info of a given MB.
+  *
+  *    se->value1 carries the MB type symbol. For I slices and B slices the
+  *    first bin uses a context derived from the upper/left neighbour MBs and
+  *    that context is stored in se->context; for other slice types
+  *    se->context is not written by this function.
+  *    The order of the biari_encode_symbol calls defines the bin string, so
+  *    statements must not be reordered.
+  ***************************************************************************
+  */
+ 
+ void writeMB_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   int a, b;
+   int act_ctx = 0;
+   int act_sym;
+   signed short csym;
+   int bframe   = (img->type==B_SLICE);
+   int mode_sym = 0;
+   int mode16x16;
+ 
+ 
+   MotionInfoContexts *ctx         = (img->currentSlice)->mot_ctx;
+   Macroblock         *currMB      = &img->mb_data[img->current_mb_nr];
+   int                curr_mb_type = se->value1;
+ 
+   if(img->type == I_SLICE)  // INTRA-frame
+   {
+     // a/b: 1 when the left/upper neighbour exists and is neither I4MB nor I8MB
+     if (currMB->mb_available_up == NULL)
+       b = 0;
+     else 
+       b = ((currMB->mb_available_up->mb_type != I4MB &&  currMB->mb_available_up->mb_type != I8MB) ? 1 : 0 );
+ 
+     if (currMB->mb_available_left == NULL)
+       a = 0;
+     else 
+       a = ((currMB->mb_available_left->mb_type != I4MB &&  currMB->mb_available_left->mb_type != I8MB) ? 1 : 0 );
+     
+     act_ctx     = a + b;
+     act_sym     = curr_mb_type;
+     se->context = act_ctx; // store context
+ 
+     if (act_sym==0) // 4x4 Intra
+     {
+       biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[0] + act_ctx );
+     }
+     else if( act_sym == 25 ) // PCM-MODE
+     {
+       biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx );
+       biari_encode_symbol_final(eep_dp, 1);
+     }
+     else // 16x16 Intra
+     {
+       biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx );
+ 
+       biari_encode_symbol_final(eep_dp, 0);
+ 
+       // mode_sym is split into: AC flag (mode_sym/12), cbp class ((mode_sym%12)/4)
+       // and prediction mode (mode_sym & 3), each coded with its own context(s)
+       mode_sym = act_sym-1; // Values in the range of 0...23
+       act_ctx  = 4;
+       act_sym  = mode_sym/12;
+       biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx ); // coding of AC/no AC
+       mode_sym = mode_sym % 12;
+       act_sym  = mode_sym / 4; // coding of cbp: 0,1,2
+       act_ctx  = 5;
+       if (act_sym==0)
+       {
+         biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[0] + act_ctx );
+       }
+       else
+       {
+         biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx );
+         act_ctx=6;
+         biari_encode_symbol(eep_dp, (signed short) (act_sym!=1), ctx->mb_type_contexts[0] + act_ctx );
+       }
+       mode_sym = mode_sym & 0x03; // coding of I pred-mode: 0,1,2,3
+       act_sym  = mode_sym >> 1;
+       act_ctx  = 7;
+       biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx );
+       act_ctx  = 8;
+       act_sym  = mode_sym & 0x01;
+       biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx );
+     }
+   }
+   else // INTER
+   {
+     
+     if (bframe)
+     {
+       // a/b: 1 when the left/upper neighbour exists and its mb_type is non-zero
+       if (currMB->mb_available_up == NULL)
+         b = 0;
+       else
+         b = ((currMB->mb_available_up->mb_type != 0) ? 1 : 0 );
+ 
+       if (currMB->mb_available_left == NULL)
+         a = 0;
+       else
+         a = ((currMB->mb_available_left->mb_type != 0) ? 1 : 0 );
+       act_ctx = a + b;
+       se->context = act_ctx; // store context
+     }
+     act_sym = curr_mb_type;
+ 
+     // symbols at or above the threshold (24 in B slices, 7 otherwise) are folded
+     // onto the threshold; the excess is kept in mode_sym and coded further below
+     if (act_sym>=(mode16x16=(bframe?24:7)))
+     {
+       mode_sym = act_sym-mode16x16;
+       act_sym  = mode16x16; // 16x16 mode info
+     }
+ 
+     if (!bframe)
+     {
+       // fixed bin strings per symbol; symbol 0 writes no bins here
+       switch (act_sym)
+       {
+       case 0:
+         break;
+       case 1:
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][5]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][6]);
+         break;
+       case 2:
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][5]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][7]);
+         break;
+       case 3:
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][5]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][7]);
+         break;
+       case 4:
+       case 5:
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][5]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][6]);
+         break;
+       case 6:
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][4]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][7]);
+         break;
+       case 7:
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][4]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][7]);
+         break;
+       default:
+         printf ("Unsupported MB-MODE in writeMB_typeInfo_CABAC!\n");
+         exit (1);
+       }
+     }
+     else //===== B-FRAMES =====
+     {
+       // symbol ranges 0, 1..2, 3..10, {11,22} and the rest each get a distinct
+       // prefix followed by the binary digits of the offset within the range
+       if (act_sym==0)
+       {
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][act_ctx]);
+       }
+       else if (act_sym<=2)
+       {
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][4]);
+         csym = (act_sym-1 != 0);
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+       }
+       else if (act_sym<=10)
+       {
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]);
+         biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][5]);
+         csym=(((act_sym-3)>>2)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+         csym=(((act_sym-3)>>1)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+         csym=((act_sym-3)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+       }
+       else if (act_sym==11 || act_sym==22)
+       {
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][5]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][6]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][6]);
+         csym = (act_sym != 11);
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+       }
+       else
+       {
+         // symbols above 22 are shifted down by one while the four offset bits are
+         // written (22 has its own code above), then act_sym is restored so the
+         // comparison with mode16x16 below still works
+         if (act_sym > 22) act_sym--;
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]);
+         biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][5]);
+         csym=(((act_sym-12)>>3)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+         csym=(((act_sym-12)>>2)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+         csym=(((act_sym-12)>>1)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);
+         csym=((act_sym-12)&0x01) != 0;
+         biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]);         
+         if (act_sym >=22) act_sym++;
+       }
+     }
+ 
+     if(act_sym==mode16x16) // additional info for 16x16 Intra-mode
+     {
+       // mode_sym == 25 is signalled with a terminating bin of 1 and nothing else
+       if( mode_sym==25 )
+       {
+         biari_encode_symbol_final(eep_dp, 1 );
+         return;
+       }
+       biari_encode_symbol_final(eep_dp, 0 );
+ 
+       // same AC / cbp / pred-mode split as in the I-slice branch, but using
+       // contexts 8, 9 and 10 of mb_type_contexts[1]
+       act_ctx = 8;
+       act_sym = mode_sym/12;
+       biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx ); // coding of AC/no AC
+       mode_sym = mode_sym % 12;
+ 
+       act_sym = mode_sym / 4; // coding of cbp: 0,1,2
+       act_ctx = 9;
+       if (act_sym==0)
+       {
+         biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[1] + act_ctx );
+       }
+       else
+       {
+         biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[1] + act_ctx );
+         biari_encode_symbol(eep_dp, (signed short) (act_sym!=1), ctx->mb_type_contexts[1] + act_ctx );
+       }
+ 
+       mode_sym = mode_sym % 4; // coding of I pred-mode: 0,1,2,3
+       act_ctx  = 10;
+       act_sym  = mode_sym/2;
+       biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx );
+       act_sym  = mode_sym%2;
+       biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx );
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ***************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode the 8x8 block
+  *    type info. The symbol is taken from se->value1; B slices and all
+  *    other slice types use different bin strings and context sets.
+  ***************************************************************************
+  */
+ void writeB8_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx;
+   int sym = se->value1;
+   int rem;
+ 
+   if (img->type != B_SLICE)
+   {
+     // only symbols 0..3 produce bins; anything else writes nothing
+     if (sym < 0 || sym > 3)
+       return;
+ 
+     biari_encode_symbol (eep_dp, (signed short) (sym == 0), &ctx->b8_type_contexts[0][1]);
+     if (sym == 0)
+       return;
+ 
+     biari_encode_symbol (eep_dp, (signed short) (sym != 1), &ctx->b8_type_contexts[0][3]);
+     if (sym != 1)
+       biari_encode_symbol (eep_dp, (signed short) (sym == 2), &ctx->b8_type_contexts[0][4]);
+     return;
+   }
+ 
+   //===== B-FRAME =====
+   if (sym == 0)
+   {
+     biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][0]);
+     return;
+   }
+   biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][0]);
+   sym--;
+ 
+   if (sym < 2)
+   {
+     biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][1]);
+     biari_encode_symbol (eep_dp, (signed short) (sym != 0), &ctx->b8_type_contexts[1][3]);
+     return;
+   }
+ 
+   biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][1]);
+ 
+   if (sym < 6)
+   {
+     // two offset bits, most significant first
+     rem = sym - 2;
+     biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][2]);
+     biari_encode_symbol (eep_dp, (signed short) (((rem >> 1) & 0x01) != 0), &ctx->b8_type_contexts[1][3]);
+     biari_encode_symbol (eep_dp, (signed short) ((rem & 0x01) != 0), &ctx->b8_type_contexts[1][3]);
+     return;
+   }
+ 
+   rem = sym - 6;
+   biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][2]);
+   if ((rem >> 2) & 0x01)
+   {
+     // bit 2 set: only the lowest bit follows
+     biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][3]);
+   }
+   else
+   {
+     biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][3]);
+     biari_encode_symbol (eep_dp, (signed short) (((rem >> 1) & 0x01) != 0), &ctx->b8_type_contexts[1][3]);
+   }
+   biari_encode_symbol (eep_dp, (signed short) ((rem & 0x01) != 0), &ctx->b8_type_contexts[1][3]);
+ }
+ 
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode one intra prediction
+  *    mode symbol: a "use most probable mode" flag (se->value1 == -1),
+  *    otherwise followed by the three low bits of se->value1, least
+  *    significant bit first.
+  ****************************************************************************
+  */
+ void writeIntraPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   TextureInfoContexts *ctx = img->currentSlice->tex_ctx;
+   int bit_nr;
+ 
+   // use_most_probable_mode
+   if (se->value1 == -1)
+   {
+     biari_encode_symbol(eep_dp, 1, ctx->ipr_contexts);
+     return;
+   }
+   biari_encode_symbol(eep_dp, 0, ctx->ipr_contexts);
+ 
+   // remaining_mode_selector: all three bins share context 1
+   for (bit_nr = 0; bit_nr < 3; bit_nr++)
+     biari_encode_symbol(eep_dp, (signed short)((se->value1 & (1 << bit_nr)) >> bit_nr), ctx->ipr_contexts+1);
+ }
+ /*!
+  ****************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode the reference
+  *    parameter of a given MB. se->value1 is the reference index to code,
+  *    se->value2 selects the entry of enc_picture->ref_idx that is
+  *    inspected for the left and upper neighbour blocks; the resulting
+  *    context index is stored in se->context.
+  ****************************************************************************
+  */
+ void writeRefFrame_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts  *ctx     = img->currentSlice->mot_ctx;
+   Macroblock          *currMB  = &img->mb_data[img->current_mb_nr];
+   char               **refframe_array = enc_picture->ref_idx[se->value2];
+   int                  bslice  = (img->type==B_SLICE);
+   int                  addctx  = 0;
+   int                  flag_a  = 0;   // contribution of the left neighbour
+   int                  flag_b  = 0;   // contribution of the upper neighbour
+   int                  act_sym = se->value1;
+   int                  act_ctx;
+   PixelPos             block_a, block_b;
+ 
+   getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y, -1,  0, &block_a);
+   getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y,  0, -1, &block_b);
+ 
+   if (block_b.available)
+   {
+     Macroblock *mb_b = &img->mb_data[block_b.mb_addr];
+     int         b8b  = ((block_b.x >> 1) & 0x01) + 2*((block_b.y >> 1) & 0x01);
+ 
+     if (IS_DIRECT(mb_b) || (mb_b->b8mode[b8b]==0 && bslice))
+       flag_b = 0;
+     else
+     {
+       // frame MB next to a field MB (MBAFF): compare against 1 instead of 0
+       int limit = (img->MbaffFrameFlag && (currMB->mb_field == 0) && (mb_b->mb_field == 1)) ? 1 : 0;
+       flag_b = (refframe_array[block_b.pos_y][block_b.pos_x] > limit) ? 1 : 0;
+     }
+   }
+ 
+   if (block_a.available)
+   {
+     Macroblock *mb_a = &img->mb_data[block_a.mb_addr];
+     int         b8a  = ((block_a.x >> 1) & 0x01) + 2*((block_a.y >> 1) & 0x01);
+ 
+     if (IS_DIRECT(mb_a) || (mb_a->b8mode[b8a]==0 && bslice))
+       flag_a = 0;
+     else
+     {
+       int limit = (img->MbaffFrameFlag && (currMB->mb_field == 0) && (mb_a->mb_field == 1)) ? 1 : 0;
+       flag_a = (refframe_array[block_a.pos_y][block_a.pos_x] > limit) ? 1 : 0;
+     }
+   }
+ 
+   act_ctx     = flag_a + 2*flag_b;
+   se->context = act_ctx; // store context
+ 
+   if (act_sym == 0)
+   {
+     biari_encode_symbol(eep_dp, 0, ctx->ref_no_contexts[addctx] + act_ctx);
+     return;
+   }
+ 
+   // non-zero: flag bin, then (value - 1) in unary starting at context 4
+   biari_encode_symbol(eep_dp, 1, ctx->ref_no_contexts[addctx] + act_ctx);
+   unary_bin_encode(eep_dp, act_sym - 1, ctx->ref_no_contexts[addctx] + 4, 1);
+ }
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode the delta quant
+  *    (se->value1) of a given MB. The signed value d is mapped to
+  *    2*|d|-1 for d > 0 and to 2*|d| for d <= 0 before coding; the first
+  *    bin's context depends on whether the previous delta quant was zero.
+  ****************************************************************************
+  */
+ void writeDquant_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts *ctx    = img->currentSlice->mot_ctx;
+   Macroblock         *currMB = &img->mb_data[img->current_mb_nr];
+   int dquant  = se->value1;
+   int act_sym = (absm(dquant) << 1) + ((dquant <= 0) ? 1 : 0) - 1;
+   int act_ctx;
+ 
+   last_dquant = currMB->prev_delta_qp;
+   act_ctx     = (last_dquant != 0) ? 1 : 0;
+ 
+   if (act_sym == 0)
+   {
+     biari_encode_symbol(eep_dp, 0, ctx->delta_qp_contexts + act_ctx);
+     return;
+   }
+ 
+   // non-zero: flag bin, then (symbol - 1) in unary starting at context 2
+   biari_encode_symbol(eep_dp, 1, ctx->delta_qp_contexts + act_ctx);
+   unary_bin_encode(eep_dp, act_sym - 1, ctx->delta_qp_contexts + 2, 1);
+ }
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode one motion vector
+  *    difference component (se->value1). se->value2 packs the list index
+  *    (bit 0) and the component index (remaining bits). The context of the
+  *    first bin depends on the summed magnitude of the same component in
+  *    the left and upper neighbour blocks and is stored in se->context.
+  ****************************************************************************
+  */
+ void writeMVD_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   MotionInfoContexts  *ctx      = img->currentSlice->mot_ctx;
+   Macroblock          *currMB   = &img->mb_data[img->current_mb_nr];
+   int                  list_idx = se->value2 & 0x01;
+   int                  k        = (se->value2>>1); // MVD component
+   int                  mvd      = se->value1;
+   int                  mag_a    = 0;
+   int                  mag_b    = 0;
+   int                  nb_sum;
+   int                  act_ctx;
+   int                  act_sym;
+   PixelPos             block_a, block_b;
+ 
+   getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y, -1,  0, &block_a);
+   getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y,  0, -1, &block_b);
+ 
+   if (block_b.available)
+   {
+     Macroblock *mb_b = &img->mb_data[block_b.mb_addr];
+ 
+     mag_b = absm(mb_b->mvd[list_idx][block_b.y][block_b.x][k]);
+     // MBAFF, component 1 only: rescale when frame/field coding differs
+     if (img->MbaffFrameFlag && (k==1))
+     {
+       if ((currMB->mb_field==0) && (mb_b->mb_field==1))
+         mag_b *= 2;
+       else if ((currMB->mb_field==1) && (mb_b->mb_field==0))
+         mag_b /= 2;
+     }
+   }
+ 
+   if (block_a.available)
+   {
+     Macroblock *mb_a = &img->mb_data[block_a.mb_addr];
+ 
+     mag_a = absm(mb_a->mvd[list_idx][block_a.y][block_a.x][k]);
+     if (img->MbaffFrameFlag && (k==1))
+     {
+       if ((currMB->mb_field==0) && (mb_a->mb_field==1))
+         mag_a *= 2;
+       else if ((currMB->mb_field==1) && (mb_a->mb_field==0))
+         mag_a /= 2;
+     }
+   }
+ 
+   // three context classes per component: sum < 3, 3..32, > 32
+   nb_sum  = mag_a + mag_b;
+   act_ctx = 5*k;
+   if (nb_sum >= 3)
+     act_ctx += (nb_sum > 32) ? 3 : 2;
+ 
+   se->context = act_ctx;
+   act_sym     = absm(mvd);
+ 
+   if (act_sym == 0)
+   {
+     biari_encode_symbol(eep_dp, 0, &ctx->mv_res_contexts[0][act_ctx] );
+     return;
+   }
+ 
+   // non-zero: flag bin, magnitude minus one, then the sign as an equiprobable bin
+   biari_encode_symbol(eep_dp, 1, &ctx->mv_res_contexts[0][act_ctx] );
+   unary_exp_golomb_mv_encode(eep_dp, act_sym - 1, ctx->mv_res_contexts[1] + 5*k, 3);
+   biari_encode_symbol_eq_prob(eep_dp, (signed short) ((mvd < 0) ? 1 : 0));
+ }
+ 
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode the chroma
+  *    intra prediction mode (se->value1). The first bin's context counts
+  *    the available upper/left neighbour MBs whose c_ipred_mode is non-zero.
+  ****************************************************************************
+  */
+ void writeCIPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   TextureInfoContexts *ctx     = img->currentSlice->tex_ctx;
+   Macroblock          *currMB  = &img->mb_data[img->current_mb_nr];
+   int                  act_sym = se->value1;
+   int                  act_ctx = 0;
+ 
+   if (currMB->mb_available_up != NULL && (currMB->mb_available_up)->c_ipred_mode != 0)
+     act_ctx++;
+   if (currMB->mb_available_left != NULL && (currMB->mb_available_left)->c_ipred_mode != 0)
+     act_ctx++;
+ 
+   if (act_sym == 0)
+   {
+     biari_encode_symbol(eep_dp, 0, ctx->cipr_contexts + act_ctx );
+     return;
+   }
+ 
+   // non-zero: flag bin, then (mode - 1) as truncated unary using context 3
+   biari_encode_symbol(eep_dp, 1, ctx->cipr_contexts + act_ctx );
+   unary_bin_max_encode(eep_dp,(unsigned int) (act_sym-1),ctx->cipr_contexts+3,0,2);
+ }
+ 
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    This function is used to arithmetically encode one bit of the coded
+  *    block pattern, belonging to 8x8 block b8. The context is built from
+  *    whether the corresponding cbp bits of the left and upper 8x8 blocks
+  *    are zero; blocks inside the current MB are looked up in cbp, blocks
+  *    outside in the neighbouring MB. The inter argument is not used.
+  ****************************************************************************
+  */
+ void writeCBP_BIT_CABAC (int b8, int bit, int cbp, Macroblock* currMB, int inter, EncodingEnvironmentPtr eep_dp)
+ {
+   int      mb_x = (b8 & 0x01) << 1;   // 4x4 column of the block inside the MB
+   int      mb_y = (b8 >> 1)   << 1;   // 4x4 row of the block inside the MB
+   int      up_zero, left_zero;
+   PixelPos block_a;
+ 
+   //===== upper neighbour =====
+   if (mb_y != 0)
+     up_zero = ((cbp & (1 << (mb_x/2))) == 0) ? 1 : 0;
+   else if (currMB->mb_available_up == NULL || (currMB->mb_available_up)->mb_type == IPCM)
+     up_zero = 0;
+   else
+     up_zero = (((currMB->mb_available_up)->cbp & (1 << (2 + (mb_x>>1)))) == 0) ? 1 : 0;   //VG-ADD
+ 
+   //===== left neighbour =====
+   if (mb_x != 0)
+     left_zero = ((cbp & (1 << mb_y)) == 0) ? 1 : 0;
+   else
+   {
+     getLuma4x4Neighbour(img->current_mb_nr, mb_x, mb_y, -1, 0, &block_a);
+     if (!block_a.available || img->mb_data[block_a.mb_addr].mb_type == IPCM)
+       left_zero = 0;
+     else
+       left_zero = ((img->mb_data[block_a.mb_addr].cbp & (1 << (2*(block_a.y>>1)+1))) == 0) ? 1 : 0; //VG-ADD
+   }
+ 
+   //===== WRITE BIT =====
+   biari_encode_symbol (eep_dp, (signed short) bit,
+     img->currentSlice->tex_ctx->cbp_contexts[0] + left_zero + 2*up_zero);
+ }
+ 
+ /*!
+ ****************************************************************************
+ * \brief
+ *    This function is used to arithmetically encode the coded
+ *    block pattern of a macroblock (se->value1): the four low bits one by
+ *    one through writeCBP_BIT_CABAC, then, unless the format is YUV400,
+ *    up to two further bins derived from the bits above bit 3.
+ ****************************************************************************
+ */
+ void writeCBP_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   TextureInfoContexts *ctx    = img->currentSlice->tex_ctx;
+   Macroblock          *currMB = &img->mb_data[img->current_mb_nr];
+   int cbp = se->value1; // symbol to encode
+   int up_flag, left_flag;
+   int blk;
+ 
+   for (blk = 0; blk < 4; blk++)
+     writeCBP_BIT_CABAC (blk, cbp & (1 << blk), cbp, currMB, (currMB->b8mode[blk] == IBLOCK) ? 0 : 1, eep_dp);
+ 
+   if (img->yuv_format == YUV400)
+     return;
+ 
+   // coding of chroma part, first bin: cbp > 15
+   // neighbour flag is set for an IPCM neighbour or one whose cbp is > 15
+   up_flag = 0;
+   if (currMB->mb_available_up != NULL)
+     up_flag = ((currMB->mb_available_up)->mb_type == IPCM || (currMB->mb_available_up)->cbp > 15) ? 1 : 0;
+ 
+   left_flag = 0;
+   if (currMB->mb_available_left != NULL)
+     left_flag = ((currMB->mb_available_left)->mb_type == IPCM || (currMB->mb_available_left)->cbp > 15) ? 1 : 0;
+ 
+   biari_encode_symbol(eep_dp, (signed short) ((cbp > 15) ? 1 : 0), ctx->cbp_contexts[1] + left_flag + 2*up_flag);
+ 
+   if (cbp <= 15)
+     return;
+ 
+   // second bin: (cbp >> 4) == 2
+   // neighbour flag is set for an IPCM neighbour or one whose cbp is > 15 with (cbp >> 4) == 2
+   up_flag = 0;
+   if (currMB->mb_available_up != NULL)
+     up_flag = ((currMB->mb_available_up)->mb_type == IPCM ||
+                ((currMB->mb_available_up)->cbp > 15 && ((currMB->mb_available_up)->cbp >> 4) == 2)) ? 1 : 0;
+ 
+   left_flag = 0;
+   if (currMB->mb_available_left != NULL)
+     left_flag = ((currMB->mb_available_left)->mb_type == IPCM ||
+                  ((currMB->mb_available_left)->cbp > 15 && ((currMB->mb_available_left)->cbp >> 4) == 2)) ? 1 : 0;
+ 
+   biari_encode_symbol(eep_dp, (signed short) (((cbp>>4) == 2) ? 1 : 0), ctx->cbp_contexts[2] + left_flag + 2*up_flag);
+ }
+ 
+ // Lookup tables with one entry per block type (10 entries each); the coefficient
+ // writers below index them with their `type` argument.
+ // maxpos[type]-1 is the upper bound of the significance-map scan loop.
+ static const int maxpos       [] = {16, 15, 64, 32, 32, 16,  4, 15,  8, 16};
+ // when c1isdc[type] is 0 the significance-map scan starts one position later
+ static const int c1isdc       [] = { 1,  0,  1,  1,  1,  1,  1,  0,  1,  1};
+ 
+ // type -> first index of the bcbp / map / last context arrays
+ // (trailing numbers presumably give the count of distinct context sets - TODO confirm)
+ static const int type2ctx_bcbp[] = { 0,  1,  2,  2,  3,  4,  5,  6,  5,  5}; // 7
+ static const int type2ctx_map [] = { 0,  1,  2,  3,  4,  5,  6,  7,  6,  6}; // 8
+ static const int type2ctx_last[] = { 0,  1,  2,  3,  4,  5,  6,  7,  6,  6}; // 8
+ // NOTE(review): the next three tables are not referenced in the code visible here;
+ // presumably used by the coefficient-level writer - verify against its definition
+ static const int type2ctx_one [] = { 0,  1,  2,  3,  3,  4,  5,  6,  5,  5}; // 7
+ static const int type2ctx_abs [] = { 0,  1,  2,  3,  3,  4,  5,  6,  5,  5}; // 7
+ static const int max_c2       [] = { 4,  4,  4,  4,  4,  4,  3,  4,  3,  3}; // 9
+ 
+ 
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    Write CBP4-BIT: records cbp_bit for the current block in
+  *    currMB->cbp_bits and, except for LUMA_8x8 blocks, arithmetically
+  *    encodes it with a context built from the corresponding bits of the
+  *    left and upper neighbour blocks.
+  ****************************************************************************
+  */
+ void write_and_store_CBP_block_bit (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int cbp_bit)
+ {
+ #define BIT_SET(x,n)  ((int)(((x)&((int64)1<<(n)))>>(n)))
+ 
+   int y_dc        = (type==LUMA_16DC);
+   int y_ac        = (type==LUMA_16AC || type==LUMA_8x8 || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4);
+   int chroma_dc   = (type==CHROMA_DC || type==CHROMA_DC_2x4 || type==CHROMA_DC_4x4);
+   int u_dc        = (chroma_dc && !img->is_v_block);
+   int v_dc        = (chroma_dc &&  img->is_v_block);
+   int u_ac        = (type==CHROMA_AC && !img->is_v_block);
+   int v_ac        = (type==CHROMA_AC &&  img->is_v_block);
+   int any_ac      = (y_ac || u_ac || v_ac);
+   int j           = (any_ac ? img->subblock_y : 0);
+   int i           = (any_ac ? img->subblock_x : 0);
+   int upper_bit   = (img->is_intra_block ? 1 : 0);  // default when the neighbour is unavailable
+   int left_bit    = upper_bit;
+   int bit_pos_a   = 0;
+   int bit_pos_b   = 0;
+   int base;       // first cbp_bits position of the block class
+   int bit;        // cbp_bits position of the current block
+   int ctx;
+ 
+   PixelPos block_a, block_b;
+ 
+   //--- locate left (a) and upper (b) neighbour blocks ---
+   if (y_ac || y_dc)
+   {
+     getLuma4x4Neighbour(img->current_mb_nr, i, j, -1,  0, &block_a);
+     getLuma4x4Neighbour(img->current_mb_nr, i, j,  0, -1, &block_b);
+     if (y_ac && block_a.available)
+       bit_pos_a = 4*block_a.y + block_a.x;
+     if (y_ac && block_b.available)
+       bit_pos_b = 4*block_b.y + block_b.x;
+   }
+   else
+   {
+     getChroma4x4Neighbour(img->current_mb_nr, i, j, -1,  0, &block_a);
+     getChroma4x4Neighbour(img->current_mb_nr, i, j,  0, -1, &block_b);
+     if ((u_ac||v_ac) && block_a.available)
+       bit_pos_a = 4*block_a.y + block_a.x;
+     if ((u_ac||v_ac) && block_b.available)
+       bit_pos_b = 4*block_b.y + block_b.x;
+   }
+ 
+   //--- bit layout: 0 luma DC, 1.. luma AC, 17 U DC, 18 V DC, 19.. U AC, 35.. remaining ---
+   if (y_dc)      { base = 0;  bit = base;         }
+   else if (y_ac) { base = 1;  bit = base+4*j+i;   }
+   else if (u_dc) { base = 17; bit = base;         }
+   else if (v_dc) { base = 18; bit = base;         }
+   else if (u_ac) { base = 19; bit = base+4*j+i;   }
+   else           { base = 35; bit = base+4*j+i;   }
+ 
+   //--- set bits for current block ---
+   if (cbp_bit)
+   {
+     if (type==LUMA_8x8)
+       currMB->cbp_bits |= (1<<bit) | (1<<(bit+1)) | (1<<(bit+4)) | (1<<(bit+5));
+     else if (type==LUMA_8x4)
+       currMB->cbp_bits |= (1<<bit) | (1<<(bit+1));
+     else if (type==LUMA_4x8)
+       currMB->cbp_bits |= (1<<bit) | (1<<(bit+4));
+     else
+       currMB->cbp_bits |= ((int64)1<<bit);
+   }
+ 
+   // LUMA_8x8 blocks only store their bits; nothing is written to the bitstream
+   if (type==LUMA_8x8)
+     return;
+ 
+   if (block_b.available)
+   {
+     if (img->mb_data[block_b.mb_addr].mb_type==IPCM)
+       upper_bit = 1;
+     else
+       upper_bit = BIT_SET(img->mb_data[block_b.mb_addr].cbp_bits,base+bit_pos_b);
+   }
+ 
+   if (block_a.available)
+   {
+     if (img->mb_data[block_a.mb_addr].mb_type==IPCM)
+       left_bit = 1;
+     else
+       left_bit = BIT_SET(img->mb_data[block_a.mb_addr].cbp_bits,base+bit_pos_a);
+   }
+ 
+   ctx = 2*upper_bit+left_bit;
+ 
+   //===== encode symbol =====
+   biari_encode_symbol (eep_dp, (short)cbp_bit, img->currentSlice->tex_ctx->bcbp_contexts[type2ctx_bcbp[type]]+ctx);
+ }
+ 
+ 
+ 
+ 
+ //===== position -> ctx for MAP =====
+ // Each pos2ctx_* pointer table has one entry per block type; the significance-map
+ // writer looks up pos2ctx_map[type][k] for scan position k to pick the context offset.
+ //--- zig-zag scan ----
+ static const int  pos2ctx_map8x8 [] = { 0,  1,  2,  3,  4,  5,  5,  4,  4,  3,  3,  4,  4,  4,  5,  5,
+                                         4,  4,  4,  4,  3,  3,  6,  7,  7,  7,  8,  9, 10,  9,  8,  7,
+                                         7,  6, 11, 12, 13, 11,  6,  7,  8,  9, 14, 10,  9,  8,  6, 11,
+                                        12, 13, 11,  6,  9, 14, 10,  9, 11, 12, 13, 11 ,14, 10, 12, 14}; // 15 CTX
+ static const int  pos2ctx_map8x4 [] = { 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11,  9,  8,  6,  7,  8,
+                                         9, 10, 11,  9,  8,  6, 12,  8,  9, 10, 11,  9, 13, 13, 14, 14}; // 15 CTX
+ static const int  pos2ctx_map4x4 [] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 14}; // 15 CTX
+ static const int  pos2ctx_map2x4c[] = { 0,  0,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+ static const int  pos2ctx_map4x4c[] = { 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+ static const int* pos2ctx_map    [] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8, pos2ctx_map8x4,
+                                        pos2ctx_map8x4, pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4,
+                                        pos2ctx_map2x4c, pos2ctx_map4x4c};
+ 
+ //--- interlace scan ----
+ // NOTE(review): presumably selected instead of pos2ctx_map for field-coded blocks;
+ // the code that uses pos2ctx_map_int is not visible here - confirm
+ //Taken from ABT
+ static const int  pos2ctx_map8x8i[] = { 0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  7,  7,  8,  4,  5,
+                                         6,  9, 10, 10,  8, 11, 12, 11,  9,  9, 10, 10,  8, 11, 12, 11,
+                                         9,  9, 10, 10,  8, 11, 12, 11,  9,  9, 10, 10,  8, 13, 13,  9,
+                                         9, 10, 10,  8, 13, 13,  9,  9, 10, 10, 14, 14, 14, 14, 14, 14}; // 15 CTX
+ 
+ static const int  pos2ctx_map8x4i[] = { 0,  1,  2,  3,  4,  5,  6,  3,  4,  5,  6,  3,  4,  7,  6,  8,
+                                         9,  7,  6,  8,  9, 10, 11, 12, 12, 10, 11, 13, 13, 14, 14, 14}; // 15 CTX
+ static const int  pos2ctx_map4x8i[] = { 0,  1,  1,  1,  2,  3,  3,  4,  4,  4,  5,  6,  2,  7,  7,  8,
+                                         8,  8,  5,  6,  9, 10, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14}; // 15 CTX
+ static const int* pos2ctx_map_int[] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i,
+                                        pos2ctx_map4x8i,pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4,
+                                        pos2ctx_map2x4c, pos2ctx_map4x4c};
+ 
+ 
+ //===== position -> ctx for LAST =====
+ // pos2ctx_last[type][k] gives the context offset for the "last significant coefficient" bin
+ static const int  pos2ctx_last8x8 [] = { 0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+                                          2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+                                          3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
+                                          5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8}; //  9 CTX
+ static const int  pos2ctx_last8x4 [] = { 0,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,
+                                          3,  3,  3,  3,  4,  4,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8}; //  9 CTX
+ static const int  pos2ctx_last4x4 [] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15}; // 15 CTX
+ static const int  pos2ctx_last2x4c[] = { 0,  0,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+ static const int  pos2ctx_last4x4c[] = { 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2}; // 15 CTX
+ static const int* pos2ctx_last    [] = {pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8, pos2ctx_last8x4,
+                                         pos2ctx_last8x4, pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last4x4,
+                                         pos2ctx_last2x4c, pos2ctx_last4x4c};
+ 
+ 
+ 
+ 
+ /*!
+ ****************************************************************************
+ * \brief
+ *    Write Significance MAP
+ *
+ *    For every scan position k in [k0, k1) a "significant" flag is coded;
+ *    each significant coefficient is followed by a "last" flag.  Coding stops
+ *    as soon as the last significant coefficient has been signalled.
+ *
+ * \param currMB
+ *    current macroblock (only mb_field is read here)
+ * \param eep_dp
+ *    encoding environment the bins are written to
+ * \param type
+ *    block type; indexes maxpos[], c1isdc[] and the context tables
+ * \param coeff
+ *    coefficient levels of the block; coeff[0] is the first coded position
+ * \param coeff_ctr
+ *    number of non-zero coefficients that still have to be signalled
+ ****************************************************************************
+ */
+ void write_significance_map (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int coeff[], int coeff_ctr)
+ {
+   int   k;
+   int   map_idx;
+   unsigned short sig, last;
+   int   k0      = 0;
+   int   k1      = maxpos[type]-1;
+   int   shift   = 0;   // scan position that corresponds to coeff[0]
+   
+   int               fld       = ( img->structure!=FRAME || currMB->mb_field );
+   BiContextTypePtr  map_ctx   = ( fld ? img->currentSlice->tex_ctx->fld_map_contexts[type2ctx_map [type]]
+     : img->currentSlice->tex_ctx->map_contexts[type2ctx_map [type]] );
+   BiContextTypePtr  last_ctx  = ( fld ? img->currentSlice->tex_ctx->fld_last_contexts[type2ctx_last[type]]
+     : img->currentSlice->tex_ctx->last_contexts[type2ctx_last[type]] );
+   
+   if (!c1isdc[type])
+   {
+     // Scan positions start at 1 for these block types.  The shift is kept as
+     // an index offset rather than decrementing coeff: forming a pointer to the
+     // element before the start of an array is undefined behaviour.
+     k0++; k1++; shift = 1;
+   }
+   
+   for (k=k0; k<k1; k++) // if last coeff is reached, it has to be significant
+   {
+     sig     = (coeff[k-shift] != 0);
+     // field and frame coding differ only in the position -> context table
+     map_idx = ( fld ? pos2ctx_map_int[type][k] : pos2ctx_map[type][k] );
+     
+     biari_encode_symbol  (eep_dp, sig,  map_ctx+map_idx);
+     if (sig)
+     {
+       last = (--coeff_ctr == 0);
+       
+       biari_encode_symbol(eep_dp, last, last_ctx+pos2ctx_last[type][k]);
+       if (last) return;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    Write Levels
+  *
+  *    Walks coeff[] from index maxpos[type]-1 down to 0 and, for every non-zero
+  *    entry, codes a "greater than one" bin, the remaining magnitude (only when
+  *    it is greater than one) and finally the sign as an equiprobable bin.
+  *    c1 and c2 select the context for the first two of these.
+  ****************************************************************************
+  */
+ void write_significant_coefficients (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int coeff[])
+ {
+   BiContextTypePtr one_ctx = img->currentSlice->tex_ctx->one_contexts[type2ctx_one[type]];
+   BiContextTypePtr abs_ctx = img->currentSlice->tex_ctx->abs_contexts[type2ctx_abs[type]];
+   int   idx;
+   int   level;
+   int   magnitude;
+   short negative;
+   short above_one;
+   int   c1 = 1;
+   int   c2 = 0;
+   
+   for (idx = maxpos[type]-1; idx >= 0; idx--)
+   {
+     level = coeff[idx];
+     if (level == 0)
+       continue;
+ 
+     negative  = (level < 0);
+     magnitude = (negative ? -level : level);
+     above_one = (magnitude > 1);
+ 
+     //--- is the magnitude exactly one? ---
+     biari_encode_symbol (eep_dp, above_one, one_ctx + min(c1,4));
+ 
+     if (above_one)
+     {
+       unary_exp_golomb_level_encode(eep_dp, magnitude-2, abs_ctx + min(c2, max_c2[type]));
+       c1 = 0;   // c1 stays at zero once a level above one has been coded
+       c2++;
+     }
+     else if (c1)
+     {
+       c1++;
+     }
+ 
+     biari_encode_symbol_eq_prob (eep_dp, negative);
+   }
+ }
+ 
+ 
+ 
+ /*!
+  ****************************************************************************
+  * \brief
+  *    Write Block-Transform Coefficients
+  *
+  *    Run/level pairs with a non-zero level (se->value1) are only collected in
+  *    a static buffer.  A pair whose level is zero ends the block: the coded
+  *    block bit and, if any level was collected, the significance map and the
+  *    levels are written, and the buffer is cleared for the next block.
+  ****************************************************************************
+  */
+ void writeRunLevel_CABAC (SyntaxElement *se, EncodingEnvironmentPtr eep_dp)
+ {
+   static int  levels[64];
+   static int  num_levels = 0;
+   static int  scan_pos   = 0;
+   Macroblock* currMB;
+     
+   //--- accumulate run-level information ---
+   if (se->value1 != 0)
+   {
+     scan_pos += se->value2;            // skip the run of zeros
+     levels[scan_pos] = se->value1;
+     scan_pos++;
+     num_levels++;
+     return;
+   }
+ 
+   //--- end of block: emit everything collected so far ---
+   currMB = &img->mb_data[img->current_mb_nr];
+ 
+   //===== encode CBP-BIT =====
+   write_and_store_CBP_block_bit (currMB, eep_dp, se->context, (num_levels > 0) ? 1 : 0);
+   if (num_levels > 0)
+   {
+     //===== encode significance map =====
+     write_significance_map         (currMB, eep_dp, se->context, levels, num_levels);
+     //===== encode significant coefficients =====
+     write_significant_coefficients (currMB, eep_dp, se->context, levels);
+   }
+ 
+   //--- reset counters ---
+   scan_pos   = 0;
+   num_levels = 0;
+   memset(levels, 0, sizeof(levels));
+ }
+ 
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Unary binarization and encoding of a symbol.
+  *
+  *    The first bin uses the model at ctx, every following bin the model
+  *    at ctx+ctx_offset.  A symbol n is written as n ones followed by a
+  *    terminating zero.
+  ************************************************************************
+  */
+ void unary_bin_encode(EncodingEnvironmentPtr eep_dp,
+                       unsigned int symbol,
+                       BiContextTypePtr ctx,
+                       int ctx_offset)
+ {
+   unsigned int remaining;
+   BiContextTypePtr tail_ctx;
+ 
+   if (symbol==0)
+   {
+     biari_encode_symbol(eep_dp, 0, ctx );
+     return;
+   }
+ 
+   biari_encode_symbol(eep_dp, 1, ctx );
+   tail_ctx = ctx+ctx_offset;
+   for (remaining = symbol-1; remaining > 0; remaining--)
+     biari_encode_symbol(eep_dp, 1, tail_ctx);
+   biari_encode_symbol(eep_dp, 0, tail_ctx);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Unary binarization and encoding of a symbol from a finite alphabet.
+  *
+  *    Same bin/model layout as unary_bin_encode(), except that the
+  *    terminating "0" is written only when symbol is smaller than
+  *    max_symbol.
+  ************************************************************************
+  */
+ void unary_bin_max_encode(EncodingEnvironmentPtr eep_dp,
+                           unsigned int symbol,
+                           BiContextTypePtr ctx,
+                           int ctx_offset,
+                           unsigned int max_symbol)
+ {
+   unsigned int remaining;
+   BiContextTypePtr tail_ctx;
+ 
+   if (symbol==0)
+   {
+     biari_encode_symbol(eep_dp, 0, ctx );
+     return;
+   }
+ 
+   biari_encode_symbol(eep_dp, 1, ctx );
+   tail_ctx = ctx+ctx_offset;
+   for (remaining = symbol-1; remaining > 0; remaining--)
+     biari_encode_symbol(eep_dp, 1, tail_ctx);
+ 
+   // the largest symbol needs no terminator
+   if (symbol<max_symbol)
+     biari_encode_symbol(eep_dp, 0, tail_ctx);
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Exp Golomb binarization and encoding
+  *
+  *    Writes a unary prefix (one "1" for every time 2^k can be subtracted
+  *    from symbol, with k growing by one each time), a terminating "0", and
+  *    then the k low-order bits of the remainder, most significant first.
+  *    All bins are coded as equiprobable.
+  *
+  * \param eep_dp
+  *    encoding environment the bins are written to
+  * \param symbol
+  *    value to encode
+  * \param k
+  *    initial order of the code; must be non-negative
+  ************************************************************************
+  */
+ void exp_golomb_encode_eq_prob( EncodingEnvironmentPtr eep_dp,
+                                 unsigned int symbol,
+                                 int k) 
+ {
+   unsigned int threshold;
+ 
+   while(1)
+   {
+     // Unsigned shift: the original (1<<k) on a signed int is undefined once
+     // k reaches 31, which very large symbols can drive it to.
+     threshold = 1u << k;
+     if (symbol >= threshold)
+     {
+       biari_encode_symbol_eq_prob(eep_dp, 1);   //first unary part
+       symbol = symbol - threshold;
+       k++;
+     }
+     else                  
+     {
+       biari_encode_symbol_eq_prob(eep_dp, 0);   //now terminated zero of unary part
+       while (k--)                               //next binary part
+         biari_encode_symbol_eq_prob(eep_dp, (signed short)((symbol>>k)&1)); 
+       break;
+     }
+   }
+ 
+   return;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Exp-Golomb for Level Encoding
+  *
+  *    Symbols below exp_start are written as a plain unary code on ctx.
+  *    Larger symbols write exp_start ones on ctx and hand the excess to
+  *    exp_golomb_encode_eq_prob() with order 0.
+  ************************************************************************
+  */
+ void unary_exp_golomb_level_encode( EncodingEnvironmentPtr eep_dp,
+                                     unsigned int symbol,
+                                     BiContextTypePtr ctx)
+ {
+   const unsigned int exp_start = 13; // 15-2 : 0,1 level decision always sent
+   unsigned int ones;
+   unsigned int n;
+ 
+   if (symbol==0)
+   {
+     biari_encode_symbol(eep_dp, 0, ctx );
+     return;
+   }
+ 
+   biari_encode_symbol(eep_dp, 1, ctx );
+ 
+   // further "1" bins: one per unit above 1, capped so that at most
+   // exp_start ones are written in total
+   ones = symbol-1;
+   if (ones > exp_start-1)
+     ones = exp_start-1;
+   for (n = 0; n < ones; n++)
+     biari_encode_symbol(eep_dp, 1, ctx);
+ 
+   if (symbol < exp_start)
+     biari_encode_symbol(eep_dp, 0, ctx);
+   else
+     exp_golomb_encode_eq_prob(eep_dp,symbol-exp_start,0);
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Exp-Golomb for MV Encoding
+  *
+  *    Symbols below exp_start are written as a unary code whose bins step
+  *    through successive context models starting at ctx.  Larger symbols
+  *    write exp_start ones and hand the excess to
+  *    exp_golomb_encode_eq_prob() with order 3.
+  *
+  * \param max_bin
+  *    bin number after which the context pointer is advanced once more
+  ************************************************************************
+  */
+ void unary_exp_golomb_mv_encode(EncodingEnvironmentPtr eep_dp,
+                                 unsigned int symbol,
+                                 BiContextTypePtr ctx,
+                                 unsigned int max_bin)
+ {
+   const unsigned int exp_start = 8; // 9-1 : 0 mvd decision always sent
+   BiContextTypePtr ictx = ctx;
+   unsigned int ones;
+   unsigned int bin;
+ 
+   if (symbol==0)
+   {
+     biari_encode_symbol(eep_dp, 0, ictx );
+     return;
+   }
+ 
+   biari_encode_symbol(eep_dp, 1, ictx );
+   ictx++;
+ 
+   // further "1" bins, capped so that at most exp_start ones are written
+   ones = symbol-1;
+   if (ones > exp_start-1)
+     ones = exp_start-1;
+ 
+   // bin counts the bins written so far, including the one being written
+   for (bin = 2; bin <= ones+1; bin++)
+   {
+     biari_encode_symbol(eep_dp, 1, ictx  );
+     if (bin==2)       ictx++;
+     if (bin==max_bin) ictx++;
+   }
+ 
+   if (symbol < exp_start)
+     biari_encode_symbol(eep_dp, 0, ictx);
+   else
+     exp_golomb_encode_eq_prob(eep_dp,symbol-exp_start,3);
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/cabac.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/cabac.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/cabac.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,65 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    cabac.h
+  *
+  * \brief
+  *    Headerfile for entropy coding routines
+  *
+  * \author
+  *    Detlev Marpe                                                         \n
+  *    Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+  *
+  * \date
+  *    21. Oct 2000 (Changes by Tobias Oelbaum 28.08.2001)
+  ***************************************************************************
+  */
+ 
+ 
+ #ifndef _CABAC_H_
+ #define _CABAC_H_
+ 
+ // CABAC encoder interface
+ int get_pic_bin_count();    // NOTE(review): "()" declares no prototype in C; "(void)" would let the compiler check calls (also applies to the other "()" declarations below)
+ void reset_pic_bin_count();
+ // Arithmetic-coder primitives (presumably implemented in biariencode.c -- confirm)
+ void arienco_start_encoding(EncodingEnvironmentPtr eep, unsigned char *code_buffer, int *code_len);
+ int  arienco_bits_written(EncodingEnvironmentPtr eep);
+ void arienco_done_encoding(EncodingEnvironmentPtr eep);
+ void biari_init_context (BiContextTypePtr ctx, const int* ini);
+ void rescale_cum_freq(BiContextTypePtr bi_ct);
+ void biari_encode_symbol(EncodingEnvironmentPtr eep, signed short symbol, BiContextTypePtr bi_ct );
+ void biari_encode_symbol_eq_prob(EncodingEnvironmentPtr eep, signed short symbol);
+ void biari_encode_symbol_final(EncodingEnvironmentPtr eep, signed short symbol);
+ MotionInfoContexts* create_contexts_MotionInfo(void);
+ TextureInfoContexts* create_contexts_TextureInfo(void);
+ void init_contexts_MotionInfo (MotionInfoContexts  *enco_ctx);
+ void init_contexts_TextureInfo(TextureInfoContexts *enco_ctx);
+ void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx);
+ void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx);
+ void writeHeaderToBuffer();
+ int  writeSyntaxElement_CABAC(SyntaxElement *se, DataPartition *this_dataPart);
+ void writeMB_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeIntraPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeB8_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeRefFrame2Buffer_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeRefFrame_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeMVD_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeCBP_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeDquant_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeRunLevel_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);  // collects run/level pairs; a zero level flushes the block
+ void writeBiDirBlkSize_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeCIPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void print_ctx_TextureInfo(TextureInfoContexts *enco_ctx);
+ void writeMB_skip_flagInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ void writeFieldModeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); //GB
+ void writeCBP_BIT_CABAC (int b8, int bit, int cbp, Macroblock* currMB, int inter, EncodingEnvironmentPtr eep_dp);
+ void cabac_new_slice();
+ void CheckAvailabilityOfNeighborsCABAC();
+ 
+ void writeMB_transform_size_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp);
+ 
+ 
+ #endif  // _CABAC_H_
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/configfile.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/configfile.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/configfile.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1162 ----
+ 
+ /*!
+  ***********************************************************************
+  * \file
+  *    configfile.c
+  * \brief
+  *    Configuration handling.
+  * \author
+  *  Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Stephan Wenger           <stewe at cs.tu-berlin.de>
+  * \note
+  *    In the future this module should hide the Parameters and offer only
+  *    Functions for their access.  Modules which make frequent use of some parameters
+  *    (e.g. picture size in macroblocks) are free to buffer them on local variables.
+  *    This will not only avoid global variable and make the code more readable, but also
+  *    speed it up.  It will also greatly facilitate future enhancements such as the
+  *    handling of different picture sizes in the same sequence.                         \n
+  *                                                                                      \n
+  *    For now, everything is just copied to the inp_par structure (gulp)
+  *
+  **************************************************************************************
+  * \par Configuration File Format
+  **************************************************************************************
+  * Format is line oriented, maximum of one parameter per line                           \n
+  *                                                                                      \n
+  * Lines have the following format:                                                     \n
+  * \<ParameterName\> = \<ParameterValue\> # Comments \\n                                    \n
+  * Whitespace is space and \\t
+  * \par
+  * \<ParameterName\> are the predefined names for Parameters and are case sensitive.
+  *   See configfile.h for the definition of those names and their mapping to
+  *   configinput->values.
+  * \par
+  * \<ParameterValue\> are either integers [0..9]* or strings.
+  *   Integers must fit into the wordlengths, signed values are generally assumed.
+  *   Strings containing no whitespace characters can be used directly.  Strings containing
+  *   whitespace characters are to be enclosed in double quotes ("string with whitespace").
+  *   The double quote character is forbidden (may want to implement something smarter here).
+  * \par
+  * Any Parameters whose ParameterName is undefined lead to the termination of the program
+  * with an error message.
+  *
+  * \par Known bug/Shortcoming:
+  *    zero-length strings (e.g. to signal a non-existing file)
+  *    have to be coded as "".
+  *
+  * \par Rules for using command files
+  *                                                                                      \n
+  * All Parameters are initially taken from DEFAULTCONFIGFILENAME, defined in configfile.h.
+  * If an -f \<config\> parameter is present in the command line then this file is used to
+  * update the defaults of DEFAULTCONFIGFILENAME.  There can be more than one -f parameters
+  * present.  If -p <ParameterName = ParameterValue> parameters are present then these
+  * override the default and the additional config file's settings, and are themselves
+  * overridden by future -p parameters.  There must be whitespace between -f and -p commands
+  * and their respective parameters
+  ***********************************************************************
+  */
+ 
+ #define INCLUDED_BY_CONFIGFILE_C
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ #include <assert.h>
+ 
+ #if defined WIN32
+   #include <io.h>
+   #define strcasecmp strcmpi
+ #else
+   #include <unistd.h>
+ #endif
+ #include <fcntl.h>
+ #include <sys/stat.h>
+ 
+ #include "global.h"
+ #include "configfile.h"
+ 
+ #include "fmo.h"
+ 
+        char *GetConfigFileContent (char *Filename);
+ static void ParseContent (char *buf, int bufsize);
+ static int ParameterNameToMapIndex (char *s);
+ static int InitEncoderParams();
+ static int TestEncoderParams(int bitdepth_qp_scale);
+ static int DisplayEncoderParams();
+ static void PatchInp ();
+ static void ProfileCheck();
+ static void LevelCheck();
+ 
+ 
+ #define MAX_ITEMS_TO_PARSE  10000
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *   Write the command-line usage text to stderr and terminate the
+  *   program with exit status -1.
+  ***********************************************************************
+  */
+ void JMHelpExit ()
+ {
+   static const char usage[] =
+     "\n   lencod [-h] [-d defenc.cfg] {[-f curenc1.cfg]...[-f curencN.cfg]}"
+     " {[-p EncParam1=EncValue1]..[-p EncParamM=EncValueM]}\n\n"
+     "## Parameters\n\n"
+ 
+     "## Options\n"
+     "   -h :  prints function usage\n"
+     "   -d :  use <defenc.cfg> as default file for parameter initializations.\n"
+     "         If not used then file defaults to encoder.cfg in local directory.\n"
+     "   -f :  read <curencM.cfg> for reseting selected encoder parameters.\n"
+     "         Multiple files could be used that set different parameters\n"
+     "   -p :  Set parameter <EncParamM> to <EncValueM>.\n"
+     "         See default encoder.cfg file for description of all parameters.\n\n"
+ 
+     "## Supported video file formats\n"
+     "   RAW:  .yuv -> YUV 4:2:0\n\n"
+ 
+     "## Examples of usage:\n"
+     "   lencod\n"
+     "   lencod  -h\n"
+     "   lencod  -d default.cfg\n"
+     "   lencod  -f curenc1.cfg\n"
+     "   lencod  -f curenc1.cfg -p InputFile=\"e:\\data\\container_qcif_30.yuv\" -p SourceWidth=176 -p SourceHeight=144\n"
+     "   lencod  -f curenc1.cfg -p FramesToBeEncoded=30 -p QPISlice=28 -p QPPSlice=28 -p QPBSlice=30\n";
+ 
+   // The text holds no conversion specifications, so writing it verbatim
+   // produces the same output as using it as a format string.
+   fputs (usage, stderr);
+ 
+   exit(-1);
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Parse the command line parameters and read the config files.
+  *
+  *    The defaults stored in Map[] are applied first, then the default
+  *    config file (or the one named by a leading "-d <file>"), then every
+  *    "-f <file>" and "-p <Name=Value>" argument in command-line order.
+  *    "-h" prints the usage text and exits.
+  * \param ac
+  *    number of command line parameters
+  * \param av
+  *    command line parameters
+  ***********************************************************************
+  */
+ void Configure (int ac, char *av[])
+ {
+   char *content;
+   int CLcount, ContentLen, NumberParams;
+   char *filename=DEFAULTCONFIGFILENAME;
+ 
+   memset (&configinput, 0, sizeof (InputParameters));
+   //Set default parameters.
+   printf ("Setting Default Parameters...\n");
+   InitEncoderParams();
+ 
+   // Process default config file
+   CLcount = 1;
+ 
+   if (ac>=2 && 0 == strncmp (av[1], "-h", 2))
+   {
+     JMHelpExit();
+   }
+ 
+   if (ac>=3 && 0 == strncmp (av[1], "-d", 2))
+   {
+     filename=av[2];
+     CLcount = 3;
+   }
+ 
+   printf ("Parsing Configfile %s", filename);
+   content = GetConfigFileContent (filename);
+   if (NULL==content)
+     error (errortext, 300);
+   ParseContent (content, strlen(content));
+   printf ("\n");
+   free (content);
+ 
+   // Parse the command line
+ 
+   while (CLcount < ac)
+   {
+     if (0 == strncmp (av[CLcount], "-h", 2))
+     {
+       JMHelpExit();
+     }
+     
+     if (0 == strncmp (av[CLcount], "-f", 2))  // A file parameter?
+     {
+       // "-f" as the very last argument has no file name; av[ac] is NULL and
+       // must not reach fopen().
+       if (CLcount+1 >= ac)
+       {
+         snprintf (errortext, ET_SIZE, "Error in command line, ac %d, missing file name after '%s'.", CLcount, av[CLcount]);
+         error (errortext, 300);
+       }
+       content = GetConfigFileContent (av[CLcount+1]);
+       if (NULL==content)
+         error (errortext, 300);
+       printf ("Parsing Configfile %s", av[CLcount+1]);
+       ParseContent (content, strlen (content));
+       printf ("\n");
+       free (content);
+       CLcount += 2;
+     } else
+     {
+       if (0 == strncmp (av[CLcount], "-p", 2))  // A config change?
+       {
+         // Collect all data until next parameter (starting with -<x> (x is any character)),
+         // put it into content, and parse content.
+ 
+         CLcount++;
+         ContentLen = 0;
+         NumberParams = CLcount;
+ 
+         // determine the necessary size for content: every '=' is expanded to
+         // " = " below, so a source character can need up to three bytes
+         while (NumberParams < ac && av[NumberParams][0] != '-')
+           ContentLen += 3 * strlen (av[NumberParams++]);
+         ContentLen += 1;                        // terminating \0
+ 
+         if ((content = malloc (ContentLen))==NULL) no_mem_exit("Configure: content");
+         content[0] = '\0';
+ 
+         // concatenate all parameters identified before
+ 
+         while (CLcount < NumberParams)
+         {
+           char *source = &av[CLcount][0];
+           char *destin = &content[strlen (content)];
+ 
+           while (*source != '\0')
+           {
+             if (*source == '=')  // The Parser expects whitespace before and after '='
+             {
+               *destin++=' '; *destin++='='; *destin++=' ';  // Hence make sure we add it
+             } else
+               *destin++=*source;
+             source++;
+           }
+           *destin = '\0';
+           CLcount++;
+         }
+         printf ("Parsing command line string '%s'", content);
+         ParseContent (content, strlen(content));
+         free (content);
+         printf ("\n");
+       }
+       else
+       {
+         snprintf (errortext, ET_SIZE, "Error in command line, ac %d, around string '%s', missing -f or -p parameters?", CLcount, av[CLcount]);
+         error (errortext, 300);
+       }
+     }
+   }
+   printf ("\n");
+   PatchInp();
+   if (input->DisplayEncParams)
+     DisplayEncoderParams();
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    allocates memory buf, opens file Filename in f, reads contents into
+  *    buf and returns buf
+  * \param Filename
+  *    name of config file
+  * \return
+  *    if successful, the NUL-terminated content of the config file in a
+  *    malloc'ed buffer which the caller has to free.                    \n
+  *    NULL in case of error. Error message will be set in errortext
+  ***********************************************************************
+  */
+ char *GetConfigFileContent (char *Filename)
+ {
+   long FileSize;
+   FILE *f;
+   char *buf;
+ 
+   if (NULL == (f = fopen (Filename, "r")))
+   {
+       snprintf (errortext, ET_SIZE, "Cannot open configuration file %s.", Filename);
+       return NULL;
+   }
+ 
+   // From here on every error return has to close f again.
+ 
+   if (0 != fseek (f, 0, SEEK_END))
+   {
+     snprintf (errortext, ET_SIZE, "Cannot fseek in configuration file %s.", Filename);
+     fclose (f);
+     return NULL;
+   }
+ 
+   FileSize = ftell (f);
+   if (FileSize < 0 || FileSize > 60000)
+   {
+     snprintf (errortext, ET_SIZE, "Unreasonable Filesize %ld reported by ftell for configuration file %s.", FileSize, Filename);
+     fclose (f);
+     return NULL;
+   }
+   if (0 != fseek (f, 0, SEEK_SET))
+   {
+     snprintf (errortext, ET_SIZE, "Cannot fseek in configuration file %s.", Filename);
+     fclose (f);
+     return NULL;
+   }
+ 
+   if ((buf = malloc (FileSize + 1))==NULL) no_mem_exit("GetConfigFileContent: buf");
+ 
+   // Note that ftell() gives us the file size as the file system sees it.  The actual file size,
+   // as reported by fread() below will be often smaller due to CR/LF to CR conversion and/or
+   // control characters after the dos EOF marker in the file.
+ 
+   FileSize = fread (buf, 1, FileSize, f);
+   buf[FileSize] = '\0';
+ 
+ 
+   fclose (f);
+   return buf;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Parses the character array buf and writes global variable input, which is defined in
+  *    configfile.h.  This hack will continue to be necessary to facilitate the addition of
+  *    new parameters through the Map[] mechanism (Need compiler-generated addresses in map[]).
+  *
+  *    buf is modified in place: separators, quotes and comment starts are
+  *    overwritten with '\0' so that the collected token pointers refer to
+  *    NUL-terminated strings inside buf.
+  * \param buf
+  *    buffer to be parsed
+  * \param bufsize
+  *    buffer size of buffer
+  ***********************************************************************
+  */
+ void ParseContent (char *buf, int bufsize)
+ {
+ 
+   char *items[MAX_ITEMS_TO_PARSE];
+   int MapIdx;
+   int item = 0;
+   int InString = 0, InItem = 0;
+   char *p = buf;
+   char *bufend = &buf[bufsize];
+   int IntContent;
+   double DoubleContent;
+   int i;
+ 
+ // Stage one: Generate an argc/argv-type list in items[], without comments and whitespace.
+ // This is context insensitive and could be done most easily with lex(1).
+ 
+   while (p < bufend)
+   {
+     switch (*p)
+     {
+       case 13:
+         p++;
+         break;
+       case '#':                 // Found comment
+         *p = '\0';              // Replace '#' with '\0' in case of comment immediately following integer or string
+         while (p < bufend && *p != '\n')  // Skip till EOL or EOF, whichever comes first
+           p++;
+         InString = 0;
+         InItem = 0;
+         break;
+       case '\n':
+         InItem = 0;
+         InString = 0;
+         *p++='\0';
+         break;
+       case ' ':
+       case '\t':              // Skip whitespace, leave state unchanged
+         if (InString)
+           p++;
+         else
+         {                     // Terminate non-strings once whitespace is found
+           *p++ = '\0';
+           InItem = 0;
+         }
+         break;
+ 
+       case '"':               // Begin/End of String
+         *p++ = '\0';
+         if (!InString)
+         {
+           if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+           {
+             snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d tokens.", MAX_ITEMS_TO_PARSE);
+             error (errortext, 300);
+           }
+           items[item++] = p;
+           InItem = ~InItem;
+         }
+         else
+           InItem = 0;
+         InString = ~InString; // Toggle
+         break;
+ 
+       default:
+         if (!InItem)
+         {
+           if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+           {
+             snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d tokens.", MAX_ITEMS_TO_PARSE);
+             error (errortext, 300);
+           }
+           items[item++] = p;
+           InItem = ~InItem;
+         }
+         p++;
+     }
+   }
+ 
+ // Stage two: interpret the tokens as <Name> = <Value> triples.  As before, a
+ // single left-over token at the very end is ignored.
+ 
+   for (i=0; i+1<item; i+= 3)
+   {
+     if (0 > (MapIdx = ParameterNameToMapIndex (items[i])))
+     {
+       snprintf (errortext, ET_SIZE, " Parsing error in config file: Parameter Name '%s' not recognized.", items[i]);
+       error (errortext, 300);
+     }
+     if (strcasecmp ("=", items[i+1]))
+     {
+       snprintf (errortext, ET_SIZE, " Parsing error in config file: '=' expected as the second token in each line.");
+       error (errortext, 300);
+     }
+     if (i+2 >= item)          // "Name =" without a value: items[i+2] was never set
+     {
+       snprintf (errortext, ET_SIZE, " Parsing error in config file: no value found for Parameter %s.", items[i]);
+       error (errortext, 300);
+     }
+ 
+     // Now interpret the Value, context sensitive...
+ 
+     switch (Map[MapIdx].Type)
+     {
+       case 0:           // Numerical
+         if (1 != sscanf (items[i+2], "%d", &IntContent))
+         {
+           snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+2]);
+           error (errortext, 300);
+         }
+         * (int *) (Map[MapIdx].Place) = IntContent;
+         printf (".");
+         break;
+       case 1:
+         strncpy ((char *) Map[MapIdx].Place, items [i+2], FILE_NAME_SIZE);
+         // strncpy leaves the destination unterminated when the value fills it
+         ((char *) Map[MapIdx].Place)[FILE_NAME_SIZE-1] = '\0';
+         printf (".");
+         break;
+       case 2:           // Numerical double
+         if (1 != sscanf (items[i+2], "%lf", &DoubleContent))
+         {
+           snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+2]);
+           error (errortext, 300);
+         }
+         * (double *) (Map[MapIdx].Place) = DoubleContent;
+         printf (".");
+         break;
+       default:
+         // a bare string literal is always true and would never trigger
+         assert (0 && "Unknown value type in the map definition of configfile.h");
+     }
+   }
+   memcpy (input, &configinput, sizeof (InputParameters));
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Looks up a parameter name in Map[], ignoring case.
+  * \param s
+  *    parameter name string
+  * \return
+  *    index of the first Map[] entry whose TokenName matches s,         \n
+  *    -1 if the terminating entry (TokenName == NULL) is reached first
+  ***********************************************************************
+  */
+ static int ParameterNameToMapIndex (char *s)
+ {
+   int idx;
+ 
+   for (idx = 0; Map[idx].TokenName != NULL; idx++)
+   {
+     if (strcasecmp (Map[idx].TokenName, s) == 0)
+       return idx;
+   }
+   return -1;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sets initial values for encoding parameters: every int (Type 0) and
+  *    double (Type 2) entry of Map[] receives its Default.  Entries of any
+  *    other Type are left untouched.
+  * \return
+  *    always -1
+  ***********************************************************************
+  */
+ static int InitEncoderParams()
+ {
+   int idx;
+ 
+   for (idx = 0; Map[idx].TokenName != NULL; idx++)
+   {
+     switch (Map[idx].Type)
+     {
+       case 0:
+         * (int *) (Map[idx].Place) = (int) Map[idx].Default;
+         break;
+       case 2:
+         * (double *) (Map[idx].Place) = Map[idx].Default;
+         break;
+       default:
+         break;
+     }
+   }
+   return -1;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Validates encoding parameters against the limits stored in Map[].
+  *
+  *    param_limits selects the check:                                   \n
+  *      1 : value must lie in [min_limit, max_limit]                    \n
+  *      2 : value must not be smaller than min_limit                    \n
+  *      3 : like 1, with the lower bound reduced by bitdepth_qp_scale
+  *          (int parameters only)
+  * \param bitdepth_qp_scale
+  *    amount subtracted from min_limit for entries with param_limits == 3
+  * \return
+  *    always -1; violations are reported through error()
+  ***********************************************************************
+  */
+ static int TestEncoderParams(int bitdepth_qp_scale)
+ {
+   int i = 0;
+ 
+   while (Map[i].TokenName != NULL)
+   {
+     if (Map[i].param_limits == 1)
+     {
+       if (Map[i].Type == 0)
+       {
+         if ( * (int *) (Map[i].Place) < (int) Map[i].min_limit || * (int *) (Map[i].Place) > (int) Map[i].max_limit )
+         {
+           snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%d, %d] range.", Map[i].TokenName, (int) Map[i].min_limit,(int)Map[i].max_limit );
+           error (errortext, 400);
+         }
+       }
+       else if (Map[i].Type == 2)
+       {
+         if ( * (double *) (Map[i].Place) < Map[i].min_limit || * (double *) (Map[i].Place) > Map[i].max_limit )
+         {
+           snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%.2f, %.2f] range.", Map[i].TokenName,Map[i].min_limit ,Map[i].max_limit );
+           error (errortext, 400);
+         }
+       }
+     }
+     else if (Map[i].param_limits == 2)
+     {
+       if (Map[i].Type == 0)
+       {
+         if ( * (int *) (Map[i].Place) < (int) Map[i].min_limit )
+         {
+           snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should not be smaller than %d.", Map[i].TokenName, (int) Map[i].min_limit);
+           error (errortext, 400);
+         }
+       }
+       else if (Map[i].Type == 2)
+       {
+         if ( * (double *) (Map[i].Place) < Map[i].min_limit )
+         {
+           // "%.2f" as in the range message above; the former "%2.f" printed
+           // the limit without any decimals
+           snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should not be smaller than %.2f.", Map[i].TokenName,Map[i].min_limit);
+           error (errortext, 400);
+         }
+       }
+     }
+     else if (Map[i].param_limits == 3) // Only used for QPs
+     {
+       if (Map[i].Type == 0)
+       {
+         if ( * (int *) (Map[i].Place) < (int) (Map[i].min_limit - bitdepth_qp_scale) || * (int *) (Map[i].Place) > (int) Map[i].max_limit )
+         {
+           snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%d, %d] range.", Map[i].TokenName, (int) (Map[i].min_limit - bitdepth_qp_scale),(int)Map[i].max_limit );
+           error (errortext, 400);
+         }
+       }
+     }
+ 
+     i++;
+   }
+   return -1;
+ }
+ 
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Outputs encoding parameters.
+  *    Prints one line per entry of the Map table (up to the entry whose
+  *    TokenName is NULL): Type 0 as int, Type 1 as quoted string, Type 2
+  *    as double with two decimals. Entries of any other Type are skipped.
+  * \return
+  *    always -1
+  ***********************************************************************
+  */
+ static int DisplayEncoderParams()
+ {
+   int i;
+ 
+   printf("******************************************************\n");
+   printf("*               Encoder Parameters                   *\n");
+   printf("******************************************************\n");
+   for (i = 0; Map[i].TokenName != NULL; i++)
+   {
+     if (Map[i].Type == 0)
+     {
+       printf("Parameter %s = %d\n",Map[i].TokenName,* (int *) (Map[i].Place));
+     }
+     else if (Map[i].Type == 1)
+     {
+       // The quotes must be escaped: the former ""%s"" was plain adjacent-literal
+       // concatenation and printed the string without any quotes.
+       printf("Parameter %s = \"%s\"\n",Map[i].TokenName,(char *)  (Map[i].Place));
+     }
+     else if (Map[i].Type == 2)
+     {
+       printf("Parameter %s = %.2f\n",Map[i].TokenName,* (double *) (Map[i].Place));
+     }
+   }
+   printf("******************************************************\n");
+   return -1;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    calculate Ceil(Log2(uiVal)): counts how many right shifts it takes
+  *    to reduce (uiVal - 1) to zero
+  ************************************************************************
+  */
+ unsigned CeilLog2( unsigned uiVal)
+ {
+   unsigned shifts = 0;
+   unsigned remaining;
+ 
+   for (remaining = uiVal - 1; remaining != 0; remaining >>= 1)
+     ++shifts;
+ 
+   return shifts;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Checks the input parameters for consistency.
+  * \note
+  *    Besides validating, this routine fills the part_size/blc_size
+  *    tables, derives log2_max_frame_num_minus4 and
+  *    log2_max_pic_order_cnt_lsb_minus4, opens the input, reconstruction
+  *    and (with TRACE) trace files, computes the automatic cropping and
+  *    loads the slice group configuration file. Problems are reported
+  *    through error(); ProfileCheck() and LevelCheck() run last.
+  ***********************************************************************
+  */
+ static void PatchInp ()
+ {
+   int bitdepth_qp_scale = 6*(input->BitDepthLuma - 8);
+   
+   // These variables are added for FMO
+   FILE * sgfile=NULL;
+   int i,j;
+   int frame_mb_only;
+   int mb_width, mb_height, mapunit_height;
+   int storedBplus1;
+ 
+   TestEncoderParams(bitdepth_qp_scale);
+ 
+   if (input->FrameRate == 0.0)
+     input->FrameRate = INIT_FRAME_RATE;
+ 
+   // Set block sizes
+ 
+   // Skip/Direct16x16
+   input->part_size[0][0] = 4;
+   input->part_size[0][1] = 4;
+   // 16x16
+   input->part_size[1][0] = 4;
+   input->part_size[1][1] = 4;
+   // 16x8
+   input->part_size[2][0] = 4;
+   input->part_size[2][1] = 2;
+   // 8x16
+   input->part_size[3][0] = 2;
+   input->part_size[3][1] = 4;
+   // 8x8
+   input->part_size[4][0] = 2;
+   input->part_size[4][1] = 2;
+   // 8x4
+   input->part_size[5][0] = 2;
+   input->part_size[5][1] = 1;
+   // 4x8
+   input->part_size[6][0] = 1;
+   input->part_size[6][1] = 2;
+   // 4x4
+   input->part_size[7][0] = 1;
+   input->part_size[7][1] = 1;
+ 
+   // blc_size is part_size scaled by BLOCK_SIZE
+   for (j = 0; j<8;j++)
+   {
+     for (i = 0; i<2; i++) 
+     {
+       input->blc_size[j][i] = input->part_size[j][i] * BLOCK_SIZE;
+     }
+   }
+ 
+   // set proper log2_max_frame_num_minus4.
+   storedBplus1 = (input->BRefPictures ) ? input->successive_Bframe + 1: 1;
+ 
+   // -1 requests automatic selection, clipped to [0,12]
+   if (input->Log2MaxFNumMinus4 == -1)
+     log2_max_frame_num_minus4 = Clip3(0,12, (int) (CeilLog2(input->no_frames * storedBplus1) - 4));
+   else 
+     log2_max_frame_num_minus4 = input->Log2MaxFNumMinus4;
+   
+   if (log2_max_frame_num_minus4 == 0 && input->num_ref_frames == 16)
+   {
+     snprintf(errortext, ET_SIZE, " NumberReferenceFrames=%d and Log2MaxFNumMinus4=%d may lead to an invalid value of frame_num.", input->num_ref_frames, input-> Log2MaxFNumMinus4);
+     error (errortext, 500);
+   } 
+ 
+   // set proper log2_max_pic_order_cnt_lsb_minus4.
+   // -1 requests automatic selection, clipped to [0,12]
+   if (input->Log2MaxPOCLsbMinus4 == - 1)
+     log2_max_pic_order_cnt_lsb_minus4 = Clip3(0,12, (int) (CeilLog2( 2*input->no_frames * (input->jumpd + 1)) - 4));
+   else 
+     log2_max_pic_order_cnt_lsb_minus4 = input->Log2MaxPOCLsbMinus4;
+ 
+   // only an explicitly configured value is checked against the frame skip
+   if (((1<<(log2_max_pic_order_cnt_lsb_minus4 + 3)) < input->jumpd * 4) && input->Log2MaxPOCLsbMinus4 != -1)
+     error("log2_max_pic_order_cnt_lsb_minus4 might not be sufficient for encoding. Increase value.",400);
+ 
+   // B picture consistency check
+   if(input->successive_Bframe > input->jumpd)
+   {
+     snprintf(errortext, ET_SIZE, "Number of B-frames %d can not exceed the number of frames skipped", input->successive_Bframe);
+     error (errortext, 400);
+   }
+ 
+   // Direct Mode consistency check
+   if(input->successive_Bframe && input->direct_spatial_mv_pred_flag != DIR_SPATIAL && input->direct_spatial_mv_pred_flag != DIR_TEMPORAL)
+   {
+     snprintf(errortext, ET_SIZE, "Unsupported direct mode=%d, use TEMPORAL=0 or SPATIAL=1", input->direct_spatial_mv_pred_flag);
+     error (errortext, 400);
+   }
+ 
+   // Interlace coding forces the direct inference flag on (with a notice if it was off)
+   if (input->PicInterlace>0 || input->MbInterlace>0)
+   {
+     if (input->directInferenceFlag==0)
+       printf("\nDirectInferenceFlag set to 1 due to interlace coding.");
+     input->directInferenceFlag=1;
+   }
+ 
+   if (input->PicInterlace>0)
+   {
+     if (input->IntraBottom!=0 && input->IntraBottom!=1)
+     {
+       snprintf(errortext, ET_SIZE, "Incorrect value %d for IntraBottom. Use 0 (disable) or 1 (enable).", input->IntraBottom);
+       error (errortext, 400);
+     }
+   } 
+   // Cabac/UVLC consistency check
+   if (input->symbol_mode != UVLC && input->symbol_mode != CABAC)
+   {
+     snprintf (errortext, ET_SIZE, "Unsupported symbol mode=%d, use UVLC=0 or CABAC=1",input->symbol_mode);
+     error (errortext, 400);
+   }
+ 
+   // Open Files
+   if ((p_in=open(input->infile, OPENFLAGS_READ))==-1)
+   {
+     snprintf(errortext, ET_SIZE, "Input file %s does not exist",input->infile);
+     error (errortext, 500);
+   }
+ 
+   // the reconstruction file is only opened when a name was given
+   if (strlen (input->ReconFile) > 0 && (p_dec=open(input->ReconFile, OPENFLAGS_WRITE, OPEN_PERMISSIONS))==-1)
+   {
+     snprintf(errortext, ET_SIZE, "Error open file %s", input->ReconFile);
+     error (errortext, 500);
+   }
+ 
+ #if TRACE
+   if (strlen (input->TraceFile) > 0 && (p_trace=fopen(input->TraceFile,"w"))==NULL)
+   {
+     snprintf(errortext, ET_SIZE, "Error open file %s", input->TraceFile);
+     error (errortext, 500);
+   }
+ #endif
+ 
+   // Automatic cropping: pad the width up to a multiple of 16 ...
+   if (input->img_width % 16 != 0)
+   {
+     img->auto_crop_right = 16-(input->img_width % 16);
+   }
+   else
+   {
+     img->auto_crop_right=0;
+   }
+   // ... and the height up to a multiple of 32 (interlace) or 16 (otherwise)
+   if (input->PicInterlace || input->MbInterlace)
+   {
+     if (input->img_height % 2 != 0)
+     {
+       error ("even number of lines required for interlaced coding", 500);
+     }
+     if (input->img_height % 32 != 0)
+     {
+       img->auto_crop_bottom = 32-(input->img_height % 32);
+     }
+     else
+     {
+       img->auto_crop_bottom=0;
+     }
+   }
+   else
+   {
+     if (input->img_height % 16 != 0)
+     {
+       img->auto_crop_bottom = 16-(input->img_height % 16);
+     }
+     else
+     {
+       img->auto_crop_bottom=0;
+     }
+   }
+   if (img->auto_crop_bottom || img->auto_crop_right)
+   {
+     printf ("Warning: Automatical cropping activated: Coded frame Size: %dx%d\n", input->img_width+img->auto_crop_right, input->img_height+img->auto_crop_bottom);
+   }
+ 
+   /*
+   // add check for MAXSLICEGROUPIDS
+   if(input->num_slice_groups_minus1>=MAXSLICEGROUPIDS)
+   {
+     snprintf(errortext, ET_SIZE, "num_slice_groups_minus1 exceeds MAXSLICEGROUPIDS");
+     error (errortext, 500);
+   }
+   */
+ 
+   // Following codes are to read slice group configuration from SliceGroupConfigFileName for slice group type 0,2 or 6
+   if( (input->num_slice_groups_minus1!=0)&&
+     ((input->slice_group_map_type == 0) || (input->slice_group_map_type == 2) || (input->slice_group_map_type == 6)) )
+   { 
+     // An empty file name is treated like a failed open: previously it fell
+     // through to the parsing code below with sgfile still NULL.
+     if (strlen (input->SliceGroupConfigFileName) == 0 || (sgfile=fopen(input->SliceGroupConfigFileName,"r"))==NULL)
+     {
+       snprintf(errortext, ET_SIZE, "Error open file %s", input->SliceGroupConfigFileName);
+       error (errortext, 500);
+     }
+     else
+     {
+       if (input->slice_group_map_type == 0) 
+       {
+         input->run_length_minus1=(int *)malloc(sizeof(int)*(input->num_slice_groups_minus1+1));
+         if (NULL==input->run_length_minus1) 
+           no_mem_exit("PatchInp: input->run_length_minus1");
+         
+         // each line contains one 'run_length_minus1' value
+         for(i=0;i<=input->num_slice_groups_minus1;i++)
+         {
+           fscanf(sgfile,"%d",(input->run_length_minus1+i));
+           fscanf(sgfile,"%*[^\n]");   // discard the rest of the line
+           
+         }
+       }
+       else if (input->slice_group_map_type == 2)
+       {
+         input->top_left=(int *)malloc(sizeof(int)*input->num_slice_groups_minus1);
+         input->bottom_right=(int *)malloc(sizeof(int)*input->num_slice_groups_minus1);
+         if (NULL==input->top_left) 
+           no_mem_exit("PatchInp: input->top_left");
+         if (NULL==input->bottom_right) 
+           no_mem_exit("PatchInp: input->bottom_right");
+         
+         // every two lines contain 'top_left' and 'bottom_right' value
+         for(i=0;i<input->num_slice_groups_minus1;i++)
+         {
+           fscanf(sgfile,"%d",(input->top_left+i));
+           fscanf(sgfile,"%*[^\n]");
+           fscanf(sgfile,"%d",(input->bottom_right+i));
+           fscanf(sgfile,"%*[^\n]");
+         }
+         
+       }
+       else if (input->slice_group_map_type == 6)
+       {
+         int tmp;
+ 
+         frame_mb_only = !(input->PicInterlace || input->MbInterlace);
+         mb_width= (input->img_width+img->auto_crop_right)/16;
+         mb_height= (input->img_height+img->auto_crop_bottom)/16;
+         mapunit_height=mb_height/(2-frame_mb_only);
+         
+         input->slice_group_id=(byte * ) malloc(sizeof(byte)*mapunit_height*mb_width);
+         if (NULL==input->slice_group_id) 
+           no_mem_exit("PatchInp: input->slice_group_id");
+         
+         // each line contains slice_group_id for one Macroblock
+         for (i=0;i<mapunit_height*mb_width;i++)
+         {
+           fscanf(sgfile,"%d", &tmp);
+           input->slice_group_id[i]= (byte) tmp;
+           if ( *(input->slice_group_id+i) > input->num_slice_groups_minus1 )
+           {
+             snprintf(errortext, ET_SIZE, "Error read slice group information from file %s", input->SliceGroupConfigFileName);
+             error (errortext, 500);
+           }
+           fscanf(sgfile,"%*[^\n]");
+         }
+       }
+       fclose(sgfile);
+     }
+   }
+   
+   
+   if (input->PyramidRefReorder && input->PyramidCoding && (input->PicInterlace || input->MbInterlace))
+   {
+     snprintf(errortext, ET_SIZE, "PyramidRefReorder Not supported with Interlace encoding methods\n");
+     error (errortext, 400);
+   }
+ 
+   if (input->PocMemoryManagement && input->PyramidCoding && (input->PicInterlace || input->MbInterlace))
+   {
+     snprintf(errortext, ET_SIZE, "PocMemoryManagement not supported with Interlace encoding methods\n");
+     error (errortext, 400);
+   }
+ 
+ 
+   // frame/field consistency check
+   if (input->PicInterlace != FRAME_CODING && input->PicInterlace != ADAPTIVE_CODING && input->PicInterlace != FIELD_CODING)
+   {
+     snprintf (errortext, ET_SIZE, "Unsupported PicInterlace=%d, use frame based coding=0 or field based coding=1 or adaptive=2",input->PicInterlace);
+     error (errortext, 400);
+   }
+ 
+   // frame/field consistency check
+   if (input->MbInterlace != FRAME_CODING && input->MbInterlace != ADAPTIVE_CODING && input->MbInterlace != FIELD_CODING)
+   {
+     snprintf (errortext, ET_SIZE, "Unsupported MbInterlace=%d, use frame based coding=0 or field based coding=1 or adaptive=2",input->MbInterlace);
+     error (errortext, 400);
+   }
+    
+  
+   if ((!input->rdopt)&&(input->MbInterlace))
+   {
+     snprintf(errortext, ET_SIZE, "MB AFF is not compatible with non-rd-optimized coding.");
+     error (errortext, 500);
+   }
+ 
+   if (input->rdopt>2)
+   {
+     snprintf(errortext, ET_SIZE, "RDOptimization=3 mode has been deactivated do to diverging of real and simulated decoders.");
+     error (errortext, 500);
+   }
+ 
+   // check RDoptimization mode and profile. FMD does not support Frex Profiles.
+   if (input->rdopt==2 && input->ProfileIDC>=FREXT_HP)
+   {
+     snprintf(errortext, ET_SIZE, "Fast Mode Decision methods does not support FREX Profiles");
+     error (errortext, 500);
+   }
+ 
+   // the two HEX FME schemes support FAST Subpel ME. EPZS does not but works fine with
+   // Hadamard reduction with similar speed up. Subpel FME may be added at a later stage
+   // for this scheme for further speed increase.
+   if (input->hadamard == 2 && input->FMEnable != 0 && input->FMEnable != 3)
+   {
+     snprintf(errortext, ET_SIZE, "UseHadamard=2 is not allowed when UseFME is set to 1 or 2.");
+     error (errortext, 500);
+   }
+ 
+   // Tian Dong: May 31, 2002
+   // The number of frames in one sub-seq in enhanced layer should not exceed
+   // the number of reference frame number.
+   if ( input->NumFramesInELSubSeq >= input->num_ref_frames || input->NumFramesInELSubSeq < 0 )
+   {
+     snprintf(errortext, ET_SIZE, "NumFramesInELSubSeq (%d) is out of range [0,%d).", input->NumFramesInELSubSeq, input->num_ref_frames);
+     error (errortext, 500);
+   }
+   // Tian Dong: Enhanced GOP is not supported in bitstream mode. September, 2002
+   if ( input->NumFramesInELSubSeq > 0 && input->of_mode == PAR_OF_ANNEXB )
+   {
+     snprintf(errortext, ET_SIZE, "Enhanced GOP is not supported in bitstream mode and RTP mode yet.");
+     error (errortext, 500);
+   }
+   // Tian Dong (Sept 2002)
+   // The AFF is not compatible with spare picture for the time being.
+   if ((input->PicInterlace || input->MbInterlace) && input->SparePictureOption == TRUE)
+   {
+     snprintf(errortext, ET_SIZE, "AFF is not compatible with spare picture.");
+     error (errortext, 500);
+   }
+ 
+   // Only the RTP mode is compatible with spare picture for the time being.
+   if (input->of_mode != PAR_OF_RTP && input->SparePictureOption == TRUE)
+   {
+     snprintf(errortext, ET_SIZE, "Only RTP output mode is compatible with spare picture features.");
+     error (errortext, 500);
+   }
+ 
+   if( (input->WeightedPrediction > 0 || input->WeightedBiprediction > 0) && (input->MbInterlace))
+   {
+     // The message has to be written into errortext: it used to go to printf,
+     // so error() was handed whatever text a previous check had left behind.
+     snprintf(errortext, ET_SIZE, "Weighted prediction coding is not supported for MB AFF currently.");
+     error (errortext, 500);
+   }
+   if ( input->NumFramesInELSubSeq > 0 && input->WeightedPrediction > 0)
+   {
+     snprintf(errortext, ET_SIZE, "Enhanced GOP is not supported in weighted prediction coding mode yet.");
+     error (errortext, 500);
+   }
+ 
+   //! num_slice_groups_minus1 is forced to be 1 for slice group type 3-5
+   if(input->num_slice_groups_minus1 > 0)
+   {
+     if( (input->slice_group_map_type >= 3) && (input->slice_group_map_type<=5) ) 
+       input->num_slice_groups_minus1 = 1;
+   }
+   
+   // Rate control
+   if(input->RCEnable)
+   {
+     // basicunit == 0 is rejected first so the modulo below never divides by zero
+     if ( input->basicunit == 0 || ((input->img_height+img->auto_crop_bottom)*(input->img_width+img->auto_crop_right)/256)%input->basicunit!=0)
+     {
+       snprintf(errortext, ET_SIZE, "Frame size in macroblocks must be a multiple of BasicUnit.");
+       error (errortext, 500);
+     }
+   }
+ 
+   if ((input->successive_Bframe)&&(input->BRefPictures)&&(input->idr_enable)&&(input->intra_period)&&(input->pic_order_cnt_type!=0))
+   {
+     error("Stored B pictures combined with IDR pictures only supported in Picture Order Count type 0\n",-1000);
+   }
+   
+   if( !input->direct_spatial_mv_pred_flag && input->num_ref_frames<2 && input->successive_Bframe >0)
+     error("temporal direct needs at least 2 ref frames\n",-1000);
+ 
+   // frext
+   if(input->Transform8x8Mode && input->sp_periodicity /*SP-frames*/)
+   {
+     snprintf(errortext, ET_SIZE, "\nThe new 8x8 mode is not implemented for sp-frames.");
+     error (errortext, 500);
+   }
+ 
+   if(input->Transform8x8Mode && (input->ProfileIDC<FREXT_HP || input->ProfileIDC>FREXT_Hi444))
+   {
+     snprintf(errortext, ET_SIZE, "\nTransform8x8Mode may be used only with ProfileIDC %d to %d.", FREXT_HP, FREXT_Hi444);
+     error (errortext, 500);
+   }
+   if(input->ScalingMatrixPresentFlag && (input->ProfileIDC<FREXT_HP || input->ProfileIDC>FREXT_Hi444))
+   {
+     snprintf(errortext, ET_SIZE, "\nScalingMatrixPresentFlag may be used only with ProfileIDC %d to %d.", FREXT_HP, FREXT_Hi444);
+     error (errortext, 500);
+   }
+ 
+   if(input->yuv_format==YUV422 && input->ProfileIDC < FREXT_Hi422)
+   {
+     snprintf(errortext, ET_SIZE, "\nFRExt Profile(YUV Format) Error!\nYUV422 can be used only with ProfileIDC %d or %d\n",FREXT_Hi422, FREXT_Hi444);
+     error (errortext, 500);
+   }
+   if(input->yuv_format==YUV444 && input->ProfileIDC < FREXT_Hi444)
+   {
+     snprintf(errortext, ET_SIZE, "\nFRExt Profile(YUV Format) Error!\nYUV444 can be used only with ProfileIDC %d.\n",FREXT_Hi444);
+     error (errortext, 500);
+   }
+   
+   // Residue Color Transform
+   if(input->yuv_format!=YUV444 && input->residue_transform_flag)
+   {
+     snprintf(errortext, ET_SIZE, "\nResidue color transform is supported only in YUV444.");
+     error (errortext, 500);
+   }
+ 
+   if ((input->BiPredMotionEstimation) && (input->search_range < input->BiPredMESearchRange))
+   {
+     snprintf(errortext, ET_SIZE, "\nBiPredMESearchRange must be smaller or equal SearchRange.");
+     error (errortext, 500);
+   }
+ 
+ 
+   // Open GOP switches PyramidRefReorder on
+   if (input->EnableOpenGOP) input->PyramidRefReorder = 1;
+   if (input->EnableOpenGOP && input->PicInterlace) 
+   {
+     snprintf(errortext, ET_SIZE, "Open Gop currently not supported for Field coded pictures.");
+     error (errortext, 500);
+   }
+   
+   ProfileCheck();
+   LevelCheck();
+ }
+ 
+ void PatchInputNoFrames()
+ {
+   // Tian Dong: May 31, 2002
+   // With two layers, "FramesToBeEncoded" in the config file counts base layer frames
+   // only. Each base layer frame after the first brings NumFramesInELSubSeq enhancement
+   // frames with it, so input->no_frames is expanded to the total number of frames.
+   // A non-zero NumFrameIn2ndIGOP is expanded the same way.
+   int frames_per_step = input->NumFramesInELSubSeq + 1;
+ 
+   input->no_frames = 1 + (input->no_frames - 1) * frames_per_step;
+ 
+   if ( input->NumFrameIn2ndIGOP != 0 )
+   {
+     input->NumFrameIn2ndIGOP = 1 + (input->NumFrameIn2ndIGOP - 1) * frames_per_step;
+   }
+ 
+   FirstFrameIn2ndIGOP = input->no_frames;
+ }
+ 
+ static void ProfileCheck()
+ {
+   // Verifies that ProfileIDC names an accepted profile and that the tools
+   // selected in the configuration are permitted for the baseline (66),
+   // main (77) and extended (88) profiles. Violations go through error().
+   const int profile = input->ProfileIDC;
+   int is_known_profile;
+ 
+   is_known_profile = (profile == 66)
+                   || (profile == 77)
+                   || (profile == 88)
+                   || (profile == FREXT_HP)
+                   || (profile == FREXT_Hi10P)
+                   || (profile == FREXT_Hi422)
+                   || (profile == FREXT_Hi444);
+ 
+   if (!is_known_profile)
+   {
+     snprintf(errortext, ET_SIZE, "Profile must be baseline(66)/main(77)/extended(88) or FRExt (%d to %d).", FREXT_HP,FREXT_Hi444);
+     error (errortext, 500);
+   }
+ 
+   if (profile == 66)
+   {
+     // baseline
+     if (input->successive_Bframe != 0 || input->BRefPictures == 2)
+     {
+       snprintf(errortext, ET_SIZE, "B pictures are not allowed in baseline.");
+       error (errortext, 500);
+     }
+     if (input->sp_periodicity != 0)
+     {
+       snprintf(errortext, ET_SIZE, "SP pictures are not allowed in baseline.");
+       error (errortext, 500);
+     }
+     if (input->partition_mode != 0)
+     {
+       snprintf(errortext, ET_SIZE, "Data partitioning is not allowed in baseline.");
+       error (errortext, 500);
+     }
+     if (input->WeightedPrediction != 0)
+     {
+       snprintf(errortext, ET_SIZE, "Weighted prediction is not allowed in baseline.");
+       error (errortext, 500);
+     }
+     if (input->WeightedBiprediction != 0)
+     {
+       snprintf(errortext, ET_SIZE, "Weighted prediction is not allowed in baseline.");
+       error (errortext, 500);
+     }
+     if (input->symbol_mode == CABAC)
+     {
+       snprintf(errortext, ET_SIZE, "CABAC is not allowed in baseline.");
+       error (errortext, 500);
+     }
+   }
+   else if (profile == 77)
+   {
+     // main
+     if (input->sp_periodicity != 0)
+     {
+       snprintf(errortext, ET_SIZE, "SP pictures are not allowed in main.");
+       error (errortext, 500);
+     }
+     if (input->partition_mode != 0)
+     {
+       snprintf(errortext, ET_SIZE, "Data partitioning is not allowed in main.");
+       error (errortext, 500);
+     }
+     if (input->num_slice_groups_minus1 != 0)
+     {
+       snprintf(errortext, ET_SIZE, "num_slice_groups_minus1>0 (FMO) is not allowed in main.");
+       error (errortext, 500);
+     }
+     if (input->redundant_slice_flag != 0)
+     {
+       snprintf(errortext, ET_SIZE, "Redundant pictures are not allowed in main.");
+       error (errortext, 500);
+     }
+   }
+   else if (profile == 88)
+   {
+     // extended
+     if (input->directInferenceFlag == 0)
+     {
+       snprintf(errortext, ET_SIZE, "direct_8x8_inference flag must be equal to 1 in extended.");
+       error (errortext, 500);
+     }
+ 
+     if (input->symbol_mode == CABAC)
+     {
+       snprintf(errortext, ET_SIZE, "CABAC is not allowed in extended.");
+       error (errortext, 500);
+     }
+   }
+ }
+ 
+ static void LevelCheck()
+ {  
+   // Level dependent checks on the configuration.
+ 
+   // LevelIDC >= 30: direct_8x8_inference is switched on with a notice, not an error.
+   if ( (input->LevelIDC>=30) && (input->directInferenceFlag==0))
+   {
+     printf("\nLevelIDC 3.0 and above require direct_8x8_inference to be set to 1. Please check your settings.\n");
+     input->directInferenceFlag=1;
+   }
+   // Interlace (picture or MB level) is only accepted for LevelIDC 21..41.
+   if ( ((input->LevelIDC<21) || (input->LevelIDC>41)) && (input->PicInterlace > 0 || input->MbInterlace > 0) )
+   {
+     // The message starts with a newline; the backslash was missing ("nInterlace").
+     snprintf(errortext, ET_SIZE, "\nInterlace modes only supported for LevelIDC in the range of 2.1 and 4.1. Please check your settings.\n");
+     error (errortext, 500);
+   }
+ 
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/configfile.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/configfile.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/configfile.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,271 ----
+ 
+ /*!
+  ***********************************************************************
+  *  \file
+  *     configfile.h
+  *  \brief
+  *     Prototypes for configfile.c and definitions of used structures.
+  ***********************************************************************
+  */
+ 
+ #include "fmo.h"
+ 
+ #ifndef _CONFIGFILE_H_
+ #define _CONFIGFILE_H_
+ 
+ #define DEFAULTCONFIGFILENAME "encoder.cfg"
+ 
+ #define PROFILE_IDC     88
+ #define LEVEL_IDC       21
+ 
+ 
+ //! One entry of the configuration parameter table (Map[]).
+ typedef struct {
+   char *TokenName;   //!< parameter name as used in messages; a NULL name terminates the table
+   void *Place;       //!< storage of the value; read as int*, char* or double* depending on Type
+   int Type;          //!< 0: int, 1: string, 2: double
+   double Default;    //!< presumably the value used when the parameter is not configured -- TODO confirm against the parser
+   int param_limits; //! 0: no limits, 1: both min and max, 2: only min (i.e. no negatives), 3: specialcase for QPs since min needs bitdepth_qp_scale
+   double min_limit;  //!< lower bound (cast to int when Type is 0)
+   double max_limit;  //!< upper bound (cast to int when Type is 0); not checked when param_limits is 2
+ } Mapping;
+ 
+ 
+ 
+ InputParameters configinput;
+ 
+ 
+ #ifdef INCLUDED_BY_CONFIGFILE_C
+ 
+ Mapping Map[] = {
+     {"ProfileIDC",               &configinput.ProfileIDC,                   0,   (double) PROFILE_IDC,      0,  0.0,              0.0              },
+     {"LevelIDC",                 &configinput.LevelIDC,                     0,   (double) LEVEL_IDC,        0,  0.0,              0.0              },
+     {"FrameRate",                &configinput.FrameRate,                    2,   (double) INIT_FRAME_RATE,  1,  0.0,            100.0              },
+     {"IDRIntraEnable",           &configinput.idr_enable,                   0,   0.0,                       1,  0.0,              1.0              },
+     {"StartFrame",               &configinput.start_frame,                  0,   0.0,                       2,  0.0,              0.0              },
+     {"IntraPeriod",              &configinput.intra_period,                 0,   0.0,                       2,  0.0,              0.0              },
+     {"EnableOpenGOP",            &configinput.EnableOpenGOP,                0,   0.0,                       3,  0.0,              1.0              },
+     {"FramesToBeEncoded",        &configinput.no_frames,                    0,   1.0,                       2,  1.0,              0.0              },
+     {"QPISlice",                 &configinput.qp0,                          0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"QPPSlice",                 &configinput.qpN,                          0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"QPBSlice",                 &configinput.qpB,                          0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"FrameSkip",                &configinput.jumpd,                        0,   0.0,                       2,  0.0,              0.0              },
+     {"UseHadamard",              &configinput.hadamard,                     0,   0.0,                       1,  0.0,              2.0              },
+     {"DisableSubpelME",          &configinput.DisableSubpelME,              0,   0.0,                       1,  0.0,              1.0              },
+     {"SearchRange",              &configinput.search_range,                 0,   16.0,                      2,  0.0,              0.0              },
+     {"NumberReferenceFrames",    &configinput.num_ref_frames,               0,   1.0,                       1,  1.0,             16.0              },
+     {"PList0References",         &configinput.P_List0_refs,                 0,   0.0,                       1,  0.0,             16.0              },
+     {"BList0References",         &configinput.B_List0_refs,                 0,   0.0,                       1,  0.0,             16.0              },
+     {"BList1References",         &configinput.B_List1_refs,                 0,   1.0,                       1,  0.0,             16.0              },
+     {"Log2MaxFNumMinus4",        &configinput.Log2MaxFNumMinus4,            0,   0.0,                       1, -1.0,             12.0              },
+     {"Log2MaxPOCLsbMinus4",      &configinput.Log2MaxPOCLsbMinus4,          0,   2.0,                       1, -1.0,             12.0              },
+     {"GenerateMultiplePPS",      &configinput.GenerateMultiplePPS,          0,   0.0,                       1,  0.0,              1.0              },
+     {"ResendPPS",                &configinput.ResendPPS,                    0,   0.0,                       1,  0.0,              1.0              },
+     {"SourceWidth",              &configinput.img_width,                    0,   176.0,                     2, 16.0,              0.0              },
+     {"SourceHeight",             &configinput.img_height,                   0,   144.0,                     2, 16.0,              0.0              },
+     {"MbLineIntraUpdate",        &configinput.intra_upd,                    0,   0.0,                       1,  0.0,              1.0              },
+     {"SliceMode",                &configinput.slice_mode,                   0,   0.0,                       1,  0.0,              3.0              },
+     {"SliceArgument",            &configinput.slice_argument,               0,   0.0,                       2,  1.0,              1.0              },
+     {"UseConstrainedIntraPred",  &configinput.UseConstrainedIntraPred,      0,   0.0,                       1,  0.0,              1.0              },
+     {"InputFile",                &configinput.infile,                       1,   0.0,                       0,  0.0,              0.0              },
+     {"InputHeaderLength",        &configinput.infile_header,                0,   0.0,                       2,  0.0,              1.0              },
+     {"OutputFile",               &configinput.outfile,                      1,   0.0,                       0,  0.0,              0.0              },
+     {"ReconFile",                &configinput.ReconFile,                    1,   0.0,                       0,  0.0,              0.0              },
+     {"TraceFile",                &configinput.TraceFile,                    1,   0.0,                       0,  0.0,              0.0              },
+     {"DisposableP",              &configinput.DisposableP,                  0,   0.0,                       1,  0.0,              1.0              },
+     {"DispPQPOffset",            &configinput.DispPQPOffset,                0,   0.0,                       0,-51.0,             51.0              },
+     {"NumberBFrames",            &configinput.successive_Bframe,            0,   0.0,                       2,  0.0,              0.0              },
+     {"BRefPicQPOffset",          &configinput.qpBRSOffset,                  0,   0.0,                       0,-51.0,             51.0              },
+     {"DirectModeType",           &configinput.direct_spatial_mv_pred_flag,  0,   0.0,                       1,  0.0,              1.0              },
+     {"DirectInferenceFlag",      &configinput.directInferenceFlag,          0,   0.0,                       1,  0.0,              1.0              },
+     {"SPPicturePeriodicity",     &configinput.sp_periodicity,               0,   0.0,                       2,  0.0,              0.0              },
+     {"QPSPSlice",                &configinput.qpsp,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"QPSP2Slice",               &configinput.qpsp_pred,                    0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"SymbolMode",               &configinput.symbol_mode,                  0,   0.0,                       1,  (double) UVLC,    (double) CABAC   },
+     {"OutFileMode",              &configinput.of_mode,                      0,   0.0,                       1,  0.0,              1.0              },
+     {"PartitionMode",            &configinput.partition_mode,               0,   0.0,                       1,  0.0,              1.0              },
+     {"InterSearch16x16",         &configinput.InterSearch16x16,             0,   1.0,                       1,  0.0,              1.0              },
+     {"InterSearch16x8",          &configinput.InterSearch16x8 ,             0,   1.0,                       1,  0.0,              1.0              },
+     {"InterSearch8x16",          &configinput.InterSearch8x16,              0,   1.0,                       1,  0.0,              1.0              },
+     {"InterSearch8x8",           &configinput.InterSearch8x8 ,              0,   1.0,                       1,  0.0,              1.0              },
+     {"InterSearch8x4",           &configinput.InterSearch8x4,               0,   1.0,                       1,  0.0,              1.0              },
+     {"InterSearch4x8",           &configinput.InterSearch4x8,               0,   1.0,                       1,  0.0,              1.0              },
+     {"InterSearch4x4",           &configinput.InterSearch4x4,               0,   1.0,                       1,  0.0,              1.0              },
+     {"IntraDisableInterOnly",    &configinput.IntraDisableInterOnly,        0,   0.0,                       1,  0.0,              1.0              },
+     {"Intra4x4ParDisable",       &configinput.Intra4x4ParDisable,           0,   0.0,                       1,  0.0,              1.0              },
+     {"Intra4x4DiagDisable",      &configinput.Intra4x4DiagDisable,          0,   0.0,                       1,  0.0,              1.0              },
+     {"Intra4x4DirDisable",       &configinput.Intra4x4DirDisable,           0,   0.0,                       1,  0.0,              1.0              },
+     {"Intra16x16ParDisable",     &configinput.Intra16x16ParDisable,         0,   0.0,                       1,  0.0,              1.0              },
+     {"Intra16x16PlaneDisable",   &configinput.Intra16x16PlaneDisable,       0,   0.0,                       1,  0.0,              1.0              },
+     {"EnableIPCM",               &configinput.EnableIPCM,                   0,   0.0,                       1,  0.0,              1.0              },
+     {"ChromaIntraDisable",       &configinput.ChromaIntraDisable,           0,   0.0,                       1,  0.0,              1.0              },
+ 
+ #ifdef _FULL_SEARCH_RANGE_
+     {"RestrictSearchRange",      &configinput.full_search,                  0,   2.0,                       1,  0.0,              2.0              },
+ #endif
+ #ifdef _ADAPT_LAST_GROUP_
+     {"LastFrameNumber",          &configinput.last_frame,                   0,   0.0,                       2,  0.0,              0.0              },
+ #endif
+ #ifdef _CHANGE_QP_
+     {"ChangeQPI",                &configinput.qp02,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"ChangeQPP",                &configinput.qpN2,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"ChangeQPB",                &configinput.qpB2,                         0,   24.0,                      3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"ChangeQPBSRefOffset",      &configinput.qpBRS2Offset,                 0,   0.0,                       1,-51.0,             51.0              },
+     {"ChangeQPStart",            &configinput.qp2start,                     0,   0.0,                       2,  0.0,              0.0              },
+ #endif
+     {"RDOptimization",           &configinput.rdopt,                        0,   0.0,                       1,  0.0,              2.0              },
+     {"DisableThresholding",      &configinput.disthres,                     0,   0.0,                       1,  0.0,              1.0              },
+     {"DisableBSkipRDO",          &configinput.nobskip,                      0,   0.0,                       1,  0.0,              1.0              },
+     {"LossRateA",                &configinput.LossRateA,                    0,   0.0,                       2,  0.0,              0.0              },
+     {"LossRateB",                &configinput.LossRateB,                    0,   0.0,                       2,  0.0,              0.0              },
+     {"LossRateC",                &configinput.LossRateC,                    0,   0.0,                       2,  0.0,              0.0              },
+     {"NumberOfDecoders",         &configinput.NoOfDecoders,                 0,   0.0,                       2,  0.0,              0.0              },
+     {"RestrictRefFrames",        &configinput.RestrictRef ,                 0,   0.0,                       1,  0.0,              1.0              },
+ #ifdef _LEAKYBUCKET_
+     {"NumberofLeakyBuckets",     &configinput.NumberLeakyBuckets,           0,   2.0,                       1,  2.0,              255.0            },
+     {"LeakyBucketRateFile",      &configinput.LeakyBucketRateFile,          1,   0.0,                       0,  0.0,              0.0              },
+     {"LeakyBucketParamFile",     &configinput.LeakyBucketParamFile,         1,   0.0,                       0,  0.0,              0.0              },
+ #endif
+     {"PicInterlace",             &configinput.PicInterlace,                 0,   0.0,                       1,  0.0,              2.0              },
+     {"MbInterlace",              &configinput.MbInterlace,                  0,   0.0,                       1,  0.0,              2.0              },
+ 
+     {"IntraBottom",              &configinput.IntraBottom,                  0,   0.0,                       1,  0.0,              1.0              },
+ 
+     {"NumberFramesInEnhancementLayerSubSequence", &configinput.NumFramesInELSubSeq, 0,   0.0,               2,  0.0,              0.0              },
+     {"NumberOfFrameInSecondIGOP",&configinput.NumFrameIn2ndIGOP,            0,   0.0,                       2,  0.0,              0.0              },
+     {"RandomIntraMBRefresh",     &configinput.RandomIntraMBRefresh,         0,   0.0,                       2,  0.0,              0.0              },
+ 		
+ 		
+     {"WeightedPrediction",       &configinput.WeightedPrediction,           0,   0.0,                       1,  0.0,              1.0              },
+     {"WeightedBiprediction",     &configinput.WeightedBiprediction,         0,   0.0,                       1,  0.0,              2.0              },
+     {"UseWeightedReferenceME",   &configinput.UseWeightedReferenceME,       0,   0.0,                       1,  0.0,              1.0              },
+     {"RDPictureDecision",        &configinput.RDPictureDecision,            0,   0.0,                       1,  0.0,              1.0              },
+     {"RDPictureIntra",           &configinput.RDPictureIntra,               0,   0.0,                       1,  0.0,              1.0              },    
+     {"RDPSliceWeightOnly",       &configinput.RDPSliceWeightOnly,           0,   1.0,                       1,  0.0,              1.0              },    
+     {"RDPSliceBTest",            &configinput.RDPSliceBTest,                0,   0.0,                       1,  0.0,              1.0              },
+     {"RDBSliceWeightOnly",       &configinput.RDBSliceWeightOnly,           0,   0.0,                       1,  0.0,              1.0              },    
+ 
+     {"SkipIntraInInterSlices",   &configinput.SkipIntraInInterSlices,       0,   0.0,                       1,  0.0,              1.0              },    
+     {"BReferencePictures",       &configinput.BRefPictures,                 0,   0.0,                       1,  0.0,              2.0              },
+     {"PyramidCoding",            &configinput.PyramidCoding,                0,   0.0,                       1,  0.0,              3.0              },
+     {"PyramidLevelQPEnable",     &configinput.PyramidLevelQPEnable,         0,   0.0,                       1,  0.0,              1.0              },
+     {"ExplicitPyramidFormat",    &configinput.ExplicitPyramidFormat,        1,   0.0,                       0,  0.0,              0.0              },
+     {"PyramidRefReorder",        &configinput.PyramidRefReorder,            0,   0.0,                       1,  0.0,              1.0              },
+     {"PocMemoryManagement",      &configinput.PocMemoryManagement,          0,   0.0,                       1,  0.0,              1.0              },
+ 
+     {"BiPredMotionEstimation",   &configinput.BiPredMotionEstimation,       0,   0.0,                       1,  0.0,              1.0              },
+     {"BiPredMERefinements",      &configinput.BiPredMERefinements,          0,   0.0,                       1,  0.0,              5.0              },
+     {"BiPredMESearchRange",      &configinput.BiPredMESearchRange,          0,   8.0,                       2,  0.0,              0.0              },
+     {"BiPredMESubPel",           &configinput.BiPredMESubPel,               0,   1.0,                       1,  0.0,              2.0              },
+ 
+     {"LoopFilterParametersFlag", &configinput.LFSendParameters,             0,   0.0,                       1,  0.0,              1.0              },
+     {"LoopFilterDisable",        &configinput.LFDisableIdc,                 0,   0.0,                       1,  0.0,              2.0              },
+     {"LoopFilterAlphaC0Offset",  &configinput.LFAlphaC0Offset,              0,   0.0,                       1, -6.0,              6.0              },
+     {"LoopFilterBetaOffset",     &configinput.LFBetaOffset,                 0,   0.0,                       1, -6.0,              6.0              },
+     {"SparePictureOption",       &configinput.SparePictureOption,           0,   0.0,                       1,  0.0,              1.0              },
+     {"SparePictureDetectionThr", &configinput.SPDetectionThreshold,         0,   0.0,                       2,  0.0,              0.0              },
+     {"SparePicturePercentageThr",&configinput.SPPercentageThreshold,        0,   0.0,                       2,  0.0,            100.0              },
+ 
+     {"num_slice_groups_minus1",  &configinput.num_slice_groups_minus1,      0,   0.0,                       1,  0.0,  (double)MAXSLICEGROUPIDS - 1 },
+     {"slice_group_map_type",     &configinput.slice_group_map_type,         0,   0.0,                       1,  0.0,              6.0              },               
+     {"slice_group_change_direction_flag", &configinput.slice_group_change_direction_flag, 0,   0.0,         1,  0.0,              2.0              },
+     {"slice_group_change_rate_minus1",    &configinput.slice_group_change_rate_minus1,    0,   0.0,         2,  0.0,              1.0              },
+     {"SliceGroupConfigFileName", &configinput.SliceGroupConfigFileName,     1,   0.0,                       0,  0.0,              0.0              },
+ 		
+ 
+     {"UseRedundantSlice",        &configinput.redundant_slice_flag,         0,   0.0,                       1,  0.0,              1.0              },
+     {"PicOrderCntType",          &configinput.pic_order_cnt_type,           0,   0.0,                       1,  0.0,              2.0              },
+ 
+     {"ContextInitMethod",        &configinput.context_init_method,          0,   0.0,                       1,  0.0,              1.0              },
+     {"FixedModelNumber",         &configinput.model_number,                 0,   0.0,                       1,  0.0,              2.0              },
+ 
+     {"Transform8x8Mode",         &configinput.Transform8x8Mode,             0,   0.0,                       1,  0.0,              2.0              },
+     {"ReportFrameStats",         &configinput.ReportFrameStats,             0,   0.0,                       1,  0.0,              1.0              },
+     {"DisplayEncParams",         &configinput.DisplayEncParams,             0,   0.0,                       1,  0.0,              1.0              },
+     {"Verbose",                  &configinput.Verbose,                      0,   1.0,                       1,  0.0,              2.0              },
+     // Rate Control
+     {"RateControlEnable",        &configinput.RCEnable,                     0,   0.0,                       1,  0.0,              1.0              },
+     {"Bitrate",                  &configinput.bit_rate,                     0,   0.0,                       2,  0.0,              0.0              },
+     {"InitialQP",                &configinput.SeinitialQP,                  0,   0.0,                       3,  (double) MIN_QP,  (double) MAX_QP  },
+     {"BasicUnit",                &configinput.basicunit,                    0,   0.0,                       2,  0.0,              0.0              },
+     {"ChannelType",              &configinput.channel_type,                 0,   0.0,                       1,  0.0,              1.0              },
+ 
+     // Q_Matrix
+     {"QmatrixFile",              &configinput.QmatrixFile,                  1,   0.0,                       0,  0.0,              0.0              },
+     {"ScalingMatrixPresentFlag", &configinput.ScalingMatrixPresentFlag,     0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag0",  &configinput.ScalingListPresentFlag[0],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag1",  &configinput.ScalingListPresentFlag[1],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag2",  &configinput.ScalingListPresentFlag[2],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag3",  &configinput.ScalingListPresentFlag[3],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag4",  &configinput.ScalingListPresentFlag[4],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag5",  &configinput.ScalingListPresentFlag[5],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag6",  &configinput.ScalingListPresentFlag[6],    0,   0.0,                       1,  0.0,              3.0              },
+     {"ScalingListPresentFlag7",  &configinput.ScalingListPresentFlag[7],    0,   0.0,                       1,  0.0,              3.0              },
+ 
+     // Fast ME enable
+     {"UseFME",                   &configinput.FMEnable,                     0,   0.0,                       1,  0.0,              3.0              },
+     {"EPZSPattern",              &configinput.EPZSPattern,                  0,   2.0,                       1,  0.0,              3.0              },
+     {"EPZSDualRefinement",       &configinput.EPZSDual,                     0,   3.0,                       1,  0.0,              4.0              },
+     {"EPZSFixedPredictors",      &configinput.EPZSFixed,                    0,   2.0,                       1,  0.0,              2.0              },
+     {"EPZSTemporal",             &configinput.EPZSTemporal,                 0,   1.0,                       1,  0.0,              1.0              },
+     {"EPZSSpatialMem",           &configinput.EPZSSpatialMem,               0,   1.0,                       1,  0.0,              1.0              },
+     {"EPZSMinThresScale",        &configinput.EPZSMinThresScale,            0,   0.0,                       0,  0.0,              0.0              },
+     {"EPZSMaxThresScale",        &configinput.EPZSMaxThresScale,            0,   1.0,                       0,  0.0,              0.0              },
+     {"EPZSMedThresScale",        &configinput.EPZSMedThresScale,            0,   1.0,                       0,  0.0,              0.0              },
+     {"ChromaQPOffset",           &configinput.chroma_qp_index_offset,       0,   0.0,                       1,-51.0,             51.0              },
+ 
+     // Fidelity Range Extensions
+     {"BitDepthLuma",             &configinput.BitDepthLuma,                 0,   8.0,                       1,  8.0,             12.0              },
+     {"BitDepthChroma",           &configinput.BitDepthChroma,               0,   8.0,                       1,  8.0,             12.0              },
+     {"YUVFormat",                &configinput.yuv_format,                   0,   1.0,                       1,  0.0,              3.0              },
+     {"RGBInput",                 &configinput.rgb_input_flag,               0,   0.0,                       1,  0.0,              1.0              },
+     {"CbQPOffset",               &configinput.cb_qp_index_offset,           0,   0.0,                       1,-51.0,             51.0              },
+     {"CrQPOffset",               &configinput.cr_qp_index_offset,           0,   0.0,                       1,-51.0,             51.0              },
+    
+     // Lossless Coding
+     {"QPPrimeYZeroTransformBypassFlag", &configinput.lossless_qpprime_y_zero_flag,      0,   0.0,           1,  0.0,              1.0              },
+ 
+     // Residue Color Transform
+     {"ResidueTransformFlag",     &configinput.residue_transform_flag ,      0,   0.0,                       1,  0.0,              1.0              },
+ 
+     // Explicit Lambda Usage
+     {"UseExplicitLambdaParams",  &configinput.UseExplicitLambdaParams,      0,   0.0,                       1,  0.0,              3.0              },
+     {"LambdaWeightPslice",       &configinput.LambdaWeight[0],              2,   0.68,                      2,  0.0,              0.0              },
+     {"LambdaWeightBslice",       &configinput.LambdaWeight[1],              2,   2.00,                      2,  0.0,              0.0              },
+     {"LambdaWeightIslice",       &configinput.LambdaWeight[2],              2,   0.65,                      2,  0.0,              0.0              },
+     {"LambdaWeightSPslice",      &configinput.LambdaWeight[3],              2,   1.50,                      2,  0.0,              0.0              },
+     {"LambdaWeightSIslice",      &configinput.LambdaWeight[4],              2,   0.65,                      2,  0.0,              0.0              },
+     {"LambdaWeightRefBslice",    &configinput.LambdaWeight[5],              2,   1.50,                      2,  0.0,              0.0              },
+     
+     {"QOffsetMatrixFile",        &configinput.QOffsetMatrixFile,            1,   0.0,                       0,  0.0,              0.0              },
+     {"OffsetMatrixPresentFlag",  &configinput.OffsetMatrixPresentFlag,      0,   0.0,                       1,  0.0,              1.0              },
+ 
+     // Fast Mode Decision
+     {"EarlySkipEnable",          &configinput.EarlySkipEnable,              0,   0.0,                       1,  0.0,              1.0              },
+     {"SelectiveIntraEnable",     &configinput.SelectiveIntraEnable,         0,   0.0,                       1,  0.0,              1.0              },
+ 
+     // Adaptive rounding technique based on JVT-N011
+     {"AdaptiveRounding",         &configinput.AdaptiveRounding,             0,   0.0,                       1,  0.0,              1.0              },
+     {"AdaptRndPeriod",           &configinput.AdaptRndPeriod,               0,  16.0,                       2,  0.0,              0.0              },
+     {"AdaptRndChroma",           &configinput.AdaptRndChroma,               0,   0.0,                       1,  0.0,              1.0              },
+     {"AdaptRndWFactorIRef",      &configinput.AdaptRndWFactor[1][I_SLICE],  0,   4.0,                       1,  0.0,           1024.0              },
+     {"AdaptRndWFactorPRef",      &configinput.AdaptRndWFactor[1][P_SLICE],  0,   4.0,                       1,  0.0,           1024.0              },
+     {"AdaptRndWFactorBRef",      &configinput.AdaptRndWFactor[1][B_SLICE],  0,   4.0,                       1,  0.0,           1024.0              },
+     {"AdaptRndWFactorINRef",     &configinput.AdaptRndWFactor[0][I_SLICE],  0,   4.0,                       1,  0.0,           1024.0              },
+     {"AdaptRndWFactorPNRef",     &configinput.AdaptRndWFactor[0][P_SLICE],  0,   4.0,                       1,  0.0,           1024.0              },
+     {"AdaptRndWFactorBNRef",     &configinput.AdaptRndWFactor[0][B_SLICE],  0,   4.0,                       1,  0.0,           1024.0              },
+ 
+     {NULL,                       NULL,                                     -1,   0.0,                       0,  0.0,              0.0              }
+ };
+ 
+ #endif
+ 
+ #ifndef INCLUDED_BY_CONFIGFILE_C
+ extern Mapping Map[];
+ #endif
+ 
+ 
+ void Configure (int ac, char *av[]);
+ void PatchInputNoFrames();
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/context_ini.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/context_ini.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/context_ini.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,365 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file context_ini.c
+  *
+  * \brief
+  *    CABAC context initializations
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Heiko Schwarz                   <hschwarz at hhi.de>
+  **************************************************************************************
+  */
+ 
+ #define CONTEXT_INI_C
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ 
+ #include "global.h"
+ 
+ #include "ctx_tables.h"
+ #include "cabac.h"
+ 
+ #define DEFAULT_CTX_MODEL   0      /* model used for I slices and when no stored model applies */
+ #define RELIABLE_COUNT      32.0   /* XRate() weights a context fully once ctx->count reaches this */
+ #define FRAME_TYPES         4      /* length of the slice-type dimension of initialized/model_number */
+ #define FIXED               0      /* context_init_method value that selects input->model_number */
+ 
+ 
+ int                     num_mb_per_slice;   // set in create_context_memory(): slice_argument if slice_mode==1, else MBs per frame
+ int                     number_of_slices;   // set in create_context_memory(): MBs per frame / num_mb_per_slice, rounded up
+ int***                  initialized;        // [3][FRAME_TYPES][number_of_slices]; non-zero once store_contexts() stored a model
+ int***                  model_number;       // same layout as initialized; model index written by GetCtxModelNumber()
+ 
+ 
+ double entropy    [128];     // filled in create_context_memory(): entropy[k] = log2 (probability[k])
+ double probability[128] =    // entries 64..127 are tabulated; entries 0..63 are set to 1.0 - probability[127-k] in create_context_memory()
+ {
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   0.000000, 0.000000, 0.000000, 0.000000,    0.000000, 0.000000, 0.000000, 0.000000,
+   //--------------------------------------------------------------------------------
+   0.500000, 0.474609, 0.450507, 0.427629,    0.405912, 0.385299, 0.365732, 0.347159,
+   0.329530, 0.312795, 0.296911, 0.281833,    0.267520, 0.253935, 0.241039, 0.228799,
+   0.217180, 0.206151, 0.195682, 0.185744,    0.176312, 0.167358, 0.158859, 0.150792,
+   0.143134, 0.135866, 0.128966, 0.122417,    0.116200, 0.110299, 0.104698, 0.099381,
+   0.094334, 0.089543, 0.084996, 0.080680,    0.076583, 0.072694, 0.069002, 0.065498,
+   0.062172, 0.059014, 0.056018, 0.053173,    0.050473, 0.047909, 0.045476, 0.043167,
+   0.040975, 0.038894, 0.036919, 0.035044,    0.033264, 0.031575, 0.029972, 0.028450,
+   0.027005, 0.025633, 0.024332, 0.023096,    0.021923, 0.020810, 0.019753, 0.018750
+ };
+ 
+ 
+ 
+ /*!
+  * \brief
+  *    Allocate the initialized[][][] and model_number[][][] tables and
+  *    complete the probability[] and entropy[] tables.
+  */
+ void create_context_memory ()
+ {
+   int struct_idx, type_idx, slice_idx, state;
+   int frame_mbs = img->FrameSizeInMbs; // number of macroblocks for frame
+ 
+   // slice_mode 1 takes the slice size from slice_argument; otherwise the whole frame counts as one slice
+   if (input->slice_mode == 1)
+     num_mb_per_slice = input->slice_argument;
+   else
+     num_mb_per_slice = frame_mbs;
+   number_of_slices = (frame_mbs + num_mb_per_slice - 1) / num_mb_per_slice; // rounded up
+ 
+   //===== first dimension: 3 entries (update_field_frame_contexts uses 0 for frame, 1 for field) =====
+   initialized = (int***) malloc (3 * sizeof(int**));
+   if (initialized == NULL)
+     no_mem_exit ("create_context_memory: initialized");
+   model_number = (int***) malloc (3 * sizeof(int**));
+   if (model_number == NULL)
+     no_mem_exit ("create_context_memory: model_number");
+ 
+   for (struct_idx = 0; struct_idx < 3; struct_idx++)
+   {
+     //===== second dimension: FRAME_TYPES entries =====
+     initialized[struct_idx] = (int**) malloc (FRAME_TYPES * sizeof(int*));
+     if (initialized[struct_idx] == NULL)
+       no_mem_exit ("create_context_memory: initialized");
+     model_number[struct_idx] = (int**) malloc (FRAME_TYPES * sizeof(int*));
+     if (model_number[struct_idx] == NULL)
+       no_mem_exit ("create_context_memory: model_number");
+ 
+     //===== third dimension: one int per slice =====
+     for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+     {
+       initialized[struct_idx][type_idx] = (int*) malloc (number_of_slices * sizeof(int));
+       if (initialized[struct_idx][type_idx] == NULL)
+         no_mem_exit ("create_context_memory: initialized");
+       model_number[struct_idx][type_idx] = (int*) malloc (number_of_slices * sizeof(int));
+       if (model_number[struct_idx][type_idx] == NULL)
+         no_mem_exit ("create_context_memory: model_number");
+     }
+   }
+ 
+   //===== set all context sets as "uninitialized" =====
+   for (struct_idx = 0; struct_idx < 3; struct_idx++)
+     for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+       for (slice_idx = 0; slice_idx < number_of_slices; slice_idx++)
+         initialized[struct_idx][type_idx][slice_idx] = 0;
+ 
+   //----- init tables: mirror the tabulated upper half, then entropy[] = log2 (probability[]) -----
+   for (state = 0; state < 64; state++)
+   {
+     probability[state] = 1.0 - probability[127-state];
+     entropy    [state] = log10(probability[    state])/log10(2.0);
+     entropy[127-state] = log10(probability[127-state])/log10(2.0);
+   }
+ }
+ 
+ 
+ 
+ 
+ /*! \brief Release every array allocated by create_context_memory (), innermost level first. */
+ void free_context_memory ()
+ {
+   int struct_idx, type_idx;
+   for (struct_idx = 0; struct_idx < 3; struct_idx++)
+   {
+     for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+     {
+       free (initialized [struct_idx][type_idx]);   // per-slice int arrays
+       free (model_number[struct_idx][type_idx]);
+     }
+     free (initialized [struct_idx]);               // arrays of FRAME_TYPES pointers
+     free (model_number[struct_idx]);
+   }
+   free (initialized);                              // top-level arrays of 3 pointers
+   free (model_number);
+ }
+ 
+ 
+ 
+ 
+ 
+ 
+ #define BIARI_CTX_INIT2(ii,jj,ctx,tab,num) /* initialize ctx[0..ii-1][0..jj-1] from entry num of the tab tables */ \
+ { /* uses the variables i and j of the expanding function as loop counters */ \
+   for (i=0; i<ii; i++) \
+   for (j=0; j<jj; j++) \
+   { \
+     if      (img->type==I_SLICE)  biari_init_context (&(ctx[i][j]), &(tab ## _I[num][i][j][0])); /* I slices: table named tab with suffix _I */ \
+     else                            biari_init_context (&(ctx[i][j]), &(tab ## _P[num][i][j][0])); /* all other slice types: suffix _P */ \
+   } \
+ }
+ #define BIARI_CTX_INIT1(jj,ctx,tab,num) /* one-dimensional variant: initialize ctx[0..jj-1] from row 0 of entry num */ \
+ { /* uses the variable j of the expanding function as loop counter */ \
+   for (j=0; j<jj; j++) \
+   { \
+     if      (img->type==I_SLICE)  biari_init_context (&(ctx[j]), &(tab ## _I[num][0][j][0])); /* I slices: table named tab with suffix _I */ \
+     else                            biari_init_context (&(ctx[j]), &(tab ## _P[num][0][j][0])); /* all other slice types: suffix _P */ \
+   } \
+ }
+ 
+ 
+ 
+ /*!
+  * \brief
+  *    Set img->model_number for the current slice.
+  *
+  *    I slices use DEFAULT_CTX_MODEL and FIXED initialization uses
+  *    input->model_number. Otherwise the model stored for this slice position
+  *    is used, then the one stored for the preceding position, then the default.
+  */
+ void SetCtxModelNumber ()
+ {
+   int struct_idx = img->field_picture;
+   int type_idx   = img->type;
+   // position of the slice: number of its first macroblock divided by the slice size
+   int slice_idx  = img->currentSlice->start_mb_nr / num_mb_per_slice;
+ 
+   // cases that never consult the stored models
+   if (img->type == I_SLICE)
+     img->model_number = DEFAULT_CTX_MODEL;
+   else if (input->context_init_method == FIXED)
+     img->model_number = input->model_number;
+   // model stored for this slice position
+   else if (initialized[struct_idx][type_idx][slice_idx])
+     img->model_number = model_number[struct_idx][type_idx][slice_idx];
+   // otherwise the model stored for the preceding slice position, if there is one
+   else if (slice_idx && initialized[struct_idx][type_idx][slice_idx-1])
+     img->model_number = model_number[struct_idx][type_idx][slice_idx-1];
+   // nothing stored yet
+   else
+     img->model_number = DEFAULT_CTX_MODEL;
+ }
+ 
+ 
+ 
+ /*! \brief Initialize all motion and texture coding contexts of the current slice from
+  *         the INIT_* tables, using table entry img->model_number. */
+ void init_contexts ()
+ {
+   MotionInfoContexts*  mot = img->currentSlice->mot_ctx;
+   TextureInfoContexts* tex = img->currentSlice->tex_ctx;
+   int i, j;   // referenced by name inside BIARI_CTX_INIT1 / BIARI_CTX_INIT2
+   //--- motion coding contexts ---
+   BIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX,        mot->mb_type_contexts,        INIT_MB_TYPE,        img->model_number);
+   BIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX,        mot->b8_type_contexts,        INIT_B8_TYPE,        img->model_number);
+   BIARI_CTX_INIT2 (2, NUM_MV_RES_CTX,         mot->mv_res_contexts,         INIT_MV_RES,         img->model_number);
+   BIARI_CTX_INIT2 (2, NUM_REF_NO_CTX,         mot->ref_no_contexts,         INIT_REF_NO,         img->model_number);
+   BIARI_CTX_INIT1 (   NUM_DELTA_QP_CTX,       mot->delta_qp_contexts,       INIT_DELTA_QP,       img->model_number);
+   BIARI_CTX_INIT1 (   NUM_MB_AFF_CTX,         mot->mb_aff_contexts,         INIT_MB_AFF,         img->model_number);
+   BIARI_CTX_INIT1 (   NUM_TRANSFORM_SIZE_CTX, mot->transform_size_contexts, INIT_TRANSFORM_SIZE, img->model_number);
+   //--- texture coding contexts ---
+   BIARI_CTX_INIT1 (                 NUM_IPR_CTX,  tex->ipr_contexts,      INIT_IPR,      img->model_number);
+   BIARI_CTX_INIT1 (                 NUM_CIPR_CTX, tex->cipr_contexts,     INIT_CIPR,     img->model_number);
+   BIARI_CTX_INIT2 (3,               NUM_CBP_CTX,  tex->cbp_contexts,      INIT_CBP,      img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tex->bcbp_contexts,     INIT_BCBP,     img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX,  tex->map_contexts,      INIT_MAP,      img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tex->last_contexts,     INIT_LAST,     img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX,  tex->one_contexts,      INIT_ONE,      img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX,  tex->abs_contexts,      INIT_ABS,      img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX,  tex->fld_map_contexts,  INIT_FLD_MAP,  img->model_number);
+   BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tex->fld_last_contexts, INIT_FLD_LAST, img->model_number);
+ }
+ 
+ 
+ 
+ 
+ 
+ /*! \brief Returns -weight * (p[c]*entropy[m] + p[127-c]*entropy[127-m]), where c is the table index
+  *         of ctx's current state and m the state given by model[0], model[1] at img->qp. */
+ double XRate (BiContextTypePtr ctx, const int* model)
+ {
+   int     qp        = max(0,img->qp);
+   int     mod_state = ((model[0]*qp)>>4)+model[1];
+   int     ctx_state = (ctx->MPS ? 64+ctx->state : 63-ctx->state);
+   double  weight    = (double)ctx->count/(double)RELIABLE_COUNT;
+   double  xr        = 0.0;
+ 
+   if (1.0 < weight)     weight    = 1.0;     // a count of RELIABLE_COUNT or more gives full weight
+   if (mod_state < 0)    mod_state = 0;       // clamp to the index range of probability[] / entropy[]
+   if (mod_state > 127)  mod_state = 127;
+   xr -= weight * probability[    ctx_state] * entropy[    mod_state];
+   xr -= weight * probability[127-ctx_state] * entropy[127-mod_state];
+   return xr;
+ }
+ 
+ #define ADD_XRATE2(ii,jj,ctx,tab,num) /* add XRate() of ctx[0..ii-1][0..jj-1] against entry num of the tab tables to xr */ \
+ { /* uses the variables i, j and xr of the expanding function */ \
+   for (i=0; i<ii; i++) \
+   for (j=0; j<jj; j++) \
+   { \
+     if      (img->type==I_SLICE)  xr += XRate (&(ctx[i][j]), &(tab ## _I[num][i][j][0])); /* I slices: table named tab with suffix _I */ \
+     else                            xr += XRate (&(ctx[i][j]), &(tab ## _P[num][i][j][0])); /* all other slice types: suffix _P */ \
+   } \
+ }
+ #define ADD_XRATE1(jj,ctx,tab,num) /* one-dimensional variant: ctx[0..jj-1] against row 0 of entry num */ \
+ { /* uses the variables j and xr of the expanding function */ \
+   for (j=0; j<jj; j++) \
+   { \
+     if      (img->type==I_SLICE)  xr += XRate (&(ctx[j]), &(tab ## _I[num][0][j][0])); /* I slices: table named tab with suffix _I */ \
+     else                            xr += XRate (&(ctx[j]), &(tab ## _P[num][0][j][0])); /* all other slice types: suffix _P */ \
+   } \
+ }
+ 
+ 
+ /*! \brief Sum XRate() over all contexts in mc and tc for each available model and store the index
+  *         of the model with the smallest sum in *mnumber (left unchanged if no sum is below 1e30). */
+ void GetCtxModelNumber (int* mnumber, MotionInfoContexts* mc, TextureInfoContexts* tc)
+ {
+   int     cand, j, i;                 // i, j: referenced by name inside ADD_XRATE1 / ADD_XRATE2
+   int     num_cand = (img->type==I_SLICE ? NUM_CTX_MODELS_I : NUM_CTX_MODELS_P);
+   double  xr, best_xr = 1e30;         // xr: accumulated by the ADD_XRATE macros
+ 
+   for (cand=0; cand<num_cand; cand++)
+   {
+     xr = 0.0;
+     //--- motion coding contexts ---
+     ADD_XRATE2 (3, NUM_MB_TYPE_CTX,        mc->mb_type_contexts,        INIT_MB_TYPE,        cand);
+     ADD_XRATE2 (2, NUM_B8_TYPE_CTX,        mc->b8_type_contexts,        INIT_B8_TYPE,        cand);
+     ADD_XRATE2 (2, NUM_MV_RES_CTX,         mc->mv_res_contexts,         INIT_MV_RES,         cand);
+     ADD_XRATE2 (2, NUM_REF_NO_CTX,         mc->ref_no_contexts,         INIT_REF_NO,         cand);
+     ADD_XRATE1 (   NUM_DELTA_QP_CTX,       mc->delta_qp_contexts,       INIT_DELTA_QP,       cand);
+     ADD_XRATE1 (   NUM_MB_AFF_CTX,         mc->mb_aff_contexts,         INIT_MB_AFF,         cand);
+     ADD_XRATE1 (   NUM_TRANSFORM_SIZE_CTX, mc->transform_size_contexts, INIT_TRANSFORM_SIZE, cand);
+     //--- texture coding contexts ---
+     ADD_XRATE1 (                  NUM_IPR_CTX,  tc->ipr_contexts,       INIT_IPR,       cand);
+     ADD_XRATE1 (                  NUM_CIPR_CTX, tc->cipr_contexts,      INIT_CIPR,      cand);
+     ADD_XRATE2 (3,                NUM_CBP_CTX,  tc->cbp_contexts,       INIT_CBP,       cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_BCBP_CTX, tc->bcbp_contexts,      INIT_BCBP,      cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_MAP_CTX,  tc->map_contexts,       INIT_MAP,       cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_LAST_CTX, tc->last_contexts,      INIT_LAST,      cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_ONE_CTX,  tc->one_contexts,       INIT_ONE,       cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_ABS_CTX,  tc->abs_contexts,       INIT_ABS,       cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_MAP_CTX,  tc->fld_map_contexts,   INIT_FLD_MAP,   cand);
+     ADD_XRATE2 (NUM_BLOCK_TYPES,  NUM_LAST_CTX, tc->fld_last_contexts,  INIT_FLD_LAST,  cand);
+ 
+     if (!(xr<best_xr))
+       continue;                       // not better than the best model so far
+     best_xr   = xr;
+     *mnumber  = cand;
+   }
+ }
+ 
+ #undef ADD_XRATE2
+ #undef ADD_XRATE1
+ 
+ 
+ 
+ 
+ 
+ 
+ /*! \brief Unless input->context_init_method is 0, mark the current slice position as initialized
+  *         and store the model selected by GetCtxModelNumber () for it. */
+ void store_contexts ()
+ {
+   int struct_idx = img->field_picture;
+   int type_idx   = img->type;
+   int slice_idx  = img->currentSlice->start_mb_nr / num_mb_per_slice;
+ 
+   if (!input->context_init_method)
+     return;   // nothing is stored in this mode
+ 
+   // NOTE(review): type_idx indexes FRAME_TYPES entries -- confirm img->type is always below FRAME_TYPES
+   initialized [struct_idx][type_idx][slice_idx] = 1;
+   GetCtxModelNumber (&model_number[struct_idx][type_idx][slice_idx],
+                      img->currentSlice->mot_ctx, img->currentSlice->tex_ctx);
+ }
+ 
+ 
+ /*! \brief Copy the initialized flags and stored models between the frame tables (index 0) and field tables (index 1).
+  *  \param field  non-zero: frame entry i is copied from field entry i/2;
+  *                zero:     field entry i is copied from frame entry 2*i */
+ void update_field_frame_contexts (int field)
+ {
+   int type_idx, slice_idx;
+ 
+   for (type_idx = 0; type_idx < FRAME_TYPES; type_idx++)
+   {
+     if (field)
+     {
+       // set frame contexts
+       for (slice_idx = 0; slice_idx < number_of_slices; slice_idx++)
+       {
+         initialized [0][type_idx][slice_idx] = initialized [1][type_idx][slice_idx>>1];
+         model_number[0][type_idx][slice_idx] = model_number[1][type_idx][slice_idx>>1];
+       }
+     }
+     else
+     {
+       // set field contexts
+       for (slice_idx = 0; slice_idx < ((number_of_slices+1)>>1); slice_idx++)
+       {
+         initialized [1][type_idx][slice_idx] = initialized [0][type_idx][slice_idx<<1];
+         model_number[1][type_idx][slice_idx] = model_number[0][type_idx][slice_idx<<1];
+       }
+     }
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/context_ini.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/context_ini.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/context_ini.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,32 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file context_ini.h
+  *
+  * \brief
+  *    CABAC context initializations
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Heiko Schwarz                   <hschwarz at hhi.de>
+  **************************************************************************************
+  */
+ 
+ #ifndef _CONTEXT_INI_
+ #define _CONTEXT_INI_
+ // NOTE(review): a guard name starting with underscore + uppercase letter is a reserved identifier in C; left unchanged here.
+ // NOTE(review): the empty () parameter lists below leave the arguments unspecified (pre-C23 C); (void) would be stricter.
+ void  create_context_memory ();
+ void  free_context_memory   ();
+ // store_contexts: when input->context_init_method is set, records the model number for the current slice.
+ void  init_contexts  ();
+ void  store_contexts ();
+ // update_field_frame_contexts(field): field != 0 copies the stored field entries to the frame entries, 0 copies frame to field.
+ void  update_field_frame_contexts (int);
+ void  update_rd_picture_contexts  (int);
+ 
+ void  SetCtxModelNumber ();
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/contributors.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/contributors.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/contributors.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,212 ----
+ 
+ /*! \file
+  *     contributors.h
+  *  \brief
+  *     List of contributors and copyright information.
+  *
+  *  \par Copyright statements
+     \verbatim
+    H.264 JM coder/decoder
+ 
+    Copyright (C) 2000 by
+       Telenor Broadband Services, Norway
+       Ericsson Radio Systems, Sweden
+       TELES AG, Germany
+       Nokia Inc., USA
+       Nokia Corporation, Finland
+       Siemens AG, Germany
+       Heinrich-Hertz-Institute for Communication Technology GmbH, Germany
+       University of Hannover, Institut of Communication Theory and Signal Processing, Germany
+       Videolocus, Canada
+       LSI Logic, Canada
+       Motorola Inc., USA
+       Microsoft Corp., USA
+       Apple Computer, Inc.
+       RealNetworks, Inc., USA
+       Thomson, Inc., USA
+    \endverbatim
+    \par Full Contact Information
+    \verbatim
+ 
+       Lowell Winger                   <lwinger at videolocus.com><lwinger at uwaterloo.ca><lwinger at lsil.com>
+       Guy Côté                        <gcote at videolocus.com>
+       Michael Gallant                 <mgallant at videolocus.com>
+       VideoLocus Inc.
+       97 Randall Dr.
+       Waterloo, ON, Canada  N2V1C5
+ 
+       Inge Lille-Langøy               <inge.lille-langoy at telenor.com>
+       Telenor Broadband Services
+       P.O.Box 6914 St.Olavs plass
+       N-0130 Oslo, Norway
+ 
+       Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+       Ericsson Radio Systems
+       KI/ERA/T/VV
+       164 80 Stockholm, Sweden
+ 
+       Stephan Wenger                  <stewe at cs.tu-berlin.de>
+       TU Berlin / TELES AG
+       Sekr. FR 6-3
+       Franklinstr. 28-29
+       D-10587 Berlin, Germany
+ 
+       Jani Lainema                    <jani.lainema at nokia.com>
+       Nokia Inc. / Nokia Research Center
+       6000 Connection Drive
+       Irving, TX 75039, USA
+ 
+       Sebastian Purreiter             <sebastian.purreiter at mch.siemens.de>
+       Siemens AG
+       ICM MD MP RD MCH 83
+       P.O.Box 80 17 07
+       D-81617 Munich, Germany
+ 
+       Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+       University of Hannover
+       Institut of Communication Theory and Signal Processing
+       Appelstr. 9a
+       30167 Hannover
+ 
+       Thomas Stockhammer              <stockhammer at ei.tum.de>
+       Institute for Communications Engineering
+       Munich University of Technology
+       80290 Munich
+       Germany
+ 
+       Detlev Marpe                    <marpe at hhi.de>
+       Heinrich-Hertz-Institute
+       Einsteinufer 37
+       10587 Berlin
+       Germany
+ 
+       Guido Heising                   <heising at hhi.de>
+       Heinrich-Hertz-Institute
+       Einsteinufer 37
+       10587 Berlin
+       Germany
+ 
+       Ragip Kurceren                  <ragip.kurceren at nokia.com>
+       Nokia Inc. / Nokia Research Center
+       6000 Connection Drive
+       Irving, TX 75039, USA
+ 
+       Tobias Oelbaum (TO)             <drehvial at gmx.net>
+       Institute for Communications Engineering
+       Munich University of Technology
+       80290 Munich
+       Germany
+ 
+       Yann Le Maguet                  <yann.lemaguet at philips.com>
+       Philips Research France
+ 
+       Dong Tian                       <tian at cs.tut.fi>
+       Ye-Kui Wang                     <wyk at ieee.org>
+       Tampere University of Technology
+       Tampere International Center for Signal Processing
+       33720 Tampere, Finland
+ 
+       Miska M. Hannuksela             <miska.hannuksela at nokia.com>
+       Nokia Corporation / Nokia Mobile Phones
+       P.O. Box 88
+       33721 Tampere, Finland
+ 
+       Karsten Suehring                <suehring at hhi.de>
+       Heinrich-Hertz-Institute
+       Einsteinufer 37
+       10587 Berlin
+       Germany
+ 
+       Heiko Schwarz                   <hschwarz at hhi.de>
+       Heinrich-Hertz-Institute
+       Einsteinufer 37
+       10587 Berlin
+       Germany
+ 
+       Limin Wang                      <liwang at gi.com>
+       Krit Panusopone                 <kpanusopone at gi.com>
+       Rajeev Gandhi                   <rgandhi at gi.com>
+       Yue Yu                          <yyu at gi.com>
+       Motorola Inc.
+       6450 Sequence Drive
+       San Diego, CA 92121 USA
+ 
+       Feng Wu                         <fengwu at microsoft.com>
+       Xiaoyan Sun                     <sunxiaoyan at msrchina.research.microsoft.com>
+       Microsoft Research Asia
+       3/F, Beijing Sigma Center
+       No.49, Zhichun Road, Hai Dian District,
+       Beijing China 100080
+ 
+ 
+       Mathias Wien                    <wien at ient.rwth-aachen.de>
+       Institut und Lehrstuhl für Nachrichtentechnik
+       RWTH Aachen University
+       52072 Aachen
+       Germany
+ 
+       Achim Dahlhoff                  <dahlhoff at ient.rwth-aachen.de>
+       Institut und Lehrstuhl für Nachrichtentechnik
+       RWTH Aachen University
+       52072 Aachen
+       Germany
+ 
+       Yoshihiro Kikuchi               <yoshihiro.kikuchi at toshiba.co.jp>
+       Takeshi Chujoh                  <takeshi.chujoh at toshiba.co.jp>
+       Toshiba Corporation
+       Research and Development Center
+       Kawasaki 212-8582, Japan
+ 
+       Shinya Kadono                   <kadono at drl.mei.co.jp>
+       Matsushita Electric Industrial Co., Ltd.
+       1006 Kadoma, Kadoma
+       Osaka 663-8113, Japan
+ 
+       Dzung Hoang                     <dzung.hoang at conexant.com>
+       Eric Viscito                    <eric.viscito at conexant.com>
+       Conexant Systems. Inc.
+       MPEG Compression Group
+       20450 Stevens Creek Blvd.
+       Cupertino, CA 95014
+ 
+       Barry Haskell
+       Apple Computer, Inc.            <bhaskell at apple.com>
+       2 Infinite Loop
+       Cupertino, California 95014
+ 
+       Greg Conklin
+       RealNetworks, Inc.              <gregc at real.com>
+       2601 Elliott Ave
+       Seattle, WA 98101
+ 
+       Jill Boyce                      <jill.boyce at thomson.net>
+       Cristina Gomila                 <cristina.gomila at thomson.net>
+       Thomson
+       2 Independence Way
+       Princeton, NJ 08540
+       
+       Siwei Ma                        <swma at jdl.ac.cn>
+       Institute of Computing Technology
+       Chinese Academy of Sciences 
+       Kexueyuan South Road 6
+       Haidian District
+       Beijing, China
+       
+       Zhibo Chen                      <chenzhibo at tsinghua.org.cn>
+       JianFeng Xu                     <fenax at video.mdc.tsinghua.edu.cn>
+       Wenfang Fu                      <fwf at video.mdc.tsinghua.edu.cn>
+       Dept.of E&E, Tsinghua Univ
+       Haidian District 
+       Beijing China, 100084
+         
+       Alexis Michael Tourapis         <alexismt at ieee.org>
+   
+       Xiaoquan Yi                     <xyi at engr.scu.edu>
+       Jun Zhang                       <jzhang2 at engr.scu.edu>
+       Dept. of C.E. Santa Clara Univ.
+       500 El Camino Real 
+       Santa Clara, CA 95053
+ 
+    \endverbatim
+  */
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/ctx_tables.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/ctx_tables.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/ctx_tables.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,729 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file ctx_tables.h
+  *
+  * \brief
+  *    CABAC context initialization tables
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Heiko Schwarz                   <hschwarz at hhi.de>
+  **************************************************************************************
+  */
+ 
+ #define CTX_UNUSED          {0,64}
+ #define CTX_UNDEF           {0,63}
+ // CTX_UNUSED / CTX_UNDEF: brace-enclosed placeholder pairs; CTX_UNUSED fills the unused slots of the INIT_* tables below.
+ #ifdef CONTEXT_INI_C
+ // The definitions that follow are only seen when CONTEXT_INI_C is defined (presumably by context_ini.c - confirm).
+ // Model counts: they equal the first dimension of the INIT_*_I ([1]) and INIT_*_P ([3]) arrays below.
+ #define NUM_CTX_MODELS_I     1
+ #define NUM_CTX_MODELS_P     3
+ 
+ 
+ static const int INIT_MB_TYPE_I[1][3][11][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  20, -15} , {   2,  54} , {   3,  74} ,  CTX_UNUSED , { -28, 127} , { -23, 104} , {  -6,  53} , {  -1,  54} , {   7,  51} ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  20, -15} , {   2,  54} , {   3,  74} , {  20, -15} , {   2,  54} , {   3,  74} , { -28, 127} , { -23, 104} , {  -6,  53} , {  -1,  54} , {   7,  51} }, // SI (unused at the moment)
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ static const int INIT_MB_TYPE_P[3][3][11][2] =
+ {
+   //----- model 0 -----
+   {
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+   { {  23,  33} , {  23,   2} , {  21,   0} ,  CTX_UNUSED , {   1,   9} , {   0,  49} , { -37, 118} , {   5,  57} , { -13,  78} , { -11,  65} , {   1,  62} },
+   { {  26,  67} , {  16,  90} , {   9, 104} ,  CTX_UNUSED , { -46, 127} , { -20, 104} , {   1,  67} , {  18,  64} , {   9,  43} , {  29,   0} ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  22,  25} , {  34,   0} , {  16,   0} ,  CTX_UNUSED , {  -2,   9} , {   4,  41} , { -29, 118} , {   2,  65} , {  -6,  71} , { -13,  79} , {   5,  52} },
+     { {  57,   2} , {  41,  36} , {  26,  69} ,  CTX_UNUSED , { -45, 127} , { -15, 101} , {  -4,  76} , {  26,  34} , {  19,  22} , {  40,   0} ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  29,  16} , {  25,   0} , {  14,   0} ,  CTX_UNUSED , { -10,  51} , {  -3,  62} , { -27,  99} , {  26,  16} , {  -4,  85} , { -24, 102} , {   5,  57} },
+   { {  54,   0} , {  37,  42} , {  12,  97} ,  CTX_UNUSED , { -32, 127} , { -22, 117} , {  -2,  74} , {  20,  40} , {  20,  10} , {  29,   0} ,  CTX_UNUSED }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_B8_TYPE_I[1][2][9][2] =
+ {
+   //----- model 0 -----
+   {
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ static const int INIT_B8_TYPE_P[3][2][9][2] =
+ {
+   //----- model 0 -----
+   {
+     {  CTX_UNUSED , {  12,  49} ,  CTX_UNUSED , {  -4,  73} , {  17,  50} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -6,  86} , { -17,  95} , {  -6,  61} , {   9,  45} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     {  CTX_UNUSED , {   9,  50} ,  CTX_UNUSED , {  -3,  70} , {  10,  54} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {   6,  69} , { -13,  90} , {   0,  52} , {   8,  43} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     {  CTX_UNUSED , {   6,  57} ,  CTX_UNUSED , { -17,  73} , {  14,  57} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -6,  93} , { -14,  88} , {  -6,  44} , {   4,  55} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_MV_RES_I[1][2][10][2] =
+ {
+   //----- model 0 -----
+   {
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ static const int INIT_MV_RES_P[3][2][10][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -3,  69} ,  CTX_UNUSED , {  -6,  81} , { -11,  96} ,  CTX_UNUSED , {   0,  58} ,  CTX_UNUSED , {  -3,  76} , { -10,  94} ,  CTX_UNUSED },
+     { {   6,  55} , {   7,  67} , {  -5,  86} , {   2,  88} ,  CTX_UNUSED , {   5,  54} , {   4,  69} , {  -3,  81} , {   0,  88} ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     { {  -2,  69} ,  CTX_UNUSED , {  -5,  82} , { -10,  96} ,  CTX_UNUSED , {   1,  56} ,  CTX_UNUSED , {  -3,  74} , {  -6,  85} ,  CTX_UNUSED },
+     { {   2,  59} , {   2,  75} , {  -3,  87} , {  -3, 100} ,  CTX_UNUSED , {   0,  59} , {  -3,  81} , {  -7,  86} , {  -5,  95} ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     { { -11,  89} ,  CTX_UNUSED , { -15, 103} , { -21, 116} ,  CTX_UNUSED , {   1,  63} ,  CTX_UNUSED , {  -5,  85} , { -13, 106} ,  CTX_UNUSED },
+     { {  19,  57} , {  20,  58} , {   4,  84} , {   6,  96} ,  CTX_UNUSED , {   5,  63} , {   6,  75} , {  -3,  90} , {  -1, 101} ,  CTX_UNUSED }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_REF_NO_I[1][2][6][2] =
+ {
+   //----- model 0 -----
+   {
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ static const int INIT_REF_NO_P[3][2][6][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -7,  67} , {  -5,  74} , {  -4,  74} , {  -5,  80} , {  -7,  72} , {   1,  58} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     { {  -1,  66} , {  -1,  77} , {   1,  70} , {  -2,  86} , {  -5,  72} , {   0,  61} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     { {   3,  55} , {  -4,  79} , {  -2,  75} , { -12,  97} , {  -7,  50} , {   1,  60} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ 
+ 
+ static const int INIT_TRANSFORM_SIZE_I[1][1][3][2]=
+ {
+   //----- model 0 -----
+   {
+     {  {  31,  21} , {  31,  31} , {  25,  50} },
+ //    { {   0,  41} , {   0,  63} , {   0,  63} },
+   }
+ };
+ 
+ static const int INIT_TRANSFORM_SIZE_P[3][1][3][2]=
+ {
+   //----- model 0 -----
+   {
+     {  {  12,  40} , {  11,  51} , {  14,  59} },
+ //    { {   0,  41} , {   0,  63} , {   0,  63} },
+   },
+   //----- model 1 -----
+   {
+     {  {  25,  32} , {  21,  49} , {  21,  54} },
+ //    { {   0,  41} , {   0,  63} , {   0,  63} },
+   },
+   //----- model 2 -----
+   {
+     {  {  21,  33} , {  19,  50} , {  17,  61} },
+ //    { {   0,  41} , {   0,  63} , {   0,  63} },
+   }
+ };
+ 
+ static const int INIT_DELTA_QP_I[1][1][4][2]=
+ {
+   //----- model 0 -----
+   {
+     { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+   }
+ };
+ static const int INIT_DELTA_QP_P[3][1][4][2]=
+ {
+   //----- model 0 -----
+   {
+     { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+   },
+   //----- model 1 -----
+   {
+     { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+   },
+   //----- model 2 -----
+   {
+     { {   0,  41} , {   0,  63} , {   0,  63} , {   0,  63} },
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_MB_AFF_I[1][1][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { {   0,  11} , {   1,  55} , {   0,  69} ,  CTX_UNUSED }
+   }
+ };
+ static const int INIT_MB_AFF_P[3][1][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { {   0,  45} , {  -4,  78} , {  -3,  96} ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     { {  13,  15} , {   7,  51} , {   2,  80} ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     { {   7,  34} , {  -9,  88} , { -20, 127} ,  CTX_UNUSED }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_IPR_I[1][1][2][2] =
+ {
+   //----- model 0 -----
+   {
+     { { 13,  41} , {   3,  62} }
+   }
+ };
+ static const int INIT_IPR_P[3][1][2][2] =
+ {
+   //----- model 0 -----
+   {
+     { { 13,  41} , {   3,  62} }
+   },
+   //----- model 1 -----
+   {
+     { { 13,  41} , {   3,  62} }
+   },
+   //----- model 2 -----
+   {
+     { { 13,  41} , {   3,  62} }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_CIPR_I[1][1][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+   }
+ };
+ static const int INIT_CIPR_P[3][1][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+   },
+   //----- model 1 -----
+   {
+     { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+   },
+   //----- model 2 -----
+   {
+     { {  -9,  83} , {   4,  86} , {   0,  97} , {  -7,  72} }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_CBP_I[1][3][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { { -17, 127} , { -13, 102} , {   0,  82} , {  -7,  74} },
+     { { -21, 107} , { -27, 127} , { -31, 127} , { -24, 127} },
+     { { -18,  95} , { -27, 127} , { -21, 114} , { -30, 127} }
+   }
+ };
+ static const int INIT_CBP_P[3][3][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { { -27, 126} , { -28,  98} , { -25, 101} , { -23,  67} },
+     { { -28,  82} , { -20,  94} , { -16,  83} , { -22, 110} },
+     { { -21,  91} , { -18, 102} , { -13,  93} , { -29, 127} }
+   },
+   //----- model 1 -----
+   {
+     { { -39, 127} , { -18,  91} , { -17,  96} , { -26,  81} },
+     { { -35,  98} , { -24, 102} , { -23,  97} , { -27, 119} },
+     { { -24,  99} , { -21, 110} , { -18, 102} , { -36, 127} }
+   },
+   //----- model 2 -----
+   {
+     { { -36, 127} , { -17,  91} , { -14,  95} , { -25,  84} },
+     { { -25,  86} , { -12,  89} , { -17,  91} , { -31, 127} },
+     { { -14,  76} , { -18, 103} , { -13,  90} , { -37, 127} }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_BCBP_I[1][8][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} },
+     { { -12,  63} , {  -2,  68} , { -15,  84} , { -13, 104} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -3,  70} , {  -8,  93} , { -10,  90} , { -30, 127} },
+     { {  -1,  74} , {  -6,  97} , {  -7,  91} , { -20, 127} },
+     { {  -4,  56} , {  -5,  82} , {  -7,  76} , { -22, 125} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ static const int INIT_BCBP_P[3][8][4][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -7,  92} , {  -5,  89} , {  -7,  96} , { -13, 108} },
+     { {  -3,  46} , {  -1,  65} , {  -1,  57} , {  -9,  93} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -3,  74} , {  -9,  92} , {  -8,  87} , { -23, 126} },
+     { {   5,  54} , {   6,  60} , {   6,  59} , {   6,  69} },
+     { {  -1,  48} , {   0,  68} , {  -4,  69} , {  -8,  88} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     { {   0,  80} , {  -5,  89} , {  -7,  94} , {  -4,  92} },
+     { {   0,  39} , {   0,  65} , { -15,  84} , { -35, 127} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -2,  73} , { -12, 104} , {  -9,  91} , { -31, 127} },
+     { {   3,  55} , {   7,  56} , {   7,  55} , {   8,  61} },
+     { {  -3,  53} , {   0,  68} , {  -7,  74} , {  -9,  88} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     { {  11,  80} , {   5,  76} , {   2,  84} , {   5,  78} },
+     { {  -6,  55} , {   4,  61} , { -14,  83} , { -37, 127} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -5,  79} , { -11, 104} , { -11,  91} , { -30, 127} },
+     { {   0,  65} , {  -2,  79} , {   0,  72} , {  -4,  92} },
+     { {  -6,  56} , {   3,  68} , {  -8,  71} , { -13,  98} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_MAP_I[1][8][15][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -7,  93} , { -11,  87} , {  -3,  77} , {  -5,  71} , {  -4,  63} , {  -4,  68} , { -12,  84} , {  -7,  62} , {  -7,  65} , {   8,  61} , {   5,  56} , {  -2,  66} , {   1,  64} , {   0,  61} , {  -2,  78} },
+     {  CTX_UNUSED , {   1,  50} , {   7,  52} , {  10,  35} , {   0,  44} , {  11,  38} , {   1,  45} , {   0,  46} , {   5,  44} , {  31,  17} , {   1,  51} , {   7,  50} , {  28,  19} , {  16,  33} , {  14,  62} },
+     {  { -17, 120} , { -20, 112} , { -18, 114} , { -11,  85} , { -15,  92} , { -14,  89} , { -26,  71} , { -15,  81} , { -14,  80} , {   0,  68} , { -14,  70} , { -24,  56} , { -23,  68} , { -24,  50} , { -11,  74} },
+ //    { {  -1,  73} , {  -7,  73} , {  -6,  76} , {  -7,  71} , {  -9,  72} , {  -5,  65} , { -14,  83} , {  -8,  72} , { -10,  75} , {  -5,  64} , {  -4,  59} , { -13,  79} , {  -9,  69} , {  -8,  66} , {   3,  55} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -13, 108} , { -15, 100} , { -13, 101} , { -13,  91} , { -12,  94} , { -10,  88} , { -16,  84} , { -10,  86} , {  -7,  83} , { -13,  87} , { -19,  94} , {   1,  70} , {   0,  72} , {  -5,  74} , {  18,  59} },
+     { {  -8, 102} , { -15, 100} , {   0,  95} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  -4,  75} , {   2,  72} , { -11,  75} , {  -3,  71} , {  15,  46} , { -13,  69} , {   0,  62} , {   0,  65} , {  21,  37} , { -15,  72} , {   9,  57} , {  16,  54} , {   0,  62} , {  12,  72} }
+   }
+ };
+ static const int INIT_MAP_P[3][8][15][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  -2,  85} , {  -6,  78} , {  -1,  75} , {  -7,  77} , {   2,  54} , {   5,  50} , {  -3,  68} , {   1,  50} , {   6,  42} , {  -4,  81} , {   1,  63} , {  -4,  70} , {   0,  67} , {   2,  57} , {  -2,  76} },
+     {  CTX_UNUSED , {  11,  35} , {   4,  64} , {   1,  61} , {  11,  35} , {  18,  25} , {  12,  24} , {  13,  29} , {  13,  36} , { -10,  93} , {  -7,  73} , {  -2,  73} , {  13,  46} , {   9,  49} , {  -7, 100} },
+     {  {  -4,  79} , {  -7,  71} , {  -5,  69} , {  -9,  70} , {  -8,  66} , { -10,  68} , { -19,  73} , { -12,  69} , { -16,  70} , { -15,  67} , { -20,  62} , { -19,  70} , { -16,  66} , { -22,  65} , { -20,  63} },
+ //    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {   9,  53} , {   2,  53} , {   5,  53} , {  -2,  61} , {   0,  56} , {   0,  56} , { -13,  63} , {  -5,  60} , {  -1,  62} , {   4,  57} , {  -6,  69} , {   4,  57} , {  14,  39} , {   4,  51} , {  13,  68} },
+     { {   3,  64} , {   1,  61} , {   9,  63} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {   7,  50} , {  16,  39} , {   5,  44} , {   4,  52} , {  11,  48} , {  -5,  60} , {  -1,  59} , {   0,  59} , {  22,  33} , {   5,  44} , {  14,  43} , {  -1,  78} , {   0,  60} , {   9,  69} }
+   },
+   //----- model 1 -----
+   {
+     { { -13, 103} , { -13,  91} , {  -9,  89} , { -14,  92} , {  -8,  76} , { -12,  87} , { -23, 110} , { -24, 105} , { -10,  78} , { -20, 112} , { -17,  99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} },
+     {  CTX_UNUSED , {  -4,  66} , {  -5,  78} , {  -4,  71} , {  -8,  72} , {   2,  59} , {  -1,  55} , {  -7,  70} , {  -6,  75} , {  -8,  89} , { -34, 119} , {  -3,  75} , {  32,  20} , {  30,  22} , { -44, 127} },
+     {  {  -5,  85} , {  -6,  81} , { -10,  77} , {  -7,  81} , { -17,  80} , { -18,  73} , {  -4,  74} , { -10,  83} , {  -9,  71} , {  -9,  67} , {  -1,  61} , {  -8,  66} , { -14,  66} , {   0,  59} , {   2,  59} },
+ //    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {   0,  54} , {  -5,  61} , {   0,  58} , {  -1,  60} , {  -3,  61} , {  -8,  67} , { -25,  84} , { -14,  74} , {  -5,  65} , {   5,  52} , {   2,  57} , {   0,  61} , {  -9,  69} , { -11,  70} , {  18,  55} },
+     { {  -4,  71} , {   0,  58} , {   7,  61} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {   9,  41} , {  18,  25} , {   9,  32} , {   5,  43} , {   9,  47} , {   0,  44} , {   0,  51} , {   2,  46} , {  19,  38} , {  -4,  66} , {  15,  38} , {  12,  42} , {   9,  34} , {   0,  89} }
+   },
+   //----- model 2 -----
+   {
+     { {  -4,  86} , { -12,  88} , {  -5,  82} , {  -3,  72} , {  -4,  67} , {  -8,  72} , { -16,  89} , {  -9,  69} , {  -1,  59} , {   5,  66} , {   4,  57} , {  -4,  71} , {  -2,  71} , {   2,  58} , {  -1,  74} },
+     {  CTX_UNUSED , {  -4,  44} , {  -1,  69} , {   0,  62} , {  -7,  51} , {  -4,  47} , {  -6,  42} , {  -3,  41} , {  -6,  53} , {   8,  76} , {  -9,  78} , { -11,  83} , {   9,  52} , {   0,  67} , {  -5,  90} },
+     {  {  -3,  78} , {  -8,  74} , {  -9,  72} , { -10,  72} , { -18,  75} , { -12,  71} , { -11,  63} , {  -5,  70} , { -17,  75} , { -14,  72} , { -16,  67} , {  -8,  53} , { -14,  59} , {  -9,  52} , { -11,  68} },
+ //    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {   1,  67} , { -15,  72} , {  -5,  75} , {  -8,  80} , { -21,  83} , { -21,  64} , { -13,  31} , { -25,  64} , { -29,  94} , {   9,  75} , {  17,  63} , {  -8,  74} , {  -5,  35} , {  -2,  27} , {  13,  91} },
+     { {   3,  65} , {  -7,  69} , {   8,  77} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , { -10,  66} , {   3,  62} , {  -3,  68} , { -20,  81} , {   0,  30} , {   1,   7} , {  -3,  23} , { -21,  74} , {  16,  66} , { -23, 124} , {  17,  37} , {  44, -18} , {  50, -34} , { -22, 127} }
+   }
+ };
+ 
+ 
+ 
+ 
+ static const int INIT_LAST_I[1][8][15][2] =
+ {
+   //----- model 0 -----
+   {
+     { {  24,   0} , {  15,   9} , {   8,  25} , {  13,  18} , {  15,   9} , {  13,  19} , {  10,  37} , {  12,  18} , {   6,  29} , {  20,  33} , {  15,  30} , {   4,  45} , {   1,  58} , {   0,  62} , {   7,  61} },
+     {  CTX_UNUSED , {  12,  38} , {  11,  45} , {  15,  39} , {  11,  42} , {  13,  44} , {  16,  45} , {  12,  41} , {  10,  49} , {  30,  34} , {  18,  42} , {  10,  55} , {  17,  51} , {  17,  46} , {   0,  89} },
+     {  {  23, -13} , {  26, -13} , {  40, -15} , {  49, -14} , {  44,   3} , {  45,   6} , {  44,  34} , {  33,  54} , {  19,  82} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  12,  33} , {   5,  38} , {   9,  34} , {  18,  22} , {  19,  22} , {  23,  19} , {  26,  16} , {  14,  44} , {  40,  14} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  26, -19} , {  22, -17} , {  26, -17} , {  30, -25} , {  28, -20} , {  33, -23} , {  37, -27} , {  33, -23} , {  40, -28} , {  38, -17} , {  33, -11} , {  40, -15} , {  41,  -6} , {  38,   1} , {  41,  17} },
+     { {  30,  -6} , {  27,   3} , {  26,  22} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  37, -16} , {  35,  -4} , {  38,  -8} , {  38,  -3} , {  37,   3} , {  38,   5} , {  42,   0} , {  35,  16} , {  39,  22} , {  14,  48} , {  27,  37} , {  21,  60} , {  12,  68} , {   2,  97} }
+   }
+ };
+ static const int INIT_LAST_P[3][8][15][2] =   // indexed [model 0..2][row 0..7][entry 0..14][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; each "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  11,  28} , {   2,  40} , {   3,  44} , {   0,  49} , {   0,  46} , {   2,  44} , {   2,  51} , {   0,  47} , {   4,  39} , {   2,  62} , {   6,  46} , {   0,  54} , {   3,  54} , {   2,  58} , {   4,  63} },
+     {  CTX_UNUSED , {   6,  51} , {   6,  57} , {   7,  53} , {   6,  52} , {   6,  55} , {  11,  45} , {  14,  36} , {   8,  53} , {  -1,  82} , {   7,  55} , {  -3,  78} , {  15,  46} , {  22,  31} , {  -1,  84} },
+     {  {   9,  -2} , {  26,  -9} , {  33,  -9} , {  39,  -7} , {  41,  -2} , {  45,   3} , {  49,   9} , {  45,  27} , {  36,  59} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  25,   7} , {  30,  -7} , {  28,   3} , {  28,   4} , {  32,   0} , {  34,  -1} , {  30,   6} , {  30,   6} , {  32,   9} , {  31,  19} , {  26,  27} , {  26,  30} , {  37,  20} , {  28,  34} , {  17,  70} },
+     { {   1,  67} , {   5,  59} , {   9,  67} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  16,  30} , {  18,  32} , {  18,  35} , {  22,  29} , {  24,  31} , {  23,  38} , {  18,  43} , {  20,  41} , {  11,  63} , {   9,  59} , {   9,  64} , {  -1,  94} , {  -2,  89} , {  -9, 108} }
+   },
+   //----- model 1 -----
+   {
+     { {   4,  45} , {  10,  28} , {  10,  31} , {  33, -11} , {  52, -43} , {  18,  15} , {  28,   0} , {  35, -22} , {  38, -25} , {  34,   0} , {  39, -18} , {  32, -12} , { 102, -94} , {   0,   0} , {  56, -15} },
+     {  CTX_UNUSED , {  33,  -4} , {  29,  10} , {  37,  -5} , {  51, -29} , {  39,  -9} , {  52, -34} , {  69, -58} , {  67, -63} , {  44,  -5} , {  32,   7} , {  55, -29} , {  32,   1} , {   0,   0} , {  27,  36} },
+     {  {  17, -10} , {  32, -13} , {  42,  -9} , {  49,  -5} , {  53,   0} , {  64,   3} , {  68,  10} , {  66,  27} , {  47,  57} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  33, -25} , {  34, -30} , {  36, -28} , {  38, -28} , {  38, -27} , {  34, -18} , {  35, -16} , {  34, -14} , {  32,  -8} , {  37,  -6} , {  35,   0} , {  30,  10} , {  28,  18} , {  26,  25} , {  29,  41} },
+     { {   0,  75} , {   2,  72} , {   8,  77} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  14,  35} , {  18,  31} , {  17,  35} , {  21,  30} , {  17,  45} , {  20,  42} , {  18,  45} , {  27,  26} , {  16,  54} , {   7,  66} , {  16,  56} , {  11,  73} , {  10,  67} , { -10, 116} }
+   },
+   //----- model 2 -----
+   {
+     { {   4,  39} , {   0,  42} , {   7,  34} , {  11,  29} , {   8,  31} , {   6,  37} , {   7,  42} , {   3,  40} , {   8,  33} , {  13,  43} , {  13,  36} , {   4,  47} , {   3,  55} , {   2,  58} , {   6,  60} },
+     {  CTX_UNUSED , {   8,  44} , {  11,  44} , {  14,  42} , {   7,  48} , {   4,  56} , {   4,  52} , {  13,  37} , {   9,  49} , {  19,  58} , {  10,  48} , {  12,  45} , {   0,  69} , {  20,  33} , {   8,  63} },
+     {  {   9,  -2} , {  30, -10} , {  31,  -4} , {  33,  -1} , {  33,   7} , {  31,  12} , {  37,  23} , {  31,  38} , {  20,  64} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  35, -18} , {  33, -25} , {  28,  -3} , {  24,  10} , {  27,   0} , {  34, -14} , {  52, -44} , {  39, -24} , {  19,  17} , {  31,  25} , {  36,  29} , {  24,  33} , {  34,  15} , {  30,  20} , {  22,  73} },
+     { {  20,  34} , {  19,  31} , {  27,  44} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  19,  16} , {  15,  36} , {  15,  36} , {  21,  28} , {  25,  21} , {  30,  20} , {  31,  12} , {  27,  16} , {  24,  42} , {   0,  93} , {  14,  56} , {  15,  57} , {  26,  38} , { -24, 127} }
+   }
+ };  // NOTE(review): name suggests "last coefficient" context values, _P presumably for inter slices -- confirm where the table is read
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_ONE_I[1][8][5][2] =   // indexed [model 0 only][row 0..7][entry 0..4][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; the "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  -3,  71} , {  -6,  42} , {  -5,  50} , {  -3,  54} , {  -2,  62} },
+     { {  -5,  67} , {  -5,  27} , {  -3,  39} , {  -2,  44} , {   0,  46} },
+     {  {  -3,  75} , {  -1,  23} , {   1,  34} , {   1,  43} , {   0,  54} },
+ //    { {  -9,  75} , {  -1,  44} , {  -2,  49} , {  -2,  51} , {  -1,  51} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -12,  92} , { -15,  55} , { -10,  60} , {  -6,  62} , {  -4,  65} },
+     { { -11,  97} , { -20,  84} , { -11,  79} , {  -6,  73} , {  -4,  74} },
+     { {  -8,  78} , {  -5,  33} , {  -4,  48} , {  -2,  53} , {  -3,  62} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };  // NOTE(review): _I presumably marks the intra-slice counterpart of INIT_ONE_P (single model) -- confirm where the table is read
+ static const int INIT_ONE_P[3][8][5][2] =   // indexed [model 0..2][row 0..7][entry 0..4][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; each "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  -6,  76} , {  -2,  44} , {   0,  45} , {   0,  52} , {  -3,  64} },
+     { {  -9,  77} , {   3,  24} , {   0,  42} , {   0,  48} , {   0,  55} },
+     {  {  -6,  66} , {  -7,  35} , {  -7,  42} , {  -8,  45} , {  -5,  48} },
+ //    { {  -3,  58} , {  -1,  28} , {   0,  29} , {   2,  30} , {   1,  35} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {   1,  58} , {  -3,  29} , {  -1,  36} , {   1,  38} , {   2,  43} },
+     { {   0,  70} , {  -4,  29} , {   5,  31} , {   7,  42} , {   1,  59} },
+     { {   0,  58} , {   8,   5} , {  10,  14} , {  14,  18} , {  13,  27} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     { { -23, 112} , { -15,  71} , {  -7,  61} , {   0,  53} , {  -5,  66} },
+     { { -21, 101} , {  -3,  39} , {  -5,  53} , {  -7,  61} , { -11,  75} },
+     {  {  -5,  71} , {   0,  24} , {  -1,  36} , {  -2,  42} , {  -2,  52} },
+ //    { {  -3,  58} , {  -1,  28} , {   0,  29} , {   2,  30} , {   1,  35} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -11,  76} , { -10,  44} , { -10,  52} , { -10,  57} , {  -9,  58} },
+     { {   2,  66} , {  -9,  34} , {   1,  32} , {  11,  31} , {   5,  52} },
+     { {   3,  52} , {   7,   4} , {  10,   8} , {  17,   8} , {  16,  19} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     { { -24, 115} , { -22,  82} , {  -9,  62} , {   0,  53} , {   0,  59} },
+     { { -21, 100} , { -14,  57} , { -12,  67} , { -11,  71} , { -10,  77} },
+     {  {  -9,  71} , {  -7,  37} , {  -8,  44} , { -11,  49} , { -10,  56} },
+ //    { {  -3,  58} , {  -1,  28} , {   0,  29} , {   2,  30} , {   1,  35} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -10,  82} , {  -8,  48} , {  -8,  61} , {  -8,  66} , {  -7,  70} },
+     { {  -4,  79} , { -22,  69} , { -16,  75} , {  -2,  58} , {   1,  58} },
+     { { -13,  81} , {  -6,  38} , { -13,  62} , {  -6,  58} , {  -2,  59} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };  // NOTE(review): _P presumably marks the inter-slice counterpart of INIT_ONE_I (three models) -- confirm where the table is read
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_ABS_I[1][8][5][2] =   // indexed [model 0 only][row 0..7][entry 0..4][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; the "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {   0,  58} , {   1,  63} , {  -2,  72} , {  -1,  74} , {  -9,  91} },
+     { { -16,  64} , {  -8,  68} , { -10,  78} , {  -6,  77} , { -10,  86} },
+     {  {  -2,  55} , {   0,  61} , {   1,  64} , {   0,  68} , {  -9,  92} },
+ //    { {  -4,  56} , {  -1,  59} , {  -6,  71} , {  -8,  74} , { -11,  85} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -12,  73} , {  -8,  76} , {  -7,  80} , {  -9,  88} , { -17, 110} },
+     { { -13,  86} , { -13,  96} , { -11,  97} , { -19, 117} ,  CTX_UNUSED },
+     { { -13,  71} , { -10,  79} , { -12,  86} , { -13,  90} , { -14,  97} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };  // NOTE(review): _I presumably marks the intra-slice counterpart of INIT_ABS_P (single model) -- confirm where the table is read
+ static const int INIT_ABS_P[3][8][5][2] =   // indexed [model 0..2][row 0..7][entry 0..4][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; each "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  -2,  59} , {  -4,  70} , {  -4,  75} , {  -8,  82} , { -17, 102} },
+     { {  -6,  59} , {  -7,  71} , { -12,  83} , { -11,  87} , { -30, 119} },
+     {  { -12,  56} , {  -6,  60} , {  -5,  62} , {  -8,  66} , {  -8,  76} },
+ //    { {  -7,  54} , {  -2,  58} , {  -4,  63} , {  -5,  66} , {   1,  64} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -6,  55} , {   0,  58} , {   0,  64} , {  -3,  74} , { -10,  90} },
+     { {  -2,  58} , {  -3,  72} , {  -3,  81} , { -11,  97} ,  CTX_UNUSED },
+     { {   2,  40} , {   0,  58} , {  -3,  70} , {  -6,  79} , {  -8,  85} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 1 -----
+   {
+     { { -11,  77} , {  -9,  80} , {  -9,  84} , { -10,  87} , { -34, 127} },
+     { { -15,  77} , { -17,  91} , { -25, 107} , { -25, 111} , { -28, 122} },
+     {  {  -9,  57} , {  -6,  63} , {  -4,  65} , {  -4,  67} , {  -7,  82} },
+ //    { {  -7,  54} , {  -2,  58} , {  -4,  63} , {  -5,  66} , {   1,  64} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -16,  72} , {  -7,  69} , {  -4,  69} , {  -5,  74} , {  -9,  86} },
+     { {  -2,  55} , {  -2,  67} , {   0,  73} , {  -8,  89} ,  CTX_UNUSED },
+     { {   3,  37} , {  -1,  61} , {  -5,  73} , {  -1,  70} , {  -4,  78} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   },
+   //----- model 2 -----
+   {
+     { { -14,  85} , { -13,  89} , { -13,  94} , { -11,  92} , { -29, 127} },
+     { { -21,  85} , { -16,  88} , { -23, 104} , { -15,  98} , { -37, 127} },
+     {  { -12,  59} , {  -8,  63} , {  -9,  67} , {  -6,  68} , { -10,  79} },
+ //    { {  -7,  54} , {  -2,  58} , {  -4,  63} , {  -5,  66} , {   1,  64} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -14,  75} , { -10,  79} , {  -9,  83} , { -12,  92} , { -18, 108} },
+     { { -13,  78} , {  -9,  83} , {  -4,  81} , { -13,  99} ,  CTX_UNUSED },
+     { { -16,  73} , { -10,  76} , { -13,  86} , {  -9,  83} , { -10,  87} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED }
+   }
+ };  // NOTE(review): _P presumably marks the inter-slice counterpart of INIT_ABS_I (three models) -- confirm where the table is read
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_FLD_MAP_I[1][8][15][2] =   // indexed [model 0 only][row 0..7][entry 0..14][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; the "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  -6,  93} , {  -6,  84} , {  -8,  79} , {   0,  66} , {  -1,  71} , {   0,  62} , {  -2,  60} , {  -2,  59} , {  -5,  75} , {  -3,  62} , {  -4,  58} , {  -9,  66} , {  -1,  79} , {   0,  71} , {   3,  68} },
+     {  CTX_UNUSED , {  10,  44} , {  -7,  62} , {  15,  36} , {  14,  40} , {  16,  27} , {  12,  29} , {   1,  44} , {  20,  36} , {  18,  32} , {   5,  42} , {   1,  48} , {  10,  62} , {  17,  46} , {   9,  64} },
+     {  { -14, 106} , { -13,  97} , { -15,  90} , { -12,  90} , { -18,  88} , { -10,  73} , {  -9,  79} , { -14,  86} , { -10,  73} , { -10,  70} , { -10,  69} , {  -5,  66} , {  -9,  64} , {  -5,  58} , {   2,  59} },
+ //    { {  -1,  73} , {  -7,  73} , {  -6,  76} , {  -7,  71} , {  -9,  72} , {  -5,  65} , { -14,  83} , {  -8,  72} , { -10,  75} , {  -5,  64} , {  -4,  59} , { -13,  79} , {  -9,  69} , {  -8,  66} , {   3,  55} },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { { -12, 104} , { -11,  97} , { -16,  96} , {  -7,  88} , {  -8,  85} , {  -7,  85} , {  -9,  85} , { -13,  88} , {   4,  66} , {  -3,  77} , {  -3,  76} , {  -6,  76} , {  10,  58} , {  -1,  76} , {  -1,  83} },
+     { {  -7,  99} , { -14,  95} , {   2,  95} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {   0,  76} , {  -5,  74} , {   0,  70} , { -11,  75} , {   1,  68} , {   0,  65} , { -14,  73} , {   3,  62} , {   4,  62} , {  -1,  68} , { -13,  75} , {  11,  55} , {   5,  64} , {  12,  70} }
+   }
+ };  // NOTE(review): FLD presumably means field coding and _I intra slices (single model) -- confirm where the table is read
+ static const int INIT_FLD_MAP_P[3][8][15][2] =   // indexed [model 0..2][row 0..7][entry 0..14][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; each "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { { -13, 106} , { -16, 106} , { -10,  87} , { -21, 114} , { -18, 110} , { -14,  98} , { -22, 110} , { -21, 106} , { -18, 103} , { -21, 107} , { -23, 108} , { -26, 112} , { -10,  96} , { -12,  95} , {  -5,  91} },
+     {  CTX_UNUSED , {  -9,  93} , { -22,  94} , {  -5,  86} , {   9,  67} , {  -4,  80} , { -10,  85} , {  -1,  70} , {   7,  60} , {   9,  58} , {   5,  61} , {  12,  50} , {  15,  50} , {  18,  49} , {  17,  54} },
+     {  {  -5,  85} , {  -6,  81} , { -10,  77} , {  -7,  81} , { -17,  80} , { -18,  73} , {  -4,  74} , { -10,  83} , {  -9,  71} , {  -9,  67} , {  -1,  61} , {  -8,  66} , { -14,  66} , {   0,  59} , {   2,  59} },
+ //    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  10,  41} , {   7,  46} , {  -1,  51} , {   7,  49} , {   8,  52} , {   9,  41} , {   6,  47} , {   2,  55} , {  13,  41} , {  10,  44} , {   6,  50} , {   5,  53} , {  13,  49} , {   4,  63} , {   6,  64} },
+     { {  -2,  69} , {  -2,  59} , {   6,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  10,  44} , {   9,  31} , {  12,  43} , {   3,  53} , {  14,  34} , {  10,  38} , {  -3,  52} , {  13,  40} , {  17,  32} , {   7,  44} , {   7,  38} , {  13,  50} , {  10,  57} , {  26,  43} }
+   },
+   //----- model 1 -----
+   {
+     { { -21, 126} , { -23, 124} , { -20, 110} , { -26, 126} , { -25, 124} , { -17, 105} , { -27, 121} , { -27, 117} , { -17, 102} , { -26, 117} , { -27, 116} , { -33, 122} , { -10,  95} , { -14, 100} , {  -8,  95} },
+     {  CTX_UNUSED , { -17, 111} , { -28, 114} , {  -6,  89} , {  -2,  80} , {  -4,  82} , {  -9,  85} , {  -8,  81} , {  -1,  72} , {   5,  64} , {   1,  67} , {   9,  56} , {   0,  69} , {   1,  69} , {   7,  69} },
+     {  {  -3,  81} , {  -3,  76} , {  -7,  72} , {  -6,  78} , { -12,  72} , { -14,  68} , {  -3,  70} , {  -6,  76} , {  -5,  66} , {  -5,  62} , {   0,  57} , {  -4,  61} , {  -9,  60} , {   1,  54} , {   2,  58} },
+ //    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  -7,  69} , {  -6,  67} , { -16,  77} , {  -2,  64} , {   2,  61} , {  -6,  67} , {  -3,  64} , {   2,  57} , {  -3,  65} , {  -3,  66} , {   0,  62} , {   9,  51} , {  -1,  66} , {  -2,  71} , {  -2,  75} },
+     { {  -1,  70} , {  -9,  72} , {  14,  60} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  16,  37} , {   0,  47} , {  18,  35} , {  11,  37} , {  12,  41} , {  10,  41} , {   2,  48} , {  12,  41} , {  13,  41} , {   0,  59} , {   3,  50} , {  19,  40} , {   3,  66} , {  18,  50} }
+   },
+   //----- model 2 -----
+   {
+     { { -22, 127} , { -25, 127} , { -25, 120} , { -27, 127} , { -19, 114} , { -23, 117} , { -25, 118} , { -26, 117} , { -24, 113} , { -28, 118} , { -31, 120} , { -37, 124} , { -10,  94} , { -15, 102} , { -10,  99} },
+     {  CTX_UNUSED , { -13, 106} , { -50, 127} , {  -5,  92} , {  17,  57} , {  -5,  86} , { -13,  94} , { -12,  91} , {  -2,  77} , {   0,  71} , {  -1,  73} , {   4,  64} , {  -7,  81} , {   5,  64} , {  15,  57} },
+     {  {  -3,  78} , {  -8,  74} , {  -9,  72} , { -10,  72} , { -18,  75} , { -12,  71} , { -11,  63} , {  -5,  70} , { -17,  75} , { -14,  72} , { -16,  67} , {  -8,  53} , { -14,  59} , {  -9,  52} , { -11,  68} },
+ //    { {  -4,  60} , {  -3,  49} , {  -2,  50} , {  -4,  49} , {  -5,  48} , {  -2,  46} , {  -7,  54} , {  -1,  45} , {  -4,  49} , {   4,  39} , {   0,  42} , {   2,  43} , {   0,  44} , {   5,  32} , {  15,  30}  },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {   1,  67} , {   0,  68} , { -10,  67} , {   1,  68} , {   0,  77} , {   2,  64} , {   0,  68} , {  -5,  78} , {   7,  55} , {   5,  59} , {   2,  65} , {  14,  54} , {  15,  44} , {   5,  60} , {   2,  70} },
+     { {  -2,  76} , { -18,  86} , {  12,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {   5,  64} , { -12,  70} , {  11,  55} , {   5,  56} , {   0,  69} , {   2,  65} , {  -6,  74} , {   5,  54} , {   7,  54} , {  -6,  76} , { -11,  82} , {  -2,  77} , {  -2,  77} , {  25,  42} }
+   }
+ };  // NOTE(review): FLD presumably means field coding and _P inter slices (three models) -- confirm where the table is read
+ 
+ 
+ 
+ 
+ 
+ static const int INIT_FLD_LAST_I[1][8][15][2] =   // indexed [model 0 only][row 0..7][entry 0..14][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; the "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  15,   6} , {   6,  19} , {   7,  16} , {  12,  14} , {  18,  13} , {  13,  11} , {  13,  15} , {  15,  16} , {  12,  23} , {  13,  23} , {  15,  20} , {  14,  26} , {  14,  44} , {  17,  40} , {  17,  47} },
+     {  CTX_UNUSED , {  24,  17} , {  21,  21} , {  25,  22} , {  31,  27} , {  22,  29} , {  19,  35} , {  14,  50} , {  10,  57} , {   7,  63} , {  -2,  77} , {  -4,  82} , {  -3,  94} , {   9,  69} , { -12, 109} },
+     {  {  21, -10} , {  24, -11} , {  28,  -8} , {  28,  -1} , {  29,   3} , {  29,   9} , {  35,  20} , {  29,  36} , {  14,  67} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  12,  33} , {   5,  38} , {   9,  34} , {  18,  22} , {  19,  22} , {  23,  19} , {  26,  16} , {  14,  44} , {  40,  14} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  36, -35} , {  36, -34} , {  32, -26} , {  37, -30} , {  44, -32} , {  34, -18} , {  34, -15} , {  40, -15} , {  33,  -7} , {  35,  -5} , {  33,   0} , {  38,   2} , {  33,  13} , {  23,  35} , {  13,  58} },
+     { {  29,  -3} , {  26,   0} , {  22,  30} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  31,  -7} , {  35, -15} , {  34,  -3} , {  34,   3} , {  36,  -1} , {  34,   5} , {  32,  11} , {  35,   5} , {  34,  12} , {  39,  11} , {  30,  29} , {  34,  26} , {  29,  39} , {  19,  66} }
+   }
+ };  // NOTE(review): FLD presumably means field coding and _I intra slices (single model) -- confirm where the table is read
+ static const int INIT_FLD_LAST_P[3][8][15][2] =   // indexed [model 0..2][row 0..7][entry 0..14][2 ints per entry]
+ {   // CTX_UNUSED (defined outside this excerpt) stands in for entries that carry no values; each "//    {" line is a disabled row, not one of the 8
+   //----- model 0 -----
+   {
+     { {  14,  11} , {  11,  14} , {   9,  11} , {  18,  11} , {  21,   9} , {  23,  -2} , {  32, -15} , {  32, -15} , {  34, -21} , {  39, -23} , {  42, -33} , {  41, -31} , {  46, -28} , {  38, -12} , {  21,  29} },
+     {  CTX_UNUSED , {  45, -24} , {  53, -45} , {  48, -26} , {  65, -43} , {  43, -19} , {  39, -10} , {  30,   9} , {  18,  26} , {  20,  27} , {   0,  57} , { -14,  82} , {  -5,  75} , { -19,  97} , { -35, 125} },
+     {  {  21, -13} , {  33, -14} , {  39,  -7} , {  46,  -2} , {  51,   2} , {  60,   6} , {  61,  17} , {  55,  34} , {  42,  62} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  27,   0} , {  28,   0} , {  31,  -4} , {  27,   6} , {  34,   8} , {  30,  10} , {  24,  22} , {  33,  19} , {  22,  32} , {  26,  31} , {  21,  41} , {  26,  44} , {  23,  47} , {  16,  65} , {  14,  71} },
+     { {   8,  60} , {   6,  63} , {  17,  65} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  21,  24} , {  23,  20} , {  26,  23} , {  27,  32} , {  28,  23} , {  28,  24} , {  23,  40} , {  24,  32} , {  28,  29} , {  23,  42} , {  19,  57} , {  22,  53} , {  22,  61} , {  11,  86} }
+   },
+   //----- model 1 -----
+   {
+     { {  19,  -6} , {  18,  -6} , {  14,   0} , {  26, -12} , {  31, -16} , {  33, -25} , {  33, -22} , {  37, -28} , {  39, -30} , {  42, -30} , {  47, -42} , {  45, -36} , {  49, -34} , {  41, -17} , {  32,   9} },
+     {  CTX_UNUSED , {  69, -71} , {  63, -63} , {  66, -64} , {  77, -74} , {  54, -39} , {  52, -35} , {  41, -10} , {  36,   0} , {  40,  -1} , {  30,  14} , {  28,  26} , {  23,  37} , {  12,  55} , {  11,  65} },
+     {  {  17, -10} , {  32, -13} , {  42,  -9} , {  49,  -5} , {  53,   0} , {  64,   3} , {  68,  10} , {  66,  27} , {  47,  57} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  37, -33} , {  39, -36} , {  40, -37} , {  38, -30} , {  46, -33} , {  42, -30} , {  40, -24} , {  49, -29} , {  38, -12} , {  40, -10} , {  38,  -3} , {  46,  -5} , {  31,  20} , {  29,  30} , {  25,  44} },
+     { {  12,  48} , {  11,  49} , {  26,  45} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  22,  22} , {  23,  22} , {  27,  21} , {  33,  20} , {  26,  28} , {  30,  24} , {  27,  34} , {  18,  42} , {  25,  39} , {  18,  50} , {  12,  70} , {  21,  54} , {  14,  71} , {  11,  83} }
+   },
+   //----- model 2 -----
+   {
+     { {  17, -13} , {  16,  -9} , {  17, -12} , {  27, -21} , {  37, -30} , {  41, -40} , {  42, -41} , {  48, -47} , {  39, -32} , {  46, -40} , {  52, -51} , {  46, -41} , {  52, -39} , {  43, -19} , {  32,  11} },
+     {  CTX_UNUSED , {  61, -55} , {  56, -46} , {  62, -50} , {  81, -67} , {  45, -20} , {  35,  -2} , {  28,  15} , {  34,   1} , {  39,   1} , {  30,  17} , {  20,  38} , {  18,  45} , {  15,  54} , {   0,  79} },
+     {  {   9,  -2} , {  30, -10} , {  31,  -4} , {  33,  -1} , {  33,   7} , {  31,  12} , {  37,  23} , {  31,  38} , {  20,  64} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+ //    { {  17,  27} , {  23,  13} , {  24,  16} , {  22,  25} , {  23,  27} , {  23,  32} , {  17,  43} , {  17,  49} , {   2,  70} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     { {  36, -16} , {  37, -14} , {  37, -17} , {  32,   1} , {  34,  15} , {  29,  15} , {  24,  25} , {  34,  22} , {  31,  16} , {  35,  18} , {  31,  28} , {  33,  41} , {  36,  28} , {  27,  47} , {  21,  62} },
+     { {  18,  31} , {  19,  26} , {  36,  24} ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED ,  CTX_UNUSED },
+     {  CTX_UNUSED , {  24,  23} , {  27,  16} , {  24,  30} , {  31,  29} , {  22,  41} , {  22,  42} , {  16,  60} , {  15,  52} , {  14,  60} , {   3,  78} , { -16, 123} , {  21,  53} , {  22,  56} , {  25,  61} }
+   }
+ };  // NOTE(review): FLD presumably means field coding and _P inter slices (three models) -- confirm where the table is read
+ 
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/decoder.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/decoder.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/decoder.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,655 ----
+ 
+ /*! 
+  *************************************************************************************
+  * \file decoder.c
+  *
+  * \brief
+  *    Contains functions that implement the "decoders in the encoder" concept for the
+  *    rate-distortion optimization with losses.
+  * \date
+  *    October 22nd, 2001
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and 
+  *    affiliation details)
+  *    - Dimitrios Kontopodis                    <dkonto at eikon.tum.de>
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ #include "refbuf.h"
+ #include "image.h"
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Decodes one 8x8 luma partition of the current macroblock into decs->decY[decoder][][].
+  *    b8block selects the quadrant: horizontal offset (b8block%2)*8, vertical (b8block/2)*8.
+  * \note
+  *    I slices, and partitions that are neither b8mode 1..7 nor "mbmode 0 in a P slice or in a
+  *    B slice with nal_reference_idc>0", copy enc_picture->imgY[][]. Every other partition adds
+  *    the residue (decs->resY[][], or zero in that mbmode 0 case) to blocks read via decs->decref[decoder][].
+  *************************************************************************************
+  */
+ void decode_one_b8block (int decoder, int mbmode, int b8block, int b8mode, int b8ref)
+ {
+   int i,j,block_y,block_x,bx,by;
+   int ref_inx = (IMG_NUMBER-1)%img->num_ref_frames;  // default reference index; replaced below for B slices when enc_picture != enc_frame_picture
+ 
+   int mv[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE];  // two vector components per 4x4 block; the commented-out check below pairs mv[0] with x and mv[1] with y
+   int resY_tmp[MB_BLOCK_SIZE][MB_BLOCK_SIZE];  // residue for this partition: zeros or a copy of decs->resY
+ 
+   int i0 = (b8block%2)<<3,   i1 = i0+8,   bx0 = i0>>2,   bx1 = bx0+2;  // horizontal pixel range [i0,i1) and 4x4-block range [bx0,bx1)
+   int j0 = (b8block/2)<<3,   j1 = j0+8,   by0 = j0>>2,   by1 = by0+2;  // vertical pixel range [j0,j1) and 4x4-block range [by0,by1)
+ 
+   if (img->type==I_SLICE)  // I slice: take the encoder's picture samples unchanged
+   {
+     for(i=i0;i<i1;i++)
+     for(j=j0;j<j1;j++)
+     {
+       decs->decY[decoder][img->pix_y+j][img->pix_x+i]=enc_picture->imgY[img->pix_y+j][img->pix_x+i];
+     }
+   }
+   else
+   {
+     if (mbmode==0 && (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0)))  // mbmode 0: zero residue and zero vectors
+     {
+       for(i=i0;i<i1;i++)
+       for(j=j0;j<j1;j++)
+       {
+         resY_tmp[j][i]=0;
+       }
+       for (by=by0; by<by1; by++)
+       for (bx=bx0; bx<bx1; bx++)
+       {
+         mv[0][by][bx] = mv[1][by][bx] = 0;
+       }
+     }
+     else
+     {
+       if (b8mode>=1 && b8mode<=7)  // modes 1..7: take the LIST_0 vectors stored for reference b8ref
+       {
+         for (by=by0; by<by1; by++)
+         for (bx=bx0; bx<bx1; bx++)
+         {
+           mv[0][by][bx] = img->all_mv[by][bx][LIST_0][b8ref][b8mode][0];
+           mv[1][by][bx] = img->all_mv[by][bx][LIST_0][b8ref][b8mode][1];
+         }
+       }
+       else  // other modes: zero vectors (not read afterwards, since these partitions take the copy branch below)
+       {
+         for (by=by0; by<by1; by++)
+         for (bx=bx0; bx<bx1; bx++)
+         {
+           mv[0][by][bx] = mv[1][by][bx] = 0;
+         }
+       }
+           
+       for(i=i0;i<i1;i++)
+       for(j=j0;j<j1;j++)
+       {
+         resY_tmp[j][i]=decs->resY[j][i];
+       }
+     }
+ 
+     // Decode Luminance
+     if ((b8mode>=1 && b8mode<=7) || (mbmode==0 && (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0))))
+     {
+       for (by=by0; by<by1; by++)
+       for (bx=bx0; bx<bx1; bx++)
+       {
+         block_x = img->block_x+bx;
+         block_y = img->block_y+by;
+         if (img->type == B_SLICE && enc_picture != enc_frame_picture)  // NOTE(review): loop-invariant; could be evaluated once before the loops
+           ref_inx = (IMG_NUMBER-b8ref-2)%img->num_ref_frames;
+ 
+         Get_Reference_Block (decs->decref[decoder][ref_inx],  // presumably fills decs->RefBlock with the 4x4 prediction -- defined elsewhere
+                              block_y, block_x,
+                              mv[0][by][bx],
+                              mv[1][by][bx],
+                              decs->RefBlock);
+         for (j=0; j<4; j++)
+         for (i=0; i<4; i++)
+         {
+           /*
+           if (decs->RefBlock[j][i] != UMVPelY_14 (mref[ref_inx],
+                                                   (block_y*4+j)*4+mv[1][by][bx],
+                                                   (block_x*4+i)*4+mv[0][by][bx]))
+           ref_inx = (img->number-ref-1)%img->num_ref_frames;
+           */
+           decs->decY[decoder][block_y*4+j][block_x*4+i] = resY_tmp[by*4+j][bx*4+i] + decs->RefBlock[j][i];  // residue + reference sample
+         }
+       }
+     }
+     else 
+     {
+       // Intra Refresh - Assume no spatial prediction
+       for(i=i0;i<i1;i++)
+       for(j=j0;j<j1;j++)
+       {
+         decs->decY[decoder][img->pix_y+j][img->pix_x+i] = enc_picture->imgY[img->pix_y+j][img->pix_x+i];
+       }
+     }
+   }
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    decodes one macroblock
+  * \note
+  *    The four decode_one_b8block() calls in the body are commented out, so this
+  *    function currently does nothing and ignores both of its parameters.
+  *************************************************************************************
+  */
+ void decode_one_mb (int decoder, Macroblock* currMB)
+ {
+   /*
+   decode_one_b8block (decoder, currMB->mb_type, 0, currMB->b8mode[0], refFrArr[img->block_y+0][img->block_x+0]);
+   decode_one_b8block (decoder, currMB->mb_type, 1, currMB->b8mode[1], refFrArr[img->block_y+0][img->block_x+2]);
+   decode_one_b8block (decoder, currMB->mb_type, 2, currMB->b8mode[2], refFrArr[img->block_y+2][img->block_x+0]);
+   decode_one_b8block (decoder, currMB->mb_type, 3, currMB->b8mode[3], refFrArr[img->block_y+2][img->block_x+2]);
+   */
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Builds the BLOCK_SIZE x BLOCK_SIZE prediction of one block from a decoded
+  *    reference frame
+  * \note
+  *    Derived from UnifiedOneForthPix for the "many decoders in the encoder" RD
+  *    optimization: no upsampled reference frame is stored per decoder; each
+  *    sample is interpolated on demand by Get_Reference_Pixel().
+  * \param imY
+  *    The reference frame the prediction is read from
+  * \param block_y
+  *    Row of the block whose prediction is wanted
+  * \param block_x
+  *    Column of the block whose prediction is wanted
+  * \param mvhor
+  *    Motion vector, horizontal part
+  * \param mvver
+  *    Motion vector, vertical part
+  * \param out
+  *    Output: the prediction for the block (block_y, block_x)
+  *************************************************************************************
+  */
+ void Get_Reference_Block(imgpel **imY, 
+                          int block_y, 
+                          int block_x, 
+                          int mvhor, 
+                          int mvver, 
+                          imgpel **out)
+ {
+   // Top-left sample of the displaced block; Get_Reference_Pixel() splits these
+   // positions into an integer part (/4) and a fractional part (&3)
+   const int top  = block_y * BLOCK_SIZE * 4 + mvver;
+   const int left = block_x * BLOCK_SIZE * 4 + mvhor;
+   int row, col;
+ 
+   for (row=0; row<BLOCK_SIZE; row++)
+   {
+     const int y = top + row*4;   // neighbouring output samples are 4 position units apart
+ 
+     for (col=0; col<BLOCK_SIZE; col++)
+     {
+       out[row][col] = Get_Reference_Pixel(imY, y, left + col*4);
+     }
+   }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Finds a pixel (y,x) of the upsampled reference frame
+  * \note
+  *    This is based on the function UnifiedOneForthPix, only it is modified to
+  *    be used at the "many decoders in the encoder" RD optimization. In this case
+  *    we dont want to keep full upsampled reference frames for all decoders, so
+  *    we just upsample when it is necessary.
+  * \param imY
+  *    Frame that is read. Every access is clamped to rows [0, img->height-1] and
+  *    columns [0, img->width-1], so positions outside the frame repeat the border.
+  * \param y_pos
+  *    Vertical position: the two low bits give the fractional phase dy (0..3),
+  *    the remaining part the integer row
+  * \param x_pos
+  *    Horizontal position, split the same way into dx and the integer column
+  * \return
+  *    The sample at the requested position. Integer positions return the stored
+  *    sample; all other positions are built from the 6-tap filter (1,-5,20,20,-5,1),
+  *    with filtered values clipped to [0, img->max_imgpel_value].
+  *    NOTE(review): the result is returned as byte although imY holds imgpel -
+  *    confirm nothing is truncated if imgpel is wider than byte.
+  *************************************************************************************
+  */
+ byte Get_Reference_Pixel(imgpel **imY, int y_pos, int x_pos)
+ {
+ 
+   int dx, x;
+   int dy, y;
+   int maxold_x,maxold_y;
+ 
+   int result = 0, result1, result2;
+   int pres_x;
+   int pres_y; 
+ 
+   int tmp_res[6];
+ 
+   static const int COEF[6] = {
+     1, -5, 20, 20, -5, 1
+   };
+ 
+ 
+   dx = x_pos&3;   // fractional phase of the column (0..3)
+   dy = y_pos&3;   // fractional phase of the row (0..3)
+   x_pos = (x_pos-dx)/4;   // integer column (x_pos-dx is a multiple of 4)
+   y_pos = (y_pos-dy)/4;   // integer row
+   maxold_x = img->width-1;
+   maxold_y = img->height-1;
+ 
+   if (dx == 0 && dy == 0) { /* fullpel position */
+     result = imY[max(0,min(maxold_y,y_pos))][max(0,min(maxold_x,x_pos))];
+   }
+   else { /* other positions */
+ 
+     if (dy == 0) {   // integer row: horizontal filtering only
+ 
+       pres_y = max(0,min(maxold_y,y_pos));
+       for(x=-2;x<4;x++) {
+         pres_x = max(0,min(maxold_x,x_pos+x));
+         result += imY[pres_y][pres_x]*COEF[x+2];
+       }
+ 
+       result = max(0, min(img->max_imgpel_value, (result+16)/32));   // round, divide by 32, clip
+ 
+       if (dx == 1) {   // average with the integer sample on the left
+         result = (result + imY[pres_y][max(0,min(maxold_x,x_pos))])/2;
+       }
+       else if (dx == 3) {   // average with the integer sample on the right
+         result = (result + imY[pres_y][max(0,min(maxold_x,x_pos+1))])/2;
+       }
+     }
+     else if (dx == 0) {   // integer column: vertical filtering only
+ 
+       pres_x = max(0,min(maxold_x,x_pos));
+       for(y=-2;y<4;y++) {
+         pres_y = max(0,min(maxold_y,y_pos+y));
+         result += imY[pres_y][pres_x]*COEF[y+2];
+       }
+ 
+       result = max(0, min(img->max_imgpel_value, (result+16)/32));
+ 
+       if (dy == 1) {   // average with the integer sample above
+         result = (result + imY[max(0,min(maxold_y,y_pos))][pres_x])/2;
+       }
+       else if (dy == 3) {   // average with the integer sample below
+         result = (result + imY[max(0,min(maxold_y,y_pos+1))][pres_x])/2;
+       }
+     }
+     else if (dx == 2) {   // dy is 1..3 here: filter six rows horizontally, then filter the six sums vertically
+ 
+       for(y=-2;y<4;y++) {
+         result = 0;
+         pres_y = max(0,min(maxold_y,y_pos+y));
+         for(x=-2;x<4;x++) {
+           pres_x = max(0,min(maxold_x,x_pos+x));
+           result += imY[pres_y][pres_x]*COEF[x+2];
+         }
+         tmp_res[y+2] = result;   // unscaled horizontal sum of row y_pos+y
+       }
+ 
+       result = 0;
+       for(y=-2;y<4;y++) {
+         result += tmp_res[y+2]*COEF[y+2];
+       }
+ 
+       result = max(0, min(img->max_imgpel_value, (result+512)/1024));   // two filter passes: divide by 32*32
+ 
+       if (dy == 1) {   // average with the horizontally filtered value of row y_pos
+         result = (result + max(0, min(img->max_imgpel_value, (tmp_res[2]+16)/32)))/2;
+       }
+       else if (dy == 3) {   // average with the horizontally filtered value of row y_pos+1
+         result = (result + max(0, min(img->max_imgpel_value, (tmp_res[3]+16)/32)))/2;
+       }
+     }
+     else if (dy == 2) {   // dx is 1 or 3 here: filter six columns vertically, then filter the six sums horizontally
+ 
+       for(x=-2;x<4;x++) {
+         result = 0;
+         pres_x = max(0,min(maxold_x,x_pos+x));
+         for(y=-2;y<4;y++) {
+           pres_y = max(0,min(maxold_y,y_pos+y));
+           result += imY[pres_y][pres_x]*COEF[y+2];
+         }
+         tmp_res[x+2] = result;   // unscaled vertical sum of column x_pos+x
+       }
+ 
+       result = 0;
+       for(x=-2;x<4;x++) {
+         result += tmp_res[x+2]*COEF[x+2];
+       }
+ 
+       result = max(0, min(img->max_imgpel_value, (result+512)/1024));
+ 
+       if (dx == 1) {   // average with the vertically filtered value of column x_pos
+         result = (result + max(0, min(img->max_imgpel_value, (tmp_res[2]+16)/32)))/2;
+       }
+       else {   // dx == 3: average with the vertically filtered value of column x_pos+1
+         result = (result + max(0, min(img->max_imgpel_value, (tmp_res[3]+16)/32)))/2;
+       }
+     }
+     else {   // dx and dy are both odd: average of one horizontally and one vertically filtered value
+ 
+       result = 0;
+       pres_y = dy == 1 ? y_pos : y_pos+1;   // row used for the horizontal filter
+       pres_y = max(0,min(maxold_y,pres_y));
+ 
+       for(x=-2;x<4;x++) {
+         pres_x = max(0,min(maxold_x,x_pos+x));
+         result += imY[pres_y][pres_x]*COEF[x+2];
+       }
+ 
+       result1 = max(0, min(img->max_imgpel_value, (result+16)/32));
+ 
+       result = 0;
+       pres_x = dx == 1 ? x_pos : x_pos+1;   // column used for the vertical filter
+       pres_x = max(0,min(maxold_x,pres_x));
+ 
+       for(y=-2;y<4;y++) {
+         pres_y = max(0,min(maxold_y,y_pos+y));
+         result += imY[pres_y][pres_x]*COEF[y+2];
+       }
+ 
+       result2 = max(0, min(img->max_imgpel_value, (result+16)/32));
+       result = (result1+result2)/2;
+     }
+   }
+ 
+   return result;
+ }
+   
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    For every simulated decoder: draws a packet-loss map, conceals the lost
+  *    macroblocks in that decoder's picture and copies the picture into the
+  *    decoder's reference frame buffer
+  *
+  *************************************************************************************
+  */
+ void UpdateDecoders()
+ {
+   int dec;
+ 
+   for (dec=0; dec<input->NoOfDecoders; dec++)
+   {
+     imgpel **picture    = decs->decY_best[dec];
+     imgpel ***ref_store = decs->decref[dec];
+ 
+     // simulates the packet losses
+     Build_Status_Map(decs->status_map);
+ 
+     // for the moment error concealment is just a "copy"
+     Error_Concealment(picture, decs->status_map, ref_store);
+ 
+     // Move the decoded frame to the reference buffers. At the decoders this is
+     // done without interpolation (upsampling); upsampling is done while decoding.
+     DecOneForthPix(picture, ref_store);
+   }
+ }
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Copies one (reconstructed) image, row by row, into the reference frame
+  *    buffer slot IMG_NUMBER % img->buf_cycle
+  *
+  * \note
+  *    This is used at the "many decoders in the encoder"
+  * \param dY
+  *    The reconstructed image
+  * \param dref
+  *    The reference buffer
+  *************************************************************************************
+  */
+ void DecOneForthPix(imgpel **dY, imgpel ***dref)
+ {
+   imgpel **slot = dref[IMG_NUMBER%img->buf_cycle];
+   int row;
+ 
+   for (row=0; row<img->height; row++)
+   {
+     memcpy(slot[row], dY[row], img->width*sizeof(imgpel));
+   }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Stores in decs->resY the prediction residue of one 8x8 block: the
+  *    reconstructed picture minus the prediction
+  * \param b8block
+  *    Index of the 8x8 block inside the macroblock (bit 0: column, bit 1: row)
+  * \param i16mode
+  *    Index into img->mprr_2 when >= 0; if not INTRA16x16 it has to be -1, and
+  *    img->mpr is used as the prediction instead
+  *************************************************************************************
+  */
+ void compute_residue_b8block (int b8block, int i16mode) // if not INTRA16x16 it has to be -1
+ {
+   const int x_beg = (b8block%2)<<3;
+   const int y_beg = (b8block/2)<<3;
+   int x, y;
+ 
+   for (y=y_beg; y<y_beg+8; y++)
+   {
+     for (x=x_beg; x<x_beg+8; x++)
+     {
+       const int recon = enc_picture->imgY[img->pix_y+y][img->pix_x+x];
+ 
+       if (i16mode>=0)
+         decs->resY[y][x] = recon - img->mprr_2[i16mode][y][x];
+       else
+         decs->resY[y][x] = recon - img->mpr[y][x];
+     }
+   }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Gives the prediction residue for a macroblock by processing its four 8x8
+  *    blocks in order 0..3
+  * \param i16mode
+  *    Passed unchanged to compute_residue_b8block()
+  *************************************************************************************
+  */
+ void compute_residue_mb (int i16mode)
+ {
+   int b8;
+ 
+   for (b8=0; b8<4; b8++)
+   {
+     compute_residue_b8block (b8, i16mode);
+   }
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Fills a random status map telling, per macroblock, whether it was received
+  *    (0) or which loss code applies, based on the loss rates and the slice
+  *    structure.
+  *
+  * \note
+  *    Loss codes: 3 = C lost, 2 = B lost, 5 = B and C lost, 1 = A lost (overrides
+  *    the others). Without data partitioning (input->partition_mode == 0) every
+  *    loss is stored as 1.
+  *
+  * \param s_map
+  *    The status map to be filled
+  *************************************************************************************
+  */
+ void Build_Status_Map(byte **s_map)
+ {
+   const int mb_rows = img->height/MB_BLOCK_SIZE;
+   const int mb_cols = img->width/MB_BLOCK_SIZE;
+   int row, col;
+   int mb_index = 0;      // running macroblock index into img->mb_data
+   int slice_count = -1;  // compared against slice_nr to detect a slice change
+   int loss_code = 0;     // loss code of the slice currently being walked
+ 
+   for (row=0; row<mb_rows; row++)
+   {
+     for (col=0; col<mb_cols; col++, mb_index++)
+     {
+       // With slice_mode == 0 a fresh draw is made for every macroblock
+       if (!input->slice_mode || img->mb_data[mb_index].slice_nr != slice_count) /* new slice */
+       {
+         // One draw per partition, always in the order C, B, A
+         loss_code = 0;
+         if ((double)rand()/(double)RAND_MAX*100 < input->LossRateC)
+           loss_code += 3;
+         if ((double)rand()/(double)RAND_MAX*100 < input->LossRateB)
+           loss_code += 2;
+         if ((double)rand()/(double)RAND_MAX*100 < input->LossRateA)
+           loss_code = 1;
+         slice_count++;
+       }
+ 
+       if (loss_code == 0)
+         s_map[row][col] = 0;          //! Packet OK
+       else if (input->partition_mode == 0)
+         s_map[row][col] = 1;
+       else
+         s_map[row][col] = loss_code;
+     }
+   }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Calls Conceal_Error() for every macroblock whose status map entry is
+  *    non-zero
+  *    
+  * \param inY
+  *    The frame on which error concealment is performed
+  * \param s_map
+  *    The status map shows which areas are lost.
+  * \param refY
+  *    The set of reference frames - may be used for the error concealment.
+  *************************************************************************************
+  */
+ void Error_Concealment(imgpel **inY, byte **s_map, imgpel ***refY)
+ {
+   const int mb_rows = img->height/MB_BLOCK_SIZE;
+   const int mb_cols = img->width/MB_BLOCK_SIZE;
+   int row, col;
+ 
+   for (row=0; row<mb_rows; row++)
+   {
+     for (col=0; col<mb_cols; col++)
+     {
+       if (s_map[row][col] != 0)
+       {
+         Conceal_Error(inY, row, col, refY, s_map);
+       }
+     }
+   }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Overwrites the 16x16 macroblock at (pos_y,pos_x) of dst with the co-located
+  *    samples of src
+  *************************************************************************************
+  */
+ static void conceal_copy_mb(imgpel **dst, imgpel **src, int pos_y, int pos_x)
+ {
+   int i,j;
+ 
+   for (j=0;j<MB_BLOCK_SIZE;j++)
+     for (i=0;i<MB_BLOCK_SIZE;i++)
+       dst[pos_y+j][pos_x+i] = src[pos_y+j][pos_x+i];
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Fills the 16x16 macroblock at (pos_y,pos_x) of dst with the constant 127
+  *************************************************************************************
+  */
+ static void conceal_fill_grey(imgpel **dst, int pos_y, int pos_x)
+ {
+   int i,j;
+ 
+   for (j=0;j<MB_BLOCK_SIZE;j++)
+     for (i=0;i<MB_BLOCK_SIZE;i++)
+       dst[pos_y+j][pos_x+i] = 127;
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Rebuilds macroblock (mb_y,mb_x) of inY from ref by motion compensation
+  *    with the vectors in tmp_mv; no residue is added
+  *************************************************************************************
+  */
+ static void conceal_motion_comp_mb(imgpel **inY, imgpel **ref, int mb_y, int mb_x, short ***tmp_mv)
+ {
+   int i,j,by,bx;
+ 
+   for (by=0; by<BLOCK_MULTIPLE; by++)
+     for (bx=0; bx<BLOCK_MULTIPLE; bx++)
+     {
+       const int block_y = mb_y*BLOCK_SIZE + by;
+       const int block_x = mb_x*BLOCK_SIZE + bx;
+       // NOTE(review): the +4 column offset is kept exactly as in the previous
+       // implementation - confirm it matches the layout of enc_picture->mv.
+       const short *mv = tmp_mv[block_y][block_x+4];
+ 
+       Get_Reference_Block(ref, block_y, block_x, mv[0], mv[1], decs->RefBlock);
+ 
+       for (j=0;j<BLOCK_SIZE;j++)
+         for (i=0;i<BLOCK_SIZE;i++)
+           inY[block_y*BLOCK_SIZE + j][block_x*BLOCK_SIZE + i] = decs->RefBlock[j][i];
+     }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Conceals macroblock (mb_y,mb_x) of the frame inY[][] according to its loss
+  *    code in s_map. There is no better error concealment yet than copying the
+  *    macroblock from the previous frame, motion compensating it without residue,
+  *    or painting it grey.
+  * \param inY
+  *    Frame in which the macroblock is concealed
+  * \param mb_y
+  *    Macroblock row
+  * \param mb_x
+  *    Macroblock column
+  * \param refY
+  *    Reference frames; only refY[(IMG_NUMBER-1)%img->num_ref_frames] is read
+  * \param s_map
+  *    Status map; s_map[mb_y][mb_x] selects the concealment (1, 2, 3 or 5; any
+  *    other value leaves the macroblock untouched)
+  *************************************************************************************
+  */
+ void Conceal_Error(imgpel **inY, int mb_y, int mb_x, imgpel ***refY, byte **s_map)
+ {
+   int ref_inx = (IMG_NUMBER-1)%img->num_ref_frames;
+   int pos_y = mb_y*MB_BLOCK_SIZE, pos_x = mb_x*MB_BLOCK_SIZE;
+   int mb_mode = decs->dec_mb_mode[mb_x][mb_y];
+   int pred_slice = (img->type==P_SLICE || (img->type==B_SLICE && img->nal_reference_idc>0));
+   int copy  = (mb_mode==0 && pred_slice);
+   int inter = (((mb_mode>=1 && mb_mode<=3) || mb_mode==P8x8) && pred_slice);
+   short ***tmp_mv = enc_picture->mv[LIST_0];
+   
+   switch(s_map[mb_y][mb_x])
+   {
+   case 1: //! whole slice lost (at least partition A lost)
+     if (img->type!=I_SLICE)
+       conceal_copy_mb(inY, refY[ref_inx], pos_y, pos_x);
+     else
+       conceal_fill_grey(inY, pos_y, pos_x);
+     break;
+ 
+   case 5: //! partition B and partition C lost
+     if (img->type==I_SLICE)
+     {
+       //! first frame; up to now set value to grey, may integrate nokia EC
+       conceal_fill_grey(inY, pos_y, pos_x);
+     }
+     else if (copy)
+     {
+       conceal_copy_mb(inY, refY[ref_inx], pos_y, pos_x);
+     }
+     else if (inter)
+     {
+       //! the residue is treated as zero: motion compensation only
+       conceal_motion_comp_mb(inY, refY[ref_inx], mb_y, mb_x, tmp_mv);
+     }
+     else
+     {
+       //! intra; up to now only copy mb, may integrate nokia EC
+       conceal_copy_mb(inY, refY[ref_inx], pos_y, pos_x);
+     }
+     break;
+ 
+   case 3: //! Partition C lost
+     if (img->type!=I_SLICE)
+     {
+       if (copy)
+         conceal_copy_mb(inY, refY[ref_inx], pos_y, pos_x);
+       else if (inter)
+         conceal_motion_comp_mb(inY, refY[ref_inx], mb_y, mb_x, tmp_mv);
+       // intra macroblocks are left as they are
+     }
+     break;
+ 
+   case 2: //! Partition B lost
+     if (img->type==I_SLICE)
+     {
+       //! first frame; up to now set value to grey, may integrate nokia EC
+       conceal_fill_grey(inY, pos_y, pos_x);
+     }
+     else if (!inter)
+     {
+       conceal_copy_mb(inY, refY[ref_inx], pos_y, pos_x);
+     }
+     break;
+ 
+   default: // unknown status: nothing to conceal
+     break;
+   } //! End Switch
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/defines.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/defines.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/defines.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,207 ----
+ 
+ /*!
+  **************************************************************************
+  * \file defines.h
+  *
+  * \brief
+  *    Header file containing some useful global definitions
+  *
+  * \author
+  *    Detlev Marpe 
+  *    Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved.
+  *
+  * \date
+  *    21. March 2001
+  **************************************************************************
+  */
+ 
+ 
+ #ifndef _DEFINES_H_
+ #define _DEFINES_H_
+ 
+ #if defined _DEBUG
+ #define TRACE           0                   //!< 0:Trace off 1:Trace on 2:detailed CABAC context information
+ #else
+ #define TRACE           0                   //!< 0:Trace off 1:Trace on 2:detailed CABAC context information
+ #endif
+ 
+ typedef unsigned char byte;    //!< byte type definition
+ 
+ //FREXT Profile IDC definitions
+ #define FREXT_HP        100      //!< YUV 4:2:0/8 "High"
+ #define FREXT_Hi10P     110      //!< YUV 4:2:0/10 "High 10"
+ #define FREXT_Hi422     122      //!< YUV 4:2:2/10 "High 4:2:2"
+ #define FREXT_Hi444     144      //!< YUV 4:4:4/12 "High 4:4:4"
+ 
+ #define YUV400 0
+ #define YUV420 1
+ #define YUV422 2
+ #define YUV444 3
+ 
+ enum {
+   LIST_0 = 0,
+   LIST_1 = 1,
+   BI_PRED = 2,
+   BI_PRED_L0 = 3,
+   BI_PRED_L1 = 4
+ };
+ 
+ #define ZEROSNR 1
+ // CAVLC
+ #define LUMA              0
+ #define LUMA_INTRA16x16DC 1
+ #define LUMA_INTRA16x16AC 2
+ 
+ #define LEVEL_NUM      6
+ #define TOTRUN_NUM    15
+ #define RUNBEFORE_NUM  7
+ 
+ #define CAVLC_LEVEL_LIMIT 2063
+ 
+ 
+ //--- block types for CABAC
+ #define LUMA_16DC       0
+ #define LUMA_16AC       1
+ #define LUMA_8x8        2
+ #define LUMA_8x4        3
+ #define LUMA_4x8        4
+ #define LUMA_4x4        5
+ #define CHROMA_DC       6
+ #define CHROMA_AC       7
+ #define CHROMA_DC_2x4   8
+ #define CHROMA_DC_4x4   9
+ #define NUM_BLOCK_TYPES 10
+ 
+ 
+ #define _FAST_FULL_ME_
+ 
+ #define _FULL_SEARCH_RANGE_
+ #define _ADAPT_LAST_GROUP_
+ #define _CHANGE_QP_
+ #define _LEAKYBUCKET_
+ 
+ // ---------------------------------------------------------------------------------
+ // FLAGS and DEFINES for new chroma intra prediction, Dzung Hoang
+ // Threshold values to zero out quantized transform coefficients.
+ // Recommend that _CHROMA_COEFF_COST_ be low to improve chroma quality
+ #define _LUMA_COEFF_COST_       4 //!< threshold for luma coeffs
+ #define _CHROMA_COEFF_COST_     4 //!< threshold for chroma coeffs, used to be 7
+ #define _LUMA_MB_COEFF_COST_    5 //!< threshold for luma coeffs of inter Macroblocks
+ #define _LUMA_8x8_COEFF_COST_   5 //!< threshold for luma coeffs of 8x8 Inter Partition
+ 
+ #define IMG_PAD_SIZE            4 //!< Number of pixels padded around the reference frame (>=4)
+ #define IMG_PAD_SIZE_TIMES4    16 //!< Number of pixels padded around the reference frame in subpel units(>=16)
+ 
+ #define absm(A) ((A)<(0) ? (-(A)):(A)) //!< abs macro, faster than procedure
+ #define MAX_VALUE       999999   //!< used for start value for some variables
+ 
+ #define INVALIDINDEX  (-135792468)
+ 
+ #define Clip1(a)            ((a)>img->max_imgpel_value?img->max_imgpel_value:((a)<0?0:(a)))
+ #define Clip1_Chr(a)        ((a)>img->max_imgpel_value_uv?img->max_imgpel_value_uv:((a)<0?0:(a)))
+ #define Clip3(min,max,val) (((val)<(min))?(min):(((val)>(max))?(max):(val)))
+ 
+ #define P8x8    8
+ #define I4MB    9
+ #define I16MB   10
+ #define IBLOCK  11
+ #define SI4MB   12
+ #define I8MB    13
+ #define IPCM    14
+ #define MAXMODE 15
+ 
+ 
+ // Fixed-point cost helpers: a lambda is stored as an integer scaled by
+ // 2^LAMBDA_ACCURACY_BITS. Every macro argument is parenthesized in the
+ // expansion, so compound arguments such as a+b or x-y are evaluated as a unit.
+ #define  LAMBDA_ACCURACY_BITS         16
+ #define  LAMBDA_FACTOR(lambda)        ((int)((double)(1<<LAMBDA_ACCURACY_BITS)*(lambda)+0.5))
+ #define  WEIGHTED_COST(factor,bits)   (((factor)*(bits))>>LAMBDA_ACCURACY_BITS)
+ #define  MV_COST(f,s,cx,cy,px,py)     (WEIGHTED_COST(f,mvbits[((cx)<<(s))-(px)]+mvbits[((cy)<<(s))-(py)]))
+ #define  REF_COST(f,ref,list_offset) (WEIGHTED_COST(f,((listXsize[list_offset]<=1)? 0:refbits[(ref)])))
+ 
+ // Macroblock classification by mb_type. Each macro expands to a plain
+ // expression (no trailing ';'), so it can be used inside conditions.
+ #define IS_INTRA(MB)    ((MB)->mb_type==I4MB  || (MB)->mb_type==I16MB || (MB)->mb_type==I8MB || (MB)->mb_type==IPCM)
+ #define IS_NEWINTRA(MB) ((MB)->mb_type==I16MB)
+ #define IS_OLDINTRA(MB) ((MB)->mb_type==I4MB)
+ #define IS_IPCM(MB) ((MB)->mb_type==IPCM)
+ 
+ // NOTE(review): IS_INTRA accepts IPCM, but IS_INTER and IS_INTERMV do not
+ // exclude it, so an IPCM macroblock satisfies both - confirm this is intended.
+ #define IS_INTER(MB)    ((MB)->mb_type!=I4MB  && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB)
+ #define IS_INTERMV(MB)  ((MB)->mb_type!=I4MB  && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB  && (MB)->mb_type!=0)
+ #define IS_DIRECT(MB)   ((MB)->mb_type==0     && (img->type==B_SLICE))
+ #define IS_COPY(MB)     ((MB)->mb_type==0     && (img->type==P_SLICE || img->type==SP_SLICE))
+ #define IS_P8x8(MB)     ((MB)->mb_type==P8x8)
+ 
+ // Quantization parameter range
+ 
+ #define MIN_QP          0
+ #define MAX_QP          51
+ #define SHIFT_QP        12
+ 
+ // Direct Mode types
+ #define DIR_TEMPORAL    0   //!< Temporal Direct Mode
+ #define DIR_SPATIAL     1   //!< Spatial Direct Mode
+ 
+ #define MAX_REFERENCE_PICTURES 32
+ 
+ #define BLOCK_SHIFT     2
+ #define BLOCK_SIZE      4
+ #define MB_BLOCK_SIZE   16
+ 
+ // number of intra prediction modes
+ #define NO_INTRA_PMODE  9        
+ 
+ // 4x4 intra prediction modes
+ #define VERT_PRED             0
+ #define HOR_PRED              1
+ #define DC_PRED               2
+ #define DIAG_DOWN_LEFT_PRED   3
+ #define DIAG_DOWN_RIGHT_PRED  4
+ #define VERT_RIGHT_PRED       5
+ #define HOR_DOWN_PRED         6
+ #define VERT_LEFT_PRED        7
+ #define HOR_UP_PRED           8
+ 
+ // 16x16 intra prediction modes
+ #define VERT_PRED_16    0
+ #define HOR_PRED_16     1
+ #define DC_PRED_16      2
+ #define PLANE_16        3
+ 
+ // 8x8 chroma intra prediction modes
+ #define DC_PRED_8       0
+ #define HOR_PRED_8      1
+ #define VERT_PRED_8     2
+ #define PLANE_8         3
+ 
+ #define INIT_FRAME_RATE 30
+ #define EOS             1         //!< End Of Sequence
+ 
+ 
+ #define MVPRED_MEDIAN   0
+ #define MVPRED_L        1
+ #define MVPRED_U        2
+ #define MVPRED_UR       3
+ 
+ #define BLOCK_MULTIPLE        (MB_BLOCK_SIZE/BLOCK_SIZE)
+ #define MB_BLOCK_PARTITIONS   (BLOCK_MULTIPLE * BLOCK_MULTIPLE)
+ #define MB_PIXELS             (MB_BLOCK_SIZE * MB_BLOCK_SIZE)
+ #define BLOCK_CONTEXT         (2 * 2 * MB_BLOCK_PARTITIONS)
+ 
+ #define MAX_SYMBOLS_PER_MB  1200  //!< Maximum number of different syntax elements for one MB
+                                   // CAVLC needs more symbols per MB
+ 
+ 
+ #define MAX_PART_NR     3 /*!< Maximum number of different data partitions.
+                                Some reasonable number which should reflect
+                                what is currently defined in the SE2Partition map (elements.h) */
+ 
+ //Start code and Emulation Prevention need this to be defined in identical manner at encoder and decoder
+ #define ZEROBYTES_SHORTSTARTCODE 2 //indicates the number of zero bytes in the short start-code prefix
+ 
+ #define Q_BITS          15
+ #define DQ_BITS         6
+ #define DQ_ROUND        (1<<(DQ_BITS-1))
+ 
+ #define Q_BITS_8        16
+ #define DQ_BITS_8       6 
+ #define DQ_ROUND_8      (1<<(DQ_BITS_8-1))
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/elements.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/elements.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/elements.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,109 ----
+ 
+ /*!
+  **************************************************************************
+  *  \file elements.h
+  *  \brief  Header file for elements in H.264 streams
+  *  \date 6.10.2000, 
+  *  \version
+  *      1.1
+  *
+  * \note
+  *    Version 1.0 included three partition modes: no DP, 2 partitions per slice
+  *      and 4 partitions per slice.  As per document VCEG-N72 this is changed
+  *      in version 1.1 to only two partition modes, one without DP and one with 
+  *      3 partitions per slice
+  *
+  *  \author Sebastian Purreiter     <sebastian.purreiter at mch.siemens.de>
+  *  \author Stephan Wenger          <stewe at cs.tu-berlin.de>
+  *
+  **************************************************************************
+  */
+ 
+ #ifndef _ELEMENTS_H_
+ #define _ELEMENTS_H_
+ 
+ /*!
+  *  definition of H.264 syntax elements
+  *  order of elements follow dependencies for picture reconstruction
+  */
+ /*!
+  * \brief   Assignment of old TYPE or partition elements to new
+  *          elements
+  *
+  *  old element     | new elements
+  *  ----------------+-------------------------------------------------------------------
+  *  TYPE_HEADER     | SE_HEADER, SE_PTYPE
+  *  TYPE_MBHEADER   | SE_MBTYPE, SE_REFFRAME, SE_INTRAPREDMODE
+  *  TYPE_MVD        | SE_MVD
+  *  TYPE_CBP        | SE_CBP_INTRA, SE_CBP_INTER
+  *  TYPE_COEFF_Y    | SE_LUM_DC_INTRA, SE_LUM_AC_INTRA, SE_LUM_DC_INTER, SE_LUM_AC_INTER
+  *  TYPE_2x2DC      | SE_CHR_DC_INTRA, SE_CHR_DC_INTER
+  *  TYPE_COEFF_C    | SE_CHR_AC_INTRA, SE_CHR_AC_INTER
+  *  TYPE_EOS        | SE_EOS
+ */
+ 
+ 
+ 
+ 
+ #define MAXPARTITIONMODES 2 //!< maximum possible partition modes as defined in assignSE2partition[][]
+ 
+ /*!
+  *  \brief  lookup-table to assign different elements to partition
+  *
+  *  \note here we defined up to 6 different partitions similar to
+  *      document Q15-k-18 described in the PROGFRAMEMODE.
+  *      The Sliceheader contains the PSYNC information. \par
+  *
+  *      Elements inside a partition are not ordered. They are
+  *      ordered by occurrence in the stream.
+  *      Assumption: Only partition losses are considered. \par
+  *
+  *      The texture elements luminance and chrominance are
+  *      not ordered in the progressive form
+  *      This may be changed in image.c \par
+  *
+  *  -IMPORTANT:
+  *      Picture- or Sliceheaders must be assigned to partition 0. \par
+  *      Furthermore partitions must follow syntax dependencies as
+  *      outlined in document Q15-J-23.
+  */
+ 
+ 
+ // A note on this table:
+ //
+ // While the assignment of values in enum data types is specified in C, it is not
+ // very good style to have an "elementnumber", not even as a comment.
+ //
+ // Hence a copy of the relevant structure from global.h here
+ /*
+ typedef enum {
+  0  SE_HEADER,
+  1  SE_PTYPE,
+  2  SE_MBTYPE,
+  3  SE_REFFRAME,
+  4  SE_INTRAPREDMODE,
+  5  SE_MVD,
+  6  SE_CBP_INTRA,
+  7  SE_LUM_DC_INTRA,
+  8  SE_CHR_DC_INTRA,
+  9  SE_LUM_AC_INTRA,
+ 10  SE_CHR_AC_INTRA,
+ 11  SE_CBP_INTER,
+ 12  SE_LUM_DC_INTER,
+ 13  SE_CHR_DC_INTER,
+ 14  SE_LUM_AC_INTER,
+ 15  SE_CHR_AC_INTER,
+ 16  SE_DELTA_QUANT_INTER,
+ 17  SE_DELTA_QUANT_INTRA,
+ 18  SE_BFRAME,
+ 19  SE_EOS,
+ 20  SE_MAX_ELEMENTS */ // number of maximum syntax elements
+ //} SE_type;
+ 
+ 
+ extern int * assignSE2partition[2];
+ extern int assignSE2partition_NoDP[SE_MAX_ELEMENTS];
+ extern int assignSE2partition_DP[SE_MAX_ELEMENTS];
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/epzs.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/epzs.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/epzs.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,2321 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file epzs.c
+ *
+ * \brief
+ *    Motion Estimation using EPZS
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *      - Alexis Michael Tourapis <alexismt at ieee.org>
+ *
+ *************************************************************************************
+ */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "image.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ 
+ #include "epzs.h"
+ 
+ // EPZSREF selects the layout of EPZSMotion below: when non-zero the array
+ // carries one extra dimension, which EPZSInit sizes with
+ // img->max_num_references.
+ #define EPZSREF 1
+ 
+ // Lookup tables defined in another translation unit.
+ extern int *mvbits;
+ extern unsigned int *byte_abs;
+ 
+ // Define Global Parameters
+ // All eight-entry tables below are indexed by block type, in the order given
+ // in the trailing comments of blk_parent / blk_child.
+ static const short blk_parent[8] = {1, 1, 1, 1, 2, 4, 4, 5}; //!< {skip, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4}
+ static const short blk_child[8]  = {1, 2, 4, 4, 5, 7, 7, 7}; //!< {skip, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4}
+ // Base thresholds. EPZSInit multiplies each entry by the matching
+ // input->EPZS{Min,Med,Max}ThresScale and by 1 << (bitdepth_luma - 8) to fill
+ // minthres / medthres / maxthres.
+ static const int   minthres_base[8] = {0,  64,  32,  32,  16,  8,  8,  4};
+ static const int   medthres_base[8] = {0, 256, 128, 128,  64, 32, 32, 16};
+ static const int   maxthres_base[8] = {0, 768, 384, 384, 192, 96, 96, 48};
+ static short img_width;
+ static short img_height;  
+ static short weight1, weight2, offsetBi;
+ 
+ //! Define EPZS Refinement patterns
+ // Indexed as pattern_data[pattern][point][field]. assignEPZSpattern copies
+ // the four fields of each point into SPoint.x, .y, .start_nmbr and
+ // .next_points respectively. Rows with fewer than 12 initialisers are
+ // zero-filled by the compiler; EPZSInit only requests 4, 8, 12 and 8 points
+ // for the four patterns.
+ // NOTE(review): the pattern index presumably matches SDIAMOND, SQUARE,
+ // EDIAMOND, LDIAMOND = 0..3 -- confirm against epzs.h.
+ static int pattern_data[4][12][4] = 
+ {  
+   { // Small Diamond pattern
+     {  0,  1,  3, 3 }, {  1,  0,  0, 3 }, {  0, -1,  1, 3 }, { -1,  0, 2, 3 }
+   },    
+   { // Square pattern
+     {  0,  1,  7, 3 }, {  1,  1,  7, 5 }, {  1,  0,  1, 3 }, {  1, -1, 1, 5 },
+     {  0, -1,  3, 3 }, { -1, -1,  3, 5 }, { -1,  0,  5, 3 }, { -1,  1, 5, 5 }
+   },    
+   { // Enhanced Diamond pattern
+     { -1,  1, 10, 5 }, {  0,  2, 10, 8 }, {  0,  1, 10, 7 }, {  1,  1, 1, 5 },
+     {  2,  0, 1,  8 }, {  1,  0,  1, 7 }, {  1, -1,  4, 5 }, {  0, -2, 4, 8 },
+     {  0, -1, 4,  7 }, { -1, -1, 7,  5 }, { -2,  0,  7, 8 }, { -1,  0, 7, 7 }
+   },  
+   { // Large Diamond pattern
+     {  0,  2, 6,  5 }, {  1,  1, 0,  3 }, {  2,  0, 0,  5 }, {  1, -1, 2, 3 },
+     {  0, -2, 2,  5 }, { -1, -1, 4,  3 }, { -2,  0, 4,  5 }, { -1,  1, 6, 3 }
+   }
+ };
+ 
+ // Other definitions
+ // Human-readable names for configuration values (externally visible).
+ // Note that the third pattern is called "Extended Diamond" here but
+ // "Enhanced Diamond" in the pattern_data comment above.
+ const  char c_EPZSPattern[4][20] = { "Diamond", "Square", "Extended Diamond", "Large Diamond"};
+ const  char c_EPZSDualPattern[5][20] = { "Disabled","Diamond", "Square", "Extended Diamond", "Large Diamond"};
+ const  char c_EPZSFixed[3][20] = { "Disabled","All P", "All P + B"};
+ const  char c_EPZSOther[2][20] = { "Disabled","Enabled"};
+ 
+ // Per-block-type thresholds, filled in by EPZSInit from the *_base tables.
+ int medthres[8];
+ int maxthres[8];
+ int minthres[8];
+ // Scaling factors between reference pictures; EPZSSliceInit fills
+ // mv_scale[list][i][k] from picture-order-count differences.
+ int mv_scale[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES];
+ 
+ // The arrays below are allocated in EPZSInit and released in EPZSDelete.
+ // EPZSMotion is only allocated when input->EPZSSpatialMem is set.
+ static byte **EPZSMap;  //!< Memory Map definition 
+ int ***EPZSDistortion;  //!< Array for storing SAD Values
+ #if EPZSREF
+ short ******EPZSMotion;  //!< Array for storing Motion Vectors
+ #else
+ short *****EPZSMotion;  //!< Array for storing Motion Vectors
+ #endif
+ 
+ // Pattern and predictor objects created in EPZSInit. searchPattern and
+ // searchPatternD are aliases of one of sdiamond / square / ediamond /
+ // ldiamond, chosen from input->EPZSPattern and input->EPZSDual; they are not
+ // separately allocated. EPZSCo_located is only allocated when
+ // input->EPZSTemporal is set.
+ EPZSStructure *searchPattern,*searchPatternD, *predictor;
+ EPZSStructure *window_predictor, *window_predictor_extended;
+ EPZSStructure *sdiamond,*square,*ediamond,*ldiamond;
+ EPZSColocParams *EPZSCo_located;
+ 
+ // Function pointers and reference-picture pointers used by the search
+ // routines; they are assigned outside the portion of the file shown here.
+ int (*computeBiPredSad)(pel_t **, int, int, int, int, int, int, int, int, int);
+ 
+ static pel_t *(*get_ref_line1)(int, pel_t *, int, int, int, int);
+ static pel_t *(*get_ref_line2)(int, pel_t *, int, int, int, int);
+ static pel_t *(*get_ref_line) (int, pel_t *, int, int, int, int);  
+ static pel_t *ref_pic;
+ static pel_t *ref_pic1;
+ static pel_t *ref_pic2;
+ /*!
+ ************************************************************************
+ * \brief
+ *    Create the container that holds co-located motion vectors.
+ *
+ *    The structure itself is zero-initialised. A
+ *    [2][size_y/BLOCK_SIZE][size_x/BLOCK_SIZE][2] array of shorts is
+ *    attached as \c mv. When \c mb_adaptive_frame_field_flag is set, two
+ *    more arrays with half as many rows are attached as \c top_mv and
+ *    \c bottom_mv; otherwise those members stay zeroed.
+ *
+ * \param size_x
+ *    horizontal luma size
+ * \param size_y
+ *    vertical luma size
+ * \param mb_adaptive_frame_field_flag
+ *    flag that indicates macroblock adaptive frame/field coding
+ *
+ * \return
+ *    the allocated EPZSColocParams structure; release it with
+ *    freeEPZScolocated()
+ ************************************************************************
+ */
+ EPZSColocParams* allocEPZScolocated(int size_x, int size_y, int mb_adaptive_frame_field_flag)
+ {
+   const int blk_cols = size_x / BLOCK_SIZE;
+   const int blk_rows = size_y / BLOCK_SIZE;
+   EPZSColocParams *coloc = calloc(1, sizeof(EPZSColocParams));
+ 
+   if (coloc == NULL)
+     no_mem_exit("alloc_EPZScolocated: s");
+ 
+   // Record the geometry first; freeEPZScolocated relies on these fields.
+   coloc->size_x = size_x;
+   coloc->size_y = size_y;
+   coloc->mb_adaptive_frame_field_flag = mb_adaptive_frame_field_flag;
+ 
+   get_mem4Dshort (&(coloc->mv), 2, blk_rows, blk_cols, 2);
+ 
+   if (mb_adaptive_frame_field_flag)
+   {
+     // Field arrays cover half the rows of the frame array.
+     get_mem4Dshort (&(coloc->top_mv),    2, blk_rows / 2, blk_cols, 2);
+     get_mem4Dshort (&(coloc->bottom_mv), 2, blk_rows / 2, blk_cols, 2);
+   }
+ 
+   return coloc;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Free co-located memory.
+ *
+ *    Releases the \c mv array, the \c top_mv / \c bottom_mv arrays when
+ *    \c mb_adaptive_frame_field_flag is set, and finally the structure
+ *    itself. A NULL argument is ignored.
+ *
+ * \param p
+ *    structure to be freed. The pointer is passed by value, so the
+ *    caller's copy is NOT reset; the caller must not use it afterwards.
+ *
+ ************************************************************************
+ */
+ void freeEPZScolocated(EPZSColocParams* p)
+ {
+   if (p == NULL)
+     return;
+ 
+   free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE);
+ 
+   if (p->mb_adaptive_frame_field_flag)
+   {
+     // Same row count as used at allocation time (half the frame rows).
+     free_mem4Dshort (p->top_mv, 2, p->size_y / BLOCK_SIZE / 2);
+     free_mem4Dshort (p->bottom_mv, 2, p->size_y / BLOCK_SIZE / 2);
+   }
+ 
+   // No "p = NULL" here: assigning to the by-value parameter would be a dead
+   // store and would wrongly suggest the caller's pointer gets cleared.
+   free(p);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Allocate EPZS pattern memory
+ *
+ *    Allocates a zero-initialised EPZSStructure and, for a positive
+ *    \c searchpoints, a zero-initialised array of that many SPoint
+ *    entries. Either allocation failing ends in no_mem_exit().
+ *
+ * \param searchpoints 
+ *    number of searchpoints to allocate. It is stored unchanged in
+ *    \c searchPoints. For values <= 0 no point array is allocated and
+ *    \c point is left NULL (EPZSInit can request such sizes for very
+ *    small search ranges).
+ *
+ * \return
+ *    the allocated EPZSStructure structure; release it with
+ *    freeEPZSpattern()
+ ************************************************************************
+ */
+ EPZSStructure* allocEPZSpattern(int searchpoints)
+ {
+   EPZSStructure *s;
+   
+   s = calloc(1, sizeof(EPZSStructure)); 
+   if (NULL == s)
+     no_mem_exit("alloc_EPZSpattern: s");
+   
+   s->searchPoints = searchpoints;
+   s->point = NULL;
+ 
+   if (searchpoints > 0)
+   {
+     s->point = calloc((size_t) searchpoints, sizeof(SPoint));
+     // The point array is written through right after allocation, so a
+     // failed allocation must not go unnoticed.
+     if (NULL == s->point)
+       no_mem_exit("alloc_EPZSpattern: s->point");
+   }
+   
+   return s;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Free EPZS pattern memory.
+ *
+ *    Releases the point array and then the structure. A NULL argument is
+ *    ignored.
+ *
+ * \param p
+ *    structure to be freed. The pointer is passed by value, so the
+ *    caller's copy is NOT reset; the caller must not use it afterwards.
+ *
+ ************************************************************************
+ */
+ void freeEPZSpattern(EPZSStructure* p)
+ {
+   if (p == NULL)
+     return;
+ 
+   free(p->point);   // free(NULL) is a no-op, so an empty pattern is fine
+   free(p);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Fill a pattern's points from the static pattern_data table.
+ *
+ * \param pattern
+ *    pattern to fill; its \c searchPoints field decides how many rows of
+ *    the table are copied
+ * \param type
+ *    first index into pattern_data (which refinement pattern to copy)
+ ************************************************************************
+ */
+ void assignEPZSpattern(EPZSStructure *pattern,int type)
+ {
+   int idx = 0;
+ 
+   while (idx < pattern->searchPoints)
+   {
+     const int *row = pattern_data[type][idx];
+     SPoint    *dst = &pattern->point[idx];
+ 
+     dst->x           = row[0];
+     dst->y           = row[1];
+     dst->start_nmbr  = row[2];
+     dst->next_points = row[3];
+ 
+     idx++;
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Base-2 logarithm of iValue, rounded to the nearest integer.
+ *
+ *    Computed as (floor(log2(iValue * iValue)) + 1) / 2 using integer
+ *    arithmetic only. Returns 0 when iValue is 0.
+ ************************************************************************
+ */
+ static int RoundLog2 (int iValue)
+ {
+   const int squared = iValue * iValue;
+   int exponent;
+ 
+   // Find the largest exponent with 2^exponent <= squared (0 if none).
+   for (exponent = 0; (1 << (exponent + 1)) <= squared; exponent++)
+     ;
+ 
+   // Halving the log of the square, with rounding, gives the rounded log.
+   return (exponent + 1) >> 1;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Search Window Predictor Initialization
+ *
+ *    For every level pos = RoundLog2(search_range) - 2 down to 0 the step
+ *    size is search_range >> pos. Each level contributes eight points
+ *    (four per sign) at that step. When \c mode is non-zero each level
+ *    additionally contributes twelve points (six per sign) that combine
+ *    the step with a second step of (3 * step + 1) >> 1.
+ *
+ * \param search_range
+ *    search range the window is derived from
+ * \param predictor
+ *    pattern whose point array is filled; its \c searchPoints field is
+ *    overwritten with the number of points written. The array must hold
+ *    8 points per level (mode 0) or 20 points per level (otherwise).
+ * \param mode
+ *    0 for the basic window, non-zero for the extended window
+ ************************************************************************
+ */
+ void EPZSWindowPredictorInit (short search_range, EPZSStructure * predictor, short mode)
+ {
+   int   level;
+   short step, field_step;
+   short count = 0;
+   short sign;
+ 
+   for (level = RoundLog2 (search_range) - 2; level >= 0; level--)
+   {
+     step = (search_range >> level);
+ 
+     // Basic window: emitted for both modes, positive sign first.
+     for (sign = 1; sign >= -1; sign -= 2)
+     {
+       const int d = sign * step;
+ 
+       predictor->point[count].x =  d;
+       predictor->point[count].y =  0;
+       count++;
+       predictor->point[count].x =  d;
+       predictor->point[count].y =  d;
+       count++;
+       predictor->point[count].x =  0;
+       predictor->point[count].y =  d;
+       count++;
+       predictor->point[count].x = -d;
+       predictor->point[count].y =  d;
+       count++;
+     }
+ 
+     if (mode != 0)
+     {
+       // Extended window: extra points using the 1.5x step.
+       field_step = (3 * step + 1) >> 1;
+ 
+       for (sign = 1; sign >= -1; sign -= 2)
+       {
+         const int d  = sign * step;
+         const int fd = sign * field_step;
+ 
+         predictor->point[count].x =  fd;
+         predictor->point[count].y = -d;
+         count++;
+         predictor->point[count].x =  fd;
+         predictor->point[count].y =  0;
+         count++;
+         predictor->point[count].x =  fd;
+         predictor->point[count].y =  d;
+         count++;
+         predictor->point[count].x =  d;
+         predictor->point[count].y =  fd;
+         count++;
+         predictor->point[count].x =  0;
+         predictor->point[count].y =  fd;
+         count++;
+         predictor->point[count].x = -d;
+         predictor->point[count].y =  fd;
+         count++;
+       }
+     }
+   }
+ 
+   predictor->searchPoints = count;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Global Initialization
+ *
+ *    Scales the per-block-type thresholds, builds the four refinement
+ *    patterns and the two window predictors, allocates the working
+ *    arrays, and selects the primary and dual search patterns from the
+ *    encoder configuration.
+ *
+ *    The threshold limits are kept fixed in this implementation. They
+ *    could instead be adapted from lagrangian optimization considerations
+ *    (i.e. qp), or from content or complexity. Likewise the patterns
+ *    defined here are only a reference set of potential EPZS patterns:
+ *    other or resizing patterns (such as the PMVFAST scheme) could be
+ *    used, and the search need not be restricted to integer positions.
+ *
+ * \return
+ *    sum of the sizes reported by the get_mem* allocators called here
+ ************************************************************************
+ */
+ int
+ EPZSInit ()
+ {
+   const int sad_scale = 1 << (img->bitdepth_luma - 8);
+   const int levels    = RoundLog2 (input->search_range) - 1;
+   int map_dim;
+   int total_bytes = 0;
+   int blk;
+ 
+   // The search map has to cover the larger window when bi-predictive ME is on.
+   if (input->BiPredMotionEstimation)
+     map_dim = 2 * max (input->search_range, input->BiPredMESearchRange) + 1;
+   else
+     map_dim = 2 * input->search_range + 1;
+ 
+   // Scale the base thresholds by the configured factors and the bit depth.
+   for (blk = 0; blk < 8; blk++)
+   {
+     medthres[blk] = input->EPZSMedThresScale * medthres_base[blk] * sad_scale;
+     maxthres[blk] = input->EPZSMaxThresScale * maxthres_base[blk] * sad_scale;
+     minthres[blk] = input->EPZSMinThresScale * minthres_base[blk] * sad_scale;
+   }
+ 
+   //! Allocate memory and assign search patterns
+   sdiamond = allocEPZSpattern(4);
+   assignEPZSpattern(sdiamond, SDIAMOND);
+   square = allocEPZSpattern(8);
+   assignEPZSpattern(square, SQUARE);
+   ediamond = allocEPZSpattern(12);
+   assignEPZSpattern(ediamond, EDIAMOND);
+   ldiamond = allocEPZSpattern(8);
+   assignEPZSpattern(ldiamond, LDIAMOND);
+ 
+   // Primary refinement pattern: 1 = square, 2 = extended diamond,
+   // 3 = large diamond, anything else = small diamond.
+   if (input->EPZSPattern == 3)
+     searchPattern = ldiamond;
+   else if (input->EPZSPattern == 2)
+     searchPattern = ediamond;
+   else if (input->EPZSPattern == 1)
+     searchPattern = square;
+   else
+     searchPattern = sdiamond;
+ 
+   // Dual refinement pattern: same patterns, shifted up by one because the
+   // configuration value 0 means "disabled".
+   if (input->EPZSDual == 4)
+     searchPatternD = ldiamond;
+   else if (input->EPZSDual == 3)
+     searchPatternD = ediamond;
+   else if (input->EPZSDual == 2)
+     searchPatternD = square;
+   else
+     searchPatternD = sdiamond;
+ 
+   //! Allocate and assign window based predictors.
+   //! Other window types could also be used, while the method could be
+   //! made a bit more adaptive (i.e. patterns could be assigned
+   //! based on neighborhood).
+   window_predictor = allocEPZSpattern(levels * 8);
+   window_predictor_extended = allocEPZSpattern(levels * 20);
+   EPZSWindowPredictorInit ((short) input->search_range, window_predictor, 0);
+   EPZSWindowPredictorInit ((short) input->search_range, window_predictor_extended, 1);
+ 
+   //! Also assign search predictor memory
+   // maxwindow + spatial + blocktype + temporal + memspatial
+   predictor = allocEPZSpattern(levels * 20 + 5 + 5 + 9 * (input->EPZSTemporal) + 3 * (input->EPZSSpatialMem));
+ 
+   //! Finally assign memory for all other elements
+   //! (distortion, EPZSMap, and temporal predictors)
+   total_bytes += get_mem3Dint (&EPZSDistortion, 6, 7, img->width/BLOCK_SIZE);
+   total_bytes += get_mem2D (&EPZSMap, map_dim, map_dim);
+ 
+   if (input->EPZSSpatialMem)
+   {
+ #if EPZSREF
+     total_bytes += get_mem6Dshort (&EPZSMotion, 6, img->max_num_references, 7, 4, img->width/BLOCK_SIZE, 2);
+ #else
+     total_bytes += get_mem5Dshort (&EPZSMotion, 6, 7, 4, img->width/BLOCK_SIZE, 2);
+ #endif
+   }
+ 
+   if (input->EPZSTemporal)
+   {
+     EPZSCo_located = allocEPZScolocated (img->width, img->height,
+                                          active_sps->mb_adaptive_frame_field_flag);
+   }
+ 
+   return total_bytes;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Delete EPZS allocated memory
+ *
+ *    Releases everything EPZSInit allocated. The optional pieces
+ *    (co-located vectors, spatial motion memory) are released under the
+ *    same configuration flags that guarded their allocation.
+ ************************************************************************
+ */
+ void EPZSDelete ()
+ {
+   // Refinement patterns
+   freeEPZSpattern(sdiamond);
+   freeEPZSpattern(square);
+   freeEPZSpattern(ediamond);
+   freeEPZSpattern(ldiamond);
+ 
+   // Predictor sets
+   freeEPZSpattern(predictor);
+   freeEPZSpattern(window_predictor);
+   freeEPZSpattern(window_predictor_extended);
+ 
+   // Working arrays
+   free_mem3Dint (EPZSDistortion, 6);
+   free_mem2D (EPZSMap);
+ 
+   if (input->EPZSSpatialMem)
+   {
+ #if EPZSREF
+     free_mem6Dshort (EPZSMotion, 6, img->max_num_references, 7, 4);
+ #else
+     free_mem5Dshort (EPZSMotion, 6, 7, 4);
+ #endif
+   }
+ 
+   if (input->EPZSTemporal)
+   {
+     freeEPZScolocated (EPZSCo_located);
+   }
+ }
+ 
+ //! For ME purposes restricting the co-located partition is not necessary.
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Slice Level Initialization 
+ ************************************************************************
+ */
+ void
+ EPZSSliceInit (EPZSColocParams * p, 
+                StorablePicture ** listX[6])
+ {
+   StorablePicture *fs, *fs_top, *fs_bottom;
+   StorablePicture *fs1, *fs_top1, *fs_bottom1, *fsx;
+   int i, j, k, jj, jdiv, loffset;
+   int prescale, iTRb, iTRp;
+   int list = img->type == B_SLICE ? LIST_1 : LIST_0;
+   int tempmv_scale[2];
+   int epzs_scale[2][6][MAX_LIST_SIZE];
+   int iref; 
+   
+   // Lets compute scaling factoes between all references in lists.
+   // Needed to scale spatial predictors.
+   for (j = LIST_0; j < 2 + (img->MbaffFrameFlag * 4); j ++)
+   {
+     for (k = 0; k < listXsize[j]; k++)   
+     {
+       for (i = 0; i < listXsize[j]; i++)
+       {
+         if (j/2 == 0) 
+         {
+           iTRb = Clip3 (-128, 127, enc_picture->poc - listX[j][i]->poc);
+           iTRp = Clip3 (-128, 127, enc_picture->poc - listX[j][k]->poc);
+         }
+         else if (j/2 == 1)
+         {
+           iTRb = Clip3 (-128, 127, enc_picture->top_poc - listX[j][i]->poc);
+           iTRp = Clip3 (-128, 127, enc_picture->top_poc - listX[j][k]->poc);
+         }
+         else 
+         {
+           iTRb = Clip3 (-128, 127, enc_picture->bottom_poc - listX[j][i]->poc);
+           iTRp = Clip3 (-128, 127, enc_picture->bottom_poc - listX[j][k]->poc);
+         }
+         
+         if (iTRp != 0)
+         {
+           prescale = (16384 + abs (iTRp / 2)) / iTRp;
+           mv_scale[j][i][k] = Clip3 (-2048, 2047, (iTRb * prescale + 32) >> 6);
+         }
+         else
+           mv_scale[j][i][k] = 256;
+       }
+     }
+   }      
+   
+   if (input->EPZSTemporal)
+   {
+     fs_top = fs_bottom = fs = listX[list][0];
+     if (listXsize[0]> 1)
+       fs_top1 = fs_bottom1 = fs1 = listX[list][1];
+     else
+       fs_top1 = fs_bottom1 = fs1 = listX[list][0];
+     
+     for (j = 0; j < 6; j++)
+     {
+       for (i = 0; i < 6; i++)
+       {
+         epzs_scale[0][j][i] = 256;
+         epzs_scale[1][j][i] = 256;
+       }
+     }
+     
+     for (j = 0; j < 2 + (img->MbaffFrameFlag * 4); j += 2)
+     {
+       for (i = 0; i < listXsize[j]; i++)
+       {      
+         if (j == 0)
+           iTRb = Clip3 (-128, 127, enc_picture->poc - listX[LIST_0 + j][i]->poc);
+         else if (j == 2)
+           iTRb = Clip3 (-128, 127, enc_picture->top_poc - listX[LIST_0 + j][i]->poc);
+         else
+           iTRb = Clip3 (-128, 127, enc_picture->bottom_poc - listX[LIST_0 + j][i]->poc);
+         iTRp = Clip3 (-128, 127, listX[list + j][0]->poc - listX[LIST_0 + j][i]->poc);      
+         if (iTRp != 0)
+         {
+           prescale = (16384 + abs (iTRp / 2)) / iTRp;
+           prescale = Clip3 (-2048, 2047, (iTRb * prescale + 32) >> 6);        
+           //prescale = (iTRb * prescale + 32) >> 6;
+         }
+         else			// This could not happen but lets use it in case that reference is removed.
+           prescale = 256;      
+         epzs_scale[0][j][i] = (mv_scale[j][0][i] * prescale + 128) >> 8;
+         epzs_scale[0][j + 1][i] = prescale - 256;      
+         if (listXsize[j]>1)
+         {
+           iTRp = Clip3 (-128, 127, listX[list + j][1]->poc - listX[LIST_0 + j][i]->poc);      
+           if (iTRp != 0)
+           {
+             prescale = (16384 + abs (iTRp / 2)) / iTRp;
+             prescale = Clip3 (-2048, 2047, (iTRb * prescale + 32) >> 6);        
+             //prescale = (iTRb * prescale + 32) >> 6;
+           }
+           else			// This could not happen but lets use it for case that reference is removed.
+             prescale = 256;      
+           epzs_scale[1][j][i] = (mv_scale[j][1][i] * prescale + 128) >> 8;
+           epzs_scale[1][j + 1][i] = prescale - 256;      
+         }
+         else
+         {
+           epzs_scale[1][j][i] = epzs_scale[0][j][i];
+           epzs_scale[1][j + 1][i] = epzs_scale[0][j + 1][i];
+         }
+       }
+     }  
+     if (img->MbaffFrameFlag)
+     {
+       fs_top = listX[list + 2][0];
+       fs_bottom = listX[list + 4][0];
+       if (listXsize[0]> 1)
+       {
+         fs_top1 = listX[list + 2][1];
+         fs_bottom = listX[list + 4][1];
+       }
+     }
+     else
+     {
+       if (img->structure != FRAME)
+       {
+         if ((img->structure != fs->structure) && (fs->coded_frame))
+         {
+           if (img->structure == TOP_FIELD)
+           {
+             fs_top = fs_bottom = fs = listX[list][0]->top_field;
+             fs_top1 = fs_bottom1 = fs1 = listX[list][0]->bottom_field;
+           }
+           else
+           {
+             fs_top = fs_bottom = fs = listX[list][0]->bottom_field;
+             fs_top1 = fs_bottom1 = fs1 = listX[list][0]->top_field;
+           }
+         }
+       }
+     }
+     
+     //if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+     if (!active_sps->frame_mbs_only_flag)
+     {
+       for (j = 0; j < fs->size_y >> 2; j++)
+       {
+         jj = j / 2;
+         jdiv = j / 2 + 4 * (j / 8);
+         for (i = 0; i < fs->size_x >> 2; i++)
+         {
+           if (img->MbaffFrameFlag && fs->field_frame[j][i])
+           {
+             //! Assign frame buffers for field MBs   
+             //! Check whether we should use top or bottom field mvs.
+             //! Depending on the assigned poc values.                
+             if (abs (enc_picture->poc - fs_bottom->poc) > abs (enc_picture->poc - fs_top->poc))
+             {
+               tempmv_scale[LIST_0] = 256;
+               tempmv_scale[LIST_1] = 0;
+               
+               if (fs->ref_id [LIST_0][jdiv][i] < 0 && listXsize[LIST_0] > 1)
+               {
+                 fsx = fs_top1;
+                 loffset = 1;
+               }
+               else 
+               {
+                 fsx = fs_top;
+                 loffset = 0;
+               }             
+               
+               if (fs->ref_id [LIST_0][jdiv][i] != -1)
+               {
+                 for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0]);iref++)
+                 {
+                   if (enc_picture->ref_pic_num[LIST_0][iref]==fs->ref_id [LIST_0][jdiv][i])
+                   {
+                     tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                     tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                     break;
+                   }
+                 }
+                 p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][0] + 32) >> 6;
+                 p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][1] + 32) >> 6;
+                 p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][0] + 32) >> 6;
+                 p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][1] + 32) >> 6;
+               }
+               else
+               {
+                 p->mv[LIST_0][j][i][0] = 0;
+                 p->mv[LIST_0][j][i][1] = 0;
+                 p->mv[LIST_1][j][i][0] = 0;
+                 p->mv[LIST_1][j][i][1] = 0;        
+               }          
+               
+             }
+             else
+             {
+               tempmv_scale[LIST_0] = 256;
+               tempmv_scale[LIST_1] = 0;
+               
+               if (fs->ref_id [LIST_0][jdiv + 4][i] < 0 && listXsize[LIST_0] > 1)
+               {
+                 fsx = fs_bottom1;
+                 loffset = 1;
+               }
+               else 
+               {
+                 fsx = fs_bottom;
+                 loffset = 0;
+               }             
+               
+               if (fs->ref_id [LIST_0][jdiv + 4][i] != -1)
+               {
+                 for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0]);iref++)
+                 {
+                   if (enc_picture->ref_pic_num[LIST_0][iref]==fs->ref_id [LIST_0][jdiv + 4][i])
+                   {
+                     tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                     tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                     break;
+                   }
+                 }
+                 p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][0] + 32) >> 6;
+                 p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][jj][i][1] + 32) >> 6;
+                 p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][0] + 32) >> 6;
+                 p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][jj][i][1] + 32) >> 6;        
+               }
+               else
+               {
+                 p->mv[LIST_0][j][i][0] = 0;
+                 p->mv[LIST_0][j][i][1] = 0;
+                 p->mv[LIST_1][j][i][0] = 0;
+                 p->mv[LIST_1][j][i][1] = 0;        
+               }           
+             }
+           }
+           else
+           {          
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+             if (fs->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs1;
+               loffset = 1;
+             }
+             else 
+             {
+               fsx = fs;
+               loffset = 0;
+             }             
+             
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0]);iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                   break;
+                 }
+               }
+               p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;
+               p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;        
+             }
+             else
+             {
+               p->mv[LIST_0][j][i][0] = 0;
+               p->mv[LIST_0][j][i][1] = 0;
+               p->mv[LIST_1][j][i][0] = 0;
+               p->mv[LIST_1][j][i][1] = 0;        
+             }          
+           }
+         }
+       }
+     }
+     
+     //! Generate field MVs from Frame MVs  
+     if (img->structure || img->MbaffFrameFlag)
+     {
+       for (j = 0; j < fs->size_y / 8; j++)
+       {
+         for (i = 0; i < fs->size_x / 4; i++)
+         {
+           if (!img->MbaffFrameFlag)
+           {
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+             
+             if (fs->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs1;
+               loffset = 1;
+             }
+             else 
+             {
+               fsx = fs;
+               loffset = 0;
+             }             
+             
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0]);iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                   break;
+                 }
+               }
+               p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;
+               p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;                  
+               
+             }
+             else
+             {
+               p->mv[LIST_0][j][i][0] = 0;
+               p->mv[LIST_0][j][i][1] = 0;
+               p->mv[LIST_1][j][i][0] = 0;
+               p->mv[LIST_1][j][i][1] = 0;                  
+             }          
+           }
+           else
+           {
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+             
+             if (fs_bottom->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs_bottom1;
+               loffset = 1;
+             }
+             else 
+             {
+               fsx = fs_bottom;
+               loffset = 0;
+             }             
+             
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref=0;iref<min(2*img->num_ref_idx_l0_active,listXsize[LIST_0 + 4]);iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0 + 4][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0 + 4][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1 + 4][iref];
+                   break;
+                 }
+               }
+               p->bottom_mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->bottom_mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;
+               p->bottom_mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->bottom_mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;                              
+             }
+             else
+             {
+               p->bottom_mv[LIST_0][j][i][0] = 0;
+               p->bottom_mv[LIST_0][j][i][1] = 0;
+               p->bottom_mv[LIST_1][j][i][0] = 0;
+               p->bottom_mv[LIST_1][j][i][1] = 0;                  
+             }
+             
+             if (!fs->field_frame[2 * j][i])
+             {
+               p->bottom_mv[LIST_0][j][i][1] = (p->bottom_mv[LIST_0][j][i][1] + 1) >> 1;
+               p->bottom_mv[LIST_1][j][i][1] = (p->bottom_mv[LIST_1][j][i][1] + 1) >> 1;
+             }
+             
+             tempmv_scale[LIST_0] = 256;
+             tempmv_scale[LIST_1] = 0;
+             
+             if (fs_top->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+             {
+               fsx = fs_top1;
+               loffset = 1;
+             }
+             else 
+             {
+               fsx = fs_top;
+               loffset = 0;
+             }             
+             
+             if (fsx->ref_id [LIST_0][j][i] != -1)
+             {
+               for (iref=0;iref<min(2*img->num_ref_idx_l0_active,listXsize[LIST_0 + 2]);iref++)
+               {
+                 if (enc_picture->ref_pic_num[LIST_0 + 2][iref]==fsx->ref_id [LIST_0][j][i])
+                 {
+                   tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0 + 2][iref];
+                   tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1 + 2][iref];
+                   break;
+                 }
+               }
+               p->top_mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->top_mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;
+               p->top_mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+               p->top_mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;                  
+             }
+             else
+             {
+               p->top_mv[LIST_0][j][i][0] = 0;
+               p->top_mv[LIST_0][j][i][1] = 0;
+               p->top_mv[LIST_1][j][i][0] = 0;
+               p->top_mv[LIST_1][j][i][1] = 0;                  
+             }
+             
+             if (!fs->field_frame[2 * j][i])
+             {
+               p->top_mv[LIST_0][j][i][1] = (p->top_mv[LIST_0][j][i][1] + 1) >> 1;
+               p->top_mv[LIST_1][j][i][1] = (p->top_mv[LIST_1][j][i][1] + 1) >> 1;
+             }
+           }
+         }
+       }
+     }
+     
+     //if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)
+     if (!active_sps->frame_mbs_only_flag )
+     {
+       //! Use inference flag to remap mvs/references
+       //! Frame with field co-located    
+       if (!img->structure)
+       {
+         for (j = 0; j < fs->size_y >> 2; j++)
+         {
+           jj = j>>1;
+           jdiv = (j>>1) + ((j>>3) << 2);
+           for (i = 0; i < fs->size_x >> 2; i++)
+           {        
+             if (fs->field_frame[j][i])
+             {
+               tempmv_scale[LIST_0] = 256;
+               tempmv_scale[LIST_1] = 0;
+               
+               if (fs->ref_id [LIST_0][jdiv][i] < 0 && listXsize[LIST_0] > 1)
+               {
+                 fsx = fs1;
+                 loffset = 1;
+               }
+               else 
+               {
+                 fsx = fs;
+                 loffset = 0;
+               }             
+               if (fsx->ref_id [LIST_0][jdiv][i] != -1)
+               {
+                 for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0]);iref++)
+                 {
+                   if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][jdiv][i])
+                   {
+                     tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                     tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                     break;
+                   }
+                 }
+                 
+                 if (abs (enc_picture->poc - fsx->bottom_field->poc) > abs (enc_picture->poc - fsx->top_field->poc))
+                 {              
+                   p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->top_field->mv[LIST_0][jj][i][0] + 32) >> 6;
+                   p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->top_field->mv[LIST_0][jj][i][1] + 32) >> 6;
+                   p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->top_field->mv[LIST_0][jj][i][0] + 32) >> 6;
+                   p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->top_field->mv[LIST_0][jj][i][1] + 32) >> 6;        
+                 }
+                 else
+                 {
+                   p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->bottom_field->mv[LIST_0][jj][i][0] + 32) >> 6;
+                   p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->bottom_field->mv[LIST_0][jj][i][1] + 32) >> 6;
+                   p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->bottom_field->mv[LIST_0][jj][i][0] + 32) >> 6;
+                   p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->bottom_field->mv[LIST_0][jj][i][1] + 32) >> 6;        
+                 }
+               }
+               else
+               {              
+                 p->mv[LIST_0][j][i][0] = 0;
+                 p->mv[LIST_0][j][i][1] = 0;
+                 p->mv[LIST_1][j][i][0] = 0;
+                 p->mv[LIST_1][j][i][1] = 0;
+               }
+             }
+           }
+         }
+       }
+     }
+     
+     if (active_sps->frame_mbs_only_flag)
+     {
+       for (j = 0; j < fs->size_y >> 2; j++)
+       {
+         for (i = 0; i < fs->size_x >> 2; i++)
+         {
+           tempmv_scale[LIST_0] = 256;
+           tempmv_scale[LIST_1] = 0;
+           if (fs->ref_id [LIST_0][j][i] < 0 && listXsize[LIST_0] > 1)
+           {
+             fsx = fs1;
+             loffset = 1;
+           }
+           else 
+           {
+             fsx = fs;
+             loffset = 0;
+           }             
+           if (fsx->ref_id [LIST_0][j][i] != -1)
+           {
+             for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0]);iref++)
+             {
+               if (enc_picture->ref_pic_num[LIST_0][iref]==fsx->ref_id [LIST_0][j][i])
+               {
+                 tempmv_scale[LIST_0] = epzs_scale[loffset][LIST_0][iref];
+                 tempmv_scale[LIST_1] = epzs_scale[loffset][LIST_1][iref];
+                 break;
+               }
+             }
+             p->mv[LIST_0][j][i][0] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+             p->mv[LIST_0][j][i][1] = (tempmv_scale[LIST_0] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;
+             p->mv[LIST_1][j][i][0] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][0] + 32) >> 6;
+             p->mv[LIST_1][j][i][1] = (tempmv_scale[LIST_1] * fsx->mv[LIST_0][j][i][1] + 32) >> 6;                
+           }
+           else
+           {
+             p->mv[LIST_0][j][i][0] = 0;
+             p->mv[LIST_0][j][i][1] = 0;
+             p->mv[LIST_1][j][i][0] = 0;
+             p->mv[LIST_1][j][i][1] = 0;
+           }
+         }
+       }
+     }
+     
+     if (!active_sps->frame_mbs_only_flag)
+     {
+       for (j = 0; j < fs->size_y >> 2; j++)
+       {
+         for (i = 0; i < fs->size_x >> 2; i++)
+         {
+           if ((!img->MbaffFrameFlag && !img->structure && fs->field_frame[j][i]) || (img->MbaffFrameFlag && fs->field_frame[j][i]))
+           {
+             p->mv[LIST_0][j][i][1] *= 2;        
+             p->mv[LIST_1][j][i][1] *= 2;
+           }
+           else if (img->structure && !fs->field_frame[j][i])
+           {
+             p->mv[LIST_0][j][i][1] = (p->mv[LIST_0][j][i][1] + 1) >> 1;
+             p->mv[LIST_1][j][i][1] = (p->mv[LIST_1][j][i][1] + 1) >> 1;
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Spatial Predictors
+ *    AMT/HYC
+ *
+ *    Fills predictor->point[0..4] with the zero vector followed by the
+ *    scaled motion vectors of the left (A), up (B), up-right (C) and
+ *    up-left (D) neighbouring blocks.
+ *
+ *    For an available neighbour the vector is read from
+ *    tmp_mv[pos_y][pos_x], multiplied by mot_scale[<neighbour ref>] and
+ *    rounded with a right shift of 10. A neighbour that is not available
+ *    gets a small fixed fallback offset instead (see the constants at the
+ *    end of each expression).
+ *
+ *    When img->MbaffFrameFlag is set, the neighbour's reference index and
+ *    its vertical component are adapted to the current macroblock:
+ *      - list_offset != 0: a neighbour whose mb_field is 0 has its
+ *        reference index doubled and its vertical component shifted by 11
+ *        instead of 10;
+ *      - list_offset == 0: a neighbour whose mb_field is set has its
+ *        reference index halved and its vertical component shifted by 9
+ *        instead of 10.
+ *
+ * \param block_a      left neighbour
+ * \param block_b      up neighbour
+ * \param block_c      up-right neighbour
+ * \param block_d      up-left neighbour
+ * \param list         current list; list + list_offset selects the row of mv_scale
+ * \param list_offset  list offset; non-zero selects the field branch in MB-AFF mode
+ * \param ref          current reference; selects the mv_scale entry used as mot_scale
+ * \param refPic       reference indices, addressed as refPic[pos_y][pos_x]
+ * \param tmp_mv       motion vectors, addressed as tmp_mv[pos_y][pos_x][0 = x, 1 = y]
+ * \param predictor    receives the five candidates in point[0..4]
+ *
+ * \return
+ *    (refA == -1) + (refB == -1) + (refC == -1 && refD == -1), i.e. a
+ *    count of neighbours without a reference where C and D count once
+ *    and only if both are missing.
+ *
+ * \note
+ *    NOTE(review): mot_scale[] is indexed with the value read from refPic
+ *    whenever the neighbour is available; presumably refPic can hold a
+ *    negative value for such a neighbour - confirm mot_scale tolerates it.
+ ***********************************************************************
+ */
+ static short EPZSSpatialPredictors (PixelPos block_a, 
+                                     PixelPos block_b,
+                                     PixelPos block_c, 
+                                     PixelPos block_d,
+                                     int list, 
+                                     int list_offset, 
+                                     short ref,
+                                     char **refPic, 
+                                     short ***tmp_mv,
+                                     EPZSStructure * predictor)
+ {
+   int refA, refB, refC, refD;
+   int *mot_scale = mv_scale[list + list_offset][ref]; // scale factors, indexed below by the neighbour's reference index
+   
+   // zero predictor: point[0] is always (0,0)
+   predictor->point[0].x = 0;
+   predictor->point[0].y = 0;
+   
+   // Non MB-AFF mode: reference indices and vectors are used as stored
+   if (!img->MbaffFrameFlag)
+   {
+     refA = block_a.available ? (int) refPic[block_a.pos_y][block_a.pos_x] : -1;
+     refB = block_b.available ? (int) refPic[block_b.pos_y][block_b.pos_x] : -1;
+     refC = block_c.available ? (int) refPic[block_c.pos_y][block_c.pos_x] : -1;
+     refD = block_d.available ? (int) refPic[block_d.pos_y][block_d.pos_x] : -1;
+     
+     // Left Predictor, fallback (3, 0)
+     predictor->point[1].x = (block_a.available) 
+       ? (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][0] + 512) >> 10 :  3;
+     predictor->point[1].y = (block_a.available) 
+       ? (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1] + 512) >> 10 :  0;
+     
+     // Up predictor, fallback (0, 3)
+     predictor->point[2].x = (block_b.available) 
+       ? (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][0] + 512) >> 10 :  0;
+     predictor->point[2].y = (block_b.available) 
+       ? (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1] + 512) >> 10 :  3;
+     
+     // Up-Right predictor, fallback (-3, 0)
+     predictor->point[3].x = (block_c.available) 
+       ? (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][0] + 512) >> 10 : -3;
+     predictor->point[3].y = (block_c.available) 
+       ? (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1] + 512) >> 10 :  0;
+     
+     //Up-Left predictor, fallback (0, -3)
+     predictor->point[4].x = (block_d.available) 
+       ? (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][0] + 512) >> 10 :  0;
+     predictor->point[4].y = (block_d.available) 
+       ? (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1] + 512) >> 10 : -3;    
+   }
+   else  // MB-AFF mode
+   {    
+     // Field Macroblock (list_offset != 0): neighbours with mb_field == 0 get their reference index doubled
+     if (list_offset)
+     {
+       refA = block_a.available 
+         ? img->mb_data[block_a.mb_addr].mb_field 
+         ? (int) refPic[block_a.pos_y][block_a.pos_x] 
+         : (int) refPic[block_a.pos_y][block_a.pos_x] * 2 : -1;
+       refB =block_b.available 
+         ? img->mb_data[block_b.mb_addr].mb_field 
+         ? (int) refPic[block_b.pos_y][block_b.pos_x]
+         : (int) refPic[block_b.pos_y][block_b.pos_x] * 2 : -1;
+       refC = block_c.available 
+         ? img->mb_data[block_c.mb_addr].mb_field 
+         ? (int) refPic[block_c.pos_y][block_c.pos_x] 
+         : (int) refPic[block_c.pos_y][block_c.pos_x] * 2 : -1;
+       refD = block_d.available 
+         ? img->mb_data[block_d.mb_addr].mb_field 
+         ? (int) refPic[block_d.pos_y][block_d.pos_x] 
+         : (int) refPic[block_d.pos_y][block_d.pos_x] * 2 : -1;
+       
+       // Left Predictor (vertical component: >> 10 for a field neighbour, >> 11 otherwise)
+       predictor->point[1].x = (block_a.available) 
+         ? (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][0] + 512 ) >> 10 :  3;
+       predictor->point[1].y = (block_a.available) 
+         ? img->mb_data[block_a.mb_addr].mb_field 
+         ? (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1] + 512 ) >> 10 
+         : (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1] + 1024) >> 11 :  0;
+       
+       // Up predictor
+       predictor->point[2].x = (block_b.available) 
+         ? (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][0] + 512 ) >> 10 :  0;
+       predictor->point[2].y = (block_b.available) 
+         ? img->mb_data[block_b.mb_addr].mb_field 
+         ? (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1] + 512 ) >> 10 
+         : (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1] + 1024) >> 11 :  3;
+       
+       // Up-Right predictor
+       predictor->point[3].x = (block_c.available) 
+         ? (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][0] + 512 ) >> 10 : -3;
+       predictor->point[3].y = (block_c.available) 
+         ? img->mb_data[block_c.mb_addr].mb_field 
+         ? (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1] + 512 ) >> 10 
+         : (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1] + 1024) >> 11 :  0;
+       
+       //Up-Left predictor
+       predictor->point[4].x = (block_d.available) 
+         ? (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][0] + 512 ) >> 10 :  0;
+       predictor->point[4].y = (block_d.available) 
+         ? img->mb_data[block_d.mb_addr].mb_field 
+         ? (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1] + 512 ) >> 10 
+         : (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1] + 1024) >> 11 : -3;
+     }
+     else // Frame macroblock (list_offset == 0): neighbours with mb_field set get their reference index halved
+     {
+       refA = block_a.available 
+         ? img->mb_data[block_a.mb_addr].mb_field 
+         ? (int) refPic[block_a.pos_y][block_a.pos_x] >> 1
+         : (int) refPic[block_a.pos_y][block_a.pos_x] : -1;
+       refB = block_b.available 
+         ? img->mb_data[block_b.mb_addr].mb_field 
+         ? (int) refPic[block_b.pos_y][block_b.pos_x] >> 1 
+         : (int) refPic[block_b.pos_y][block_b.pos_x] : -1;
+       refC = block_c.available 
+         ? img->mb_data[block_c.mb_addr].mb_field 
+         ? (int) refPic[block_c.pos_y][block_c.pos_x] >> 1 
+         : (int) refPic[block_c.pos_y][block_c.pos_x] : -1;
+       refD = block_d.available 
+         ? img->mb_data[block_d.mb_addr].mb_field 
+         ? (int) refPic[block_d.pos_y][block_d.pos_x] >> 1 
+         : (int) refPic[block_d.pos_y][block_d.pos_x] : -1;
+       
+       // Left Predictor (vertical component: >> 9 for a field neighbour, >> 10 otherwise)
+       predictor->point[1].x = (block_a.available) 
+         ? (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][0] + 512) >> 10 :  3;
+       predictor->point[1].y = (block_a.available) 
+         ? img->mb_data[block_a.mb_addr].mb_field 
+         ? (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1] + 256) >> 9  
+         : (mot_scale[refA] * tmp_mv[block_a.pos_y][block_a.pos_x][1] + 512) >> 10 :  0;
+       
+       // Up predictor
+       predictor->point[2].x = (block_b.available) 
+         ? (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][0] + 512) >> 10 :  0;
+       predictor->point[2].y = (block_b.available) 
+         ? img->mb_data[block_b.mb_addr].mb_field 
+         ? (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1] + 256) >> 9  
+         : (mot_scale[refB] * tmp_mv[block_b.pos_y][block_b.pos_x][1] + 512) >> 10 :  3;
+       
+       // Up-Right predictor
+       predictor->point[3].x = (block_c.available) 
+         ? (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][0] + 512) >> 10 : -3;
+       predictor->point[3].y = (block_c.available) 
+         ? img->mb_data[block_c.mb_addr].mb_field 
+         ? (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1] + 256) >> 9  
+         : (mot_scale[refC] * tmp_mv[block_c.pos_y][block_c.pos_x][1] + 512) >> 10 :  3; // NOTE(review): the other two Up-Right branches fall back to 0 for y - confirm this 3 is intended
+       
+       //Up-Left predictor
+       predictor->point[4].x = (block_d.available) 
+         ? (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][0] + 512) >> 10 :  0;
+       predictor->point[4].y = (block_d.available) 
+         ? img->mb_data[block_d.mb_addr].mb_field 
+         ? (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1] + 256) >>  9 
+         : (mot_scale[refD] * tmp_mv[block_d.pos_y][block_d.pos_x][1] + 512) >> 10 : -3;
+     }
+   }
+   
+   return ((refA == -1) + (refB == -1) + (refC == -1 && refD == -1));
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Spatial Memory Predictors
+ *    AMT/HYC
+ *
+ *    Appends up to three candidates to predictor->point[], taken from the
+ *    motion stored in EPZSMotion for the left, up and up-right positions
+ *    relative to (by, pic_x). A candidate is written at index *prednum
+ *    and *prednum is advanced only when the candidate is non-zero, so a
+ *    zero candidate is overwritten by the next one.
+ *
+ *    With EPZSREF enabled the stored vectors are kept per reference and
+ *    used as they are; otherwise they are scaled by mv_scale[list][ref][0]
+ *    and rounded with a right shift of 8.
+ *
+ * \param list       list index into EPZSMotion (and mv_scale)
+ * \param ref        reference index
+ * \param blocktype  block type index into EPZSMotion
+ * \param pic_x      horizontal position of the block in the motion array
+ * \param bs_x       horizontal block size, in units of the motion array
+ * \param bs_y       vertical block size, in units of the motion array
+ * \param by         vertical position of the block in the motion array
+ * \param prednum    in/out: number of candidates stored in predictor
+ * \param img_width  width limit for the up-right position
+ * \param predictor  candidate list to extend
+ ***********************************************************************
+ */
+ 
+ static void EPZSSpatialMemPredictors (int list, 
+                                       short ref,
+                                       int blocktype,
+                                       int pic_x, 
+                                       int bs_x, 
+                                       int bs_y, 
+                                       int by,
+                                       int *prednum,
+                                       int img_width,
+                                       EPZSStructure * predictor)
+ {
+   // Row holding the "up" neighbours. For by == 0 the original code reads
+   // row (4 - bs_y); presumably the memory wraps around every 4 rows - confirm.
+   int up_row    = (by > 0) ? by - bs_y : 4 - bs_y;
+   int has_left  = (pic_x > 0);
+   int has_right = (pic_x + bs_x < img_width);
+ #if EPZSREF
+   short ***mv = EPZSMotion[list][ref][blocktype];  
+   
+   // Left Predictor
+   predictor->point[*prednum].x = has_left ? mv[by][pic_x - bs_x][0] : 0;
+   predictor->point[*prednum].y = has_left ? mv[by][pic_x - bs_x][1] : 0;
+   *prednum += ((predictor->point[*prednum].x != 0) || (predictor->point[*prednum].y != 0));
+   
+   // Up predictor
+   predictor->point[*prednum].x = mv[up_row][pic_x][0];
+   predictor->point[*prednum].y = mv[up_row][pic_x][1];
+   *prednum += ((predictor->point[*prednum].x != 0) || (predictor->point[*prednum].y != 0));
+   
+   // Up-Right predictor
+   predictor->point[*prednum].x = has_right ? mv[up_row][pic_x + bs_x][0] : 0;
+   predictor->point[*prednum].y = has_right ? mv[up_row][pic_x + bs_x][1] : 0;
+   *prednum += ((predictor->point[*prednum].x != 0) || (predictor->point[*prednum].y != 0));
+   
+ #else
+   int mot_scale = mv_scale[list][ref][0];
+   // EPZSMotion[list][blocktype] is indexed [row][column][component], so
+   // three levels of indirection are required here.
+   short ***mv = EPZSMotion[list][blocktype];  
+   
+   // Left Predictor
+   predictor->point[*prednum].x = has_left 
+     ? (mot_scale * mv[by][pic_x - bs_x][0] + 128) >> 8 
+     : 0;
+   predictor->point[*prednum].y = has_left 
+     ? (mot_scale * mv[by][pic_x - bs_x][1] + 128) >> 8 
+     : 0;
+   *prednum += ((predictor->point[*prednum].x != 0) || (predictor->point[*prednum].y != 0));
+   
+   // Up predictor (the y value must come from component 1, not component 0)
+   predictor->point[*prednum].x = (mot_scale * mv[up_row][pic_x][0] + 128) >> 8;
+   predictor->point[*prednum].y = (mot_scale * mv[up_row][pic_x][1] + 128) >> 8;
+   *prednum += ((predictor->point[*prednum].x != 0) || (predictor->point[*prednum].y != 0));
+   
+   // Up-Right predictor
+   predictor->point[*prednum].x = has_right 
+     ? (mot_scale * mv[up_row][pic_x + bs_x][0] + 128) >> 8
+     : 0;
+   predictor->point[*prednum].y = has_right 
+     ? (mot_scale * mv[up_row][pic_x + bs_x][1] + 128) >> 8
+     : 0;
+   *prednum += ((predictor->point[*prednum].x != 0) || (predictor->point[*prednum].y != 0));
+ #endif
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Stores one scaled co-located vector at predictor->point[*prednum]
+ *    and advances *prednum when the stored vector is non-zero.
+ ***********************************************************************
+ */
+ static void
+ EPZSAppendColocatedMV (EPZSStructure * predictor, int *prednum, int mvScale, const short *colMv)
+ {
+   predictor->point[*prednum].x = (mvScale * colMv[0] + 2048) >> 12;
+   predictor->point[*prednum].y = (mvScale * colMv[1] + 2048) >> 12;
+   if (predictor->point[*prednum].x != 0 || predictor->point[*prednum].y != 0)
+     ++(*prednum);
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Temporal Predictors
+ *    AMT/HYC
+ *
+ *    Appends candidates derived from the co-located motion field
+ *    (EPZSCo_located), scaled by mv_scale[list + list_offset][ref][0].
+ *    The co-located block itself is always tried; its neighbours are only
+ *    tried while min_mcost is above stopCriterion and ref is below 2.
+ ***********************************************************************
+ */
+ static void
+ EPZSTemporalPredictors (int list,         // <--  current list
+                         int list_offset,  // <--  list offset for MBAFF
+                         short ref,        // <--  current reference frame
+                         int o_block_x,    // <--  absolute x-coordinate of regarded AxB block
+                         int o_block_y,    // <--  absolute y-coordinate of regarded AxB block
+                         EPZSStructure * predictor, 
+                         int *prednum,
+                         int block_available_left, 
+                         int block_available_up,
+                         int block_available_right, 
+                         int block_available_below,
+                         int blockshape_x, 
+                         int blockshape_y,
+                         int stopCriterion, 
+                         int min_mcost)
+ {
+   int mvScale  = mv_scale[list + list_offset][ref][0];
+   int colLeft  = o_block_x - 1;
+   int colRight = o_block_x + blockshape_x;
+   int rowUp    = o_block_y - 1;
+   int rowDown  = o_block_y + blockshape_y;
+   short ***col_mv;
+   
+   // pick the co-located motion field matching the list offset
+   if (list_offset == 0)
+     col_mv = EPZSCo_located->mv[list];
+   else if (list_offset == 2)
+     col_mv = EPZSCo_located->top_mv[list];
+   else
+     col_mv = EPZSCo_located->bottom_mv[list];
+   
+   // co-located block
+   EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[o_block_y][o_block_x]);
+   
+   // neighbours are skipped once the cost is low enough or for ref >= 2
+   if (min_mcost <= stopCriterion || ref >= 2)
+     return;
+   
+   if (block_available_left)
+   {
+     // Left
+     EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[o_block_y][colLeft]);
+     
+     // Up-Left
+     if (block_available_up)
+       EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[rowUp][colLeft]);
+     
+     // Down-Left
+     if (block_available_below)
+       EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[rowDown][colLeft]);
+   }
+   
+   // Up
+   if (block_available_up)
+     EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[rowUp][o_block_x]);
+   
+   if (block_available_right)
+   {
+     // Right
+     EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[o_block_y][colRight]);
+     
+     // Up-Right
+     if (block_available_up)
+       EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[rowUp][colRight]);
+     
+     // Down-Right
+     if (block_available_below)
+       EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[rowDown][colRight]);
+   }
+   
+   // Down
+   if (block_available_below)
+     EPZSAppendColocatedMV (predictor, prednum, mvScale, col_mv[rowDown][o_block_x]);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Block Type Predictors 
+ *
+ *    Appends candidates taken from img->all_mv for the current block:
+ *    scaled vectors of the same block type found for reference ref-1 and
+ *    reference 0, then the vectors found for the current reference with
+ *    the parent block type (blk_parent), block type 1 and block type 4.
+ *    A candidate only advances *prednum when it is non-zero.
+ ************************************************************************
+ */
+ static void EPZSBlockTypePredictors (int block_x, int block_y, int blocktype, int ref, int list,
+                                      EPZSStructure * predictor, int *prednum)
+ {
+   short ***mvs = img->all_mv[block_y][block_x][list];
+   short *cand;
+   int scale;
+   
+   // same block type, other references (scaled to the current reference)
+   if (ref > 0 && !(blocktype >= 5 && img->structure == FRAME))
+   {
+     scale = mv_scale[list][ref][ref - 1];
+     cand  = mvs[ref - 1][blocktype];
+     predictor->point[*prednum].x = (scale * cand[0] + 512) >> 10;
+     predictor->point[*prednum].y = (scale * cand[1] + 512) >> 10;
+     if (predictor->point[*prednum].x != 0 || predictor->point[*prednum].y != 0)
+       ++(*prednum);
+     
+     scale = mv_scale[list][ref][0];
+     cand  = mvs[0][blocktype];
+     predictor->point[*prednum].x = (scale * cand[0] + 512) >> 10;
+     predictor->point[*prednum].y = (scale * cand[1] + 512) >> 10;
+     if (predictor->point[*prednum].x != 0 || predictor->point[*prednum].y != 0)
+       ++(*prednum);
+   }
+   
+   // parent block type, same reference
+   cand = mvs[ref][blk_parent[blocktype]];
+   predictor->point[*prednum].x = (cand[0] + 2) >> 2;
+   predictor->point[*prednum].y = (cand[1] + 2) >> 2;
+   if (predictor->point[*prednum].x != 0 || predictor->point[*prednum].y != 0)
+     ++(*prednum);
+   
+   // block type 1, unless that is the current type
+   if (blocktype != 1)
+   {
+     cand = mvs[ref][1];
+     predictor->point[*prednum].x = (cand[0] + 2) >> 2;
+     predictor->point[*prednum].y = (cand[1] + 2) >> 2;
+     if (predictor->point[*prednum].x != 0 || predictor->point[*prednum].y != 0)
+       ++(*prednum);
+   }
+   
+   // block type 4, unless that is the current type
+   if (blocktype != 4)
+   {
+     cand = mvs[ref][4];
+     predictor->point[*prednum].x = (cand[0] + 2) >> 2;
+     predictor->point[*prednum].y = (cand[1] + 2) >> 2;
+     if (predictor->point[*prednum].x != 0 || predictor->point[*prednum].y != 0)
+       ++(*prednum);
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    EPZS Window Based Predictors 
+ *
+ *    Appends every point of the window pattern (the extended one when
+ *    'extended' is non-zero), offset by (mv_x, mv_y), to predictor and
+ *    advances *prednum once per point.
+ ************************************************************************
+ */
+ static void EPZSWindowPredictors (int mv_x, int mv_y, EPZSStructure *predictor, int *prednum, int extended)
+ {
+   EPZSStructure *pattern = extended ? window_predictor_extended : window_predictor;
+   int k = 0;
+   
+   while (k < pattern->searchPoints)
+   {
+     int slot = *prednum;
+     
+     predictor->point[slot].x = mv_x + pattern->point[k].x;
+     predictor->point[slot].y = mv_y + pattern->point[k].y;
+     *prednum = slot + 1;
+     k++;
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    SAD computation 
+ *
+ *    Adds the sum of absolute differences between cur_pic and the
+ *    reference block at (cand_x, cand_y) to mcost, row by row. The scan
+ *    stops after the first row at which mcost has reached min_mcost.
+ *
+ * \return
+ *    accumulated cost (partial when the scan stopped early)
+ ************************************************************************
+ */
+ static int computeSad(pel_t** cur_pic,
+                int blocksize_y,
+                int blocksize_x, 
+                int blockshape_x,
+                int mcost,
+                int min_mcost,
+                int cand_x,
+                int cand_y)
+ {
+   int row = 0;
+   
+   while (row < blocksize_y)
+   {
+     pel_t *ref_px = get_ref_line (blocksize_x, ref_pic, cand_y + row, cand_x, img_height, img_width);
+     pel_t *cur_px = cur_pic [row];
+     int quad, k;
+     
+     // each quad covers four consecutive pixels
+     for (quad = 0; quad < blockshape_x; quad++)
+     {
+       for (k = 0; k < 4; k++)
+       {
+         mcost += byte_abs[ *cur_px++ - *ref_px++ ];
+       }
+     }
+     
+     if (mcost >= min_mcost)
+       break;
+     row++;
+   }
+   return mcost;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred SAD computation (no weights)
+ *
+ *    Adds to mcost the absolute differences between cur_pic and the
+ *    truncated average of the two reference blocks at (cand_x1, cand_y1)
+ *    and (cand_x2, cand_y2). The scan stops after the first row at which
+ *    mcost has reached min_mcost.
+ *
+ * \return
+ *    accumulated cost (partial when the scan stopped early)
+ ************************************************************************
+ */
+ static int computeBiPredSad1(pel_t** cur_pic,
+                              int blocksize_y,
+                              int blocksize_x, 
+                              int blockshape_x,
+                              int mcost,
+                              int min_mcost,
+                              int cand_x1, int cand_y1, 
+                              int cand_x2, int cand_y2)
+ {
+   int row = 0;
+   
+   while (row < blocksize_y)
+   {
+     // the second reference line is fetched first, as in the unrolled form
+     pel_t *refB_px = get_ref_line2 (blocksize_x, ref_pic2, cand_y2 + row, cand_x2, img_height, img_width);
+     pel_t *refA_px = get_ref_line1 (blocksize_x, ref_pic1, cand_y1 + row, cand_x1, img_height, img_width);
+     pel_t *cur_px  = cur_pic [row];
+     int quad, k, diff;
+     
+     // each quad covers four consecutive pixels
+     for (quad = 0; quad < blockshape_x; quad++)
+     {
+       for (k = 0; k < 4; k++)
+       {
+         diff = (*cur_px++) - ((*refA_px++ + *refB_px++)>>1);
+         mcost += byte_abs[diff];
+       }
+     }
+     
+     if (mcost >= min_mcost)
+       break;
+     row++;
+   }
+   return mcost;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    BiPred SAD computation (with weights)
+ *
+ *    Adds to mcost the absolute differences between cur_pic and the
+ *    weighted combination of the two reference blocks
+ *    (weight1 / weight2 / offsetBi, clipped to [0, img->max_imgpel_value]).
+ *    The cost is compared against min_mcost after every group of four
+ *    pixels and again after every row; the scan stops as soon as it is
+ *    reached.
+ *
+ * \return
+ *    accumulated cost (partial when the scan stopped early)
+ ************************************************************************
+ */
+ static int computeBiPredSad2(pel_t** cur_pic,
+                              int blocksize_y,
+                              int blocksize_x, 
+                              int blockshape_x,
+                              int mcost,
+                              int min_mcost,
+                              int cand_x1, int cand_y1, 
+                              int cand_x2, int cand_y2)
+ {
+   int shift    = luma_log_weight_denom + 1;
+   int rounding = 2 * wp_luma_round;
+   int row = 0;
+   
+   while (row < blocksize_y)
+   {
+     // the second reference line is fetched first, as in the unrolled form
+     pel_t *refB_px = get_ref_line2 (blocksize_x, ref_pic2, cand_y2 + row, cand_x2, img_height, img_width);
+     pel_t *refA_px = get_ref_line1 (blocksize_x, ref_pic1, cand_y1 + row, cand_x1, img_height, img_width);
+     pel_t *cur_px  = cur_pic [row];
+     int quad, k;
+     int termA, termB, predicted, diff;
+     
+     for (quad = 0; quad < blockshape_x; quad++)
+     {
+       for (k = 0; k < 4; k++)
+       {
+         // pointer increments stay outside Clip3 so its arguments have no side effects
+         termA = weight1 * (*refA_px++);
+         termB = weight2 * (*refB_px++);
+         predicted = Clip3 (0, img->max_imgpel_value ,((termA + termB + rounding) >> shift) + offsetBi);
+         diff = (*cur_px++)  - predicted;
+         mcost += byte_abs[diff];
+       }
+       
+       // early exit within the row, checked once per group of four pixels
+       if (mcost >= min_mcost)
+         break;
+     }
+     
+     if (mcost >= min_mcost)
+       break;
+     row++;
+   }
+   return mcost;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    FAST Motion Estimation using EPZS
+ *    AMT/HYC
+ ***********************************************************************
+ */
+ int				                                    //  ==> minimum motion cost after search
+ EPZSPelBlockMotionSearch (pel_t ** cur_pic,	  // <--  original pixel values for the AxB block
+                           short ref,	        // <--  reference picture 
+                           int list,           // <--  reference list
+                           int list_offset,    // <--  offset for Mbaff
+                           char ***refPic,    // <--  reference array
+                           short ****tmp_mv,   // <--  mv array
+                           int pic_pix_x,	    // <--  absolute x-coordinate of regarded AxB block
+                           int pic_pix_y,	    // <--  absolute y-coordinate of regarded AxB block
+                           int blocktype,    	// <--  block type (1-16x16 ... 7-4x4)
+                           short pred_mv_x,	  // <--  motion vector predictor (x) in sub-pel units
+                           short pred_mv_y,	  // <--  motion vector predictor (y) in sub-pel units
+                           short *mv_x,	      // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short *mv_y,	      // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           int search_range,	  // <--  1-d search range in pel units
+                           int min_mcost,	    // <--  minimum motion cost (cost for center or huge value)
+                           int lambda_factor)	    // <--  lagrangian parameter for determining motion cost
+ {
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   short blocksize_y = input->blc_size[blocktype][1];	// vertical block size
+   short blocksize_x = input->blc_size[blocktype][0];	// horizontal block size
+   short blockshape_x = (blocksize_x >> 2);	// horizontal block size in 4-pel units
+   short blockshape_y = (blocksize_y >> 2);	// vertical block size in 4-pel units
+   
+   short mb_x = pic_pix_x - img->opix_x; 
+   short mb_y = pic_pix_y - img->opix_y;
+   short pic_pix_x2 = pic_pix_x >> 2;
+   short pic_pix_y2 = pic_pix_y >> 2;
+   short block_x = (mb_x >> 2);
+   short block_y = (mb_y >> 2);  
+    
+   int   pred_x = (pic_pix_x << 2) + pred_mv_x;	// predicted position x (in sub-pel units)
+   int   pred_y = (pic_pix_y << 2) + pred_mv_y;	// predicted position y (in sub-pel units)
+   int   center_x = pic_pix_x + *mv_x;	// center position x (in pel units)
+   int   center_y = pic_pix_y + *mv_y;	// center position y (in pel units)
+   int   cand_x = center_x;
+   int   cand_y = center_y;
+   int   tempmv_x  = *mv_x, tempmv_y  = *mv_y;
+   int   tempmv_x2 = 0, tempmv_y2 = 0;
+   int   stopCriterion = medthres[blocktype];
+   int   mapCenter_x = search_range - *mv_x;
+   int   mapCenter_y = search_range - *mv_y;
+   int   second_mcost = INT_MAX;
+   short apply_weights = (active_pps->weighted_pred_flag > 0 || active_pps->weighted_bipred_idc == 1);
+   int   *prevSad = EPZSDistortion[list + list_offset][blocktype - 1];
+   short *motion=NULL;
+   
+   short invalid_refs = 0;
+   byte  checkMedian = FALSE;  
+   EPZSStructure *searchPatternF = searchPattern;
+   ref_pic = (apply_weights && input->UseWeightedReferenceME) ? ref_picture->imgY_11_w : ref_picture->imgY_11;
+   
+   if (input->EPZSSpatialMem)
+   {
+ #if EPZSREF  
+     motion = EPZSMotion[list + list_offset][ref][blocktype - 1][block_y][pic_pix_x2];  
+ #else
+     motion = EPZSMotion[list + list_offset][blocktype - 1][block_y][pic_pix_x2];  
+ #endif
+   }
+   img_width  = ref_picture->size_x;
+   img_height = ref_picture->size_y;  
+   
+   
+   //===== set function for getting reference picture lines =====
+   get_ref_line = CHECK_RANGE ? FastLineX : UMVLineX;
+   
+   // Clear EPZSMap
+   memset(EPZSMap[0],FALSE,(2*search_range+1)*(2*search_range+1));
+   // Check median candidate;  
+   EPZSMap[search_range][search_range] = TRUE;
+   
+   //--- initialize motion cost (cost for motion vector) and check ---
+   min_mcost = MV_COST (lambda_factor, 2, cand_x, cand_y, pred_x, pred_y);  
+   
+   //--- add residual cost to motion cost ---
+   min_mcost = computeSad(cur_pic, blocksize_y,blocksize_x,
+     blockshape_x,min_mcost, INT_MAX, cand_x,cand_y);
+   // Additional threshold for ref>0  
+   if ((ref>0 && img->structure == FRAME)  
+     && (prevSad[pic_pix_x2] < medthres[blocktype]) 
+     && (prevSad[pic_pix_x2] < min_mcost))
+   {
+     return min_mcost;  
+   } 
+   
+   if ((center_x > search_range) && (center_x < img_width  - search_range - blocksize_x) &&
+     (center_y > search_range) && (center_y < img_height - search_range - blocksize_y)   )
+     get_ref_line = FastLineX;
+   else
+     get_ref_line = UMVLineX;
+   
+   //! If medthres satisfied, then terminate, otherwise generate Predictors  
+   //! Condition could be strengthened by consideration distortion of adjacent partitions.
+   if (min_mcost > stopCriterion)
+   {    
+     int mb_available_right   = (img->mb_x < (img_width  >> 4) - 1);
+     int mb_available_below   = (img->mb_y < (img_height >> 4) - 1);
+     
+     int sadA, sadB, sadC;    
+     int block_available_right;
+     int block_available_below;
+     int prednum = 5;
+     int patternStop = 0, pointNumber = 0, checkPts;
+     int totalCheckPts = 0, motionDirection = 0;
+     int conditionEPZS;
+     int mvx, mvy;
+     int pos, mcost;  
+     PixelPos block_a, block_b, block_c, block_d;
+     
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, -1, 0, &block_a);
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, 0, -1, &block_b);
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, blocksize_x, -1, &block_c);
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, -1, -1, &block_d);
+     
+     if (mb_y > 0)
+     {
+       if (mb_x < 8)		// first column of 8x8 blocks
+       {
+         if (mb_y == 8)
+         {
+           block_available_right = (blocksize_x != MB_BLOCK_SIZE) || mb_available_right;
+           if (blocksize_x == MB_BLOCK_SIZE) 
+             block_c.available = 0;
+         }
+         else
+         {
+           block_available_right = (mb_x + blocksize_x != 8) || mb_available_right;
+           if (mb_x + blocksize_x == 8) 
+             block_c.available = 0;
+         }
+       }
+       else
+       {
+         block_available_right = (mb_x + blocksize_x != MB_BLOCK_SIZE) || mb_available_right;
+         if (mb_x + blocksize_x == MB_BLOCK_SIZE) 
+           block_c.available = 0;
+       }
+     }
+     else
+     {
+       block_available_right = (mb_x + blocksize_x != MB_BLOCK_SIZE) || mb_available_right;
+     }    
+     block_available_below = (mb_y + blocksize_y != MB_BLOCK_SIZE) || (mb_available_below);    
+     
+     sadA = block_a.available ? prevSad[pic_pix_x2 - blockshape_x] : INT_MAX;
+     sadB = block_b.available ? prevSad[pic_pix_x2] : INT_MAX;
+     sadC = block_c.available ? prevSad[pic_pix_x2 + blockshape_x] : INT_MAX;
+     
+     stopCriterion = min(sadA,min(sadB,sadC));
+     stopCriterion = max(stopCriterion,minthres[blocktype]);
+     stopCriterion = min(stopCriterion,maxthres[blocktype]);
+     
+     stopCriterion = (9 * max (medthres[blocktype], stopCriterion) + 2 * medthres[blocktype]) >> 3;
+     
+     //! Add Spatial Predictors in predictor list.
+     //! Scheme adds zero, left, top-left, top, top-right. Note that top-left adds very little
+     //! in terms of performance and could be removed with little penalty if any.
+     invalid_refs = EPZSSpatialPredictors (block_a, block_b, block_c, block_d, 
+       list, list_offset, ref, refPic[list], tmp_mv[list], predictor);
+     if (input->EPZSSpatialMem)
+       EPZSSpatialMemPredictors (list + list_offset, ref, blocktype - 1, pic_pix_x2, 
+       blockshape_x, blockshape_y, block_y, &prednum, img_width>>2, predictor);
+     
+     // Temporal predictors  
+     if (input->EPZSTemporal)
+       EPZSTemporalPredictors (list, list_offset, ref, pic_pix_x2, pic_pix_y2, predictor, &prednum, 
+       block_a.available, block_b.available, block_available_right, 
+       block_available_below, blockshape_x, blockshape_y, stopCriterion, min_mcost);
+     
+     //! Window Size Based Predictors
+     //! Basically replaces a Hierarchical ME concept and helps escaping local minima, or
+     //! determining large motion variations.
+     //! Following predictors can be adjusted further (i.e. removed, conditioned etc) 
+     //! based on distortion, correlation of adjacent MVs, complexity etc. These predictors
+     //! and their conditioning could also be moved after all other predictors have been
+     //! tested. Adaptation could also be based on type of material and coding mode (i.e. 
+     //! field/frame coding,MBAFF etc considering the higher dependency with opposite parity field
+     //conditionEPZS = ((min_mcost > stopCriterion) 
+     // && (input->EPZSFixed > 1 || (input->EPZSFixed && img->type == P_SLICE)));
+     //conditionEPZS = ((ref == 0) && (blocktype < 5) && (min_mcost > stopCriterion) 
+     //&& (input->EPZSFixed > 1 || (input->EPZSFixed && img->type == P_SLICE)));
+     conditionEPZS = ((min_mcost > stopCriterion) && ((ref < 2 && blocktype < 5) 
+       || ((img->structure!=FRAME || list_offset) && ref < 3)) 
+       && (input->EPZSFixed > 1 || (input->EPZSFixed && img->type == P_SLICE))); 
+     
+     if (conditionEPZS)
+       EPZSWindowPredictors ((int) *mv_x, (int) *mv_y, predictor, &prednum, 
+       (blocktype < 5) && (invalid_refs > 2) && (ref < 1 + (img->structure!=FRAME || list_offset)));
+     
+     //! Blocktype/Reference dependent predictors. 
+     //! Since already mvs for other blocktypes/references have been computed, we can reuse
+     //! them in order to easier determine the optimal point. Use of predictors could depend
+     //! on cost,
+     //conditionEPZS = (ref == 0 || (ref > 0 && min_mcost > stopCriterion) || img->structure != FRAME || list_offset);
+     conditionEPZS = (ref == 0 || (ref > 0 && min_mcost > stopCriterion));
+     
+     if (conditionEPZS)
+       EPZSBlockTypePredictors (block_x, block_y, blocktype, ref, list, predictor, &prednum);
+     
+     //! Check all predictors          
+     for (pos = 0; pos < prednum; pos++)
+     {
+       mvx = predictor->point[pos].x;
+       mvy = predictor->point[pos].y;
+       //if ((abs (mvx - *mv_x) > search_range || abs (mvy - *mv_y) > search_range) && (mvx || mvy))
+       if (abs (mvx - *mv_x) > search_range || abs (mvy - *mv_y) > search_range)
+         continue;
+       
+       if ((abs (mvx - *mv_x) <= search_range) && (abs (mvy - *mv_y) <= search_range))
+       {
+         if (EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] == TRUE)
+           continue;
+         else
+           EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] = TRUE;
+       }
+       
+       cand_x = pic_pix_x + mvx;
+       cand_y = pic_pix_y + mvy;
+       
+       //--- set motion cost (cost for motion vector) and check ---
+       mcost = MV_COST (lambda_factor, 2, cand_x, cand_y, pred_x, pred_y);
+       
+       if (mcost >= second_mcost) continue;            
+       get_ref_line = CHECK_RANGE ? FastLineX : UMVLineX;      
+       
+       mcost = computeSad(cur_pic, blocksize_y,blocksize_x, 
+         blockshape_x, mcost, second_mcost, cand_x, cand_y);
+       
+       //--- check if motion cost is less than minimum cost ---
+       if (mcost < min_mcost)
+       {
+         tempmv_x2 = tempmv_x;
+         tempmv_y2 = tempmv_y;
+         second_mcost = min_mcost;
+         tempmv_x = mvx;
+         tempmv_y = mvy;
+         min_mcost = mcost;
+         checkMedian = TRUE;
+       }
+       //else if (mcost < second_mcost && (tempmv_x != mvx || tempmv_y != mvy))
+       else if (mcost < second_mcost)
+       {
+         tempmv_x2 = mvx;
+         tempmv_y2 = mvy;
+         second_mcost = mcost;
+         checkMedian = TRUE;
+       }
+     }
+     
+     //! Refine using EPZS pattern if needed
+     //! Note that we are using a conservative threshold method. Threshold 
+     //! could be tested after checking only a certain number of predictors
+     //! instead of the full set. Code could be easily modified for this task.
+     if (min_mcost > stopCriterion)
+     {
+       //! Adapt pattern based on different conditions.
+       if (input->EPZSPattern != 0)
+       {
+         if ((min_mcost < stopCriterion + ((3 * medthres[blocktype]) >> 1)))
+         {
+           if ((tempmv_x == 0 && tempmv_y == 0) 
+             || (abs (tempmv_x - *mv_x) < 2 && abs (tempmv_y - *mv_y) < 2))
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else if (blocktype > 5 || (ref > 0 && blocktype != 1))
+           searchPatternF = square;
+         else
+           searchPatternF = searchPattern;
+       }
+       
+       totalCheckPts = searchPatternF->searchPoints;
+       
+       //! center on best predictor      
+       center_x = tempmv_x;
+       center_y = tempmv_y;
+       while(1)
+       {
+         do
+         {
+           checkPts = totalCheckPts;
+           do
+           {
+             mvx = center_x + searchPatternF->point[pointNumber].x;
+             mvy = center_y + searchPatternF->point[pointNumber].y;
+             cand_x = pic_pix_x + mvx;
+             cand_y = pic_pix_y + mvy;
+             
+             if ((abs (mvx - *mv_x) <= search_range) 
+               && (abs (mvy - *mv_y) <= search_range))
+             {
+               if (EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] != TRUE)
+                 EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] = TRUE;
+               else
+               {
+                 pointNumber += 1;
+                 if (pointNumber >= searchPatternF->searchPoints)
+                   pointNumber -= searchPatternF->searchPoints;
+                 checkPts -= 1;
+                 continue;
+               }
+               mcost = MV_COST (lambda_factor, 2, cand_x, cand_y, pred_x, pred_y);
+               
+               if (mcost < min_mcost)
+               {              
+                 get_ref_line = CHECK_RANGE ? FastLineX : UMVLineX;      
+                 mcost = computeSad(cur_pic, blocksize_y,blocksize_x,
+                   blockshape_x, mcost, min_mcost, cand_x, cand_y);
+                 if (mcost < min_mcost)
+                 {
+                   min_mcost = mcost;
+                   tempmv_x = mvx;
+                   tempmv_y = mvy;
+                   motionDirection = pointNumber;
+                 }
+               }
+             }
+             pointNumber += 1;
+             if (pointNumber >= searchPatternF->searchPoints)
+               pointNumber -= searchPatternF->searchPoints;
+             checkPts -= 1;
+           }
+           while (checkPts > 0);
+           
+           if ((tempmv_x == center_x) && (tempmv_y == center_y))
+           {
+             totalCheckPts = searchPatternF->searchPoints;
+             patternStop = 1;
+             motionDirection = 0;
+             pointNumber = 0;
+           }
+           else
+           {
+             totalCheckPts = searchPatternF->point[motionDirection].next_points;
+             pointNumber = searchPatternF->point[motionDirection].start_nmbr;
+             center_x = tempmv_x;
+             center_y = tempmv_y;
+           }
+         }
+         while (patternStop != 1);
+         
+         if ((ref>0) && (img->structure == FRAME) 
+           && (( 4 * prevSad[pic_pix_x2] < min_mcost) ||
+           ((3 * prevSad[pic_pix_x2] < min_mcost) && (prevSad[pic_pix_x2] <= stopCriterion))))              
+         {
+           *mv_x = tempmv_x;
+           *mv_y = tempmv_y;  
+           return min_mcost;  
+         } 
+         
+         //! Check Second best predictor with EPZS pattern     
+         conditionEPZS = (checkMedian == TRUE) 
+           && ((img->type == P_SLICE) || (blocktype < 5)) 
+           && (min_mcost > stopCriterion) && (input->EPZSDual > 0);
+         
+         if (!conditionEPZS) break;
+         
+         pointNumber = 0;
+         patternStop = 0;
+         motionDirection = 0;        
+         
+         if ((tempmv_x == 0 && tempmv_y == 0) 
+           || (tempmv_x == *mv_x && tempmv_y == *mv_y))
+         {
+           if (abs (tempmv_x - *mv_x) < 2 && abs (tempmv_y - *mv_y) < 2)
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else
+           searchPatternF = searchPatternD;        
+         totalCheckPts = searchPatternF->searchPoints;
+         
+         //! Second best. Note that following code is identical as for best predictor.
+         center_x = tempmv_x2;
+         center_y = tempmv_y2;
+         checkMedian = FALSE;        
+       }
+     }
+   }
+   
+   if ((ref==0) || (prevSad[pic_pix_x2] > min_mcost))
+     prevSad[pic_pix_x2] = min_mcost;
+ #if EPZSREF  
+   if (input->EPZSSpatialMem)  
+ #else
+     if (input->EPZSSpatialMem && ref == 0)  
+ #endif
+     {
+       motion[0]  = tempmv_x;
+       motion[1]  = tempmv_y;
+     }
+     
+     *mv_x = tempmv_x;
+     *mv_y = tempmv_y;
+     return min_mcost;
+ }
+ 
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    FAST Motion Estimation using EPZS, bi-predictive variant.
+ *    AMT/HYC
+ *
+ *    Searches full-pel positions for ONE motion vector (*mv_x, *mv_y)
+ *    while the other vector of the pair (*s_mv_x, *s_mv_y) stays fixed.
+ *    Every candidate is scored as the sum of both motion vector costs
+ *    (MV_COST) plus the value returned by computeBiPredSad.
+ *
+ *    Steps:
+ *      1. score the incoming search center;
+ *      2. if that cost exceeds medthres[blocktype], test the spatial
+ *         predictors;
+ *      3. if the best cost still exceeds (11 * medthres[blocktype]) >> 3,
+ *         refine with an iterative search pattern, optionally repeating
+ *         the refinement around the second best predictor.
+ *
+ *    The function also assigns several variables it does not declare
+ *    itself: ref_pic1, ref_pic2, img_width, img_height, weight1, weight2,
+ *    offsetBi, computeBiPredSad, get_ref_line1, get_ref_line2, and it
+ *    clears and marks EPZSMap.
+ *
+ * \return
+ *    minimum motion cost found. *mv_x / *mv_y receive the matching
+ *    vector; *s_mv_x / *s_mv_y are only read.
+ ***********************************************************************
+ */
+ int				                                        //  ==> minimum motion cost after search
+ EPZSBiPredBlockMotionSearch (pel_t ** cur_pic,	  // <--  original pixel values for the AxB block
+                              short  ref,	        // <--  reference picture index within the searched list
+                              int    list,         // <--  reference list
+                              int    list_offset,  // <--  offset for Mbaff
+                              char  ***refPic,    // <--  reference array (indexed by list here)
+                              short  ****tmp_mv,   // <--  mv array (indexed by list here)
+                              int    pic_pix_x,	  // <--  absolute x-coordinate of regarded AxB block
+                              int    pic_pix_y,	  // <--  absolute y-coordinate of regarded AxB block
+                              int    blocktype,    //<--  block type (1-16x16 ... 7-4x4)
+                              short  pred_mv_x1,   // <--  motion vector predictor (x) in sub-pel units, costed against the fixed vector (s_mv)
+                              short  pred_mv_y1,   // <--  motion vector predictor (y) in sub-pel units, costed against the fixed vector (s_mv)
+                              short  pred_mv_x2,   // <--  motion vector predictor (x) in sub-pel units, costed against the searched vector (mv)
+                              short  pred_mv_y2,   // <--  motion vector predictor (y) in sub-pel units, costed against the searched vector (mv)
+                              short  *mv_x,	      // <--> in: search center (x) / out: motion vector (x) - in pel units
+                              short  *mv_y,	      // <--> in: search center (y) / out: motion vector (y) - in pel units
+                              short  *s_mv_x,      // <--  fixed motion vector (x) of the other prediction - in pel units (only read)
+                              short  *s_mv_y,      // <--  fixed motion vector (y) of the other prediction - in pel units (only read)
+                              int    search_range,	// <--  1-d search range in pel units
+                              int    min_mcost,	  // <--  overwritten before it is read; the incoming value has no effect
+                              int    lambda_factor)	      // <--  lagrangian parameter for determining motion cost
+ {
+   short blocksize_y = input->blc_size[blocktype][1];	// vertical block size
+   short blocksize_x = input->blc_size[blocktype][0];	// horizontal block size
+   short blockshape_x = (blocksize_x >> 2);	// horizontal block size in 4-pel units
+   // Block position relative to img->opix_x / img->opix_y
+   // (NOTE(review): presumably the origin of the current macroblock - confirm).
+   short mb_x = pic_pix_x - img->opix_x;
+   short mb_y = pic_pix_y - img->opix_y;
+   short block_x = (mb_x >> 2);    // same position in 4-pel units
+   short block_y = (mb_y >> 2);  
+   int   pred_x1 = (pic_pix_x << 2) + pred_mv_x1; // predicted position x (in sub-pel units)
+   int   pred_y1 = (pic_pix_y << 2) + pred_mv_y1; // predicted position y (in sub-pel units)
+   int   pred_x2 = (pic_pix_x << 2) + pred_mv_x2; // predicted position x (in sub-pel units)
+   int   pred_y2 = (pic_pix_y << 2) + pred_mv_y2; // predicted position y (in sub-pel units)
+   int   center2_x = pic_pix_x + *mv_x;          // center position x (in pel units); reused as an mv-unit center in the refinement below
+   int   center2_y = pic_pix_y + *mv_y;          // center position y (in pel units); reused as an mv-unit center in the refinement below
+   int   center1_x = pic_pix_x + *s_mv_x;   // position x addressed by the fixed vector (in pel units), never changed
+   int   center1_y = pic_pix_y + *s_mv_y;   // position y addressed by the fixed vector (in pel units), never changed
+   int   tempmv_x  = *mv_x, tempmv_y  = *mv_y;   // best vector found so far
+   int   tempmv_x2 = 0, tempmv_y2 = 0;           // second best vector found among the predictors
+   int stopCriterion = medthres[blocktype];  
+   // EPZSMap is indexed as [mapCenter_y + mvy][mapCenter_x + mvx], so the
+   // incoming (*mv_x, *mv_y) lands on [search_range][search_range].
+   int mapCenter_x = search_range - *mv_x;
+   int mapCenter_y = search_range - *mv_y;
+   int second_mcost = INT_MAX;
+   short apply_weights = (active_pps->weighted_bipred_idc != 0);
+   // Nested conditional: apply_weights ? (list == 0 ? <first> : <second>) : 0
+   short offset1 = apply_weights ? list == 0
+     ? wp_offset[list_offset         ][ref]     [0]
+     : wp_offset[list_offset + LIST_1][0  ]     [0] 
+     : 0;
+   short offset2 = apply_weights ? list == 0
+     ? wp_offset[list_offset + LIST_1][ref]     [0]
+     : wp_offset[list_offset         ][0  ]     [0] 
+     : 0;  
+   short invalid_refs = 0;   // assigned from EPZSSpatialPredictors below, not read afterwards in this function
+   byte checkMedian = FALSE;  // set once a predictor updated the best or second best vector
+   EPZSStructure *searchPatternF = searchPattern;
+   // ref_pic1: picture `ref` of the searched list; ref_pic2: picture 0 of the opposite list (list ^ 1).
+   ref_pic1 = listX[ list      + list_offset][ref]->imgY_11;
+   ref_pic2 = listX[(list ^ 1) + list_offset][ 0 ]->imgY_11;  
+   
+   img_width  = listX[ list    + list_offset][ref]->size_x;
+   img_height = listX[ list    + list_offset][ref]->size_y;
+   
+   // Same nested conditional shape as offset1/offset2; without weighting both
+   // weights are 1 << luma_log_weight_denom.
+   weight1 = apply_weights ? list == 0 
+     ? wbp_weight[list_offset         ][ref][0  ][0]
+     : wbp_weight[list_offset + LIST_1][0  ][ref][0]
+     : 1<<luma_log_weight_denom;
+   weight2 = apply_weights ? list == 0
+     ? wbp_weight[list_offset + LIST_1][ref][0  ][0]
+     : wbp_weight[list_offset         ][0  ][ref][0]
+     : 1<<luma_log_weight_denom;
+   offsetBi=(offset1 + offset2 + 1)>>1;   // rounded mean of the two offsets
+   
+   // Distortion routine: computeBiPredSad2 with weighting, computeBiPredSad1 without.
+   computeBiPredSad = apply_weights? computeBiPredSad2 : computeBiPredSad1;
+   
+   //===== line access function used with the searched vector (center2, from *mv_x / *mv_y) =====
+   // FastLineX is chosen only when the center lies more than search_range inside
+   // the picture on every side (block size included); UMVLineX otherwise.
+   // NOTE(review): which of ref_pic1 / ref_pic2 get_ref_line2 reads is decided
+   // inside computeBiPredSad, which is not visible here - confirm.
+   if ( (center2_x > search_range) && (center2_x < img_width  - search_range-blocksize_x) 
+     && (center2_y > search_range) && (center2_y < img_height - search_range-blocksize_y))
+   {
+     get_ref_line2 = FastLineX;
+   }
+   else
+   {
+     get_ref_line2 = UMVLineX;
+   }
+   
+   //===== line access function used with the fixed vector (center1, from *s_mv_x / *s_mv_y) =====
+   if ( (center1_x > search_range) && (center1_x < img_width  - search_range-blocksize_x) 
+     && (center1_y > search_range) && (center1_y < img_height - search_range-blocksize_y))
+   {
+     get_ref_line1 = FastLineX;
+   }
+   else
+   {
+     get_ref_line1 = UMVLineX;
+   }  
+   // Clear EPZSMap (one entry per position of the (2*search_range+1)^2 window)
+   memset(EPZSMap[0],FALSE,(2*search_range+1)*(2*search_range+1));
+   // Mark the search center itself as already checked
+   EPZSMap[search_range][search_range] = TRUE;
+   
+   //--- initialize motion cost (cost for both motion vectors) and check ---
+   min_mcost  = MV_COST (lambda_factor, 2, center1_x, center1_y, pred_x1, pred_y1);
+   min_mcost += MV_COST (lambda_factor, 2, center2_x, center2_y, pred_x2, pred_y2);
+   
+   //--- add residual cost to motion cost ---
+   min_mcost = computeBiPredSad(cur_pic, blocksize_y, blocksize_x, blockshape_x, 
+     min_mcost, INT_MAX, center1_x, center1_y, center2_x, center2_y);
+   
+   //! If medthres satisfied, then terminate, otherwise generate Predictors  
+   if (min_mcost > stopCriterion)
+   {       
+     // Number of predictor entries tested below.
+     // NOTE(review): presumably the five entries filled by EPZSSpatialPredictors - confirm.
+     int prednum = 5;
+     int patternStop = 0, pointNumber = 0, checkPts;
+     int totalCheckPts = 0, motionDirection = 0;
+     int   conditionEPZS;
+     int   mvx, mvy, cand_x, cand_y;
+     int   pos, mcost;  
+     PixelPos block_a, block_b, block_c, block_d;
+     
+     // Neighbours at offsets (-1,0), (0,-1), (blocksize_x,-1) and (-1,-1):
+     // left, top, top-right and top-left of the block.
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, -1, 0, &block_a);
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, 0, -1, &block_b);
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, blocksize_x, -1, &block_c);
+     getLuma4x4Neighbour (img->current_mb_nr, block_x, block_y, -1, -1, &block_d);
+     
+     // Force the top-right neighbour to "unavailable" for certain block
+     // positions below the first row of the macroblock.
+     // NOTE(review): presumably positions whose top-right block is not coded yet - confirm.
+     if (mb_y > 0)
+     {
+       if (mb_x < 8)  // first column of 8x8 blocks
+       {
+         if (mb_y==8)
+         {
+           if (blocksize_x == MB_BLOCK_SIZE) 
+             block_c.available  = 0;
+         }
+         else if (mb_x+blocksize_x == 8) 
+           block_c.available = 0;
+       }
+       else if (mb_x+blocksize_x == MB_BLOCK_SIZE) 
+         block_c.available = 0;
+     }
+     
+     // Threshold used from here on: 11/8 of medthres[blocktype]
+     stopCriterion = (11 * medthres[blocktype]) >> 3;
+     
+     //! Add Spatial Predictors in predictor list.
+     //! Scheme adds zero, left, top-left, top, top-right. Note that top-left adds very little
+     //! in terms of performance and could be removed with little penalty if any.
+     invalid_refs = EPZSSpatialPredictors (block_a, block_b, block_c, block_d, 
+       list, list_offset, ref, refPic[list], tmp_mv[list], predictor);
+     
+     //! Check all predictors          
+     for (pos = 0; pos < prednum; pos++)
+     {
+       mvx = predictor->point[pos].x;
+       mvy = predictor->point[pos].y;
+       // Skip predictors outside the search window, except the zero vector,
+       // which is always evaluated.
+       if ((abs (mvx - *mv_x) > search_range || abs (mvy - *mv_y) > search_range) && (mvx || mvy))
+         continue;
+       
+       // EPZSMap only covers the search window, so it is consulted (and
+       // updated) only for vectors inside it.
+       if ((abs (mvx - *mv_x) <= search_range) && (abs (mvy - *mv_y) <= search_range))
+       {
+         if (EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] == TRUE)
+           continue;
+         else
+           EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] = TRUE;
+       }
+       
+       cand_x = pic_pix_x + mvx;
+       cand_y = pic_pix_y + mvy;
+       
+       //--- set motion cost (cost for motion vector) and check ---
+       mcost  = MV_COST (lambda_factor, 2, center1_x, center1_y, pred_x1, pred_y1);
+       mcost += MV_COST (lambda_factor, 2, cand_x   ,    cand_y, pred_x2, pred_y2);
+       
+       // The vector cost alone already reaches the second best total cost
+       if (mcost >= second_mcost) continue;
+       
+       mcost = computeBiPredSad(cur_pic, blocksize_y, blocksize_x, blockshape_x, 
+         mcost, second_mcost, center1_x, center1_y, cand_x, cand_y);
+       
+       //--- check if motion cost is less than minimum cost ---
+       if (mcost < min_mcost)
+       {
+         // New best: the previous best becomes the second best
+         tempmv_x2 = tempmv_x;
+         tempmv_y2 = tempmv_y;
+         second_mcost = min_mcost;
+         tempmv_x = mvx;
+         tempmv_y = mvy;
+         min_mcost = mcost;
+         checkMedian = TRUE;
+       }      
+       //else if (mcost < second_mcost && (tempmv_x != mvx || tempmv_y != mvy))
+       else if (mcost < second_mcost)
+       {
+         // New second best only
+         tempmv_x2 = mvx;
+         tempmv_y2 = mvy;
+         second_mcost = mcost;
+         checkMedian = TRUE;
+       }
+     }
+     
+     //! Refine using EPZS pattern if needed.
+     //! Note that we are using a simplistic threshold computation.
+     if (min_mcost > stopCriterion)
+     {
+       //! Adapt pattern based on different conditions.
+       if (input->EPZSPattern != 0)
+       {
+         if ((min_mcost < stopCriterion + ((3 * medthres[blocktype]) >> 1)))
+         {
+           if ((tempmv_x == 0 && tempmv_y == 0)
+             || (abs (tempmv_x - *mv_x) < 2 && abs (tempmv_y - *mv_y) < 2))
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else if (blocktype > 5 || (ref > 0 && blocktype != 1))
+           searchPatternF = square;
+         else
+           searchPatternF = searchPattern;
+       }
+       
+       totalCheckPts = searchPatternF->searchPoints;
+       
+       //! center on best predictor      
+       // From here on center2_x / center2_y hold a motion vector, not a pixel position.
+       center2_x = tempmv_x;
+       center2_y = tempmv_y;
+       // Outer loop: runs once for the best predictor and, if conditionEPZS
+       // holds afterwards, once more for the second best predictor.
+       while (1)
+       {
+         // Middle loop: one pass of the pattern per iteration, until a pass
+         // leaves the best vector on the current center.
+         do
+         {
+           checkPts = totalCheckPts;
+           // Inner loop: visit checkPts pattern points starting at pointNumber,
+           // wrapping the index at searchPatternF->searchPoints.
+           do
+           {
+             mvx = center2_x + searchPatternF->point[pointNumber].x;
+             mvy = center2_y + searchPatternF->point[pointNumber].y;
+             cand_x = pic_pix_x + mvx;
+             cand_y = pic_pix_y + mvy;
+             
+             // Points outside the search window are skipped without evaluation
+             if ((abs (mvx - *mv_x) <= search_range) 
+               && (abs (mvy - *mv_y) <= search_range))
+             {
+               if (EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] != TRUE)
+                 EPZSMap[mapCenter_y + mvy][mapCenter_x + mvx] = TRUE;
+               else
+               {
+                 // Already checked: advance to the next point. The continue
+                 // jumps to the while (checkPts > 0) test of the inner loop.
+                 pointNumber += 1;
+                 if (pointNumber >= searchPatternF->searchPoints)
+                   pointNumber -= searchPatternF->searchPoints;
+                 checkPts -= 1;
+                 continue;
+               }
+               
+               mcost  = MV_COST (lambda_factor, 2, center1_x, center1_y, pred_x1, pred_y1);
+               mcost += MV_COST (lambda_factor, 2, cand_x   ,    cand_y, pred_x2, pred_y2);     
+               
+               // Distortion is only computed when the vector cost alone is below the best cost
+               if (mcost < min_mcost)
+               {         
+                 mcost = computeBiPredSad(cur_pic, blocksize_y, blocksize_x, blockshape_x, 
+                   mcost, min_mcost, center1_x, center1_y, cand_x, cand_y);
+                 
+                 if (mcost < min_mcost)
+                 {
+                   min_mcost = mcost;
+                   tempmv_x = mvx;
+                   tempmv_y = mvy;
+                   motionDirection = pointNumber;   // remember which pattern point won
+                 }
+               }
+             }
+             pointNumber += 1;
+             if (pointNumber >= searchPatternF->searchPoints)
+               pointNumber -= searchPatternF->searchPoints;
+             checkPts -= 1;
+           }
+           while (checkPts > 0);
+           
+           // The test compares against the best vector, so a pass around the
+           // second best predictor that finds no improvement takes the else
+           // branch and re-centers on the current best vector.
+           if ((tempmv_x == center2_x) && (tempmv_y == center2_y))
+           {
+             // Best vector is the center: this refinement is finished
+             totalCheckPts = searchPatternF->searchPoints;
+             patternStop = 1;
+             motionDirection = 0;
+             pointNumber = 0;
+           }
+           else
+           {
+             // Re-center on the best vector; the winning point selects how many
+             // points the next pass visits and where in the pattern it starts.
+             totalCheckPts = searchPatternF->point[motionDirection].next_points;
+             pointNumber = searchPatternF->point[motionDirection].start_nmbr;
+             center2_x = tempmv_x;
+             center2_y = tempmv_y;
+           }
+         }
+         while (patternStop != 1);
+         
+         //! Check Second best predictor with EPZS pattern     
+         
+         conditionEPZS = (checkMedian == TRUE) && (blocktype < 5) && (min_mcost > stopCriterion) && (input->EPZSDual > 0);
+         
+         if (!conditionEPZS) break;
+         
+         pointNumber = 0;
+         patternStop = 0;
+         motionDirection = 0;        
+         
+         // Pattern for the second refinement, chosen from the current best vector
+         if ((tempmv_x == 0 && tempmv_y == 0) 
+           || (tempmv_x == *mv_x && tempmv_y == *mv_y))
+         {
+           if (abs (tempmv_x - *mv_x) < 2 && abs (tempmv_y - *mv_y) < 2)
+             searchPatternF = sdiamond;
+           else
+             searchPatternF = square;
+         }
+         else
+           searchPatternF = searchPatternD;
+         totalCheckPts = searchPatternF->searchPoints;
+         
+         //! Second best. The refinement loop above is reused unchanged for it.
+         center2_x = tempmv_x2;
+         center2_y = tempmv_y2;
+         
+         // Cleared so that the second best predictor is refined at most once
+         checkMedian = FALSE;
+       }
+     }
+   }
+   // Report the best vector found (unchanged input vector if nothing better was found)
+   *mv_x = tempmv_x;
+   *mv_y = tempmv_y;  
+   return min_mcost;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Print the EPZS settings and the 16x16 thresholds to a report stream.
+ *    AMT/HYC
+ *
+ * \param stat
+ *    stream the six report lines are written to
+ * \param stats_file
+ *    1 selects the narrow label column, any other value the wide one
+ ***********************************************************************
+ */
+ void
+ EPZSOutputStats (FILE * stat, short stats_file)
+ {  
+   //! Values are looked up once; both layouts print exactly the same data.
+   const char *patternName  = c_EPZSPattern[input->EPZSPattern];
+   const char *dualName     = c_EPZSDualPattern[input->EPZSDual];
+   const char *fixedName    = c_EPZSFixed[input->EPZSFixed];
+   const char *temporalName = c_EPZSOther[input->EPZSTemporal];
+   const char *spatialName  = c_EPZSOther[input->EPZSSpatialMem];
+   int thresMed = medthres[1];
+   int thresMin = minthres[1];
+   int thresMax = maxthres[1];
+ 
+   if (stats_file != 1)
+   {
+     //! Wide layout
+     fprintf (stat, " EPZS Pattern                      : %s\n", patternName);
+     fprintf (stat, " EPZS Dual Pattern                 : %s\n", dualName);
+     fprintf (stat, " EPZS Fixed Predictors             : %s\n", fixedName);
+     fprintf (stat, " EPZS Temporal Predictors          : %s\n", temporalName);
+     fprintf (stat, " EPZS Spatial Predictors           : %s\n", spatialName);
+     fprintf (stat, " EPZS Thresholds (16x16)           : (%d %d %d)\n", thresMed, thresMin, thresMax);
+     return;
+   }
+ 
+   //! Narrow layout (stats_file == 1)
+   fprintf (stat, " EPZS Pattern                 : %s\n", patternName);
+   fprintf (stat, " EPZS Dual Pattern            : %s\n", dualName);
+   fprintf (stat, " EPZS Fixed Predictors        : %s\n", fixedName);
+   fprintf (stat, " EPZS Temporal Predictors     : %s\n", temporalName);
+   fprintf (stat, " EPZS Spatial Predictors      : %s\n", spatialName);
+   fprintf (stat, " EPZS Thresholds (16x16)      : (%d %d %d)\n", thresMed, thresMin, thresMax);
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/epzs.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/epzs.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/epzs.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,71 ----
+  
+ /*!
+  ************************************************************************
+  * \file epzs.h
+  *
+  * \author
+  *    Alexis Michael Tourapis        <alexismt at ieee.org>              
+  *
+  ************************************************************************
+  */
+ 
+ #ifndef _EPZS_H_
+ #define _EPZS_H_
+ 
+ 
+ //! Non-zero when the candidate position satisfies 0 <= cand_x < img_width - blocksize_x
+ //! and 0 <= cand_y < img_height - blocksize_y.
+ //! The macro takes no arguments: cand_x, cand_y, img_width, img_height, blocksize_x and
+ //! blocksize_y must all be visible at the point of expansion. epzs.c uses the result to
+ //! choose between FastLineX and UMVLineX.
+ #define CHECK_RANGE  ((cand_x >= 0) && (cand_x < img_width  - blocksize_x) &&(cand_y >= 0) && (cand_y < img_height - blocksize_y)) 
+ 
+ 
+ //! Motion vector arrays kept in frame, top field and bottom field form.
+ //! NOTE(review): presumably the co-located data read by the temporal
+ //! predictors through EPZSCo_located - confirm against epzs.c.
+ typedef struct 
+ {
+   int         mb_adaptive_frame_field_flag;  //!< NOTE(review): presumably copied from the MBAFF setting of the picture - confirm
+   int         size_x, size_y;                //!< NOTE(review): dimensions of the stored arrays; units not visible here - confirm
+ 
+   // Frame
+   short ****  mv;            //!< motion vector       [list][subblock_x][subblock_y][component]  
+   // Top field
+   short ****  top_mv;        //!< motion vector       [list][subblock_x][subblock_y][component]  
+   // Bottom field params
+   short ****  bottom_mv;     //!< motion vector       [list][subblock_x][subblock_y][component]   
+ 
+ } EPZSColocParams;
+ 
+ //! One entry of a search pattern.
+ typedef struct
+ {
+   int x;            //!< horizontal offset; the pattern search in epzs.c adds it to the current center
+   int y;            //!< vertical offset; the pattern search in epzs.c adds it to the current center
+   int start_nmbr;   //!< pattern index the next pass starts at when this point gave the best cost
+   int next_points;  //!< number of pattern points the next pass visits when this point gave the best cost
+ }
+ SPoint;
+ 
+ //! A search pattern: an array of SPoint entries together with its length.
+ typedef struct 
+ {
+   int  searchPoints;  //!< number of entries in point; the pattern search wraps its point index at this value
+   SPoint *point;      //!< the pattern entries
+ }
+ EPZSStructure;
+ 
+ //! Identifiers for the available search patterns.
+ //! NOTE(review): no use of these constants is visible here; presumably they
+ //! select among pattern objects such as sdiamond and square in epzs.c - confirm.
+ typedef enum
+ {
+   SDIAMOND = 0,
+   SQUARE   = 1,
+   EDIAMOND = 2,
+   LDIAMOND = 3
+ } EPZSPatterns;
+ 
+ //! NOTE(review): presumably the co-located motion data of the current slice - confirm.
+ extern EPZSColocParams *EPZSCo_located;
+ //! Indexed in epzs.c as [list + list_offset][blocktype - 1][position].
+ extern int ***EPZSDistortion;  //!< Array for storing SAD Values
+ 
+ //! NOTE(review): EPZSInit, EPZSDelete and EPZSSliceInit are defined outside this view;
+ //! presumably set-up, tear-down and per-slice preparation of the EPZS data - confirm.
+ extern int EPZSInit();
+ extern void EPZSDelete ();
+ //! Prints the EPZS settings and the 16x16 thresholds to stat;
+ //! stats_file == 1 selects the narrow label layout.
+ extern void EPZSOutputStats(FILE *stat,short stats_file);
+ extern void EPZSSliceInit(EPZSColocParams* p, StorablePicture **listX[6]);
+ //! Single-list EPZS search. Returns the minimum motion cost and writes the
+ //! best vector through its two short* arguments.
+ extern int EPZSPelBlockMotionSearch (pel_t **, short, int, int, char ***, short ****, 
+                                      int, int, int, short, short, short*, short*, int, int, int);
+ 
+ //! Bi-predictive EPZS search. Argument order: cur_pic, ref, list, list_offset, refPic,
+ //! tmp_mv, pic_pix_x, pic_pix_y, blocktype, pred_mv_x1, pred_mv_y1, pred_mv_x2, pred_mv_y2,
+ //! mv_x, mv_y, s_mv_x, s_mv_y, search_range, min_mcost, lambda_factor.
+ //! Returns the minimum motion cost; only mv_x / mv_y are written.
+ extern int EPZSBiPredBlockMotionSearch (pel_t **,	short, int, int, char  ***, short  ****, 
+                                         int, int, int, short, short, short, short, 
+                                         short *, short *,	short *, short *, int, int, int);	     
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,472 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file explicit_gop.c
+  *
+  * \brief
+  *    Code for explicit gop support and pyramidal coding.
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *     - Alexis Michael Tourapis                     <alexismt at ieee.org> 
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <ctype.h>
+ #include <limits.h>
+ #include "global.h"
+ #include "contributors.h"
+ #include "explicit_gop.h"
+ #include "image.h"
+ #include "nalucommon.h"
+ #include "string.h"
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Generation of Pyramid GOP
+ *
+ *    Fills gop_structure[0 .. input->successive_Bframe-1] with slice type,
+ *    display position, pyramid layer, reference priority and slice QP.
+ *
+ *    - input->PyramidCoding == 1: two layers. The first half of the entries
+ *      takes the odd display positions and is kept as reference
+ *      (NALU_PRIORITY_HIGH); the remaining entries take the even display
+ *      positions and are disposable.
+ *    - any other value: dyadic pyramid. Every entry starts as a disposable
+ *      layer-0 B slice in display order; for each level j >= 1 every
+ *      (1<<j)-th position is promoted to layer j and marked as reference.
+ *      The array is then reordered so that higher layers come first.
+ *
+ *    No scratch memory is allocated by this function.
+ ************************************************************************
+ */
+ void create_pyramid()
+ {
+   const int Bframes = input->successive_Bframe;
+   int i;
+ 
+   if (input->PyramidCoding == 1)
+   {
+     const int centerB = Bframes / 2;
+ 
+     for (i = 0; i < Bframes; i++)
+     {
+       gop_structure[i].slice_type = B_SLICE;
+ 
+       if (i < centerB)
+       {
+         // stored B slices: odd display positions
+         gop_structure[i].display_no = i * 2 + 1;
+         gop_structure[i].pyramid_layer = 0;
+         gop_structure[i].reference_idc = NALU_PRIORITY_HIGH;
+         gop_structure[i].slice_qp = max(0, (input->qpB + (input->PyramidLevelQPEnable ? -1: input->qpBRSOffset)));
+       }
+       else
+       {
+         // disposable B slices: even display positions
+         gop_structure[i].display_no = (i - centerB) * 2;
+         gop_structure[i].pyramid_layer = 1;
+         gop_structure[i].reference_idc = NALU_PRIORITY_DISPOSABLE;
+         gop_structure[i].slice_qp = input->qpB;
+       }
+     }
+   }
+   else
+   {
+     GOP_DATA tmp;
+     int GOPlevels = 1;
+     int level, pos;
+ 
+     // smallest number of levels for which (Bframes + 1) >> GOPlevels <= 1
+     while (((Bframes + 1) >> GOPlevels) > 1)
+     {
+       GOPlevels++;
+     }
+ 
+     // start with every frame as a disposable layer-0 B slice in display order
+     for (i = 0; i < Bframes; i++)
+     {
+       gop_structure[i].display_no = i;
+       gop_structure[i].slice_type = B_SLICE;
+       gop_structure[i].pyramid_layer = 0;
+       gop_structure[i].reference_idc = NALU_PRIORITY_DISPOSABLE;
+       gop_structure[i].slice_qp = input->qpB;
+     }
+ 
+     // promote every (1 << level)-th frame to the given level; later levels overwrite earlier ones
+     for (level = 1; level < GOPlevels; level++)
+     {
+       const int step = 1 << level;
+ 
+       for (i = step - 1; i < Bframes + 1 - step; i += step)
+       {
+         gop_structure[i].pyramid_layer  = level;
+         gop_structure[i].reference_idc  = NALU_PRIORITY_HIGH;
+         gop_structure[i].slice_qp = max(0, input->qpB + (input->PyramidLevelQPEnable ? -level: input->qpBRSOffset));
+       }
+     }
+ 
+     // insertion sort by descending pyramid_layer; entries of equal layer keep their display order
+     for (i = 1; i < Bframes; i++)
+     {
+       pos = i;
+ 
+       while (pos > 0 && gop_structure[pos].pyramid_layer > gop_structure[pos-1].pyramid_layer)
+       {
+         tmp = gop_structure[pos-1];
+         gop_structure[pos-1] = gop_structure[pos];
+         gop_structure[pos] = tmp;
+         pos--;
+       }
+     }
+   }
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Initialization of GOP structure.
+ *
+ *    Allocates the zero-initialized global gop_structure array. The array
+ *    holds input->jumpd entries when input->PyramidCoding is 3 and
+ *    input->successive_Bframe entries otherwise, but never fewer than 10.
+ *    Calls no_mem_exit() when the allocation fails.
+ ************************************************************************
+ */
+ void init_gop_structure()
+ {
+   int entries;
+ 
+   if (input->PyramidCoding == 3)
+     entries = input->jumpd;
+   else
+     entries = input->successive_Bframe;
+ 
+   // keep a minimum capacity of 10 entries
+   entries = max(10, entries);
+ 
+   gop_structure = calloc(entries, sizeof (GOP_DATA));
+   if (gop_structure == NULL)
+   {
+     no_mem_exit("init_gop_structure: gop_structure");
+   }
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Clear GOP structure
+ *
+ *    Releases the global gop_structure array and resets the pointer to
+ *    NULL, so that calling this function again (or before
+ *    init_gop_structure()) is harmless. free(NULL) is a no-op, hence no
+ *    explicit NULL test is needed.
+ ************************************************************************
+ */
+ void clear_gop_structure()
+ {
+   free(gop_structure);
+   gop_structure = NULL;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Interpret GOP struct from input parameters
+ *
+ *    Parses input->ExplicitPyramidFormat into gop_structure[]. Each frame
+ *    is described by four consecutive fields:
+ *      1. slice type letter:  P/p, B/b or I/i
+ *      2. display order:      decimal number in [0, input->jumpd - 1],
+ *                             unique among the frames parsed so far
+ *      3. reference letter:   E/e (disposable) or R/r (stored reference)
+ *      4. QP offset:          decimal number added to the base QP of the
+ *                             slice type (qp0, qpN or qpB), then clipped
+ *    The first non-digit character after a QP offset starts the next frame
+ *    and is re-read as its slice type, unless it sits in the last two
+ *    positions of the string.
+ *
+ *    Any malformed field is reported through error(); an empty string is
+ *    reported as well. On return input->successive_Bframe is set to the
+ *    number of parsed frames.
+ ************************************************************************
+ */
+ void interpret_gop_structure()
+ {
+   const char *fmt = input->ExplicitPyramidFormat;
+   int nLength = strlen(fmt);
+   int i =0, k, dqp, display_no;
+   int slice_read =0, order_read = 0, stored_read = 0, qp_read =0;
+   int coded_frame = 0;
+ 
+   if (nLength > 0)
+   {
+ 
+     for (i = 0; i < nLength ; i++)
+     {
+       // <ctype.h> functions require a value representable as unsigned char
+       // (or EOF); passing a negative plain char is undefined behaviour.
+       const int cur_is_digit = isdigit((unsigned char) fmt[i]);
+ 
+       //! First lets read slice type
+       if (slice_read == 0)
+       {
+         switch (fmt[i])
+         {
+         case 'P':
+         case 'p':
+           gop_structure[coded_frame].slice_type=P_SLICE;
+           break;
+         case 'B':
+         case 'b':
+           gop_structure[coded_frame].slice_type=B_SLICE;
+           break;
+         case 'I':
+         case 'i':
+           gop_structure[coded_frame].slice_type=I_SLICE;
+           break;
+         default:
+           snprintf(errortext, ET_SIZE, "Slice Type invalid in ExplicitPyramidFormat param. Please check configuration file.");
+           error (errortext, 400);
+           break;
+         }
+         slice_read = 1;
+       }
+       else
+       {
+         //! Next is Display Order
+         if (order_read == 0)
+         {
+           if (cur_is_digit)
+           {
+             // sscanf consumes the whole number; its remaining digits are
+             // skipped by the following loop iterations
+             sscanf(fmt+i,"%d",&display_no);
+             gop_structure[coded_frame].display_no = display_no;
+             order_read = 1;
+             if (display_no<0 || display_no>=input->jumpd)
+             {
+               snprintf(errortext, ET_SIZE, "Invalid Frame Order value. Frame position needs to be in [0,%d] range.",input->jumpd-1);
+               error (errortext, 400);
+             }
+             for (k=0;k<coded_frame;k++)
+             {
+               if (gop_structure[k].display_no == display_no)
+               {
+                 snprintf(errortext, ET_SIZE, "Frame Order value %d in frame %d already used for enhancement frame %d.",display_no,coded_frame,k);
+                 error (errortext, 400);
+               }
+             }
+           }
+           else
+           {
+             snprintf(errortext, ET_SIZE, "Slice Type needs to be followed by Display Order. Please check configuration file.");
+             error (errortext, 400);
+           }
+         }
+         else if (order_read == 1)
+         {
+           //! Then the reference indication (first non-digit after the display order)
+           if (stored_read == 0 && !cur_is_digit)
+           {
+             switch (fmt[i])
+             {
+             case 'E':
+             case 'e':
+               gop_structure[coded_frame].reference_idc = NALU_PRIORITY_DISPOSABLE;
+               break;
+             case 'R':
+             case 'r':
+               gop_structure[coded_frame].reference_idc= NALU_PRIORITY_HIGH;
+               break;
+             default:
+               snprintf(errortext, ET_SIZE, "Reference_IDC invalid in ExplicitPyramidFormat param. Please check configuration file.");
+               error (errortext, 400);
+               break;
+             }
+             stored_read = 1;
+           }
+           //! Then the QP offset
+           else if (stored_read == 1 && qp_read == 0)
+           {
+             if (cur_is_digit)
+             {
+               sscanf(fmt+i,"%d",&dqp);
+ 
+               if (gop_structure[coded_frame].slice_type == I_SLICE)
+                 gop_structure[coded_frame].slice_qp = input->qp0;
+               else if (gop_structure[coded_frame].slice_type == P_SLICE)
+                 gop_structure[coded_frame].slice_qp = input->qpN;
+               else
+                 gop_structure[coded_frame].slice_qp = input->qpB;
+ 
+               gop_structure[coded_frame].slice_qp = Clip3(-img->bitdepth_luma_qp_scale, 51,gop_structure[coded_frame].slice_qp + dqp);
+               qp_read = 1;
+             }
+             else
+             {
+               snprintf(errortext, ET_SIZE, "Reference_IDC needs to be followed by QP. Please check configuration file.");
+               error (errortext, 400);
+             }
+           }
+           //! A non-digit after a complete frame description starts the next frame
+           else if (stored_read == 1 && qp_read == 1 && !cur_is_digit && (i < nLength - 2))
+           {
+             stored_read =0;
+             qp_read=0;
+             order_read=0;
+             slice_read=0;
+             i--;                 // re-read this character as the next slice type
+             coded_frame ++;
+             if (coded_frame >= input->jumpd )
+             {
+               snprintf(errortext, ET_SIZE, "Total number of frames in Enhancement GOP need to be fewer or equal to FrameSkip parameter.");
+               error (errortext, 400);
+             }
+ 
+           }
+         }
+ 
+       }
+     }
+   }
+   else
+   {
+     snprintf(errortext, ET_SIZE, "ExplicitPyramidFormat is empty. Please check configuration file.");
+     error (errortext, 400);
+   }
+ 
+   input->successive_Bframe = coded_frame + 1;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Encode Enhancement Layer.
+ *
+ *    Encodes the input->successive_Bframe frames that sit between two
+ *    frames of the base sequence. Nothing is encoded when
+ *    input->successive_Bframe is 0 or IMG_NUMBER is not positive.
+ *    For every frame the function updates img->frame_num, the picture
+ *    order counts (toppoc, bottompoc, framepoc, delta_pic_order_cnt) and
+ *    img->nal_reference_idc, then calls encode_one_frame().
+ *    When input->PyramidCoding is non-zero, slice type, reference status
+ *    and display position come from gop_structure[]; otherwise the frames
+ *    are coded as B slices in display order.
+ *    img->b_frame_to_code is 0 on return.
+ ************************************************************************
+ */
+ void encode_enhancement_layer()
+ {
+   // 1 while the previously coded frame was a stored reference; starts at 1
+   // so that the first frame of the pyramid loop increments frame_num
+   int previous_ref_idc = 1;
+   
+   if ((input->successive_Bframe != 0) && (IMG_NUMBER > 0)) // B-frame(s) to encode
+   {
+     img->type = B_SLICE;            // set image type to B-frame
+     
+     if (input->NumFramesInELSubSeq == 0)
+       img->layer = 0;
+     else
+       img->layer = 1;
+     
+     if (input->BRefPictures != 1 && input->PyramidCoding==0)
+     {
+       img->frame_num++; //increment frame_num once for B-frames
+       img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+     }
+     img->nal_reference_idc = 0;    
+     
+     //if (input->PyramidCoding == 3 || input->PyramidCoding == 1)
+     if (input->PyramidCoding)
+     {
+       // b_frame_to_code is 1-based; gop_structure[] is indexed with b_frame_to_code - 1
+       for(img->b_frame_to_code=1; img->b_frame_to_code<=input->successive_Bframe; img->b_frame_to_code++)
+       {
+         
+         img->nal_reference_idc = 0;    
+         
+         img->type = gop_structure[img->b_frame_to_code - 1].slice_type;
+         
+         if (previous_ref_idc == 1)           
+         {
+           img->frame_num++;                 //increment frame_num for each stored B slice
+           img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+         }
+         
+         if (gop_structure[img->b_frame_to_code - 1].reference_idc== NALU_PRIORITY_HIGH )
+         {
+           img->nal_reference_idc = 1;
+           previous_ref_idc = 1;
+         }
+         else
+           previous_ref_idc = 0;
+         
+         // distance between consecutive enhancement frames, in units of source frames
+         img->b_interval =
+           ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+         
+         if (input->PyramidCoding == 3)
+           img->b_interval = 1.0;
+         
+         // top field POC from the display position stored in gop_structure[]
+         if(input->intra_period && input->idr_enable)
+           img->toppoc = 2*(((IMG_NUMBER%input->intra_period)-1)*(input->jumpd+1) + (int)(img->b_interval * (double)(1 + gop_structure[img->b_frame_to_code - 1].display_no)));
+         else
+           img->toppoc = 2*((IMG_NUMBER-1)*(input->jumpd + 1) + (int)(img->b_interval * (double)(1 + gop_structure[img->b_frame_to_code -1].display_no)));
+         
+         // the first frame has no predecessor in gop_structure[], so index
+         // b_frame_to_code - 2 is only used from the second frame on
+         if (img->b_frame_to_code == 1)
+           img->delta_pic_order_cnt[0] = img->toppoc - 2*(start_tr_in_this_IGOP  + (IMG_NUMBER)*((input->jumpd+1)));
+         else
+           img->delta_pic_order_cnt[0] = img->toppoc - 2*(start_tr_in_this_IGOP  + (IMG_NUMBER-1)*((input->jumpd+1)) + (int) (2.0 *img->b_interval * (double) (1+ gop_structure[img->b_frame_to_code - 2].display_no)));
+         
+         if ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING))
+           img->bottompoc = img->toppoc;     //progressive
+         else
+           img->bottompoc = img->toppoc+1;
+         
+         img->framepoc = min (img->toppoc, img->bottompoc);
+         
+         img->delta_pic_order_cnt[1]= 0;   // POC200301
+         
+         encode_one_frame();  // encode one B-frame
+         if (input->ReportFrameStats)
+           report_frame_statistic();
+         
+         // extra frame_num increment when the last frame of the GOP was a stored reference
+         if (gop_structure[img->b_frame_to_code - 1].reference_idc== NALU_PRIORITY_HIGH && img->b_frame_to_code==input->successive_Bframe)           
+         {
+           img->frame_num++;                 //increment frame_num for each stored B slice
+           img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+         }
+       }
+       img->b_frame_to_code = 0;
+     }
+     else
+     {     
+       for(img->b_frame_to_code=1; img->b_frame_to_code<=input->successive_Bframe; img->b_frame_to_code++)
+       {
+         
+         img->nal_reference_idc = 0;    
+         if (input->BRefPictures == 1 )
+         {
+           img->nal_reference_idc = 1;
+           img->frame_num++;                 //increment frame_num once for B-frames
+           img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+         }
+         
+         img->b_interval =
+           ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+         
+         // NOTE(review): this branch is only reached when input->PyramidCoding is 0,
+         // so the following test cannot be true here
+         if (input->PyramidCoding == 3)
+           img->b_interval = 1.0;
+         
+         // top field POC from the position of the frame in coding (= display) order
+         if(input->intra_period && input->idr_enable)
+           img->toppoc = 2*(((IMG_NUMBER% input->intra_period)-1)*(input->jumpd+1) + (int) (img->b_interval * (double)img->b_frame_to_code));
+         else
+           img->toppoc = 2*((IMG_NUMBER-1)*(input->jumpd+1) + (int) (img->b_interval * (double)img->b_frame_to_code));
+         
+         if ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING))
+           img->bottompoc = img->toppoc;     //progressive
+         else
+           img->bottompoc = img->toppoc+1;
+         
+         img->framepoc = min (img->toppoc, img->bottompoc);
+         
+         //the following is sent in the slice header
+         if (input->BRefPictures != 1)
+         {
+           img->delta_pic_order_cnt[0]= 2*(img->b_frame_to_code-1);
+         }
+         else
+         {
+           img->delta_pic_order_cnt[0]= -2;
+         }
+         
+         img->delta_pic_order_cnt[1]= 0;   // POC200301
+         
+         encode_one_frame();  // encode one B-frame
+ 
+         // extra frame_num increment after the last stored B frame of the GOP
+         if (input->BRefPictures == 1 && img->b_frame_to_code==input->successive_Bframe)           
+         {
+           img->frame_num++;                 //increment frame_num for each stored B slice
+           img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+         } 
+ 
+         if (input->ReportFrameStats)
+           report_frame_statistic();
+       }
+     }
+   }
+   // reset the counter on every path, including when nothing was encoded
+   img->b_frame_to_code = 0;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Builds a two-entry dec_ref_pic_marking list for the current picture.
+ *
+ *    Does nothing when img->dec_ref_pic_marking_buffer is already set or
+ *    when the decoded picture buffer holds no reference frames. Otherwise
+ *    the frame stores are scanned for the short-term reference with the
+ *    smallest POC, and a list "operation 1 -> operation 0" is attached to
+ *    img, where operation 1 carries the pic_num distance to that frame.
+ *    (presumably the H.264 memory_management_control_operation codes
+ *    1 = remove short-term picture and 0 = end of list -- confirm)
+ *
+ * \param current_pic_num
+ *    pic_num of the picture being coded
+ ************************************************************************
+ */
+ void poc_based_ref_management(int current_pic_num)
+ {
+   unsigned idx;
+   unsigned pic_num = 0;
+   int min_poc = INT_MAX;
+   DecRefPicMarking_t *end_op;
+   DecRefPicMarking_t *remove_op;
+ 
+   // a marking list has already been prepared
+   if (img->dec_ref_pic_marking_buffer != NULL)
+   {
+     return;
+   }
+ 
+   // no reference frames stored, nothing to select
+   if ((dpb.ref_frames_in_buffer + dpb.ltref_frames_in_buffer) == 0)
+   {
+     return;
+   }
+ 
+   // look for the short-term reference frame with the smallest POC
+   for (idx = 0; idx < dpb.used_size; idx++)
+   {
+     if (!dpb.fs[idx]->is_reference)
+       continue;
+     if (dpb.fs[idx]->is_long_term)
+       continue;
+     if (!(dpb.fs[idx]->poc < min_poc))
+       continue;
+ 
+     min_poc = dpb.fs[idx]->frame->poc;
+     pic_num = dpb.fs[idx]->frame->pic_num;
+   }
+ 
+   // terminating entry (operation 0)
+   end_op = calloc(1, sizeof (DecRefPicMarking_t));
+   if (end_op == NULL)
+     no_mem_exit("poc_based_ref_management: tmp_drpm");
+   end_op->Next = NULL;
+   end_op->memory_management_control_operation = 0;
+ 
+   // entry addressing the selected frame (operation 1), placed in front
+   remove_op = calloc(1, sizeof (DecRefPicMarking_t));
+   if (remove_op == NULL)
+     no_mem_exit("poc_based_ref_management: tmp_drpm2");
+   remove_op->Next = end_op;
+   remove_op->memory_management_control_operation = 1;
+   remove_op->difference_of_pic_nums_minus1 = current_pic_num - pic_num - 1;
+ 
+   img->dec_ref_pic_marking_buffer = remove_op;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/explicit_gop.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,25 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file explicit_gop.h
+  *
+  * \brief
+  *    Functions for explicit gop and pyramid support
+  *
+  * \author
+  *     Main contributors (see contributors.h for copyright, address and affiliation details)
+  *     - Alexis Michael Tourapis          <alexismt at ieee.org>
+  *************************************************************************************
+  */
+ 
+ #ifndef _EXPLICIT_GOP_H_
+ #define _EXPLICIT_GOP_H_
+ 
+ // GOP Pyramid
+ // All functions below operate on the global gop_structure array.
+ void init_gop_structure();        //!< allocate gop_structure
+ void interpret_gop_structure();   //!< fill gop_structure from input->ExplicitPyramidFormat
+ void create_pyramid();            //!< fill gop_structure with a generated pyramid
+ void clear_gop_structure();       //!< free gop_structure
+ void encode_enhancement_layer();  //!< encode the frames of the enhancement layer
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/fast_me.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/fast_me.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/fast_me.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,914 ----
+ 
+ /*!
+  ************************************************************************
+  *
+  * \file fast_me.c
+  *
+  * \brief
+  *   Fast integer pel motion estimation and fractional pel motion estimation
+  *   algorithms are described in this file.
+  *   1. get_mem_FME() and free_mem_FME() are functions for allocation and release
+  *      of memories about motion estimation
+  *   2. FME_BlockMotionSearch() is the function for fast integer pel motion 
+  *      estimation and fractional pel motion estimation
+  *   3. DefineThreshold() defined thresholds for early termination
+  * \author 
+  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>  
+  *    - Wenfang Fu         <fwf at video.mdc.tsinghua.edu.cn>
+  * \date    
+  *    2003.8
+  ************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ 
+ #include "memalloc.h"
+ #include "fast_me.h"
+ #include "refbuf.h"
+ 
+ #define Q_BITS          15   // base shift; DefineThresholdMB() adds qp/6 to it
+ 
+ // Tables defined in other translation units
+ extern  unsigned int*   byte_abs;         // indexed with pixel differences when summing up block costs
+ extern  int*   mvbits;                    // presumably read by the MV_COST macro -- confirm
+ extern  short*   spiral_search_x;         // x offsets of the spiral search pattern
+ extern  short*   spiral_search_y;         // y offsets of the spiral search pattern
+ 
+ 
+ // NOTE(review): not referenced in the part of the file reviewed here
+ static pel_t *(*get_line) (pel_t**, int, int, int, int);
+ 
+ // Quantization coefficients: first index is qp % 6, the remaining two
+ // select a position inside a 4x4 block. DefineThresholdMB() reads entry [qp % 6][0][0].
+ static const int quant_coef[6][4][4] = {
+   {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}},
+   {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}},
+   {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}},
+   {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}},
+   {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}},
+   {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}}
+ };
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Sets the AlphaSec[] and AlphaThird[] early-termination constants for
+  *    block types 1..7 and then derives the Bsize[] thresholds through
+  *    DefineThresholdMB(). Entry 0 of both arrays is left untouched.
+  ************************************************************************
+  */
+ void DefineThreshold()
+ {
+   // index 0 is a placeholder, block types are numbered 1..7
+   static const float alpha_sec[8]   = { 0.0f, 0.01f, 0.01f, 0.01f, 0.02f, 0.03f, 0.03f, 0.04f };
+   static const float alpha_third[8] = { 0.0f, 0.06f, 0.07f, 0.07f, 0.08f, 0.12f, 0.11f, 0.15f };
+   int blocktype;
+ 
+   for (blocktype = 1; blocktype <= 7; blocktype++)
+   {
+     AlphaSec[blocktype]   = alpha_sec[blocktype];
+     AlphaThird[blocktype] = alpha_third[blocktype];
+   }
+ 
+   DefineThresholdMB();
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Derives the Bsize[1..7] thresholds from input->qpN.
+  *    A quantization threshold is computed from quant_coef[qp % 6][0][0],
+  *    scaled to Bsize[7], and the entries for the remaining block types are
+  *    obtained by successive multiplication by 4.
+  ************************************************************************
+  */
+ void DefineThresholdMB()
+ {
+   const int qp_offset = input->qpN - MIN_QP;
+   const int q_bits    = Q_BITS + qp_offset / 6;
+   const int q_scale   = 1 << q_bits;
+   const int q_const   = q_scale / 6;
+   const int Thresh4x4 = (q_scale - q_const) / quant_coef[qp_offset % 6][0][0];
+ 
+   float Quantize_step = Thresh4x4 / (4*5.61f);
+ 
+   Bsize[7] = (16*16)*Quantize_step;
+ 
+   // block types 6 and 5 share one value, as do block types 3 and 2
+   Bsize[6] = Bsize[7]*4;
+   Bsize[5] = Bsize[7]*4;
+   Bsize[4] = Bsize[5]*4;
+   Bsize[3] = Bsize[4]*4;
+   Bsize[2] = Bsize[4]*4;
+   Bsize[1] = Bsize[2]*4;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates the buffers used by the fast motion estimation:
+  *    flag_intra, McostState, fastme_ref_cost, fastme_l0_cost,
+  *    fastme_l1_cost and SearchState.
+  * \return
+  *    sum of the values returned by the get_mem* helpers (the flag_intra
+  *    buffer is not included in that sum)
+  ************************************************************************
+  */
+ int get_mem_FME()
+ {
+   const int window = 2*input->search_range+1;   // search positions per dimension
+   int total;
+ 
+   flag_intra = calloc ((img->width>>4)+1,sizeof(byte)); //fwf 20050330
+   if (flag_intra == NULL)
+     no_mem_exit("get_mem_FME: flag_intra");
+ 
+   total  = get_mem2D(&McostState, window, window);
+   total += get_mem4Dint(&(fastme_ref_cost), img->max_num_references, 9, 4, 4);
+   total += get_mem3Dint(&(fastme_l0_cost), 9, img->height/4, img->width/4);
+   total += get_mem3Dint(&(fastme_l1_cost), 9, img->height/4, img->width/4);
+   total += get_mem2D(&SearchState,7,7);
+ 
+   return total;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases every buffer allocated by get_mem_FME().
+  ************************************************************************
+  */
+ void free_mem_FME()
+ {
+   // search state maps
+   free_mem2D(McostState);
+   free_mem2D(SearchState);
+ 
+   // cost tables
+   free_mem4Dint(fastme_ref_cost, img->max_num_references, 9);
+   free_mem3Dint(fastme_l0_cost, 9);
+   free_mem3Dint(fastme_l1_cost, 9);
+ 
+   free (flag_intra);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Adds the absolute pixel differences between the original block and
+  *    the reference block at (cand_x, cand_y) to mcost, row by row.
+  *    Accumulation stops after the first row that brings the cost up to
+  *    min_mcost or beyond.
+  * \return
+  *    accumulated cost (possibly partial when terminated early)
+  ************************************************************************
+  */
+ int PartCalMad(pel_t *ref_pic,pel_t** orig_pic,pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int), int blocksize_y,int blocksize_x, int blocksize_x4,int mcost,int min_mcost,int cand_x,int cand_y)
+ {
+   int row, left;
+   int pic_height = img->height;
+   pel_t *src, *ref;
+ 
+   // field macroblocks of an MBAFF frame see only half of the picture rows
+   if ((img->MbaffFrameFlag) && (img->mb_data[img->current_mb_nr].mb_field))
+     pic_height = img->height/2;
+ 
+   for (row = 0; row < blocksize_y; row++)
+   {
+     ref = get_ref_line (blocksize_x, ref_pic, cand_y+row, cand_x, pic_height, img->width);//2004.3.3
+     src = orig_pic [row];
+ 
+     // blocksize_x4 groups of four pixels per row
+     for (left = 4*blocksize_x4; left > 0; left--)
+     {
+       mcost += byte_abs[ *src++ - *ref++ ];
+     }
+ 
+     if (mcost >= min_mcost)
+       return mcost;
+   }
+ 
+   return mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FastIntegerPelBlockMotionSearch: fast pixel block motion search.
+  *    This algorithm is called UMHexagonS (see JVT-D016), which includes
+  *    four steps with different kinds of search patterns.
+  * \par Input:
+  * pel_t**   orig_pic,     // <--  original picture
+  * int       ref,          // <--  reference frame (0... or -1 (backward))
+  * int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+  * int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+  * int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+  * int       pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+  * int       pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+  * int*      mv_x,         //  --> motion vector (x) - in pel units
+  * int*      mv_y,         //  --> motion vector (y) - in pel units
+  * int       search_range, // <--  1-d search range in pel units                         
+  * int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+  * int       lambda_factor // <--  lagrangian parameter for determining motion cost
+  * \par
+  * Three macro definitions defined in this program:
+  * 1. EARLY_TERMINATION: early termination algorithm, refer to JVT-D016.doc
+  * 2. SEARCH_ONE_PIXEL: search one pixel in search range
+  * 3. SEARCH_ONE_PIXEL1(value_iAbort): search one pixel in search range,
+  *                                 but give a parameter to show if mincost was refreshed
+  * \author
+  *   Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *   - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *   - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>  
+  * \date   :
+  *   2003.8
+  ************************************************************************
+  */
+ int                                     //  ==> minimum motion cost after search
+ FastIntegerPelBlockMotionSearch  (pel_t**   orig_pic,     // <--  original block, one row pointer per line (read by PartCalMad)
+                                   short     ref,          // <--  reference frame (0... or -1 (backward))
+                                   int       list,         // <--  reference list index (list_offset is added for field MBs)
+                                   int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+                                   int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+                                   int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+                                   short     pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+                                   short     pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+                                   short*    mv_x,         // <--> in: search center offset (x), out: motion vector (x) - in pel units
+                                   short*    mv_y,         // <--> in: search center offset (y), out: motion vector (y) - in pel units
+                                   int       search_range, // <--  1-d search range in pel units                         
+                                   int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+                                   int       lambda_factor)       // <--  lagrangian parameter for determining motion cost
+ {
+   // candidate offsets of the search patterns used below
+   static int Diamond_x[4] = {-1, 0, 1, 0};
+   static int Diamond_y[4] = {0, 1, 0, -1};
+   static int Hexagon_x[6] = {2, 1, -1, -2, -1, 1};
+   static int Hexagon_y[6] = {0, -2, -2, 0,  2, 2};
+   static int Big_Hexagon_x[16] = {0,-2, -4,-4,-4, -4, -4, -2,  0,  2,  4,  4, 4, 4, 4, 2};
+   static int Big_Hexagon_y[16] = {4, 3, 2,  1, 0, -1, -2, -3, -4, -3, -2, -1, 0, 1, 2, 3};
+ 
+   int   pos, cand_x, cand_y,  mcost;
+   pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int);
+   // 0 for frame MBs; for field MBs of an MBAFF frame: 4 for odd, 2 for even MB numbers
+   int   list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? img->current_mb_nr%2 ? 4 : 2 : 0;
+ 
+   int   mvshift       = 2;                  // motion vector shift for getting sub-pel units
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+   int   blocksize_x4  = blocksize_x >> 2;                         // horizontal block size in 4-pel units
+   int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;       // predicted position x (in sub-pel units)
+   int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;       // predicted position y (in sub-pel units)
+   int   center_x      = pic_pix_x + *mv_x;                        // center position x (in pel units)
+   int   center_y      = pic_pix_y + *mv_y;                        // center position y (in pel units)
+   int   best_x = 0, best_y = 0;
+   int   search_step,iYMinNow, iXMinNow;
+   int   i,m; 
+   int   iAbort;
+   float betaSec,betaThird;
+   // picture height seen by the current MB (halved for field MBs of an MBAFF frame)
+   int height=((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?img->height/2:img->height;
+    
+   //===== Use weighted Reference for ME ====
+   pel_t*  ref_pic;
+   int  apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                          (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));  
+ 
+   if (apply_weights && input->UseWeightedReferenceME)
+     ref_pic       = listX[list+list_offset][ref]->imgY_11_w;
+   else
+     ref_pic       = listX[list+list_offset][ref]->imgY_11;
+    
+ 
+   //===== set function for getting reference picture lines =====
+   // FastLineX is selected only when the whole search window lies inside the picture
+   if ((center_x > search_range) && (center_x < img->width -1-search_range-blocksize_x) &&
+     (center_y > search_range) && (center_y < height-1-search_range-blocksize_y)   )
+   {
+     get_ref_line = FastLineX;
+   }
+   else
+   {
+     get_ref_line = UMVLineX;
+   }
+   
+   ////// clear the search state map //////////////////////////
+   // NOTE(review): the size is taken from input->search_range while the map is
+   // indexed with the search_range parameter below -- presumably both agree; confirm
+   memset(McostState[0],0,(2*input->search_range+1)*(2*input->search_range+1));
+   
+    ///////Threshold defined for early termination///////////////////  
+   // betaSec/betaThird are derived from Bsize[blocktype] and one of the predicted
+   // SAD values; both are 0 when the selected prediction is 0
+   if(list==0 && ref>0) 
+   {
+     if(pred_SAD_ref!=0)
+     {
+       betaSec = Bsize[blocktype]/(pred_SAD_ref*pred_SAD_ref)-AlphaSec[blocktype];
+       betaThird = Bsize[blocktype]/(pred_SAD_ref*pred_SAD_ref)-AlphaThird[blocktype];
+     }
+     else
+     {
+       betaSec = 0;
+       betaThird = 0;
+     }
+   }
+   else 
+   {
+     if(blocktype==1)
+     {
+       if(pred_SAD_space !=0)
+       {
+         betaSec = Bsize[blocktype]/(pred_SAD_space*pred_SAD_space)-AlphaSec[blocktype];
+         betaThird = Bsize[blocktype]/(pred_SAD_space*pred_SAD_space)-AlphaThird[blocktype];
+       }
+       else
+       {
+         betaSec = 0;
+         betaThird = 0;
+       }
+     }
+     else
+     {
+       if(pred_SAD_uplayer !=0)
+       {
+         betaSec = Bsize[blocktype]/(pred_SAD_uplayer*pred_SAD_uplayer)-AlphaSec[blocktype];
+         betaThird = Bsize[blocktype]/(pred_SAD_uplayer*pred_SAD_uplayer)-AlphaThird[blocktype];
+       }
+       else
+       {
+         betaSec = 0;
+         betaThird = 0;
+       }
+     }
+   }
+   /*****************************/
+ 
+   //check the center median predictor
+   cand_x = center_x ;
+   cand_y = center_y ;
+   mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
+   mcost = PartCalMad(ref_pic, orig_pic, get_ref_line,blocksize_y,blocksize_x,blocksize_x4,mcost,min_mcost,cand_x,cand_y);
+   McostState[search_range][search_range] = 1;
+   if (mcost < min_mcost)
+   {
+     min_mcost = mcost;
+     best_x = cand_x;
+     best_y = cand_y;
+   }
+ 
+   // small diamond around the best position so far
+   // (SEARCH_ONE_PIXEL is a macro defined outside this function; presumably it
+   //  evaluates (cand_x, cand_y) and updates min_mcost/best_x/best_y -- confirm)
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   for (m = 0; m < 4; m++)
+   {   
+     cand_x = iXMinNow + Diamond_x[m];
+     cand_y = iYMinNow + Diamond_y[m];   
+     SEARCH_ONE_PIXEL
+   } 
+ 
+   // also try the zero motion vector when the search center differs from it
+   if(center_x != pic_pix_x || center_y != pic_pix_y)
+   {
+     cand_x = pic_pix_x ;
+     cand_y = pic_pix_y ;
+     SEARCH_ONE_PIXEL
+ 
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {   
+       cand_x = iXMinNow + Diamond_x[m];
+       cand_y = iYMinNow + Diamond_y[m];   
+       SEARCH_ONE_PIXEL
+     } 
+   }
+   
+   // for all block types but 1: try the vector in pred_MV_uplayer (sub-pel units, hence /4)
+     if(blocktype>1)
+   {
+     cand_x = pic_pix_x + (pred_MV_uplayer[0]/4);
+     cand_y = pic_pix_y + (pred_MV_uplayer[1]/4);
+     SEARCH_ONE_PIXEL
+     if ((min_mcost-pred_SAD_uplayer)<pred_SAD_uplayer*betaThird)
+       goto third_step;
+     else if((min_mcost-pred_SAD_uplayer)<pred_SAD_uplayer*betaSec)
+       goto sec_step;
+   } 
+ 
+ 
+   //prediction using the motion vector of the last reference (pred_MV_ref)
+   if (img->field_picture)
+   {
+     if ((list==0 && ref>1) || (img->type == B_SLICE && list == 0 && (ref==0 ||ref==1 ) )) 
+       //Notes: for interlace case, ref==1 should be added
+     {
+       cand_x = pic_pix_x + pred_MV_ref[0]/4;
+       cand_y = pic_pix_y + pred_MV_ref[1]/4;
+       SEARCH_ONE_PIXEL
+     }
+   }
+   else
+   {
+     if ((list==0 && ref > 0) || (img->type == B_SLICE && list == 0 && ref==0 )) 
+       //Notes: for interlace case, ref==1 should be added
+     {
+       cand_x = pic_pix_x + pred_MV_ref[0]/4;
+       cand_y = pic_pix_y + pred_MV_ref[1]/4;
+       SEARCH_ONE_PIXEL
+     }
+   }
+   //small local search
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   for (m = 0; m < 4; m++)
+   {   
+     cand_x = iXMinNow + Diamond_x[m];
+     cand_y = iYMinNow + Diamond_y[m];   
+     SEARCH_ONE_PIXEL
+   } 
+ 
+   //early termination algorithm, refer to JVT-G016
+   // (EARLY_TERMINATION is a macro defined outside this function; presumably it
+   //  may jump to sec_step or third_step -- confirm)
+     EARLY_TERMINATION
+   
+   // block type 7 skips the first step
+   if(blocktype>6)
+     goto sec_step;
+   else
+     goto first_step;
+   
+ first_step: //Unsymmetrical-cross search 
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   
+   // horizontal arm: odd offsets up to search_range - 1
+   for(i=1;i<=search_range/2;i++)
+   {
+     search_step = 2*i - 1;
+     cand_x = iXMinNow + search_step;
+     cand_y = iYMinNow ;
+     SEARCH_ONE_PIXEL    
+     cand_x = iXMinNow - search_step;
+     cand_y = iYMinNow ;
+     SEARCH_ONE_PIXEL
+   }
+   
+   // vertical arm: half as many positions as the horizontal arm
+   for(i=1;i<=search_range/4;i++)
+   {
+     search_step = 2*i - 1;
+     cand_x = iXMinNow ;
+     cand_y = iYMinNow + search_step;
+     SEARCH_ONE_PIXEL
+     cand_x = iXMinNow ;
+     cand_y = iYMinNow - search_step;
+     SEARCH_ONE_PIXEL
+   }
+   //early termination algorithm, refer to JVT-G016
+     EARLY_TERMINATION
+   
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+     // Uneven Multi-Hexagon-grid Search 
+   // first the spiral positions 1..24 around the current best position
+   for(pos=1;pos<25;pos++)
+   {
+     cand_x = iXMinNow + spiral_search_x[pos];
+     cand_y = iYMinNow + spiral_search_y[pos];
+     SEARCH_ONE_PIXEL
+   }
+ 
+   //early termination algorithm, refer to JVT-G016
+   
+   // then 16-point hexagons scaled by i = 1 .. search_range/4
+   for(i=1;i<=search_range/4; i++)
+   {
+     for (m = 0; m < 16; m++)
+     {
+       cand_x = iXMinNow + Big_Hexagon_x[m]*i;
+       cand_y = iYMinNow + Big_Hexagon_y[m]*i; 
+       SEARCH_ONE_PIXEL1(1)
+     }
+   }
+ sec_step:  //Extended Hexagon-based Search
+       // repeat the 6-point hexagon around the best position until iAbort stays set
+       // (presumably SEARCH_ONE_PIXEL1 writes its argument to iAbort when the
+       //  minimum cost is refreshed -- confirm) or search_range rounds are done
+       iXMinNow = best_x;
+       iYMinNow = best_y;
+       for(i=0;i<search_range;i++) 
+       {
+         iAbort = 1;   
+         for (m = 0; m < 6; m++)
+         {   
+           cand_x = iXMinNow + Hexagon_x[m];
+           cand_y = iYMinNow + Hexagon_y[m];   
+           SEARCH_ONE_PIXEL1(0)
+         } 
+         if(iAbort)
+           break;
+         iXMinNow = best_x;
+         iYMinNow = best_y;
+       }
+ third_step: // the third step with a small search pattern
+       // same loop structure as above with the 4-point diamond
+       iXMinNow = best_x;
+       iYMinNow = best_y;
+       for(i=0;i<search_range;i++) 
+       {
+         iAbort = 1;   
+         for (m = 0; m < 4; m++)
+         {   
+           cand_x = iXMinNow + Diamond_x[m];
+           cand_y = iYMinNow + Diamond_y[m];   
+           SEARCH_ONE_PIXEL1(0)
+         } 
+         if(iAbort)
+           break;
+         iXMinNow = best_x;
+         iYMinNow = best_y;
+       }
+ 
+       // return the best position as a vector relative to the block position
+       *mv_x = best_x - pic_pix_x;
+       *mv_y = best_y - pic_pix_y; 
+       return min_mcost;
+   }
+ 
+ 
+   /*!
+  ************************************************************************
+  * \brief
+  * Functions for fast fractional pel motion estimation.
+  * 1. int AddUpSADQuarter() returns the SATD cost of a fractional-pel MV
+  * 2. int FastSubPelBlockMotionSearch() performs the fast fractional-pel ME
+  * \authors  
+  *    Zhibo Chen
+  *    Dept.of EE, Tsinghua Univ.
+  * \date 
+  *    2003.4
+  ************************************************************************
+  */
+ /*!
+  ************************************************************************
+  * \brief
+  *    Adds the distortion of one candidate position to a given motion
+  *    vector cost. The block is walked in 4x4 sub-blocks; for each one the
+  *    differences between the original samples and the reference samples
+  *    (every 4th sample of the lines returned by get_line()) are collected.
+  *
+  * \param pic_pix_x, pic_pix_y
+  *    Not referenced by this function.
+  * \param blocksize_x, blocksize_y
+  *    Block dimensions in samples.
+  * \param cand_mv_x, cand_mv_y
+  *    Position of the top-left sample as handed to get_line().
+  * \param ref_picture
+  *    Reference picture; imgY_ups_w is read when weighted prediction is
+  *    active and input->UseWeightedReferenceME is set, imgY_ups otherwise.
+  * \param orig_pic
+  *    Original samples, indexed [row][column] relative to the block.
+  * \param Mvmcost
+  *    Cost the distortion is added to.
+  * \param min_mcost
+  *    Best cost so far; without ABT the walk stops as soon as the running
+  *    cost exceeds it.
+  * \param useABT
+  *    Non-zero: gather all differences and score them once with
+  *    find_SATD(); zero: score each 4x4 sub-block with SATD().
+  * \param blocktype
+  *    Block type; selects the buffer layout and is passed to find_SATD().
+  * \return
+  *    Mvmcost plus the accumulated distortion (partial if the walk was
+  *    cut short).
+  ************************************************************************
+  */
+ int AddUpSADQuarter(int pic_pix_x,int pic_pix_y,int blocksize_x,int blocksize_y,
+                     int cand_mv_x,int cand_mv_y, StorablePicture *ref_picture, pel_t**   orig_pic, 
+                     int Mvmcost, int min_mcost,int useABT, int blocktype)
+ {
+   int   cost        = Mvmcost;
+   int   blk[16];                  // differences of the current 4x4 sub-block, row by row
+   int   abt_diff[MB_PIXELS];      // differences of the whole block, filled only when useABT is set
+   int   half_stride = (128 - 64 * (blocktype == 3));
+   int   max_x4      = ((ref_picture->size_x + 2*IMG_PAD_SIZE - 1)<<2);
+   int   max_y4      = ((ref_picture->size_y + 2*IMG_PAD_SIZE - 1)<<2);
+   int   stop        = 0;
+   int   blk_y, blk_x, row, col;
+   pel_t **ref_plane;
+ 
+   //===== Use weighted Reference for ME ====
+   int   weighted = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                      (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));
+ 
+   ref_plane = (weighted && input->UseWeightedReferenceME) ? ref_picture->imgY_ups_w
+                                                           : ref_picture->imgY_ups;
+ 
+   for (blk_y = 0; blk_y < blocksize_y && !stop; blk_y += 4)
+   {
+     int lower_half = (blk_y > 7) * half_stride;
+ 
+     for (blk_x = 0; blk_x < blocksize_x; blk_x += 4)
+     {
+       int  ref_x = (blk_x << 2) + cand_mv_x;
+       int *out   = blk;
+ 
+       // four rows of four differences; reference rows and columns are 4 apart
+       for (row = 0; row < 4; row++)
+       {
+         pel_t *src = orig_pic[blk_y + row];
+         pel_t *ref = get_line (ref_plane, (blk_y << 2) + cand_mv_y + 4*row, ref_x, max_y4, max_x4);
+ 
+         for (col = 0; col < 4; col++)
+           *out++ = src[blk_x + col] - ref[4*col];
+       }
+ 
+       if (useABT)
+       {
+         // copy the sub-block into the buffer that is handed to find_SATD() below
+         int base = (blk_x & 0x7) + (blk_x > 7) * 64 + lower_half;
+ 
+         for (row = 0; row < BLOCK_SIZE; row++)
+           memcpy(&(abt_diff[base + (((blk_y + row) & 0x7) << 3)]), &blk[row * BLOCK_SIZE], BLOCK_SIZE*sizeof(int));
+       }
+       else
+       {
+         cost += SATD (blk, input->hadamard);
+         if (cost > min_mcost)
+         {
+           // already worse than the best candidate: give up on this position
+           stop = 1;
+           break;
+         }
+       }
+     }
+   }
+ 
+   if (useABT)
+     cost += find_SATD (abt_diff, blocktype);
+ 
+   return cost;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fast fractional-pel motion search around the incoming vector.
+  *    Candidates are scored with AddUpSADQuarter() in this order:
+  *      1. the search centre itself (only when input->hadamard is set;
+  *         otherwise the incoming min_mcost is taken as its cost),
+  *      2. the centre shifted by (predictor - centre) % 4, if non-zero,
+  *      3. up to three rounds of a 4-point diamond around the current best,
+  *         restricted to +/-3 around the centre.
+  *    SearchState marks positions that were already scored.
+  *
+  * \return
+  *    Minimum motion cost found; *mv_x and *mv_y receive the best vector.
+  ************************************************************************
+  */
+ int                                                   //  ==> minimum motion cost after search
+ FastSubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                              short     ref,           // <--  reference picture index into the selected list
+                              int       list,          // <--  reference list (offset by 2 or 4 for field MBs in MBAFF frames)
+                              int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                              int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                              int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                              short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                              short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                              short*    mv_x,          // <--> in: search center (x) / out: motion vector (x), same units as the predictor
+                              short*    mv_y,          // <--> in: search center (y) / out: motion vector (y), same units as the predictor
+                              int       search_pos2,   // <--  not referenced by the fast search
+                              int       search_pos4,   // <--  not referenced by the fast search
+                              int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                              int       lambda_factor, // <--  lagrangian parameter for determining motion cost
+                              int       useABT)        // <--  passed through to AddUpSADQuarter()
+ {
+   static const int Diamond_x[4] = {-1, 0, 1, 0};
+   static const int Diamond_y[4] = {0, 1, 0, -1};
+   int   mcost;
+   int   cand_mv_x, cand_mv_y;
+   
+   int   list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field)) ? ((img->current_mb_nr%2) ? 4 : 2) : 0;
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   
+   int   mv_shift        = 0;
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+   // kept as int: the shifted bound no longer fits a 16-bit short once the
+   // picture dimension approaches 8K samples
+   int   max_pos_x4      = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int   max_pos_y4      = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   
+   int   search_range_dynamic = 3;
+   int   iXMinNow,iYMinNow,i;
+   int   m,currmv_x = 0,currmv_y = 0;
+   int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
+   int   mv_cost; 
+   
+   // pick the line fetcher: the plain one when the search centre is strictly
+   // inside the bounds computed above, UMVLine4X otherwise
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+       (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1)   )
+   {
+     get_line = FastLine4X;
+   }
+   else
+   {
+     get_line = UMVLine4X;    
+   }
+ 
+   // offset from the search centre to the predictor, reduced to (-4, 4)
+   pred_frac_mv_x = (pred_mv_x - *mv_x)%4;
+   pred_frac_mv_y = (pred_mv_y - *mv_y)%4; 
+   
+   // clear the visited map of the 7x7 window
+   // NOTE(review): clears it through row 0 only, so this assumes the rows of
+   // SearchState are one contiguous allocation of byte entries - confirm
+   memset(SearchState[0],0,(2*search_range_dynamic+1)*(2*search_range_dynamic+1));
+   
+   //===== 1. search centre =====
+   SearchState[search_range_dynamic][search_range_dynamic] = 1;
+   if(input->hadamard)
+   {
+     cand_mv_x = *mv_x;    
+     cand_mv_y = *mv_y;    
+     mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);    
+     mcost = AddUpSADQuarter(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x + pic4_pix_x,cand_mv_y + pic4_pix_y,ref_picture,orig_pic,mv_cost,min_mcost,useABT, blocktype);
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x = cand_mv_x;
+       currmv_y = cand_mv_y; 
+     }
+   }
+   else
+   {
+     // the centre is not re-scored; the incoming min_mcost stands for it
+     currmv_x = *mv_x;
+     currmv_y = *mv_y; 
+   }
+   
+   //===== 2. position suggested by the motion vector predictor =====
+   if(pred_frac_mv_x!=0 || pred_frac_mv_y!=0)
+   {
+     cand_mv_x = *mv_x + pred_frac_mv_x;    
+     cand_mv_y = *mv_y + pred_frac_mv_y;    
+     mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);    
+     mcost = AddUpSADQuarter(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y,ref_picture,orig_pic,mv_cost,min_mcost,useABT, blocktype);
+     SearchState[cand_mv_y -*mv_y + search_range_dynamic][cand_mv_x - *mv_x + search_range_dynamic] = 1;
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x = cand_mv_x;
+       currmv_y = cand_mv_y; 
+     }
+   }
+   
+   //===== 3. diamond refinement, stops after the first round without improvement =====
+   iXMinNow = currmv_x;
+   iYMinNow = currmv_y;
+   for(i=0;i<search_range_dynamic;i++) 
+   {
+     abort_search=1;
+     for (m = 0; m < 4; m++)
+     {
+       cand_mv_x = iXMinNow + Diamond_x[m];    
+       cand_mv_y = iYMinNow + Diamond_y[m]; 
+       
+       if(abs(cand_mv_x - *mv_x) <=search_range_dynamic && abs(cand_mv_y - *mv_y)<= search_range_dynamic)
+       {
+         if(!SearchState[cand_mv_y -*mv_y+ search_range_dynamic][cand_mv_x -*mv_x+ search_range_dynamic])
+         {
+           mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);    
+           mcost = AddUpSADQuarter(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y,ref_picture,orig_pic,mv_cost,min_mcost,useABT, blocktype);
+           SearchState[cand_mv_y - *mv_y + search_range_dynamic][cand_mv_x - *mv_x + search_range_dynamic] = 1;
+           if (mcost < min_mcost)
+           {
+             min_mcost = mcost;
+             currmv_x = cand_mv_x;
+             currmv_y = cand_mv_y; 
+             abort_search = 0; 
+           }
+         }
+       }
+     }
+     iXMinNow = currmv_x;
+     iYMinNow = currmv_y;
+     if(abort_search)
+       break;
+   }
+   
+   *mv_x = currmv_x;
+   *mv_y = currmv_y;
+   
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  * Functions for SAD prediction of intra block cases.
+  * 1. void decide_intrabk_SAD() judges the block coding type (intra/inter)
+  *    of neighbouring blocks
+  * 2. void skip_intrabk_SAD() sets the SAD to zero if the neighbouring block
+  *    coding type is intra
+  * \date
+  *    2003.4
+  ************************************************************************
+  */
+ // Sets flag_intra_SAD for the current macroblock from the flag_intra entries
+ // of its own column (img->pix_x>>4) and the adjacent columns.
+ // Leaves flag_intra_SAD untouched in I slices.
+ void   decide_intrabk_SAD()
+ {
+   int mb_col;
+ 
+   if (img->type == I_SLICE)
+     return;
+ 
+   mb_col = (img->pix_x)>>4;
+ 
+   if (img->pix_x == 0)
+   {
+     // first column: nothing to consult at the picture origin, own entry otherwise
+     flag_intra_SAD = (img->pix_y == 0) ? 0 : flag_intra[mb_col];
+   }
+   else if (img->pix_y == 0)
+   {
+     // first row: only the entry of the column to the left is consulted
+     flag_intra_SAD = flag_intra[mb_col-1];
+   }
+   else
+   {
+     flag_intra_SAD = (flag_intra[mb_col] || flag_intra[mb_col-1] || flag_intra[mb_col+1]);
+   }
+ }
+ 
+ // Records in flag_intra whether the current macroblock column ended up with
+ // best_mode 9 or 10 (presumably the intra modes, given the flag's name -
+ // confirm against the mode definitions) and, outside I slices, zeroes the
+ // stored fast-ME costs when it did.
+ //   best_mode : mode chosen for the macroblock
+ //   ref_max   : number of leading fastme_ref_cost planes to clear
+ void skip_intrabk_SAD(int best_mode, int ref_max)
+ {
+   const int picked_9_or_10 = (best_mode == 9 || best_mode == 10);
+   int bx, by, type, r;
+ 
+   if (img->number > 0) 
+     flag_intra[(img->pix_x)>>4] = picked_9_or_10 ? 1:0;
+ 
+   if (img->type == I_SLICE || !picked_9_or_10)
+     return;
+ 
+   // NOTE(review): the l0/l1 tables are cleared at [0..3][0..3] here, whereas
+   // setup_FME() reads them at picture-absolute 4x4 positions - confirm that
+   // this is intended.
+   for (bx = 0; bx < 4; bx++)
+   {
+     for (by = 0; by < 4; by++)
+     {
+       for (type = 0; type < 9; type++)
+       {
+         fastme_l0_cost[type][by][bx] = 0;
+         fastme_l1_cost[type][by][bx] = 0;
+         for (r = 0; r < ref_max; r++)
+           fastme_ref_cost[r][type][by][bx] = 0;
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prepares the predictors consumed by the fast motion search for one
+  *    block: pred_MV_uplayer, pred_MV_ref, pred_SAD_ref or
+  *    pred_SAD_uplayer, and FME_blocktype.
+  *
+  * \param ref
+  *    Reference index of the search about to run.
+  * \param list
+  *    Reference list of the search about to run.
+  * \param block_y, block_x
+  *    Block position used to index all_mv and the cost tables.
+  * \param blocktype
+  *    Block type of the partition about to be searched.
+  * \param all_mv
+  *    Motion vectors, indexed
+  *    [block_y][block_x][list][ref][blocktype][component].
+  ************************************************************************
+  */
+ void setup_FME(short ref, int list, int block_y, int block_x, int blocktype, short   ******all_mv)
+ {
+   const int is_field = (img->field_picture != 0);
+   const int N_Bframe = input->successive_Bframe;
+   const int n_Bframe = (N_Bframe) ? (frame_ctr[B_SLICE]%(N_Bframe+1)): 0;
+   int       up_type  = 0;   // block type whose results serve as "upper layer"; 0 = none
+ 
+   if      (blocktype > 6)   up_type = 5;
+   else if (blocktype > 4)   up_type = 4;
+   else if (blocktype == 4)  up_type = 2;
+   else if (blocktype > 1)   up_type = 1;
+ 
+   /**************************** MV prediction **********************/ 
+   //MV uplayer prediction: vector already found for the upper-layer block type
+   if (up_type)
+   {
+     pred_MV_uplayer[0] = all_mv[block_y][block_x][list][ref][up_type][0];
+     pred_MV_uplayer[1] = all_mv[block_y][block_x][list][ref][up_type][1];
+   }
+ 
+   //MV ref-frame prediction (list 0 only)
+   if (list == 0)
+   {
+     // in field pictures the vector is taken from two reference indices back
+     const int step = is_field ? 2 : 1;
+ 
+     if (ref >= step)
+     {
+       // scale the vector found for the earlier reference index
+       const int prev_ref = ref - step;
+       const int ref_dist = is_field ? (ref>>1) : ref;
+ 
+       pred_MV_ref[0] = all_mv[block_y][block_x][0][prev_ref][blocktype][0];
+       pred_MV_ref[0] = (int)(pred_MV_ref[0]*(ref_dist+1)/(float)(ref_dist));
+       pred_MV_ref[1] = all_mv[block_y][block_x][0][prev_ref][blocktype][1];
+       pred_MV_ref[1] = (int)(pred_MV_ref[1]*(ref_dist+1)/(float)(ref_dist));
+     }
+     else if (ref >= 0 && img->type == B_SLICE)
+     {
+       // B slice, first reference(s): derive from the list-1 vector of reference 0
+       pred_MV_ref[0] =(int) (all_mv[block_y][block_x][1][0][blocktype][0]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
+       pred_MV_ref[1] =(int) (all_mv[block_y][block_x][1][0][blocktype][1]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
+     }
+   }
+ 
+   /******************************SAD prediction**********************************/
+   if (list==0 && ref>0)
+   {
+     int src_ref;
+ 
+     if (is_field)
+       src_ref = (ref > 1) ? ref-2 : 0;
+     else
+       src_ref = ref-1;
+ 
+     pred_SAD_ref = fastme_ref_cost[src_ref][blocktype][block_y][block_x];
+     pred_SAD_ref = flag_intra_SAD ? 0 : pred_SAD_ref;//add this for irregular motion
+   }
+   else if (up_type)
+   {
+     // half of the cost stored for the upper-layer block type, read at the
+     // picture-absolute 4x4 position of this block
+     int ***layer_cost = (list==1) ? fastme_l1_cost : fastme_l0_cost;
+ 
+     pred_SAD_uplayer  = layer_cost[up_type][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x];
+     pred_SAD_uplayer /= 2;
+     pred_SAD_uplayer  = flag_intra_SAD ? 0 : pred_SAD_uplayer;// for irregular motion
+   }
+ 
+   FME_blocktype=blocktype;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/fast_me.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/fast_me.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/fast_me.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,168 ----
+ 
+ /*!
+  ************************************************************************
+  *
+  * \file fast_me.h
+  *
+  * \brief
+  *   Macro definitions and global variables for fast integer pel motion 
+  *   estimation and fractional pel motion estimation
+  *
+  * \author
+  *   Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen         <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu        <fenax at video.mdc.tsinghua.edu.cn>  
+  *    - Wenfang Fu         <fwf at video.mdc.tsinghua.edu.cn>
+  *
+  * \date
+  *   Apr. 2003
+  ************************************************************************
+  */
+ 
+ #ifndef _FAST_ME_H_
+ #define _FAST_ME_H_
+ 
+ #include "mbuffer.h"
+ 
+ /*
+  * EARLY_TERMINATION
+  *   Jumps to the label third_step or sec_step of the expanding function
+  *   when min_mcost is close enough to a predicted SAD "pred":
+  *     (min_mcost - pred) < pred * betaThird   ->  goto third_step
+  *     (min_mcost - pred) < pred * betaSec     ->  goto sec_step
+  *   pred is pred_SAD_ref when list==0 && ref>0, pred_SAD_uplayer when
+  *   blocktype>1, and pred_SAD_space otherwise.
+  *   The macro expands to a bare if/else chain (no do{}while(0) wrapper), is
+  *   written without a trailing semicolon at its call sites, and needs
+  *   list, ref, blocktype, min_mcost, betaSec and betaThird in scope.
+  */
+ #define EARLY_TERMINATION  if(list==0 && ref>0) \
+   {                                                                    \
+   if ((min_mcost-pred_SAD_ref)<pred_SAD_ref*betaThird)             \
+   goto third_step;                                             \
+   else if((min_mcost-pred_SAD_ref)<pred_SAD_ref*betaSec)           \
+   goto sec_step;                                               \
+   }                                                                    \
+   else if(blocktype>1)                                                 \
+   {                                                                    \
+   if ((min_mcost-pred_SAD_uplayer)<pred_SAD_uplayer*betaThird)     \
+     {                                                                \
+     goto third_step;                                             \
+     }                                                                \
+     else if((min_mcost-pred_SAD_uplayer)<pred_SAD_uplayer*betaSec)   \
+     goto sec_step;                                               \
+   }                                                                    \
+   else                                                                 \
+   {                                                                    \
+   if ((min_mcost-pred_SAD_space)<pred_SAD_space*betaThird)         \
+     {                                                                \
+     goto third_step;                                             \
+     }                                                                \
+     else if((min_mcost-pred_SAD_space)<pred_SAD_space*betaSec)       \
+     goto sec_step;                                               \
+   }
+ 
+ 
+ /*
+  * SEARCH_ONE_PIXEL
+  *   Scores the candidate (cand_x, cand_y) if it lies within search_range of
+  *   (center_x, center_y) in both directions and is not yet marked in
+  *   McostState. The MV_COST value is handed to PartCalMad(), whose return
+  *   value becomes the candidate cost; the position is then marked as
+  *   visited and best_x / best_y / min_mcost are updated when the cost is
+  *   lower than min_mcost.
+  *   Expands to a bare if statement and relies on the local names of the
+  *   expanding function (cand_x, center_x, lambda_factor, ref_pic, ...).
+  */
+ #define SEARCH_ONE_PIXEL  if(abs(cand_x - center_x) <=search_range && abs(cand_y - center_y)<= search_range) \
+     { \
+     if(!McostState[cand_y-center_y+search_range][cand_x-center_x+search_range]) \
+     { \
+     mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y); \
+     mcost = PartCalMad(ref_pic, orig_pic, get_ref_line,blocksize_y,blocksize_x,blocksize_x4,mcost,min_mcost,cand_x,cand_y); \
+     McostState[cand_y-center_y+search_range][cand_x-center_x+search_range] = 1; \
+     if (mcost < min_mcost) \
+     { \
+     best_x = cand_x; \
+     best_y = cand_y; \
+     min_mcost = mcost; \
+     } \
+     } \
+     }
+ /*
+  * SEARCH_ONE_PIXEL1(value_iAbort)
+  *   Same candidate test as SEARCH_ONE_PIXEL; in addition, when the candidate
+  *   improves on min_mcost, the local iAbort is set to value_iAbort. The
+  *   iterative search loops set iAbort = 1 before each round and pass 0 here,
+  *   so a round without any improvement ends the loop.
+  */
+ #define SEARCH_ONE_PIXEL1(value_iAbort) if(abs(cand_x - center_x) <=search_range && abs(cand_y - center_y)<= search_range) \
+       { \
+       if(!McostState[cand_y-center_y+search_range][cand_x-center_x+search_range]) \
+         { \
+         mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y); \
+         mcost = PartCalMad(ref_pic, orig_pic, get_ref_line,blocksize_y,blocksize_x,blocksize_x4,mcost,min_mcost,cand_x,cand_y); \
+         McostState[cand_y-center_y+search_range][cand_x-center_x+search_range] = 1; \
+         if (mcost < min_mcost) \
+           { \
+           best_x = cand_x; \
+           best_y = cand_y; \
+           min_mcost = mcost; \
+           iAbort = value_iAbort; \
+           } \
+         } \
+       }
+ 
+ // NOTE(review): the objects below are defined (not declared extern) in this
+ // header, so every translation unit that includes it carries its own
+ // tentative definition; linking then depends on the toolchain merging them
+ // as common symbols. Consider extern declarations plus one definition.
+ 
+ byte **McostState; //visited map for integer pel search, indexed [dy+search_range][dx+search_range] relative to the search centre
+ byte **SearchState; //visited map for fractional pel search, indexed the same way with a range of 3
+ 
+ int ****fastme_ref_cost; //store SAD information needed for forward ref-frame prediction; indexed [ref][blocktype][block_y][block_x]
+ int ***fastme_l0_cost; //store SAD information needed for forward median and uplayer prediction; indexed [blocktype][y][x]
+ int ***fastme_l1_cost; //store SAD information needed for backward median and uplayer prediction; indexed [blocktype][y][x]
+ 
+ int pred_SAD_space, pred_SAD_ref,pred_SAD_uplayer;  //SAD predictions compared against min_mcost by EARLY_TERMINATION
+ int pred_MV_ref[2], pred_MV_uplayer[2];             //predicted motion vectors ([0] = x, [1] = y) from the previous reference and from the upper-layer block type
+ 
+ int FME_blocktype;  //blocktype for FME SetMotionVectorPredictor
+ 
+ //for early termination
+ float  Bsize[8];
+ float AlphaSec[8];
+ float AlphaThird[8];
+ byte *flag_intra;      //indexed by img->pix_x>>4 in fast_me.c
+ int  flag_intra_SAD;   //non-zero makes setup_FME() use 0 as the predicted SAD
+ 
+ // Threshold setup and table allocation/release.
+ // NOTE(review): purposes inferred from the names only - confirm in fast_me.c.
+ void DefineThreshold();
+ void DefineThresholdMB();
+ int get_mem_FME();
+ void free_mem_FME();
+ 
+ // derives flag_intra_SAD from flag_intra (no-op in I slices)
+ void decide_intrabk_SAD();
+ // updates flag_intra and zeroes the stored costs when best_mode is 9 or 10
+ void skip_intrabk_SAD(int best_mode, int ref_max);
+ // fills pred_MV_uplayer, pred_MV_ref, pred_SAD_ref / pred_SAD_uplayer and FME_blocktype
+ void setup_FME(short ref, int list, int block_y, int block_x, int blocktype, short   ******all_mv);
+ 
+ // Integer-pel stage of the fast motion search.
+ // NOTE(review): the parameter notes below are the original author's; "list"
+ // carried no note - presumably the reference picture list, confirm.
+ int                                     //  ==> minimum motion cost after search
+ FastIntegerPelBlockMotionSearch  (pel_t**   orig_pic,      // <--  not used
+                                   short     ref,           // <--  reference frame (0... or -1 (backward))
+                                   int       list,
+                                   int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                                   int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                                   int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                                   short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                                   short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                                   short*    mv_x,          //  --> motion vector (x) - in pel units
+                                   short*    mv_y,          //  --> motion vector (y) - in pel units
+                                   int       search_range,  // <--  1-d search range in pel units                         
+                                   int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                                   int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ // Returns Mvmcost plus the distortion of one candidate position on the
+ // reference plane; used by FastSubPelBlockMotionSearch() to score candidates.
+ int AddUpSADQuarter(int pic_pix_x,int pic_pix_y,int blocksize_x,int blocksize_y,
+                     int cand_mv_x,int cand_mv_y, StorablePicture *ref_picture, pel_t**   orig_pic, 
+                     int Mvmcost, int min_mcost,int useABT,int blocktype);
+ 
+ // Fractional-pel stage of the fast motion search: scores the centre, the
+ // predictor-derived position and a small diamond pattern around the best.
+ int                                                   //  ==> minimum motion cost after search
+ FastSubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                              short       ref,           // <--  reference frame (0... or -1 (backward))
+                              int       list,          // <--  reference list, used to select the reference picture
+                              int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                              int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                              int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                              short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                              short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                              short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                              short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                              int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                              int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                              int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                              int       lambda_factor, // <--  lagrangian parameter for determining motion cost
+                              int  useABT);            // <--  passed through to AddUpSADQuarter()
+ 
+ // Sub-pel search with the same argument list as FastSubPelBlockMotionSearch
+ // except that pred_mv_x / pred_mv_y are int here and there is no useABT.
+ // NOTE(review): "list" carried no note - presumably the reference picture
+ // list, confirm against the definition.
+ int                                               //  ==> minimum motion cost after search
+ SubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame (0... or -1 (backward))
+                          int       list,
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          int       pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          int       pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int      lambda_factor         // <--  lagrangian parameter for determining motion cost
+                          );
+ 
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/filehandle.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/filehandle.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/filehandle.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,140 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    filehandle.c
+  * \brief
+  *    Start and terminate sequences
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Thomas Stockhammer            <stockhammer at ei.tum.de>
+  *      - Detlev Marpe                  <marpe at hhi.de>
+  ***************************************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ 
+ #include "global.h"
+ 
+ #include "rtp.h"
+ #include "annexb.h"
+ #include "parset.h"
+ #include "mbuffer.h"
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Error handling procedure. Prints the error message followed by a
+  *    newline to stderr, calls flush_dpb() and terminates the process with
+  *    the supplied code. Does not return to the caller.
+  * \param text
+  *    Error message (printed with "%s", so it is not interpreted as a
+  *    format string)
+  * \param code
+  *    Exit code passed to exit()
+  ************************************************************************
+  */
+ void error(char *text, int code)
+ {
+   fprintf(stderr, "%s\n", text);
+   // NOTE(review): presumably writes out pictures still held in the decoded
+   // picture buffer before the process ends - confirm in mbuffer.c
+   flush_dpb();
+   exit(code);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generates the picture parameter set NAL unit with the given id,
+  *    writes it through the active WriteNALU function and frees it.
+  * \param len
+  *    Running total the WriteNALU result is added to
+  * \param PPS_id
+  *    Id of the picture parameter set to generate
+  * \return
+  *    len plus the value returned by WriteNALU
+  ************************************************************************
+  */
+ int write_PPS(int len, int PPS_id)
+ {  
+   NALU_t *pps_nalu = GeneratePic_parameter_set_NALU (PPS_id);
+   const int written = WriteNALU (pps_nalu);
+ 
+   FreeNALU (pps_nalu);
+ 
+   return len + written;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Opens the output file for the configured output mode, selects the
+  *    matching NALU writer, and writes the sequence header: one sequence
+  *    parameter set followed by one picture parameter set (three when
+  *    input->GenerateMultiplePPS is set). The accumulated WriteNALU
+  *    results are stored in stats->bit_ctr_parametersets_n.
+  * \return
+  *    0 on success. An unsupported output mode is reported through
+  *    error(); the return 1 after that call is only a fallback.
+  ************************************************************************
+  */
+ int start_sequence()
+ {
+   const int pps_count = (input->GenerateMultiplePPS) ? 3 : 1;
+   int       written   = 0;
+   int       pps_id;
+   NALU_t   *sps_nalu;
+ 
+   // open the output and pick the writer that matches the container format
+   if (input->of_mode == PAR_OF_ANNEXB)
+   {
+     OpenAnnexbFile (input->outfile);
+     WriteNALU = WriteAnnexbNALU;
+   }
+   else if (input->of_mode == PAR_OF_RTP)
+   {
+     OpenRTPFile (input->outfile);
+     WriteNALU = WriteRTPNALU;
+   }
+   else
+   {
+     snprintf(errortext, ET_SIZE, "Output File Mode %d not supported", input->of_mode);
+     error(errortext,1);
+     return 1;
+   }
+ 
+   //! As a sequence header, both the sequence and the picture parameter
+   //! sets are written here.  As soon as IDR is implemented, this should go
+   //! to the IDR part, as both parsets have to be transmitted as part of an
+   //! IDR.  An alternative may be to consider this function the IDR start
+   //! function.
+   sps_nalu = GenerateSeq_parameter_set_NALU ();
+   written += WriteNALU (sps_nalu);
+   FreeNALU (sps_nalu);
+ 
+   //! Now the picture parameter sets; the running total ends up in the statistics.
+   for (pps_id = 0; pps_id < pps_count; pps_id++)
+     written = write_PPS(written, pps_id);
+ 
+   stats->bit_ctr_parametersets_n = written;
+   return 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *     This function terminates the sequence and closes the
+  *     output file that belongs to the configured output mode.
+  * \return
+  *     0 when the output was closed (Annex B and RTP alike, matching the
+  *     success value of start_sequence()). An unsupported output mode is
+  *     reported through error(); the return 1 after that call is only a
+  *     fallback.
+  ************************************************************************
+  */
+ int terminate_sequence()
+ {
+   // Mainly flushing of everything
+   // Add termination symbol, etc.
+ 
+   switch(input->of_mode)
+   {
+     case PAR_OF_ANNEXB:
+       CloseAnnexbFile();
+       return 0;   // was falling through to the trailing "return 1"
+     case PAR_OF_RTP:
+       CloseRTPFile();
+       return 0;
+     default:
+       snprintf(errortext, ET_SIZE, "Output File Mode %d not supported", input->of_mode);
+       error(errortext,1);
+       return 1;
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/fmo.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/fmo.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/fmo.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,730 ----
+ /*!
+  *****************************************************************************
+  *
+  * \file fmo.c
+  *
+  * \brief
+  *    Support for Flexible Macroblock Ordering for different Slice Group Modes: MBAmap handling
+  *
+  * \date
+  *    16 June, 2002  Modified April 25, 2004
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *    Dong Wang (modify)	Dong.Wang at bristol.ac.uk
+  * 
+  *****************************************************************************/
+ 
+ /*!
+  ****************************************************************************
+  *   Notes by Dong Wang (April 25 2004)
+  *
+  *  Source codes are modified to support 7 slice group types (fmo modes).
+  *  The functions for generating map are very similar to that in decoder, but have 
+  *  a little difference. 
+  *
+  *  The MB map is calculated at the beginning of coding of each picture (frame or field).
+  *
+  *  'slice_group_change_cycle' in structure 'ImageParameters' is the syntax in the slice 
+  *  header. It's set to be 1 before the initialization of FMO in function code_a_picture().
+  *  It can be changed every time if needed.
+  *
+  **************************************************************************** 
+  */
+ 
+ /*!
+  *****************************************************************************
+  *  What does an MBAmap look like?
+  *
+  *  An MBAmap is a one-dimensional array of ints.  Each int 
+  *  represents an MB in scan order.  A zero or positive value represents
+  *  a slice group ID.  Negative values are reserved for future extensions.
+  *  The numbering range for the SliceGroupIDs is 0..7 as per JVT-C167.
+  *
+  *  This module contains a static variable MBAmap.  This is the MBAmap of the
+  *  picture currently coded.  It can be accessed only through the access
+  *  functions.
+  *****************************************************************************
+ */
+ 
+ //#define PRINT_FMO_MAPS  1
+ 
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ #include <assert.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ 
+ #include "fmo.h"
+ 
+ 
+ static int FirstMBInSlice[MAXSLICEGROUPIDS];
+ 
+ byte *MBAmap = NULL;   
+ byte *MapUnitToSliceGroupMap = NULL; 
+ unsigned PicSizeInMapUnits;
+ 
+ 
+ static void FmoGenerateType0MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static void FmoGenerateType1MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static void FmoGenerateType2MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static void FmoGenerateType3MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static void FmoGenerateType4MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static void FmoGenerateType5MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static void FmoGenerateType6MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ 
+ 
+ static int FmoGenerateMapUnitToSliceGroupMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps);
+ static int FmoGenerateMBAmap (ImageParameters * img, seq_parameter_set_rbsp_t* sps);
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generates MapUnitToSliceGroupMap
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  * \return
+  *    Always 0; allocation failure or an illegal map type exits the process
+  ************************************************************************
+  */
+ static int FmoGenerateMapUnitToSliceGroupMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps)
+ {
+   PicSizeInMapUnits = img->PicHeightInMapUnits * img->PicWidthInMbs;
+   
+   if (pps->slice_group_map_type == 6)
+   {
+     if ((pps->pic_size_in_map_units_minus1+1) != PicSizeInMapUnits)
+     {
+       error ("wrong pps->pic_size_in_map_units_minus1 for used SPS and FMO type 6", 500);
+     }
+   }
+   
+   // allocate memory for MapUnitToSliceGroupMap
+   free (MapUnitToSliceGroupMap);   // free(NULL) is a no-op, so no guard is needed
+   
+   if ((MapUnitToSliceGroupMap = malloc ((PicSizeInMapUnits) * sizeof (byte))) == NULL)
+   {
+     // the product has type size_t, which must not be passed to %d: cast it to match %u
+     printf ("cannot allocated %u bytes for MapUnitToSliceGroupMap, exit\n", (unsigned) (PicSizeInMapUnits * sizeof (byte)));
+     exit (-1);
+   }
+   
+   if (pps->num_slice_groups_minus1 == 0)    // only one slice group
+   {
+     memset (MapUnitToSliceGroupMap, 0,  PicSizeInMapUnits * sizeof (byte));
+     return 0;
+   }
+   
+   switch (pps->slice_group_map_type)
+   {
+   case 0:
+     FmoGenerateType0MapUnitMap (img, pps);
+     break;
+   case 1:
+     FmoGenerateType1MapUnitMap (img, pps);
+     break;
+   case 2:
+     FmoGenerateType2MapUnitMap (img, pps);
+     break;
+   case 3:
+     FmoGenerateType3MapUnitMap (img, pps);
+     break;
+   case 4:
+     FmoGenerateType4MapUnitMap (img, pps);
+     break;
+   case 5:
+     FmoGenerateType5MapUnitMap (img, pps);
+     break;
+   case 6:
+     FmoGenerateType6MapUnitMap (img, pps);
+     break;
+   default:
+     printf ("Illegal slice_group_map_type %d , exit \n", pps->slice_group_map_type);
+     exit (-1);
+   }
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generates MBAmap from MapUnitToSliceGroupMap
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param sps
+  *    Sequence Parameter set to be used for map generation
+  * \return
+  *    Always 0; allocation failure exits the process
+  ************************************************************************
+  */
+ static int FmoGenerateMBAmap (ImageParameters * img, seq_parameter_set_rbsp_t* sps)
+ {
+   unsigned i;
+   
+   // allocate memory for MBAmap
+   free (MBAmap);   // free(NULL) is a no-op, so no guard is needed
+   
+   if ((MBAmap = malloc ((img->PicSizeInMbs) * sizeof (byte))) == NULL)
+   {
+     // the product has type size_t, which must not be passed to %d: cast it to match %u
+     printf ("cannot allocated %u bytes for MBAmap, exit\n", (unsigned) ((img->PicSizeInMbs) * sizeof (byte)));
+     exit (-1);
+   }
+   
+   if ((sps->frame_mbs_only_flag) || img->field_picture)
+   {
+     for (i=0; i<img->PicSizeInMbs; i++)
+     {
+       MBAmap[i] = MapUnitToSliceGroupMap[i];      // one map unit per MB
+     }
+   }
+   else
+     if (sps->mb_adaptive_frame_field_flag  &&  (! img->field_picture))
+     {
+       for (i=0; i<img->PicSizeInMbs; i++)
+       {
+         MBAmap[i] = MapUnitToSliceGroupMap[i/2];  // two consecutive MBs share one map unit
+       }
+     }
+     else
+     {
+       for (i=0; i<img->PicSizeInMbs; i++)
+       {
+         MBAmap[i] = MapUnitToSliceGroupMap[(i/(2*img->PicWidthInMbs))*img->PicWidthInMbs+(i%img->PicWidthInMbs)];  // one map unit row covers two MB rows
+       }
+     }
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FMO initialization: Generates MapUnitToSliceGroupMap and MBAmap.
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  * \param sps
+  *    Sequence Parameter set to be used for map generation
+  ************************************************************************
+  */
+ int FmoInit(ImageParameters * img, pic_parameter_set_rbsp_t * pps, seq_parameter_set_rbsp_t * sps)
+ {
+   // i, j and bottom are needed only by the optional map dump at the end
+ #ifdef PRINT_FMO_MAPS
+   unsigned i,j;
+   int bottom;
+ #endif
+   
+   int k;
+   for (k=0;k<MAXSLICEGROUPIDS;k++)
+     FirstMBInSlice[k] = -1;          // reset the per-slice-group table to "no MB"
+   
+   // Build the map-unit level map first: FmoGenerateMBAmap reads
+   // MapUnitToSliceGroupMap, so the order of the two calls matters
+   FmoGenerateMapUnitToSliceGroupMap(img, pps);
+   FmoGenerateMBAmap(img, sps);
+   
+ #ifdef PRINT_FMO_MAPS
+   printf("\n");
+   printf("FMO Map (Units):\n");
+   
+   for (j=0; j<img->PicHeightInMapUnits; j++)
+   {
+     for (i=0; i<img->PicWidthInMbs; i++)
+     {
+       printf("%d ",MapUnitToSliceGroupMap[i+j*img->PicWidthInMbs]);
+     }
+     printf("\n");
+   }
+   printf("\n");
+   
+   if(sps->mb_adaptive_frame_field_flag==0)
+   {
+     printf("FMO Map (Mb):\n");	
+     for (j=0; j<(img->PicSizeInMbs/img->PicWidthInMbs); j++)
+     {
+       for (i=0; i<img->PicWidthInMbs; i++)
+       {
+         printf("%d ",MBAmap[i+j*img->PicWidthInMbs]);
+       }
+       printf("\n");
+     }
+     printf("\n");
+   }
+   else 
+   {
+     printf("FMO Map (Mb in scan order for MBAFF):\n");	
+     for (j=0; j<(img->PicSizeInMbs/img->PicWidthInMbs); j++)
+     {
+       for (i=0; i<img->PicWidthInMbs; i++)
+       {
+         bottom=(j%2);                // 1 on odd rows, 0 on even rows
+         printf("%d ",MBAmap[(j-bottom)*img->PicWidthInMbs+i*2+bottom]);
+       }
+       printf("\n");
+       
+     }
+     printf("\n");
+     
+   }
+   
+ #endif
+   
+   return 0;                          // no failure is reported from here
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Free memory if allocated by FMO functions
+  ************************************************************************
+  */
+ void FmoUninit()
+ {
+   // free(NULL) is a no-op, so neither release needs a NULL guard.
+   // Each pointer is cleared afterwards so that the map cannot be
+   // freed a second time.
+ 
+   // MB level map
+   free (MBAmap);
+   MBAmap = NULL;
+ 
+   // map unit level map
+   free (MapUnitToSliceGroupMap);
+   MapUnitToSliceGroupMap = NULL;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate interleaved slice group map type MapUnit map (type 0)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+  */
+ static void FmoGenerateType0MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   unsigned iGroup, j;
+   unsigned i = 0;           // first map unit of the run currently being written
+   do                        // repeat the sequence of groups until the whole map is covered
+   {
+     for( iGroup = 0; 
+     (iGroup <= pps->num_slice_groups_minus1) && (i < PicSizeInMapUnits); 
+     i += pps->run_length_minus1[iGroup++] + 1)   // skip this group's run, then move on to the next group
+     {
+       for( j = 0; j <= pps->run_length_minus1[ iGroup ] && i + j < PicSizeInMapUnits; j++ )
+         MapUnitToSliceGroupMap[i+j] = iGroup;    // the run is clipped at the end of the map
+     }
+   }
+   while( i < PicSizeInMapUnits );
+ }
+ 
+   
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate dispersed slice group map type MapUnit map (type 1)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+  */
+ static void FmoGenerateType1MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   const unsigned numGroups = pps->num_slice_groups_minus1 + 1;
+   const unsigned width = img->PicWidthInMbs;
+   unsigned unit;
+   // group = (column + (row * numGroups) / 2) mod numGroups
+   for( unit = 0; unit < PicSizeInMapUnits; unit++ )
+     MapUnitToSliceGroupMap[unit] = ((unit % width) + (((unit / width) * numGroups) / 2)) % numGroups;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate foreground with left-over slice group map type MapUnit map (type 2)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+  */
+ static void FmoGenerateType2MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   int iGroup;
+   unsigned i, x, y;
+   unsigned yTopLeft, xTopLeft, yBottomRight, xBottomRight;
+   
+   for( i = 0; i < PicSizeInMapUnits; i++ )     // start with every unit in the last slice group
+     MapUnitToSliceGroupMap[ i ] = pps->num_slice_groups_minus1;
+   
+   for( iGroup = pps->num_slice_groups_minus1 - 1 ; iGroup >= 0; iGroup-- )   // descending: lower IDs overwrite higher ones where rectangles overlap
+   {
+     yTopLeft = pps->top_left[ iGroup ] / img->PicWidthInMbs;            // corner addresses are split into row (/) and column (%)
+     xTopLeft = pps->top_left[ iGroup ] % img->PicWidthInMbs;
+     yBottomRight = pps->bottom_right[ iGroup ] / img->PicWidthInMbs;
+     xBottomRight = pps->bottom_right[ iGroup ] % img->PicWidthInMbs;
+     for( y = yTopLeft; y <= yBottomRight; y++ )
+       for( x = xTopLeft; x <= xBottomRight; x++ )
+         MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ] = iGroup;  // fill the rectangle, corners inclusive
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate box-out slice group map type MapUnit map (type 3)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+  */
+ static void FmoGenerateType3MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   unsigned i, k;
+   int leftBound, topBound, rightBound, bottomBound;
+   int x, y, xDir, yDir;
+   int mapUnitVacant;
+   
+   unsigned mapUnitsInSliceGroup0 = min((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits);
+   
+   for( i = 0; i < PicSizeInMapUnits; i++ )
+     MapUnitToSliceGroupMap[ i ] = 2;           // 2 marks a unit as not yet assigned
+   
+   x = ( img->PicWidthInMbs - pps->slice_group_change_direction_flag ) / 2;
+   y = ( img->PicHeightInMapUnits - pps->slice_group_change_direction_flag ) / 2;
+   
+   leftBound   = x;                             // the visited box starts as the single start unit
+   topBound    = y;
+   rightBound  = x;
+   bottomBound = y;
+   
+   xDir =  pps->slice_group_change_direction_flag - 1;
+   yDir =  pps->slice_group_change_direction_flag;
+   
+   for( k = 0; k < PicSizeInMapUnits; k += mapUnitVacant )   // k counts assigned units; it advances only when a vacant unit was filled
+   {
+     mapUnitVacant = ( MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ]  ==  2 );
+     if( mapUnitVacant )
+       MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ] = ( k >= mapUnitsInSliceGroup0 );   // first mapUnitsInSliceGroup0 units get 0, the rest 1
+     
+     if( xDir  ==  -1  &&  x  ==  leftBound )   // reached the left edge of the box: grow it (clamped to the picture) and turn
+     {
+       leftBound = max( leftBound - 1, 0 );
+       x = leftBound;
+       xDir = 0;
+       yDir = 2 * pps->slice_group_change_direction_flag - 1;
+     }
+     else 
+       if( xDir  ==  1  &&  x  ==  rightBound )   // reached the right edge of the box
+       {
+         rightBound = min( rightBound + 1, (int)img->PicWidthInMbs - 1 );
+         x = rightBound;
+         xDir = 0;
+         yDir = 1 - 2 * pps->slice_group_change_direction_flag;
+       }
+       else
+         if( yDir  ==  -1  &&  y  ==  topBound )   // reached the top edge of the box
+         {
+           topBound = max( topBound - 1, 0 );
+           y = topBound;
+           xDir = 1 - 2 * pps->slice_group_change_direction_flag;
+           yDir = 0;
+         }
+         else 
+           if( yDir  ==  1  &&  y  ==  bottomBound )   // reached the bottom edge of the box
+           {
+             bottomBound = min( bottomBound + 1, (int)img->PicHeightInMapUnits - 1 );
+             y = bottomBound;
+             xDir = 2 * pps->slice_group_change_direction_flag - 1;
+             yDir = 0;
+           }
+           else
+           {
+             x = x + xDir;                        // no edge reached: keep moving in the current direction
+             y = y + yDir;
+           }
+   }
+   
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate raster scan slice group map type MapUnit map (type 4)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+  */
+ static void FmoGenerateType4MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   unsigned mapUnitsInSliceGroup0 = min((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits);
+   unsigned sizeOfUpperLeftGroup;
+   unsigned unit;
+   
+   if( pps->slice_group_change_direction_flag )
+     sizeOfUpperLeftGroup = PicSizeInMapUnits - mapUnitsInSliceGroup0;
+   else
+     sizeOfUpperLeftGroup = mapUnitsInSliceGroup0;
+   
+   // Raster order: the leading sizeOfUpperLeftGroup units form one group, the rest the other
+   for( unit = 0; unit < PicSizeInMapUnits; unit++ )
+     MapUnitToSliceGroupMap[ unit ] = ( unit < sizeOfUpperLeftGroup ) ? pps->slice_group_change_direction_flag : 1 - pps->slice_group_change_direction_flag;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate wipe slice group map type MapUnit map (type 5)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+ */
+ static void FmoGenerateType5MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   unsigned mapUnitsInSliceGroup0 = min((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits);
+   unsigned sizeOfUpperLeftGroup;
+   unsigned row, col, visited = 0;
+   
+   if( pps->slice_group_change_direction_flag )
+     sizeOfUpperLeftGroup = PicSizeInMapUnits - mapUnitsInSliceGroup0;
+   else
+     sizeOfUpperLeftGroup = mapUnitsInSliceGroup0;
+   // Column by column, top to bottom: the first sizeOfUpperLeftGroup units visited form one group
+   for( col = 0; col < img->PicWidthInMbs; col++ )
+     for( row = 0; row < img->PicHeightInMapUnits; row++, visited++ )
+       MapUnitToSliceGroupMap[ row * img->PicWidthInMbs + col ] =
+         ( visited < sizeOfUpperLeftGroup ) ? 1 - pps->slice_group_change_direction_flag : pps->slice_group_change_direction_flag;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate explicit slice group map type MapUnit map (type 6)
+  *
+  * \param img
+  *    Image Parameter to be used for map generation
+  * \param pps
+  *    Picture Parameter set to be used for map generation
+  ************************************************************************
+  */
+ static void FmoGenerateType6MapUnitMap (ImageParameters * img, pic_parameter_set_rbsp_t * pps )
+ {
+   unsigned unit = PicSizeInMapUnits;
+   // Explicit map: copy the slice group IDs stored in the PPS one-to-one
+   // (back to front; the resulting map is the same either way)
+   while (unit-- > 0)
+     MapUnitToSliceGroupMap[unit] = pps->slice_group_id[unit];
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoStartPicture: initializes FMO at the begin of each new picture
+  *
+  * \par Input:
+  *    None
+  ************************************************************************
+  */
+ int FmoStartPicture ()
+ {
+   int group;
+   assert (MBAmap != NULL);
+   // Seed each slice group's entry with the group's first MB in scan
+   // order (-1 if no MB belongs to the group)
+   for (group = 0; group < MAXSLICEGROUPIDS; ++group)
+     FirstMBInSlice[group] = FmoGetFirstMBOfSliceGroup (group);
+   return 0;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoEndPicture: Ends the Scattered Slices Module (called once
+  *    per picture).
+  *
+  * \par Input:
+  *    None
+  ************************************************************************
+  */
+ int FmoEndPicture ()
+ {
+   // Intentionally empty: nothing is released or reset here
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoMB2SliceGroup: Returns the slice group ID for a given MB
+  *
+  * \par Input:
+  *    Macroblock Nr (in scan order), must be in 0..PicSizeInMbs-1
+  ************************************************************************
+  */
+ int FmoMB2SliceGroup ( int mb)
+ {
+   assert (mb >= 0 && mb < (int)img->PicSizeInMbs);   // mb indexes MBAmap directly, so check both ends
+   assert (MBAmap != NULL);
+   return MBAmap[mb];
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoGetNextMBNr: Returns the MB-Nr (in scan order) of the next
+  *    MB in the (FMO) Slice, -1 if the SliceGroup is finished
+  *
+  * \par Input:
+  *    CurrentMbNr
+  ************************************************************************
+  */
+ int FmoGetNextMBNr (int CurrentMbNr)
+ {
+   const int group = FmoMB2SliceGroup (CurrentMbNr);
+   int next;
+ 
+   // Scan forward for the next MB that belongs to the same slice group
+   for (next = CurrentMbNr + 1; next < (int)img->PicSizeInMbs; next++)
+   {
+     if (MBAmap[next] == group)
+       return next;
+   }
+   return -1;    // No further MB in this slice (could be end of picture)
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoGetPreviousMBNr: Returns the MB-Nr (in scan order) of the previous
+  *    MB in the (FMO) Slice, -1 if there is no previous MB in the SliceGroup
+  *
+  * \par Input:
+  *    CurrentMbNr
+  ************************************************************************
+  */
+ int FmoGetPreviousMBNr (int CurrentMbNr)
+ {
+   const int group = FmoMB2SliceGroup (CurrentMbNr);
+   int prev;
+ 
+   // Scan backward for the closest earlier MB in the same slice group
+   for (prev = CurrentMbNr - 1; prev >= 0; prev--)
+   {
+     if (MBAmap[prev] == group)
+       return prev;
+   }
+   return -1;    // No previous MB in this slice
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoGetFirstMBOfSliceGroup: Returns the MB-Nr (in scan order) of the 
+  *    next first MB of the Slice group, -1 if no such MB exists
+  *
+  * \par Input:
+  *    SliceGroupID: Id of SliceGroup
+  ************************************************************************
+  */
+ int FmoGetFirstMBOfSliceGroup (int SliceGroupID)
+ {
+   int mb;
+   // Linear scan in MB scan order; the first hit is the answer
+   for (mb = 0; mb < (int)img->PicSizeInMbs; mb++)
+   {
+     if (FmoMB2SliceGroup (mb) == SliceGroupID)
+       return mb;
+   }
+   return -1;    // no MB carries this slice group ID
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    FmoGetLastCodedMBOfSlice: Returns the MB-Nr (in scan order) of 
+  *    the last MB of the slice group
+  *
+  * \par Input:
+  *    SliceGroupID
+  * \par Return
+  *    MB Nr in case of success (is always >= 0)
+  *    -1 if the SliceGroup doesn't exist
+  ************************************************************************
+  */
+ int FmoGetLastCodedMBOfSliceGroup (int SliceGroupID)
+ {
+   int mb;
+ 
+   // Walk backwards so that the first match is the last MB of the group
+   for (mb = (int)img->PicSizeInMbs - 1; mb >= 0; mb--)
+     if (FmoMB2SliceGroup (mb) == SliceGroupID)
+       return mb;
+   return -1;
+ }
+ 
+ 
+ void FmoSetLastMacroblockInSlice ( int mb)
+ {
+   // called by terminate_slice(), writes the last processed MB into the
+   // FirstMBInSlice[MAXSLICEGROUPIDS] array.  FmoGetFirstMacroblockInSlice()
+   // uses this info to identify the first uncoded MB in each slice group
+   int currSliceGroup;
+   assert (mb >= 0);           // check before mb is used to look up its slice group
+   currSliceGroup = FmoMB2SliceGroup (mb);
+   mb = FmoGetNextMBNr (mb);   // The next (still uncoded) MB, or -1 if SG is finished
+   FirstMBInSlice[currSliceGroup] = mb;
+ }
+ 
+ int FmoGetFirstMacroblockInSlice ( int SliceGroup)
+ {
+   // returns the first uncoded MB of the given slice group, -1 if there
+   // is no more to do in this slice group
+   return FirstMBInSlice[SliceGroup];
+ }
+ 
+ 
+ int FmoSliceGroupCompletelyCoded( int SliceGroupID)
+ {
+   // A negative first-uncoded-MB entry means the slice group is either
+   // completely coded or not present
+   const int firstUncoded = FmoGetFirstMacroblockInSlice (SliceGroupID);
+   return (firstUncoded < 0) ? TRUE : FALSE;
+ }
+ 
+ 
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/fmo.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/fmo.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/fmo.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,39 ----
+ 
+ /*!
+  ***************************************************************************
+  *
+  * \file fmo.h
+  *
+  * \brief
+  *    Support for Flexible Macroblock Ordering
+  *
+  * \date
+  *    16 June 2002
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  **************************************************************************/
+ 
+ #ifndef _FMO_H_
+ #define _FMO_H_
+ 
+ #define MAXSLICEGROUPIDS 8
+ 
+ int FmoInit(ImageParameters * img, pic_parameter_set_rbsp_t * pps, seq_parameter_set_rbsp_t * sps);
+ void FmoUninit ();
+ int FmoFinit (seq_parameter_set_rbsp_t * sps);
+ int FmoMB2SliceGroup (int mb);
+ int FmoGetFirstMBOfSliceGroup (int SliceGroupID);
+ int FmoGetFirstMacroblockInSlice (int SliceGroup);
+ int FmoGetNextMBNr (int CurrentMbNr);
+ int FmoGetLastCodedMBOfSliceGroup (int SliceGroupID);
+ int FmoStartPicture ();
+ int FmoEndPicture();
+ int FmoSliceGroupCompletelyCoded(int SliceGroupID);
+ void FmoSetLastMacroblockInSlice (int mb);
+ 
+ int FmoGetPreviousMBNr (int CurrentMbNr);
+ 
+ extern byte *MBAmap; 
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/global.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/global.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/global.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1430 ----
+ 
+ /*!
+  ************************************************************************
+  *  \file
+  *     global.h
+  *
+  *  \brief
+  *     global definitions for for H.264 encoder.
+  *
+  *  \author
+  *     Copyright (C) 1999  Telenor Satellite Services,Norway
+  *                         Ericsson Radio Systems, Sweden
+  *
+  *     Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *
+  *     Telenor Satellite Services
+  *     Keysers gt.13                       tel.:   +47 23 13 86 98
+  *     N-0130 Oslo,Norway                  fax.:   +47 22 77 79 80
+  *
+  *     Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+  *
+  *     Ericsson Radio Systems
+  *     KI/ERA/T/VV
+  *     164 80 Stockholm, Sweden
+  *
+  ************************************************************************
+  */
+ #ifndef _GLOBAL_H_
+ #define _GLOBAL_H_
+ 
+ #include <stdio.h>
+ 
+ #include "defines.h"
+ #include "parsetcommon.h"
+ #include "q_matrix.h"
+ #include "q_offsets.h"
+ #include "minmax.h"
+ 
+ #ifdef WIN32
+   #define  snprintf _snprintf
+ #endif
+ 
+ #if defined(WIN32) && !defined(__GNUC__)
+   typedef __int64   int64;
+ #ifndef INT64_MIN
+ # define INT64_MIN        (-9223372036854775807i64 - 1i64)
+ #endif
+ #else
+   typedef long long int64;
+ #ifndef INT64_MIN
+ # define INT64_MIN        (-9223372036854775807LL - 1LL)
+ #endif
+ #endif
+ 
+ #ifdef WIN32
+ #define  snprintf _snprintf
+ #define  open     _open
+ #define  close    _close
+ #define  read     _read
+ #define  write    _write
+ #define  lseek    _lseeki64
+ #define  fsync    _commit
+ #define  OPENFLAGS_WRITE _O_WRONLY|_O_CREAT|_O_BINARY|_O_TRUNC
+ #define  OPEN_PERMISSIONS _S_IREAD | _S_IWRITE
+ #define  OPENFLAGS_READ  _O_RDONLY|_O_BINARY
+ #else
+ #define  OPENFLAGS_WRITE O_WRONLY|O_CREAT|O_TRUNC
+ #define  OPENFLAGS_READ  O_RDONLY
+ #define  OPEN_PERMISSIONS S_IRUSR | S_IWUSR
+ #endif
+ 
+ 
+ /***********************************************************************
+  * T y p e    d e f i n i t i o n s    f o r    T M L
+  ***********************************************************************
+  */
+ 
+ //#define pel_t byte
+ 
+ #define imgpel unsigned short
+ #define distpel int
+ //#define imgpel byte
+ //#define distpel unsigned short
+ 
+ #define pel_t imgpel
+ 
+ //! Data Partitioning Modes
+ typedef enum
+ {
+   PAR_DP_1,   //!< no data partitioning is supported
+   PAR_DP_3    //!< data partitioning with 3 partitions
+ } PAR_DP_TYPE;
+ 
+ 
+ //! Output File Types
+ typedef enum
+ {
+   PAR_OF_ANNEXB,    //!< Annex B byte stream format
+   PAR_OF_RTP       //!< RTP packets in outfile
+ } PAR_OF_TYPE;
+ 
+ //! Field Coding Types
+ typedef enum 
+ {
+   FRAME_CODING,
+   FIELD_CODING,
+   ADAPTIVE_CODING
+ } CodingType;
+ 
+ //! definition of H.264 syntax elements
+ typedef enum 
+ {
+   SE_HEADER,
+   SE_PTYPE,
+   SE_MBTYPE,
+   SE_REFFRAME,
+   SE_INTRAPREDMODE,
+   SE_MVD,
+   SE_CBP_INTRA,
+   SE_LUM_DC_INTRA,
+   SE_CHR_DC_INTRA,
+   SE_LUM_AC_INTRA,
+   SE_CHR_AC_INTRA,
+   SE_CBP_INTER,
+   SE_LUM_DC_INTER,
+   SE_CHR_DC_INTER,
+   SE_LUM_AC_INTER,
+   SE_CHR_AC_INTER,
+   SE_DELTA_QUANT_INTER,
+   SE_DELTA_QUANT_INTRA,
+   SE_BFRAME,
+   SE_EOS,
+   SE_MAX_ELEMENTS  //!< number of maximum syntax elements
+ } SE_type;         // substituting the definitions in elements.h
+ 
+ 
+ typedef enum 
+ {
+   INTER_MB,
+   INTRA_MB_4x4,
+   INTRA_MB_16x16
+ } IntraInterDecision;
+ 
+ 
+ typedef enum 
+ {
+   BITS_HEADER,
+   BITS_TOTAL_MB,
+   BITS_MB_MODE,
+   BITS_INTER_MB,
+   BITS_CBP_MB,
+   BITS_COEFF_Y_MB,
+   BITS_COEFF_UV_MB,
+   BITS_DELTA_QUANT_MB,
+   MAX_BITCOUNTER_MB
+ } BitCountType;
+ 
+ 
+ typedef enum 
+ {
+   NO_SLICES,
+   FIXED_MB,
+   FIXED_RATE,
+   CALLBACK,
+   FMO
+ } SliceMode;
+ 
+ 
+ typedef enum 
+ {
+   UVLC,
+   CABAC
+ } SymbolMode;
+ 
+ 
+ typedef enum 
+ {
+   FRAME,
+   TOP_FIELD,
+   BOTTOM_FIELD
+ } PictureStructure;           //!< New enum for field processing
+ 
+ typedef enum 
+ {
+   P_SLICE = 0,
+   B_SLICE,
+   I_SLICE,
+   SP_SLICE,
+   SI_SLICE
+ } SliceType;
+ 
+ /***********************************************************************
+  * D a t a    t y p e s   f o r  C A B A C
+  ***********************************************************************
+  */
+ 
+ //! struct to characterize the state of the arithmetic coding engine
+ typedef struct
+ {
+   unsigned int  Elow, Erange;
+   unsigned int  Ebuffer;
+   unsigned int  Ebits_to_go;
+   unsigned int  Ebits_to_follow;
+   byte          *Ecodestrm;
+   int           *Ecodestrm_len;
+   int           C;
+   int           E;
+ 
+   // storage in case of recode MB
+   unsigned int  ElowS, ErangeS;
+   unsigned int  EbufferS;
+   unsigned int  Ebits_to_goS;
+   unsigned int  Ebits_to_followS;
+   byte          *EcodestrmS;
+   int           *Ecodestrm_lenS;
+   int           CS;
+   int           ES;
+ } EncodingEnvironment;
+ 
+ typedef EncodingEnvironment *EncodingEnvironmentPtr;
+ 
+ //! struct for context management
+ typedef struct
+ {
+   unsigned short state;         // index into state-table CP  
+   unsigned char  MPS;           // Most Probable Symbol 0/1 CP
+ 
+   unsigned long  count;
+ 
+ } BiContextType;
+ 
+ typedef BiContextType *BiContextTypePtr;
+ 
+ 
+ /**********************************************************************
+  * C O N T E X T S   F O R   T M L   S Y N T A X   E L E M E N T S
+  **********************************************************************
+  */
+ 
+ 
+ #define NUM_MB_TYPE_CTX  11
+ #define NUM_B8_TYPE_CTX  9
+ #define NUM_MV_RES_CTX   10
+ #define NUM_REF_NO_CTX   6
+ #define NUM_DELTA_QP_CTX 4
+ #define NUM_MB_AFF_CTX 4
+ 
+ #define NUM_TRANSFORM_SIZE_CTX 3
+ 
+ typedef struct
+ {
+   BiContextType mb_type_contexts [3][NUM_MB_TYPE_CTX];
+   BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX];
+   BiContextType mv_res_contexts  [2][NUM_MV_RES_CTX];
+   BiContextType ref_no_contexts  [2][NUM_REF_NO_CTX];
+   BiContextType delta_qp_contexts   [NUM_DELTA_QP_CTX];
+   BiContextType mb_aff_contexts     [NUM_MB_AFF_CTX];
+   BiContextType transform_size_contexts   [NUM_TRANSFORM_SIZE_CTX];
+ } MotionInfoContexts;
+ 
+ 
+ #define NUM_IPR_CTX    2
+ #define NUM_CIPR_CTX   4
+ #define NUM_CBP_CTX    4
+ #define NUM_BCBP_CTX   4
+ #define NUM_MAP_CTX   15
+ #define NUM_LAST_CTX  15
+ #define NUM_ONE_CTX    5
+ #define NUM_ABS_CTX    5
+ 
+ 
+ typedef struct
+ {
+   BiContextType  ipr_contexts [NUM_IPR_CTX]; 
+   BiContextType  cipr_contexts[NUM_CIPR_CTX]; 
+   BiContextType  cbp_contexts [3][NUM_CBP_CTX];
+   BiContextType  bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX];
+   BiContextType  map_contexts [NUM_BLOCK_TYPES][NUM_MAP_CTX];
+   BiContextType  last_contexts[NUM_BLOCK_TYPES][NUM_LAST_CTX];
+   BiContextType  one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX];
+   BiContextType  abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX];
+   BiContextType  fld_map_contexts [NUM_BLOCK_TYPES][NUM_MAP_CTX];
+   BiContextType  fld_last_contexts[NUM_BLOCK_TYPES][NUM_LAST_CTX];
+ } TextureInfoContexts;
+ 
+ //*********************** end of data type definition for CABAC *******************
+ 
+ typedef struct pix_pos
+ {
+   int available;
+   int mb_addr;
+   int x;
+   int y;
+   int pos_x;
+   int pos_y;
+ } PixelPos;
+ 
+ /*! Buffer structure for RMPNI commands; each node carries a pointer to the next node */
+ typedef struct RMPNIbuffer_s
+ {
+   int RMPNI;
+   int Data;
+   struct RMPNIbuffer_s *Next;   //!< following node
+ } RMPNIbuffer_t;
+ 
+ /*! Buffer structure for decoded reference picture marking commands; each node carries a pointer to the next node */
+ typedef struct DecRefPicMarking_s
+ {
+   int memory_management_control_operation;
+   int difference_of_pic_nums_minus1;
+   int long_term_pic_num;
+   int long_term_frame_idx;
+   int max_long_term_frame_idx_plus1;
+   struct DecRefPicMarking_s *Next;   //!< following node
+ } DecRefPicMarking_t;
+ 
+ //! Syntax element: value(s) to be coded plus the callbacks that map/write them
+ typedef struct syntaxelement
+ {
+   int                 type;           //!< type of syntax element for data part.
+   int                 value1;         //!< numerical value of syntax element
+   int                 value2;         //!< for blocked symbols, e.g. run/level
+   int                 len;            //!< length of code
+   int                 inf;            //!< info part of UVLC code
+   unsigned int        bitpattern;     //!< UVLC bitpattern
+   int                 context;        //!< CABAC context
+   int                 k;              //!< CABAC context for coeff_count,uv
+ 
+ #if TRACE
+   #define             TRACESTRING_SIZE 100            //!< size of trace string
+   char                tracestring[TRACESTRING_SIZE];  //!< trace string (only present when TRACE is enabled)
+ #endif
+ 
+   //! for mapping of syntaxElement to UVLC
+   void    (*mapping)(int value1, int value2, int* len_ptr, int* info_ptr);
+   //! used for CABAC: refers to actual coding method of each individual syntax element type
+   void    (*writing)(struct syntaxelement *, EncodingEnvironmentPtr);
+ 
+ } SyntaxElement;
+ 
+ //! Macroblock: per-macroblock coding state (mode, QP, CBP, neighbour info, rate-control and loop-filter data)
+ typedef struct macroblock
+ {
+   int                 currSEnr;                   //!< number of current syntax element
+   int                 slice_nr;
+   int                 delta_qp;
+   int                 qp ;
+   int                 qpsp ;
+   int                 bitcounter[MAX_BITCOUNTER_MB];
+ 
+   struct macroblock   *mb_available_up;   //!< pointer to neighboring MB (CABAC)
+   struct macroblock   *mb_available_left; //!< pointer to neighboring MB (CABAC)
+ 
+   int                 mb_type;
+   int                 mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2];          //!< indices correspond to [forw,backw][block_y][block_x][x,y]
+   char                intra_pred_modes[MB_BLOCK_PARTITIONS];
+   char                intra_pred_modes8x8[MB_BLOCK_PARTITIONS];                             //!< four 8x8 blocks in a macroblock
+   int                 cbp ;
+   int64               cbp_blk ;    //!< 1 bit set for every 4x4 block with coefs (not implemented for INTRA)
+   int                 b8mode[4];
+   int                 b8pdir[4];
+   int64               cbp_bits;
+ 
+   int                 c_ipred_mode;      //!< chroma intra prediction mode
+   int                 IntraChromaPredModeFlag;
+   
+   int                 mb_field;
+   int                 list_offset;
+ 
+   // NOTE(review): presumably addresses and availability flags of the neighbouring MBs A-D; confirm against the code that sets them
+   int mbAddrA, mbAddrB, mbAddrC, mbAddrD;
+   int mbAvailA, mbAvailB, mbAvailC, mbAvailD;
+ 
+   int                 all_blk_8x8;
+   int                 luma_transform_size_8x8_flag;
+   int                 NoMbPartLessThan8x8Flag;
+ 
+   short    bi_pred_me;
+   
+   // rate control
+   double              actj;               //!< macroblock activity measure for macroblock j
+   int                 prev_qp;
+   int                 prev_delta_qp;
+   int                 prev_cbp;
+   int                 predict_qp;
+   int                 predict_error;
+ 
+   // loop filter settings (same field names also appear in InputParameters and ImageParameters)
+   int                 LFDisableIdc;
+   int                 LFAlphaC0Offset;
+   int                 LFBetaOffset;
+ 
+   int                 skip_flag;
+ } Macroblock;
+ 
+ 
+ 
+ //! Bitstream: write position and partial byte, plus two sets of saved copies (stored_* and *_skip)
+ typedef struct
+ {
+   int             byte_pos;           //!< current position in bitstream;
+   int             bits_to_go;         //!< current bitcounter
+   byte            byte_buf;           //!< current buffer for last written byte
+   int             stored_byte_pos;    //!< storage for position in bitstream;
+   int             stored_bits_to_go;  //!< storage for bitcounter
+   byte            stored_byte_buf;    //!< storage for buffer of last written byte
+ 
+   // NOTE(review): the three *_skip fields mirror byte_buf/byte_pos/bits_to_go; the original
+   // comments were copied from the fields above - confirm exactly when they are saved/restored
+   byte            byte_buf_skip;      //!< buffer for last written byte (skip variant)
+   int             byte_pos_skip;      //!< storage for position in bitstream (skip variant)
+   int             bits_to_go_skip;    //!< storage for bitcounter (skip variant)
+ 
+   byte            *streamBuffer;      //!< actual buffer for written bytes
+   int             write_flag;         //!< Bitstream contains data and needs to be written
+ 
+ } Bitstream;
+ 
+ //! DataPartition: a bitstream, its CABAC encoding environment and the writer used for it
+ typedef struct datapartition
+ {
+ 
+   Bitstream           *bitstream;
+   EncodingEnvironment ee_cabac;
+ 
+   int                 (*writeSyntaxElement)(SyntaxElement *, struct datapartition *);
+                       /*!< virtual function;
+                            actual method depends on chosen data partition and
+                            entropy coding method.
+                            NOTE(review): meaning of the int return value is not visible here  */
+ } DataPartition;
+ 
+ //! Slice: per-slice parameters, partitions, CABAC context tables and reference list reordering data
+ typedef struct
+ {
+   int                 picture_id;
+   int                 qp;
+   int                 picture_type; //!< picture type
+   int                 start_mb_nr;
+   int                 max_part_nr;  //!< number of different partitions
+   int                 num_mb;       //!< number of MBs in the slice
+   DataPartition       *partArr;     //!< array of partitions
+   MotionInfoContexts  *mot_ctx;     //!< pointer to struct of context models for use in CABAC
+   TextureInfoContexts *tex_ctx;     //!< pointer to struct of context models for use in CABAC
+ 
+   // !KS: RMPNI buffer should be retired. just do some more simple stuff
+   RMPNIbuffer_t        *rmpni_buffer; //!< stores the slice temporary buffer remapping commands
+ 
+   // reference picture list reordering, one group of fields per list (l0 / l1)
+   int                 ref_pic_list_reordering_flag_l0;
+   int                 *reordering_of_pic_nums_idc_l0;
+   int                 *abs_diff_pic_num_minus1_l0;
+   int                 *long_term_pic_idx_l0;
+   int                 ref_pic_list_reordering_flag_l1;
+   int                 *reordering_of_pic_nums_idc_l1;
+   int                 *abs_diff_pic_num_minus1_l1;
+   int                 *long_term_pic_idx_l1;
+ 
+   Boolean             (*slice_too_big)(int bits_slice); //!< for use of callback functions
+ 
+   int                 field_ctx[3][2]; // GB (author marker in the original; index meaning not visible here)
+ 
+ } Slice;
+ 
+ 
+ #define MAXSLICEPERPICTURE 100   //!< capacity of Picture.slices[]
+ //! Picture: the slices of one coded picture plus its bit count and per-component distortion
+ typedef struct 
+ {
+   int   no_slices;
+   int   idr_flag;
+   Slice *slices[MAXSLICEPERPICTURE];
+   int bits_per_picture;
+   float distortion_y;
+   float distortion_u;
+   float distortion_v;
+ } Picture;
+ 
+ // NOTE(review): the objects below are declared without 'extern'. If this text is a header
+ // included from several translation units, they are tentative definitions and rely on the
+ // toolchain merging them (common symbols) - confirm the intended linkage model.
+ Picture *top_pic;
+ Picture *bottom_pic;
+ Picture *frame_pic;
+ Picture *frame_pic_1;
+ Picture *frame_pic_2;
+ Picture *frame_pic_3;
+ 
+ // global picture format dependent buffers, mem allocation in image.c
+ imgpel **imgY_org;           //!< Reference luma image
+ imgpel ***imgUV_org;         //!< Reference chroma image
+ int    **img4Y_tmp;          //!< for quarter pel interpolation
+ 
+ unsigned int log2_max_frame_num_minus4;
+ unsigned int log2_max_pic_order_cnt_lsb_minus4;
+ 
+ int  me_tot_time,me_time;
+ pic_parameter_set_rbsp_t *active_pps;
+ seq_parameter_set_rbsp_t *active_sps;
+ 
+ // B pictures
+ // motion vector : forward, backward, direct
+ int  mb_adaptive;       //!< For MB level field/frame coding tools
+ int  MBPairIsField;     //!< For MB level field/frame coding tools
+ 
+ 
+ // Weighted prediction
+ int ***wp_weight;  // weight in [list][index][component] order
+ int ***wp_offset;  // offset in [list][index][component] order
+ int ****wbp_weight;  // weight in [list][fwd_index][bwd_idx][component] order
+ int luma_log_weight_denom;
+ int chroma_log_weight_denom;
+ int wp_luma_round;
+ int wp_chroma_round;
+ 
+ // global picture format dependent buffers, mem allocation in image.c (field picture)
+ imgpel   **imgY_org_top;
+ imgpel   **imgY_org_bot;
+ 
+ imgpel  ***imgUV_org_top;
+ imgpel  ***imgUV_org_bot;
+ 
+ imgpel   **imgY_org_frm;
+ imgpel  ***imgUV_org_frm;
+ 
+ imgpel   **imgY_com;               //!< Encoded luma images
+ imgpel  ***imgUV_com;              //!< Encoded chroma images
+ 
+ char    ***direct_ref_idx;           //!< direct mode reference index buffer
+ short    **direct_pdir;              //!< direct mode buffer - NOTE(review): original comment duplicated the one on direct_ref_idx; presumably prediction direction, confirm
+ 
+ // Buffers for rd optimization with packet losses, Dim. Kontopodis
+ byte **pixel_map;   //!< Shows the latest reference frame that is reliable for each pixel
+ byte **refresh_map; //!< Stores the new values for pixel_map  
+ int intras;         //!< Counts the intra updates in each frame.
+ 
+ // frame counters and timing totals
+ int  frame_ctr[5];
+ int  frame_no, nextP_tr_fld, nextP_tr_frm;
+ int  tot_time;
+ 
+ #define ET_SIZE 300      //!< size of error text buffer
+ char errortext[ET_SIZE]; //!< buffer for error message for exit with error()
+ 
+ // Residue Color Transform (all arrays below are fixed-size, 16x16 or 4x4 per plane)
+ int resTrans_R[16][16], resTrans_G[16][16], resTrans_B[16][16];
+ int rec_resR[16][16], rec_resG[16][16], rec_resB[16][16];
+ int mprRGB[3][16][16];
+ int dc_level[2][4][4], dc_level_temp[2][4][4];
+ int   cbp_chroma_block[2][4][4], cbp_chroma_block_temp[2][4][4];
+ char b8_ipredmode8x8[4][4], b8_intra_pred_modes8x8[16];
+ 
+ //! Info for the "decoders-in-the-encoder" used for rdoptimization with packet losses
+ typedef struct
+ {
+   int  **resY;               //!< Residue of Luminance
+   imgpel ***decY;            //!< Decoded values at the simulated decoders
+   imgpel ****decref;         //!< Reference frames of the simulated decoders
+   imgpel ***decY_best;       //!< Decoded frames for the best mode for all decoders
+   imgpel **RefBlock;
+   byte **status_map;
+   byte **dec_mb_mode;
+ } Decoders;
+ extern Decoders *decs;       //!< declared here only; the definition lives in another file
+ 
+ //! SNRParameters: per-component (Y/U/V) SNR and SSE values and their running aggregates
+ typedef struct
+ {
+   float snr_y;               //!< current Y SNR
+   float snr_u;               //!< current U SNR
+   float snr_v;               //!< current V SNR
+   float snr_y1;              //!< SNR Y(dB) first frame
+   float snr_u1;              //!< SNR U(dB) first frame
+   float snr_v1;              //!< SNR V(dB) first frame
+   float snr_yt[5];           //!< SNR Y(dB) based on frame type
+   float snr_ut[5];           //!< SNR U(dB) based on frame type
+   float snr_vt[5];           //!< SNR V(dB) based on frame type
+   float snr_ya;              //!< Average SNR Y(dB) remaining frames
+   float snr_ua;              //!< Average SNR U(dB) remaining frames
+   float snr_va;              //!< Average SNR V(dB) remaining frames
+   float sse_y;               //!< SSE Y
+   float sse_u;               //!< SSE U
+   float sse_v;               //!< SSE V
+   float msse_y;              //!< Average SSE Y
+   float msse_u;              //!< Average SSE U
+   float msse_v;              //!< Average SSE V
+   int   frame_ctr;           //!< number of coded frames
+ } SNRParameters;
+ 
+ #define FILE_NAME_SIZE 200   //!< size of the fixed file-name buffers below
+                              //! all input parameters
+ typedef struct
+ {
+   int ProfileIDC;               //!< profile idc
+   int LevelIDC;                 //!< level idc
+ 
+   int no_frames;                //!< number of frames to be encoded
+   int qp0;                      //!< QP of first frame
+   int qpN;                      //!< QP of remaining frames
+   int jumpd;                    //!< number of frames to skip in input sequence (e.g 2 takes frame 0,3,6,9...)
+   int hadamard;                 /*!< 0: 'normal' SAD in sub pixel search.  1: use 4x4 Hadamard transform and '
+                                      Sum of absolute transform difference' in sub pixel search                   */
+   int DisableSubpelME;          //!< Disable Subpixel Motion Estimation
+   int search_range;             /*!< search range - integer pel search and 16x16 blocks.  The search window is
+                                      generally around the predicted vector. Max vector is 2xmcrange.  For 8x8
+                                      and 4x4 block sizes the search range is 1/2 of that for 16x16 blocks.       */
+   int num_ref_frames;           //!< number of reference frames to be used
+   int P_List0_refs;
+   int B_List0_refs;
+   int B_List1_refs;
+   int Log2MaxFNumMinus4;
+   int Log2MaxPOCLsbMinus4;
+   int ResendPPS;
+   int GenerateMultiplePPS;
+ 
+   int img_width;                //!< image width  (must be a multiple of 16 pels)
+   int img_height;               //!< image height (must be a multiple of 16 pels)
+   int yuv_format;               //!< YUV format (0=4:0:0, 1=4:2:0, 2=4:2:2, 3=4:4:4)
+   int intra_upd;                /*!< For error robustness. 0: no special action. 1: One GOB/frame is intra coded
+                                      as regular 'update'. 2: One GOB every 2 frames is intra coded etc.
+                                      In connection with this intra update, restrictions are put on motion vectors
+                                      to prevent errors from propagating from the past                            */
+   int blc_size[8][2];           //!< array for different block sizes
+   int part_size[8][2];          //!< array for different partition sizes
+   int slice_mode;               //!< Indicate what algorithm to use for setting slices
+   int slice_argument;           //!< Argument to the specified slice algorithm
+   int UseConstrainedIntraPred;  //!< 0: Inter MB pixels are allowed for intra prediction 1: Not allowed
+   int  infile_header;           //!< If input file has a header set this to the length of the header
+   char infile[FILE_NAME_SIZE];             //!< YUV 4:2:0 input format
+   char outfile[FILE_NAME_SIZE];            //!< H.264 compressed output bitstream
+   char ReconFile[FILE_NAME_SIZE];          //!< Reconstructed Pictures
+   char TraceFile[FILE_NAME_SIZE];          //!< Trace Outputs
+   char QmatrixFile[FILE_NAME_SIZE];        //!< Q matrix cfg file
+   int intra_period;             //!< Random Access period though intra
+   int EnableOpenGOP;            //!< support for open gops.
+ 
+   int idr_enable;				//!< Encode intra slices as IDR
+   int start_frame;				//!< Encode sequence starting from Frame start_frame
+ 
+   // B pictures
+   int successive_Bframe;        //!< number of B frames that will be used
+   int qpB;                      //!< QP for non-reference B slice coded pictures
+   int qpBRSOffset;                     //!< QP for reference B slice coded pictures (NOTE(review): name suggests an offset rather than an absolute QP; confirm)
+   int direct_spatial_mv_pred_flag;              //!< Direct Mode type to be used (0: Temporal, 1: Spatial)
+   int directInferenceFlag;      //!< Direct Inference Flag
+ 
+   int BiPredMotionEstimation;
+   int BiPredMERefinements;
+   int BiPredMESearchRange;
+   int BiPredMESubPel;
+ 
+ 
+   // SP Pictures
+   int sp_periodicity;           //!< The periodicity of SP-pictures
+   int qpsp;                     //!< SP Picture QP for prediction error
+   int qpsp_pred;                //!< SP Picture QP for predicted block
+ 
+   int WeightedPrediction;        //!< Weighted prediction for P frames (0: not used, 1: explicit)
+   int WeightedBiprediction;      //!< Weighted prediction for B frames (0: not used, 1: explicit, 2: implicit)
+   int UseWeightedReferenceME;    //!< Use Weighted Reference for ME.
+   int RDPictureDecision;         //!< Perform RD optimal decision between various coded versions of same picture
+   int RDPictureIntra;            //!< Enabled RD pic decision for intra as well.
+   int RDPSliceWeightOnly;        //!< If enabled, does not check QP variations for P slices.
+   int RDPSliceBTest;             //!< Tests B slice replacement for P.
+   int RDBSliceWeightOnly;        //!< If enabled, does not check QP variations for B slices.
+   int SkipIntraInInterSlices;    //!< Skip intra type checking in inter slices if best_mode is skip/direct
+   int BRefPictures;              //!< B coded reference pictures replace P pictures (0: not used, 1: used)
+   int PyramidCoding;
+   int PyramidLevelQPEnable;
+   char ExplicitPyramidFormat[1024];  //!< Explicit GOP format (PyramidCoding==3). 
+   int PyramidRefReorder;       //!< Reordering based on Poc distances for PyramidCoding
+   int PocMemoryManagement;       //!< Memory management based on Poc distances for PyramidCoding
+ 
+   int symbol_mode;              //!< Specifies the mode the symbols are mapped on bits
+   int of_mode;                  //!< Specifies the mode of the output file
+   int partition_mode;           //!< Specifies the mode of data partitioning
+ 
+   // per-partition-size inter search switches
+   int InterSearch16x16;
+   int InterSearch16x8;
+   int InterSearch8x16;
+   int InterSearch8x8;
+   int InterSearch8x4;
+   int InterSearch4x8;
+   int InterSearch4x4;
+ 
+   // intra mode restriction switches
+   int IntraDisableInterOnly;
+   int Intra4x4ParDisable;
+   int Intra4x4DiagDisable;
+   int Intra4x4DirDisable;
+   int Intra16x16ParDisable;
+   int Intra16x16PlaneDisable;
+   int ChromaIntraDisable;
+ 
+   int EnableIPCM;
+ 
+   double FrameRate;
+ 
+   // EPZS motion estimation settings
+   int EPZSPattern;
+   int EPZSDual;
+   int EPZSFixed;
+   int EPZSTemporal;
+   int EPZSSpatialMem;
+   int EPZSMinThresScale;
+   int EPZSMaxThresScale;
+   int EPZSMedThresScale;
+ 
+   int chroma_qp_index_offset;
+   // NOTE: the members below exist only when the corresponding macro is defined,
+   // so the struct layout depends on the build configuration
+ #ifdef _FULL_SEARCH_RANGE_
+   int full_search;
+ #endif
+ #ifdef _ADAPT_LAST_GROUP_
+   int last_frame;
+ #endif
+ #ifdef _CHANGE_QP_
+   int qpN2, qpB2, qp2start;
+   int qp02, qpBRS2Offset;
+ #endif
+   int rdopt;
+   int disthres;
+   int nobskip;
+ 
+ #ifdef _LEAKYBUCKET_
+   int NumberLeakyBuckets;
+   char LeakyBucketRateFile[FILE_NAME_SIZE];
+   char LeakyBucketParamFile[FILE_NAME_SIZE];
+ #endif
+ 
+   int PicInterlace;           //!< picture adaptive frame/field
+   int MbInterlace;            //!< macroblock adaptive frame/field
+ 
+   int IntraBottom;            //!< Force Intra Bottom at GOP periods.
+ 
+   int LossRateA;              //!< assumed loss probability of partition A (or full slice), in per cent, used for loss-aware R/D optimization
+   int LossRateB;              //!< assumed loss probability of partition B, in per cent, used for loss-aware R/D optimization
+   int LossRateC;              //!< assumed loss probability of partition C, in per cent, used for loss-aware R/D optimization
+   int NoOfDecoders;
+   int RestrictRef;
+   int NumFramesInELSubSeq;
+   int NumFrameIn2ndIGOP;
+ 
+   int RandomIntraMBRefresh;     //!< Number of pseudo-random intra-MBs per picture
+ 
+   int LFSendParameters;
+   int LFDisableIdc;
+   int LFAlphaC0Offset;
+   int LFBetaOffset;
+ 
+   int SparePictureOption;
+   int SPDetectionThreshold;
+   int SPPercentageThreshold;
+ 
+   // FMO
+   char SliceGroupConfigFileName[FILE_NAME_SIZE];    //!< Filename for config info for type 0, 2, 6
+   int num_slice_groups_minus1;           //!< "FmoNumSliceGroups" in encoder.cfg, same as FmoNumSliceGroups, which should be erased later
+   int slice_group_map_type; 
+ 
+   int *top_left;                         //!< top_left and bottom_right store values indicating foregrounds
+   int *bottom_right; 
+   byte *slice_group_id;                   //!< slice_group_id is for slice group type being 6  
+   int *run_length_minus1;                //!< run_length_minus1 is for slice group type being 0
+ 
+   int slice_group_change_direction_flag;
+   int slice_group_change_rate_minus1;
+   int slice_group_change_cycle;
+ 
+   int redundant_slice_flag; //!< whether redundant slices exist,  JVT-D101
+   int pic_order_cnt_type;   // POC200301
+ 
+   int context_init_method;
+   int model_number;
+   int Transform8x8Mode;
+   int ReportFrameStats;
+   int DisplayEncParams;
+   int Verbose;
+ 
+   //! Rate Control on JVT standard 
+   int RCEnable;    
+   int bit_rate;
+   int SeinitialQP;
+   int basicunit;
+   int channel_type;
+ 
+   int ScalingMatrixPresentFlag;
+   int ScalingListPresentFlag[8];
+ 
+   // FastME enable
+   int FMEnable;
+ 
+   // Fidelity Range Extensions
+   int BitDepthLuma;
+   int BitDepthChroma;
+   int img_height_cr;
+   int img_width_cr;
+   int rgb_input_flag;
+   int cb_qp_index_offset;
+   int cr_qp_index_offset;
+ 
+   // Lossless Coding
+   int lossless_qpprime_y_zero_flag;
+ 
+   // Residue Color Transform
+   int residue_transform_flag;
+ 
+   // Lambda Params
+   int UseExplicitLambdaParams;
+   double LambdaWeight[6];
+ 
+   char QOffsetMatrixFile[FILE_NAME_SIZE];        //!< Quantization Offset matrix cfg file
+   int  OffsetMatrixPresentFlag;                  //!< Enable Explicit Quantization Offset Matrices
+ 
+   int AdaptiveRounding;                          //!< Adaptive Rounding parameter based on JVT-N011
+   int AdaptRndPeriod;                            //!< Set period for adaptive rounding of JVT-N011 in MBs
+   int AdaptRndChroma;
+   int AdaptRndWFactor[2][5];                     //!< Weighting factors based on reference indicator and slice type 
+   // Fast Mode Decision
+   int EarlySkipEnable;
+   int SelectiveIntraEnable;
+   int DisposableP;
+   int DispPQPOffset;
+ } InputParameters;
+ 
+ //! ImageParameters: state of the picture currently being encoded (positions, prediction buffers,
+ //! POC data, rate-control counters, bit-depth dependent values)
+ typedef struct
+ {
+   int number;                  //!< current image number to be encoded
+   int pn;                      //!< picture number
+   int LevelIndex;              //!< mapped level idc
+   int current_mb_nr;
+   int total_number_mb;
+   int current_slice_nr;
+   int type;
+   int structure;               //!< picture structure
+   int num_ref_frames;          //!< number of reference frames to be used
+   int max_num_references;      //!< maximum number of reference pictures that may occur
+   int qp;                      //!< quant for the current frame
+   int qpsp;                    //!< quant for the prediction frame of SP-frame
+   float framerate;
+   int width;                   //!< Number of pels
+   int width_cr;                //!< Number of pels chroma
+   int height;                  //!< Number of lines
+   int height_cr;               //!< Number of lines  chroma
+   int height_cr_frame;         //!< Number of lines  chroma frame
+   int subblock_x;              //!< current subblock horizontal
+   int subblock_y;              //!< current subblock vertical
+   int is_intra_block;
+   int is_v_block;
+   int mb_y_upd;
+   int mb_y_intra;              //!< which GOB to intra code
+   int block_c_x;               //!< current block chroma - NOTE(review): original comment said "vertical" but the name ends in _x; confirm
+   char **ipredmode;             //!< intra prediction mode
+   char **ipredmode8x8;          //!< help storage for 8x8 modes, inserted by YV
+ 
+   int cod_counter;             //!< Current count of number of skipped macroblocks in a row
+   int ***nz_coeff;             //!< number of coefficients per block (CAVLC)
+ 
+   int mb_x;                    //!< current MB horizontal
+   int mb_y;                    //!< current MB vertical
+   int block_x;                 //!< current block horizontal
+   int block_y;                 //!< current block vertical
+   int pix_x;                   //!< current pixel horizontal
+   int pix_y;                   //!< current pixel vertical
+   int pix_c_x;                 //!< current pixel chroma horizontal
+   int pix_c_y;                 //!< current pixel chroma vertical
+ 
+   int opix_x;                   //!< current original picture pixel horizontal
+   int opix_y;                   //!< current original picture pixel vertical
+   int opix_c_x;                 //!< current original picture pixel chroma horizontal
+   int opix_c_y;                 //!< current original picture pixel chroma vertical
+ 
+ 
+   // some temporal buffers
+   imgpel mprr[9][16][16];      //!< all 9 prediction modes? // enlarged from 4 to 16 for ABT (is that necessary?)
+ 
+   imgpel mprr_2[5][16][16];    //!< new intra prediction modes - NOTE(review): original comment said "all 4" but the array has 5 entries; confirm
+   imgpel mprr_3[9][8][8];      //!< all 9 prediction modes for 8x8 transformation
+   imgpel mprr_c[2][4][16][16]; //!< chroma intra prediction modes
+   imgpel mpr[16][16];          //!< current best prediction mode
+   int m7[16][16];              //!< the diff pixel values between original image and prediction
+ 
+   int ****cofAC;               //!< AC coefficients [8x8block][4x4block][level/run][scan_pos]
+   int ***cofDC;                //!< DC coefficients [yuv][level/run][scan_pos]
+ 
+   int fadjust4x4[4][16][16];        //!< Transform coefficients for 4x4 luma. Excludes DC for I16x16
+   int fadjust8x8[3][16][16];        //!< Transform coefficients for 8x8 luma       
+   int fadjust4x4Cr[4][2][16][16];   //!< Transform coefficients for 4x4 chroma. Excludes DC chroma.
+   int fadjust8x8Cr[1][2][16][16];   //!< Transform coefficients for 4x4 chroma within 8x8 inter blocks. 
+ 
+   Picture     *currentPicture; //!< The coded picture currently in the works (typically frame_pic, top_pic, or bottom_pic)
+   Slice       *currentSlice;                                //!< pointer to current Slice data struct
+   Macroblock    *mb_data;                                   //!< array containing all MBs of a whole frame
+   SyntaxElement   MB_SyntaxElements[MAX_SYMBOLS_PER_MB];    //!< temporal storage for all chosen syntax elements of one MB
+ 
+   int *quad;               //!< Array containing square values, used for snr computation. Values are limited to 5000 for pixel differences over 70 (sqr(5000)).
+   int *intra_block;
+ 
+   int tr;
+   int fld_type;                        //!< top or bottom field
+   unsigned int fld_flag;                                
+   unsigned int rd_pass;
+   int direct_intraP_ref[4][4];
+   int pstruct_next_P;
+   int imgtr_next_P_frm;
+   int imgtr_last_P_frm;
+   int imgtr_next_P_fld;
+   int imgtr_last_P_fld;
+ 
+   // B pictures
+   double b_interval;
+   int p_interval;
+   int b_frame_to_code;
+   int fw_mb_mode;
+   int bw_mb_mode;
+ 
+   short****** pred_mv;                 //!< motion vector predictors for all block types and all reference frames
+   short****** all_mv;                  //!< replaces local all_mv
+ 
+   short****** bipred_mv1;              //!< Biprediction MVs
+   short****** bipred_mv2;              //!< Biprediction MVs
+   short bi_pred_me[MAXMODE];
+ 
+   int LFDisableIdc;
+   int LFAlphaC0Offset;
+   int LFBetaOffset;
+ 
+   int direct_spatial_mv_pred_flag;              //!< Direct Mode type to be used (0: Temporal, 1: Spatial)
+ 
+   int num_ref_idx_l0_active;
+   int num_ref_idx_l1_active;
+ 
+   int field_mode;     //!< For MB level field/frame -- field mode on flag
+   int top_field;      //!< For MB level field/frame -- top field flag
+   int mvscale[6][MAX_REFERENCE_PICTURES];
+   int buf_cycle;
+   int i16offset;
+ 
+   int layer;             //!< which layer this picture belonged to
+   int old_layer;         //!< old layer number
+   int NoResidueDirect;
+   int AdaptiveRounding;                          //!< Adaptive Rounding parameter based on JVT-N011
+ 
+   int redundant_pic_cnt; // JVT-D101
+ 
+   int MbaffFrameFlag;    //!< indicates frame with mb aff coding
+ 
+   //the following should probably go in sequence parameters
+   // unsigned int log2_max_frame_num_minus4;
+   unsigned int pic_order_cnt_type;
+   // for poc mode 0, POC200301
+   // unsigned int log2_max_pic_order_cnt_lsb_minus4;  
+   // for poc mode 1, POC200301
+   unsigned int delta_pic_order_always_zero_flag;
+            int offset_for_non_ref_pic;
+            int offset_for_top_to_bottom_field;
+   unsigned int num_ref_frames_in_pic_order_cnt_cycle;
+            int offset_for_ref_frame[1];  // MAX_LENGTH_POC_CYCLE in decoder; NOTE(review): only one entry here - confirm no code indexes past [0] when num_ref_frames_in_pic_order_cnt_cycle > 1
+ 
+   // POC200301
+   //the following is for slice header syntax elements of poc
+   // for poc mode 0.
+   unsigned int pic_order_cnt_lsb;
+            int delta_pic_order_cnt_bottom;
+   // for poc mode 1.
+            int delta_pic_order_cnt[2];
+ 
+ 
+   // POC200301
+   unsigned int field_picture;
+     signed int toppoc;      //!< poc for this frame or field
+     signed int bottompoc;   //!< for completeness - poc of bottom field of a frame (always = poc+1)
+     signed int framepoc;    //!< min (toppoc, bottompoc)
+     signed int ThisPOC;     //!< current picture POC
+   unsigned int frame_num;   //!< frame_num for this frame
+   
+   unsigned PicWidthInMbs;
+   unsigned PicHeightInMapUnits;
+   unsigned FrameHeightInMbs;
+   unsigned PicHeightInMbs;
+   unsigned PicSizeInMbs;
+   unsigned FrameSizeInMbs;
+ 
+   //the following should probably go in picture parameters
+   unsigned int pic_order_present_flag; // ????????
+ 
+   //the following are sent in the slice header
+ //  int delta_pic_order_cnt[2];
+   int nal_reference_idc;
+ 
+   int adaptive_ref_pic_buffering_flag;
+   int no_output_of_prior_pics_flag;
+   int long_term_reference_flag;
+ 
+   DecRefPicMarking_t *dec_ref_pic_marking_buffer;
+ 
+   int model_number;
+ 
+ 
+   /*rate control*/
+   int NumberofHeaderBits; 
+   int NumberofTextureBits;
+   int NumberofBasicUnitHeaderBits;
+   int NumberofBasicUnitTextureBits;
+   double TotalMADBasicUnit;
+   int NumberofMBTextureBits;
+   int NumberofMBHeaderBits;
+   int NumberofCodedBFrame; 
+   int NumberofCodedPFrame;
+   int NumberofGOP;
+   int TotalQpforPPicture;
+   int NumberofPPicture;
+   double *MADofMB;
+   int BasicUnitQP;
+   int TopFieldFlag;
+   int FieldControl;
+   int FieldFrame;
+   int Frame_Total_Number_MB;
+   int IFLAG;
+   int NumberofCodedMacroBlocks;
+   int BasicUnit;
+   int write_macroblock;
+   int bot_MB;
+   int write_macroblock_frame;
+ 
+   int DeblockCall;
+         
+   int last_pic_bottom_field;
+   int last_has_mmco_5;
+   int pre_frame_num;
+ 
+   int slice_group_change_cycle;
+ 
+   int pic_unit_size_on_disk;
+   int bitdepth_luma;
+   int bitdepth_chroma;
+   int bitdepth_luma_qp_scale;
+   int bitdepth_chroma_qp_scale;
+   int bitdepth_lambda_scale;
+   // Lagrangian Parameters
+   double lambda_md[10][52];     //!< Mode decision Lambda
+   double lambda_me[10][52];     //!< Motion Estimation Lambda
+   int lambda_mf[10][52];        //!< Integer formatted Motion Estimation Lambda
+ 
+   unsigned int dc_pred_value;   //!< value for DC prediction (depends on pel bit depth)
+   int max_imgpel_value;         //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth)
+   int max_imgpel_value_uv;
+ 
+   int num_blk8x8_uv;
+   int num_cdc_coeff;
+   int yuv_format;
+   int lossless_qpprime_flag;
+   int mb_cr_size_x;
+   int mb_cr_size_y;
+ 
+   int chroma_qp_offset[2];      //!< offset for qp for chroma [0-Cb, 1-Cr] 
+ 
+   // Residue Color Transform
+   int residue_transform_flag;
+ 
+   int auto_crop_right;
+   int auto_crop_bottom;
+ 
+   short checkref;
+   int last_valid_reference;
+ 
+ 
+   int bytes_in_picture;
+ } ImageParameters;
+ 
+ #define NUM_PIC_TYPE 5          //!< first dimension of the per-picture-type statistics arrays below
+                                 //! statistics
+ typedef struct
+ {
+   int   quant0;                 //!< quant for the first frame
+   int   quant1;                 //!< average quant for the remaining frames
+   float bitr;                   //!< bit rate for current frame, used only for output to terminal
+   float bitrate;                //!< average bit rate for the sequence except first frame
+   int   bit_ctr;                //!< counter for bit usage
+   int   bit_ctr_n;              //!< bit usage for the current frame
+   int   bit_slice;              //!< number of bits in current slice
+   int   bit_ctr_emulationprevention; //!< stored bits needed to prevent start code emulation
+   int   b8_mode_0_use[NUM_PIC_TYPE][2];
+   int   mode_use_transform_8x8[NUM_PIC_TYPE][MAXMODE];
+   int   mode_use_transform_4x4[NUM_PIC_TYPE][MAXMODE];
+   int   intra_chroma_mode[4];
+   
+   // B pictures
+   int   successive_Bframe;
+   int   *mode_use_Bframe;
+   int   *bit_use_mode_Bframe;
+   int   bit_ctr_I;
+   int   bit_ctr_P;
+   int   bit_ctr_B;
+   float bitrate_I;
+   float bitrate_P;
+   float bitrate_B;
+ 
+   int   mode_use            [NUM_PIC_TYPE][MAXMODE]; //!< Macroblock mode usage - NOTE(review): original comment said "for Intra frames" but the array is indexed per picture type; confirm
+   int   bit_use_mode        [NUM_PIC_TYPE][MAXMODE]; //!< statistics of bit usage
+   int   bit_use_stuffingBits[NUM_PIC_TYPE];
+   int   bit_use_mb_type     [NUM_PIC_TYPE];
+   int   bit_use_header      [NUM_PIC_TYPE];
+   int   tmp_bit_use_cbp     [NUM_PIC_TYPE];
+   int   bit_use_coeffY      [NUM_PIC_TYPE];
+   int   bit_use_coeffC      [NUM_PIC_TYPE];
+   int   bit_use_delta_quant [NUM_PIC_TYPE];
+ 
+   int   em_prev_bits_frm;
+   int   em_prev_bits_fld;
+   int  *em_prev_bits;
+   int   bit_ctr_parametersets;
+   int   bit_ctr_parametersets_n;
+   } StatParameters;
+ 
+ //! For MB level field/frame coding tools:
+ //! temporary structure to store MB data for field/frame coding
+ typedef struct                  // instantiated below as rddata_{top,bot}_{frame,field}_mb and pointed to by 'rdopt'
+ {
+   double min_rdcost;
+ 
+   imgpel rec_mbY[16][16];       // holds the Y component of the reconstructed MB
+   imgpel rec_mbU[16][16], rec_mbV[16][16]; // presumably the U and V components, by analogy with rec_mbY - TODO confirm
+   int    ****cofAC;
+   int    ***cofDC;
+   int    mb_type;
+   short  bi_pred_me;
+ 
+   int    b8mode[4], b8pdir[4];
+   char   **ipredmode;
+   char   intra_pred_modes[16];
+   int    cbp;
+   int64  cbp_blk;
+   int    mode;
+   short  ******pred_mv;        //!< predicted motion vectors
+   short  ******all_mv;         //!< all modes motion vectors
+   char   refar[2][4][4];       //!< reference frame array [list][y][x]
+   int    i16offset;
+   int    c_ipred_mode;
+ 
+   int    luma_transform_size_8x8_flag;
+   int    NoMbPartLessThan8x8Flag;
+   
+   int    qp;
+   int    prev_qp;
+   int    prev_delta_qp;
+   int    delta_qp;
+   int    prev_cbp;
+ } RD_DATA;
+ 
+ 
+ //!< Set Explicit GOP Parameters.
+ //!< Currently only supports Enhancement GOP but could be easily extended
+ typedef struct          // one explicit-GOP entry; the global 'gop_structure' pointer below refers to these
+ {
+   int slice_type;       //!< Slice type
+   int display_no;       //!< GOP Display order
+   int reference_idc;    //!< Is reference?
+   int slice_qp;         //!< Assigned QP
+   int pyramid_layer;    //!< Pyramid layer (used with GOP Pyramid option 2)
+   int pyramidPocDelta;  //!< Currently unused
+ } GOP_DATA;
+ 
+ 
+ typedef struct                  // NOTE(review): looks like saved results of the 8x8 sub-partition decision - confirm against its users
+ {
+   int cost8x8;
+   int rec_resG_8x8[16][16];
+   int resTrans_R_8x8[16][16];
+   int resTrans_B_8x8[16][16];
+   int mprRGB_8x8[3][16][16];    // three 16x16 planes; presumably one per colour component - TODO confirm
+   short part8x8mode[4];         // four entries each; presumably one per 8x8 block of the MB - TODO confirm
+   short part8x8pdir[4];
+   char  part8x8fwref[4];
+   char  part8x8bwref[4];
+   imgpel rec_mbY8x8[16][16];    
+   imgpel mpr8x8[16][16];
+ } RD_8x8DATA;
+ 
+ typedef struct          // rate-distortion parameters: the three lambdas plus per-mode / per-list bookkeeping
+ {  
+   double lambda_md;     //!< Mode decision Lambda
+   double lambda_me;     //!< Motion Estimation Lambda
+   int    lambda_mf;     //!< Integer formatted Motion Estimation Lambda
+ 
+   short  valid[MAXMODE];        //!< one entry per mode (MAXMODE entries)
+   short  list_offset[2];
+   short  curr_mb_field;
+   short  best_ref[2];           //!< two entries, paired with best_mcost[2]; presumably one per reference list - TODO confirm
+   int    best_mcost[2];
+ } RD_PARAMS;
+ 
+ GOP_DATA *gop_structure;       // NOTE(review): this and the objects below are defined without 'extern', unlike input/img/stats/snr; if several .c files include this file, linking relies on tentative-definition merging - confirm
+ RD_DATA *rdopt; 
+ RD_DATA rddata_top_frame_mb, rddata_bot_frame_mb; //!< For MB level field/frame coding tools
+ RD_DATA rddata_top_field_mb, rddata_bot_field_mb; //!< For MB level field/frame coding tools
+ 
+ extern InputParameters *input;
+ extern ImageParameters *img;
+ extern StatParameters  *stats;
+ 
+ extern SNRParameters *snr;
+ 
+ // files (the FILE* objects are stdio streams; p_in and p_dec are plain int handles)
+ FILE *p_stat;                    //!< status file for the last encoding session
+ FILE *p_log;                     //!< SNR file
+ FILE *p_trace;                   //!< Trace file
+ int  p_in;                       //!< original YUV file handle
+ int  p_dec;                      //!< decoded image file handle
+ 
+ 
+ /***********************************************************************
+  * P r o t o t y p e s   f o r    T M L
+  ***********************************************************************
+  */
+ 
+ void intrapred_luma(int CurrPixX,int CurrPixY, int *left_available, int *up_available, int *all_available);
+ void init();
+ int  dct_luma(int pos_mb1,int pos_mb2,int *cnt_nonz, int intra);
+ int  dct_luma_sp(int pos_mb1,int pos_mb2,int *cnt_nonz);
+ void copyblock_sp(int pos_mb1,int pos_mb2);
+ int  dct_chroma(int uv,int i11);
+ int  dct_chroma_sp(int uv,int i11);
+ // Residue Color Transform
+ int  dct_chroma4x4(int uv, int b8, int b4);
+ int  dct_chroma_DC(int uv, int cr_cbp);
+ 
+ int  motion_search(int isi);
+ int  sign(int a,int b);
+ void intrapred_chroma(int,int,int uv);
+ void intrapred_luma_16x16();
+ int  find_sad_16x16(int *intra_mode);
+ 
+ int dct_luma_16x16(int);
+ 
+ void init_poc();
+ 
+ void init_img();
+ void report();
+ void information_init();
+ int  get_picture_type();
+ int clip1a(int a);
+ void DeblockFrame(ImageParameters *img, imgpel **, imgpel ***) ;
+ void MarkAllMacroblockModes(ImageParameters *img, imgpel **, imgpel ***);
+ 
+ int  TransformDecision(int, int*);
+ int  SATD8X8(int*, int);
+ 
+ void LumaPrediction4x4 (int, int, int, int, int, short, short);
+ int  SATD (int*, int);
+ int  find_SATD (int c_diff[MB_PIXELS], int blocktype);
+ 
+ pel_t* FastLineX (int, pel_t*, int, int, int, int);
+ pel_t* UMVLineX  (int, pel_t*, int, int, int, int);
+ 
+ void LumaResidualCoding ();
+ void ChromaResidualCoding (int*);
+ void IntraChromaPrediction (int*, int*, int*);
+ void ChromaPrediction4x4 (int, int, int, int, int, int, short, short);
+ 
+ int writeMBLayer (int rdopt, int *coeff_rate);
+ 
+ extern int*   refbits;
+ extern int**** motion_cost;
+ 
+ void  Get_Direct_Motion_Vectors ();
+ void  PartitionMotionSearch     (int, int, int);
+ int   BIDPartitionCost          (int, int, short, short, int);
+ int   LumaResidualCoding8x8     (int*, int64*, int, short, int, int, short, short);
+ int   writeLumaCoeff8x8         (int, int, int);
+ int   writeMotionVector8x8      (int  i0, int  j0, int  i1, int  j1, int  refframe, int  list_idx, int  mv_mode);
+ int   writeReferenceFrame       (int, int, int, int, int);
+ int   writeAbpCoeffIndex        (int, int, int, int);
+ int   writeIntra4x4Modes        (int);
+ int   writeChromaIntraPredMode  ();
+ 
+ void estimate_weighting_factor_B_slice();
+ void estimate_weighting_factor_P_slice(int offset);
+ int  test_wp_P_slice(int offset);
+ int  test_wp_B_slice(int method);
+ void poc_based_ref_management(int current_pic_num);
+ int  picture_coding_decision (Picture *picture1, Picture *picture2, int qp);
+ 
+ unsigned CeilLog2( unsigned uiVal);
+ 
+ int  Get_Direct_Cost8x8 (int, int*);
+ 
+ int   BPredPartitionCost  (int, int, short, short, int, int);
+ void  LumaPrediction4x4Bi (int, int,   int,   int, int, short, short, int);
+ int   SATDBI (int* , int );
+ 
+ int  Get_Direct_CostMB  (int);
+ int  B8Mode2Value (int b8mode, int b8pdir);
+ 
+ int  GetSkipCostMB (int lambda_factor);
+ void FindSkipModeMotionVector ();
+ 
+ 
+ // dynamic mem allocation
+ int  init_global_buffers();
+ void free_global_buffers();
+ void no_mem_exit  (char *where);
+ 
+ int  get_mem_mv  (short*******);
+ void free_mem_mv (short******);
+ void free_img    ();
+ 
+ int  get_mem_ACcoeff  (int*****);
+ int  get_mem_DCcoeff  (int****);
+ void free_mem_ACcoeff (int****);
+ void free_mem_DCcoeff (int***);
+ 
+ int  decide_fld_frame(float snr_frame_Y, float snr_field_Y, int bit_field, int bit_frame, double lambda_picture);
+ void combine_field();
+ 
+ Picture *malloc_picture();
+ void     free_picture (Picture *pic);
+ 
+ int   encode_one_slice(int SLiceGroupId, Picture *pic, int TotalCodedMBs);   //! returns the number of MBs in the slice
+ 
+ void  start_macroblock(int mb_addr, int mb_field);
+ void  set_MB_parameters (int mb_addr);           //! sets up img-> according to input-> and currSlice->
+ 
+ int   writeMotionInfo2NAL ();
+ 
+ void  terminate_macroblock(Boolean *end_of_slice, Boolean *recode_macroblock);
+ int   slice_too_big(int rlc_bits);
+ void  write_one_macroblock(int eos_bit);
+ void  proceed2nextMacroblock();
+ 
+ void free_slice_list(Picture *currPic);
+ 
+ void report_stats_on_error();
+ 
+ #if TRACE
+ void  trace2out(SyntaxElement *se);
+ #endif
+ 
+ 
+ void error(char *text, int code);
+ int  start_sequence();
+ int  terminate_sequence();
+ int  start_slice();
+ int  terminate_slice();
+ int  write_PPS(int, int);
+ 
+ // B pictures
+ int  get_fwMV(int *min_fw_sad, int tot_intra_sad);
+ void get_bwMV(int *min_bw_sad);
+ void get_bid(int *bid_sad, int fw_predframe_no);
+ void get_dir(int *dir_sad);
+ void compare_sad(int tot_intra_sad, int fw_sad, int bw_sad, int bid_sad, int dir_sad, int);
+ int  BlkSize2CodeNumber(int blc_size_h, int blc_size_v);
+ 
+ void InitMotionVectorSearchModule();
+ 
+ int  field_flag_inference();
+ 
+ void set_mbaff_parameters();  // For MB AFF
+ void writeVlcByteAlign(Bitstream* currStream);
+ 
+ 
+ int   writeLumaCoeff4x4_CABAC     (int, int, int);
+ int   writeLumaCoeff8x8_CABAC     (int, int);
+ int   writeCBPandLumaCoeff        ();
+ int   writeChromaCoeff            ();
+ int   writeMB_bits_for_4x4_luma   (int, int, int);
+ int   writeMB_bits_for_16x16_luma ();
+ int   writeMB_bits_for_luma       (int);
+ int   writeMB_bits_for_DC_chroma  (int);
+ int   writeMB_bits_for_AC_chroma  (int);
+ int   writeMB_bits_for_CBP        ();
+ 
+ int   SingleUnifiedMotionSearch   (int, int, int**, int***, int*****, int, int*****, double);
+ 
+ //============= rate-distortion optimization ===================
+ void  clear_rdopt      ();
+ void  init_rdopt       ();
+ void  RD_Mode_Decision ();
+ //============= rate-distortion opt with packet losses ===========
+ void decode_one_macroblock();
+ void decode_one_mb (int, Macroblock*);
+ void decode_one_b8block (int, int, int, int, int);
+ void Get_Reference_Block(imgpel **imY, int block_y, int block_x, int mvhor, int mvver, imgpel **out);
+ byte Get_Reference_Pixel(imgpel **imY, int y, int x);
+ int  Half_Upsample(imgpel **imY, int j, int i);
+ void DecOneForthPix(imgpel **dY, imgpel ***dref);
+ void compute_residue(int mode);
+ void compute_residue_b8block (int, int);
+ void compute_residue_mb (int);
+ void UpdateDecoders();
+ void Build_Status_Map(byte **s_map);
+ void Error_Concealment(imgpel **inY, byte **s_map, imgpel ***refY);
+ void Conceal_Error(imgpel **inY, int mb_y, int mb_x, imgpel ***refY, byte **s_map);
+ //============= restriction of reference frames based on the latest intra-refreshes==========
+ void UpdatePixelMap();
+ 
+ //============= fast full integer search =======================
+ #ifdef _FAST_FULL_ME_
+ void  ClearFastFullIntegerSearch    ();
+ void  ResetFastFullIntegerSearch    ();
+ #endif
+ 
+ void process_2nd_IGOP();
+ void SetImgType();
+ 
+ // Tian Dong: for IGOPs
+ extern Boolean In2ndIGOP;
+ extern int start_frame_no_in_this_IGOP;
+ extern int start_tr_in_this_IGOP;
+ extern int FirstFrameIn2ndIGOP;
+ #define IMG_NUMBER (img->number-start_frame_no_in_this_IGOP)
+ 
+ void AllocNalPayloadBuffer();
+ void FreeNalPayloadBuffer();
+ void SODBtoRBSP(Bitstream *currStream);
+ int RBSPtoEBSP(byte *streamBuffer, int begin_bytepos, int end_bytepos, int min_num_bytes);
+ int Bytes_After_Header;
+ 
+ // JVT-D101: the bit for redundant_pic_cnt in slice header may be changed, 
+ // therefore the bit position in the bitstream must be stored.
+ int rpc_bytes_to_go;
+ int rpc_bits_to_go;
+ void modify_redundant_pic_cnt(unsigned char *streamBuffer);
+ // End JVT-D101
+ 
+ // Fast ME enable
+ int BlockMotionSearch (short,int,int,int,int,int, int);
+ void low_complexity_encode_md (void);
+ void encode_one_macroblock (void);
+ void fasthigh_complexity_encode_md (void);
+ 
+ int RDCost_for_4x4Blocks_Chroma (int b8, int b4, int  chroma);
+ 
+ #endif
+ 
+ #include "context_ini.h"
+ 
+ void store_coding_state_cs_cm();
+ void reset_coding_state_cs_cm();
+ 
+ int writeIPCMBytes(Bitstream *currStream);
+ int writePCMByteAlign(Bitstream *currStream);
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/header.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/header.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/header.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,564 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file header.c
+  *
+  * \brief
+  *    H.264 Slice and Sequence headers
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  *      - Karsten Suehring                <suehring at hhi.de>
+  *************************************************************************************
+  */
+ 
+ #include <math.h>
+ #include <assert.h>
+ #include <string.h>
+ #include <stdlib.h>
+ 
+ #include "global.h"
+ 
+ #include "elements.h"
+ #include "header.h"
+ #include "rtp.h"
+ #include "mbuffer.h"
+ #include "defines.h"
+ #include "vlc.h"
+ #include "parset.h"
+ 
+ // SYMTRACESTRING(s): with TRACE on, copies label s into sym->tracestring (a variable named 'sym' must be in scope at the call site); with TRACE off it expands to nothing, avoiding #if TRACE all over the source code
+ #if TRACE
+ #define SYMTRACESTRING(s) strncpy(sym->tracestring,s,TRACESTRING_SIZE) // NOTE(review): strncpy does not NUL-terminate when s has TRACESTRING_SIZE or more characters
+ #else
+ #define SYMTRACESTRING(s) // do nothing
+ #endif
+ 
+ int * assignSE2partition[2] ;   // used as assignSE2partition[input->partition_mode][syntax element type] to get a data partition number; the two pointers are set elsewhere (presumably to the tables below - not visible here)
+ int assignSE2partition_NoDP[SE_MAX_ELEMENTS] =   // every listed syntax element type maps to partition 0
+   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int assignSE2partition_DP[SE_MAX_ELEMENTS] =     // syntax element types spread over partitions 0, 1 and 2
+   {  0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 } ;
+ 
+ static int ref_pic_list_reordering();
+ static int dec_ref_pic_marking();
+ static int pred_weight_table();
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    Write the slice header for img->currentSlice into the bitstream of the partition selected for SE_HEADER
+  *
+  * \return
+  *    number of bits used (sum of the values returned by the individual write calls)
+  ********************************************************************************************
+ */
+ int SliceHeader()
+ {
+   int dP_nr = assignSE2partition[input->partition_mode][SE_HEADER];   // partition that carries header elements
+   Bitstream *bitstream = img->currentSlice->partArr[dP_nr].bitstream;
+   Slice* currSlice = img->currentSlice;
+   int len = 0;
+   unsigned int field_pic_flag = 0, bottom_field_flag = 0;
+ 
+   int num_bits_slice_group_change_cycle;
+   float numtmp;	
+ 	
+   if (img->MbaffFrameFlag)   // with MbaffFrameFlag set, the MB number is halved before it is written
+     len  = ue_v("SH: first_mb_in_slice", img->current_mb_nr >> 1,   bitstream);
+   else
+     len  = ue_v("SH: first_mb_in_slice", img->current_mb_nr,   bitstream);
+ 
+   len += ue_v("SH: slice_type",        get_picture_type (),   bitstream);
+ 
+   len += ue_v("SH: pic_parameter_set_id" , active_pps->pic_parameter_set_id ,bitstream);
+ 
+   len += u_v (log2_max_frame_num_minus4 + 4,"SH: frame_num", img->frame_num, bitstream);
+ 
+   if (!active_sps->frame_mbs_only_flag)   // field flags are written only when frame_mbs_only_flag is 0
+   {
+     // field_pic_flag    u(1)
+     field_pic_flag = (img->structure ==TOP_FIELD || img->structure ==BOTTOM_FIELD)?1:0;
+     assert( field_pic_flag == img->fld_flag );   // the derived flag must agree with img->fld_flag
+     len += u_1("SH: field_pic_flag", field_pic_flag, bitstream);
+ 
+     if (field_pic_flag)
+     {
+       //bottom_field_flag     u(1)
+       bottom_field_flag = (img->structure == BOTTOM_FIELD)?1:0;
+       len += u_1("SH: bottom_field_flag" , bottom_field_flag ,bitstream);
+     }
+   }
+ 
+   if (img->currentPicture->idr_flag)
+   {
+     // idr_pic_id, written as img->number modulo 2
+     len += ue_v ("SH: idr_pic_id", (img->number % 2), bitstream);
+   }
+ 
+   if (img->pic_order_cnt_type == 0)
+   {
+     if (active_sps->frame_mbs_only_flag)
+     {
+       img->pic_order_cnt_lsb = (img->toppoc & ~((((unsigned int)(-1)) << (log2_max_pic_order_cnt_lsb_minus4+4))) );   // mask keeps the low (log2_max_pic_order_cnt_lsb_minus4+4) bits
+     }
+     else
+     {
+       if (!field_pic_flag || img->structure == TOP_FIELD)
+         img->pic_order_cnt_lsb = (img->toppoc & ~((((unsigned int)(-1)) << (log2_max_pic_order_cnt_lsb_minus4+4))) );
+       else if ( img->structure == BOTTOM_FIELD )
+         img->pic_order_cnt_lsb = (img->bottompoc & ~((((unsigned int)(-1)) << (log2_max_pic_order_cnt_lsb_minus4+4))) );
+     }
+ 
+     len += u_v (log2_max_pic_order_cnt_lsb_minus4+4, "SH: pic_order_cnt_lsb", img->pic_order_cnt_lsb, bitstream);
+ 
+     if (img->pic_order_present_flag && !field_pic_flag)
+     {
+       len += se_v ("SH: delta_pic_order_cnt_bottom", img->delta_pic_order_cnt_bottom, bitstream);
+     }
+   }
+   if (img->pic_order_cnt_type == 1 && !img->delta_pic_order_always_zero_flag)
+   {
+     len += se_v ("SH: delta_pic_order_cnt[0]", img->delta_pic_order_cnt[0], bitstream);
+ 
+     if (img->pic_order_present_flag && !field_pic_flag)
+     {
+       len += se_v ("SH: delta_pic_order_cnt[1]", img->delta_pic_order_cnt[1], bitstream);
+     }
+   }
+ 
+   if (input->redundant_slice_flag)
+   {
+     len += ue_v ("SH: redundant_pic_cnt", img->redundant_pic_cnt, bitstream);
+   }
+ 
+   // Direct Mode Type selection for B pictures
+   if (img->type==B_SLICE)
+   {
+     len +=  u_1 ("SH: direct_spatial_mv_pred_flag", img->direct_spatial_mv_pred_flag, bitstream);  	
+   }
+ 
+   if ((img->type == P_SLICE) || (img->type == B_SLICE) || (img->type==SP_SLICE))
+   {
+     int override_flag;   // 1 when an active reference count differs from (num_ref_idx_lX_active_minus1 + 1) in the active PPS
+     if ((img->type == P_SLICE) || (img->type==SP_SLICE))
+     {
+       override_flag = (img->num_ref_idx_l0_active != (active_pps->num_ref_idx_l0_active_minus1 +1)) ? 1 : 0;
+     }
+     else
+     {
+       override_flag = ((img->num_ref_idx_l0_active != (active_pps->num_ref_idx_l0_active_minus1 +1)) 
+                       || (img->num_ref_idx_l1_active != (active_pps->num_ref_idx_l1_active_minus1 +1))) ? 1 : 0;
+     }
+ 
+     len +=  u_1 ("SH: num_ref_idx_active_override_flag", override_flag, bitstream);
+     
+     if (override_flag) 
+     {
+       len += ue_v ("SH: num_ref_idx_l0_active_minus1", img->num_ref_idx_l0_active-1, bitstream);
+       if (img->type==B_SLICE)
+       {
+         len += ue_v ("SH: num_ref_idx_l1_active_minus1", img->num_ref_idx_l1_active-1, bitstream);
+       }
+     }
+ 
+   }
+   len += ref_pic_list_reordering();
+ 
+   if (((img->type == P_SLICE || img->type == SP_SLICE) && active_pps->weighted_pred_flag) || 
+      ((img->type == B_SLICE) && active_pps->weighted_bipred_idc == 1))  
+   {
+     len += pred_weight_table();
+   }
+ 
+   if (img->nal_reference_idc)
+     len += dec_ref_pic_marking();
+ 
+   if(input->symbol_mode==CABAC && img->type!=I_SLICE /*&& img->type!=SI_IMG*/)
+   {
+     len += ue_v("SH: cabac_init_idc", img->model_number, bitstream);
+   }
+ 
+   len += se_v("SH: slice_qp_delta", (currSlice->qp - 26 - active_pps->pic_init_qp_minus26), bitstream);  
+ 
+   if (img->type==SP_SLICE /*|| img->type==SI_SLICE*/)
+   {
+     if (img->type==SP_SLICE) // Switch Flag only for SP pictures
+     {
+       len += u_1 ("SH: sp_for_switch_flag", 0, bitstream);   // 1 for switching SP, 0 for normal SP
+     }
+     len += se_v ("SH: slice_qs_delta", (img->qpsp - 26), bitstream );
+   }
+ 
+   if (active_pps->deblocking_filter_control_present_flag)
+   {
+     len += ue_v("SH: disable_deblocking_filter_idc",img->LFDisableIdc, bitstream);  // Turn loop filter on/off on slice basis 
+ 
+     if (img->LFDisableIdc!=1)
+     {
+       len += se_v ("SH: slice_alpha_c0_offset_div2", img->LFAlphaC0Offset / 2, bitstream);   // written as offset / 2 (integer division)
+ 
+       len += se_v ("SH: slice_beta_offset_div2", img->LFBetaOffset / 2, bitstream);
+     }
+   }
+ 
+ 	
+   if ( active_pps->num_slice_groups_minus1>0 &&
+     active_pps->slice_group_map_type>=3 && active_pps->slice_group_map_type<=5)
+   {
+     numtmp=img->PicHeightInMapUnits*img->PicWidthInMbs/(float)(active_pps->slice_group_change_rate_minus1+1)+1;
+     num_bits_slice_group_change_cycle = (int)ceil(log(numtmp)/log(2));   // field width = ceil(log2(numtmp)), computed in floating point
+ 
+     //! img->slice_group_change_cycle can be changed before calling FmoInit()
+     len += u_v (num_bits_slice_group_change_cycle, "SH: slice_group_change_cycle", img->slice_group_change_cycle, bitstream);
+   }
+ 
+   // NOTE: The following syntax element is actually part of the
+   //        slice data partition A RBSP syntax
+ 
+   if(input->partition_mode&&!img->currentPicture->idr_flag)   // only with partition_mode non-zero and for non-IDR pictures
+   {
+     len += ue_v("DPA: slice_id", img->current_slice_nr, bitstream);
+   }
+ 
+   return len;
+ }
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    writes the ref_pic_list_reordering syntax (list 0 for non-I slices, list 1 for B slices)
+  *    based on the contents of the corresponding fields in img->currentSlice
+  *
+  * \return
+  *    number of bits used (sum of the values returned by the u_1/ue_v calls)
+  ********************************************************************************************
+ */
+ static int ref_pic_list_reordering()
+ {
+   int dP_nr = assignSE2partition[input->partition_mode][SE_HEADER];
+   Bitstream *bitstream = img->currentSlice->partArr[dP_nr].bitstream;
+   Slice *currSlice = img->currentSlice;
+ 
+   int i, len=0;
+ 
+   if ((img->type!=I_SLICE) /*&&(img->type!=SI_IMG)*/ )
+   {
+     len += u_1 ("SH: ref_pic_list_reordering_flag_l0", currSlice->ref_pic_list_reordering_flag_l0, bitstream);
+     if (currSlice->ref_pic_list_reordering_flag_l0)
+     {
+       i=-1;   // incremented at the top of the loop, so the first command written is entry 0
+       do
+       {
+         i++;
+         len += ue_v ("SH: reordering_of_pic_num_idc", currSlice->reordering_of_pic_nums_idc_l0[i], bitstream);
+         if (currSlice->reordering_of_pic_nums_idc_l0[i]==0 ||
+             currSlice->reordering_of_pic_nums_idc_l0[i]==1)
+         {
+           len += ue_v ("SH: abs_diff_pic_num_minus1_l0", currSlice->abs_diff_pic_num_minus1_l0[i], bitstream);
+         }
+         else
+         {
+           if (currSlice->reordering_of_pic_nums_idc_l0[i]==2)
+           {
+             len += ue_v ("SH: long_term_pic_idx_l0", currSlice->long_term_pic_idx_l0[i], bitstream);
+           }
+         }
+ 
+       } while (currSlice->reordering_of_pic_nums_idc_l0[i] != 3);   // idc 3 ends the list; it is written but has no operand
+     }
+   }
+ 
+   if (img->type==B_SLICE)   // same command loop for list 1
+   {
+     len += u_1 ("SH: ref_pic_list_reordering_flag_l1", currSlice->ref_pic_list_reordering_flag_l1, bitstream);
+     if (currSlice->ref_pic_list_reordering_flag_l1)
+     {
+       i=-1;
+       do
+       {
+         i++;
+         len += ue_v ("SH: remapping_of_pic_num_idc", currSlice->reordering_of_pic_nums_idc_l1[i], bitstream);
+         if (currSlice->reordering_of_pic_nums_idc_l1[i]==0 ||
+             currSlice->reordering_of_pic_nums_idc_l1[i]==1)
+         {
+           len += ue_v ("SH: abs_diff_pic_num_minus1_l1", currSlice->abs_diff_pic_num_minus1_l1[i], bitstream);
+         }
+         else
+         {
+           if (currSlice->reordering_of_pic_nums_idc_l1[i]==2)
+           {
+             len += ue_v ("SH: long_term_pic_idx_l1", currSlice->long_term_pic_idx_l1[i], bitstream);
+           }
+         }
+       } while (currSlice->reordering_of_pic_nums_idc_l1[i] != 3);
+     }
+   }
+ 
+   return len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write the reference picture marking syntax: two flags for IDR pictures, otherwise the adaptive flag and the memory management control operations
+  *
+  * \return
+  *    number of bits used 
+  ************************************************************************
+  */
+ static int dec_ref_pic_marking()
+ {
+   int dP_nr = assignSE2partition[input->partition_mode][SE_HEADER];
+   Bitstream *bitstream = img->currentSlice->partArr[dP_nr].bitstream;
+ 
+   DecRefPicMarking_t *tmp_drpm;
+ 
+   int val, len=0;
+ 
+   if (img->currentPicture->idr_flag)
+   {
+     len += u_1("SH: no_output_of_prior_pics_flag", img->no_output_of_prior_pics_flag, bitstream);
+     len += u_1("SH: long_term_reference_flag", img->long_term_reference_flag, bitstream);
+   }
+   else
+   {
+     img->adaptive_ref_pic_buffering_flag = (img->dec_ref_pic_marking_buffer!=NULL);   // flag is derived from whether a command list exists
+ 
+     len += u_1("SH: adaptive_ref_pic_buffering_flag", img->adaptive_ref_pic_buffering_flag, bitstream);
+ 
+     if (img->adaptive_ref_pic_buffering_flag)
+     {
+       tmp_drpm = img->dec_ref_pic_marking_buffer;
+       // write Memory Management Control Operation 
+       do
+       {
+         if (tmp_drpm==NULL) error ("Error encoding MMCO commands", 500);   // list ended before a 0 operation; NOTE(review): the next line dereferences tmp_drpm, so this relies on error() not returning - confirm
+         
+         val = tmp_drpm->memory_management_control_operation;
+         len += ue_v("SH: memory_management_control_operation", val, bitstream);
+ 
+         if ((val==1)||(val==3)) 
+         {
+           len += 1 + ue_v("SH: difference_of_pic_nums_minus1", tmp_drpm->difference_of_pic_nums_minus1, bitstream);   // NOTE(review): the extra "1 +" adds one to the count beyond what ue_v returns - confirm it is intended
+         }
+         if (val==2)
+         {
+           len+= ue_v("SH: long_term_pic_num", tmp_drpm->long_term_pic_num, bitstream);
+         }
+         if ((val==3)||(val==6))
+         {
+           len+= ue_v("SH: long_term_frame_idx", tmp_drpm->long_term_frame_idx, bitstream);
+         }
+         if (val==4)
+         {
+           len += ue_v("SH: max_long_term_pic_idx_plus1", tmp_drpm->max_long_term_frame_idx_plus1, bitstream);
+         }
+         
+         tmp_drpm=tmp_drpm->Next;
+         
+       } while (val != 0);   // operation 0 terminates the list and is itself written
+       
+     }
+   }
+   return len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write prediction weight table: both denominators, then per-reference weights/offsets for list 0 and, for B slices, list 1
+  *
+  * \return
+  *    number of bits used 
+  ************************************************************************
+  */
+ static int pred_weight_table()
+ {
+   int dP_nr = assignSE2partition[input->partition_mode][SE_HEADER];
+   Bitstream *bitstream = img->currentSlice->partArr[dP_nr].bitstream;
+ 
+   int len = 0;
+   int i,j;
+ 
+   len += ue_v("SH: luma_log_weight_denom", luma_log_weight_denom, bitstream);
+ 
+   if ( 0 != active_sps->chroma_format_idc)   // chroma entries are skipped when chroma_format_idc is 0
+   {
+     len += ue_v("SH: chroma_log_weight_denom", chroma_log_weight_denom, bitstream);
+   }
+ 
+   for (i=0; i< img->num_ref_idx_l0_active; i++)
+   {
+     if ( (wp_weight[0][i][0] != 1<<luma_log_weight_denom) || (wp_offset[0][i][0] != 0) )   // explicit values are sent only if they differ from weight 1<<denom, offset 0
+     {
+       len += u_1 ("SH: luma_weight_flag_l0", 1, bitstream);
+       
+       len += se_v ("SH: luma_weight_l0", wp_weight[0][i][0], bitstream);
+         
+       len += se_v ("SH: luma_offset_l0", wp_offset[0][i][0], bitstream);
+     }
+     else
+     {
+         len += u_1 ("SH: luma_weight_flag_l0", 0, bitstream);
+     }
+ 
+     if (active_sps->chroma_format_idc!=0)
+     {
+       if ( (wp_weight[0][i][1] != 1<<chroma_log_weight_denom) || (wp_offset[0][i][1] != 0) || 
+         (wp_weight[0][i][2] != 1<<chroma_log_weight_denom) || (wp_offset[0][i][2] != 0)  )
+       {
+         len += u_1 ("chroma_weight_flag_l0", 1, bitstream);
+         for (j=1; j<3; j++)   // last index 1 and 2 (index 0 was handled above as luma)
+         {
+           len += se_v ("chroma_weight_l0", wp_weight[0][i][j] ,bitstream);
+           
+           len += se_v ("chroma_offset_l0", wp_offset[0][i][j] ,bitstream);
+         }
+       }
+       else
+       {
+         len += u_1 ("chroma_weight_flag_l0", 0, bitstream);
+       }
+     }
+   }
+ 
+   if (img->type == B_SLICE)   // same layout for list 1 (first index 1)
+   {
+     for (i=0; i< img->num_ref_idx_l1_active; i++)
+     {
+       if ( (wp_weight[1][i][0] != 1<<luma_log_weight_denom) || (wp_offset[1][i][0] != 0) )
+       {
+         len += u_1 ("SH: luma_weight_flag_l1", 1, bitstream);
+         
+         len += se_v ("SH: luma_weight_l1", wp_weight[1][i][0], bitstream);
+         
+         len += se_v ("SH: luma_offset_l1", wp_offset[1][i][0], bitstream);
+       }
+       else
+       {
+         len += u_1 ("SH: luma_weight_flag_l1", 0, bitstream);
+       }
+       
+       if (active_sps->chroma_format_idc!=0)
+       {
+         if ( (wp_weight[1][i][1] != 1<<chroma_log_weight_denom) || (wp_offset[1][i][1] != 0) || 
+           (wp_weight[1][i][2] != 1<<chroma_log_weight_denom) || (wp_offset[1][i][2] != 0) )
+         {
+           len += u_1 ("chroma_weight_flag_l1", 1, bitstream);
+           for (j=1; j<3; j++)
+           {
+             len += se_v ("chroma_weight_l1", wp_weight[1][i][j] ,bitstream);
+             
+             len += se_v ("chroma_offset_l1", wp_offset[1][i][j] ,bitstream);
+           }
+         }
+         else
+         {
+           len += u_1 ("chroma_weight_flag_l1", 0, bitstream);
+         }
+       }
+     }
+   }
+   return len;
+ }
+   
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Selects picture type and codes it to symbol
+  *
+  * \return
+  *    symbol value for picture type
+  ************************************************************************
+  */
+ int get_picture_type()
+ {
+   // An offset of 5 signals that the whole picture has the same slice type;
+   // an offset of 0 would transmit the type without that signaling.
+   const int whole_picture_offset = 5;
+   int base_code;
+ 
+   switch (img->type)
+   {
+   case P_SLICE:
+     base_code = 0;
+     break;
+   case B_SLICE:
+     base_code = 1;
+     break;
+   case I_SLICE:
+     base_code = 2;
+     break;
+   case SP_SLICE:
+     base_code = 3;
+     break;
+   default:
+     error("Picture Type not supported!",1);
+     return 0;                       // only reached if error() returns
+   }
+   return base_code + whole_picture_offset;
+ }
+ 
+ 
+ 
+ /*!
+  *****************************************************************************
+  *
+  * \brief 
+  *    int Partition_BC_Header () write the Partition type B, C header
+  *
+  * \return
+  *    Number of bits used by the partition header
+  *
+  * \par Parameters
+  *    PartNo: Partition Number to which the header should be written
+  *
+  * \par Side effects
+  *    Partition header as per VCEG-N72r2 is written into the appropriate 
+  *    partition bit buffer
+  *
+  * \par Limitations/Shortcomings/Tweaks
+  *    The current code does not support the change of picture parameters within
+  *    one coded sequence, hence there is only one parameter set necessary.  This
+  *    is hard coded to zero.
+  *
+  * \date
+  *    October 24, 2001
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *****************************************************************************/
+ int Partition_BC_Header(int PartNo)
+ {
+   Slice *slice = img->currentSlice;
+   DataPartition *partition = &slice->partArr[PartNo];
+   SyntaxElement symbol, *sym = &symbol;   // the name 'sym' is required by SYMTRACESTRING
+   int len;
+ 
+   assert (PartNo > 0 && PartNo < slice->max_part_nr);
+ 
+   // Settings shared by every symbol written here: header type, ue mapping.
+   sym->type    = SE_HEADER;
+   sym->mapping = ue_linfo;
+   sym->value2  = 0;
+ 
+   // Slice ID (changed according to g050r1)
+   SYMTRACESTRING("RTP-PH: Slice ID");
+   sym->value1 = img->current_slice_nr;
+   len = writeSyntaxElement_UVLC (sym, partition);
+ 
+   // Picture ID, written only when redundant_pic_cnt_present_flag is set in the active PPS
+   if (active_pps->redundant_pic_cnt_present_flag)
+   {
+     SYMTRACESTRING("RTP-PH: Picture ID");
+     sym->value1 = img->currentSlice->picture_id;
+     len += writeSyntaxElement_UVLC (sym, partition);
+   }
+ 
+   return len;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/header.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/header.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/header.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,22 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file header.h
+  *
+  * \brief
+  *    Prototypes for header.c
+  *************************************************************************************
+  */
+ 
+ #ifndef _HEADER_H_
+ #define _HEADER_H_
+ 
+ int SliceHeader();                     //!< writes the slice header; returns the number of bits used
+ int Partition_BC_Header(int PartNo);   //!< writes the partition B/C header into partition PartNo; returns the number of bits used (prototype now matches the definition in header.c)
+ 
+ int  writeERPS(SyntaxElement *sym, DataPartition *partition);
+ // int  SequenceHeader(FILE *outf);
+ void write_terminating_bit (short);
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/image.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/image.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/image.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,2640 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file image.c
+  *
+  * \brief
+  *    Code one image/slice
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *     - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *     - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+  *     - Jani Lainema                    <jani.lainema at nokia.com>
+  *     - Sebastian Purreiter             <sebastian.purreiter at mch.siemens.de>
+  *     - Byeong-Moon Jeon                <jeonbm at lge.com>
+  *     - Yoon-Seong Soh                  <yunsung at lge.com>
+  *     - Thomas Stockhammer              <stockhammer at ei.tum.de>
+  *     - Detlev Marpe                    <marpe at hhi.de>
+  *     - Guido Heising                   <heising at hhi.de>
+  *     - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+  *     - Ragip Kurceren                  <ragip.kurceren at nokia.com>
+  *     - Antti Hallapuro                 <antti.hallapuro at nokia.com>
+  *     - Alexis Michael Tourapis         <alexismt at ieee.org> 
+  *************************************************************************************
+  */
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <time.h>
+ #include <sys/timeb.h>
+ #include <string.h>
+ #include <memory.h>
+ #include <assert.h>
+ 
+ #ifdef WIN32
+ #include <io.h>
+ #else
+ #include <unistd.h>
+ #endif
+ 
+ #include "global.h"
+ 
+ #include "refbuf.h"
+ #include "mbuffer.h"
+ #include "intrarefresh.h"
+ #include "fmo.h"
+ #include "sei.h"
+ #include "memalloc.h"
+ #include "nalu.h"
+ #include "ratectl.h"
+ #include "mb_access.h"
+ #include "output.h"
+ #include "cabac.h"
+ 
+ /* Table of picture parameter sets defined elsewhere; encode_one_frame() selects
+  * entry 0 as active_pps when input->GenerateMultiplePPS is set. */
+ extern pic_parameter_set_rbsp_t *PicParSet[MAXPPS];
+ 
+ /* Externally visible picture-level coding entry points defined in this file.
+  * NOTE(review): frame_picture's second parameter is called "method" here but
+  * "rd_pass" in the definition. */
+ void code_a_picture(Picture *pic);
+ void frame_picture (Picture *frame, int method);
+ void field_picture(Picture *top, Picture *bottom);
+ 
+ static int  writeout_picture(Picture *pic);
+ 
+ static int  picture_structure_decision(Picture *frame, Picture *top, Picture *bot);
+ static void distortion_fld (float *dis_fld_y, float *dis_fld_u, float *dis_fld_v);
+ /* NOTE(review): find_distortion is declared with an empty parameter list but is
+  * called as find_distortion(snr, img) later in this file; the empty list leaves
+  * the arguments unchecked by the compiler. */
+ static void find_snr();
+ static void find_distortion();
+ 
+ static void field_mode_buffer(int bit_field, float snr_field_y, float snr_field_u, float snr_field_v);
+ static void frame_mode_buffer (int bit_frame, float snr_frame_y, float snr_frame_u, float snr_frame_v);
+ 
+ static void init_frame();
+ static void init_field();
+ 
+ static void put_buffer_frame();
+ static void put_buffer_top();
+ static void put_buffer_bot();
+ 
+ static void copy_motion_vectors_MB();
+ 
+ static void PaddAutoCropBorders (int org_size_x, int org_size_y, int img_size_x, int img_size_y,
+                                  int org_size_x_cr, int org_size_y_cr, int img_size_x_cr, int img_size_y_cr);
+ 
+ static void ReadOneFrame (int FrameNoInFile, int HeaderSize, int xs, int ys, int xs_cr, int ys_cr);
+ 
+ static void writeUnit(Bitstream* currStream ,int partition);
+ static void rdPictureCoding();
+ 
+ /* NOTE(review): these are only declared when _ADAPT_LAST_GROUP_ is defined, yet
+  * init_frame() assigns last_P_no = last_P_no_frm outside any #ifdef; unless
+  * another header declares them, a build without the macro would not compile
+  * -- confirm. */
+ #ifdef _ADAPT_LAST_GROUP_
+ int *last_P_no;
+ int *last_P_no_frm;
+ int *last_P_no_fld;
+ #endif
+ 
+ /* Per-picture-type report routines called at the end of encode_one_frame(). */
+ static void ReportFirstframe(int tmp_time, int me_time);
+ static void ReportIntra(int tmp_time, int me_time);
+ static void ReportSP(int tmp_time, int me_time);
+ static void ReportP(int tmp_time, int me_time);
+ static void ReportB(int tmp_time, int me_time);
+ static void ReportNALNonVLCBits(int tmp_time, int me_time);
+ 
+ static int CalculateFrameNumber();  // Calculates the next frame number
+ 
+ /* enc_picture points at the picture currently being coded; frame_picture() and
+  * field_picture() point it at one of the pictures below after allocating it.
+  * enc_frame_picture/2/3 are the frame-coded candidates for rd_pass 0/1/2. */
+ StorablePicture *enc_picture;
+ StorablePicture *enc_frame_picture;
+ StorablePicture *enc_frame_picture2;
+ StorablePicture *enc_frame_picture3;
+ StorablePicture *enc_top_picture;
+ StorablePicture *enc_bottom_picture;
+ //Rate control
+ /* Reset to 0 by encode_one_frame() when rate control is enabled. */
+ int    QP;
+ 
+ /* Filter tap table; not referenced in the part of the file reviewed here. */
+ const int ONE_FOURTH_TAP[3][2] =
+ {
+   {20,20},
+   {-5,-4},
+   { 1, 0},
+ };
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Re-orders the rows of every macroblock pair flagged in
+  *    enc_picture->mb_field.
+  *
+  *    For each flagged pair (every second macroblock index) the block of
+  *    2*N rows at the pair's position is buffered; buffered rows 0..N-1
+  *    are written back to the even rows and rows N..2N-1 to the odd rows.
+  *    Luma is always processed (N = MB_BLOCK_SIZE); both chroma planes are
+  *    processed as well (N = img->mb_cr_size_y) unless the picture is
+  *    YUV400.
+  ************************************************************************
+  */
+ void MbAffPostProc()
+ {
+   imgpel scratch[32][16];
+ 
+   imgpel ** luma   = enc_picture->imgY;
+   imgpel ***chroma = enc_picture->imgUV;
+   const int has_chroma = (img->yuv_format != YUV400);
+   int mb, row, px, py, plane;
+ 
+   for (mb = 0; mb < (int)img->PicSizeInMbs; mb += 2)
+   {
+     if (!enc_picture->mb_field[mb])
+       continue;
+ 
+     get_mb_pos(mb, &px, &py);
+ 
+     // luma: buffer the 2*MB_BLOCK_SIZE rows, then interleave the two halves
+     for (row = 0; row < (2*MB_BLOCK_SIZE); row++)
+       memcpy(scratch[row], &luma[py + row][px], MB_BLOCK_SIZE * sizeof(imgpel));
+ 
+     for (row = 0; row < MB_BLOCK_SIZE; row++)
+     {
+       memcpy(&luma[py + (2*row)][px],     scratch[row],                 MB_BLOCK_SIZE * sizeof(imgpel));
+       memcpy(&luma[py + (2*row + 1)][px], scratch[row + MB_BLOCK_SIZE], MB_BLOCK_SIZE * sizeof(imgpel));
+     }
+ 
+     if (!has_chroma)
+       continue;
+ 
+     // scale the luma position down to chroma resolution
+     px = px / (16/img->mb_cr_size_x);
+     py = py / (16/img->mb_cr_size_y);
+ 
+     for (plane = 0; plane < 2; plane++)
+     {
+       for (row = 0; row < (2*img->mb_cr_size_y); row++)
+         memcpy(scratch[row], &chroma[plane][py + row][px], img->mb_cr_size_x * sizeof(imgpel));
+ 
+       for (row = 0; row < img->mb_cr_size_y; row++)
+       {
+         memcpy(&chroma[plane][py + (2*row)][px],     scratch[row],                      img->mb_cr_size_x * sizeof(imgpel));
+         memcpy(&chroma[plane][py + (2*row + 1)][px], scratch[row + img->mb_cr_size_y], img->mb_cr_size_x * sizeof(imgpel));
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Encodes one picture (frame or field).
+  *
+  *    Main picture coding loop, called by the frame and field coding
+  *    routines once the img-> elements have been set up.  It initialises
+  *    FMO and the quantisation tables, codes every slice of every slice
+  *    group until img->PicSizeInMbs macroblocks are coded, then runs the
+  *    loop filter and, for MBAFF frames, MbAffPostProc().
+  *
+  * \param pic
+  *    Picture being coded; it becomes img->currentPicture and its slice
+  *    count and distortion fields are reset here.
+  ************************************************************************
+  */
+ void code_a_picture(Picture *pic)
+ {
+   unsigned int coded_mbs = 0;
+   int group;
+   int dec;
+   int not_bottom_field;
+   int idr_slice_type;
+ 
+   img->currentPicture = pic;
+ 
+   // IDR: never for a bottom field; otherwise the very first picture, or any
+   // I/SP/SI picture when IDR pictures are enabled
+   not_bottom_field = !(img->structure == BOTTOM_FIELD);
+   idr_slice_type   = (img->type == I_SLICE || img->type == SP_SLICE || img->type == SI_SLICE);
+   img->currentPicture->idr_flag = ((!IMG_NUMBER) && not_bottom_field)
+                                || (input->idr_enable && idr_slice_type && not_bottom_field);
+ 
+   pic->no_slices = 0;
+   pic->distortion_u = pic->distortion_v = pic->distortion_y = 0.0;
+ 
+   RandomIntraNewPicture ();     //! Allocates forced INTRA MBs (even for fields!)
+ 
+   // The slice_group_change_cycle can be changed here.
+   // FmoInit() is called before coding each picture, frame or field
+   img->slice_group_change_cycle = 1;
+   FmoInit(img, active_pps, active_sps);
+   FmoStartPicture ();           //! picture level initialization of FMO
+ 
+   CalculateQuantParam();
+   CalculateOffsetParam();
+ 
+   if (input->Transform8x8Mode)
+   {
+     CalculateQuant8Param();
+     CalculateOffset8Param();
+   }
+ 
+   reset_pic_bin_count();
+   img->bytes_in_picture = 0;
+ 
+   // one iteration per slice group, until every macroblock has been coded
+   for (group = 0; coded_mbs < img->PicSizeInMbs; group++)
+   {
+     while (!FmoSliceGroupCompletelyCoded (group))
+     {
+       // code the current slice, then advance to the next one
+       coded_mbs += encode_one_slice (group, pic, coded_mbs);
+       FmoSetLastMacroblockInSlice (img->current_mb_nr);
+       img->current_slice_nr++;
+       stats->bit_slice = 0;
+     }
+   }
+   FmoEndPicture ();
+ 
+   // Modified for Fast Mode Decision. Inchoon Choi, SungKyunKwan Univ.
+   if (input->rdopt == 3 && (img->type != B_SLICE))
+   {
+     for (dec = 0; dec < input->NoOfDecoders; dec++)
+     {
+       DeblockFrame (img, decs->decY_best[dec], NULL);
+     }
+   }
+ 
+   DeblockFrame (img, enc_picture->imgY, enc_picture->imgUV); //comment out to disable loop filter
+ 
+   if (img->MbaffFrameFlag)
+   {
+     MbAffPostProc();
+   }
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Encodes one frame
+  *
+  *    Reads the next input frame, codes it as a frame and/or as a field
+  *    pair depending on input->PicInterlace, writes out the selected
+  *    representation, stores the reconstructed picture in the DPB and
+  *    updates rate-control state and statistics.
+  *
+  * \return
+  *    0 if IMG_NUMBER is 0 (first picture), 1 otherwise
+  ************************************************************************
+  */
+ int encode_one_frame ()
+ {
+   static int prev_frame_no = 0; // POC200301
+   static int consecutive_non_reference_pictures = 0; // POC200301
+   int        FrameNumberInFile;
+ 
+ #ifdef _LEAKYBUCKET_
+   extern long Bit_Buffer[10000];
+   extern unsigned long total_frame_buffer;
+ #endif
+ 
+   time_t ltime1;
+   time_t ltime2;
+ 
+ #ifdef WIN32
+   struct _timeb tstruct1;
+   struct _timeb tstruct2;
+ #else
+   struct timeb tstruct1;
+   struct timeb tstruct2;
+ #endif
+ 
+   int tmp_time;
+   // NOTE(review): bits_frm/bits_fld and the dis_*_y/u/v values are never updated
+   // after this initialisation; they reach the *_mode_buffer() calls below as 0.
+   int bits_frm = 0, bits_fld = 0;
+   float dis_frm = 0.0, dis_frm_y = 0.0, dis_frm_u = 0.0, dis_frm_v = 0.0;
+   float dis_fld = 0.0, dis_fld_y = 0.0, dis_fld_u = 0.0, dis_fld_v = 0.0;
+ 
+   //Rate control
+   // NOTE(review): pic_type is assigned below but never read in this function.
+   int pic_type, bits = 0; 
+ 
+   me_time=0;
+   img->rd_pass = 0;
+   enc_frame_picture  = NULL;
+   enc_frame_picture2 = NULL;
+   enc_frame_picture3 = NULL;
+ 
+ #ifdef WIN32
+   _ftime (&tstruct1);           // start time ms
+ #else
+   ftime (&tstruct1);
+ #endif
+   time (&ltime1);               // start time s
+ 
+   //Rate control 
+   img->write_macroblock = 0;
+ /*
+   //Shankar Regunathan (Oct 2002)
+   //Prepare Panscanrect SEI payload
+   UpdatePanScanRectInfo ();
+   //Prepare Arbitrarydata SEI Payload
+   UpdateUser_data_unregistered ();
+   //Prepare Registered data SEI Payload
+   UpdateUser_data_registered_itu_t_t35 ();
+   //Prepare RandomAccess SEI Payload
+   UpdateRandomAccess ();
+ */
+ 
+   // Optionally re-send the picture parameter set before every picture but the first
+   if (input->ResendPPS && img->number !=0)
+   {
+     stats->bit_ctr_parametersets_n=write_PPS(0, 0);
+     //stats->bit_slice += stats->bit_ctr_parametersets_n;
+     stats->bit_ctr_parametersets += stats->bit_ctr_parametersets_n;
+   }
+ 
+   put_buffer_frame ();      // sets the pointers to the frame structures 
+                             // (and not to one of the field structures)
+   init_frame ();
+   FrameNumberInFile = CalculateFrameNumber();
+ 
+   ReadOneFrame (FrameNumberInFile, input->infile_header,
+                 input->img_width, input->img_height, input->img_width_cr, input->img_height_cr);
+ 
+   PaddAutoCropBorders (input->img_width, input->img_height, img->width, img->height,
+                        input->img_width_cr, input->img_height_cr, img->width_cr, img->height_cr);
+ 
+   // set parameters for direct mode and deblocking filter
+   img->direct_spatial_mv_pred_flag     = input->direct_spatial_mv_pred_flag;
+   img->LFDisableIdc    = input->LFDisableIdc;
+   img->LFAlphaC0Offset = input->LFAlphaC0Offset;
+   img->LFBetaOffset    = input->LFBetaOffset;
+   img->AdaptiveRounding = input->AdaptiveRounding;
+   // Following code should consider optimal coding mode. Currently also does not support
+   // multiple slices per frame.
+   frame_ctr[img->type]++;
+   snr->frame_ctr++;
+ 
+   if (input->PicInterlace == FIELD_CODING)
+   {
+     //Rate control
+     img->FieldControl=1;
+ 
+     img->field_picture = 1;  // we encode fields
+     field_picture (top_pic, bottom_pic);
+     img->fld_flag = 1;
+   }
+   else
+   {
+     //Rate control
+     img->FieldControl=0;
+ 
+     // For frame coding, turn MB level field/frame coding flag on
+     if (input->MbInterlace)
+       mb_adaptive = 1;
+ 
+     img->field_picture = 0; // we encode a frame
+ 
+     //Rate control
+     if(input->RCEnable)
+     { 
+     /*update the number of MBs in the basic unit for MB adaptive 
+       f/f coding*/
+       if((input->MbInterlace)&&(input->basicunit<img->Frame_Total_Number_MB)\
+         &&(img->type==P_SLICE)&&(img->IFLAG==0))
+         img->BasicUnit=input->basicunit*2;
+       else
+         img->BasicUnit=input->basicunit;
+       
+       rc_init_pict(1,0,1); 
+       img->qp  = updateQuantizationParameter(0); 
+       
+       
+       pic_type = img->type;
+       QP =0;
+     }
+ 
+     if( active_sps->frame_mbs_only_flag)
+       img->TopFieldFlag=0;
+ 
+     if (input->GenerateMultiplePPS)
+       active_pps = PicParSet[0];
+ 
+     // First (or only) frame-coding pass; result goes to enc_frame_picture
+     frame_picture (frame_pic_1, 0);
+ 
+     if ((input->RDPictureIntra || img->type!=I_SLICE) && input->RDPictureDecision)
+     {
+       rdPictureCoding();
+     }         
+     
+     // For field coding, turn MB level field/frame coding flag off
+     if (input->MbInterlace)
+       mb_adaptive = 0;
+     
+     if (input->PicInterlace == ADAPTIVE_CODING)
+     {
+       //Rate control
+       img->FieldControl=1;
+       img->write_macroblock = 0;
+       img->bot_MB = 0;
+ 
+       img->field_picture = 1;  // we encode fields
+       field_picture (top_pic, bottom_pic);
+       
+       //! Note: the distortion for a field coded picture is stored in the top field
+       //! the distortion values in the bottom field are dummies
+       // NOTE(review): dis_fld and dis_frm are not read again in this function;
+       // picture_structure_decision() recomputes the same sums itself.
+       dis_fld = top_pic->distortion_y + top_pic->distortion_u + top_pic->distortion_v;
+       dis_frm = frame_pic_1->distortion_y + frame_pic_1->distortion_u + frame_pic_1->distortion_v;
+       
+       img->fld_flag = picture_structure_decision (frame_pic_1, top_pic, bottom_pic);
+       update_field_frame_contexts (img->fld_flag);
+ 
+       //Rate control
+       if(img->fld_flag==0)
+         img->FieldFrame=1;
+       /*the current choice is field coding*/
+       else
+         img->FieldFrame=0;
+     }
+     // This else pairs with "if (input->PicInterlace == ADAPTIVE_CODING)" above:
+     // pure frame coding always selects frame mode.
+     else
+    
+       img->fld_flag = 0;
+   }
+ 
+   if (img->fld_flag)
+     stats->bit_ctr_emulationprevention += stats->em_prev_bits_fld;
+   else
+     stats->bit_ctr_emulationprevention += stats->em_prev_bits_frm;
+ 
+   if (img->type != B_SLICE)
+   {
+     img->pstruct_next_P = img->fld_flag;
+   }
+ 
+   // Here, img->structure may be either FRAME or BOTTOM FIELD depending on whether AFF coding is used
+   // The picture structure decision changes really only the fld_flag
+ 
+   if (img->fld_flag)            // field mode (use field when fld_flag=1 only)
+   {
+     field_mode_buffer (bits_fld, dis_fld_y, dis_fld_u, dis_fld_v);
+     writeout_picture (top_pic);
+     writeout_picture (bottom_pic);
+   }
+   else                          //frame mode
+   {
+     frame_mode_buffer (bits_frm, dis_frm_y, dis_frm_u, dis_frm_v);
+     
+     // Write the candidate matching img->rd_pass
+     if (input->RDPictureDecision && img->rd_pass == 2)
+       writeout_picture (frame_pic_3);
+     else if (input->RDPictureDecision && img->rd_pass == 1)
+       writeout_picture (frame_pic_2);
+     else
+       writeout_picture (frame_pic_1);
+   }
+ 
+   // Release the slice lists of every candidate picture, written or not
+   if (frame_pic_3)
+     free_slice_list(frame_pic_3);  
+   if (frame_pic_2)
+     free_slice_list(frame_pic_2);  
+   if (frame_pic_1)
+     free_slice_list(frame_pic_1);
+   if (top_pic)
+     free_slice_list(top_pic);
+   if (bottom_pic)
+     free_slice_list(bottom_pic);
+ 
+   /*
+   // Tian Dong (Sept 2002)
+   // in frame mode, the newly reconstructed frame has been inserted to the mem buffer
+   // and it is time to prepare the spare picture SEI payload.
+   if (input->InterlaceCodingOption == FRAME_CODING
+       && input->SparePictureOption && img->type != B_SLICE)
+     CalculateSparePicture ();
+ */
+ 
+   //Rate control
+   if(input->RCEnable)
+   {
+     bits = stats->bit_ctr-stats->bit_ctr_n;
+     rc_update_pict_frame(bits);
+   }
+ 
+ /*
+     
+   if (input->InterlaceCodingOption == FRAME_CODING)
+   {
+     if (input->rdopt == 3 && img->type != B_SLICE)
+       UpdateDecoders ();      // simulate packet losses and move decoded image to reference buffers
+     
+     if (input->RestrictRef)
+       UpdatePixelMap ();
+   }
+ */
+ 
+   // Compute SNR only when verbose output is requested; otherwise report zeros
+   if (input->Verbose != 0)
+     find_snr ();
+    else
+    {
+      snr->snr_y = 0.0;
+      snr->snr_u = 0.0;
+      snr->snr_v = 0.0;
+      snr->sse_y = 0.0;
+      snr->sse_u = 0.0;
+      snr->sse_v = 0.0;
+    }
+ 
+   time (&ltime2);               // end time sec
+ #ifdef WIN32
+   _ftime (&tstruct2);           // end time ms
+ #else
+   ftime (&tstruct2);            // end time ms
+ #endif
+ 
+   // NOTE(review): the seconds come from time() and the milliseconds from a
+   // separate ftime() call, so if a second boundary falls between the two calls
+   // the result is off by 1000 ms; the product is also narrowed to int.
+   tmp_time = (ltime2 * 1000 + tstruct2.millitm) - (ltime1 * 1000 + tstruct1.millitm);
+   tot_time = tot_time + tmp_time;
+ 
+   // Keep the selected reconstruction in the DPB and free the rejected ones
+   if (input->PicInterlace == ADAPTIVE_CODING)
+   {
+     if (img->fld_flag)
+     {
+       // store bottom field
+       store_picture_in_dpb(enc_bottom_picture);
+       free_storable_picture(enc_frame_picture);
+     }
+     else
+     {
+       // replace top with frame
+       replace_top_pic_with_frame(enc_frame_picture);
+       free_storable_picture(enc_bottom_picture);
+     }
+   }
+   else
+   {
+     if (img->fld_flag)
+     {
+       store_picture_in_dpb(enc_bottom_picture);
+     }
+     else
+     {
+       if (img->rd_pass==2)
+       {
+         store_picture_in_dpb(enc_frame_picture3);
+         free_storable_picture(enc_frame_picture);
+         free_storable_picture(enc_frame_picture2);
+       }
+       else if (img->rd_pass==1)
+       {
+         store_picture_in_dpb(enc_frame_picture2);
+         free_storable_picture(enc_frame_picture);
+         free_storable_picture(enc_frame_picture3);
+       }
+       else
+       {
+         store_picture_in_dpb(enc_frame_picture);
+         free_storable_picture(enc_frame_picture2);
+         free_storable_picture(enc_frame_picture3);
+       }
+     }
+   }
+ 
+ 
+ #ifdef _LEAKYBUCKET_
+   // Store bits used for this frame and increment counter of no. of coded frames
+   // NOTE(review): no bounds check against the 10000-entry Bit_Buffer declared above.
+   Bit_Buffer[total_frame_buffer] = stats->bit_ctr - stats->bit_ctr_n;
+   total_frame_buffer++;
+ #endif
+ 
+   // POC200301: Verify that POC coding type 2 is not used if more than one consecutive 
+   // non-reference frame is requested or if decoding order is different from output order
+   if (img->pic_order_cnt_type == 2)
+   {
+     if (!img->nal_reference_idc) consecutive_non_reference_pictures++;
+     else consecutive_non_reference_pictures = 0;
+ 
+     if (frame_no < prev_frame_no || consecutive_non_reference_pictures>1)
+       error("POC type 2 cannot be applied for the coding pattern where the encoding /decoding order of pictures are different from the output order.\n", -1);
+     prev_frame_no = frame_no;
+   }
+ 
+     if (stats->bit_ctr_parametersets_n!=0)
+       ReportNALNonVLCBits(tmp_time, me_time);
+     
+     if (IMG_NUMBER == 0)
+       ReportFirstframe(tmp_time,me_time);
+     else
+     {
+       //Rate control
+       if(input->RCEnable)
+       {
+         if((!input->PicInterlace)&&(!input->MbInterlace))
+           bits=stats->bit_ctr-stats->bit_ctr_n;
+         else
+         {
+           bits = stats->bit_ctr -Pprev_bits; // used for rate control update */
+           Pprev_bits = stats->bit_ctr;
+         }
+       }
+       
+       // Add this picture's bits to the per-type counter and print its report line
+       switch (img->type)
+       {
+       case I_SLICE:
+         stats->bit_ctr_I += stats->bit_ctr - stats->bit_ctr_n;
+         ReportIntra(tmp_time,me_time);
+         break;
+       case SP_SLICE:
+         stats->bit_ctr_P += stats->bit_ctr - stats->bit_ctr_n;
+         ReportSP(tmp_time,me_time);
+         break;
+       case B_SLICE:
+         stats->bit_ctr_B += stats->bit_ctr - stats->bit_ctr_n;
+           ReportB(tmp_time,me_time);
+         break;
+       default:      // P
+         stats->bit_ctr_P += stats->bit_ctr - stats->bit_ctr_n;
+         ReportP(tmp_time,me_time);      
+       }
+     }  
+ 
+   if (input->Verbose == 0)
+   { 
+     //for (i = 0; i <= (img->number & 0x0F); i++)
+     //printf(".");
+     //printf("                              \r");
+     printf("Completed Encoding Frame %05d.\r",frame_no);
+   }
+   // Flush output statistics
+   fflush(stdout);
+ 
+   // Remember the running bit count so the next picture can compute its own delta
+   stats->bit_ctr_n = stats->bit_ctr;
+ 
+   //Rate control
+   if(input->RCEnable) 
+   {
+     rc_update_pict(bits);
+      
+     // update the parameters of quadratic R-D model
+     if((img->type==P_SLICE)&&(active_sps->frame_mbs_only_flag))
+       updateRCModel();
+     else if((img->type==P_SLICE) && (!active_sps->frame_mbs_only_flag) && (img->IFLAG==0))
+       updateRCModel();
+   }
+ 
+   stats->bit_ctr_parametersets_n=0;
+ 
+   if (IMG_NUMBER == 0)
+     return 0;
+   else
+     return 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes out every partition of every slice of a coded picture.
+  *
+  * \param pic
+  *    Picture to emit; it also becomes img->currentPicture.
+  *
+  * \return
+  *    Always 0.  Each bitstream is expected to be byte aligned already
+  *    (the alignment is done in terminate_slice); this is only checked
+  *    with assert().
+  ************************************************************************
+  */
+ static int writeout_picture(Picture *pic)
+ {
+   int s, p;
+ 
+   img->currentPicture = pic;
+ 
+   for (s = 0; s < pic->no_slices; s++)
+   {
+     Slice *cur = pic->slices[s];
+ 
+     for (p = 0; p < cur->max_part_nr; p++)
+     {
+       Bitstream *bs = cur->partArr[p].bitstream;
+ 
+       assert (bs->bits_to_go == 8);    //! byte alignment must already be in place
+       writeUnit (bs, p);
+     }
+   }
+ 
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Copies the frame_mbs_only flag, the cropping flag and rectangle,
+  *    and chroma_format_idc from active_sps into enc_picture.  The four
+  *    cropping offsets are zeroed when cropping is disabled.
+  ************************************************************************
+  */
+ void copy_params()
+ {
+   enc_picture->frame_mbs_only_flag = active_sps->frame_mbs_only_flag;
+   enc_picture->frame_cropping_flag = active_sps->frame_cropping_flag;
+   enc_picture->chroma_format_idc   = active_sps->chroma_format_idc;
+ 
+   if (!active_sps->frame_cropping_flag)
+   {
+     // no cropping: clear the whole rectangle
+     enc_picture->frame_cropping_rect_left_offset   = 0;
+     enc_picture->frame_cropping_rect_right_offset  = 0;
+     enc_picture->frame_cropping_rect_top_offset    = 0;
+     enc_picture->frame_cropping_rect_bottom_offset = 0;
+     return;
+   }
+ 
+   enc_picture->frame_cropping_rect_left_offset   = active_sps->frame_cropping_rect_left_offset;
+   enc_picture->frame_cropping_rect_right_offset  = active_sps->frame_cropping_rect_right_offset;
+   enc_picture->frame_cropping_rect_top_offset    = active_sps->frame_cropping_rect_top_offset;
+   enc_picture->frame_cropping_rect_bottom_offset = active_sps->frame_cropping_rect_bottom_offset;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Encodes a frame picture
+  *
+  * \param frame
+  *    Picture that receives the coded slices, bit count and distortion.
+  * \param rd_pass
+  *    Selects which global holds the newly allocated reconstruction:
+  *    2 -> enc_frame_picture3, 1 -> enc_frame_picture2, anything else
+  *    -> enc_frame_picture.  enc_picture is pointed at the same picture.
+  ************************************************************************
+  */
+ void frame_picture (Picture *frame, int rd_pass)
+ {
+   StorablePicture **slot;
+   StorablePicture *pic;
+ 
+   img->structure = FRAME;
+   img->PicSizeInMbs = img->FrameSizeInMbs;
+ 
+   // pick the global that owns the reconstruction of this pass
+   switch (rd_pass)
+   {
+   case 2:
+     slot = &enc_frame_picture3;
+     break;
+   case 1:
+     slot = &enc_frame_picture2;
+     break;
+   default:
+     slot = &enc_frame_picture;
+     break;
+   }
+ 
+   pic = alloc_storable_picture (img->structure, img->width, img->height, img->width_cr, img->height_cr);
+   *slot = pic;
+ 
+   img->ThisPOC = pic->poc = img->framepoc;
+   pic->top_poc    = img->toppoc;
+   pic->bottom_poc = img->bottompoc;
+ 
+   pic->frame_poc = img->framepoc;
+ 
+   pic->pic_num     = img->frame_num;
+   pic->frame_num   = img->frame_num;
+   pic->coded_frame = 1;
+ 
+   pic->MbaffFrameFlag = img->MbaffFrameFlag = (input->MbInterlace != FRAME_CODING);
+ 
+   enc_picture = pic;
+   copy_params();
+ 
+   // emulation prevention bits of this pass are counted in the frame counter
+   stats->em_prev_bits_frm = 0;
+   stats->em_prev_bits = &stats->em_prev_bits_frm;
+ 
+   img->fld_flag = 0;
+   code_a_picture(frame);
+ 
+   // bit count is taken from the first partition of img->currentSlice
+   frame->bits_per_picture = 8 * ((((img->currentSlice)->partArr[0]).bitstream)->byte_pos);
+ 
+   if (img->structure == FRAME)
+   {
+     find_distortion (snr, img);
+     frame->distortion_y = snr->snr_y;
+     frame->distortion_u = snr->snr_u;
+     frame->distortion_v = snr->snr_v;
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Encodes a field picture, consisting of top and bottom field
+  *
+  *    The top field is coded and stored in the DPB first, then the bottom
+  *    field is coded.  img->number, img->buf_cycle, img->height and
+  *    img->height_cr are switched to field values here and switched back
+  *    by distortion_fld() at the end.
+  *
+  * \param top
+  *    Receives the top field bit count and the distortion of the field pair.
+  * \param bottom
+  *    Receives the bottom field bit count.
+  *
+  * \note
+  *    NOTE(review): code_a_picture() is called with the globals top_pic and
+  *    bottom_pic rather than with the parameters; the callers visible in
+  *    this file pass exactly those globals, so the two currently coincide.
+  ************************************************************************
+  */
+ void field_picture (Picture *top, Picture *bottom)
+ {
+   //Rate control
+   // NOTE(review): old_pic_type is saved below but never read again in this function.
+   int old_pic_type;              // picture type of top field used for rate control    
+   int TopFieldBits;
+   
+   //Rate control
+   old_pic_type = img->type;
+ 
+   stats->em_prev_bits_fld = 0;
+   stats->em_prev_bits = &stats->em_prev_bits_fld;
+   // Switch the picture counter, buffer cycle and heights to field units
+   img->number *= 2;
+   img->buf_cycle *= 2;
+   img->height = (input->img_height+img->auto_crop_bottom) / 2; 
+   img->height_cr = img->height_cr_frame / 2;
+   img->fld_flag = 1;
+   img->PicSizeInMbs = img->FrameSizeInMbs/2;
+   // Top field
+   
+   // NOTE(review): img->structure still holds its previous value at this point;
+   // TOP_FIELD is only assigned a few lines below.
+   enc_top_picture  = alloc_storable_picture (img->structure, img->width, img->height, img->width_cr, img->height_cr);
+   enc_top_picture->poc=img->toppoc;
+   enc_top_picture->frame_poc = img->toppoc;
+   enc_top_picture->pic_num = img->frame_num;
+   enc_top_picture->frame_num = img->frame_num;
+   enc_top_picture->coded_frame = 0;
+   enc_top_picture->MbaffFrameFlag = img->MbaffFrameFlag = FALSE;
+   img->ThisPOC = img->toppoc;
+   
+   img->structure = TOP_FIELD;
+   enc_picture = enc_top_picture;
+   copy_params();
+ 
+   put_buffer_top ();
+   init_field ();
+   // B slices only: decremented for the top field, incremented again for the bottom field below
+   if (img->type == B_SLICE)
+     nextP_tr_fld--;
+ 
+ 
+   img->fld_flag = 1;
+  
+   //Rate control
+   if(input->RCEnable)
+   {
+     img->BasicUnit=input->basicunit;
+ 
+     if(input->PicInterlace==FIELD_CODING)
+       rc_init_pict(0,1,1); 
+     else
+       rc_init_pict(0,1,0);
+ 
+     img->qp  = updateQuantizationParameter(1); 
+    }
+   img->TopFieldFlag=1;
+ 
+   code_a_picture(top_pic);
+   // Top field: structure is set after coding (the bottom field sets it before coding)
+   enc_picture->structure = 1;
+     
+   store_picture_in_dpb(enc_top_picture);
+ 
+   // Bit count is taken from the first partition of img->currentSlice
+   top->bits_per_picture = 8 * ((((img->currentSlice)->partArr[0]).bitstream)->byte_pos);
+ 
+   //Rate control
+   TopFieldBits=top->bits_per_picture;
+ 
+   //  Bottom field
+   enc_bottom_picture  = alloc_storable_picture (img->structure, img->width, img->height, img->width_cr, img->height_cr);
+   enc_bottom_picture->poc=img->bottompoc;
+   enc_bottom_picture->frame_poc = img->bottompoc;
+   enc_bottom_picture->pic_num = img->frame_num;
+   enc_bottom_picture->frame_num = img->frame_num;
+   enc_bottom_picture->coded_frame = 0;
+   enc_bottom_picture->MbaffFrameFlag = img->MbaffFrameFlag = FALSE;
+   img->ThisPOC = img->bottompoc;
+   img->structure = BOTTOM_FIELD;
+   enc_picture = enc_bottom_picture;
+   copy_params();
+   put_buffer_bot ();
+   img->number++;
+ 
+   init_field ();
+ 
+   // B slices only: undo the decrement made for the top field
+   if (img->type == B_SLICE)
+     nextP_tr_fld++;             //check once coding B field
+ 
+  // Bottom field of an intra picture is coded as P (or B when BRefPictures == 2)
+  // unless IntraBottom is 1.
+  // NOTE(review): img->type is not restored afterwards in this function.
+  if (img->type == I_SLICE && input->IntraBottom!=1)
+    img->type = (input->BRefPictures == 2) ? B_SLICE : P_SLICE;
+ 
+   img->fld_flag = 1;
+ 
+   //Rate control
+   if(input->RCEnable)
+   {
+     setbitscount(TopFieldBits);
+     rc_init_pict(0,0,0); 
+     img->qp  = updateQuantizationParameter(0); 
+   }
+   img->TopFieldFlag=0;
+ 
+   enc_picture->structure = 2;
+   code_a_picture(bottom_pic);
+ 
+   bottom->bits_per_picture = 8 * ((((img->currentSlice)->partArr[0]).bitstream)->byte_pos);
+ 
+   // the distortion for a field coded frame (consisting of top and bottom field)
+   // lives in the top->distortion variables, the bottom-> are dummies
+   distortion_fld (&top->distortion_y, &top->distortion_u, &top->distortion_v);
+ 
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Measures the distortion of a field-coded picture pair.
+  *
+  *    Restores img->number, img->buf_cycle and the picture heights to
+  *    frame values, combines the two fields, points the original-picture
+  *    pointers at the frame buffers and runs find_distortion().
+  *
+  * \param dis_fld_y
+  *    receives snr->snr_y
+  * \param dis_fld_u
+  *    receives snr->snr_u
+  * \param dis_fld_v
+  *    receives snr->snr_v
+  ************************************************************************
+  */
+ static void distortion_fld (float *dis_fld_y, float *dis_fld_u, float *dis_fld_v)
+ {
+   // back from field units to frame units
+   img->number    = img->number / 2;
+   img->buf_cycle = img->buf_cycle / 2;
+   img->height    = input->img_height + img->auto_crop_bottom;
+   img->height_cr = img->height_cr_frame;
+ 
+   combine_field ();
+ 
+   // compare against the original frame picture
+   imgY_org  = imgY_org_frm;
+   imgUV_org = imgUV_org_frm;
+   find_distortion (snr, img);
+ 
+   *dis_fld_y = snr->snr_y;
+   *dis_fld_u = snr->snr_u;
+   *dis_fld_v = snr->snr_v;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Picture Structure Decision
+  *
+  *    Sums the Y/U/V distortion and the bit counts of the frame-coded and
+  *    the field-coded version of a picture and hands them, with a
+  *    QP-dependent lambda, to decide_fld_frame().
+  *
+  * \param frame
+  *    frame-coded picture
+  * \param top
+  *    top field; carries the distortion of the whole field pair
+  * \param bot
+  *    bottom field; only its bit count is used
+  *
+  * \return
+  *    the result of decide_fld_frame()
+  ************************************************************************
+  */
+ static int picture_structure_decision (Picture *frame, Picture *top, Picture *bot)
+ {
+   float  dist_frame, dist_field;
+   int    bits_frame, bits_field;
+   double lambda;
+ 
+   // the B-slice weighting factor of the original was 1 for every slice type
+   lambda = 0.68 * pow (2, img->bitdepth_lambda_scale + ((img->qp - SHIFT_QP) / 3.0));
+ 
+   dist_frame = frame->distortion_y + frame->distortion_u + frame->distortion_v;
+   bits_frame = frame->bits_per_picture;
+ 
+   //! all distortions of a field picture are accumulated in the top field
+   dist_field = top->distortion_y + top->distortion_u + top->distortion_v;
+   bits_field = top->bits_per_picture + bot->bits_per_picture;
+ 
+   return decide_fld_frame (dist_frame, dist_field, bits_field, bits_frame, lambda);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Field Mode Buffer
+  *
+  *    Switches back to the frame buffers and stores the given per-plane
+  *    values in snr.
+  *
+  * \param bit_field
+  *    not used
+  * \param snr_field_y
+  *    value stored in snr->snr_y
+  * \param snr_field_u
+  *    value stored in snr->snr_u
+  * \param snr_field_v
+  *    value stored in snr->snr_v
+  ************************************************************************
+  */
+ static void field_mode_buffer (int bit_field, float snr_field_y, float snr_field_u, float snr_field_v)
+ {
+   (void) bit_field;
+ 
+   put_buffer_frame ();
+ 
+   snr->snr_v = snr_field_v;
+   snr->snr_u = snr_field_u;
+   snr->snr_y = snr_field_y;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Frame Mode Buffer
+  *
+  *    Switches to the frame buffers.  When picture- or MB-level interlace
+  *    coding is in use, the top and bottom field buffers are additionally
+  *    set up with field-sized img parameters before the frame parameters
+  *    are restored and the given values are stored in snr.
+  *
+  * \param bit_frame
+  *    not used
+  * \param snr_frame_y
+  *    value stored in snr->snr_y (interlace case only)
+  * \param snr_frame_u
+  *    value stored in snr->snr_u (interlace case only)
+  * \param snr_frame_v
+  *    value stored in snr->snr_v (interlace case only)
+  ************************************************************************
+  */
+ static void frame_mode_buffer (int bit_frame, float snr_frame_y, float snr_frame_u, float snr_frame_v)
+ {
+   (void) bit_frame;
+ 
+   put_buffer_frame ();
+ 
+   // pure frame coding: nothing else to do
+   if ((input->PicInterlace == FRAME_CODING) && (input->MbInterlace == FRAME_CODING))
+     return;
+ 
+   // temporarily switch to field dimensions and field numbering
+   img->height    /= 2;
+   img->height_cr /= 2;
+   img->number    *= 2;
+ 
+   put_buffer_top ();
+ 
+   img->number++;
+   put_buffer_bot ();
+ 
+   // back to frame numbering and frame dimensions
+   img->number   /= 2;
+   img->height    = input->img_height + img->auto_crop_bottom;
+   img->height_cr = img->height_cr_frame;
+ 
+   snr->snr_y = snr_frame_y;
+   snr->snr_u = snr_frame_u;
+   snr->snr_v = snr_frame_v;
+ 
+   put_buffer_frame ();
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Resets img->dec_ref_pic_marking_buffer to NULL.
+  *    (mmco initializations should go here)
+  ************************************************************************
+  */
+ static void init_dec_ref_pic_marking_buffer()
+ {
+   img->dec_ref_pic_marking_buffer = NULL;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initializes the parameters for a new frame
+  *
+  *    Resets the macroblock/slice position state in img, marks every
+  *    macroblock as not yet belonging to a slice (slice_nr = -1), derives
+  *    img->tr for the picture and, when rate control is disabled, selects
+  *    img->qp from the configured quantizer values. Pictures with
+  *    img->b_frame_to_code == 0 and pictures with a non-zero value take
+  *    separate paths. Finally the SEI helpers are updated and the
+  *    reference-marking state in img is cleared.
+  *
+  * \par Side Effects
+  *    Writes the file-level variables last_P_no and nextP_tr_frm and many
+  *    fields of img and stats.
+  ************************************************************************
+  */
+ static void init_frame ()
+ {
+   int i;
+   int prevP_no, nextP_no;
+   
+   // Select the frame-mode last_P_no table for the code below.
+   last_P_no = last_P_no_frm;
+ 
+   img->current_mb_nr = 0;
+   img->current_slice_nr = 0;
+   stats->bit_slice = 0;
+ 
+   img->mb_y = img->mb_x = 0;
+   img->block_y = img->pix_y = img->pix_c_y = 0; 
+   img->block_x = img->pix_x = img->block_c_x = img->pix_c_x = 0;
+ 
+   // The 'slice_nr' of each macroblock is set to -1 here, to guarantee the correct encoding 
+   // with FMO (if no FMO, encoding is correct without following assignment), 
+   // for which MBs may not be encoded with scan order
+   for(i=0;i< ((img->width/MB_BLOCK_SIZE)*(img->height/MB_BLOCK_SIZE));i++)
+     img->mb_data[i].slice_nr=-1;
+ 	
+   if (img->b_frame_to_code == 0)
+   {
+     // img->tr advances by (jumpd + 1) per coded picture, offset by the start of this IGOP.
+     img->tr = start_tr_in_this_IGOP + IMG_NUMBER * (input->jumpd + 1);
+     
+     img->imgtr_last_P_frm = img->imgtr_next_P_frm;
+     img->imgtr_next_P_frm = img->tr;
+     
+ #ifdef _ADAPT_LAST_GROUP_
+     // The final picture of the sequence is placed at the configured last_frame position.
+     if (input->last_frame && img->number + 1 == input->no_frames)
+       img->tr = input->last_frame;
+ #endif
+     
+     if (IMG_NUMBER != 0 && input->successive_Bframe != 0)     // B pictures to encode
+       nextP_tr_frm = img->tr;
+     
+     //Rate control
+     if(!input->RCEnable)                  // without using rate control
+     {
+       // NOTE: this if/else chain has no braces. With _CHANGE_QP_ defined, the first
+       // 'else' pairs with the qp2start test and the second with the I_SLICE test;
+       // without it, the qp0 assignment is the sole body of the I_SLICE test.
+       if (img->type == I_SLICE)
+ #ifdef _CHANGE_QP_
+         if (input->qp2start > 0 && img->tr >= input->qp2start)
+           img->qp = input->qp02;
+         else
+ #endif    
+         img->qp = input->qp0;   // set quant. parameter for I-frame
+       else
+       {
+         // Non-reference pictures (nal_reference_idc == 0) get DispPQPOffset added.
+ #ifdef _CHANGE_QP_
+         if (input->qp2start > 0 && img->tr >= input->qp2start)
+           img->qp = input->qpN2 + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+         else
+ #endif
+           img->qp = input->qpN + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+         
+         // SP slices override the value chosen above.
+         if (img->type == SP_SLICE)
+         {
+           img->qp = input->qpsp;
+           img->qpsp = input->qpsp_pred;
+         }   
+       }
+     }
+ 
+     img->mb_y_intra = img->mb_y_upd;  //  img->mb_y_intra indicates which GOB to intra code for this frame
+     
+     if (input->intra_upd > 0) // if error robustness, find next GOB to update
+     {
+       // Row index cycles through the macroblock rows, advancing every intra_upd pictures.
+       img->mb_y_upd = (IMG_NUMBER / input->intra_upd) % (img->height / MB_BLOCK_SIZE);
+     }
+   }
+   else
+   {
+     // Positions of the two pictures computed from IMG_NUMBER - 1 and IMG_NUMBER.
+     img->p_interval = input->jumpd + 1;
+     prevP_no = start_tr_in_this_IGOP + (IMG_NUMBER - 1) * img->p_interval;
+     nextP_no = start_tr_in_this_IGOP + (IMG_NUMBER) * img->p_interval;
+     
+ #ifdef _ADAPT_LAST_GROUP_
+     // Fill last_P_no with positions stepping back by p_interval from prevP_no.
+     last_P_no[0] = prevP_no;
+     for (i = 1; i < img->buf_cycle; i++)
+       last_P_no[i] = last_P_no[i - 1] - img->p_interval;
+     
+     if (input->last_frame && img->number + 1 == input->no_frames)
+     {
+       nextP_no = input->last_frame;
+       img->p_interval = nextP_no - prevP_no;
+     }
+ #endif
+     
+     // Spacing between consecutive pictures inside one (jumpd + 1) interval.
+     img->b_interval =
+       ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+ 
+     if (input->PyramidCoding == 3)
+       img->b_interval = 1.0;
+ 
+     // With PyramidCoding the position comes from gop_structure[].display_no,
+     // otherwise directly from b_frame_to_code.
+     if (input->PyramidCoding)
+       img->tr = prevP_no + (int) (img->b_interval  * (double) (1 + gop_structure[img->b_frame_to_code - 1].display_no));      // from prev_P
+     else      
+       img->tr = prevP_no + (int) (img->b_interval * (double) img->b_frame_to_code);      // from prev_P
+     
+ 
+     // Keep img->tr strictly below nextP_no.
+     if (img->tr >= nextP_no)
+       img->tr = nextP_no - 1;
+     //Rate control
+     if(!input->RCEnable && input->PyramidCoding == 0)                  // without using rate control   
+     {    
+ #ifdef _CHANGE_QP_
+       if (input->qp2start > 0 && img->tr >= input->qp2start)
+       {
+         img->qp = input->qpB2;
+       }
+       else
+ #endif
+       {
+         img->qp = input->qpB;
+       }
+ 
+       // Pictures with nal_reference_idc set use the B QP plus an offset,
+       // clipped to [-bitdepth_luma_qp_scale, 51].
+       if (img->nal_reference_idc)
+       {
+ #ifdef _CHANGE_QP_
+         if (input->qp2start > 0 && img->tr >= input->qp2start)
+         {
+           img->qp = Clip3(-img->bitdepth_luma_qp_scale,51,input->qpB2 + input->qpBRS2Offset);
+         }
+         else
+ #endif
+         {
+           img->qp = Clip3(-img->bitdepth_luma_qp_scale,51,input->qpB + input->qpBRSOffset);
+         }
+       }
+     }
+     else if (input->PyramidCoding !=0)  
+     {
+       // Note that _CHANGE_QP_ does not anymore work for gop_structure. Needs to be fixed
+       // This branch is taken whenever PyramidCoding is non-zero, regardless of RCEnable.
+       img->qp =  gop_structure[img->b_frame_to_code - 1].slice_qp;
+     }
+   }
+   
+   UpdateSubseqInfo (img->layer);        // Tian Dong (Sept 2002)
+   UpdateSceneInformation (0, 0, 0, -1); // JVT-D099, scene information SEI, nothing included by default
+ 
+   //! Commented out by StW, needs fixing in SEI.h to keep the trace file clean
+   //  PrepareAggregationSEIMessage ();
+ 
+   img->no_output_of_prior_pics_flag = 0;
+   img->long_term_reference_flag = 0;
+ 
+   init_dec_ref_pic_marking_buffer();
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initializes the parameters for a new field
+  *
+  *    Field counterpart of init_frame(). For the duration of the call
+  *    input->jumpd and input->successive_Bframe are doubled and
+  *    img->number and img->buf_cycle are halved; these are put back
+  *    before returning, with img->number rebuilt as
+  *    2 * (img->number / 2) + img->fld_type.
+  *    In between, img->tr is derived for the field and, when rate control
+  *    is disabled, img->qp is selected from the configured quantizers.
+  *
+  * \par Side Effects
+  *    Writes the file-level variables last_P_no and nextP_tr_fld and many
+  *    fields of img and stats.
+  ************************************************************************
+  */
+ static void init_field ()
+ {
+   int i;
+   int prevP_no, nextP_no;
+ 
+   // Select the field-mode last_P_no table for the code below.
+   last_P_no = last_P_no_fld;
+ 
+   img->current_mb_nr = 0;
+   img->current_slice_nr = 0;
+   stats->bit_slice = 0;
+ 
+   // Temporary field-unit scaling; undone at the end of this function.
+   input->jumpd *= 2;
+   input->successive_Bframe *= 2;
+   img->number /= 2;
+   img->buf_cycle /= 2;
+ 
+   img->mb_y = img->mb_x = 0;
+   img->block_y = img->pix_y = img->pix_c_y = 0; // define vertical positions
+   img->block_x = img->pix_x = img->block_c_x = img->pix_c_x = 0;        // define horizontal positions
+ 
+   if (!img->b_frame_to_code)
+   {
+     // (jumpd + 2) uses the already doubled jumpd; fld_type offsets the second field by one.
+     img->tr = img->number * (input->jumpd + 2) + img->fld_type;
+     
+     if (!img->fld_type)
+     {
+       img->imgtr_last_P_fld = img->imgtr_next_P_fld;
+       img->imgtr_next_P_fld = img->tr;
+     }
+     
+ #ifdef _ADAPT_LAST_GROUP_
+     if (input->last_frame && img->number + 1 == input->no_frames)
+       img->tr = input->last_frame;
+ #endif
+     if (img->number != 0 && input->successive_Bframe != 0)    // B pictures to encode
+       nextP_tr_fld = img->tr;
+     
+       //Rate control
+     if(!input->RCEnable)                  // without using rate control
+     {
+       if (img->type == I_SLICE)
+       {
+ #ifdef _CHANGE_QP_
+         if (input->qp2start > 0 && img->tr >= input->qp2start)
+           img->qp = input->qp02;
+         else
+ #endif    
+           img->qp = input->qp0;   // set quant. parameter for I-frame
+       }
+       else
+       {
+         // Non-reference pictures (nal_reference_idc == 0) get DispPQPOffset added.
+ #ifdef _CHANGE_QP_
+         if (input->qp2start > 0 && img->tr >= input->qp2start)
+           img->qp = input->qpN2 + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+         else
+ #endif
+           img->qp = input->qpN + (img->nal_reference_idc ? 0 : input->DispPQPOffset);
+         // SP slices override the value chosen above.
+         if (img->type == SP_SLICE)
+         {
+           img->qp = input->qpsp;
+           img->qpsp = input->qpsp_pred;
+         }
+       }
+     }
+     img->mb_y_intra = img->mb_y_upd;  //  img->mb_y_intra indicates which GOB to intra code for this frame
+ 
+     if (input->intra_upd > 0) // if error robustness, find next GOB to update
+     {
+       // mb_y_upd is a macroblock ROW index, so it must wrap at the number of
+       // macroblock rows (height), as init_frame() does. The previous modulus,
+       // img->width / MB_BLOCK_SIZE, is the number of macroblock columns.
+       img->mb_y_upd =
+         (img->number / input->intra_upd) % (img->height / MB_BLOCK_SIZE);
+     }
+   }
+   else
+   {
+     img->p_interval = input->jumpd + 2;
+     prevP_no = (img->number - 1) * img->p_interval + img->fld_type;
+     nextP_no = img->number * img->p_interval + img->fld_type;
+ #ifdef _ADAPT_LAST_GROUP_
+     // last_P_no is filled in pairs (two entries per step), stepping back by
+     // p_interval; only the first entry differs between top and bottom field.
+     if (!img->fld_type)       // top field
+     {
+       last_P_no[0] = prevP_no + 1;
+       last_P_no[1] = prevP_no;
+       for (i = 1; i <= img->buf_cycle; i++)
+       {
+         last_P_no[2 * i] = last_P_no[2 * i - 2] - img->p_interval;
+         last_P_no[2 * i + 1] = last_P_no[2 * i - 1] - img->p_interval;
+       }
+     }
+     else                      // bottom field
+     {
+       last_P_no[0] = nextP_no - 1;
+       last_P_no[1] = prevP_no;
+       for (i = 1; i <= img->buf_cycle; i++)
+       {
+         last_P_no[2 * i] = last_P_no[2 * i - 2] - img->p_interval;
+         last_P_no[2 * i + 1] = last_P_no[2 * i - 1] - img->p_interval;
+       }
+     }
+     
+     if (input->last_frame && img->number + 1 == input->no_frames)
+     {
+       nextP_no = input->last_frame;
+       img->p_interval = nextP_no - prevP_no;
+     }
+ #endif
+     img->b_interval =
+       ((double) (input->jumpd + 1) / (input->successive_Bframe + 1.0) );
+     
+     if (input->PyramidCoding == 3)
+       img->b_interval = 1.0;
+     
+     // With PyramidCoding the position comes from gop_structure[].display_no,
+     // otherwise directly from b_frame_to_code.
+     if (input->PyramidCoding)
+       img->tr = prevP_no + (int) ((img->b_interval + 1.0) * (double) (1 + gop_structure[img->b_frame_to_code - 1].display_no));      // from prev_P
+     else      
+       img->tr = prevP_no + (int) ((img->b_interval + 1.0) * (double) img->b_frame_to_code);      // from prev_P
+     
+     
+     // Keep img->tr strictly below nextP_no.
+     if (img->tr >= nextP_no)
+       img->tr = nextP_no - 1; // ?????
+     //Rate control
+     if(!input->RCEnable && input->PyramidCoding == 0)                  // without using rate control
+     {
+ #ifdef _CHANGE_QP_
+       if (input->qp2start > 0 && img->tr >= input->qp2start)
+         img->qp = input->qpB2;
+       else
+ #endif
+         img->qp = input->qpB;
+       // Pictures with nal_reference_idc set use the B QP plus an offset,
+       // clipped to [-bitdepth_luma_qp_scale, 51].
+       if (img->nal_reference_idc)
+       {
+ #ifdef _CHANGE_QP_
+         if (input->qp2start > 0 && img->tr >= input->qp2start)
+           img->qp = Clip3(-img->bitdepth_luma_qp_scale,51,input->qpB2 + input->qpBRS2Offset);
+         else
+ #endif
+           img->qp = Clip3(-img->bitdepth_luma_qp_scale,51,input->qpB + input->qpBRSOffset);
+         
+       }
+     }
+     else if (input->PyramidCoding != 0)
+     {          
+       // Taken whenever PyramidCoding is non-zero, regardless of RCEnable.
+       img->qp =  gop_structure[img->b_frame_to_code - 1].slice_qp;
+     }
+   }
+   // Undo the temporary field-unit scaling applied at the top.
+   input->jumpd /= 2;
+   input->successive_Bframe /= 2;
+   img->buf_cycle *= 2;
+   img->number = 2 * img->number + img->fld_type;
+ }
+ 
+ 
+ /* Clamps val to the range [min, max]. This is a function-like macro: every
+  * argument can be evaluated more than once, so pass only expressions
+  * without side effects. */
+ #define Clip(min,max,val) (((val)<(min))?(min):(((val)>(max))?(max):(val)))
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate Full Pel Representation
+  *
+  *    For every (x, y) of an xsize by ysize grid, reads the sample at row
+  *    (y + IMG_PAD_SIZE) << 2, column (x + IMG_PAD_SIZE) << 2 of Fourthpel
+  *    and stores it through PutPel_11() at linear position
+  *    y * xsize + x of Fullpel.
+  *
+  * \param Fourthpel
+  *    source plane, indexed [row][column]
+  * \param Fullpel
+  *    destination buffer, addressed linearly
+  * \param xsize
+  *    number of samples per destination row
+  * \param ysize
+  *    number of destination rows
+  ************************************************************************
+  */
+ static void GenerateFullPelRepresentation (pel_t ** Fourthpel,
+                                            pel_t * Fullpel, int xsize,
+                                            int ysize)
+ {
+   int row, col;
+ 
+   for (row = 0; row < ysize; row++)
+   {
+     pel_t *src_line = Fourthpel[(row + IMG_PAD_SIZE)<<2];
+     const int dst_base = row * xsize;
+ 
+     for (col = 0; col < xsize; col++)
+       PutPel_11 (Fullpel, dst_base + col, src_line[(col + IMG_PAD_SIZE)<<2]);
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Upsample the luma plane of a picture 4 times in each direction and
+  *    store the result in s->imgY_ups; a sample-resolution copy is stored
+  *    in s->imgY_11.
+  *
+  * \par Input:
+  *    s: picture whose imgY plane (size_x by size_y) is upsampled.
+  *    Nothing is done if s->imgY_ups or s->imgY_11 is already set.
+  *
+  * \par Output:
+  *    s->imgY_11 and s->imgY_ups are allocated and filled. If
+  *    WeightedPrediction, WeightedBiprediction or GenerateMultiplePPS is
+  *    enabled, s->imgY_11_w and s->imgY_ups_w are allocated as well, but
+  *    they are not written by this function.
+  *
+  * \par Side Effects:
+  *    Uses (writes) img4Y_tmp.  This should be moved to a static variable
+  *    in this module
+  ************************************************************************/
+ void UnifiedOneForthPix (StorablePicture *s)
+ {
+   int is;
+   int i, j, j4;
+   int ie2, je2, jj , jpad;
+   int maxy = s->size_y + 2 * IMG_PAD_SIZE - 1;   // last valid row of img4Y_tmp
+   int ii, i1;
+   imgpel **out4Y;
+   imgpel  *ref11;
+   imgpel  **imgY = s->imgY;
+   int size_x_minus1 = s->size_x - 1;
+   int size_y_minus1 = s->size_y - 1;
+ 
+   // don't upsample twice
+   if (s->imgY_ups || s->imgY_11)
+     return;
+   
+   s->imgY_11 = malloc ((s->size_x * s->size_y) * sizeof (imgpel));
+   if (NULL == s->imgY_11)
+     no_mem_exit("alloc_storable_picture: s->imgY_11");
+   
+   // Upsampled plane covers the picture plus IMG_PAD_SIZE samples on every side, times 4.
+   get_mem2Dpel (&(s->imgY_ups), (2*IMG_PAD_SIZE + s->size_y)*4, (2*IMG_PAD_SIZE + s->size_x)*4);
+   
+   if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+   {
+     s->imgY_11_w = malloc ((s->size_x * s->size_y) * sizeof (imgpel));
+     if (NULL == s->imgY_11_w)
+       no_mem_exit("alloc_storable_picture: s->imgY_11_w");
+     get_mem2Dpel (&(s->imgY_ups_w), (2*IMG_PAD_SIZE + s->size_y)*4, (2*IMG_PAD_SIZE + s->size_x)*4);
+   }
+   out4Y = s->imgY_ups;
+   ref11 = s->imgY_11;
+   
+   // Pass 1: horizontal filtering into img4Y_tmp at twice the horizontal
+   // resolution. Source coordinates are clamped to the picture, which
+   // replicates the border samples into the padding area.
+   for (j = -IMG_PAD_SIZE; j < s->size_y + IMG_PAD_SIZE; j++)
+   {
+     jj = max (0, min (size_y_minus1, j));
+     jpad = j + IMG_PAD_SIZE;
+     for (i = -IMG_PAD_SIZE; i < s->size_x + IMG_PAD_SIZE; i++)
+     {      
+       // Symmetric 6-tap sum over columns i-2 .. i+3 using ONE_FOURTH_TAP[0..2][0].
+       is =
+         (ONE_FOURTH_TAP[0][0] *
+         (imgY[jj][max (0, min (size_x_minus1, i))] +
+          imgY[jj][max (0, min (size_x_minus1, i + 1))]) +
+         ONE_FOURTH_TAP[1][0] *
+         (imgY[jj][max (0, min (size_x_minus1, i - 1))] +
+          imgY[jj][max (0, min (size_x_minus1, i + 2))]) +
+         ONE_FOURTH_TAP[2][0] *
+         (imgY[jj][max (0, min (size_x_minus1, i - 2))] +
+          imgY[jj][max (0, min (size_x_minus1, i + 3))]));
+ 
+       img4Y_tmp[jpad][(i + IMG_PAD_SIZE) * 2] = imgY[jj][max (0, min (size_x_minus1, i))] * 1024;    // 1/1 pix pos
+       img4Y_tmp[jpad][(i + IMG_PAD_SIZE) * 2 + 1] = is * 32;  // 1/2 pix pos
+     }
+   }
+   
+   // Pass 2: vertical filtering of img4Y_tmp; results are rounded, scaled
+   // back by 1024 and clipped, then written to every 2nd column and every
+   // 2nd row of out4Y.
+   for (i = 0; i < (s->size_x + 2 * IMG_PAD_SIZE) * 2; i++)
+   {
+     ii = i * 2;
+     for (j = 0; j < s->size_y + 2 * IMG_PAD_SIZE; j++)
+     {
+       j4 = j * 4;
+       
+       // change for TML4, use 6 TAP vertical filter
+       is =
+         ( ONE_FOURTH_TAP[0][0] *(img4Y_tmp[j][i] + img4Y_tmp[min (maxy, j + 1)][i]) 
+         + ONE_FOURTH_TAP[1][0] * (img4Y_tmp[max (0, j - 1)][i] + img4Y_tmp[min (maxy, j + 2)][i]) 
+         + ONE_FOURTH_TAP[2][0] * (img4Y_tmp[max (0, j - 2)][i] + img4Y_tmp[min (maxy, j + 3)][i])) / 32;
+       
+       out4Y[j4    ][ii] = (pel_t) Clip3 (0, img->max_imgpel_value, (int) ((img4Y_tmp[j][i] + 512) / 1024));  // row not filtered vertically
+       out4Y[j4 + 2][ii] = (pel_t) Clip3 (0, img->max_imgpel_value, (int) ((is + 512) / 1024));   // vertical 1/2 pix row
+     }
+   }
+   
+   /* 1/4 pix */
+   /* luma */
+   // ie2 / je2: last column / row of out4Y written by pass 2.
+   ie2 = (s->size_x + 2 * IMG_PAD_SIZE - 1) * 4 + 2;
+   je2 = (s->size_y + 2 * IMG_PAD_SIZE - 1) * 4 + 2;
+   
+   // Pass 3a: odd columns on even rows are the rounded average of their
+   // left and right neighbours (right neighbour clamped to ie2).
+   for (j = 0; j < je2 + 2; j += 2)
+     for (i = 0; i < ie2 + 1; i += 2)
+     {
+       /*  '-'  */
+       out4Y[j][i+1] = (pel_t) (Clip3 (0, img->max_imgpel_value, (int) (out4Y[j][i] + out4Y[j][min (ie2, i + 2)] + 1) >> 1));
+     }
+     // NOTE: despite its indentation, the loop below is NOT nested in the
+     // loops above; it runs at function level after they have finished.
+     // Pass 3b: odd rows. Even columns average the samples above and below;
+     // odd columns average one of the two diagonals, chosen by position.
+     for (i = 0; i < ie2 + 2; i++)
+     {
+       ii = min (ie2, i + 1);
+       i1 = i - 1;
+       for (j = 0; j < je2 + 1; j += 2)
+       {
+         if ((i & 0x1) == 0)           /*  '|'  */
+         {
+           out4Y[j + 1][i] = (pel_t) (Clip3 (0, img->max_imgpel_value, (int) (out4Y[j][i] + out4Y[min (je2, j + 2)][i] + 1) >> 1));
+         }
+         else if (((j & 0x3) == 0 && (i & 0x3) == 1) || ((j & 0x3) == 2 && (i & 0x3) == 3))           /*  '/'  */
+         {
+           out4Y[j + 1][i] = (pel_t) (Clip3 (0, img->max_imgpel_value, (int) (out4Y[j][ii] + out4Y[min (je2, j + 2)][i1] + 1) >> 1));
+         }
+         else           /*  '\'  */
+         {
+           out4Y[j + 1][i] = (pel_t) (Clip3 (0, img->max_imgpel_value, (int) (out4Y[j][i1] + out4Y[min (je2, j + 2)][ii] + 1) >> 1));
+         }
+       }
+     }
+     
+     // Generate 1/1th pel representation (used for integer pel MV search)
+     GenerateFullPelRepresentation (out4Y, ref11, s->size_x, s->size_y);    
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Find SNR for all three components
+  *
+  *    Accumulates the squared sample differences (looked up in img->quad)
+  *    between the original picture and the coded picture, stores them in
+  *    snr->sse_*, converts them to snr->snr_* in dB, and updates the
+  *    running averages over all pictures and over pictures of the current
+  *    img->type.
+  *
+  * \par Side Effects
+  *    On the frame path (img->fld_flag == 0) the file-level pointers
+  *    imgY_org and imgUV_org are redirected to the *_frm buffers, and
+  *    enc_picture is set to enc_frame_picture when PicInterlace is
+  *    ADAPTIVE_CODING.
+  ************************************************************************
+  */
+ static void find_snr ()
+ {
+   int row, col, t;
+   int64 sse_luma = 0, sse_cb = 0, sse_cr = 0;
+   int luma_samples   = input->img_height * input->img_width;
+   int chroma_samples = input->img_height_cr * input->img_width_cr;
+   unsigned int max_pix_value_sqd = img->max_imgpel_value * img->max_imgpel_value;
+   unsigned int max_pix_value_sqd_uv = img->max_imgpel_value_uv * img->max_imgpel_value_uv;
+ 
+   if (img->fld_flag != 0)
+   {
+     // Field path: the coded samples are taken from imgY_com / imgUV_com.
+     for (row = 0; row < input->img_height; ++row)
+       for (col = 0; col < input->img_width; ++col)
+         sse_luma += img->quad[imgY_org[row][col] - imgY_com[row][col]];
+ 
+     if (img->yuv_format != YUV400)
+     {
+       for (row = 0; row < input->img_height_cr; row++)
+         for (col = 0; col < input->img_width_cr; col++)
+         {
+           sse_cb += img->quad[imgUV_org[0][row][col] - imgUV_com[0][row][col]];
+           sse_cr += img->quad[imgUV_org[1][row][col] - imgUV_com[1][row][col]];
+         }
+     }
+   }
+   else
+   {
+     // Frame path: compare the frame originals with enc_picture.
+     imgY_org  = imgY_org_frm;
+     imgUV_org = imgUV_org_frm;
+ 
+     if (input->PicInterlace == ADAPTIVE_CODING)
+       enc_picture = enc_frame_picture;
+ 
+     for (row = 0; row < input->img_height; ++row)
+       for (col = 0; col < input->img_width; ++col)
+         sse_luma += img->quad[imgY_org[row][col] - enc_picture->imgY[row][col]];
+ 
+     if (img->yuv_format != YUV400)
+     {
+       for (row = 0; row < input->img_height_cr; row++)
+         for (col = 0; col < input->img_width_cr; col++)
+         {
+           sse_cb += img->quad[imgUV_org[0][row][col] - enc_picture->imgUV[0][row][col]];
+           sse_cr += img->quad[imgUV_org[1][row][col] - enc_picture->imgUV[1][row][col]];
+         }
+     }
+   }
+ 
+   snr->sse_y = (float)sse_luma;
+   snr->sse_u = (float)sse_cb;
+   snr->sse_v = (float)sse_cr;
+ 
+ #if ZEROSNR
+   // Replace a zero error sum by 1 so the divisions below are never by zero.
+   if (sse_luma == 0)
+     sse_luma = 1;
+   if (sse_cb == 0)
+     sse_cb = 1;
+   if (sse_cr == 0)
+     sse_cr = 1;
+ #endif
+ 
+   //  Collecting SNR statistics
+   // When the luma error sum is zero, snr->snr_y/u/v keep their previous values.
+   if (sse_luma != 0)
+   {
+     snr->snr_y = (float) (10 * log10 (max_pix_value_sqd * (double)((double) luma_samples / sse_luma)));         // luma snr for current frame
+     if (img->yuv_format == YUV400)
+     {
+       snr->snr_u = 0.0;
+       snr->snr_v = 0.0;
+     }
+     else
+     {
+       snr->snr_u = (float) (10 * log10 (max_pix_value_sqd_uv * (double)((double) chroma_samples / sse_cb)));   // u chroma snr for current frame
+       snr->snr_v = (float) (10 * log10 (max_pix_value_sqd_uv * (double)((double) chroma_samples / sse_cr)));   // v chroma snr for current frame
+     }
+   }
+ 
+   if (img->number == 0)
+   {
+     // First picture: seed the first-frame values, the averages and the SSE means.
+     snr->snr_y1 = snr->snr_y;
+     snr->snr_ya = snr->snr_y1;
+     snr->msse_y = snr->sse_y;
+ 
+     snr->snr_u1 = snr->snr_u;
+     snr->snr_ua = snr->snr_u1;
+     snr->msse_u = snr->sse_u;
+ 
+     snr->snr_v1 = snr->snr_v;
+     snr->snr_va = snr->snr_v1;
+     snr->msse_v = snr->sse_v;
+ 
+     // Reset the five per-type averages.
+     for (t = 0; t < 5; t++)
+     {
+       snr->snr_yt[t] = 0.0;
+       snr->snr_ut[t] = 0.0;
+       snr->snr_vt[t] = 0.0;
+     }
+   }
+   else
+   {
+     // Number of pictures already folded into the running averages.
+     int prev_frames = snr->frame_ctr - 1;
+ 
+     snr->snr_ya = (float) (snr->snr_ya * prev_frames + snr->snr_y) / (prev_frames + 1); // average snr luma for all frames inc. first
+     snr->snr_ua = (float) (snr->snr_ua * prev_frames + snr->snr_u) / (prev_frames + 1); // average snr u chroma for all frames inc. first
+     snr->snr_va = (float) (snr->snr_va * prev_frames + snr->snr_v) / (prev_frames + 1); // average snr v chroma for all frames inc. first
+     snr->msse_y = (float) (snr->msse_y * prev_frames + snr->sse_y) / (prev_frames + 1); // average sse luma for all frames inc. first
+     snr->msse_u = (float) (snr->msse_u * prev_frames + snr->sse_u) / (prev_frames + 1); // average sse u chroma for all frames inc. first
+     snr->msse_v = (float) (snr->msse_v * prev_frames + snr->sse_v) / (prev_frames + 1); // average sse v chroma for all frames inc. first
+   }
+ 
+   // Running averages over the pictures coded with the current img->type.
+   snr->snr_yt[img->type] = (float) (snr->snr_yt[img->type] * (frame_ctr[img->type] - 1) + snr->snr_y) / ( frame_ctr[img->type] );
+   snr->snr_ut[img->type] = (float) (snr->snr_ut[img->type] * (frame_ctr[img->type] - 1) + snr->snr_u) / ( frame_ctr[img->type] );
+   snr->snr_vt[img->type] = (float) (snr->snr_vt[img->type] * (frame_ctr[img->type] - 1) + snr->snr_v) / ( frame_ctr[img->type] );
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Find distortion for all three components
+  *
+  *    Accumulates the squared sample differences (looked up in img->quad)
+  *    between the original and the coded picture and stores the raw sums
+  *    in snr->snr_y, snr->snr_u and snr->snr_v. No conversion to dB is
+  *    done here.
+  *
+  * \par Side Effects
+  *    When img->structure is FRAME the file-level pointers imgY_org and
+  *    imgUV_org are redirected to the *_frm buffers.
+  ************************************************************************
+  */
+ static void find_distortion ()
+ {
+   int row, col;
+   int64 sse_luma = 0, sse_cb = 0, sse_cr = 0;
+   int impix;
+ 
+   // Luma sample count; computed as in the original code but not used further.
+   impix = input->img_height * input->img_width;
+ 
+   if (img->structure != FRAME)
+   {
+     // Field structure: the coded samples are taken from imgY_com / imgUV_com.
+     for (row = 0; row < input->img_height; ++row)
+       for (col = 0; col < input->img_width; ++col)
+         sse_luma += img->quad[imgY_org[row][col] - imgY_com[row][col]];
+ 
+     if (img->yuv_format != YUV400)
+     {
+       for (row = 0; row < input->img_height_cr; row++)
+         for (col = 0; col < input->img_width_cr; col++)
+         {
+           sse_cb += img->quad[imgUV_org[0][row][col] - imgUV_com[0][row][col]];
+           sse_cr += img->quad[imgUV_org[1][row][col] - imgUV_com[1][row][col]];
+         }
+     }
+   }
+   else
+   {
+     // Frame structure: compare the frame originals with enc_picture.
+     imgY_org  = imgY_org_frm;
+     imgUV_org = imgUV_org_frm;
+ 
+     for (row = 0; row < input->img_height; ++row)
+       for (col = 0; col < input->img_width; ++col)
+         sse_luma += img->quad[imgY_org[row][col] - enc_picture->imgY[row][col]];
+ 
+     if (img->yuv_format != YUV400)
+     {
+       for (row = 0; row < input->img_height_cr; row++)
+         for (col = 0; col < input->img_width_cr; col++)
+         {
+           sse_cb += img->quad[imgUV_org[0][row][col] - enc_picture->imgUV[0][row][col]];
+           sse_cr += img->quad[imgUV_org[1][row][col] - enc_picture->imgUV[1][row][col]];
+         }
+     }
+   }
+ 
+   // Calculate real PSNR at find_snr_avg()
+   snr->snr_y = (float) sse_luma;
+   snr->snr_u = (float) sse_cb;
+   snr->snr_v = (float) sse_cr;
+ }
+ 
+   
+ /*!
+  ************************************************************************
+  * \brief
+  *    Just a placebo
+  *
+  * \param bits_slice
+  *    not read by this function
+  * \return
+  *    always FALSE
+  ************************************************************************
+  */
+ Boolean dummy_slice_too_big (int bits_slice)
+ {
+   (void) bits_slice;
+   return FALSE;
+ }
+ 
+ 
+ /*! 
+ ***************************************************************************
+ * \brief
+ *    For MB level field/frame coding: copies the data held in rdopt
+ *    (mode, cbp, QP values, coefficients, reference indices,
+ *    reconstructed samples, 8x8 modes and intra prediction modes) into
+ *    the current macroblock, img and enc_picture. When
+ *    img->MbaffFrameFlag is set the motion vectors are copied as well.
+ *
+ * \param bot_block
+ *    not read by this function
+ ***************************************************************************
+ */
+ void copy_rdopt_data (int bot_block)
+ {
+   int mb_nr = img->current_mb_nr;
+   Macroblock *currMB = &img->mb_data[mb_nr];
+   int i, j, k;
+ 
+   int bframe = (img->type == B_SLICE);
+   int mode;
+   int b8mode, b8pdir;
+   int block_y;
+ 
+   int list_offset = currMB->list_offset;
+ 
+   //===== macroblock level decisions =====
+   mode                = rdopt->mode;
+   currMB->mb_type     = rdopt->mb_type;   // copy mb_type 
+   currMB->cbp         = rdopt->cbp;   // copy cbp
+   currMB->cbp_blk     = rdopt->cbp_blk;   // copy cbp_blk
+   currMB->bi_pred_me  = rdopt->bi_pred_me;   // copy biprediction
+   img->i16offset      = rdopt->i16offset;
+ 
+   currMB->prev_qp=rdopt->prev_qp;
+   currMB->prev_delta_qp=rdopt->prev_delta_qp;
+   currMB->prev_cbp=rdopt->prev_cbp;
+   currMB->delta_qp=rdopt->delta_qp;
+   currMB->qp=rdopt->qp;
+ 
+   currMB->c_ipred_mode = rdopt->c_ipred_mode;
+ 
+   //===== coefficients =====
+   // AC: 4 luma blocks plus img->num_blk8x8_uv chroma blocks, 65 ints per [i][j][k] entry.
+   for (i = 0; i < 4+img->num_blk8x8_uv; i++)
+   {
+     for (j = 0; j < 4; j++)
+       for (k = 0; k < 2; k++)
+         memcpy(img->cofAC[i][j][k],rdopt->cofAC[i][j][k], 65 * sizeof(int));
+   }
+   
+   // DC: 3 entries, 18 ints per [i][k] entry.
+   for (i = 0; i < 3; i++)
+   {
+     for (k = 0; k < 2; k++)
+       memcpy(img->cofDC[i][k],rdopt->cofDC[i][k], 18 * sizeof(int));
+   }
+ 
+   //===== reference indices =====
+   // Copy one row of BLOCK_MULTIPLE reference indices at a time, then derive
+   // ref_pic_id from ref_pic_num using the index that was just stored.
+   for (j = 0; j < BLOCK_MULTIPLE; j++)
+   {
+     block_y = img->block_y + j;
+     memcpy(&enc_picture->ref_idx[LIST_0][block_y][img->block_x], rdopt->refar[LIST_0][j], BLOCK_MULTIPLE * sizeof(char));
+     for (i = 0; i < BLOCK_MULTIPLE; i++)
+       enc_picture->ref_pic_id [LIST_0][block_y][img->block_x + i] = 
+       enc_picture->ref_pic_num[LIST_0 + list_offset][(short)enc_picture->ref_idx[LIST_0][block_y][img->block_x+i]];
+   } 
+   // LIST_1 is handled the same way, but only for B slices.
+   if (bframe)
+   {
+     for (j = 0; j < BLOCK_MULTIPLE; j++)
+     {
+       block_y = img->block_y + j;
+       memcpy(&enc_picture->ref_idx[LIST_1][block_y][img->block_x], rdopt->refar[LIST_1][j], BLOCK_MULTIPLE * sizeof(char));
+       for (i = 0; i < BLOCK_MULTIPLE; i++)
+         enc_picture->ref_pic_id [LIST_1][block_y][img->block_x + i] = 
+         enc_picture->ref_pic_num[LIST_1 + list_offset][(short)enc_picture->ref_idx[LIST_1][block_y][img->block_x+i]];
+     }
+   }
+ 
+   //===== reconstruction values =====
+   for (j = 0; j < MB_BLOCK_SIZE; j++)
+     memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x],rdopt->rec_mbY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+   
+   // Chroma rows are copied only when the format has chroma planes.
+   if (img->yuv_format != YUV400)
+   {
+     for (j = 0; j < img->mb_cr_size_y; j++)
+     {
+       memcpy(&enc_picture->imgUV[0][img->pix_c_y + j][img->pix_c_x],rdopt->rec_mbU[j], img->mb_cr_size_x * sizeof(imgpel));
+       memcpy(&enc_picture->imgUV[1][img->pix_c_y + j][img->pix_c_x],rdopt->rec_mbV[j], img->mb_cr_size_x * sizeof(imgpel));
+     }
+   }
+ 
+   memcpy(currMB->b8mode,rdopt->b8mode, 4 * sizeof(int));
+   memcpy(currMB->b8pdir,rdopt->b8pdir, 4 * sizeof(int));
+ 
+   currMB->luma_transform_size_8x8_flag = rdopt->luma_transform_size_8x8_flag;
+   
+   //==== intra prediction modes ====
+   // P8x8, I4MB and I8MB copy the stored prediction modes; every other mode
+   // resets them to DC_PRED.
+   if (mode == P8x8)
+   {
+     memcpy(currMB->intra_pred_modes,rdopt->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+     for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+       memcpy(&img->ipredmode[j][img->block_x],&rdopt->ipredmode[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+   }
+   else if (mode != I4MB && mode != I8MB)
+   {
+     memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+     for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+       memset(&img->ipredmode[j][img->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+   }
+   // This condition is always true here: it is the complement of the test above.
+   else if (mode == I4MB || mode == I8MB)
+   {
+     memcpy(currMB->intra_pred_modes,rdopt->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+     for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+       memcpy(&img->ipredmode[j][img->block_x],&rdopt->ipredmode[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+   }
+ 
+   if (img->MbaffFrameFlag)
+   {
+     // motion vectors
+     copy_motion_vectors_MB ();
+     
+     if (!IS_INTRA(currMB))
+     {
+       // For each 4x4 block pick the vector of its enclosing 8x8 partition
+       // (index i/2 + 2*(j/2)). A list gets a zero vector when b8pdir says
+       // that list is not used (1 for LIST_0, 0 for LIST_1).
+       for (j = 0; j < 4; j++)
+         for (i = 0; i < 4; i++)
+         {
+           b8mode = currMB->b8mode[i/2+2*(j/2)];
+           b8pdir = currMB->b8pdir[i/2+2*(j/2)];
+ 
+           if (b8pdir!=1)
+           {
+             enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][0] = rdopt->all_mv[j][i][LIST_0][(short)rdopt->refar[LIST_0][j][i]][b8mode][0];
+             enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][1] = rdopt->all_mv[j][i][LIST_0][(short)rdopt->refar[LIST_0][j][i]][b8mode][1];
+           }
+           else
+           {
+             enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][0] = 0;
+             enc_picture->mv[LIST_0][j+img->block_y][i+img->block_x][1] = 0;
+           }
+           if (bframe)
+           {
+             if (b8pdir!=0)
+             {
+               enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][0] = rdopt->all_mv[j][i][LIST_1][(short)rdopt->refar[LIST_1][j][i]][b8mode][0];
+               enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][1] = rdopt->all_mv[j][i][LIST_1][(short)rdopt->refar[LIST_1][j][i]][b8mode][1];
+             }
+             else
+             {
+               enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][0] = 0;
+               enc_picture->mv[LIST_1][j+img->block_y][i+img->block_x][1] = 0;
+             }
+           }
+         }
+     }
+     else
+     {
+       // Intra macroblock: zero both vector components of the 4 blocks in each row.
+       for (j = 0; j < 4; j++)
+         memset(enc_picture->mv[LIST_0][j+img->block_y][img->block_x], 0, 2 * BLOCK_MULTIPLE * sizeof(short));
+       if (bframe)
+       {
+         for (j = 0; j < 4; j++)
+           memset(enc_picture->mv[LIST_1][j+img->block_y][img->block_x], 0, 2 * BLOCK_MULTIPLE * sizeof(short));
+       }
+     }
+   }  
+ }                             // end of copy_rdopt_data
+   
+ /*!
+  ************************************************************************
+  * \brief
+  *    Copies rdopt->all_mv and rdopt->pred_mv into img->all_mv and
+  *    img->pred_mv for all 4x4 block positions, both lists,
+  *    img->max_num_references references, 9 modes and both vector
+  *    components.
+  ************************************************************************
+  */
+ static void copy_motion_vectors_MB ()
+ {
+   int blk_x, blk_y, ref, mode, comp;
+ 
+   for (blk_x = 0; blk_x < 4; blk_x++)
+     for (blk_y = 0; blk_y < 4; blk_y++)
+       for (ref = 0; ref < img->max_num_references; ref++)
+         for (mode = 0; mode < 9; mode++)
+           for (comp = 0; comp < 2; comp++)
+           {
+             img->all_mv [blk_y][blk_x][LIST_0][ref][mode][comp] = rdopt->all_mv [blk_y][blk_x][LIST_0][ref][mode][comp];
+             img->all_mv [blk_y][blk_x][LIST_1][ref][mode][comp] = rdopt->all_mv [blk_y][blk_x][LIST_1][ref][mode][comp];
+ 
+             img->pred_mv[blk_y][blk_x][LIST_0][ref][mode][comp] = rdopt->pred_mv[blk_y][blk_x][LIST_0][ref][mode][comp];
+             img->pred_mv[blk_y][blk_x][LIST_1][ref][mode][comp] = rdopt->pred_mv[blk_y][blk_x][LIST_1][ref][mode][comp];
+           }
+ }
+   
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the "(NVB)" report line with frame_no and
+  *    stats->bit_ctr_parametersets_n, unless input->Verbose is 0.
+  *
+  * \param tmp_time
+  *    not read by this function
+  * \param me_time
+  *    not read by this function
+  ************************************************************************
+  */
+ static void ReportNALNonVLCBits(int tmp_time, int me_time)
+ {
+   (void) tmp_time;
+   (void) me_time;
+ 
+   //! Need to add type (i.e. SPS, PPS, SEI etc).
+   if (input->Verbose == 0)
+     return;
+ 
+   printf ("%04d(NVB)%8d \n", frame_no, stats->bit_ctr_parametersets_n);
+ }
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the "(IDR)" report line for the first picture (format
+  *    depends on input->Verbose), then moves stats->bit_ctr into
+  *    stats->bit_ctr_I and resets stats->bit_ctr to 0.
+  *
+  * \param tmp_time
+  *    overwritten with 0 before use (see the FIXME below)
+  * \param me_time
+  *    overwritten with 0 before use (see the FIXME below)
+  *
+  * \par Side Effects
+  *    With rate control enabled and PicInterlace or MbInterlace non-zero,
+  *    Iprev_bits is set to the current stats->bit_ctr.
+  ************************************************************************
+  */
+ static void ReportFirstframe(int tmp_time,int me_time)
+ {
+ #if 1  // FIXME: control with a runtime option.
+   // The timing columns are always printed as 0.
+   tmp_time = me_time = 0;
+ #endif
+ 
+   if (input->Verbose == 1)
+   {
+     printf ("%04d(IDR)%8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+       frame_no, stats->bit_ctr - stats->bit_ctr_n, 
+       img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+       img->fld_flag ? "FLD" : "FRM", img->nal_reference_idc); 
+   }
+   else if (input->Verbose == 2)
+   {
+     printf ("%04d(IDR)%8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+       frame_no, stats->bit_ctr - stats->bit_ctr_n,0,
+       img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+       img->fld_flag ? "FLD" : "FRM", intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active,img->rd_pass, img->nal_reference_idc);
+   }
+ 
+   //Rate control
+   // The former local 'bits' was assigned here but never read, so it has been
+   // removed. The only effect that remains is remembering the bit counter in
+   // Iprev_bits when any interlace mode is active.
+   if (input->RCEnable && (input->PicInterlace || input->MbInterlace))
+     Iprev_bits = stats->bit_ctr;
+ 
+   stats->bit_ctr_I = stats->bit_ctr;
+   stats->bit_ctr = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the report line for an intra picture, tagged "(IDR)" when
+  *    img->currentPicture->idr_flag is 1 and "(I)" otherwise. The layout
+  *    depends on input->Verbose (1 or 2); other values print nothing.
+  *
+  * \param tmp_time
+  *    overwritten with 0 before use (see the FIXME below)
+  * \param me_time
+  *    overwritten with 0 before use (see the FIXME below)
+  ************************************************************************
+  */
+ static void ReportIntra(int tmp_time, int me_time)
+ {
+   const int is_idr = (img->currentPicture->idr_flag == 1);
+   const char *pic_kind = img->fld_flag ? "FLD" : "FRM";
+ 
+ #if 1  // FIXME: control with a runtime option.
+   tmp_time = me_time = 0;
+ #endif
+ 
+   switch (input->Verbose)
+   {
+   case 1:
+     if (is_idr)
+       printf ("%04d(IDR)%8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+         frame_no, stats->bit_ctr - stats->bit_ctr_n,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_kind, img->nal_reference_idc);
+     else
+       printf ("%04d(I)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+         frame_no, stats->bit_ctr - stats->bit_ctr_n,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_kind, img->nal_reference_idc);
+     break;
+ 
+   case 2:
+     if (is_idr)
+       printf ("%04d(IDR)%8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+         frame_no, stats->bit_ctr - stats->bit_ctr_n, 0,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_kind, intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active, img->rd_pass, img->nal_reference_idc);
+     else
+       printf ("%04d(I)  %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+         frame_no, stats->bit_ctr - stats->bit_ctr_n, 0,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_kind, intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active, img->rd_pass, img->nal_reference_idc);
+     break;
+ 
+   default:
+     break;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the statistics line of an SP frame
+  *
+  * \param tmp_time
+  *    value for the first time column of the report (forced to 0 below)
+  * \param me_time
+  *    value for the second time column of the report (forced to 0 below)
+  ************************************************************************
+  */
+ static void ReportSP(int tmp_time, int me_time)
+ {
+ #if 1  // FIXME: control with a runtime option.
+   tmp_time = me_time = 0;
+ #endif
+   switch (input->Verbose)
+   {
+   case 1:   // short report line
+     {
+       const int   frame_bits = stats->bit_ctr - stats->bit_ctr_n;
+       const char *pic_struct = img->fld_flag ? "FLD" : "FRM";
+ 
+       printf ("%04d(SP) %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+         frame_no, frame_bits,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_struct, img->nal_reference_idc);
+       break;
+     }
+   case 2:   // extended report line, additionally shows the weighted prediction flag
+     {
+       const int   frame_bits = stats->bit_ctr - stats->bit_ctr_n;
+       const char *pic_struct = img->fld_flag ? "FLD" : "FRM";
+ 
+       printf ("%04d(SP) %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+         frame_no, frame_bits, active_pps->weighted_pred_flag,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_struct, intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active, img->rd_pass, img->nal_reference_idc);
+       break;
+     }
+   default:  // any other verbosity level prints nothing
+     break;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the statistics line of a B frame
+  *
+  * \param tmp_time
+  *    value for the first time column of the report (forced to 0 below)
+  * \param me_time
+  *    value for the second time column of the report (forced to 0 below)
+  ************************************************************************
+  */
+ static void ReportB(int tmp_time, int me_time)
+ {
+ #if 1  // FIXME: control with a runtime option.
+   tmp_time = me_time = 0;
+ #endif
+   switch (input->Verbose)
+   {
+   case 1:   // short report line
+     {
+       const int   frame_bits = stats->bit_ctr - stats->bit_ctr_n;
+       const char *pic_struct = img->fld_flag ? "FLD" : "FRM";
+ 
+       printf ("%04d(B)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+         frame_no, frame_bits,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_struct, img->nal_reference_idc);
+       break;
+     }
+   case 2:   // extended report line, additionally shows weighted_bipred_idc and the direct mode flag
+     {
+       const int   frame_bits = stats->bit_ctr - stats->bit_ctr_n;
+       const char *pic_struct = img->fld_flag ? "FLD" : "FRM";
+ 
+       printf ("%04d(B)  %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d %1d %2d %2d  %d   %d\n",
+         frame_no, frame_bits, active_pps->weighted_bipred_idc,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_struct, intras, img->direct_spatial_mv_pred_flag, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active, img->rd_pass, img->nal_reference_idc);
+       break;
+     }
+   default:  // any other verbosity level prints nothing
+     break;
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the statistics line of a P frame
+  *
+  * \param tmp_time
+  *    value for the first time column of the report (forced to 0 below)
+  * \param me_time
+  *    value for the second time column of the report (forced to 0 below)
+  ************************************************************************
+  */
+ static void ReportP(int tmp_time, int me_time)
+ {
+ #if 1  // FIXME: control with a runtime option.
+   tmp_time = me_time = 0;
+ #endif
+   switch (input->Verbose)
+   {
+   case 1:   // short report line
+     {
+       const int   frame_bits = stats->bit_ctr - stats->bit_ctr_n;
+       const char *pic_struct = img->fld_flag ? "FLD" : "FRM";
+ 
+       printf ("%04d(P)  %8d   %2d %7.3f %7.3f %7.3f %9d %7d    %3s    %d\n",
+         frame_no, frame_bits,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_struct, img->nal_reference_idc);
+       break;
+     }
+   case 2:   // extended report line, additionally shows the weighted prediction flag
+     {
+       const int   frame_bits = stats->bit_ctr - stats->bit_ctr_n;
+       const char *pic_struct = img->fld_flag ? "FLD" : "FRM";
+ 
+       printf ("%04d(P)  %8d %1d %2d %7.3f %7.3f %7.3f %9d %7d    %3s %5d   %2d %2d  %d   %d\n",
+         frame_no, frame_bits, active_pps->weighted_pred_flag,
+         img->qp, snr->snr_y, snr->snr_u, snr->snr_v, tmp_time, me_time,
+         pic_struct, intras, img->num_ref_idx_l0_active, img->num_ref_idx_l1_active, img->rd_pass, img->nal_reference_idc);
+       break;
+     }
+   default:  // any other verbosity level prints nothing
+     break;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Pads one image plane from its original size up to its coded size:
+  *    first every original row is extended to the right by repeating the
+  *    sample to its left, then the rows below are filled by repeating the
+  *    row above
+  ************************************************************************
+  */
+ static void PaddPlaneBorders (imgpel **plane, int org_x, int org_y, int coded_x, int coded_y)
+ {
+   int row, col;
+ 
+   // right border (original rows only)
+   for (row = 0; row < org_y; row++)
+   {
+     for (col = org_x; col < coded_x; col++)
+       plane [row][col] = plane [row][col - 1];
+   }
+ 
+   // bottom border (full coded width, so the padded right part is copied too)
+   for (row = org_y; row < coded_y; row++)
+   {
+     for (col = 0; col < coded_x; col++)
+       plane [row][col] = plane [row - 1][col];
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Padding of automatically added border for picture sizes that are not
+  *     multiples of macroblock/macroblock pair size
+  *
+  * \param org_size_x
+  *    original image horizontal size (luma)
+  * \param org_size_y
+  *    original image vertical size (luma)
+  * \param img_size_x
+  *    coded image horizontal size (luma)
+  * \param img_size_y
+  *    coded image vertical size (luma)
+  * \param org_size_x_cr
+  *    original image horizontal size (chroma)
+  * \param org_size_y_cr
+  *    original image vertical size (chroma)
+  * \param img_size_x_cr
+  *    coded image horizontal size (chroma)
+  * \param img_size_y_cr
+  *    coded image vertical size (chroma)
+  *
+  * \par side effects
+  *    writes the border samples of imgY_org_frm and, unless the format is
+  *    YUV400, of both planes of imgUV_org_frm
+  ************************************************************************
+  */
+ static void PaddAutoCropBorders (int org_size_x, int org_size_y, int img_size_x, int img_size_y,
+                                  int org_size_x_cr, int org_size_y_cr, int img_size_x_cr, int img_size_y_cr)
+ {
+   int uv;
+ 
+   PaddPlaneBorders (imgY_org_frm, org_size_x, org_size_y, img_size_x, img_size_y);
+ 
+   if (img->yuv_format == YUV400)
+     return;   // no chroma planes to pad
+ 
+   for (uv = 0; uv < 2; uv++)
+     PaddPlaneBorders (imgUV_org_frm [uv], org_size_x_cr, org_size_y_cr, img_size_x_cr, img_size_y_cr);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Derives the absolute frame number in the source file from
+  *    variables in img-> and input->
+  * \return
+  *    frame number in the file to be read
+  * \par side effects
+  *    the global variable frame_no is updated with the returned value
+  ************************************************************************
+  */
+ static int CalculateFrameNumber()
+ {
+   if (!img->b_frame_to_code)
+   {
+     // not a B frame: one step of (jumpd + 1) source frames per coded image
+     frame_no = start_tr_in_this_IGOP + IMG_NUMBER * (input->jumpd + 1);
+ #ifdef _ADAPT_LAST_GROUP_
+     if (input->last_frame && img->number + 1 == input->no_frames)
+       frame_no = input->last_frame;
+ #endif
+   }
+   else
+   {
+     // B frame: offset from the previous non-B position, in units of
+     // img->b_interval; the offset is truncated towards zero
+     double b_steps;
+ 
+     if (input->PyramidCoding)
+       b_steps = (double) (1 + gop_structure[img->b_frame_to_code - 1].display_no);
+     else
+       b_steps = (double) img->b_frame_to_code;
+ 
+     frame_no = start_tr_in_this_IGOP + (IMG_NUMBER - 1) * (input->jumpd + 1) + (int) (img->b_interval * b_steps);
+   }
+ 
+   return frame_no;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Convert file read buffer to source picture structure
+  *
+  *    Samples in the buffer are symbol_size_in_bytes wide and stored least
+  *    significant byte first.  When testEndian() is non-zero (big endian
+  *    host) the bytes are swapped while converting; otherwise they are
+  *    copied as they are.
+  *
+  * \par errors
+  *    error() is called when the file samples are wider than imgpel, and on
+  *    big endian hosts for sample sizes other than 1, 2 or 4 bytes
+  ************************************************************************
+  */
+ void buf2img ( imgpel** imgX,           //!< Pointer to image plane
+                unsigned char* buf,      //!< Buffer holding the samples read from file
+                int size_x,              //!< horizontal size of picture
+                int size_y,              //!< vertical size of picture
+                int symbol_size_in_bytes //!< number of bytes in file used for one pixel
+                )
+ {
+   int i,j;
+ 
+   unsigned short tmp16, ui16;
+   unsigned long  ui32;
+ 
+   if (symbol_size_in_bytes> sizeof(imgpel))
+   {
+     error ("Source picture has higher bit depth than imgpel data type. Please recompile with larger data type for imgpel.", 500);
+   }
+ 
+   if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes))
+   {
+     // imgpel == pixel_in_file == 1 byte -> simple copy
+     for(j=0;j<size_y;j++)
+       memcpy(imgX[j], buf+j*size_x, size_x);
+   }
+   else
+   {
+     // sizeof (imgpel) > sizeof(char)
+     if (testEndian())
+     {
+       // big endian
+       switch (symbol_size_in_bytes)
+       {
+       case 1:
+         {
+           for(j=0;j<size_y;j++)
+             for(i=0;i<size_x;i++)
+             {
+               imgX[j][i]= buf[i+j*size_x];
+             }
+           break;
+         }
+       case 2:
+         {
+           for(j=0;j<size_y;j++)
+             for(i=0;i<size_x;i++)
+             {
+               memcpy(&tmp16, buf+((i+j*size_x)*2), 2);
+               ui16  = (tmp16 >> 8) | ((tmp16&0xFF)<<8);
+               imgX[j][i] = (imgpel) ui16;
+             }
+           break;
+         }
+       case 4:
+         {
+           for(j=0;j<size_y;j++)
+             for(i=0;i<size_x;i++)
+             {
+               // Assemble the value byte by byte: a 4 byte memcpy into an
+               // unsigned long is only right where long is exactly 4 bytes.
+               const unsigned char *src = buf+((i+j*size_x)*4);
+               ui32  = (unsigned long) src[0]         | ((unsigned long) src[1] << 8) |
+                       ((unsigned long) src[2] << 16) | ((unsigned long) src[3] << 24);
+               imgX[j][i] = (imgpel) ui32;
+             }
+           break;   // without this the 32 bit case ran into the error below
+         }
+       default:
+         {
+            error ("reading only from formats of 8, 16 or 32 bit allowed on big endian architecture", 500);
+            break;
+         }
+       }
+     }
+     else
+     {
+       // little endian: clear the sample first, the file sample may be
+       // narrower than imgpel
+       for (j=0; j < size_y; j++)
+         for (i=0; i < size_x; i++)
+         {
+           imgX[j][i]=0;
+           memcpy(&(imgX[j][i]), buf +((i+j*size_x)*symbol_size_in_bytes), symbol_size_in_bytes);
+         }
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reads one new frame from the input file p_in into imgY_org_frm and,
+  *    unless the format is YUV400, imgUV_org_frm
+  *
+  * \param FrameNoInFile
+  *    Frame number in the source file (counted after input->start_frame)
+  * \param HeaderSize
+  *    Number of bytes in the source file to be skipped
+  * \param xs
+  *    horizontal size of frame in pixels
+  * \param ys
+  *    vertical size of frame in pixels
+  * \param xs_cr
+  *    horizontal chroma size of frame in pixels
+  * \param ys_cr
+  *    vertical chroma size of frame in pixels
+  *
+  * \par errors
+  *    Seek failures end in error(); a short read or a sample size that is
+  *    not a multiple of 8 bits prints a message and exits the program.
+  ************************************************************************
+  */
+ static void ReadOneFrame (int FrameNoInFile, int HeaderSize, int xs, int ys, int xs_cr, int ys_cr)
+ {
+   unsigned int symbol_size_in_bytes = img->pic_unit_size_on_disk/8;
+   
+   const int imgsize_y = xs*ys;
+   const int imgsize_uv = xs_cr*ys_cr;
+ 
+   const int bytes_y = imgsize_y * symbol_size_in_bytes;
+   const int bytes_uv = imgsize_uv * symbol_size_in_bytes;
+ 
+   // widen before adding, so the sum itself is computed in 64 bit
+   const int64 framesize_in_bytes = (int64) bytes_y + 2 * (int64) bytes_uv;
+   unsigned char *buf;
+ 
+   Boolean rgb_input = (input->rgb_input_flag==1 && input->yuv_format==3);
+ 
+ 
+   assert (p_in != -1);
+ 
+   // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version
+   // The buffer is sized for a luma plane and reused for the chroma planes.
+   // NOTE(review): assumes bytes_uv <= bytes_y -- confirm for all formats
+   if (NULL==(buf = malloc (xs*ys * symbol_size_in_bytes))) no_mem_exit("ReadOneFrame: buf");
+ 
+   // skip Header
+   if (lseek (p_in, HeaderSize, SEEK_SET) != HeaderSize)
+   {
+     error ("ReadOneFrame: cannot fseek to (Header size) in p_in", -1);
+   }
+ 
+   // skip starting frames
+   if (lseek (p_in, framesize_in_bytes * input->start_frame, SEEK_CUR) == -1) 
+   {
+     snprintf(errortext, ET_SIZE, "ReadOneFrame: cannot advance file pointer in p_in beyond frame %d\n", input->start_frame);
+     error (errortext,-1);
+   } 
+ 
+   // seek to current frame 
+   if (lseek (p_in, framesize_in_bytes * FrameNoInFile, SEEK_CUR) == -1) 
+   {
+     snprintf(errortext, ET_SIZE, "ReadOneFrame: cannot advance file pointer in p_in beyond frame %d\n", input->start_frame + FrameNoInFile);
+     error (errortext,-1);
+   }
+ 
+   // Here we are at the correct position for the source frame in the file.  Now
+   // read it.
+   if (img->pic_unit_size_on_disk%8 == 0)
+   {
+     // For RGB input the planes are read in a different order than they
+     // are stored, hence the extra seeks around the three reads.
+     if(rgb_input)
+       lseek (p_in, framesize_in_bytes/3, SEEK_CUR);
+ 
+     if (read(p_in, buf, bytes_y) != bytes_y)
+     {
+       printf ("ReadOneFrame: cannot read %d bytes from input file, unexpected EOF?, exiting", bytes_y);
+       report_stats_on_error();
+       exit (-1);
+     }
+ 
+     buf2img(imgY_org_frm, buf, xs, ys, symbol_size_in_bytes);
+     
+     if (img->yuv_format != YUV400)
+     {
+       if (read(p_in, buf, bytes_uv) != bytes_uv)
+       {
+         // report the size of the chroma read that actually failed
+         printf ("ReadOneFrame: cannot read %d bytes from input file, unexpected EOF?, exiting", bytes_uv);
+         report_stats_on_error();
+         exit (-1);
+       }
+       buf2img(imgUV_org_frm[0], buf, xs_cr, ys_cr, symbol_size_in_bytes);
+       
+       if(rgb_input)
+         lseek (p_in, -framesize_in_bytes, SEEK_CUR);
+       
+       if (read(p_in, buf, bytes_uv) != bytes_uv)
+       {
+         printf ("ReadOneFrame: cannot read %d bytes from input file, unexpected EOF?, exiting", bytes_uv);
+         report_stats_on_error();
+         exit (-1);
+       }
+       buf2img(imgUV_org_frm[1], buf, xs_cr, ys_cr, symbol_size_in_bytes);
+ 
+       if(rgb_input)
+         lseek (p_in, framesize_in_bytes*2/3, SEEK_CUR);
+     }
+   }
+   else
+   {
+     printf ("ReadOneFrame (NOT IMPLEMENTED): pic unit size on disk must be divided by 8");
+     exit (-1);
+   }
+   free (buf);
+   
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Selects the frame buffers as current source picture: imgY_org and
+  *    imgUV_org are pointed at imgY_org_frm and imgUV_org_frm
+  ************************************************************************
+  */
+ static void put_buffer_frame()
+ {
+   imgUV_org = imgUV_org_frm;
+   imgY_org  = imgY_org_frm;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Selects the top field buffers as current source picture: imgY_org
+  *    and imgUV_org are pointed at imgY_org_top and imgUV_org_top, and
+  *    img->fld_type is set to 0
+  ************************************************************************
+  */
+ static void put_buffer_top()
+ {
+   imgUV_org = imgUV_org_top;
+   imgY_org  = imgY_org_top;
+ 
+   img->fld_type = 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Selects the bottom field buffers as current source picture: imgY_org
+  *    and imgUV_org are pointed at imgY_org_bot and imgUV_org_bot, and
+  *    img->fld_type is set to 1
+  ************************************************************************
+  */
+ static void put_buffer_bot()
+ {
+   imgUV_org = imgUV_org_bot;
+   imgY_org  = imgY_org_bot;
+ 
+   img->fld_type = 1;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes a NAL unit of a partition or slice
+  *
+  * \param currStream
+  *    bitstream whose streamBuffer holds the payload; it has to be byte
+  *    aligned (bits_to_go == 8, checked by an assert)
+  * \param partition
+  *    partition number, added to NALU_TYPE_DPA to form the NAL unit type
+  *    when input->partition_mode is not 0
+  *
+  * \par side effects
+  *    the value returned by WriteNALU() is added to stats->bit_ctr
+  ************************************************************************
+  */
+ 
+ static void writeUnit(Bitstream* currStream,int partition)
+ {
+   // KS: this is approx. max. allowed code picture size
+   const int buffer_size = 500 + img->FrameSizeInMbs * (128 + 256 * img->bitdepth_luma + 512 * img->bitdepth_chroma);
+   NALU_t *nalu;
+ 
+   assert (currStream->bits_to_go == 8);
+   nalu = AllocNALU(buffer_size);
+ 
+   // the first macroblock of a picture gets a start code prefix one byte longer
+   if (img->current_mb_nr == 0)
+     nalu->startcodeprefix_len = 2 + (ZEROBYTES_SHORTSTARTCODE + 1);
+   else
+     nalu->startcodeprefix_len = 2 + (ZEROBYTES_SHORTSTARTCODE);
+ 
+   // payload goes behind the first byte of the NALU, hence the "+1"
+   nalu->len = currStream->byte_pos + 1;
+   memcpy (&nalu->buf[1], currStream->streamBuffer, nalu->len - 1);
+ 
+   if (img->currentPicture->idr_flag)
+   {
+     nalu->nal_unit_type     = NALU_TYPE_IDR;
+     nalu->nal_reference_idc = NALU_PRIORITY_HIGHEST;
+   }
+   else
+   {
+     // B slices and all other non-IDR slices use the same header rules
+ 
+     // different nal header for different partitions
+     if (input->partition_mode != 0)
+       nalu->nal_unit_type = NALU_TYPE_DPA +  partition;
+     else
+       nalu->nal_unit_type = NALU_TYPE_SLICE;
+ 
+     if (img->nal_reference_idc == 0)
+       nalu->nal_reference_idc = NALU_PRIORITY_DISPOSABLE;
+     else
+       nalu->nal_reference_idc = NALU_PRIORITY_HIGH;
+   }
+ 
+   nalu->forbidden_bit = 0;
+   stats->bit_ctr += WriteNALU (nalu);
+ 
+   FreeNALU(nalu);
+ }
+               
+ /*!
+  ************************************************************************
+  * \brief
+  *    performs multi-pass encoding of same picture using different 
+  *    coding conditions
+  *
+  *    Up to two further passes (into frame_pic_2 and frame_pic_3) are
+  *    compared against what is in frame_pic_1; img->rd_pass records which
+  *    pass was kept (0, 1 or 2).
+  *    NOTE(review): frame_pic_1 is presumably filled by the caller before
+  *    this function runs -- confirm.
+  *
+  * \par side effects
+  *    changes img->qp, img->type, img->rd_pass, img->write_macroblock,
+  *    active_pps, enc_picture, frame_pic, intras and may set
+  *    enc_frame_picture2 / enc_frame_picture3 to NULL
+  ************************************************************************
+  */
+ 
+ static void rdPictureCoding()
+ {
+   // rd_qp keeps the QP on entry; second_qp is overwritten below with the
+   // QP actually used for the second pass
+   int second_qp = img->qp, rd_qp = img->qp;
+   int previntras = intras;
+   int prevtype = img->type;
+   int skip_encode = 0;
+   pic_parameter_set_rbsp_t *sec_pps;
+     
+   
+   // ---- settings for the second pass --------------------------------
+   if (img->type!=I_SLICE && input->GenerateMultiplePPS)
+   {
+     if (img->type==P_SLICE)
+     {
+       if (test_wp_P_slice(0) == 1)
+       {
+         active_pps = PicParSet[1];
+       }
+       else
+       {
+         skip_encode = input->RDPSliceWeightOnly;
+         active_pps = PicParSet[0];
+         if (!img->AdaptiveRounding)
+           img->qp-=1;
+       }
+     }
+     else
+     {
+       active_pps = PicParSet[2];
+     }
+   }
+   else        
+   {
+     if (!img->AdaptiveRounding)
+       img->qp-=1;
+   }
+   
+   // remembered so that they can be restored if the second pass wins
+   sec_pps = active_pps;
+   second_qp = img->qp;
+   
+   img->write_macroblock = 0;
+   
+   if (skip_encode)
+   {
+     img->rd_pass = 0;
+     enc_frame_picture2 = NULL;
+   }
+   else
+   {
+     // NOTE(review): presumably codes the picture again into frame_pic_2;
+     // the decision result is non-zero when that pass is preferred -- confirm
+     frame_picture (frame_pic_2,1);
+     img->rd_pass=picture_coding_decision(frame_pic_1, frame_pic_2, rd_qp);
+   }
+   //      update_rd_picture_contexts (img->rd_pass); 
+   if (img->rd_pass==0)
+   {
+     // first pass kept: back to its picture, QP, PPS and intra count
+     enc_picture=enc_frame_picture;
+     if (img->type!=I_SLICE && input->GenerateMultiplePPS)
+     { 
+       img->qp=rd_qp;
+       active_pps = PicParSet[0];
+     }
+     else       
+     {
+       img->qp=rd_qp;
+     }
+     intras = previntras;
+     frame_pic = frame_pic_1;
+   }
+   else
+   {
+     // second pass kept: its intra count becomes the reference value
+     previntras = intras;
+     frame_pic = frame_pic_2;
+   }
+   // Final Encoding pass - note that we should 
+   // make this more flexible in a later version.
+   
+   // ---- settings for the third pass ---------------------------------
+   if (img->type!=I_SLICE && input->GenerateMultiplePPS)
+   {
+     skip_encode = 0;
+     img->qp    = rd_qp;
+     
+     // a P picture with at least 75% intra MBs is retried as I picture
+     if (img->type == P_SLICE && input->GenerateMultiplePPS && (intras * 100 )/img->FrameSizeInMbs >=75)
+     {
+       img->type=I_SLICE;
+       active_pps = PicParSet[0];
+     }
+     else if (img->type==P_SLICE)
+     {
+       if (test_wp_P_slice(1) == 1)
+       {
+         active_pps = PicParSet[1];
+       }
+       else if (input->RDPSliceBTest && active_sps->profile_idc != 66)
+       {
+         // retry the P picture as B picture (not for profile_idc 66)
+         img->type = B_SLICE;
+         active_pps = PicParSet[0];        
+       }
+       else
+       {
+         skip_encode = input->RDPSliceWeightOnly;
+         active_pps = PicParSet[0];
+         if (!img->AdaptiveRounding)
+           img->qp+=1;
+       }
+     }
+     else
+     {
+       if (test_wp_B_slice(0) == 1)
+       {
+         active_pps = PicParSet[1];
+       }
+       else
+       {
+         skip_encode = input->RDBSliceWeightOnly;
+         // QP one lower for reference pictures, one higher otherwise
+         img->qp = rd_qp + (img->nal_reference_idc ? - 1 : 1);
+       }      
+     }
+   }
+   else 
+   {
+     active_pps = PicParSet[0];
+     if (!img->AdaptiveRounding)
+       img->qp    = (rd_qp + 1);
+   }
+   
+   
+   img->write_macroblock = 0;
+   
+   if (skip_encode)
+   {
+     enc_frame_picture3 = NULL;
+     img->qp = rd_qp;
+   }
+   else
+   {
+     frame_picture (frame_pic_3,2);
+     
+     // The third pass is compared against the pass kept so far.
+     // NOTE(review): rd_pass ends up as 0, 1 or 2 if the decision function
+     // returns 0 or 1 -- confirm
+     if (img->rd_pass==0)
+       img->rd_pass  = 2*picture_coding_decision(frame_pic_1, frame_pic_3, rd_qp);
+     else
+       img->rd_pass +=   picture_coding_decision(frame_pic_2, frame_pic_3, rd_qp);
+   }
+ 
+   //update_rd_picture_contexts (img->rd_pass); 
+   // ---- restore the state of the pass that was kept -----------------
+   // (for any other rd_pass value the third pass settings stay in place)
+   if (img->rd_pass==0)
+   {
+     enc_picture = enc_frame_picture;
+     img->type   = prevtype;
+     active_pps  = PicParSet[0];
+     img->qp     = rd_qp;
+     intras      = previntras;
+   }
+   else if (img->rd_pass==1)
+   {
+     enc_picture = enc_frame_picture2;
+     img->type   = prevtype;
+     active_pps  = sec_pps;
+     img->qp     = second_qp;
+     intras      = previntras;
+   }       
+ }
+  


Index: llvm-test/MultiSource/Applications/JM/lencod/image.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/image.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/image.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,34 ----
+ 
+ /*!
+  ************************************************************************
+  * \file image.h
+  *
+  * \brief
+  *    headers for image processing
+  *
+  * \author
+  *  Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *  Copyright (C) 1999  Telenor Satellite Services, Norway
+  ************************************************************************
+  */
+ #ifndef _IMAGE_H_
+ #define _IMAGE_H_
+ 
+ #include "mbuffer.h"
+ 
+ extern StorablePicture *enc_picture;          // picture currently selected; rdPictureCoding() in image.c points it at enc_frame_picture or enc_frame_picture2
+ extern StorablePicture *enc_frame_picture;    // NOTE(review): presumably the result of the first frame coding pass -- confirm
+ extern StorablePicture *enc_frame_picture2;   // set to NULL by rdPictureCoding() when its second pass is skipped
+ extern StorablePicture *enc_frame_picture3;   // set to NULL by rdPictureCoding() when its third pass is skipped
+ extern StorablePicture *enc_top_picture;      // NOTE(review): presumably the top field picture -- confirm
+ extern StorablePicture *enc_bottom_picture;   // NOTE(review): presumably the bottom field picture -- confirm
+ 
+ int encode_one_frame ();
+ void report_frame_statistic();
+ Boolean dummy_slice_too_big(int bits_slice);
+ void copy_rdopt_data (int field_type);       // For MB level field/frame coding tools
+ 
+ void UnifiedOneForthPix (StorablePicture *s);
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,136 ----
+ 
+ /*!
+  *****************************************************************************
+  *
+  * \file intrarefresh.c
+  *
+  * \brief
+  *    Encoder support for pseudo-random intra macroblock refresh
+  *
+  * \date
+  *    16 June 2002
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *****************************************************************************/
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ 
+ #include "global.h"
+ 
+ static int *RefreshPattern;          // NumberOfMBs entries, filled by RandomIntraInit() with each of 0..NumberOfMBs-1 once, in random order
+ static int *IntraMBs;                // NumberIntraPerPicture entries: the values matched by RandomIntra() for the current picture
+ static int WalkAround = 0;           // start offset into RefreshPattern, advanced by RandomIntraNewPicture()
+ static int NumberOfMBs = 0;          // xsize * ysize as passed to RandomIntraInit()
+ static int NumberIntraPerPicture;    // the refresh value passed to RandomIntraInit()
+  
+ /*!
+  ************************************************************************
+  * \brief
+  *    RandomIntraInit: Initializes the Random Intra module.  Should be
+  *    called only after initialization (or changes) of the picture size or
+  *    the random intra refresh value.  In version jm2.1 those values cannot
+  *    be changed on-the-fly, hence one call immediately after the parsing
+  *    of the config file is sufficient
+  *
+  * \par Input:
+  *    xsize, ysize: size of the picture (in MBs)
+  *    refresh     : refresh rate in MBs per picture (0 = no refresh)
+  ************************************************************************
+  */
+ 
+ void RandomIntraInit(int xsize, int ysize, int refresh)
+ {
+   int order, slot;
+ 
+   srand (1);      // fixed seed, so the pattern is reproducible
+   NumberOfMBs = xsize * ysize;
+   NumberIntraPerPicture = refresh;
+ 
+   if (refresh == 0)
+   {
+     // refresh switched off: no tables are allocated
+     RefreshPattern = NULL;
+     IntraMBs = NULL;
+     return;
+   }
+ 
+   // one pattern entry per MB of the picture
+   RefreshPattern = malloc (sizeof (int) * NumberOfMBs);
+   if (RefreshPattern == NULL) no_mem_exit("RandomIntraInit: RefreshPattern");
+ 
+   IntraMBs = malloc (sizeof (int) * refresh);
+   if (IntraMBs == NULL) no_mem_exit("RandomIntraInit: IntraMBs");
+ 
+   // -1 marks a slot of the pattern that is still free
+   for (slot = 0; slot < NumberOfMBs; slot++)
+     RefreshPattern[slot] = -1;
+ 
+   // hand every number 0..NumberOfMBs-1 to a randomly drawn free slot;
+   // draws that hit an occupied slot are repeated, so the finished
+   // pattern holds each number exactly once
+   for (order = 0; order < NumberOfMBs; order++)
+   {
+     slot = rand() % NumberOfMBs;
+     while (RefreshPattern [slot] != -1)
+       slot = rand() % NumberOfMBs;
+     RefreshPattern [slot] = order;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    RandomIntra: Code an MB as Intra?
+  *
+  * \par Input
+  *    mb: macroblock number in scan order
+  * \par Output
+  *    1 if the MB has to be forced to Intra, i.e. if it is listed in
+  *      the IntraMBs set of the current picture
+  *    0 otherwise
+  *
+  ************************************************************************
+  */
+ 
+ int RandomIntra (int mb)
+ {
+   int k = 0;
+ 
+   while (k < NumberIntraPerPicture)
+     if (IntraMBs[k++] == mb)
+       return 1;
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    RandomIntraNewPicture: Selects new set of MBs for forced Intra.
+  *    Call exactly once per picture, after RandomIntraInit()
+  ************************************************************************
+  */
+ 
+ void RandomIntraNewPicture ()
+ {
+   int j, pos;
+ 
+   if (NumberIntraPerPicture <= 0 || NumberOfMBs <= 0)
+     return;   // nothing to select (refresh off or module not set up)
+   // Advance the window start, but keep it inside [0, NumberOfMBs): an
+   // ever growing offset would overflow int and then index out of range
+   WalkAround = (WalkAround + NumberIntraPerPicture % NumberOfMBs) % NumberOfMBs;
+   for (j = 0, pos = WalkAround; j < NumberIntraPerPicture; j++, pos++)
+     IntraMBs[j] = RefreshPattern [pos % NumberOfMBs];
+ }
+ 
+ void RandomIntraUninit()    //!< releases the tables allocated by RandomIntraInit()
+ {
+   // free(NULL) is a no-op, so no guard on NumberIntraPerPicture is needed;
+   // the pointers are cleared so that a repeated call cannot double-free
+   free(RefreshPattern);
+   free(IntraMBs);
+   RefreshPattern = IntraMBs = NULL;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/intrarefresh.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,26 ----
+ 
+ /*!
+  ***************************************************************************
+  *
+  * \file intrarefresh.h
+  *
+  * \brief
+  *    Pseudo-Random Intra macroblock refresh support
+  *
+  * \date
+  *    16 June 2002
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  **************************************************************************/
+ 
+ #ifndef _INTRAREFRESH_H_
+ #define _INTRAREFRESH_H_
+ 
+ void RandomIntraInit(int xsize, int ysize, int refresh);  //! builds the refresh pattern; xsize, ysize in MBs, refresh in MBs per picture
+ void RandomIntraUninit();  //! frees the tables allocated by RandomIntraInit
+ int RandomIntra (int mb);   //! returns 1 for MBs that need forced Intra
+ void RandomIntraNewPicture ();  //! to be called once per picture  
+ 
+ 
+ #endif //_INTRAREFRESH_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,296 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file leaky_bucket.c
+  *
+  * \brief
+  *    calculate Leaky Buffer parameters
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Shankar Regunathan                   <shanre at microsoft.com>
+  ***************************************************************************
+  */
+ #include <stdlib.h>
+ 
+ #include "contributors.h"
+ #include "global.h"
+ 
+ #ifdef _LEAKYBUCKET_
+ 
+ long Bit_Buffer[10000];                 // NOTE(review): fixed table of 10000 entries, presumably one bit count per frame -- confirm against its writers
+ unsigned long total_frame_buffer = 0;   // frame count printed as "Total Frames" and used to size buffer_frame in calc_buffer()
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *   Function to get Leaky Bucket rates from rate file
+  * \param NumberLeakyBuckets
+  *    Number of Leaky Bucket Parameters
+  * \param Rmin
+  *    Rate values for each Bucket (output, NumberLeakyBuckets entries).
+  * \return
+  *    returns 1 if successful; else returns zero.
+  * \par SideEffects
+  *     Prints a message to standard output on failure.
+  * \par Notes
+  *     Failure if LeakyBucketRate is missing or if it does not have
+  *     the correct number of entries.
+  * \author
+  *    Shankar Regunathan                   shanre at microsoft.com
+  *  \date 
+  *      December 06, 2001.
+  ***********************************************************************
+  */
+ 
+ int get_LeakyBucketRate(unsigned long NumberLeakyBuckets, unsigned long *Rmin)
+ {
+   FILE *f;
+   unsigned long i, buf;
+   
+   if((f = fopen(input->LeakyBucketRateFile, "r")) == NULL)
+   {
+     printf(" LeakyBucketRate File does not exist. Using rate calculated from avg. rate \n");
+     return 0;
+   }
+   
+   for(i=0; i<NumberLeakyBuckets; i++) 
+   {
+     if(1 != fscanf(f, "%lu", &buf))    // %lu: buf is an unsigned long
+     {
+       printf(" Leaky BucketRateFile does not have valid entries.\n Using rate calculated from avg. rate \n");
+       fclose (f);
+       return 0;
+     }
+     Rmin[i] = buf;
+   }
+   fclose (f);
+   return 1;
+ }
+ /*!
+  ***********************************************************************
+  * \brief
+  *   Writes the low 32 bits of an unsigned long to a file as four bytes,
+  *   most significant byte first (big endian order).
+  * \param dw
+  *    Value to be written
+  * \param fp
+  *    File pointer
+  * \return
+  *    None.
+  * \par SideEffects
+  *     Four bytes are written to fp; fputc results are not checked.
+  * \author
+  *    Shankar Regunathan                   shanre at microsoft.com
+  *  \date 
+  *      December 06, 2001.
+  ***********************************************************************
+  */
+ void PutBigDoubleWord(unsigned long dw, FILE *fp)
+ {
+   int shift;
+ 
+   for (shift = 0x18; shift >= 0; shift -= 8)
+     fputc((dw >> shift) & 0xFF, fp);
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *   Stores the Leaky BucketParameters in file input->LeakyBucketParamFile.
+  * \param NumberLeakyBuckets
+  *    Number of LeakyBuckets.
+  * \param Rmin
+  *    Rate values of the buckets.
+  * \param Bmin
+  *    Minimum buffer values of the buckets.
+  *  \param Fmin
+  *     Minimum initial buffer fullness of the buckets
+  * \return
+  *    None.
+  * \par
+  *    Calls error() if LeakyBucketParamFile cannot be opened.
+  * \par SideEffects
+  *     Prints the LeakyBucket Parameters in standard output.
+  * \author
+  *    Shankar Regunathan                   shanre at microsoft.com
+  *  \date 
+  *      December 06, 2001.
+  ***********************************************************************
+  */
+ 
+ 
+ void write_buffer(unsigned long NumberLeakyBuckets, unsigned long Rmin[], unsigned long Bmin[], unsigned long Fmin[])
+ {
+   FILE *outf;
+   unsigned long iBucket;
+   
+   if ((outf=fopen(input->LeakyBucketParamFile,"wb"))==NULL)
+   {
+     snprintf(errortext, ET_SIZE, "Error open file lk %s  \n",input->LeakyBucketParamFile);
+     error(errortext,1);   // NOTE(review): presumably does not return -- confirm
+   }
+   
+   PutBigDoubleWord(NumberLeakyBuckets, outf);
+   if (input->Verbose != 0)
+     printf(" Number Leaky Buckets: %lu \n     Rmin     Bmin     Fmin \n", NumberLeakyBuckets);
+   for(iBucket =0; iBucket < NumberLeakyBuckets; iBucket++) 
+   {
+     //assert(Rmin[iBucket]<4294967296); //Overflow should be corrected already.
+     //assert(Bmin[iBucket]<4294967296);
+     //assert(Fmin[iBucket]<4294967296);
+     PutBigDoubleWord(Rmin[iBucket], outf);
+     PutBigDoubleWord(Bmin[iBucket], outf);
+     PutBigDoubleWord(Fmin[iBucket], outf);
+     if (input->Verbose != 0)   // values are unsigned long, hence %lu
+       printf(" %8lu %8lu %8lu \n", Rmin[iBucket], Bmin[iBucket], Fmin[iBucket]);
+   }
+   fclose(outf);
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sorts the rate array in ascending order. 
+  * \param NumberLeakyBuckets
+  *    Number of LeakyBuckets.
+  * \param Rmin
+  *    Rate values of the buckets.
+  * \return
+  *    None.
+  * \author
+  *    Shankar Regunathan                   shanre at microsoft.com
+  *  \date 
+  *      December 06, 2001.
+  ***********************************************************************
+  */
+ 
+ 
+ void Sort(unsigned long NumberLeakyBuckets, unsigned long *Rmin)
+ {
+   /* In-place ascending exchange sort of Rmin[0 .. NumberLeakyBuckets-1]. */
+   unsigned long i, j;
+   unsigned long temp;
+ 
+   /* "i+1 < N" instead of "i < N-1": the count is unsigned, so N-1 wraps to
+    * ULONG_MAX when N == 0 and the outer loop would spin. */
+   for(i=0; i+1 < NumberLeakyBuckets; i++)
+   {
+     for(j=i+1; j<NumberLeakyBuckets; j++)
+     {
+       if(Rmin[i] > Rmin[j]) {
+         temp = Rmin[i];
+         Rmin[i] = Rmin[j];
+         Rmin[j] = temp;
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Main Routine to calculate Leaky Buffer parameters
+  * \par
+  *    Takes no arguments. Reads total_frame_buffer, Bit_Buffer[],
+  *    input->NumberLeakyBuckets, input->jumpd and img->framerate, derives
+  *    one (Rmin, Bmin, Fmin) triple per bucket and hands the result to
+  *    write_buffer().
+  * \return
+  *    None.
+  * \author
+  *    Shankar Regunathan                   shanre at microsoft.com
+  *  \date 
+  *      December 06, 2001.
+  ***********************************************************************
+  */
+ 
+ void calc_buffer()
+ {    
+   unsigned long AvgRate, TotalRate, NumberLeakyBuckets;
+   long *buffer_frame, minB;
+   unsigned long iBucket, iFrame,  FrameIndex = 0;
+   long maxBuffer, actualBuffer, InitFullness, iChannelRate;
+   unsigned long *Rmin, *Bmin, *Fmin;
+    
+   fprintf(stdout,"-------------------------------------------------------------------------------\n");
+   printf(" Total Frames:  %ld (%d) \n", total_frame_buffer, input->no_frames);
+   NumberLeakyBuckets = (unsigned long) input->NumberLeakyBuckets;
+   buffer_frame = calloc(total_frame_buffer+1, sizeof(long)); /* +1: the loops below write buffer_frame[iFrame+1] */
+   if(!buffer_frame)
+     no_mem_exit("init_buffer: buffer_frame");
+   Rmin = calloc(NumberLeakyBuckets, sizeof(unsigned long));
+   if(!Rmin)
+     no_mem_exit("init_buffer: Rmin");    
+   Bmin = calloc(NumberLeakyBuckets, sizeof(unsigned long));
+   if(!Bmin)
+     no_mem_exit("init_buffer: Bmin");
+   Fmin = calloc(NumberLeakyBuckets, sizeof(unsigned long));
+   if(!Fmin)
+     no_mem_exit("init_buffer: Fmin");
+ 
+   TotalRate = 0; /* sum of Bit_Buffer[] over all recorded frames */
+   for(iFrame=0; iFrame < total_frame_buffer; iFrame++) 
+   {
+     TotalRate += (unsigned long) Bit_Buffer[iFrame];
+   }
+   AvgRate = (unsigned long) ((float) TotalRate/ total_frame_buffer); /* NOTE(review): no guard for total_frame_buffer == 0 -- confirm callers */
+   
+   if(1 != get_LeakyBucketRate(NumberLeakyBuckets, Rmin))
+   { /* if rate file is not present, use default calculated from avg.rate */
+     for(iBucket=0; iBucket < NumberLeakyBuckets; iBucket++) 
+     {
+       if(iBucket == 0)
+         Rmin[iBucket] = (unsigned long)((float) AvgRate * img->framerate)/(input->jumpd+1); /* convert bits/frame to bits/second */
+       else
+         Rmin[iBucket] = (unsigned long) ((float) Rmin[iBucket-1] + (AvgRate/4) * (img->framerate) / (input->jumpd+1));    /* previous rate plus a quarter of the average rate */
+     }
+   }
+   Sort(NumberLeakyBuckets, Rmin);   /* ascending rates */
+ 
+   maxBuffer = AvgRate * 20; /* any initialization is good. */        
+   for(iBucket=0; iBucket< NumberLeakyBuckets; iBucket++) 
+   {           
+     iChannelRate = (long) (Rmin[iBucket] * (input->jumpd+1)/(img->framerate)); /* converts bits/second to bits/frame */
+     /* To calculate initial buffer size */
+     InitFullness = maxBuffer; /* set Initial Fullness to be buffer size */
+     buffer_frame[0] = InitFullness;
+     minB = maxBuffer; 
+     
+     for(iFrame=0; iFrame<total_frame_buffer ; iFrame++) 
+     {        
+       buffer_frame[iFrame] = buffer_frame[iFrame] - Bit_Buffer[iFrame]; /* remove the bits of this frame */
+       if(buffer_frame[iFrame] < minB) /* track the lowest level reached and the frame where it occurs */
+       {
+         minB = buffer_frame[iFrame];
+         FrameIndex = iFrame;
+       }
+       
+       buffer_frame[iFrame+1] = buffer_frame[iFrame] + iChannelRate; /* add one frame interval of channel bits */
+       if(buffer_frame[iFrame+1] > maxBuffer) /* level is capped at maxBuffer */
+         buffer_frame[iFrame+1] = maxBuffer;
+     }
+     actualBuffer = (maxBuffer - minB); /* deepest excursion below the starting level */
+ 
+     /* To calculate initial buffer Fullness */
+     InitFullness = Bit_Buffer[0];
+     buffer_frame[0] = InitFullness;
+     for(iFrame=0; iFrame < FrameIndex+1; iFrame++) /* replay frames 0..FrameIndex */
+     {
+       buffer_frame[iFrame] = buffer_frame[iFrame] - Bit_Buffer[iFrame];
+       if(buffer_frame[iFrame] < 0) { /* shortfall is added to the initial fullness, level restarts at 0 */
+         InitFullness -= buffer_frame[iFrame];
+         buffer_frame[iFrame] = 0;
+       }
+       buffer_frame[iFrame+1] = buffer_frame[iFrame] + iChannelRate;
+       if(buffer_frame[iFrame+1] > actualBuffer) /* stop once the level exceeds the buffer size found above */
+         break;
+     }       
+     Bmin[iBucket] = (unsigned long) actualBuffer;
+     Fmin[iBucket] = (unsigned long) InitFullness;
+   }
+ 
+   write_buffer(NumberLeakyBuckets, Rmin, Bmin, Fmin);
+ 
+   free(buffer_frame);
+   free(Rmin);
+   free(Bmin);
+   free(Fmin);
+   return;
+ }
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/leaky_bucket.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,29 ----
+ 
+ /*!
+  ***************************************************************************
+  *
+  * \file leaky_bucket.h
+  *
+  * \brief
+  *    Header for Leaky Buffer parameters
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Shankar Regunathan                   <shanre at microsoft.com>
+  **************************************************************************/
+ 
+ #ifndef _LEAKY_BUCKET_H_
+ #define _LEAKY_BUCKET_H_
+ 
+ 
+ /* Leaky Bucket Parameter Optimization
+  * Prototypes are only visible when _LEAKYBUCKET_ is defined.
+  * NOTE(review): FILE is used below but this header includes no <stdio.h>
+  * itself; includers must provide it first. */
+ #ifdef _LEAKYBUCKET_
+ int get_LeakyBucketRate(unsigned long NumberLeakyBuckets, unsigned long *Rmin); /* calc_buffer() treats a return value of 1 as "rates loaded into Rmin"; anything else selects computed defaults */
+ void PutBigDoubleWord(unsigned long dw, FILE *fp); /* writes dw to fp; presumably big-endian per the name -- TODO confirm */
+ void write_buffer(unsigned long NumberLeakyBuckets, unsigned long Rmin[], unsigned long Bmin[], unsigned long Fmin[]); /* writes count and per-bucket triples to input->LeakyBucketParamFile */
+ void Sort(unsigned long NumberLeakyBuckets, unsigned long *Rmin); /* sorts Rmin in place, ascending */
+ void calc_buffer(); /* computes Rmin/Bmin/Fmin for every bucket and calls write_buffer() */
+ #endif
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/lencod.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/lencod.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/lencod.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,2421 ----
+ 
+ /*!
+  ***********************************************************************
+  *  \mainpage
+  *     This is the H.264/AVC encoder reference software. For detailed documentation
+  *     see the comments in each file.
+  *
+  *  \author
+  *     The main contributors are listed in contributors.h
+  *
+  *  \version
+  *     JM 10.1 (FRExt)
+  *
+  *  \note
+  *     tags are used for document system "doxygen"
+  *     available at http://www.doxygen.org
+  */
+ /*!
+  *  \file
+  *     lencod.c
+  *  \brief
+  *     H.264/AVC reference encoder project main()
+  *  \author
+  *   Main contributors (see contributors.h for copyright, address and affiliation details)
+  *   - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *   - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+  *   - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  *   - Jani Lainema                    <jani.lainema at nokia.com>
+  *   - Byeong-Moon Jeon                <jeonbm at lge.com>
+  *   - Yoon-Seong Soh                  <yunsung at lge.com>
+  *   - Thomas Stockhammer              <stockhammer at ei.tum.de>
+  *   - Detlev Marpe                    <marpe at hhi.de>
+  *   - Guido Heising                   <heising at hhi.de>
+  *   - Valeri George                   <george at hhi.de>
+  *   - Karsten Suehring                <suehring at hhi.de>
+  *   - Alexis Michael Tourapis         <alexismt at ieee.org>
+  ***********************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ #include <time.h>
+ #include <math.h>
+ #include <sys/timeb.h>
+ 
+ #ifdef WIN32
+ #include <io.h>
+ #else
+ #include <unistd.h>
+ #endif
+ 
+ #include "global.h"
+ 
+ #include "configfile.h"
+ #include "leaky_bucket.h"
+ #include "memalloc.h"
+ #include "intrarefresh.h"
+ #include "fmo.h"
+ #include "sei.h"
+ #include "parset.h"
+ #include "image.h"
+ #include "output.h"
+ #include "fast_me.h"
+ #include "simplified_fast_me.h"
+ #include "ratectl.h"
+ #include "explicit_gop.h"
+ #include "epzs.h"
+ 
+ #define JM      "10 (FRExt)"
+ #define VERSION "10.1"
+ #define EXT_VERSION "(FRExt)"
+ /* Each parameter block below is one static object plus a global pointer to it; the code in this file goes through the pointers (input->, img->, stats->, snr->). */
+ InputParameters inputs,      *input = &inputs;
+ ImageParameters images,      *img   = &images;
+ StatParameters  statistics,  *stats = &statistics;
+ SNRParameters   snrs,        *snr   = &snrs;
+ Decoders decoders, *decs=&decoders;
+ 
+ 
+ #ifdef _ADAPT_LAST_GROUP_
+ int initial_Bframes = 0; /* main() stores input->successive_Bframe here before the frame loop changes it */
+ #endif
+ 
+ Boolean In2ndIGOP = FALSE;
+ int    start_frame_no_in_this_IGOP = 0;
+ int    start_tr_in_this_IGOP = 0;
+ int    FirstFrameIn2ndIGOP=0;
+ int    cabac_encoding = 0;
+ int    frame_statistic_start; /* set to 1 at the start of main() */
+ extern ColocatedParams *Co_located;
+ /* Forward declarations of routines called from main() / report_stats_on_error(). */
+ void Init_Motion_Search_Module ();
+ void Clear_Motion_Search_Module ();
+ void report_frame_statistic();
+ void SetLevelIndices();
+ 
+ void init_stats()
+ {
+   // B-frame count used by the statistics comes from the input configuration
+   stats->successive_Bframe = input->successive_Bframe;
+ 
+   // bit counters per slice type
+   stats->bit_ctr_I = stats->bit_ctr_P = stats->bit_ctr_B = 0;
+ 
+   // SNR values: plain, "1" and "a" variants for Y, U and V
+   snr->snr_y  = snr->snr_u  = snr->snr_v  = 0.0;
+   snr->snr_y1 = snr->snr_u1 = snr->snr_v1 = 0.0;
+   snr->snr_ya = snr->snr_ua = snr->snr_va = 0.0;
+ 
+   // squared-error accumulators (sse_*) and their msse_* counterparts
+   snr->sse_y  = snr->sse_u  = snr->sse_v  = 0.0;
+   snr->msse_y = snr->msse_u = snr->msse_v = 0.0;
+ 
+   snr->frame_ctr = 0;
+ }
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Main function for encoder.
+  * \param argc
+  *    number of command line arguments
+  * \param argv
+  *    command line arguments
+  * \return
+  *    exit code
+  ***********************************************************************
+  */
+ int main(int argc,char **argv)
+ { /* Encoder driver: configure, allocate, run the loop over input->no_frames
+    * frames calling encode_one_frame(), then terminate the sequence, report
+    * and release what was allocated here. */
+   int M,N,n,np,nb;           //Rate control
+   int primary_disp = 0;      // incremented for every picture coded with nal_reference_idc == 0, reset when frame_num is reset
+ 
+   p_dec = p_in = -1;         // -1 is what the close() guard for p_dec tests against at the end
+ 
+   p_stat = p_log = p_trace = NULL;
+ 
+   frame_statistic_start = 1;
+ 
+   Configure (argc, argv);
+ 
+   Init_QMatrix();
+ 
+   Init_QOffsetMatrix();
+ 
+   AllocNalPayloadBuffer();
+ 
+   init_poc();
+   GenerateParameterSets();
+   SetLevelIndices();
+   
+   init_img();
+   frame_pic_1= malloc_picture();
+ 
+   if (input->RDPictureDecision)
+   {
+     frame_pic_2 = malloc_picture();
+     frame_pic_3 = malloc_picture();
+   }
+ 
+   if (input->PicInterlace != FRAME_CODING)
+   {
+     top_pic = malloc_picture();
+     bottom_pic = malloc_picture();
+   }
+   init_rdopt ();
+ 
+   if (input->PyramidCoding )
+   {
+     init_gop_structure();
+     if (input->PyramidCoding == 3)
+     {
+       interpret_gop_structure();
+     }
+     else
+     {
+       create_pyramid();
+     }
+   }  
+ 
+   dpb.init_done = 0;
+   init_dpb(input);
+   init_out_buffer();
+   init_stats();
+ 
+ 
+   enc_picture = enc_frame_picture = enc_top_picture = enc_bottom_picture = NULL;
+ 
+   init_global_buffers();
+ 
+   create_context_memory ();
+ 
+   Init_Motion_Search_Module ();
+ 
+   information_init();
+ 
+   //Rate control 
+   if(input->RCEnable)
+     rc_init_seq();
+ 
+   if(input->FMEnable == 1)
+     DefineThreshold();
+ 
+   // Init frame type counter. Only supports single slice per frame.
+   memset(frame_ctr, 0, 5 * sizeof(int));
+ 
+   img->last_valid_reference = 0;
+   tot_time=0;                 // time for total encoding session
+ 
+ #ifdef _ADAPT_LAST_GROUP_
+   if (input->last_frame > 0)
+     input->no_frames = 1 + (input->last_frame + input->jumpd) / (input->jumpd + 1);
+   initial_Bframes = input->successive_Bframe;  // kept because successive_Bframe is rewritten for the last group below
+ #endif
+ 
+   PatchInputNoFrames();
+ 
+   // Write sequence header (with parameter sets)
+   stats->bit_ctr_parametersets = 0;
+   stats->bit_slice = start_sequence();
+   stats->bit_ctr_parametersets += stats->bit_ctr_parametersets_n;
+   start_frame_no_in_this_IGOP = 0;
+ 
+   for (img->number=0; img->number < input->no_frames; img->number++)
+   {
+     //img->nal_reference_idc = 1;
+     if (input->intra_period)
+       img->nal_reference_idc = ((IMG_NUMBER % input->intra_period) && input->DisposableP) ? (img->number + 1)% 2 : 1;
+     else
+       img->nal_reference_idc = (img->number && input->DisposableP) ? (img->number + 1)% 2 : 1;
+ 
+     //much of this can go in init_frame() or init_field()?
+     //poc for this frame or field
+     img->toppoc = (input->intra_period && input->idr_enable ? IMG_NUMBER % input->intra_period : IMG_NUMBER) * (2*(input->jumpd+1)); 
+ 
+     if ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING))
+       img->bottompoc = img->toppoc;     //progressive
+     else 
+       img->bottompoc = img->toppoc+1;   //hard coded
+ 
+     img->framepoc = min (img->toppoc, img->bottompoc);
+ 
+     //frame_num for this frame
+     //if (input->BRefPictures== 0 || input->successive_Bframe == 0 || img-> number < 2)
+     if ((input->BRefPictures != 1 &&  input->PyramidCoding == 0) || input->successive_Bframe == 0 || img-> number < 2)// ||  input->PyramidCoding == 0)
+     {
+       if (input->intra_period && input->idr_enable)
+       {
+         img->frame_num =  ((IMG_NUMBER - primary_disp)  % input->intra_period ) % (1 << (log2_max_frame_num_minus4 + 4)); 
+         if (IMG_NUMBER % input->intra_period  == 0)
+         {
+           img->frame_num = 0;
+           primary_disp   = 0;
+         }
+       }
+       else
+       img->frame_num = (IMG_NUMBER - primary_disp) % (1 << (log2_max_frame_num_minus4 + 4)); 
+ 
+     }
+     else 
+     {
+       //img->frame_num ++;
+       if (input->intra_period && input->idr_enable)
+       {
+         if (0== (img->number % input->intra_period))
+         {
+           img->frame_num=0;
+           primary_disp   = 0;
+         }
+       }
+       img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4)); 
+     }
+     
+     //the following is sent in the slice header
+     img->delta_pic_order_cnt[0]=0;
+ 
+     if (input->BRefPictures == 1)
+     {
+       if (img->number)
+       {
+         img->delta_pic_order_cnt[0]=+2 * input->successive_Bframe;
+       }
+     }
+ 
+     SetImgType();
+ 
+ #ifdef _ADAPT_LAST_GROUP_
+     if (input->successive_Bframe && input->last_frame && IMG_NUMBER+1 == input->no_frames)
+     {                                           
+       int bi = (int)((float)(input->jumpd+1)/(input->successive_Bframe+1.0)+0.499999);
+       
+       input->successive_Bframe = (input->last_frame-(img->number-1)*(input->jumpd+1))/bi-1;
+ 
+       //about to code the last ref frame, adjust delta poc         
+       img->delta_pic_order_cnt[0]= -2*(initial_Bframes - input->successive_Bframe);
+       img->toppoc += img->delta_pic_order_cnt[0];
+       img->bottompoc += img->delta_pic_order_cnt[0];
+       img->framepoc = min (img->toppoc, img->bottompoc);
+     }
+ #endif
+ 
+      //Rate control
+     if (img->type == I_SLICE)
+     {
+       if(input->RCEnable)
+       {
+         if (input->intra_period == 0)
+         {
+           n = input->no_frames + (input->no_frames - 1) * input->successive_Bframe;
+           
+           /* number of P frames */
+           np = input->no_frames-1; 
+           
+           /* number of B frames */
+           nb = (input->no_frames - 1) * input->successive_Bframe;
+         }else
+         {
+           N = input->intra_period*(input->successive_Bframe+1);
+           M = input->successive_Bframe+1;
+           n = (img->number==0) ? N - ( M - 1) : N;
+           
+           /* last GOP may contain less frames */
+           if(img->number/input->intra_period >= input->no_frames / input->intra_period)
+           {
+             if (img->number != 0)
+               n = (input->no_frames - img->number) + (input->no_frames - img->number - 1) * input->successive_Bframe + input->successive_Bframe;
+             else
+               n = input->no_frames  + (input->no_frames - 1) * input->successive_Bframe;
+           }
+           
+           /* number of P frames */
+           if (img->number == 0)
+             np = (n + 2 * (M - 1)) / M - 1; /* first GOP */
+           else
+             np = (n + (M - 1)) / M - 1;
+           
+           /* number of B frames */
+           nb = n - np - 1;
+         }
+         rc_init_GOP(np,nb);
+       }
+     }
+ 
+ 
+     // which layer the image belonged to?
+     if ( IMG_NUMBER % (input->NumFramesInELSubSeq+1) == 0 )
+       img->layer = 0;
+     else
+       img->layer = 1;
+ 
+     encode_one_frame(); // encode one I- or P-frame
+     if (img->type == I_SLICE && input->EnableOpenGOP)
+       img->last_valid_reference = img->ThisPOC;
+ 
+     if (input->ReportFrameStats)
+       report_frame_statistic();
+     
+     if (img->nal_reference_idc == 0)  // picture coded with nal_reference_idc 0: count it and step frame_num back by one
+     {
+             primary_disp ++;
+             img->frame_num -= 1;
+             img->frame_num %= (1 << (log2_max_frame_num_minus4 + 4));
+     }    
+     encode_enhancement_layer();
+     
+     process_2nd_IGOP();
+   }
+   // terminate sequence
+   terminate_sequence();
+ 
+   flush_dpb();
+ 
+   close(p_in);
+   if (-1!=p_dec)
+     close(p_dec);
+   if (p_trace)
+     fclose(p_trace);
+ 
+   Clear_Motion_Search_Module ();
+ 
+   RandomIntraUninit();
+   FmoUninit();
+   
+   if (input->PyramidCoding)
+     clear_gop_structure ();
+ 
+   // free structure for rd-opt. mode decision
+   clear_rdopt ();
+ 
+ #ifdef _LEAKYBUCKET_
+   calc_buffer();
+ #endif
+ 
+   // report everything
+   report();
+ 
+   free_picture (frame_pic_1);
+   
+   if (input->RDPictureDecision)
+   {
+     free_picture (frame_pic_2);
+     free_picture (frame_pic_3);
+   }
+ 
+   if (top_pic)
+     free_picture (top_pic);
+   if (bottom_pic)
+     free_picture (bottom_pic);
+ 
+   free_dpb();
+   free_colocated(Co_located);
+   uninit_out_buffer();
+ 
+   free_global_buffers();
+ 
+   // free image mem
+   free_img ();
+   free_context_memory ();
+   FreeNalPayloadBuffer();
+   FreeParameterSets();
+   return 0;                         //encode JM73_FME version
+ }
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Terminates and reports statistics on error.
+  * 
+  ***********************************************************************
+  */
+ void report_stats_on_error()
+ {
+   /* Error-path counterpart of the shutdown sequence at the end of main():
+    * clamps the frame count to what has been processed, terminates the
+    * sequence, closes the files, prints the reports and releases the
+    * encoder's buffers. */
+   input->no_frames=img->number-1;
+   terminate_sequence();
+ 
+   flush_dpb();
+ 
+   close(p_in);
+   if (-1!=p_dec)
+     close(p_dec);
+ 
+   if (p_trace)
+     fclose(p_trace);
+ 
+   Clear_Motion_Search_Module ();
+ 
+   RandomIntraUninit();
+   FmoUninit();
+ 
+   if (input->PyramidCoding)
+     clear_gop_structure ();
+ 
+   // free structure for rd-opt. mode decision
+   clear_rdopt ();
+ 
+ #ifdef _LEAKYBUCKET_
+   calc_buffer();
+ #endif
+ 
+   if (input->ReportFrameStats)
+     report_frame_statistic();
+ 
+   // report everything
+   report();
+ 
+   free_picture (frame_pic_1);
+ 
+   // main() allocates these two only when RDPictureDecision is set and frees
+   // them under the same condition; do the same here so they are not leaked.
+   // free_picture() ignores NULL.
+   if (input->RDPictureDecision)
+   {
+     free_picture (frame_pic_2);
+     free_picture (frame_pic_3);
+   }
+ 
+   if (top_pic)
+     free_picture (top_pic);
+   if (bottom_pic)
+     free_picture (bottom_pic);
+ 
+   free_dpb();
+   free_colocated(Co_located);
+   uninit_out_buffer();
+ 
+   free_global_buffers();
+ 
+   // free image mem
+   free_img ();
+   free_context_memory ();
+   FreeNalPayloadBuffer();
+   FreeParameterSets();
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Initializes the POC structure with appropriate parameters.
+  * 
+  ***********************************************************************
+  */
+ void init_poc()
+ {
+   //the following should probably go in sequence parameters
+   // frame poc's increase by 2, field poc's by 1
+ 
+   // 0 when both picture and macroblock coding are FRAME_CODING, 1 otherwise;
+   // the three field-related members below all take this value
+   const int field_step =
+     ((input->PicInterlace==FRAME_CODING)&&(input->MbInterlace==FRAME_CODING)) ? 0 : 1;
+   const int b_ref = (input->BRefPictures == 1);
+ 
+   img->pic_order_cnt_type = input->pic_order_cnt_type;
+ 
+   img->delta_pic_order_always_zero_flag      = 0;
+   img->num_ref_frames_in_pic_order_cnt_cycle = 1;
+ 
+   // POC offsets depend on whether B pictures are used as references
+   img->offset_for_non_ref_pic  = b_ref ? 0 : -2*(input->successive_Bframe);
+   img->offset_for_ref_frame[0] = b_ref ? 2 :  2*(input->successive_Bframe+1);
+ 
+   img->offset_for_top_to_bottom_field = field_step;
+   img->pic_order_present_flag         = field_step;
+   img->delta_pic_order_cnt_bottom     = field_step;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Initializes the img->nz_coeff
+  * \par Input:
+  *    none
+  * \par  Output:
+  *    none
+  * \par Side effects:
+  *    sets img->nz_coeff[][][] to 0
+  ***********************************************************************
+  */
+ void CAVLC_init()
+ {
+   // Clears every entry of img->nz_coeff: PicSizeInMbs x 4 x (4 + num_blk8x8_uv)
+   const unsigned int depth = 4 + (unsigned int)img->num_blk8x8_uv;
+   unsigned int mb, row, col;
+ 
+   for (mb = 0; mb < img->PicSizeInMbs; mb++)
+   {
+     for (row = 0; row < 4; row++)
+     {
+       for (col = 0; col < depth; col++)
+       {
+         img->nz_coeff[mb][row][col] = 0;
+       }
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Initializes the Image structure with appropriate parameters.
+  * \par Input:
+  *    Input Parameters struct inp_par *inp
+  * \par  Output:
+  *    Image Parameters struct img_par *img
+  ***********************************************************************
+  */
+ void init_img()
+ { /* Fills the global img structure from input, active_sps and active_pps
+    * and allocates the work buffers attached to it (motion vectors,
+    * coefficients, quad table, macroblock data, intra prediction modes,
+    * nz_coeff). */
+   int i,j;
+   int byte_abs_range;
+ 
+   static int mb_width_cr[4] = {0,8, 8,16};   // indexed by yuv_format; used as 16/mb_width_cr[] divisor for chroma width
+   static int mb_height_cr[4]= {0,8,16,16};   // indexed by yuv_format; used as 16/mb_height_cr[] divisor for chroma height
+ 
+   img->yuv_format = input->yuv_format;
+ 
+   //pel bitdepth init
+   img->bitdepth_luma   = input->BitDepthLuma;
+   
+   img->bitdepth_luma_qp_scale   = 6*(img->bitdepth_luma   - 8);
+   img->bitdepth_lambda_scale    = 2*(img->bitdepth_luma   - 8);
+ 
+   img->dc_pred_value = 1<<(img->bitdepth_luma - 1);
+   img->max_imgpel_value = (1<<img->bitdepth_luma) - 1;
+ 
+   if (img->yuv_format != YUV400)  
+   {
+     img->bitdepth_chroma     = input->BitDepthChroma;
+     img->max_imgpel_value_uv = (1<<img->bitdepth_chroma) - 1;
+     img->num_blk8x8_uv       = (1<<img->yuv_format)&(~(0x1));   // 2^yuv_format with bit 0 cleared
+     img->num_cdc_coeff       = img->num_blk8x8_uv<<1;
+     img->mb_cr_size_x        = (img->yuv_format==YUV420 || img->yuv_format==YUV422)? 8:16;
+     img->mb_cr_size_y        = (img->yuv_format==YUV444 || img->yuv_format==YUV422)? 16:8;
+ 
+     img->bitdepth_chroma_qp_scale = 6*(img->bitdepth_chroma - 8);
+     if(img->residue_transform_flag)
+       img->bitdepth_chroma_qp_scale += 6;
+ 
+     img->chroma_qp_offset[0] = active_pps->cb_qp_index_offset;
+     img->chroma_qp_offset[1] = active_pps->cr_qp_index_offset;
+   }
+   else
+   {
+     img->bitdepth_chroma     = 0;
+     img->max_imgpel_value_uv = 0;
+     img->num_blk8x8_uv       = 0;
+     img->num_cdc_coeff       = 0;
+     img->mb_cr_size_x        = 0;
+     img->mb_cr_size_y        = 0;
+     
+     img->bitdepth_chroma_qp_scale = 0;
+     img->bitdepth_chroma_qp_scale = 0;   // NOTE(review): exact duplicate of the previous line
+     
+     img->chroma_qp_offset[0] = 0;
+     img->chroma_qp_offset[1] = 0;
+   }
+ 
+   if((img->bitdepth_luma > img->bitdepth_chroma) || img->yuv_format == YUV400)
+     img->pic_unit_size_on_disk = (img->bitdepth_luma > 8)? 16:8;
+   else
+     img->pic_unit_size_on_disk = (img->bitdepth_chroma > 8)? 16:8;
+ 
+   img->num_ref_frames = active_sps->num_ref_frames;
+   img->max_num_references   = active_sps->frame_mbs_only_flag ? active_sps->num_ref_frames : 2 * active_sps->num_ref_frames;
+ 
+   img->buf_cycle = input->num_ref_frames;
+ 
+   img->DeblockCall = 0;
+ 
+ //  img->framerate=INIT_FRAME_RATE;   // The basic frame rate (of the original sequence)
+   img->framerate=(float) input->FrameRate;   // The basic frame rate (of the original sequence)
+ 
+ 
+   get_mem_mv (&(img->pred_mv));
+   get_mem_mv (&(img->all_mv));
+ 
+   if (input->BiPredMotionEstimation)
+   {
+     get_mem_mv (&(img->bipred_mv1));
+     get_mem_mv (&(img->bipred_mv2));
+   }
+ 
+   get_mem_ACcoeff (&(img->cofAC));
+   get_mem_DCcoeff (&(img->cofDC));
+ 
+   
+   if(input->MbInterlace)   // extra mv / coefficient storage for the four rddata_* sets
+   {
+     get_mem_mv (&(rddata_top_frame_mb.pred_mv));
+     get_mem_mv (&(rddata_top_frame_mb.all_mv));
+ 
+     get_mem_mv (&(rddata_bot_frame_mb.pred_mv));
+     get_mem_mv (&(rddata_bot_frame_mb.all_mv));
+ 
+     get_mem_mv (&(rddata_top_field_mb.pred_mv));
+     get_mem_mv (&(rddata_top_field_mb.all_mv));
+ 
+     get_mem_mv (&(rddata_bot_field_mb.pred_mv));
+     get_mem_mv (&(rddata_bot_field_mb.all_mv));
+ 
+     get_mem_ACcoeff (&(rddata_top_frame_mb.cofAC));
+     get_mem_DCcoeff (&(rddata_top_frame_mb.cofDC));
+ 
+     get_mem_ACcoeff (&(rddata_bot_frame_mb.cofAC));
+     get_mem_DCcoeff (&(rddata_bot_frame_mb.cofDC));
+ 
+     get_mem_ACcoeff (&(rddata_top_field_mb.cofAC));
+     get_mem_DCcoeff (&(rddata_top_field_mb.cofDC));
+ 
+     get_mem_ACcoeff (&(rddata_bot_field_mb.cofAC));
+     get_mem_DCcoeff (&(rddata_bot_field_mb.cofDC));
+   }
+ 
+   if(img->max_imgpel_value > img->max_imgpel_value_uv)   // table size: twice (largest sample value + 1)
+     byte_abs_range = (img->max_imgpel_value + 1) * 2;
+   else
+     byte_abs_range = (img->max_imgpel_value_uv + 1) * 2;
+ 
+   if ((img->quad = (int*)calloc (byte_abs_range, sizeof(int))) == NULL)
+     no_mem_exit ("init_img: img->quad");
+   img->quad+=byte_abs_range/2;   // point at the middle so negative indices are valid; free_img() undoes this offset
+   for (i=0; i < byte_abs_range/2; ++i)
+   {
+     img->quad[i]=img->quad[-i]=i*i;   // table of squares, symmetric around index 0
+   }
+ 
+   img->width    = (input->img_width+img->auto_crop_right);
+   img->height   = (input->img_height+img->auto_crop_bottom);
+   if (img->yuv_format != YUV400)
+   {
+     img->width_cr = img->width/(16/mb_width_cr[img->yuv_format]);
+     img->height_cr= img->height/(16/mb_height_cr[img->yuv_format]);
+ 
+     input->img_width_cr  = input->img_width/(16/mb_width_cr[img->yuv_format]);
+     input->img_height_cr = input->img_height/(16/mb_height_cr[img->yuv_format]);
+   }
+   else
+   {
+     img->width_cr = 0;
+     img->height_cr= 0;
+ 
+     input->img_width_cr  = 0;
+     input->img_height_cr = 0;
+   }
+   img->height_cr_frame = img->height_cr;
+   
+   img->PicWidthInMbs    = (input->img_width+img->auto_crop_right)/MB_BLOCK_SIZE;
+   img->FrameHeightInMbs = (input->img_height+img->auto_crop_bottom)/MB_BLOCK_SIZE;
+   img->FrameSizeInMbs   = img->PicWidthInMbs * img->FrameHeightInMbs;
+ 
+   img->PicHeightInMapUnits = ( active_sps->frame_mbs_only_flag ? img->FrameHeightInMbs : img->FrameHeightInMbs/2 );
+ 
+   if(((img->mb_data) = (Macroblock *) calloc(img->FrameSizeInMbs,sizeof(Macroblock))) == NULL)
+     no_mem_exit("init_img: img->mb_data");
+ 
+   if(input->UseConstrainedIntraPred)
+   {
+     if(((img->intra_block) = (int*)calloc(img->FrameSizeInMbs,sizeof(int))) == NULL)
+       no_mem_exit("init_img: img->intra_block");
+   }
+ 
+   get_mem2D((byte***)&(img->ipredmode), img->height/BLOCK_SIZE, img->width/BLOCK_SIZE);        //need two extra rows at right and bottom
+   get_mem2D((byte***)&(img->ipredmode8x8), img->height/BLOCK_SIZE, img->width/BLOCK_SIZE);     // help storage for ipredmode 8x8, inserted by YV
+  
+   get_mem2D((byte***)&(rddata_top_frame_mb.ipredmode), img->height/BLOCK_SIZE, img->width/BLOCK_SIZE);
+   
+   if(input->MbInterlace) 
+   {
+     get_mem2D((byte***)&(rddata_bot_frame_mb.ipredmode), img->height/BLOCK_SIZE, img->width/BLOCK_SIZE);
+     get_mem2D((byte***)&(rddata_top_field_mb.ipredmode), img->height/BLOCK_SIZE, img->width/BLOCK_SIZE);
+     get_mem2D((byte***)&(rddata_bot_field_mb.ipredmode), img->height/BLOCK_SIZE, img->width/BLOCK_SIZE);
+   }
+   // CAVLC mem
+   get_mem3Dint(&(img->nz_coeff), img->FrameSizeInMbs, 4, 4+img->num_blk8x8_uv);
+ 
+   CAVLC_init();
+ 
+   for (i=0; i < img->width/BLOCK_SIZE; i++)   // mark every intra prediction mode entry with -1
+     for (j=0; j < img->height/BLOCK_SIZE; j++)
+     {
+       img->ipredmode[j][i]=-1;
+       img->ipredmode8x8[j][i]=-1;
+     }
+ 
+   img->mb_y_upd=0;
+ 
+   RandomIntraInit (img->width/16, img->height/16, input->RandomIntraMBRefresh);
+ 
+   InitSEIMessages();  // Tian Dong (Sept 2002)
+ 
+   // Initialize filtering parameters. If sending parameters, the offsets are 
+   // multiplied by 2 since inputs are taken in "div 2" format.
+   // If not sending parameters, all fields are cleared 
+   if (input->LFSendParameters)
+   {
+     input->LFAlphaC0Offset <<= 1;
+     input->LFBetaOffset <<= 1;
+   }
+   else
+   {
+     input->LFDisableIdc = 0;
+     input->LFAlphaC0Offset = 0;
+     input->LFBetaOffset = 0;
+   }
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Free the Image structures
+  * \par Input:
+  *    Image Parameters struct img_par *img
+  ***********************************************************************
+  */
+ void free_img ()
+ {
+   int peak_pel;   // larger of the luma and chroma maximum sample values
+ 
+   CloseSEIMessages(); // Tian Dong (Sept 2002)
+ 
+   // motion vector storage of the image
+   free_mem_mv (img->pred_mv);
+   free_mem_mv (img->all_mv);
+   if (input->BiPredMotionEstimation)
+   {
+     free_mem_mv (img->bipred_mv1);
+     free_mem_mv (img->bipred_mv2);
+   }
+ 
+   // coefficient storage of the image
+   free_mem_ACcoeff (img->cofAC);
+   free_mem_DCcoeff (img->cofDC);
+ 
+   // per-macroblock RD data sets exist only with macroblock interlace coding
+   if (input->MbInterlace)
+   {
+     // motion vectors: top frame, bottom frame, top field, bottom field
+     free_mem_mv (rddata_top_frame_mb.pred_mv);
+     free_mem_mv (rddata_top_frame_mb.all_mv);
+     free_mem_mv (rddata_bot_frame_mb.pred_mv);
+     free_mem_mv (rddata_bot_frame_mb.all_mv);
+     free_mem_mv (rddata_top_field_mb.pred_mv);
+     free_mem_mv (rddata_top_field_mb.all_mv);
+     free_mem_mv (rddata_bot_field_mb.pred_mv);
+     free_mem_mv (rddata_bot_field_mb.all_mv);
+ 
+     // coefficients, same order
+     free_mem_ACcoeff (rddata_top_frame_mb.cofAC);
+     free_mem_DCcoeff (rddata_top_frame_mb.cofDC);
+     free_mem_ACcoeff (rddata_bot_frame_mb.cofAC);
+     free_mem_DCcoeff (rddata_bot_frame_mb.cofDC);
+     free_mem_ACcoeff (rddata_top_field_mb.cofAC);
+     free_mem_DCcoeff (rddata_top_field_mb.cofDC);
+     free_mem_ACcoeff (rddata_bot_field_mb.cofAC);
+     free_mem_DCcoeff (rddata_bot_field_mb.cofDC);
+   }
+ 
+   // img->quad points into the middle of its allocation; step back by
+   // (largest sample value + 1) to recover the pointer that was allocated
+   if (img->max_imgpel_value > img->max_imgpel_value_uv)
+     peak_pel = img->max_imgpel_value;
+   else
+     peak_pel = img->max_imgpel_value_uv;
+   free (img->quad - (peak_pel + 1));
+ 
+   if (input->MbInterlace)
+   {
+     free_mem2D((byte**)rddata_bot_frame_mb.ipredmode);
+     free_mem2D((byte**)rddata_top_field_mb.ipredmode);
+     free_mem2D((byte**)rddata_bot_field_mb.ipredmode);
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates the picture structure along with its dependent
+  *    data structures
+  * \return
+  *    Pointer to a Picture
+  ************************************************************************
+  */
+ 
+ Picture *malloc_picture()
+ {
+   // zero-initialised Picture; allocation failure is reported through no_mem_exit()
+   Picture *pic = calloc (1, sizeof (Picture));
+ 
+   if (pic == NULL)
+     no_mem_exit ("malloc_picture: Picture structure");
+ 
+   //! Note: slice structures are allocated as needed in code_a_picture
+   return pic;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Frees a picture
+  * \param
+  *    pic: Pointer to a Picture to be freed
+  ************************************************************************
+  */
+ 
+ 
+ void free_picture(Picture *pic)
+ {
+   // nothing to release for a NULL picture
+   if (pic == NULL)
+     return;
+ 
+   // slices first, then the picture structure itself
+   free_slice_list(pic);
+   free (pic);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reports frame statistical data to a stats file
+  ************************************************************************
+  */
+ void report_frame_statistic()
+ {
+   FILE *p_stat_frm = NULL;
+   static int   last_mode_use[NUM_PIC_TYPE][MAXMODE];
+   static int   last_b8_mode_0[NUM_PIC_TYPE][2];
+   static int   last_mode_chroma_use[4];
+   static int   last_bit_ctr_n = 0;
+   int i;
+   char name[20];
+   int bitcounter;
+   
+ #ifndef WIN32
+   time_t now;
+   struct tm *l_time;
+   char string[1000];
+ #else
+   char timebuf[128];
+ #endif
+   
+ 
+   // write to log file
+   if ((p_stat_frm=fopen("stat_frame.dat","r"))==0)                      // check if file exist
+   {
+     if ((p_stat_frm=fopen("stat_frame.dat","a"))==NULL)            // append new statistic at the end
+     {
+       snprintf(errortext, ET_SIZE, "Error open file %s  \n","stat_frame.dat.dat");
+       error(errortext, 500);
+     }
+     else                                            // Create header for new log file
+     {
+       fprintf(p_stat_frm," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+       fprintf(p_stat_frm,"|            Encoder statistics. This file is generated during first encoding session, new sessions will be appended                                                                                                                                                                                                                                                                                                                                                              |\n");
+       fprintf(p_stat_frm," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+     }
+   }
+   else
+   {
+     fclose (p_stat_frm);
+     if ((p_stat_frm=fopen("stat_frame.dat","a"))==NULL)            // File exist,just open for appending
+     {
+       snprintf(errortext, ET_SIZE, "Error open file %s  \n","stat_frame.dat.dat");
+       error(errortext, 500);
+     }
+   }
+   
+   if (frame_statistic_start)
+   {
+     fprintf(p_stat_frm,"|  ver   | Date  | Time  |    Sequence        |Frm | QP |P/MbInt|   Bits   |  SNRY  |  SNRU  |  SNRV  |  I4  |  I8  | I16  | IC0  | IC1  | IC2  | IC3  | PI4  | PI8  | PI16 |  P0  |  P1  |  P2  |  P3  | P1*8*| P1*4*| P2*8*| P2*4*| P3*8*| P3*4*|  P8  | P8:4 | P4*8*| P4*4*| P8:5 | P8:6 | P8:7 | BI4  | BI8  | BI16 |  B0  |  B1  |  B2  |  B3  | B0*8*| B0*4*| B1*8*| B1*4*| B2*8*| B2*4*| B3*8*| B3*4*|  B8  | B8:0 |B80*8*|B80*4*| B8:4 | B4*8*| B4*4*| B8:5 | B8:6 | B8:7 |\n");
+     fprintf(p_stat_frm," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+   } 
+ 
+   //report
+   fprintf(p_stat_frm,"|%4s/%s", VERSION, EXT_VERSION); 
+   
+ #ifdef WIN32
+   _strdate( timebuf );
+   fprintf(p_stat_frm,"| %1.5s |",timebuf );
+   
+   _strtime( timebuf);
+   fprintf(p_stat_frm," % 1.5s |",timebuf);
+ #else
+   now = time ((time_t *) NULL); // Get the system time and put it into 'now' as 'calender time'
+   time (&now);
+   l_time = localtime (&now);
+   strftime (string, sizeof string, "%d-%b-%Y", l_time);
+   fprintf(p_stat_frm,"| %1.5s |",string );
+   
+   strftime (string, sizeof string, "%H:%M:%S", l_time);
+   fprintf(p_stat_frm," %1.5s |",string );
+ #endif
+   
+   for (i=0;i<20;i++)
+     name[i]=input->infile[i+max(0,(int) (strlen(input->infile)-20))]; // write last part of path, max 20 chars
+   fprintf(p_stat_frm,"%20.20s|",name);
+   
+   fprintf(p_stat_frm,"%3d |",frame_no);
+ 
+   fprintf(p_stat_frm,"%3d |",img->qp);
+ 
+   fprintf(p_stat_frm,"  %d/%d  |",input->PicInterlace, input->MbInterlace);
+   
+   
+   if (img->frame_num == 0)
+   {
+     bitcounter = stats->bit_ctr_I;
+   }
+   else
+   {
+     bitcounter = stats->bit_ctr_n - last_bit_ctr_n;
+     last_bit_ctr_n = stats->bit_ctr_n;
+   }
+ 
+   //report bitrate
+   fprintf(p_stat_frm, " %9d|", bitcounter);
+   
+   //report snr's
+   fprintf(p_stat_frm, " %2.4f| %2.4f| %2.4f|", snr->snr_y, snr->snr_u, snr->snr_v);
+   
+   //report modes
+   //I-Modes
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[I_SLICE][I4MB] - last_mode_use[I_SLICE][I4MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[I_SLICE][I8MB] - last_mode_use[I_SLICE][I8MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[I_SLICE][I16MB] - last_mode_use[I_SLICE][I16MB]);
+   
+   //chroma intra mode
+   fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[0] - last_mode_chroma_use[0]);
+   fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[1] - last_mode_chroma_use[1]);
+   fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[2] - last_mode_chroma_use[2]);
+   fprintf(p_stat_frm, " %5d|",stats->intra_chroma_mode[3] - last_mode_chroma_use[3]);
+   
+   //P-Modes
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][I4MB] - last_mode_use[P_SLICE][I4MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][I8MB] - last_mode_use[P_SLICE][I8MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][I16MB] - last_mode_use[P_SLICE][I16MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][0   ] - last_mode_use[P_SLICE][0   ]);
+   
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][1   ] - last_mode_use[P_SLICE][1   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][2   ] - last_mode_use[P_SLICE][2   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][3   ] - last_mode_use[P_SLICE][3   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][1]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][1]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][2]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][2]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][3]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][3]);
+   
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][P8x8] - last_mode_use[P_SLICE][P8x8]);
+ //  fprintf(p_stat_frm, " %5d|",stats->b8_mode_0_use[P_SLICE][0]  - last_b8_mode_0[P_SLICE ][0]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][4   ] - last_mode_use[P_SLICE][4   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[0][4]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[0][4]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][5   ] - last_mode_use[P_SLICE][5   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][6   ] - last_mode_use[P_SLICE][6   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[P_SLICE][7   ] - last_mode_use[P_SLICE][7   ]);
+   
+   //B-Modes
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][I4MB] - last_mode_use[B_SLICE][I4MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][I8MB] - last_mode_use[B_SLICE][I8MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][I16MB] - last_mode_use[B_SLICE][I16MB]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][0   ] - last_mode_use[B_SLICE][0   ]);
+   /*
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][1   ] - last_mode_use[B_SLICE][1   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][2   ] - last_mode_use[B_SLICE][2   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][3   ] - last_mode_use[B_SLICE][3   ]);
+   */
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][1   ] - last_mode_use[B_SLICE][1   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][2   ] - last_mode_use[B_SLICE][2   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][3   ] - last_mode_use[B_SLICE][3   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][0]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][0]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][1]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][1]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][2]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][2]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][3]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][3]);
+   
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][P8x8] - last_mode_use[B_SLICE][P8x8]);
+   fprintf(p_stat_frm, " %5d|",(stats->b8_mode_0_use [B_SLICE][0]+stats->b8_mode_0_use [B_SLICE][1]) - (last_b8_mode_0[B_SLICE][0]+last_b8_mode_0[B_SLICE][1]));
+   fprintf(p_stat_frm, " %5d|",stats->b8_mode_0_use [B_SLICE][1] - last_b8_mode_0[B_SLICE][1]);
+   fprintf(p_stat_frm, " %5d|",stats->b8_mode_0_use [B_SLICE][0] - last_b8_mode_0[B_SLICE][0]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][4   ] - last_mode_use[B_SLICE][4   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_8x8[1][4]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use_transform_4x4[1][4]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][5   ] - last_mode_use[B_SLICE][5   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][6   ] - last_mode_use[B_SLICE][6   ]);
+   fprintf(p_stat_frm, " %5d|",stats->mode_use[B_SLICE][7   ] - last_mode_use[B_SLICE][7   ]);
+ 
+   fprintf(p_stat_frm, "\n");
+   
+   //save the last results
+   memcpy(last_mode_use[I_SLICE],stats->mode_use[I_SLICE], MAXMODE *  sizeof(int));
+   memcpy(last_mode_use[P_SLICE],stats->mode_use[P_SLICE], MAXMODE *  sizeof(int));
+   memcpy(last_mode_use[B_SLICE],stats->mode_use[B_SLICE], MAXMODE *  sizeof(int));
+   memset(stats->mode_use_transform_8x8,0, 2 * MAXMODE *  sizeof(int));
+   memset(stats->mode_use_transform_4x4,0, 2 * MAXMODE *  sizeof(int));
+   memcpy(last_b8_mode_0[B_SLICE],stats->b8_mode_0_use[B_SLICE], 2 *  sizeof(int));
+   memcpy(last_mode_chroma_use,stats->intra_chroma_mode, 4 *  sizeof(int));
+ 
+   frame_statistic_start = 0;
+   fclose(p_stat_frm);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reports the gathered information to appropriate outputs
+  * \par Input:
+  *    struct inp_par *inp,                                            \n
+  *    struct img_par *img,                                            \n
+  *    struct stat_par *stats,                                          \n
+  *    struct stat_par *stats                                           \n
+  *
+  * \par Output:
+  *    None
+  ************************************************************************
+  */
+ void report()
+ {
+   int bit_use[NUM_PIC_TYPE][2] ;
+   int i,j;
+   char name[30];
+   int total_bits;
+   float frame_rate;
+   float mean_motion_info_bit_use[2] = {0.0};
+ 
+ #ifndef WIN32
+   time_t now;
+   struct tm *l_time;
+   char string[1000];
+ #else
+   char timebuf[128];
+ #endif
+   bit_use[I_SLICE][0] = frame_ctr[I_SLICE];
+   bit_use[P_SLICE][0] = max(frame_ctr[P_SLICE] + frame_ctr[SP_SLICE],1);
+   bit_use[B_SLICE][0] = frame_ctr[B_SLICE];
+ 
+   //  Accumulate bit usage for inter and intra frames
+   for (j=0;j<NUM_PIC_TYPE;j++)
+   {
+     bit_use[j][1] = 0;
+   }
+ 
+   for (j=0;j<NUM_PIC_TYPE;j++)
+   {
+     for(i=0; i<MAXMODE; i++)
+       bit_use[j][1] += stats->bit_use_mode    [j][i]; 
+ 
+     bit_use[j][1]+=stats->bit_use_header[j];
+     bit_use[j][1]+=stats->bit_use_mb_type[j];
+     bit_use[j][1]+=stats->tmp_bit_use_cbp[j];
+     bit_use[j][1]+=stats->bit_use_coeffY[j];
+     bit_use[j][1]+=stats->bit_use_coeffC[j];
+     bit_use[j][1]+=stats->bit_use_delta_quant[j];
+     bit_use[j][1]+=stats->bit_use_stuffingBits[j];
+   }
+ 
+   frame_rate = (img->framerate *(float)(stats->successive_Bframe + 1)) / (float) (input->jumpd+1);
+ 
+ //! Currently adding NVB bits on P rate. Maybe additional stats info should be created instead and added in log file
+   stats->bitrate_I = (stats->bit_ctr_I)*(frame_rate)/(float) (input->no_frames + frame_ctr[B_SLICE]);
+   stats->bitrate_P = (stats->bit_ctr_P)*(frame_rate)/(float) (input->no_frames + frame_ctr[B_SLICE]);
+   stats->bitrate_B = (stats->bit_ctr_B)*(frame_rate)/(float) (input->no_frames + frame_ctr[B_SLICE]);    
+ 
+   fprintf(stdout,"-------------------------------------------------------------------------------\n");
+   if (input->Verbose != 0)
+   {
+     fprintf(stdout,  " Freq. for encoded bitstream       : %1.0f\n",img->framerate/(float)(input->jumpd+1));
+     switch (input->hadamard)
+     {
+     case 2:
+       fprintf(stdout," Hadamard transform                : Used for QPel\n");
+       break;
+     case 1:
+       fprintf(stdout," Hadamard transform                : Used\n");    
+       break;
+     default:
+       fprintf(stdout," Hadamard transform                : Not used\n");
+       break;
+     }
+     
+     fprintf(stdout,  " Image format                      : %dx%d\n",input->img_width,input->img_height);
+     
+     if(input->intra_upd)
+       fprintf(stdout," Error robustness                  : On\n");
+     else
+       fprintf(stdout," Error robustness                  : Off\n");
+     fprintf(stdout,  " Search range                      : %d\n",input->search_range);
+     
+     fprintf(stdout,  " Total number of references        : %d\n",input->num_ref_frames);
+     fprintf(stdout,  " References for P slices           : %d\n",input->P_List0_refs? input->P_List0_refs:input->num_ref_frames);
+     if(stats->successive_Bframe != 0)
+     {
+       fprintf(stdout," List0 references for B slices     : %d\n",input->B_List0_refs? input->B_List0_refs:input->num_ref_frames);
+       fprintf(stdout," List1 references for B slices     : %d\n",input->B_List1_refs? input->B_List1_refs:input->num_ref_frames);
+     }
+     
+ #if 0  // FIXME: control with a runtime option.   
+     fprintf(stdout,  " Total encoding time for the seq.  : %.3f sec (%.2f fps)\n",tot_time*0.001, 1000.0*(input->no_frames + frame_ctr[B_SLICE])/tot_time);
+     fprintf(stdout,  " Total ME time for sequence        : %.3f sec \n",me_tot_time*0.001);
+ #endif
+     
+     // B pictures
+     fprintf(stdout,  " Sequence type                     :" );
+     
+     if(stats->successive_Bframe>0 && input->PyramidCoding) 
+     {
+       fprintf(stdout, " Pyramid (QP: I %d, P %d, B %d) \n",
+         input->qp0, input->qpN, input->qpB);
+     }
+     else if(stats->successive_Bframe>0) 
+     {
+       char seqtype[80];
+       int i,j;
+       
+       strcpy (seqtype,"I");
+       
+       for (j=0;j<2;j++)
+       {
+         for (i=0;i<stats->successive_Bframe;i++)
+         {
+           if (input->BRefPictures)
+             strncat(seqtype,"-RB",max (0, (int) (79-strlen(seqtype))));
+           else
+             strncat(seqtype,"-B",max (0, (int) (79-strlen(seqtype))));
+         }
+         strncat(seqtype,"-P",max (0, (int) (79-strlen(seqtype))));
+       }
+       if (input->BRefPictures)
+         fprintf(stdout, " %s (QP: I %d, P %d, RB %d) \n", seqtype,input->qp0, input->qpN, Clip3(0,51,input->qpB+input->qpBRSOffset));
+       else
+         fprintf(stdout, " %s (QP: I %d, P %d, B %d) \n", seqtype,input->qp0, input->qpN, input->qpB);
+     }
+     else if(stats->successive_Bframe==0 && input->sp_periodicity==0) fprintf(stdout, " IPPP (QP: I %d, P %d) \n",   input->qp0, input->qpN);
+     
+     else fprintf(stdout, " I-P-P-SP-P (QP: I %d, P %d, SP (%d, %d)) \n",  input->qp0, input->qpN, input->qpsp, input->qpsp_pred);
+     
+     // report on entropy coding  method
+     if (input->symbol_mode == UVLC)
+       fprintf(stdout," Entropy coding method             : CAVLC\n");
+     else
+       fprintf(stdout," Entropy coding method             : CABAC\n");
+     
+     fprintf(stdout,  " Profile/Level IDC                 : (%d,%d)\n",input->ProfileIDC,input->LevelIDC);
+     
+   if (input->FMEnable==1)
+     fprintf(stdout,  " Motion Estimation Scheme          : HEX\n");    
+   else if (input->FMEnable==2)
+     fprintf(stdout,  " Motion Estimation Scheme          : SHEX\n");
+    else if (input->FMEnable == 3)
+    {
+      fprintf(stdout,  " Motion Estimation Scheme          : EPZS\n");
+      EPZSOutputStats(stdout, 0);
+    }
+   else
+     fprintf(stdout,  " Motion Estimation Scheme          : Full Search\n");
+ 
+ 
+     
+ #ifdef _FULL_SEARCH_RANGE_
+     if (input->full_search == 2)
+       fprintf(stdout," Search range restrictions         : none\n");
+     else if (input->full_search == 1)
+       fprintf(stdout," Search range restrictions         : older reference frames\n");
+     else
+       fprintf(stdout," Search range restrictions         : smaller blocks and older reference frames\n");
+ #endif
+     
+     if (input->rdopt)
+       fprintf(stdout," RD-optimized mode decision        : used\n");
+     else
+       fprintf(stdout," RD-optimized mode decision        : not used\n");
+     
+     switch(input->partition_mode)
+     {
+     case PAR_DP_1:
+       fprintf(stdout," Data Partitioning Mode            : 1 partition \n");
+       break;
+     case PAR_DP_3:
+       fprintf(stdout," Data Partitioning Mode            : 3 partitions \n");
+       break;
+     default:
+       fprintf(stdout," Data Partitioning Mode            : not supported\n");
+       break;
+     }
+     
+     switch(input->of_mode)
+     {
+     case PAR_OF_ANNEXB:
+       fprintf(stdout," Output File Format                : H.264 Bit Stream File Format \n");
+       break;
+     case PAR_OF_RTP:
+       fprintf(stdout," Output File Format                : RTP Packet File Format \n");
+       break;
+     default:
+       fprintf(stdout," Output File Format                : not supported\n");
+       break;
+     }
+     // Residue Color Transform
+     if(input->residue_transform_flag)
+       fprintf(stdout," Residue Color Transform           : used\n");
+     else
+       fprintf(stdout," Residue Color Transform           : not used\n");
+ }
+ 
+   fprintf(stdout,"------------------ Average data all frames  -----------------------------------\n");
+   if (input->Verbose != 0)
+   {
+     int  impix = input->img_height * input->img_width;
+     int  impix_cr = input->img_height_cr * input->img_width_cr;
+     unsigned int max_pix_value_sqd = img->max_imgpel_value * img->max_imgpel_value;
+     unsigned int max_pix_value_sqd_uv = img->max_imgpel_value_uv * img->max_imgpel_value_uv;
+     float csnr_y = (float) (10 * log10 (max_pix_value_sqd * 
+       (double)((double) impix / (snr->msse_y == 0.0? 1.0 : snr->msse_y))));  
+     float csnr_u = (float) (10 * log10 (max_pix_value_sqd_uv * 
+       (double)((double) impix_cr / (snr->msse_u == 0.0? 1.0 : snr->msse_u))));  
+     float csnr_v = (float) (10 * log10 (max_pix_value_sqd_uv * 
+       (double)((double) impix_cr / (snr->msse_v == 0.0? 1.0 : snr->msse_v))));  
+ 
+     fprintf(stdout," SNR Y(dB)                         : %5.2f\n",snr->snr_ya);
+     fprintf(stdout," SNR U(dB)                         : %5.2f\n",snr->snr_ua);
+     fprintf(stdout," SNR V(dB)                         : %5.2f\n",snr->snr_va);
+     fprintf(stdout," cSNR Y(dB)                        : %5.2f (%5.2f)\n",csnr_y,snr->msse_y/impix);
+     fprintf(stdout," cSNR U(dB)                        : %5.2f (%5.2f)\n",csnr_u,snr->msse_u/impix_cr);
+     fprintf(stdout," cSNR V(dB)                        : %5.2f (%5.2f)\n",csnr_v,snr->msse_v/impix_cr);
+   }
+   else
+     fprintf(stdout,  " Total encoding time for the seq.  : %.3f sec (%.2f fps)\n",tot_time*0.001, 1000.0*(input->no_frames + frame_ctr[B_SLICE])/tot_time);
+ 
+   if(frame_ctr[B_SLICE]!=0)
+   {
+     fprintf(stdout, " Total bits                        : %d (I %5d, P %5d, B %d NVB %d) \n",
+       total_bits=stats->bit_ctr_P + stats->bit_ctr_I + stats->bit_ctr_B + stats->bit_ctr_parametersets, stats->bit_ctr_I, stats->bit_ctr_P, stats->bit_ctr_B,stats->bit_ctr_parametersets);
+     
+     frame_rate = (img->framerate *(float)(stats->successive_Bframe + 1)) / (float) (input->jumpd+1);
+     stats->bitrate= ((float) total_bits * frame_rate)/((float) (input->no_frames + frame_ctr[B_SLICE]));
+     
+     fprintf(stdout, " Bit rate (kbit/s)  @ %2.2f Hz     : %5.2f\n", frame_rate, stats->bitrate/1000);
+     
+   }
+   else if (input->sp_periodicity==0)
+   {
+     fprintf(stdout, " Total bits                        : %d (I %5d, P %5d, NVB %d) \n",
+       total_bits=stats->bit_ctr_P + stats->bit_ctr_I + stats->bit_ctr_parametersets, stats->bit_ctr_I, stats->bit_ctr_P, stats->bit_ctr_parametersets);
+     
+     
+     frame_rate = img->framerate / ( (float) (input->jumpd + 1) );
+     stats->bitrate= ((float) total_bits * frame_rate)/((float) input->no_frames );
+     
+     fprintf(stdout, " Bit rate (kbit/s)  @ %2.2f Hz     : %5.2f\n", frame_rate, stats->bitrate/1000);
+   }
+   else
+   {
+     fprintf(stdout, " Total bits                        : %d (I %5d, P %5d, NVB %d) \n",
+       total_bits=stats->bit_ctr_P + stats->bit_ctr_I + stats->bit_ctr_parametersets, stats->bit_ctr_I, stats->bit_ctr_P, stats->bit_ctr_parametersets);
+     
+     
+     frame_rate = img->framerate / ( (float) (input->jumpd + 1) );
+     stats->bitrate= ((float) total_bits * frame_rate)/((float) input->no_frames );
+     
+     fprintf(stdout, " Bit rate (kbit/s)  @ %2.2f Hz     : %5.2f\n", frame_rate, stats->bitrate/1000);
+   }
+   
+   fprintf(stdout, " Bits to avoid Startcode Emulation : %d \n", stats->bit_ctr_emulationprevention);
+   fprintf(stdout, " Bits for parameter sets           : %d \n", stats->bit_ctr_parametersets);
+   
+   fprintf(stdout,"-------------------------------------------------------------------------------\n");
+   fprintf(stdout,"Exit JM %s encoder ver %s ", JM, VERSION);
+   fprintf(stdout,"\n");
+ 
+   // status file
+   if ((p_stat=fopen("stats.dat","wt"))==0)
+   {
+     snprintf(errortext, ET_SIZE, "Error open file %s", "stats.dat");
+     error(errortext, 500);
+   }
+   fprintf(p_stat," -------------------------------------------------------------- \n");
+   fprintf(p_stat,"  This file contains statistics for the last encoded sequence   \n");
+   fprintf(p_stat," -------------------------------------------------------------- \n");
+   fprintf(p_stat,   " Sequence                     : %s\n",input->infile);
+   fprintf(p_stat,   " No.of coded pictures         : %4d\n",input->no_frames+frame_ctr[B_SLICE]);
+   fprintf(p_stat,   " Freq. for encoded bitstream  : %4.0f\n",frame_rate);
+ 
+   fprintf(p_stat,   " I Slice Bitrate(kb/s)        : %6.2f\n", stats->bitrate_I/1000);
+   fprintf(p_stat,   " P Slice Bitrate(kb/s)        : %6.2f\n", stats->bitrate_P/1000);
+   // B pictures
+   if(stats->successive_Bframe != 0)
+     fprintf(p_stat,   " B Slice Bitrate(kb/s)        : %6.2f\n", stats->bitrate_B/1000);
+   fprintf(p_stat,   " Total Bitrate(kb/s)          : %6.2f\n", stats->bitrate/1000);
+ 
+   switch (input->hadamard)
+   {
+   case 2:
+     fprintf(p_stat," Hadamard transform           : Used for QPel\n");
+     break;
+   case 1:
+     fprintf(p_stat," Hadamard transform           : Used\n");
+     break;
+   default:
+     fprintf(p_stat," Hadamard transform           : Not used\n");
+     break;
+   }
+ 
+   fprintf(p_stat,  " Image format                 : %dx%d\n",input->img_width,input->img_height);
+ 
+   if(input->intra_upd)
+     fprintf(p_stat," Error robustness             : On\n");
+   else
+     fprintf(p_stat," Error robustness             : Off\n");
+ 
+   fprintf(p_stat,  " Search range                 : %d\n",input->search_range);
+ 
+   fprintf(p_stat,   " Total number of references   : %d\n",input->num_ref_frames);
+   fprintf(p_stat,   " References for P slices      : %d\n",input->P_List0_refs? input->P_List0_refs:input->num_ref_frames);
+   if(stats->successive_Bframe != 0)
+   {
+     fprintf(p_stat, " List0 refs for B slices      : %d\n",input->B_List0_refs? input->B_List0_refs:input->num_ref_frames);
+     fprintf(p_stat, " List1 refs for B slices      : %d\n",input->B_List1_refs? input->B_List1_refs:input->num_ref_frames);
+   }
+ 
+   if (input->symbol_mode == UVLC)
+     fprintf(p_stat,   " Entropy coding method        : CAVLC\n");
+   else
+     fprintf(p_stat,   " Entropy coding method        : CABAC\n");
+ 
+     fprintf(p_stat,   " Profile/Level IDC            : (%d,%d)\n",input->ProfileIDC,input->LevelIDC);
+   if(input->MbInterlace)
+     fprintf(p_stat, " MB Field Coding : On \n");
+ 
+   if (input->FMEnable == 3)
+     EPZSOutputStats(p_stat, 1);
+   
+ #ifdef _FULL_SEARCH_RANGE_
+   if (input->full_search == 2)
+     fprintf(p_stat," Search range restrictions    : none\n");
+   else if (input->full_search == 1)
+     fprintf(p_stat," Search range restrictions    : older reference frames\n");
+   else
+     fprintf(p_stat," Search range restrictions    : smaller blocks and older reference frames\n");
+ #endif
+   if (input->rdopt)
+     fprintf(p_stat," RD-optimized mode decision   : used\n");
+   else
+     fprintf(p_stat," RD-optimized mode decision   : not used\n");
+ 
+   fprintf(p_stat," ---------------------|----------------|---------------|\n");
+   fprintf(p_stat,"     Item             |     Intra      |   All frames  |\n");
+   fprintf(p_stat," ---------------------|----------------|---------------|\n");
+   fprintf(p_stat," SNR Y(dB)            |");
+   fprintf(p_stat," %5.2f          |",snr->snr_y1);
+   fprintf(p_stat," %5.2f         |\n",snr->snr_ya);
+   fprintf(p_stat," SNR U/V (dB)         |");
+   fprintf(p_stat," %5.2f/%5.2f    |",snr->snr_u1,snr->snr_v1);
+   fprintf(p_stat," %5.2f/%5.2f   |\n",snr->snr_ua,snr->snr_va);
+ 
+   // QUANT.
+   fprintf(p_stat," Average quant        |");
+   fprintf(p_stat," %5d          |",absm(input->qp0));
+   fprintf(p_stat," %5.2f         |\n",(float)stats->quant1/max(1.0,(float)stats->quant0));
+ 
+   fprintf(p_stat,"\n ---------------------|----------------|---------------|---------------|\n");
+   fprintf(p_stat,"     SNR              |        I       |       P       |       B       |\n");
+   fprintf(p_stat," ---------------------|----------------|---------------|---------------|\n");
+   fprintf(p_stat," SNR Y(dB)            |      %5.3f    |     %5.3f    |     %5.3f    |\n",
+                 snr->snr_yt[I_SLICE],snr->snr_yt[P_SLICE],snr->snr_yt[B_SLICE]);
+   fprintf(p_stat," SNR U(dB)            |      %5.3f    |     %5.3f    |     %5.3f    |\n",
+                 snr->snr_ut[I_SLICE],snr->snr_ut[P_SLICE],snr->snr_ut[B_SLICE]);
+   fprintf(p_stat," SNR V(dB)            |      %5.3f    |     %5.3f    |     %5.3f    |\n",
+                 snr->snr_vt[I_SLICE],snr->snr_vt[P_SLICE],snr->snr_vt[B_SLICE]);
+ 
+ 
+   // MODE
+   fprintf(p_stat,"\n ---------------------|----------------|\n");
+   fprintf(p_stat,"   Intra              |   Mode used    |\n");
+   fprintf(p_stat," ---------------------|----------------|\n");
+ 
+   fprintf(p_stat," Mode 0  intra 4x4    |  %5d         |\n",stats->mode_use[I_SLICE][I4MB ]);
+   fprintf(p_stat," Mode 1  intra 8x8    |  %5d         |\n",stats->mode_use[I_SLICE][I8MB ]);
+   fprintf(p_stat," Mode 2+ intra 16x16  |  %5d         |\n",stats->mode_use[I_SLICE][I16MB]);
+   fprintf(p_stat," Mode    intra IPCM   |  %5d         |\n",stats->mode_use[I_SLICE][IPCM ]);
+   
+   fprintf(p_stat,"\n ---------------------|----------------|-----------------|\n");
+   fprintf(p_stat,"   Inter              |   Mode used    | MotionInfo bits |\n");
+   fprintf(p_stat," ---------------------|----------------|-----------------|");
+   fprintf(p_stat,"\n Mode  0  (copy)      |  %5d         |    %8.2f     |",stats->mode_use[P_SLICE][0   ],(float)stats->bit_use_mode[P_SLICE][0   ]/(float)bit_use[P_SLICE][0]);
+   fprintf(p_stat,"\n Mode  1  (16x16)     |  %5d         |    %8.2f     |",stats->mode_use[P_SLICE][1   ],(float)stats->bit_use_mode[P_SLICE][1   ]/(float)bit_use[P_SLICE][0]);
+   fprintf(p_stat,"\n Mode  2  (16x8)      |  %5d         |    %8.2f     |",stats->mode_use[P_SLICE][2   ],(float)stats->bit_use_mode[P_SLICE][2   ]/(float)bit_use[P_SLICE][0]);
+   fprintf(p_stat,"\n Mode  3  (8x16)      |  %5d         |    %8.2f     |",stats->mode_use[P_SLICE][3   ],(float)stats->bit_use_mode[P_SLICE][3   ]/(float)bit_use[P_SLICE][0]);
+   fprintf(p_stat,"\n Mode  4  (8x8)       |  %5d         |    %8.2f     |",stats->mode_use[P_SLICE][P8x8],(float)stats->bit_use_mode[P_SLICE][P8x8]/(float)bit_use[P_SLICE][0]);
+   fprintf(p_stat,"\n Mode  5  intra 4x4   |  %5d         |-----------------|",stats->mode_use[P_SLICE][I4MB]);
+   fprintf(p_stat,"\n Mode  6  intra 8x8   |  %5d         |",stats->mode_use[P_SLICE][I8MB]);
+   fprintf(p_stat,"\n Mode  7+ intra 16x16 |  %5d         |",stats->mode_use[P_SLICE][I16MB]);
+   fprintf(p_stat,"\n Mode     intra IPCM  |  %5d         |",stats->mode_use[P_SLICE][IPCM ]);
+   mean_motion_info_bit_use[0] = (float)(stats->bit_use_mode[P_SLICE][0] + stats->bit_use_mode[P_SLICE][1] + stats->bit_use_mode[P_SLICE][2] 
+                                       + stats->bit_use_mode[P_SLICE][3] + stats->bit_use_mode[P_SLICE][P8x8])/(float) bit_use[P_SLICE][0]; 
+ 
+   // B pictures
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+   {
+  
+     fprintf(p_stat,"\n\n ---------------------|----------------|-----------------|\n");
+     fprintf(p_stat,"   B frame            |   Mode used    | MotionInfo bits |\n");
+     fprintf(p_stat," ---------------------|----------------|-----------------|");
+     fprintf(p_stat,"\n Mode  0  (copy)      |  %5d         |    %8.2f     |",stats->mode_use[B_SLICE][0   ],(float)stats->bit_use_mode[B_SLICE][0   ]/(float)frame_ctr[B_SLICE]);
+     fprintf(p_stat,"\n Mode  1  (16x16)     |  %5d         |    %8.2f     |",stats->mode_use[B_SLICE][1   ],(float)stats->bit_use_mode[B_SLICE][1   ]/(float)frame_ctr[B_SLICE]);
+     fprintf(p_stat,"\n Mode  2  (16x8)      |  %5d         |    %8.2f     |",stats->mode_use[B_SLICE][2   ],(float)stats->bit_use_mode[B_SLICE][2   ]/(float)frame_ctr[B_SLICE]);
+     fprintf(p_stat,"\n Mode  3  (8x16)      |  %5d         |    %8.2f     |",stats->mode_use[B_SLICE][3   ],(float)stats->bit_use_mode[B_SLICE][3   ]/(float)frame_ctr[B_SLICE]);
+     fprintf(p_stat,"\n Mode  4  (8x8)       |  %5d         |    %8.2f     |",stats->mode_use[B_SLICE][P8x8],(float)stats->bit_use_mode[B_SLICE][P8x8]/(float)frame_ctr[B_SLICE]);
+     fprintf(p_stat,"\n Mode  5  intra 4x4   |  %5d         |-----------------|",stats->mode_use[B_SLICE][I4MB]);
+     fprintf(p_stat,"\n Mode  6  intra 8x8   |  %5d         |",stats->mode_use[B_SLICE][I8MB]);
+     fprintf(p_stat,"\n Mode  7+ intra 16x16 |  %5d         |",stats->mode_use[B_SLICE][I16MB]);
+     fprintf(p_stat,"\n Mode     intra IPCM  |  %5d         |",stats->mode_use[B_SLICE][IPCM ]);
+     mean_motion_info_bit_use[1] = (float)(stats->bit_use_mode[B_SLICE][0] + stats->bit_use_mode[B_SLICE][1] + stats->bit_use_mode[B_SLICE][2] 
+                                       + stats->bit_use_mode[B_SLICE][3] + stats->bit_use_mode[B_SLICE][P8x8])/(float) frame_ctr[B_SLICE]; 
+   }
+ 
+   fprintf(p_stat,"\n\n ---------------------|----------------|----------------|----------------|\n");
+   fprintf(p_stat,"  Bit usage:          |      Intra     |      Inter     |    B frame     |\n");
+   fprintf(p_stat," ---------------------|----------------|----------------|----------------|\n");
+ 
+   fprintf(p_stat," Header               |");
+   fprintf(p_stat," %10.2f     |",(float) stats->bit_use_header[I_SLICE]/bit_use[I_SLICE][0]);
+   fprintf(p_stat," %10.2f     |",(float) stats->bit_use_header[P_SLICE]/bit_use[P_SLICE][0]);
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," %10.2f     |",(float) stats->bit_use_header[B_SLICE]/frame_ctr[B_SLICE]);
+   else fprintf(p_stat," %10.2f     |", 0.);
+   fprintf(p_stat,"\n");
+ 
+   fprintf(p_stat," Mode                 |");
+   fprintf(p_stat," %10.2f     |",(float)stats->bit_use_mb_type[I_SLICE]/bit_use[I_SLICE][0]);
+   fprintf(p_stat," %10.2f     |",(float)stats->bit_use_mb_type[P_SLICE]/bit_use[P_SLICE][0]);
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," %10.2f     |",(float)stats->bit_use_mb_type[B_SLICE]/frame_ctr[B_SLICE]);
+   else fprintf(p_stat," %10.2f     |", 0.);
+   fprintf(p_stat,"\n");
+ 
+   fprintf(p_stat," Motion Info          |");
+   fprintf(p_stat,"        ./.     |");
+   fprintf(p_stat," %10.2f     |",mean_motion_info_bit_use[0]);
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," %10.2f     |",mean_motion_info_bit_use[1]);
+   else fprintf(p_stat," %10.2f     |", 0.);
+   fprintf(p_stat,"\n");
+ 
+   fprintf(p_stat," CBP Y/C              |");
+   fprintf(p_stat," %10.2f     |", (float)stats->tmp_bit_use_cbp[I_SLICE]/bit_use[I_SLICE][0]);
+   fprintf(p_stat," %10.2f     |", (float)stats->tmp_bit_use_cbp[P_SLICE]/bit_use[P_SLICE][0]);
+   if(stats->successive_Bframe!=0 && bit_use[B_SLICE][0]!=0)
+     fprintf(p_stat," %10.2f     |", (float)stats->tmp_bit_use_cbp[B_SLICE]/bit_use[B_SLICE][0]);
+   else fprintf(p_stat," %10.2f     |", 0.);
+   fprintf(p_stat,"\n");
+ 
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," Coeffs. Y            | %10.2f     | %10.2f     | %10.2f     |\n",
+     (float)stats->bit_use_coeffY[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffY[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_coeffY[B_SLICE]/frame_ctr[B_SLICE]);
+   else
+     fprintf(p_stat," Coeffs. Y            | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_coeffY[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffY[P_SLICE]/(float)bit_use[P_SLICE][0], 0.);
+ 
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," Coeffs. C            | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_coeffC[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffC[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_coeffC[B_SLICE]/frame_ctr[B_SLICE]);
+   else
+     fprintf(p_stat," Coeffs. C            | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_coeffC[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_coeffC[P_SLICE]/bit_use[P_SLICE][0], 0.);
+ 
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," Delta quant          | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_delta_quant[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_delta_quant[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_delta_quant[B_SLICE]/frame_ctr[B_SLICE]);
+   else
+     fprintf(p_stat," Delta quant          | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_delta_quant[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_delta_quant[P_SLICE]/bit_use[P_SLICE][0], 0.);
+ 
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," Stuffing Bits        | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_stuffingBits[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_stuffingBits[P_SLICE]/bit_use[P_SLICE][0], (float)stats->bit_use_stuffingBits[B_SLICE]/frame_ctr[B_SLICE]);
+   else
+     fprintf(p_stat," Stuffing Bits        | %10.2f     | %10.2f     | %10.2f     |\n",
+       (float)stats->bit_use_stuffingBits[I_SLICE]/bit_use[I_SLICE][0], (float)stats->bit_use_stuffingBits[P_SLICE]/bit_use[P_SLICE][0], 0.);
+ 
+ 
+ 
+   fprintf(p_stat," ---------------------|----------------|----------------|----------------|\n");
+ 
+   fprintf(p_stat," average bits/frame   |");
+ 
+   fprintf(p_stat," %10.2f     |", (float) bit_use[I_SLICE][1]/(float) bit_use[I_SLICE][0] );
+   fprintf(p_stat," %10.2f     |", (float) bit_use[P_SLICE][1]/(float) bit_use[P_SLICE][0] );
+ 
+   if(stats->successive_Bframe!=0 && frame_ctr[B_SLICE]!=0)
+     fprintf(p_stat," %10.2f     |", (float) bit_use[B_SLICE][1]/ (float) frame_ctr[B_SLICE] );
+   else fprintf(p_stat," %10.2f     |", 0.);
+ 
+   fprintf(p_stat,"\n");
+   fprintf(p_stat," ---------------------|----------------|----------------|----------------|\n");
+ 
+   fclose(p_stat);
+ 
+   // write to log file
+   if ((p_log=fopen("log.dat","r"))==0)                      // check if file exist
+   {
+     if ((p_log=fopen("log.dat","a"))==NULL)            // append new statistic at the end
+     {
+       snprintf(errortext, ET_SIZE, "Error open file %s  \n","log.dat");
+       error(errortext, 500);
+     }
+     else                                            // Create header for new log file
+     {
+       fprintf(p_log," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ \n");
+       fprintf(p_log,"|                   Encoder statistics. This file is generated during first encoding session, new sessions will be appended                                                                                                                                                                            |\n");
+       fprintf(p_log," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ \n");
+       fprintf(p_log,"|    ver    | Date  | Time  |         Sequence             | #Img |P/MbInt| QPI| QPP| QPB| Format  |Iperiod| #B | FMES | Hdmd | S.R |#Ref | Freq |Coding|RD-opt|Intra upd|8x8Tr| SNRY 1| SNRU 1| SNRV 1| SNRY N| SNRU N| SNRV N|#Bitr I|#Bitr P|#Bitr B|#Bitr IPB|     Total Time   |      Me Time     |\n");
+       fprintf(p_log," ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ \n");
+     }
+   }
+   else
+   {
+     fclose (p_log);
+     if ((p_log=fopen("log.dat","a"))==NULL)            // File exist,just open for appending
+     {
+       snprintf(errortext, ET_SIZE, "Error open file %s  \n","log.dat");
+       error(errortext, 500);
+     }
+   }
+   fprintf(p_log,"|%s/%-4s", VERSION, EXT_VERSION);
+ 
+ #ifdef WIN32
+   _strdate( timebuf );
+   fprintf(p_log,"| %1.5s |",timebuf );
+ 
+   _strtime( timebuf);
+   fprintf(p_log," % 1.5s |",timebuf);
+ #else
+   now = time ((time_t *) NULL); // Get the system time and put it into 'now' as 'calender time'
+   time (&now);
+   l_time = localtime (&now);
+   strftime (string, sizeof string, "%d-%b-%Y", l_time);
+   fprintf(p_log,"| %1.5s |",string );
+ 
+   strftime (string, sizeof string, "%H:%M:%S", l_time);
+   fprintf(p_log," %1.5s |",string );
+ #endif
+ 
+   for (i=0;i<30;i++)
+     name[i]=input->infile[i+max(0,((int)strlen(input->infile))-30)]; // write last part of path, max 20 chars
+   fprintf(p_log,"%30.30s|",name);
+ 
+   fprintf(p_log,"%5d |",input->no_frames);
+   fprintf(p_log,"  %d/%d  |",input->PicInterlace, input->MbInterlace);
+   fprintf(p_log," %-3d|",input->qp0);
+   fprintf(p_log," %-3d|",input->qpN);
+   fprintf(p_log," %-3d|",input->qpB);
+ 
+   fprintf(p_log,"%4dx%-4d|",input->img_width,input->img_height);
+ 
+   fprintf(p_log,"  %3d  |",input->intra_period);
+   fprintf(p_log,"%3d |",stats->successive_Bframe); 
+ 
+   if (input->FMEnable==1)
+     fprintf(p_log,"  HEX |");
+   else if (input->FMEnable==2)
+     fprintf(p_log," SHEX |");
+    else if (input->FMEnable == 3)
+     fprintf(p_log," EPZS |");
+   else
+     fprintf(p_log,"  OFF |");
+ 
+   switch (input->hadamard)
+   {
+   case 2:
+     fprintf(p_log,"  QPL |");
+     break;
+   case 1:
+     fprintf(p_log,"  ON  |");
+     break;
+   default:
+     fprintf(p_log,"  OFF |");
+     break;
+   }
+ 
+   fprintf(p_log," %3d |",input->search_range );
+ 
+   fprintf(p_log," %2d  |",input->num_ref_frames);
+ 
+ 
+ //  fprintf(p_log," %3d  |",img->framerate/(input->jumpd+1));
+     fprintf(p_log," %5.2f|",(img->framerate *(float) (stats->successive_Bframe + 1)) / (float)(input->jumpd+1));
+ 
+   if (input->symbol_mode == UVLC)
+     fprintf(p_log," CAVLC|");
+   else
+     fprintf(p_log," CABAC|");
+ 
+   fprintf(p_log,"   %d  |",input->rdopt);
+   
+   if (input->intra_upd==1)
+     fprintf(p_log,"   ON    |");
+   else
+     fprintf(p_log,"   OFF   |");
+ 
+   fprintf(p_log,"  %d  |",input->Transform8x8Mode);
+   
+   fprintf(p_log,"%7.3f|",snr->snr_y1);
+   fprintf(p_log,"%7.3f|",snr->snr_u1);
+   fprintf(p_log,"%7.3f|",snr->snr_v1);
+   fprintf(p_log,"%7.3f|",snr->snr_ya);
+   fprintf(p_log,"%7.3f|",snr->snr_ua);
+   fprintf(p_log,"%7.3f|",snr->snr_va);
+ /*
+   fprintf(p_log,"%-5.3f|",snr->snr_yt[I_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_ut[I_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_vt[I_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_yt[P_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_ut[P_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_vt[P_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_yt[B_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_ut[B_SLICE]);
+   fprintf(p_log,"%-5.3f|",snr->snr_vt[B_SLICE]);
+ */
+   fprintf(p_log,"%7.0f|",stats->bitrate_I);
+   fprintf(p_log,"%7.0f|",stats->bitrate_P);
+   fprintf(p_log,"%7.0f|",stats->bitrate_B);
+   fprintf(p_log,"%9.0f|",stats->bitrate);
+ 
+   fprintf(p_log,"   %12d   |", tot_time);
+   fprintf(p_log,"   %12d   |\n", me_tot_time);
+ 
+   fclose(p_log);
+ 
+   p_log=fopen("data.txt","a");
+ 
+   if(stats->successive_Bframe != 0 && frame_ctr[B_SLICE] != 0) // B picture used
+   {
+     fprintf(p_log, "%3d %2d %2d %2.2f %2.2f %2.2f %5d "
+           "%2.2f %2.2f %2.2f %5d "
+         "%2.2f %2.2f %2.2f %5d %5d %.3f\n",
+         input->no_frames, input->qp0, input->qpN,
+         snr->snr_y1,
+         snr->snr_u1,
+         snr->snr_v1,
+         stats->bit_ctr_I,
+         0.0,
+         0.0,
+         0.0,
+         0,
+         snr->snr_ya,
+         snr->snr_ua,
+         snr->snr_va,
+         (stats->bit_ctr_I+stats->bit_ctr)/(input->no_frames+frame_ctr[B_SLICE]),
+         stats->bit_ctr_B/frame_ctr[B_SLICE],
+         (double)0.001*tot_time/(input->no_frames+frame_ctr[B_SLICE]));
+   }
+   else
+   {
+     if (input->no_frames!=0)
+     fprintf(p_log, "%3d %2d %2d %2.2f %2.2f %2.2f %5d "
+           "%2.2f %2.2f %2.2f %5d "
+         "%2.2f %2.2f %2.2f %5d %5d %.3f\n",
+         input->no_frames, input->qp0, input->qpN,
+         snr->snr_y1,
+         snr->snr_u1,
+         snr->snr_v1,
+         stats->bit_ctr_I,
+         0.0,
+         0.0,
+         0.0,
+         0,
+         snr->snr_ya,
+         snr->snr_ua,
+         snr->snr_va,
+         (stats->bit_ctr_I+stats->bit_ctr)/input->no_frames,
+         0,
+         (double)0.001*tot_time/input->no_frames);
+   }
+ 
+   fclose(p_log);
+ 
+   if (input->ReportFrameStats)
+   {
+     if ((p_log=fopen("stat_frame.dat","a"))==NULL)            // append new statistic at the end
+     {
+       snprintf(errortext, ET_SIZE, "Error open file %s  \n","stat_frame.dat.dat");
+       //    error(errortext, 500);
+     }
+     else
+     {
+       fprintf(p_log," --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \n");
+       fclose(p_log);  
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Prints the start-up banner: version line, input/output file names,
+  *    picture format and coding modes, followed by the column header of
+  *    the per-frame report selected by input->Verbose.
+  * \par Input:
+  *    none (reads the globals input, img and p_dec)
+  * \par Output:
+  *    none (everything is written to stdout)
+  ************************************************************************
+  */
+ void information_init()
+ {
+   // looked up with img->yuv_format; the index is not range-checked here
+   static const char *const yuv_names[4] = {"YUV 4:0:0","YUV 4:2:0","YUV 4:2:2","YUV 4:4:4"};
+ 
+   // verbose levels 0 and 1 get the short rule, every other level the long one
+   if (input->Verbose != 0 && input->Verbose != 1)
+     printf("------------------------------- JM %s %s ------------------------------------------\n",VERSION, EXT_VERSION);
+   else
+     printf("------------------------------- JM %s %s --------------------------------\n",VERSION, EXT_VERSION);
+ 
+   printf(" Input YUV file                    : %s \n",input->infile);
+   printf(" Output H.264 bitstream            : %s \n",input->outfile);
+   // NOTE(review): p_dec is presumably the reconstruction file handle, -1 meaning "none" - confirm
+   if (p_dec != -1)
+     printf(" Output YUV file                   : %s \n",input->ReconFile);
+   printf(" YUV Format                        : %s \n", yuv_names[img->yuv_format]);
+   printf(" Frames to be encoded I-P/B        : %d/%d\n", input->no_frames, (input->successive_Bframe*(input->no_frames-1)));
+   printf(" PicInterlace / MbInterlace        : %d/%d\n", input->PicInterlace, input->MbInterlace);
+   printf(" Transform8x8Mode                  : %d\n", input->Transform8x8Mode);
+ 
+   if (input->Verbose == 1)
+   {
+     // compact per-frame table
+     printf("-------------------------------------------------------------------------------\n");
+     printf("  Frame  Bit/pic    QP   SnrY    SnrU    SnrV    Time(ms) MET(ms) Frm/Fld Ref  \n");
+     printf("-------------------------------------------------------------------------------\n");
+   }
+   else if (input->Verbose == 2)
+   {
+     // extended per-frame table
+     printf("---------------------------------------------------------------------------------------------\n");
+     printf("  Frame  Bit/pic WP QP   SnrY    SnrU    SnrV    Time(ms) MET(ms) Frm/Fld   I D L0 L1 RDP Ref\n");
+     printf("---------------------------------------------------------------------------------------------\n");
+   }
+   else
+   {
+     // level 0 and any unrecognised level: no table header, only a progress note
+     printf("-------------------------------------------------------------------------------\n");
+     printf("\nEncoding. Please Wait.\n\n");
+   }
+ }
+  
+ /*!
+  ************************************************************************
+  * \brief
+  *    memory allocation for original picture buffers
+  * \par Input:
+  *    none (reads the globals img and active_sps)
+  * \return
+  *    Sum of the sizes reported by the allocation helpers plus the
+  *    chroma top/bottom pointer tables allocated here
+  ************************************************************************
+  */
+ int init_orig_buffers()
+ {
+   int memory_size = 0;
+ 
+   // frame buffers of the original picture: imgY_org_frm, imgUV_org_frm
+   memory_size += get_mem2Dpel(&imgY_org_frm, img->height, img->width);
+ 
+   // chroma planes are skipped for YUV400 input
+   if (img->yuv_format != YUV400)
+     memory_size += get_mem3Dpel(&imgUV_org_frm, 2, img->height_cr, img->width_cr);
+ 
+ 
+   if(!active_sps->frame_mbs_only_flag)
+   {
+     // top/bottom views of the original picture: imgY_org_top/bot, imgUV_org_top/bot
+     // the luma call is now accounted for like the two chroma calls below;
+     // its return value used to be dropped, so the reported total was too small
+     memory_size += init_top_bot_planes(imgY_org_frm, img->height, img->width, &imgY_org_top, &imgY_org_bot);
+ 
+     if (img->yuv_format != YUV400)
+     {
+       // one pointer per chroma plane, for the top and for the bottom set
+       if(((imgUV_org_top) = (imgpel***)calloc(2,sizeof(imgpel**))) == NULL)
+         no_mem_exit("init_orig_buffers: imgUV_org_top");
+       if(((imgUV_org_bot) = (imgpel***)calloc(2,sizeof(imgpel**))) == NULL)
+         no_mem_exit("init_orig_buffers: imgUV_org_bot");
+ 
+       // 2 + 2 plane pointers from the two calloc calls above
+       memory_size += 4*(sizeof(imgpel**));
+ 
+       memory_size += init_top_bot_planes(imgUV_org_frm[0], img->height_cr, img->width_cr, &(imgUV_org_top[0]), &(imgUV_org_bot[0]));
+       memory_size += init_top_bot_planes(imgUV_org_frm[1], img->height_cr, img->width_cr, &(imgUV_org_top[1]), &(imgUV_org_bot[1]));
+     }
+   }
+   return memory_size;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates the global buffers whose size depends on the picture
+  *    dimensions or on the selected coding tools. The matching release
+  *    routine is free_global_buffers().
+  * \par Input:
+  *    none (reads the globals input, img, active_sps and decs)
+  * \return Number of allocated bytes, as reported by the helpers
+  ************************************************************************
+  */
+ int init_global_buffers()
+ {
+   int dec;
+   int memory_size = 0;
+ #ifdef _ADAPT_LAST_GROUP_
+   extern int *last_P_no_frm;
+   extern int *last_P_no_fld;
+ 
+   last_P_no_frm = (int*)malloc(2*img->max_num_references*sizeof(int));
+   if (last_P_no_frm == NULL)
+     no_mem_exit("init_global_buffers: last_P_no");
+ 
+   // the field variant is only set up when field coding is possible
+   if (!active_sps->frame_mbs_only_flag)
+   {
+     last_P_no_fld = (int*)malloc(4*img->max_num_references*sizeof(int));
+     if (last_P_no_fld == NULL)
+       no_mem_exit("init_global_buffers: last_P_no");
+   }
+ #endif
+ 
+   // original picture planes
+   memory_size += init_orig_buffers();
+ 
+   // weighted prediction tables, dimensioned with MAX_REFERENCE_PICTURES
+   if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+   {
+     memory_size += get_mem3Dint(&wp_weight, 6, MAX_REFERENCE_PICTURES, 3);
+     memory_size += get_mem3Dint(&wp_offset, 6, MAX_REFERENCE_PICTURES, 3);
+ 
+     memory_size += get_mem4Dint(&wbp_weight, 6, MAX_REFERENCE_PICTURES, MAX_REFERENCE_PICTURES, 3);
+   }
+ 
+   // direct mode bookkeeping, one entry per block of the picture
+   if (input->successive_Bframe != 0 || input->BRefPictures > 0)
+   {
+     int blk_rows = img->height >> BLOCK_SHIFT;
+     int blk_cols = img->width >> BLOCK_SHIFT;
+ 
+     memory_size += get_mem3D     ((byte ****)(void*)(&direct_ref_idx), 2, blk_rows, blk_cols);
+     memory_size += get_mem2Dshort(&direct_pdir, blk_rows, blk_cols);
+   }
+ 
+   // temporary luma buffer img4Y_tmp: padded picture height, four times the padded width
+   memory_size += get_mem2Dint(&img4Y_tmp, img->height+2*IMG_PAD_SIZE, (img->width+2*IMG_PAD_SIZE)*4);
+ 
+   // per-decoder buffers, only needed for rdopt mode 3
+   if (input->rdopt == 3)
+   {
+     memory_size += get_mem2Dint(&decs->resY, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
+ 
+     decs->decref = (imgpel****) calloc(input->NoOfDecoders,sizeof(imgpel***));
+     if (decs->decref == NULL)
+       no_mem_exit("init_global_buffers: decref");
+ 
+     for (dec = 0; dec < input->NoOfDecoders; dec++)
+       memory_size += get_mem3Dpel(&decs->decref[dec], img->max_num_references+1, img->height, img->width);
+ 
+     memory_size += get_mem2Dpel(&decs->RefBlock, BLOCK_SIZE,BLOCK_SIZE);
+     memory_size += get_mem3Dpel(&decs->decY, input->NoOfDecoders, img->height, img->width);
+     memory_size += get_mem3Dpel(&decs->decY_best, input->NoOfDecoders, img->height, img->width);
+     memory_size += get_mem2D(&decs->status_map, img->height/MB_BLOCK_SIZE,img->width/MB_BLOCK_SIZE);
+     // NOTE(review): width and height are passed in the opposite order to status_map - confirm this is intended
+     memory_size += get_mem2D(&decs->dec_mb_mode, img->width/MB_BLOCK_SIZE,img->height/MB_BLOCK_SIZE);
+   }
+ 
+   if (input->RestrictRef)
+   {
+     memory_size += get_mem2D(&pixel_map, img->height,img->width);
+     memory_size += get_mem2D(&refresh_map, img->height/8,img->width/8);
+   }
+ 
+   // imgY_com / imgUV_com exist only when field coding is possible
+   if (!active_sps->frame_mbs_only_flag)
+   {
+     memory_size += get_mem2Dpel(&imgY_com, img->height, img->width);
+ 
+     if (img->yuv_format != YUV400)
+       memory_size += get_mem3Dpel(&imgUV_com, 2, img->height_cr, img->width_cr);
+   }
+ 
+   // buffers of the selected fast motion estimation scheme
+   switch (input->FMEnable)
+   {
+   case 1:
+     memory_size += get_mem_FME();
+     break;
+   case 2:
+     simplified_init_FME();
+     memory_size += simplified_get_mem_FME();
+     break;
+   case 3:
+     memory_size += EPZSInit();
+     break;
+   default:
+     break;
+   }
+ 
+   // rate control storage is not added to the returned total
+   if (input->RCEnable)
+     rc_alloc();
+ 
+   return (memory_size);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases the original picture buffers: the frame planes and, when
+  *    field coding is possible, their top/bottom counterparts
+  ************************************************************************
+  */
+ void free_orig_planes()
+ {
+   // frame planes; chroma exists only for non-YUV400 input
+   free_mem2Dpel(imgY_org_frm);
+   if (img->yuv_format != YUV400)
+     free_mem3Dpel(imgUV_org_frm, 2);
+ 
+   // nothing else to release for frame-only coding
+   if (active_sps->frame_mbs_only_flag)
+     return;
+ 
+   free_top_bot_planes(imgY_org_top, imgY_org_bot);
+ 
+   if (img->yuv_format == YUV400)
+     return;
+ 
+   // both chroma planes first, then the two pointer tables holding them
+   free_top_bot_planes(imgUV_org_top[0], imgUV_org_bot[0]);
+   free_top_bot_planes(imgUV_org_top[1], imgUV_org_bot[1]);
+   free (imgUV_org_top);
+   free (imgUV_org_bot);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases the picture-size dependent global buffers set up by
+  *    init_global_buffers(), plus the quantisation tables and several
+  *    per-picture arrays hanging off img
+  * \par Input:
+  *    none (reads the globals input, img, stats, active_sps and decs)
+  * \par Output:
+  *    none
+  ************************************************************************
+  */
+ void free_global_buffers()
+ {
+   int dec, ref;
+ 
+ #ifdef _ADAPT_LAST_GROUP_
+   extern int *last_P_no_frm;
+   extern int *last_P_no_fld;
+   free (last_P_no_frm);
+   free (last_P_no_fld);
+ #endif
+ 
+   // original picture planes
+   free_orig_planes();
+ 
+   // quantisation matrices and offsets
+   free_QMatrix();
+   free_QOffsets();
+ 
+   // weighted prediction tables
+   if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+   {
+     free_mem3Dint(wp_weight,6);
+     free_mem3Dint(wp_offset,6);
+     free_mem4Dint(wbp_weight,6,MAX_REFERENCE_PICTURES);
+   }
+ 
+   // direct mode bookkeeping
+   // NOTE(review): tested on stats->successive_Bframe here but allocated on input->successive_Bframe - confirm both always agree
+   if (stats->successive_Bframe != 0 || input->BRefPictures > 0)
+   {
+     free_mem3D     ((byte ***)direct_ref_idx,2);
+     free_mem2Dshort(direct_pdir);
+   }
+ 
+   // temporary luma buffer
+   free_mem2Dint(img4Y_tmp);
+ 
+   // intra prediction mode maps and macroblock array attached to img
+   free_mem2D((byte**)img->ipredmode);
+   free_mem2D((byte**)img->ipredmode8x8);
+   free(img->mb_data);
+ 
+   free_mem2D((byte**)rddata_top_frame_mb.ipredmode);
+ 
+   if (input->UseConstrainedIntraPred)
+     free (img->intra_block);
+ 
+   // per-decoder buffers of rdopt mode 3: each 2-D array is released as
+   // its first row followed by its row pointer table
+   if (input->rdopt == 3)
+   {
+     free(decs->resY[0]);
+     free(decs->resY);
+     free(decs->RefBlock[0]);
+     free(decs->RefBlock);
+ 
+     for (dec = 0; dec < input->NoOfDecoders; dec++)
+     {
+       free(decs->decY[dec][0]);
+       free(decs->decY[dec]);
+       free(decs->decY_best[dec][0]);
+       free(decs->decY_best[dec]);
+ 
+       for (ref = 0; ref < img->max_num_references+1; ref++)
+       {
+         free(decs->decref[dec][ref][0]);
+         free(decs->decref[dec][ref]);
+       }
+       free(decs->decref[dec]);
+     }
+ 
+     free(decs->decY);
+     free(decs->decY_best);
+     free(decs->decref);
+     free(decs->status_map[0]);
+     free(decs->status_map);
+     free(decs->dec_mb_mode[0]);
+     free(decs->dec_mb_mode);
+   }
+ 
+   if (input->RestrictRef)
+   {
+     free(pixel_map[0]);
+     free(pixel_map);
+     free(refresh_map[0]);
+     free(refresh_map);
+   }
+ 
+   // imgY_com / imgUV_com exist only when field coding is possible
+   if (!active_sps->frame_mbs_only_flag)
+   {
+     free_mem2Dpel(imgY_com);
+     if (img->yuv_format != YUV400)
+       free_mem3Dpel(imgUV_com,2);
+   }
+ 
+   free_mem3Dint(img->nz_coeff, img->FrameSizeInMbs);
+ 
+   // buffers of the selected fast motion estimation scheme
+   switch (input->FMEnable)
+   {
+   case 1:
+     free_mem_FME();
+     break;
+   case 2:
+     simplified_free_mem_FME();
+     break;
+   case 3:
+     EPZSDelete();
+     break;
+   default:
+     break;
+   }
+ 
+   if (input->RCEnable)
+     rc_free();
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate memory for mv
+  *
+  *    The table is laid out as [4][4][2][img->max_num_references][9][2]
+  *    shorts. The last two dimensions share one contiguous block of
+  *    9*2 shorts addressed through 9 row pointers.
+  * \par Input:
+  *    Image Parameters struct img_par *img                             \n
+  *    short******* mv  (receives the root of the table)
+  * \return memory size in bytes (short payload only, the pointer tables
+  *    are not counted)
+  ************************************************************************
+  */
+ int get_mem_mv (short ******* mv)
+ {
+   int i, j, k, l, m;
+ 
+   if ((*mv = (short******)calloc(4,sizeof(short*****))) == NULL)
+     no_mem_exit ("get_mem_mv: mv");
+   for (i=0; i<4; i++)
+   {
+     if (((*mv)[i] = (short*****)calloc(4,sizeof(short****))) == NULL)
+       no_mem_exit ("get_mem_mv: mv");
+     for (j=0; j<4; j++)
+     {
+       if (((*mv)[i][j] = (short****)calloc(2,sizeof(short***))) == NULL)
+         no_mem_exit ("get_mem_mv: mv");
+       for (k=0; k<2; k++)
+       {
+         if (((*mv)[i][j][k] = (short***)calloc(img->max_num_references,sizeof(short**))) == NULL)
+           no_mem_exit ("get_mem_mv: mv");
+         for (l=0; l<img->max_num_references; l++)
+         {
+           // 9 row pointers into one contiguous block of 9*2 shorts
+           if (((*mv)[i][j][k][l] = (short**)calloc(9,sizeof(short*))) == NULL)
+             no_mem_exit ("get_mem_mv: mv");
+           if (((*mv)[i][j][k][l][0] = (short*)calloc(2*9,sizeof(short))) == NULL)
+             no_mem_exit ("get_mem_mv: mv");
+           for (m=1; m<9; m++)
+             (*mv)[i][j][k][l][m] = (*mv)[i][j][k][l][m - 1] + 2;
+         }
+       }
+     }
+   }
+   // every loop level contributes a factor; the size-2 third dimension was
+   // missing from this product, so only half of the payload was reported
+   return 4*4*2*img->max_num_references*9*2*sizeof(short);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases a table laid out as [4][4][2][img->max_num_references][9][2],
+  *    innermost allocations first
+  * \par Input:
+  *    short****** mv
+  ************************************************************************
+  */
+ void free_mem_mv (short****** mv)
+ {
+   int a, b, c, ref;
+ 
+   for (a = 0; a < 4; a++)
+   {
+     short *****level1 = mv[a];
+ 
+     for (b = 0; b < 4; b++)
+     {
+       short ****level2 = level1[b];
+ 
+       for (c = 0; c < 2; c++)
+       {
+         short ***level3 = level2[c];
+ 
+         for (ref = 0; ref < img->max_num_references; ref++)
+         {
+           // row 0 owns the short data, the rest of the rows point into it
+           free (level3[ref][0]);
+           free (level3[ref]);
+         }
+         free (level3);
+       }
+       free (level2);
+     }
+     free (level1);
+   }
+   free (mv);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates the AC coefficient table, laid out as
+  *    [4 + img->num_blk8x8_uv][4][2][65] ints, all zero-initialised
+  * \return
+  *    Size in bytes of the int payload (pointer tables not counted)
+  ************************************************************************
+  */
+ int get_mem_ACcoeff (int***** cofAC)
+ {
+   int b8, sub, half;
+   int num_blk8x8 = 4 + img->num_blk8x8_uv;
+ 
+   *cofAC = (int****)calloc (num_blk8x8, sizeof(int***));
+   if (*cofAC == NULL)
+     no_mem_exit ("get_mem_ACcoeff: cofAC");
+ 
+   for (b8 = 0; b8 < num_blk8x8; b8++)
+   {
+     (*cofAC)[b8] = (int***)calloc (4, sizeof(int**));
+     if ((*cofAC)[b8] == NULL)
+       no_mem_exit ("get_mem_ACcoeff: cofAC");
+ 
+     for (sub = 0; sub < 4; sub++)
+     {
+       (*cofAC)[b8][sub] = (int**)calloc (2, sizeof(int*));
+       if ((*cofAC)[b8][sub] == NULL)
+         no_mem_exit ("get_mem_ACcoeff: cofAC");
+ 
+       for (half = 0; half < 2; half++)
+       {
+         // 65 entries per run (was 18 before ABT, according to the original note)
+         (*cofAC)[b8][sub][half] = (int*)calloc (65, sizeof(int));
+         if ((*cofAC)[b8][sub][half] == NULL)
+           no_mem_exit ("get_mem_ACcoeff: cofAC");
+       }
+     }
+   }
+   return num_blk8x8*4*2*65*sizeof(int);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates the DC coefficient table, laid out as [3][2][65] ints,
+  *    all zero-initialised
+  * \return
+  *    Size in bytes of the int payload (pointer tables not counted)
+  ************************************************************************
+  */
+ int get_mem_DCcoeff (int**** cofDC)
+ {
+   int plane, half;
+ 
+   *cofDC = (int***)calloc (3, sizeof(int**));
+   if (*cofDC == NULL)
+     no_mem_exit ("get_mem_DCcoeff: cofDC");
+ 
+   for (plane = 0; plane < 3; plane++)
+   {
+     (*cofDC)[plane] = (int**)calloc (2, sizeof(int*));
+     if ((*cofDC)[plane] == NULL)
+       no_mem_exit ("get_mem_DCcoeff: cofDC");
+ 
+     for (half = 0; half < 2; half++)
+     {
+       // 65 entries per run (was 18 before ABT, according to the original note)
+       (*cofDC)[plane][half] = (int*)calloc (65, sizeof(int));
+       if ((*cofDC)[plane][half] == NULL)
+         no_mem_exit ("get_mem_DCcoeff: cofDC");
+     }
+   }
+   return 3*2*65*sizeof(int);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases an AC coefficient table laid out as
+  *    [4 + img->num_blk8x8_uv][4][2][...], innermost arrays first
+  ************************************************************************
+  */
+ void free_mem_ACcoeff (int**** cofAC)
+ {
+   const int num_blk8x8 = 4 + img->num_blk8x8_uv;
+   int b8, sub;
+ 
+   for (b8 = 0; b8 < num_blk8x8; b8++)
+   {
+     for (sub = 0; sub < 4; sub++)
+     {
+       int **pair = cofAC[b8][sub];
+ 
+       // the two coefficient arrays, then the table that held them
+       free (pair[0]);
+       free (pair[1]);
+       free (pair);
+     }
+     free (cofAC[b8]);
+   }
+   free (cofAC);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Releases a DC coefficient table laid out as [3][2][...],
+  *    innermost arrays first
+  ************************************************************************
+  */
+ void free_mem_DCcoeff (int*** cofDC)
+ {
+   int plane;
+ 
+   for (plane = 0; plane < 3; plane++)
+   {
+     int **pair = cofDC[plane];
+ 
+     // the two coefficient arrays, then the table that held them
+     free (pair[0]);
+     free (pair[1]);
+     free (pair);
+   }
+   free (cofDC);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Builds the combined frame in imgY_com / imgUV_com by interleaving
+  *    the rows of the two coded fields: top field rows go to the even
+  *    lines, bottom field rows to the odd lines
+  ************************************************************************
+  */
+ void combine_field()
+ {
+   int row, uv;
+   const int luma_rows   = img->height / 2;
+   const int chroma_rows = img->height_cr / 2;
+ 
+   for (row = 0; row < luma_rows; row++)
+   {
+     memcpy(imgY_com[2*row],     enc_top_picture->imgY[row],    img->width*sizeof(imgpel));
+     memcpy(imgY_com[2*row + 1], enc_bottom_picture->imgY[row], img->width*sizeof(imgpel));
+   }
+ 
+   // no chroma planes for YUV400
+   if (img->yuv_format == YUV400)
+     return;
+ 
+   for (row = 0; row < chroma_rows; row++)
+   {
+     // both chroma planes are handled row by row, plane 0 first
+     for (uv = 0; uv < 2; uv++)
+     {
+       memcpy(imgUV_com[uv][2*row],     enc_top_picture->imgUV[uv][row],    img->width_cr*sizeof(imgpel));
+       memcpy(imgUV_com[uv][2*row + 1], enc_bottom_picture->imgUV[uv][row], img->width_cr*sizeof(imgpel));
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    RD decision between frame and field coding of a picture.
+  *    Each candidate costs bits * lambda_picture plus its luma term.
+  * \return
+  *    0 when the field cost is strictly greater than the frame cost,
+  *    1 otherwise (including a tie)
+  ************************************************************************
+  */
+ int decide_fld_frame(float snr_frame_Y, float snr_field_Y, int bit_field, int bit_frame, double lambda_picture)
+ {
+   const double cost_frame = bit_frame * lambda_picture + snr_frame_Y;
+   const double cost_field = bit_field * lambda_picture + snr_field_Y;
+ 
+   // kept as a negated '>' so an unordered comparison still yields 1
+   return !(cost_field > cost_frame);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Do some initialization work for encoding the 2nd IGOP.
+  *    Acts once: when a second IGOP is configured, the current picture
+  *    is the last one of the first IGOP and the switch has not been
+  *    made yet.
+  ************************************************************************
+  */
+ void process_2nd_IGOP()
+ {
+   // no second IGOP requested
+   if (input->NumFrameIn2ndIGOP == 0)
+     return;
+ 
+   // first IGOP still running
+   if (img->number != input->no_frames-1)
+     return;
+ 
+   // switch already done
+   if (In2ndIGOP)
+     return;
+ 
+   In2ndIGOP = TRUE;
+ 
+ //  img->number = -1;
+   start_frame_no_in_this_IGOP = input->no_frames;
+   start_tr_in_this_IGOP = (input->no_frames-1)*(input->jumpd+1) +1;
+   // extend the frame count by the length of the second IGOP
+   input->no_frames += input->NumFrameIn2ndIGOP;
+ 
+ /*  reset_buffers();
+ 
+   frm->picbuf_short[0]->used=0;
+   frm->picbuf_short[0]->picID=-1;
+   frm->picbuf_short[0]->lt_picID=-1;
+   frm->short_used = 0; */
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Chooses img->type for the current picture from IMG_NUMBER and the
+  *    configured intra period, SP periodicity and BRefPictures setting
+  ************************************************************************
+  */
+ void SetImgType()
+ {
+   int intra_refresh;
+ 
+   // without an intra period only picture 0 is intra coded
+   if (input->intra_period == 0)
+     intra_refresh = (IMG_NUMBER == 0);
+   else
+     intra_refresh = ((IMG_NUMBER % input->intra_period) == 0);
+ 
+   if (intra_refresh)
+   {
+     img->type = I_SLICE;
+     return;
+   }
+ 
+   // SP pictures take precedence over the B/P choice
+   if (input->sp_periodicity && ((IMG_NUMBER % input->sp_periodicity) == 0))
+     img->type = SP_SLICE;
+   else if (input->BRefPictures == 2)
+     img->type = B_SLICE;
+   else
+     img->type = P_SLICE;
+ }
+ 
+  
+ void SetLevelIndices()   // maps active_sps->level_idc to a 0..16 index stored in img->LevelIndex
+ {
+   switch(active_sps->level_idc)
+   {
+   case 10:         
+     img->LevelIndex=0;
+     break;
+   case 11:         // level_idc 11 maps to two indices, selected by constrained_set3_flag
+     if (active_sps->constrained_set3_flag == 0)
+       img->LevelIndex=2;
+     else
+       img->LevelIndex=1;
+     break;
+   case 12:         
+     img->LevelIndex=3;
+     break;
+   case 13:         
+     img->LevelIndex=4;
+     break;
+   case 20:         
+     img->LevelIndex=5;
+     break;
+   case 21:         
+     img->LevelIndex=6;
+     break;
+   case 22:         
+     img->LevelIndex=7;
+     break;
+   case 30:         
+     img->LevelIndex=8;
+     break;
+   case 31:         
+     img->LevelIndex=9;
+     break;
+   case 32:         
+     img->LevelIndex=10;
+     break;
+   case 40:         
+     img->LevelIndex=11;
+     break;
+   case 41:         
+     img->LevelIndex=12;
+     break;
+   case 42:         // level_idc 42 maps to two indices, selected by profile_idc <= 88
+     if (active_sps->profile_idc <= 88)
+       img->LevelIndex=13;
+     else
+       img->LevelIndex=14;
+     break;
+   case 50:         
+     img->LevelIndex=15;
+     break;
+   case 51:
+   default:   // any level_idc not listed above shares the index used for 51
+     img->LevelIndex=16;
+     break;
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/loopFilter.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/loopFilter.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/loopFilter.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,482 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file loopFilter.c
+  *
+  * \brief
+  *    Filter to reduce blocking artifacts on a macroblock level.
+  *    The filter strength is QP dependent.
+  *
+  * \author
+  *    Contributors:
+  *    - Peter List       Peter.List at t-systems.de:  Original code                                 (13-Aug-2001)
+  *    - Jani Lainema     Jani.Lainema at nokia.com:   Some bug fixing, removal of recursiveness     (16-Aug-2001)
+  *    - Peter List       Peter.List at t-systems.de:  inplace filtering and various simplifications (10-Jan-2002)
+  *    - Anthony Joch     anthony at ubvideo.com:      Simplified switching between filters and 
+  *                                                 non-recursive default filter.                 (08-Jul-2002)
+  *    - Cristina Gomila  cristina.gomila at thomson.net: Simplification of the chroma deblocking
+  *                                                    from JVT-E089                              (21-Nov-2002)
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ 
+ #include "global.h"
+ 
+ #include "image.h"
+ #include "mb_access.h"
+ 
+ extern const byte QP_SCALE_CR[52] ;   // defined elsewhere; EdgeLoop() indexes it with a clipped chroma QP
+ // mixedModeEdgeFlag is written by GetStrength(), fieldModeFilteringFlag by EdgeLoop(), for the edge being processed
+ byte mixedModeEdgeFlag, fieldModeFilteringFlag;
+ 
+ /*********************************************************************************************************/
+ // Clamp Val to the range [Min, Max]. This is a macro, so each argument may be evaluated more than once.
+ #define  IClip( Min, Max, Val) (((Val)<(Min))? (Min):(((Val)>(Max))? (Max):(Val)))
+ 
+ 
+ // NOTE: In principle, the alpha and beta tables are calculated with the formulas below
+ //       Alpha( qp ) = 0.8 * (2^(qp/6)  -  1)
+ //       Beta ( qp ) = 0.5 * qp  -  7
+ 
+ // The tables actually used have been "hand optimized" though (by Anthony Joch). So, the
+ // table values might be a little different from formula-generated values. Also, the first
+ // few values of both tables are set to zero to force the filter off at low QPs.
+ // EdgeLoop() indexes ALPHA_TABLE with indexA and BETA_TABLE with indexB (both clipped to 0..MAX_QP).
+ byte ALPHA_TABLE[52]  = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6,  7,8,9,10,12,13,15,17,  20,22,25,28,32,36,40,45,  50,56,63,71,80,90,101,113,  127,144,162,182,203,226,255,255} ;
+ byte  BETA_TABLE[52]  = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3,  3,3,3, 4, 4, 4, 6, 6,   7, 7, 8, 8, 9, 9,10,10,  11,11,12,12,13,13, 14, 14,   15, 15, 16, 16, 17, 17, 18, 18} ;
+ byte CLIP_TAB[52][5]  =   // [indexA][Strength 0..4]; EdgeLoop() reads the clipping bound C0 from here
+ {
+   { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+   { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},
+   { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1},
+   { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3},
+   { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6},
+   { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16},
+   { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25}
+ } ;
+ 
+   // Chroma edge number to filter for each luma edge; -1 means none (DeblockMb() only filters chroma when the entry is >= 0)
+ char chroma_edge[2][4][4] = //[dir][edge][yuv_format]
+ { { {-1, 0, 0, 0},
+     {-1,-1,-1, 1},
+     {-1, 1, 1, 2},
+     {-1,-1,-1, 3}},
+ 
+   { {-1, 0, 0, 0},
+     {-1,-1, 1, 1},
+     {-1, 1, 2, 2},
+     {-1,-1, 3, 3}}};
+ 
+   // Forward declarations of the functions defined later in this file
+ void GetStrength(byte Strength[16],ImageParameters *img,int MbQAddr,int dir,int edge, int mvlimit);
+ void EdgeLoop(imgpel** Img, byte Strength[16],ImageParameters *img, int MbQAddr, int AlphaC0Offset, int BetaOffset, int dir, int edge, int width, int yuv, int uv);
+ void DeblockMb(ImageParameters *img, imgpel **imgY, imgpel ***imgUV, int MbQAddr) ;
+ 
+ 
+ /*!
+  *****************************************************************************************
+  * \brief
+  *    Filter all macroblocks in order of increasing macroblock address: one DeblockMb() call per address.
+  *****************************************************************************************
+  */
+ void DeblockFrame(ImageParameters *img, imgpel **imgY, imgpel ***imgUV)
+ {
+   unsigned i;
+   // addresses 0 .. img->PicSizeInMbs-1; imgY and imgUV are handed to DeblockMb() unchanged
+   for (i=0; i<img->PicSizeInMbs; i++)
+   {
+     DeblockMb( img, imgY, imgUV, i ) ;
+   }
+ } 
+ 
+ 
+ /*!
+  *****************************************************************************************
+  * \brief
+  *    Deblocking filter for one macroblock (address MbQAddr): filters its vertical edges, then its horizontal edges.
+  *****************************************************************************************
+  */
+ 
+ void DeblockMb(ImageParameters *img, imgpel **imgY, imgpel ***imgUV, int MbQAddr)
+ {
+   int           EdgeCondition;
+   int           dir,edge;
+   byte          Strength[16];
+   int           mb_x, mb_y;
+ 
+   int           filterNon8x8LumaEdgesFlag[4] = {1,1,1,1};
+   int           filterLeftMbEdgeFlag;
+   int           filterTopMbEdgeFlag;
+   int           fieldModeMbFlag;
+   int           mvlimit=4;
+   int           i, StrengthSum;
+   Macroblock    *MbQ;
+ 
+   int           edge_cr;  //ADD-VG-15052004
+   // img->DeblockCall is 1 while this MB is filtered, 2 around the extra mixed-mode edge, and 0 again on every exit path
+   img->DeblockCall = 1;
+   get_mb_pos (MbQAddr, &mb_x, &mb_y);
+   filterLeftMbEdgeFlag  = (mb_x != 0);
+   filterTopMbEdgeFlag   = (mb_y != 0);
+ 
+   MbQ  = &(img->mb_data[MbQAddr]) ; // current Mb
+ 
+   if (MbQ->mb_type == I8MB)
+     assert(MbQ->luma_transform_size_8x8_flag);
+   // luma edges 1 and 3 are filtered only when the MB's 8x8 transform flag is clear; edges 0 and 2 stay enabled
+   filterNon8x8LumaEdgesFlag[1] = 
+   filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag);
+     // NOTE(review): mb_y comes from get_mb_pos(); comparing it with 16 presumably means pixel units - confirm
+   if (img->MbaffFrameFlag && mb_y==16 && MbQ->mb_field)
+     filterTopMbEdgeFlag = 0;
+ 
+   fieldModeMbFlag       = (img->structure!=FRAME) || (img->MbaffFrameFlag && MbQ->mb_field);
+   if (fieldModeMbFlag)
+     mvlimit = 2;   // vertical MV difference threshold handed to GetStrength(): 2 in field mode, 4 otherwise
+ 
+   // return, if filter is disabled
+   if (MbQ->LFDisableIdc==1) {
+     img->DeblockCall = 0;
+     return;
+   }
+ 
+   if (MbQ->LFDisableIdc==2)
+   {
+     // don't filter at slice boundaries
+     filterLeftMbEdgeFlag = MbQ->mbAvailA;
+     // if this the bottom of a frame macroblock pair then always filter the top edge
+     if (img->MbaffFrameFlag && !MbQ->mb_field && (MbQAddr % 2)) filterTopMbEdgeFlag  = 1;
+     else                                                      filterTopMbEdgeFlag  = MbQ->mbAvailB;
+   }
+   // NOTE(review): current_mb_nr is set first, presumably because CheckAvailabilityOfNeighbors() reads it - confirm
+   img->current_mb_nr = MbQAddr;
+   CheckAvailabilityOfNeighbors();
+ 
+   for( dir=0 ; dir<2 ; dir++ )                                             // vertical edges, then horizontal edges
+   {
+     EdgeCondition = (dir && filterTopMbEdgeFlag) || (!dir && filterLeftMbEdgeFlag); // can not filter beyond picture boundaries
+     for( edge=0 ; edge<4 ; edge++ )                                            // first 4 vertical strips of 16 pel
+     {                                                                                         // then  4 horizontal
+       if( edge || EdgeCondition )
+       {
+         edge_cr = chroma_edge[dir][edge][img->yuv_format];   // chroma edge paired with this luma edge, or -1 for none
+         
+         GetStrength(Strength,img,MbQAddr,dir,edge, mvlimit); // Strength for 4 blks in 1 stripe
+         StrengthSum = Strength[0];
+         for (i = 1; i < 16; i++) StrengthSum += Strength[i];
+         if( StrengthSum )                      // only if one of the 16 Strength bytes is != 0
+         {
+           if (filterNon8x8LumaEdgesFlag[edge])
+             EdgeLoop( imgY, Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge, img->width, 0, 0) ; 
+           if( (imgUV != NULL) && (edge_cr >= 0))
+           {
+             EdgeLoop( imgUV[0], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge_cr, img->width_cr, 1 , 0) ; 
+             EdgeLoop( imgUV[1], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge_cr, img->width_cr, 1 , 1) ; 
+           }
+         }
+         // mixedModeEdgeFlag tested below holds whatever the GetStrength() call above left in it
+         if (dir && !edge && !MbQ->mb_field && mixedModeEdgeFlag) {
+           // this is the extra horizontal edge between a frame macroblock pair and a field above it
+           img->DeblockCall = 2;
+           GetStrength(Strength,img,MbQAddr,dir,4, mvlimit); // Strength for 4 blks in 1 stripe
+           //if( *((int*)Strength) )                      // only if one of the 4 Strength bytes is != 0
+           {
+             if (filterNon8x8LumaEdgesFlag[edge])   // edge is 0 here, and entry 0 is never cleared above
+               EdgeLoop( imgY, Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, img->width, 0, 0) ; 
+             if( (imgUV != NULL) && (edge_cr >= 0))
+             {
+               EdgeLoop( imgUV[0], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, img->width_cr, 1, 0) ; 
+               EdgeLoop( imgUV[1], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, img->width_cr, 1, 1) ; 
+             }
+           }
+           img->DeblockCall = 1;
+         }
+         
+       }
+     }//end edge
+   }//end loop dir
+   img->DeblockCall = 0;   
+ 
+ }
+ 
+   /*!
+  *********************************************************************************************
+  * \brief
+  *    returns a buffer of 16 Strength values (each 0..4) for one stripe in a mb (for different Frame types)
+  *********************************************************************************************
+  */
+ // The three tables and ANY_INTRA below are not referenced anywhere else in loopFilter.c (the tables may be used from other files)
+ int  ININT_STRENGTH[4] = {0x04040404, 0x03030303, 0x03030303, 0x03030303} ; 
+ byte BLK_NUM[2][4][4]  = {{{0,4,8,12},{1,5,9,13},{2,6,10,14},{3,7,11,15}},{{0,1,2,3},{4,5,6,7},{8,9,10,11},{12,13,14,15}}} ;
+ byte BLK_4_TO_8[16]    = {0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3} ;
+ #define ANY_INTRA (MbP->mb_type==I4MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM)
+ 
+ void GetStrength(byte Strength[16],ImageParameters *img,int MbQAddr,int dir,int edge, int mvlimit)
+ {
+   int    blkP, blkQ, idx;
+   int    blk_x, blk_x2, blk_y, blk_y2 ;
+   short  ***list0_mv = enc_picture->mv[LIST_0];
+   short  ***list1_mv = enc_picture->mv[LIST_1];
+   char   **list0_refIdxArr = enc_picture->ref_idx[LIST_0];
+   char   **list1_refIdxArr = enc_picture->ref_idx[LIST_1];
+   int64  **list0_refPicIdArr = enc_picture->ref_pic_id[LIST_0];
+   int64  **list1_refPicIdArr = enc_picture->ref_pic_id[LIST_1];
+   int    xQ, xP, yQ, yP;
+   int    mb_x, mb_y;
+   Macroblock    *MbQ;
+   Macroblock    *MbP;
+   PixelPos pixP;
+ 
+   MbQ = &(img->mb_data[MbQAddr]);
+   // Q is the current macroblock; P is the macroblock holding the pixel just across the edge (left for dir 0, above for dir 1)
+   for( idx=0 ; idx<16 ; idx++ )
+   {                                                                // idx walks the 16 positions along the edge
+     xQ = dir ? idx : edge << 2;
+     yQ = dir ? (edge < 4 ? edge << 2 : 1) : idx;   // edge 4 (the extra mixed-mode edge) uses row 1
+     getNeighbour(MbQAddr, xQ - (1 - dir), yQ - dir, 1, &pixP);
+     xP = pixP.x;
+     yP = pixP.y;
+     MbP = &(img->mb_data[pixP.mb_addr]);
+     mixedModeEdgeFlag = MbQ->mb_field != MbP->mb_field;   // global; DeblockMb() reads it after this call
+     // index 0..15 of the 4x4 block containing each pixel; used below as a bit position in cbp_blk
+     blkQ = ((yQ>>2)<<2) + (xQ>>2);
+     blkP = ((yP>>2)<<2) + (xP>>2);
+ 
+     if ((img->type==SP_SLICE)||(img->type==SI_SLICE) )
+     {   // SP/SI slices keep the same 4-or-3 start value computed in the else branch, with no further refinement
+       Strength[idx] = (edge == 0 && (((!img->MbaffFrameFlag && (img->structure==FRAME)) ||
+       (img->MbaffFrameFlag && !MbP->mb_field && !MbQ->mb_field)) ||
+       ((img->MbaffFrameFlag || (img->structure!=FRAME)) && !dir))) ? 4 : 3;
+     }
+     else
+     {
+       // Start with Strength=3. or Strength=4 for Mb-edge
+       Strength[idx] = (edge == 0 && (((!img->MbaffFrameFlag && (img->structure==FRAME)) ||
+         (img->MbaffFrameFlag && !MbP->mb_field && !MbQ->mb_field)) ||
+         ((img->MbaffFrameFlag || (img->structure!=FRAME)) && !dir))) ? 4 : 3;
+       // the value above is kept if either MB is intra; otherwise it is lowered to 2, 1 or 0 below
+       if(  !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==IPCM || MbP->mb_type==I8MB)
+         && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==IPCM || MbQ->mb_type==I8MB) )
+       {
+         if( ((MbQ->cbp_blk &  (1 << blkQ )) != 0) || ((MbP->cbp_blk &  (1 << blkP)) != 0) )
+           Strength[idx] = 2 ;
+         else
+         {                                                     // if no coefs, but vector difference >= 1 set Strength=1 
+           // if this is a mixed mode edge then one set of reference pictures will be frame and the
+           // other will be field
+           if (mixedModeEdgeFlag)
+           {
+             (Strength[idx] = 1);
+           }
+           else
+           {
+           get_mb_block_pos (MbQAddr, &mb_x, &mb_y);
+           blk_y  = (mb_y<<2) + (blkQ >> 2) ;   // (blk_y, blk_x): 4x4-block position of the Q-side block
+           blk_x  = (mb_x<<2) + (blkQ  & 3) ;
+           blk_y2 = pixP.pos_y >> 2;            // (blk_y2, blk_x2): 4x4-block position of the P-side pixel
+           blk_x2 = pixP.pos_x >> 2;
+           {
+               int64 ref_p0,ref_p1,ref_q0,ref_q1;      // NOTE(review): ref_p* read the Q-side block and ref_q* the P-side one; the tests below are symmetric
+               ref_p0 = list0_refIdxArr[blk_y][blk_x]<0   ? INT64_MIN : list0_refPicIdArr[blk_y][blk_x];   // INT64_MIN stands for "no reference" (ref index < 0)
+               ref_q0 = list0_refIdxArr[blk_y2][blk_x2]<0 ? INT64_MIN : list0_refPicIdArr[blk_y2][blk_x2];
+               ref_p1 = list1_refIdxArr[blk_y][blk_x]<0   ? INT64_MIN : list1_refPicIdArr[blk_y][blk_x];
+               ref_q1 = list1_refIdxArr[blk_y2][blk_x2]<0 ? INT64_MIN : list1_refPicIdArr[blk_y2][blk_x2];
+               if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) ||
+                 ((ref_p0==ref_q1) && (ref_p1==ref_q0))) 
+               {
+                 Strength[idx]=0;
+                 // L0 and L1 reference pictures of p0 are different; q0 as well
+                 if (ref_p0 != ref_p1) 
+                 { 
+                   // compare MV for the same reference picture
+                   if (ref_p0==ref_q0) 
+                   {
+                     Strength[idx] =  
+                       ( (abs( list0_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) 
+                       | (abs( list0_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit) 
+                       | (abs( list1_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) 
+                       | (abs( list1_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit));
+                   }
+                   else 
+                   {
+                     Strength[idx] =  
+                       ( (abs( list0_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) 
+                       | (abs( list0_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit) 
+                       | (abs( list1_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) 
+                       | (abs( list1_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit));
+                   } 
+                 }
+                 else 
+                 { // L0 and L1 reference pictures of p0 are the same; q0 as well
+                   // Strength is 1 only if both the straight (L0-L0, L1-L1) and the crossed (L0-L1, L1-L0) comparisons exceed a limit
+                   Strength[idx] =  
+                     ( (abs( list0_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) 
+                     | (abs( list0_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit ) 
+                     | (abs( list1_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) 
+                     | (abs( list1_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit))
+                     &&
+                     ( (abs( list0_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) 
+                     | (abs( list0_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit) 
+                     | (abs( list1_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) 
+                     | (abs( list1_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit));
+                 }       
+               }
+               else 
+               {
+                 Strength[idx] = 1;        // the two sides use different sets of reference pictures
+               } 
+             }
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ #define CQPOF(qp, uv) (Clip3(0, 51, qp + img->chroma_qp_offset[uv]))
+ // CQPOF: qp plus the chroma QP offset of plane uv, passed through Clip3(0, 51, ...); qp is not parenthesized in the expansion
+ /*!
+  *****************************************************************************************
+  * \brief
+  *    Filters one edge of 16 (luma) or 8 (chroma) pel; samples of plane Img are modified in place
+  *****************************************************************************************
+  */
+ void EdgeLoop(imgpel** Img, byte Strength[16],ImageParameters *img, int MbQAddr, int AlphaC0Offset, int BetaOffset,
+               int dir, int edge, int width, int yuv, int uv)
+ {
+   int      pel, ap = 0, aq = 0, Strng ;
+   int      incP, incQ;
+   int      C0, c0, Delta, dif, AbsDelta ;
+   int      L2 = 0, L1, L0, R0, R1, R2 = 0, RL0, L3, R3 ;
+   int      Alpha = 0, Beta = 0 ;
+   byte*    ClipTab = NULL;   
+   int      small_gap;
+   int      indexA, indexB;
+   int      PelNum;
+   int      StrengthIdx;
+   imgpel   *SrcPtrP, *SrcPtrQ;
+   int      QP;
+   int      xP, xQ, yP, yQ;
+   Macroblock *MbQ, *MbP;
+   PixelPos pixP, pixQ;
+   int      bitdepth_scale;
+   int      pelnum_cr[2][4] =  {{0,8,16,16}, {0,8, 8,16}};  //[dir:0=vert, 1=hor.][yuv_format]
+   // table thresholds are scaled by 2^(bitdepth - 8) of the plane being filtered (yuv == 0 selects luma)
+   if (!yuv)
+     bitdepth_scale = 1<<(img->bitdepth_luma - 8);
+   else
+     bitdepth_scale = 1<<(img->bitdepth_chroma - 8);
+   
+   PelNum = yuv ? pelnum_cr[dir][img->yuv_format] : 16 ;
+ 
+   for( pel=0 ; pel<PelNum ; pel++ )
+   {
+     xQ = dir ? pel : edge << 2;
+     yQ = dir ? (edge < 4 ? edge << 2 : 1) : pel;   // edge 4 (the extra mixed-mode edge) uses row 1
+     getNeighbour(MbQAddr, xQ, yQ, 1-yuv, &pixQ);   // Q: pixel on the current-MB side of the edge
+     getNeighbour(MbQAddr, xQ - (1 - dir), yQ - dir, 1-yuv, &pixP);   // P: the pixel just across the edge
+     xP = pixP.x;
+     yP = pixP.y;
+     MbQ = &(img->mb_data[MbQAddr]);
+     MbP = &(img->mb_data[pixP.mb_addr]);
+     fieldModeFilteringFlag = MbQ->mb_field || MbP->mb_field;
+     StrengthIdx = (yuv&&(PelNum==8)) ? ((MbQ->mb_field && !MbP->mb_field) ? pel<<1 :((pel>>1)<<2)+(pel%2)) : pel;   // 8-pel chroma edges map pel onto the 16 Strength slots
+ 
+     if (pixP.available || (MbQ->LFDisableIdc== 0)) {
+       incQ = dir ? ((fieldModeFilteringFlag && !MbQ->mb_field) ? 2 * width : width) : 1;   // step away from the edge, in samples
+       incP = dir ? ((fieldModeFilteringFlag && !MbP->mb_field) ? 2 * width : width) : 1;
+       SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]);
+       SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]);
+ 
+       // Average QP of the two blocks
+       QP  = yuv ? (QP_SCALE_CR[CQPOF(MbP->qp,uv)] + QP_SCALE_CR[CQPOF(MbQ->qp,uv)] + 1) >> 1 : (MbP->qp + MbQ->qp + 1) >> 1;
+ 
+       indexA = IClip(0, MAX_QP, QP + AlphaC0Offset);
+       indexB = IClip(0, MAX_QP, QP + BetaOffset);
+     
+       Alpha  =ALPHA_TABLE[indexA] * bitdepth_scale;
+       Beta   =BETA_TABLE[indexB]  * bitdepth_scale;
+       ClipTab=CLIP_TAB[indexA]; 
+       // L0..L3 lie on the P side, R0..R3 on the Q side; all eight are read before the strength test below
+       L0  = SrcPtrP[0] ;
+       R0  = SrcPtrQ[0] ;
+       L1  = SrcPtrP[-incP] ;
+       R1  = SrcPtrQ[ incQ] ;
+       L2  = SrcPtrP[-incP*2] ;
+       R2  = SrcPtrQ[ incQ*2] ;
+       L3  = SrcPtrP[-incP*3] ;
+       R3  = SrcPtrQ[ incQ*3] ;
+ 
+       if( (Strng = Strength[StrengthIdx]) )
+       {
+         AbsDelta  = abs( Delta = R0 - L0 )  ;
+       
+         if( AbsDelta < Alpha )
+         {
+           C0  = ClipTab[ Strng ] * bitdepth_scale;
+           if( ((abs( R0 - R1) - Beta )  & (abs(L0 - L1) - Beta )) < 0  ) // the AND is negative only when both terms are, i.e. both differences are below Beta
+           {
+             if( !yuv)
+             {
+               aq  = (abs( R0 - R2) - Beta ) < 0  ;
+               ap  = (abs( L0 - L2) - Beta ) < 0  ;
+             }
+           
+             RL0             = L0 + R0 ;
+           
+             if(Strng == 4 )    // INTRA strong filtering
+             {
+               if( yuv)  // Chroma
+               {
+                 SrcPtrQ[0] = ((R1 << 1) + R0 + L1 + 2) >> 2; 
+                 SrcPtrP[0] = ((L1 << 1) + L0 + R1 + 2) >> 2;                                           
+               }
+               else  // Luma
+               {
+                 small_gap = (AbsDelta < ((Alpha >> 2) + 2));
+               
+                 aq &= small_gap;
+                 ap &= small_gap;
+               
+                 SrcPtrQ[0]   = aq ? ( L1 + ((R1 + RL0) << 1) +  R2 + 4) >> 3 : ((R1 << 1) + R0 + L1 + 2) >> 2 ;
+                 SrcPtrP[0]   = ap ? ( R1 + ((L1 + RL0) << 1) +  L2 + 4) >> 3 : ((L1 << 1) + L0 + R1 + 2) >> 2 ;
+               
+                 SrcPtrQ[ incQ] =   aq  ? ( R2 + R0 + R1 + L0 + 2) >> 2 : R1;
+                 SrcPtrP[-incP] =   ap  ? ( L2 + L1 + L0 + R0 + 2) >> 2 : L1;
+               
+                 SrcPtrQ[ incQ*2] = aq ? (((R3 + R2) <<1) + R2 + R1 + RL0 + 4) >> 3 : R2;
+                 SrcPtrP[-incP*2] = ap ? (((L3 + L2) <<1) + L2 + L1 + RL0 + 4) >> 3 : L2;
+               }
+             }
+             else                                                                                   // normal filtering
+             {
+               c0               = yuv? (C0+1):(C0 + ap + aq) ;
+               dif              = IClip( -c0, c0, ( (Delta << 2) + (L1 - R1) + 4) >> 3 ) ;
+               if(!yuv)
+               {
+                 SrcPtrP[0]  = IClip(0, img->max_imgpel_value , L0 + dif) ;
+                 SrcPtrQ[0]  = IClip(0, img->max_imgpel_value , R0 - dif) ;
+               } 
+               else 
+               {
+                 SrcPtrP[0]  = IClip(0, img->max_imgpel_value_uv , L0 + dif) ;
+                 SrcPtrQ[0]  = IClip(0, img->max_imgpel_value_uv , R0 - dif) ;
+               }
+               // luma only: the second sample on each side is also adjusted when ap / aq is set
+               if( !yuv )
+               {
+                 if( ap )
+                   SrcPtrP[-incP] += IClip( -C0,  C0, ( L2 + ((RL0 + 1) >> 1) - (L1<<1)) >> 1 ) ;
+                 if( aq  )
+                   SrcPtrQ[ incQ] += IClip( -C0,  C0, ( R2 + ((RL0 + 1) >> 1) - (R1<<1)) >> 1 ) ;
+               } ;
+             } ;
+           } ; 
+         } ;
+       } ;
+     } ;
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/macroblock.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/macroblock.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/macroblock.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,4435 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file macroblock.c
+  *
+  * \brief
+  *    Process one macroblock
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *    - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+  *    - Jani Lainema                    <jani.lainema at nokia.com>
+  *    - Sebastian Purreiter             <sebastian.purreiter at mch.siemens.de>
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+  *    - Ragip Kurceren                  <ragip.kurceren at nokia.com>
+  *    - Alexis Michael Tourapis         <alexismt at ieee.org>
+  *************************************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ 
+ #include "elements.h"
+ #include "macroblock.h"
+ #include "refbuf.h"
+ #include "fmo.h"
+ #include "vlc.h"
+ #include "image.h"
+ #include "mb_access.h"
+ #include "ratectl.h"              // head file for rate control
+ #include "cabac.h"
+ #include "transform8x8.h"
+ 
+ //Rate control
+ int predict_error,dq;            // defined in this file
+ extern int DELTA_QP,DELTA_QP2;   // declared here only; the definitions live in another source file
+ extern int QP,QP2;               // declared here only; the definitions live in another source file
+ 
+ 
+  /*!
+  ************************************************************************
+  * \brief
+  *    updates the coordinates for the next macroblock to be processed
+  *
+  * \param mb_addr
+  *    macroblock address in scan order
+  ************************************************************************
+  */
+ void set_MB_parameters (int mb_addr)
+ {
+   img->current_mb_nr = mb_addr;
+   // mb_x/mb_y come from get_mb_block_pos(); block_* = mb_* * 4 and pix_* = block_* * 4
+   get_mb_block_pos(mb_addr, &img->mb_x, &img->mb_y);
+   
+   img->block_x = img->mb_x << 2;
+   img->block_y = img->mb_y << 2;
+ 
+   img->pix_x   = img->block_x << 2;
+   img->pix_y   = img->block_y << 2;
+   // NOTE(review): the opix_* fields presumably address the original (source) picture - confirm
+   img->opix_x   = img->pix_x;
+   // MBAFF only: choose the imgY_org/imgUV_org planes and list_offset; without MBAFF those plane pointers are left untouched
+   if (img->MbaffFrameFlag)
+   {
+     if (img->mb_data[mb_addr].mb_field)
+     {
+       imgY_org  = (mb_addr % 2) ? imgY_org_bot  : imgY_org_top;   // odd address selects the *_bot planes, even the *_top planes
+       imgUV_org = (mb_addr % 2) ? imgUV_org_bot : imgUV_org_top;
+       img->opix_y   = (img->mb_y >> 1 ) << 4;
+       img->mb_data[mb_addr].list_offset = (mb_addr % 2) ? 4 : 2;
+     }
+     else
+     {
+       imgY_org  = imgY_org_frm;
+       imgUV_org = imgUV_org_frm;
+       img->opix_y   = img->block_y << 2;
+       img->mb_data[mb_addr].list_offset = 0;
+     }
+   }
+   else
+   {
+     img->opix_y   = img->block_y << 2;
+     img->mb_data[mb_addr].list_offset = 0;
+   }
+   // chroma coordinates: luma coordinate * mb_cr_size / 16; not computed for YUV400
+   if (img->yuv_format != YUV400)
+   {
+     img->pix_c_x = (img->mb_cr_size_x * img->pix_x) >> 4;
+     img->pix_c_y = (img->mb_cr_size_y * img->pix_y) >> 4;
+     
+     img->opix_c_x = (img->mb_cr_size_x * img->opix_x) >> 4;
+     img->opix_c_y = (img->mb_cr_size_y * img->opix_y) >> 4;
+   }
+   //  printf ("set_MB_parameters: mb %d,  mb_x %d,  mb_y %d\n", mb_addr, img->mb_x, img->mb_y);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    clip to range [0..max luma pel value], i.e. [0..img->max_imgpel_value]; returns the clipped value
+  ************************************************************************
+  */
+ int clip1a(int a)
+ {
+   return ((a)>img->max_imgpel_value?img->max_imgpel_value:((a)<0?0:(a)));   // upper bound is tested first, then the lower bound 0
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    clip to range [0..max chroma pel value], i.e. [0..img->max_imgpel_value_uv]; returns the clipped value
+  ************************************************************************
+  */
+ int clip1a_chr(int a)
+ {
+   return ((a)>img->max_imgpel_value_uv?img->max_imgpel_value_uv:((a)<0?0:(a)));   // upper bound is tested first, then the lower bound 0
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    updates the coordinates and statistics parameter for the
+  *    next macroblock (as written, it adds the current MB's bit counts and mode usage to *stats)
+  ************************************************************************
+  */
+ void proceed2nextMacroblock()
+ {
+ #if TRACE
+   int use_bitstream_backing = (input->slice_mode == FIXED_RATE || input->slice_mode == CALLBACK);
+ #endif
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   int*        bitCount = currMB->bitcounter;
+   int i;
+   
+ #if TRACE
+ // NOTE(review): with use_bitstream_backing set, the identical header line is written a second time
+   if (p_trace)
+   {
+     fprintf(p_trace, "\n*********** Pic: %i (I/P) MB: %i Slice: %i **********\n\n", frame_no, img->current_mb_nr, img->current_slice_nr);
+     if(use_bitstream_backing)
+       fprintf(p_trace, "\n*********** Pic: %i (I/P) MB: %i Slice: %i **********\n\n", frame_no, img->current_mb_nr, img->current_slice_nr);
+    // Write out the tracestring for each symbol
+     for (i=0; i<currMB->currSEnr; i++)
+       trace2out(&(img->MB_SyntaxElements[i]));
+   }
+ #endif
+ 
+   // Update the statistics
+   stats->bit_use_mb_type[img->type]      += bitCount[BITS_MB_MODE];
+   stats->bit_use_coeffY[img->type]       += bitCount[BITS_COEFF_Y_MB] ;
+   stats->tmp_bit_use_cbp[img->type]      += bitCount[BITS_CBP_MB];
+   stats->bit_use_coeffC[img->type]       += bitCount[BITS_COEFF_UV_MB];
+   stats->bit_use_delta_quant[img->type]  += bitCount[BITS_DELTA_QUANT_MB];
+   // intra MBs: count the chroma prediction mode and, when (cbp & 15) is non-zero, the transform size
+   if (IS_INTRA(currMB))
+   {
+     ++stats->intra_chroma_mode[currMB->c_ipred_mode];
+ 
+     if ((currMB->cbp&15) != 0)
+     {
+       if (currMB->luma_transform_size_8x8_flag)
+         ++stats->mode_use_transform_8x8[img->type][currMB->mb_type];
+       else
+         ++stats->mode_use_transform_4x4[img->type][currMB->mb_type];
+     }
+   }
+ 
+    ++stats->mode_use[img->type][currMB->mb_type];
+    stats->bit_use_mode[img->type][currMB->mb_type]+= bitCount[BITS_INTER_MB];
+  
+    if (img->type != I_SLICE)
+    {
+      if (currMB->mb_type == P8x8)
+      {
+        for(i=0;i<4;i++)   // one pass per entry of b8mode[]
+        {
+          if (currMB->b8mode[i] > 0)
+            ++stats->mode_use[img->type][currMB->b8mode[i]];
+          else
+            ++stats->b8_mode_0_use[img->type][currMB->luma_transform_size_8x8_flag];
+          
+          if (currMB->b8mode[i]==4)
+          {
+            if ((currMB->luma_transform_size_8x8_flag && (currMB->cbp&15) != 0) || input->Transform8x8Mode == 2)
+              ++stats->mode_use_transform_8x8[img->type][4];
+            else
+              ++stats->mode_use_transform_4x4[img->type][4];
+          }
+        }
+      }
+      else if (currMB->mb_type >= 0 && currMB->mb_type <=3 && ((currMB->cbp&15) != 0))
+      {
+        if (currMB->luma_transform_size_8x8_flag)
+          ++stats->mode_use_transform_8x8[img->type][currMB->mb_type];
+        else
+          ++stats->mode_use_transform_4x4[img->type][currMB->mb_type];
+      }
+    }
+   
+   // Statistics
+   if ((img->type == P_SLICE)||(img->type==SP_SLICE) )
+   {
+     ++stats->quant0;                  // number of macroblocks accumulated into quant1
+     stats->quant1 += currMB->qp;      // to find average quant for inter frames
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    initializes the current macroblock
+  *
+  *    In order, this function:
+  *    - stores the field flag in the macroblock and in enc_picture and calls
+  *      set_MB_parameters();
+  *    - for slice modes FIXED_RATE and CALLBACK, and only while
+  *      img->cod_counter is zero, saves the bitstream position (and, for
+  *      CABAC, the arithmetic coder state) of every data partition so that
+  *      terminate_macroblock() can restore it when the macroblock has to be
+  *      coded again;
+  *    - derives prev_qp / prev_delta_qp from the previous macroblock returned
+  *      by FmoGetPreviousMBNr() and sets qp / delta_qp, either through the
+  *      rate control path (input->RCEnable) or directly from the slice qp;
+  *    - copies the loop filter parameters, checks neighbor availability and
+  *      resets motion vectors, reference indices, syntax element fields and
+  *      bit counters of the macroblock.
+  *
+  * \param mb_addr
+  *    macroblock address in scan order
+  * \param mb_field
+  *    true for field macroblock coding
+  *
+  * \note
+  *    dq, predict_error, QP, QP2, DELTA_QP and DELTA_QP2 are not declared
+  *    inside this function; presumably they are file-scope or global
+  *    variables declared elsewhere (TODO confirm).
+  ************************************************************************
+  */
+ void start_macroblock(int mb_addr, int mb_field)
+ {
+   int i,j,l;
+   int use_bitstream_backing = (input->slice_mode == FIXED_RATE || input->slice_mode == CALLBACK); // only these slice modes can request re-coding in terminate_macroblock()
+   Macroblock *currMB = &img->mb_data[mb_addr];
+   Slice *curr_slice = img->currentSlice;
+   DataPartition *dataPart;
+   Bitstream *currStream;
+   EncodingEnvironmentPtr eep;
+   int max_qp_delta = 25 + img->bitdepth_luma_qp_scale/2;   // upper bound applied to delta_qp below
+   int min_qp_delta = (26 + img->bitdepth_luma_qp_scale/2); // magnitude of the lower bound (used negated below)
+   
+   currMB->mb_field = mb_field;
+   
+   enc_picture->mb_field[mb_addr] = mb_field;
+   
+   set_MB_parameters (mb_addr);
+ 
+   if(use_bitstream_backing)
+   {
+     // Keep the current state of the bitstreams
+     if(!img->cod_counter) // state is only saved while cod_counter is zero
+       for (i=0; i<curr_slice->max_part_nr; i++)
+       {
+         dataPart = &(curr_slice->partArr[i]);
+         currStream = dataPart->bitstream;
+         currStream->stored_bits_to_go = currStream->bits_to_go;
+         currStream->stored_byte_pos   = currStream->byte_pos;
+         currStream->stored_byte_buf   = currStream->byte_buf;
+ 
+         if (input->symbol_mode ==CABAC)
+         {
+           eep = &(dataPart->ee_cabac);
+           eep->ElowS            = eep->Elow;
+           eep->ErangeS          = eep->Erange;
+           eep->EbufferS         = eep->Ebuffer;
+           eep->Ebits_to_goS     = eep->Ebits_to_go;
+           eep->Ebits_to_followS = eep->Ebits_to_follow;
+           eep->EcodestrmS       = eep->Ecodestrm;
+           eep->Ecodestrm_lenS   = eep->Ecodestrm_len;
+           eep->CS               = eep->C;
+           eep->ES               = eep->E;
+         }
+       }
+   }
+ 
+   // Save the slice number of this macroblock. When the macroblock below
+   // is coded it will use this to decide if prediction for above is possible
+   currMB->slice_nr = img->current_slice_nr;
+ 
+   // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+   // Rate control
+   currMB->qpsp       = img->qpsp;
+   if(input->RCEnable)
+   {
+     int prev_mb = FmoGetPreviousMBNr(img->current_mb_nr);
+     if (prev_mb>-1)
+     {
+       currMB->prev_qp = img->mb_data[prev_mb].qp;
+       if (img->mb_data[prev_mb].slice_nr == img->current_slice_nr)
+       {
+         currMB->prev_delta_qp = img->mb_data[prev_mb].delta_qp;
+       }
+       else
+       {
+         currMB->prev_delta_qp = 0;
+       }
+     }
+     else
+     {
+       currMB->prev_qp = curr_slice->qp;
+       currMB->prev_delta_qp = 0;
+     }
+     // frame layer rate control
+     if(input->basicunit==img->Frame_Total_Number_MB)
+     {
+       currMB->delta_qp = 0;
+       currMB->qp       = img->qp;
+     }
+     // basic unit layer rate control
+     else
+     {
+       // each I or B frame has only one QP 
+       if((img->type==I_SLICE)||(img->type==B_SLICE))
+       {
+         currMB->delta_qp = 0;
+         currMB->qp       = img->qp;
+       }
+       else if(img->type==P_SLICE)
+       {
+         if (!img->write_macroblock) //branch taken while the write_macroblock flag is NOT set
+         {
+           if (!currMB->mb_field)  //frame macroblock
+           {
+             if (img->current_mb_nr == 0) //first macroblock
+             {
+               // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+               currMB->delta_qp = 0;
+               currMB->qp       = img->qp;
+               DELTA_QP = DELTA_QP2 = currMB->delta_qp;
+               QP = QP2 = currMB->qp;
+             }
+             else
+             {
+               if (!((input->MbInterlace) && img->bot_MB)) //top macroblock
+               {
+                 if (img->mb_data[img->current_mb_nr-1].prev_cbp == 1)
+                 {
+                   currMB->delta_qp = 0;
+                   currMB->qp       = img->qp;
+                 }
+                 else
+                 {
+                   currMB->qp = img->mb_data[img->current_mb_nr-1].prev_qp;
+                   currMB->delta_qp = currMB->qp - img->mb_data[img->current_mb_nr-1].qp;
+                   img->qp = currMB->qp;
+                 }
+                 DELTA_QP = DELTA_QP2 = currMB->delta_qp;
+                 QP = QP2 = currMB->qp;
+               }
+               else //bottom macroblock
+               {
+                 // Initialize delta qp change from last macroblock. Feature may be used for future rate control
+                 currMB->delta_qp = 0;
+                 currMB->qp       = img->qp;       // needed in loop filter (even if constant QP is used)
+               }
+             }
+           }
+           else  // field macroblock
+           {
+             if (!img->bot_MB) //top macroblock 
+             {
+               currMB->delta_qp = DELTA_QP2;
+               currMB->qp   = img->qp    = QP2;
+             }
+             else // bottom macroblock
+             {
+               currMB->qp = img->qp;
+               currMB->delta_qp = 0;
+             }
+           }
+         }
+         else 
+         {
+           if (!img->bot_MB) //write top macroblock
+           {
+             if (img->write_macroblock_frame)
+             {
+               currMB->delta_qp = DELTA_QP;
+               img->qp = currMB->qp = QP;
+             }
+             else
+             {
+               currMB->delta_qp = DELTA_QP2;
+               img->qp = currMB->qp = QP2;
+             }
+           }
+           else //write bottom macroblock
+           {
+             currMB->delta_qp = 0;
+             currMB->qp = img->qp;
+           }
+         }
+         
+         // compute the quantization parameter for each basic unit of P frame
+         if(!((input->MbInterlace)&&img->bot_MB))
+         {
+           if(!currMB->mb_field)
+           {
+             if((img->NumberofCodedMacroBlocks>0)\
+               &&(img->NumberofCodedMacroBlocks%img->BasicUnit==0))
+             {
+               // frame coding
+               if(active_sps->frame_mbs_only_flag)
+               {
+                 updateRCModel();
+                 img->BasicUnitQP=updateQuantizationParameter(img->TopFieldFlag);
+               }
+               // adaptive field/frame coding
+               else if((input->PicInterlace==ADAPTIVE_CODING)&&(!input->MbInterlace)&&(img->IFLAG==0))
+               {
+                 updateRCModel();
+                 img->BasicUnitQP=updateQuantizationParameter(img->TopFieldFlag);
+               }
+               // field coding
+               else if((input->PicInterlace==FIELD_CODING)&&(!input->MbInterlace)&&(img->IFLAG==0))
+               {
+                 updateRCModel();
+                 img->BasicUnitQP=updateQuantizationParameter(img->TopFieldFlag);
+               }
+               // mb adaptive f/f coding, field coding
+               else if((input->MbInterlace)&&(img->IFLAG==0)&&(img->FieldControl==1))
+               {
+                 updateRCModel();
+                 img->BasicUnitQP=updateQuantizationParameter(img->TopFieldFlag);
+               }
+               // mb adaptive f/f coding, frame coding
+               else if((input->MbInterlace)&&(img->IFLAG==0)&&(img->FieldControl==0))
+               {
+                 updateRCModel();
+                 img->BasicUnitQP=updateQuantizationParameter(img->TopFieldFlag);
+               } 
+             }
+             
+             if(img->current_mb_nr==0)
+               img->BasicUnitQP=img->qp;
+             
+             currMB->predict_qp=img->BasicUnitQP;
+             
+             if(currMB->predict_qp>currMB->qp + max_qp_delta) // keep predict_qp within [qp - min_qp_delta, qp + max_qp_delta]
+               currMB->predict_qp=currMB->qp + max_qp_delta;
+             else if(currMB->predict_qp<currMB->qp - min_qp_delta)
+               currMB->predict_qp=currMB->qp - min_qp_delta; 
+            
+             dq = currMB->delta_qp + currMB->predict_qp-currMB->qp; // candidate delta_qp, clamped to [-min_qp_delta, max_qp_delta] below
+             if(dq < -min_qp_delta) 
+             {
+               dq = -min_qp_delta;
+               predict_error = dq-currMB->delta_qp;
+               img->qp = img->qp+predict_error;
+               currMB->delta_qp = -min_qp_delta;
+             }
+             else if(dq > max_qp_delta)
+             {
+               dq = max_qp_delta;
+               predict_error = dq - currMB->delta_qp;
+               img->qp = img->qp + predict_error;
+               currMB->delta_qp = max_qp_delta;
+             }
+             else
+             {
+               currMB->delta_qp = dq;
+               predict_error=currMB->predict_qp-currMB->qp;
+               img->qp = currMB->predict_qp;
+             }
+             currMB->qp =  img->qp;
+             if (input->MbInterlace)
+             {
+               DELTA_QP = DELTA_QP2 = currMB->delta_qp;
+               QP = QP2     = currMB->qp;
+               DELTA_QP2 = currMB->delta_qp; // redundant: DELTA_QP2 already received this value two lines above
+             }
+             currMB->predict_error=predict_error;
+           }
+           else
+             predict_error=currMB->predict_error;
+         }
+         else
+           currMB->prev_qp=img->qp;
+        }
+     }   
+   }
+   else
+   {
+     Slice* currSlice = img->currentSlice; // same pointer as curr_slice above
+   	
+     int prev_mb = FmoGetPreviousMBNr(img->current_mb_nr);
+     if (prev_mb>-1)
+     {
+       currMB->prev_qp = img->mb_data[prev_mb].qp;
+       if (img->mb_data[prev_mb].slice_nr == img->current_slice_nr)
+       {
+         currMB->prev_delta_qp = img->mb_data[prev_mb].delta_qp;
+       }
+       else
+       {
+         currMB->prev_delta_qp = 0;
+       }
+     }
+     else
+     {
+       currMB->prev_qp = currSlice->qp;
+       currMB->prev_delta_qp = 0;
+     }
+ 
+     currMB->qp = currSlice->qp ;
+   
+     currMB->delta_qp = currMB->qp - currMB->prev_qp;
+     DELTA_QP = DELTA_QP2 = currMB->delta_qp;
+     QP = QP2 = currMB->qp;
+   }
+   // Initialize counter for MB symbols
+   currMB->currSEnr=0;
+ 
+   // loop filter parameter
+   if (active_pps->deblocking_filter_control_present_flag)
+   {
+     currMB->LFDisableIdc    = img->LFDisableIdc;
+     currMB->LFAlphaC0Offset = img->LFAlphaC0Offset;
+     currMB->LFBetaOffset    = img->LFBetaOffset;
+   }
+   else
+   {
+     currMB->LFDisableIdc    = 0;
+     currMB->LFAlphaC0Offset = 0;
+     currMB->LFBetaOffset    = 0;
+   }
+ 
+   // If MB is next to a slice boundary, mark neighboring blocks unavailable for prediction
+   CheckAvailabilityOfNeighbors();
+ 
+   if (input->symbol_mode == CABAC)
+     CheckAvailabilityOfNeighborsCABAC();
+   
+   // Reset vectors and reference indices before doing motion search in motion_search().
+   for (l=0; l<2; l++)
+   {
+     for (j=img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+     {
+       memset(&enc_picture->ref_idx[l][j][img->block_x], -1, BLOCK_MULTIPLE * sizeof(char));     
+       memset(enc_picture->mv [l][j][img->block_x], 0, 2 * BLOCK_MULTIPLE * sizeof(short));
+       for (i=img->block_x; i < img->block_x + BLOCK_MULTIPLE; i++)
+         enc_picture->ref_pic_id[l][j][i]= -1;
+     }
+   }
+ 
+   // Reset syntax element entries in MB struct
+   currMB->mb_type      = 0;
+   currMB->cbp_blk      = 0;
+   currMB->cbp          = 0;
+   currMB->cbp_bits     = 0;
+   currMB->c_ipred_mode = DC_PRED_8;
+ 
+   memset (currMB->mvd, 0, BLOCK_CONTEXT * sizeof(int));  
+   memset (currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char)); // byte-wise fill with DC_PRED; sized with sizeof(char), so the elements are presumably char (TODO confirm)
+   memset (currMB->intra_pred_modes8x8, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+ 
+   //initialize the whole MB as INTRA coded
+   //Blocks are set to notINTRA in write_one_macroblock
+   if (input->UseConstrainedIntraPred)
+   {
+     img->intra_block[img->current_mb_nr] = 1;
+   }
+ 
+   // Initialize bitcounters for this macroblock
+   if(img->current_mb_nr == 0) // No slice header to account for
+   {
+     currMB->bitcounter[BITS_HEADER] = 0;
+   }
+   else if (currMB->slice_nr == img->mb_data[img->current_mb_nr-1].slice_nr) // current MB belongs to the
+   // same slice as the last MB
+   {
+     currMB->bitcounter[BITS_HEADER] = 0;
+   }
+ 
+   currMB->bitcounter[BITS_MB_MODE       ] = 0;
+   currMB->bitcounter[BITS_COEFF_Y_MB    ] = 0;
+   currMB->bitcounter[BITS_INTER_MB      ] = 0;
+   currMB->bitcounter[BITS_CBP_MB        ] = 0;
+   currMB->bitcounter[BITS_DELTA_QUANT_MB] = 0;
+   currMB->bitcounter[BITS_COEFF_UV_MB   ] = 0;
+ 
+ #ifdef _FAST_FULL_ME_
+ //  if(input->FMEnable != 0 && input->FMEnable != 3)
+   if(!input->FMEnable)
+     ResetFastFullIntegerSearch ();
+ #endif
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    terminates processing of the current macroblock depending
+  *    on the chosen slice mode
+  *
+  *    Decides, per slice mode, whether the slice ends with this macroblock
+  *    and whether the macroblock has to be coded again in the next slice.
+  *    When re-coding is requested, the stored bitstream position (and, for
+  *    CABAC, the stored arithmetic coder state) of every data partition is
+  *    restored. A pending skip run (img->cod_counter) is written as an
+  *    SE_MBTYPE syntax element when the slice ends.
+  *
+  * \note
+  *    Except in the FIXED_MB case, *end_of_slice is only ever set to TRUE or
+  *    OR-ed with a condition here; it is never cleared, so the caller has to
+  *    initialize it. The function-static variable skip keeps its value
+  *    between calls.
+  ************************************************************************
+  */
+ void terminate_macroblock( Boolean *end_of_slice,      //!< returns true for last macroblock of a slice, otherwise false
+                            Boolean *recode_macroblock  //!< returns true if max. slice size is exceeded and the macroblock must be recoded in next slice
+                            )
+ {
+   int i;
+   Slice *currSlice = img->currentSlice;
+   Macroblock    *currMB    = &img->mb_data[img->current_mb_nr];
+   SyntaxElement *currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   int *partMap = assignSE2partition[input->partition_mode];
+   DataPartition *dataPart;
+   Bitstream *currStream;
+   int rlc_bits=0; // length in bits of the skip run syntax element, 0 if none was written
+   EncodingEnvironmentPtr eep;
+   int use_bitstream_backing = (input->slice_mode == FIXED_RATE || input->slice_mode == CALLBACK);
+   int new_slice;
+   static int skip = FALSE; // static: value persists between calls
+ 
+ 	 
+   // if previous mb in the same slice group has different slice number as the current, it's the
+   // the start of new slice
+   new_slice=0;
+   if ( (img->current_mb_nr==0) || (FmoGetPreviousMBNr(img->current_mb_nr)<0) )
+     new_slice=1;
+   else if( img->mb_data[FmoGetPreviousMBNr(img->current_mb_nr)].slice_nr != img->current_slice_nr )
+     new_slice=1;
+ 	  
+   *recode_macroblock=FALSE;
+ 
+   switch(input->slice_mode)
+   {
+   case NO_SLICES:
+     currSlice->num_mb++;
+     *recode_macroblock = FALSE;
+     if ((currSlice->num_mb) == (int)img->PicSizeInMbs) // maximum number of MBs reached
+       *end_of_slice = TRUE;
+     
+     // if it's end of current slice group, slice ends too
+     *end_of_slice |= (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr)));
+     
+     break;
+   case FIXED_MB:
+     // For slice mode one, check if a new slice boundary follows
+     currSlice->num_mb++;
+     *recode_macroblock = FALSE;
+     //! Check end-of-slice group condition first
+     *end_of_slice = (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr)));
+     //! Now check maximum # of MBs in slice
+     *end_of_slice |= (currSlice->num_mb >= input->slice_argument);
+     
+     break;
+     
+     // For slice modes two and three, check if coding of this macroblock
+     // resulted in too many bits for this slice. If so, indicate slice
+     // boundary before this macroblock and code the macroblock again
+   case FIXED_RATE:
+     // in case of skip MBs check if there is a slice boundary
+     // only for UVLC (img->cod_counter is always 0 in case of CABAC)
+     if(img->cod_counter)
+     {
+       // write out the skip MBs to know how many bits we need for the RLC
+       currSE->value1 = img->cod_counter;
+       currSE->value2 = 0;
+       currSE->mapping = ue_linfo;
+       currSE->type = SE_MBTYPE;
+       dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+       
+       dataPart->writeSyntaxElement(  currSE, dataPart);
+       rlc_bits=currSE->len;
+       
+       currStream = dataPart->bitstream;
+       // save the bitstream as it would be if we write the skip MBs
+       currStream->bits_to_go_skip  = currStream->bits_to_go;
+       currStream->byte_pos_skip    = currStream->byte_pos;
+       currStream->byte_buf_skip    = currStream->byte_buf;
+       // restore the bitstream
+       currStream->bits_to_go = currStream->stored_bits_to_go;
+       currStream->byte_pos = currStream->stored_byte_pos;
+       currStream->byte_buf = currStream->stored_byte_buf;
+       skip = TRUE;
+     }
+     //! Check if the last coded macroblock fits into the size of the slice
+     //! But only if this is not the first macroblock of this slice
+     if (!new_slice)
+     {
+       if(slice_too_big(rlc_bits))
+       {
+         *recode_macroblock = TRUE;
+         *end_of_slice = TRUE;
+       }
+       else if(!img->cod_counter)
+         skip = FALSE;
+     }
+     // maximum number of MBs
+     
+     // check if current slice group is finished
+     if ((*recode_macroblock == FALSE) && (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr)))) 
+     {
+       *end_of_slice = TRUE;
+       if(!img->cod_counter)
+         skip = FALSE;
+     }
+     
+     //! (first MB OR first MB in a slice) AND bigger that maximum size of slice
+     if (new_slice && slice_too_big(rlc_bits))
+     {
+       *end_of_slice = TRUE;
+       if(!img->cod_counter)
+         skip = FALSE;
+     }
+     if (!*recode_macroblock)
+       currSlice->num_mb++;
+     break;
+     
+   case  CALLBACK:
+     if (img->current_mb_nr > 0 && !new_slice)
+     {
+       if (currSlice->slice_too_big(rlc_bits)) // callback stored in the slice; rlc_bits is still 0 on this path
+       {
+         *recode_macroblock = TRUE;
+         *end_of_slice = TRUE;
+       }
+     }
+     
+     if ( (*recode_macroblock == FALSE) && (img->current_mb_nr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (img->current_mb_nr)))) 
+       *end_of_slice = TRUE;
+     break;
+     
+   default:
+     snprintf(errortext, ET_SIZE, "Slice Mode %d not supported", input->slice_mode);
+     error(errortext, 600);
+   }
+ 
+   if(*recode_macroblock == TRUE)
+   {
+     // Restore everything
+     for (i=0; i<currSlice->max_part_nr; i++)
+     {
+       dataPart = &(currSlice->partArr[i]);
+       currStream = dataPart->bitstream;
+       currStream->bits_to_go = currStream->stored_bits_to_go;
+       currStream->byte_pos  = currStream->stored_byte_pos;
+       currStream->byte_buf  = currStream->stored_byte_buf;
+       if (input->symbol_mode == CABAC)
+       {
+         eep = &(dataPart->ee_cabac);
+         eep->Elow            = eep->ElowS;
+         eep->Erange          = eep->ErangeS;
+         eep->Ebuffer         = eep->EbufferS;
+         eep->Ebits_to_go     = eep->Ebits_to_goS;
+         eep->Ebits_to_follow = eep->Ebits_to_followS;
+         eep->Ecodestrm       = eep->EcodestrmS;
+         eep->Ecodestrm_len   = eep->Ecodestrm_lenS;
+         eep->C               = eep->CS;
+         eep->E               = eep->ES;       
+       }
+     }
+   }
+ 
+   if(*end_of_slice == TRUE  && skip == TRUE) //! TO 4.11.2001 Skip MBs at the end of this slice
+   { 
+     //! only for Slice Mode 2 or 3
+     // If we still have to write the skip, let's do it!
+     if(img->cod_counter && *recode_macroblock == TRUE) //! MB that did not fit in this slice
+     { 
+       // If recoding is true and we have had skip, 
+       // we have to reduce the counter in case of recoding
+       img->cod_counter--;
+       if(img->cod_counter)
+       {
+         currSE->value1 = img->cod_counter;
+         currSE->value2 = 0;
+         currSE->mapping = ue_linfo;
+         currSE->type = SE_MBTYPE;
+         dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+         dataPart->writeSyntaxElement(  currSE, dataPart);
+         rlc_bits=currSE->len;
+         currMB->bitcounter[BITS_MB_MODE]+=rlc_bits;
+         img->cod_counter = 0;
+       }
+     }
+     else //! MB that did not fit in this slice anymore is not a Skip MB
+     {
+       dataPart = &(currSlice->partArr[partMap[SE_MBTYPE]]);       
+       currStream = dataPart->bitstream;
+         // update the bitstream
+       currStream->bits_to_go = currStream->bits_to_go_skip; // adopt the state saved after the skip run was written in the FIXED_RATE case
+       currStream->byte_pos  = currStream->byte_pos_skip;
+       currStream->byte_buf  = currStream->byte_buf_skip;
+ 
+       // update the statistics
+       img->cod_counter = 0;
+       skip = FALSE;
+     }
+   }
+   
+   //! TO 4.11.2001 Skip MBs at the end of this slice for Slice Mode 0 or 1
+   if(*end_of_slice == TRUE && img->cod_counter && !use_bitstream_backing)
+   {
+     currSE->value1 = img->cod_counter;
+     currSE->value2 = 0;
+     currSE->mapping = ue_linfo;
+     currSE->type = SE_MBTYPE;
+     dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+     dataPart->writeSyntaxElement(  currSE, dataPart);
+      currMB->currSEnr ++;
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "Final MB runlength = %3d",img->cod_counter); 
+ #endif
+    
+     rlc_bits=currSE->len;
+     currMB->bitcounter[BITS_MB_MODE]+=rlc_bits;
+     img->cod_counter = 0;
+   }
+ }
+ 
+ /*!
+  *****************************************************************************
+  *
+  * \brief 
+  *    Slice size check (Slice Mode 2): tests every data partition of the
+  *    current slice against the limit given by input->slice_argument,
+  *    which is compared in bytes.
+  *
+  * \param rlc_bits
+  *    length in bits of a pending skip run; in UVLC mode one extra byte is
+  *    counted when fewer than rlc_bits bits are left in the current byte
+  * 
+  * \return
+  *    TRUE  if at least one partition exceeds the limit \n
+  *    FALSE if every partition of this slice stays within the limit
+  *
+  * \par Side effects
+  *    none
+  *
+  * \date
+  *    4 November 2001
+  *
+  * \author
+  *    Tobias Oelbaum      drehvial at gmx.net
+  *****************************************************************************/
+  
+  int slice_too_big(int rlc_bits)
+  {
+    Slice *slice = img->currentSlice;
+    int part;
+ 
+    //! UVLC: completed bytes plus the partly used byte(s)
+    if (input->symbol_mode == UVLC)
+    {
+      for (part = 0; part < slice->max_part_nr; part++)
+      {
+        Bitstream *stream = slice->partArr[part].bitstream;
+        int bytes_used = stream->byte_pos;
+ 
+        // a byte that has been started counts as used
+        bytes_used += (stream->bits_to_go < 8);
+        // the skip run would spill over into one more byte
+        bytes_used += (stream->bits_to_go < rlc_bits);
+ 
+        if (bytes_used > input->slice_argument)
+          return TRUE;
+      }
+    }
+ 
+    //! CABAC: compare the bits produced by the arithmetic coder
+    if (input->symbol_mode == CABAC)
+    {
+      for (part = 0; part < slice->max_part_nr; part++)
+      {
+        EncodingEnvironmentPtr coder = &(slice->partArr[part].ee_cabac);
+ 
+        if (arienco_bits_written(coder) > (input->slice_argument*8))
+          return TRUE;
+      }
+    }
+ 
+    return FALSE;
+  }
+ 
+  static pel_t *(*get_line) (pel_t**, int, int, int, int);
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fetch the 16 prediction samples of one 4x4 luma block from the
+  *    upsampled plane (imgY_ups) of a single reference picture.
+  *
+  *    The start position is the block coordinate shifted left by two plus
+  *    the motion vector and the padding offset; four rows are read, each
+  *    contributing the samples at offsets 0, 4, 8 and 12. The file-scope
+  *    function pointer get_line is set to FastLine4X when the start
+  *    position passes the range test below and to UMVLine4X otherwise.
+  ************************************************************************
+  */
+ void OneComponentLumaPrediction4x4 ( int*   mpred,          //!< array of prediction values (row by row)
+                                      int    pic_pix_x,      //!< absolute horizontal coordinate of 4x4 block
+                                      int    pic_pix_y,      //!< absolute vertical   coordinate of 4x4 block
+                                      short* mv,             //!< motion vector
+                                      short  ref,            //!< reference frame 
+                                      StorablePicture **list //!< reference picture list
+                                      )
+ {
+   pel_t** ups_plane = list[ref]->imgY_ups;
+   int     y_start   = (pic_pix_y << 2) + mv[1] + IMG_PAD_SIZE_TIMES4;
+   int     x_start   = (pic_pix_x << 2) + mv[0] + IMG_PAD_SIZE_TIMES4;
+   int     y, col;
+   pel_t  *row;
+ 
+   int max_x = ((list[ref]->size_x + 2*IMG_PAD_SIZE - 1)<<2);
+   int max_y = ((list[ref]->size_y + 2*IMG_PAD_SIZE - 1)<<2);
+ 
+   // pick the line accessor: UMVLine4X as soon as one bound is violated
+   if ((x_start <= 0) || (x_start >= max_x - 4*BLOCK_SIZE - 1) || (y_start <= 0) || (y_start >= max_y - 4*BLOCK_SIZE - 1))
+     get_line = UMVLine4X;
+   else
+     get_line = FastLine4X;
+ 
+   for (y = y_start; y < y_start + MB_BLOCK_SIZE; y += BLOCK_SIZE)
+   {
+     row = get_line (ups_plane, y, x_start, max_y, max_x);
+ 
+     // every 4th sample of the fetched line
+     for (col = 0; col < 16; col += 4)
+       *mpred++ = *(row + col);
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Copy the 16 values of a 4x4 block into a linear array.
+  *
+  *    The first index of block runs fastest: the output order is
+  *    block[0][0], block[1][0], block[2][0], block[3][0], block[0][1], ...
+  ************************************************************************
+  */
+ void copyblock4x4 ( int* mpred,                       //!< destination: receives 16 values
+                     int block[BLOCK_SIZE][BLOCK_SIZE] //!< source block, read as block[inner][outer]
+                     )        
+ {
+   int outer, inner;
+ 
+   for (outer = 0; outer < 4; outer++)
+   {
+     for (inner = 0; inner < 4; inner++)
+       *mpred++ = block[inner][outer];
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Predict one 4x4 Luma block and store the result in img->mpr.
+  *
+  *    Motion vectors come from img->all_mv, or from img->bipred_mv1 /
+  *    img->bipred_mv2 when the macroblock uses bi_pred_me and the call is a
+  *    bi-directional mode-1 prediction with both reference indices zero.
+  *    The list0 and/or list1 samples are fetched with
+  *    OneComponentLumaPrediction4x4() and then either copied, averaged, or
+  *    combined by weighted prediction.
+  ************************************************************************
+  */
+ void LumaPrediction4x4 ( int   block_x,    //!< relative horizontal block coordinate of 4x4 block
+                          int   block_y,    //!< relative vertical   block coordinate of 4x4 block
+                          int   p_dir,      //!< prediction direction (0=list0, 1=list1, 2=bipred)
+                          int   fw_mode,    //!< list0 prediction mode (1-7, 0=DIRECT if bw_mode=0)
+                          int   bw_mode,    //!< list1 prediction mode (1-7, 0=DIRECT if fw_mode=0)
+                          short fw_ref_idx, //!< reference frame for list0 prediction (-1: Intra4x4 pred. with fw_mode)
+                          short bw_ref_idx  //!< reference frame for list1 prediction 
+                          )
+ {
+   static int fw_pred[16];
+   static int bw_pred[16];
+ 
+   Macroblock* mb      = &img->mb_data[img->current_mb_nr];
+   int  x_end          = block_x+4;
+   int  y_end          = block_y+4;
+   int  abs_x          = img->opix_x + block_x;
+   int  abs_y          = img->opix_y + block_y;
+   int  blk_col        = block_x >> 2;
+   int  blk_row        = block_y >> 2;
+   int  p_weighted     = (active_pps->weighted_pred_flag && (img->type== P_SLICE || img->type == SP_SLICE));
+   int  b_weighted     = (active_pps->weighted_bipred_idc && (img->type== B_SLICE));
+   int  weighted       = (p_weighted || b_weighted);
+   short**** mvs       = img->all_mv[blk_row][blk_col];
+   int  row, col;
+   int  k = 0;         // running index into fw_pred / bw_pred
+ 
+   // bi-predictive motion search result replaces the regular vectors
+   if (mb->bi_pred_me && fw_ref_idx == 0 && bw_ref_idx == 0 && p_dir == 2 && fw_mode==1 && bw_mode==1)
+   {
+     if (mb->bi_pred_me == 1)
+       mvs = img->bipred_mv1[blk_row][blk_col];
+     else
+       mvs = img->bipred_mv2[blk_row][blk_col];
+   }
+ 
+   // fetch the samples of the list(s) in use
+   if (p_dir == 0 || p_dir == 2)
+     OneComponentLumaPrediction4x4 (fw_pred, abs_x, abs_y, mvs[LIST_0][fw_ref_idx][fw_mode], fw_ref_idx, listX[0+mb->list_offset]);
+ 
+   if (p_dir == 1 || p_dir == 2)
+     OneComponentLumaPrediction4x4 (bw_pred, abs_x, abs_y, mvs[LIST_1][bw_ref_idx][bw_mode], bw_ref_idx, listX[1+mb->list_offset]);
+ 
+   if (p_dir == 2)
+   {
+     if (weighted)
+     {
+       int w_fw   = wbp_weight[0][fw_ref_idx][bw_ref_idx][0];
+       int w_bw   = wbp_weight[1][fw_ref_idx][bw_ref_idx][0];
+       int offset = ((wp_offset[0][fw_ref_idx][0] + wp_offset[1][bw_ref_idx][0] + 1)>>1);
+ 
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = clip1a(((w_fw * fw_pred[k] + w_bw * bw_pred[k] +
+                                         2*wp_luma_round) >> (luma_log_weight_denom + 1)) + offset);
+     }
+     else
+     {
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = (fw_pred[k] + bw_pred[k] + 1) >> 1;
+     }
+   }
+   else if (p_dir == 0)
+   {
+     if (weighted)
+     {
+       int w_fw   = wp_weight[0][fw_ref_idx][0];
+       int offset = wp_offset[0][fw_ref_idx][0];
+ 
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = clip1a(((w_fw * fw_pred[k] + wp_luma_round) >> luma_log_weight_denom) + offset);
+     }
+     else
+     {
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = fw_pred[k];
+     }
+   }
+   else // every other p_dir value is handled as list1
+   {
+     if (weighted)
+     {
+       int w_bw   = wp_weight[1][bw_ref_idx][0];
+       int offset = wp_offset[1][bw_ref_idx][0];
+ 
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = clip1a(((w_bw * bw_pred[k] + wp_luma_round) >> luma_log_weight_denom) + offset);
+     }
+     else
+     {
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = bw_pred[k];
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Predict one 4x4 Luma block from the bi-predictive motion vector sets
+  *    and store the result in img->mpr.
+  *
+  *    Motion vectors are taken from img->bipred_mv1 when list is non-zero
+  *    and from img->bipred_mv2 otherwise. The list0 and/or list1 samples
+  *    are fetched with OneComponentLumaPrediction4x4() and then either
+  *    copied, averaged, or combined by weighted prediction.
+  ************************************************************************
+  */
+ void LumaPrediction4x4Bi ( int   block_x,    //!< relative horizontal block coordinate of 4x4 block
+                            int   block_y,    //!< relative vertical   block coordinate of 4x4 block
+                            int   p_dir,      //!< prediction direction (0=list0, 1=list1, 2=bidir)
+                            int   fw_mode,    //!< list0 prediction mode (1-7, 0=DIRECT if bw_mode=0)
+                            int   bw_mode,    //!< list1 prediction mode (1-7, 0=DIRECT if fw_mode=0)
+                            short fw_ref_idx, //!< reference frame for list0 prediction (-1: Intra4x4 pred. with fw_mode)
+                            short bw_ref_idx, //!< reference frame for list1 prediction 
+                            int   list        //!< current list for prediction.
+                            )
+ {
+   static int fw_pred[16];
+   static int bw_pred[16];
+ 
+   Macroblock* mb      = &img->mb_data[img->current_mb_nr];
+   int  x_end          = block_x+4;
+   int  y_end          = block_y+4;
+   int  abs_x          = img->opix_x + block_x;
+   int  abs_y          = img->opix_y + block_y;
+   int  blk_col        = block_x >> 2;
+   int  blk_row        = block_y >> 2;
+   int  p_weighted     = (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE));
+   int  b_weighted     = (active_pps->weighted_bipred_idc && (img->type == B_SLICE));
+   int  weighted       = (p_weighted || b_weighted);
+   short ****mvs       = list ? img->bipred_mv1[blk_row][blk_col] : img->bipred_mv2[blk_row][blk_col];
+   int  row, col;
+   int  k = 0;         // running index into fw_pred / bw_pred
+ 
+   // fetch the samples of the list(s) in use
+   if (p_dir == 0 || p_dir == 2)
+     OneComponentLumaPrediction4x4 (fw_pred, abs_x, abs_y, mvs[LIST_0][fw_ref_idx][fw_mode], fw_ref_idx, listX[0+mb->list_offset]);
+ 
+   if (p_dir == 1 || p_dir == 2)
+     OneComponentLumaPrediction4x4 (bw_pred, abs_x, abs_y, mvs[LIST_1][bw_ref_idx][bw_mode], bw_ref_idx, listX[1+mb->list_offset]);
+ 
+   if (p_dir == 2)
+   {
+     if (weighted)
+     {
+       int w_fw   = wbp_weight[0][fw_ref_idx][bw_ref_idx][0];
+       int w_bw   = wbp_weight[1][fw_ref_idx][bw_ref_idx][0];
+       int offset = ((wp_offset[0][fw_ref_idx][0] + wp_offset[1][bw_ref_idx][0] + 1)>>1);
+ 
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = clip1a(((w_fw * fw_pred[k] + w_bw * bw_pred[k] +
+                                         2*wp_luma_round) >> (luma_log_weight_denom + 1)) + offset);
+     }
+     else
+     {
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = (fw_pred[k] + bw_pred[k] + 1) / 2;
+     }
+   }
+   else if (p_dir == 0)
+   {
+     if (weighted)
+     {
+       int w_fw   = wp_weight[0][fw_ref_idx][0];
+       int offset = wp_offset[0][fw_ref_idx][0];
+ 
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = clip1a(((w_fw * fw_pred[k] + wp_luma_round) >> luma_log_weight_denom) + offset);
+     }
+     else
+     {
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = fw_pred[k];
+     }
+   }
+   else // every other p_dir value is handled as list1
+   {
+     if (weighted)
+     {
+       int w_bw   = wp_weight[1][bw_ref_idx][0];
+       int offset = wp_offset[1][bw_ref_idx][0];
+ 
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = clip1a(((w_bw * bw_pred[k] + wp_luma_round) >> luma_log_weight_denom) + offset);
+     }
+     else
+     {
+       for (row = block_y; row < y_end; row++)
+         for (col = block_x; col < x_end; col++, k++)
+           img->mpr[row][col] = bw_pred[k];
+     }
+   }
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Residual Coding of an 8x8 Luma block (not for intra)
+  *
+  *    For each of the four 4x4 sub-blocks the prediction is generated with
+  *    LumaPrediction4x4() and the difference to the original picture is
+  *    written to img->m7.  With the 4x4 transform every sub-block is passed
+  *    to dct_luma()/dct_luma_sp() right away; with the 8x8 transform the
+  *    residual of the whole 8x8 block is collected first and then passed to
+  *    dct_luma8x8().  When img->residue_transform_flag is set, the two
+  *    chroma predictions are generated as well, all three predictions are
+  *    kept in mprRGB and the forward residue colour transform is applied
+  *    before the transform.
+  *
+  *    If the accumulated coefficient cost does not exceed _LUMA_COEFF_COST_
+  *    the coefficients of this 8x8 block are discarded again (see the long
+  *    comment in the function body).
+  *
+  * \return
+  *    coefficient cost (0 if the coefficients of this block were discarded)
+  ************************************************************************
+  */
+ int LumaResidualCoding8x8 ( int   *cbp,        //!< Output: cbp (updated according to processed 8x8 luminance block)
+                             int64 *cbp_blk,    //!< Output: block cbp (updated according to processed 8x8 luminance block)
+                             int   block8x8,    //!< block number of 8x8 block
+                             short p_dir,       //!< prediction direction
+                             int   fw_mode,     //!< list0 prediction mode (1-7, 0=DIRECT)
+                             int   bw_mode,     //!< list1 prediction mode (1-7, 0=DIRECT)
+                             short fw_refframe, //!< reference picture for list0 prediction
+                             short bw_refframe  //!< reference picture for list1 prediction
+                            )
+ {
+   int    block_y, block_x, pic_pix_y, pic_pix_x, i, j, nonzero = 0, cbp_blk_mask;
+   int    coeff_cost = 0;
+   int    mb_y       = (block8x8 >> 1) << 3;   // vertical   pixel offset of the 8x8 block inside the macroblock
+   int    mb_x       = (block8x8 & 0x01) << 3; // horizontal pixel offset of the 8x8 block inside the macroblock
+   int    pix_y;
+   int    cbp_mask   = 1 << block8x8;          // bit of this 8x8 block in *cbp
+   int    bxx, byy;                   // indexing curr_blk
+   int    scrFlag    = 0; // 0=noSCR, 1=strongSCR, 2=jmSCR (only assigned below, not read in this function)
+   int    skipped    = (fw_mode == 0 && bw_mode == 0 && (img->type != B_SLICE));
+   Macroblock* currMB = &img->mb_data[img->current_mb_nr];
+   //set transform size
+   int    need_8x8_transform = currMB->luma_transform_size_8x8_flag;
+   // Residue Color Transform
+   int residue_R, residue_G, residue_B, temp;
+ 
+   if (img->type==B_SLICE)
+     scrFlag = 1;
+ 
+   //===== loop over 4x4 blocks =====
+   for (byy=0, block_y=mb_y; block_y<mb_y+8; byy+=4, block_y+=4)
+   {
+     pic_pix_y = img->opix_y + block_y;
+     
+     for (bxx=0, block_x=mb_x; block_x<mb_x+8; bxx+=4, block_x+=4)
+     {
+       pic_pix_x = img->opix_x + block_x;
+       
+       cbp_blk_mask = (block_x>>2) + block_y;  // bit index in *cbp_blk: 4x4 column + 4 * 4x4 row (block_y is a multiple of 4)
+       
+       // Residue Color Transform
+       if(img->residue_transform_flag)
+       {
+         ChromaPrediction4x4 (0, block_x, block_y, p_dir, fw_mode, bw_mode, fw_refframe, bw_refframe);
+         for (j=0; j<4; j++)
+           for (i=0; i<4; i++)
+             mprRGB[1][j+block_y][i+block_x] = img->mpr[j+block_y][i+block_x];
+           // note: the statements below are not part of the loop above despite their indentation
+           ChromaPrediction4x4 (1, block_x, block_y, p_dir, fw_mode, bw_mode, fw_refframe, bw_refframe);
+           for (j=0; j<4; j++)
+             for (i=0; i<4; i++)
+               mprRGB[2][j+block_y][i+block_x] = img->mpr[j+block_y][i+block_x];
+       }
+       
+       //===== prediction of 4x4 block =====
+       LumaPrediction4x4 (block_x, block_y, p_dir, fw_mode, bw_mode, fw_refframe, bw_refframe);
+       
+       // Residue Color Transform
+       if(img->residue_transform_flag)
+       {
+         for (j=0; j<4; j++)
+           for (i=0; i<4; i++)
+             mprRGB[0][j+block_y][i+block_x] = img->mpr[j+block_y][i+block_x];
+       }
+       
+       //===== get displaced frame difference ======
+       if(!img->residue_transform_flag)
+       {
+         if(!need_8x8_transform)
+         {
+           // 4x4 transform: residual goes to the top-left 4x4 corner of img->m7
+           //===== get displaced frame difference ======                
+           for (j=0; j<4; j++)
+           {
+             pix_y = pic_pix_y + j;
+             for (i=0; i<4; i++)
+             {
+               img->m7[j][i] = imgY_org[pix_y][pic_pix_x + i] - img->mpr[j+block_y][i+block_x];
+             }
+           } 
+           //===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====      
+           if (img->NoResidueDirect != 1 && !skipped  )
+           {
+             //===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====
+             if (img->type!=SP_SLICE)  
+               nonzero = dct_luma   (block_x, block_y, &coeff_cost, 0);
+             else                      
+               nonzero = dct_luma_sp(block_x, block_y, &coeff_cost);
+ 
+             if (nonzero)
+             {
+               (*cbp_blk) |= 1 << cbp_blk_mask;  // one bit for every 4x4 block
+               (*cbp)     |= cbp_mask;           // one bit for the 4x4 blocks of an 8x8 block
+             }
+           }
+         }
+         else
+         {
+           for (j=0; j<4; j++)   // 8x8 transform: only collect the residual at its position (byy,bxx) inside the 8x8 block
+           {
+             pix_y = pic_pix_y + j;
+             for (i=0; i<4; i++)
+             {
+               img->m7[j+byy][i+bxx] = imgY_org[pix_y][pic_pix_x+i] - img->mpr[j+block_y][i+block_x];
+             }
+           }
+         }
+       } 
+       else 
+       {
+         /* Forward Residue Transform */
+         for (j=0; j<4; j++)
+           for (i=0; i<4; i++)
+           {
+             residue_B = imgUV_org[0][pic_pix_y+j][pic_pix_x+i] - mprRGB[1][j+block_y][i+block_x];
+             residue_G = imgY_org[pic_pix_y+j][pic_pix_x+i] - mprRGB[0][j+block_y][i+block_x];
+             residue_R = imgUV_org[1][pic_pix_y+j][pic_pix_x+i] - mprRGB[2][j+block_y][i+block_x];
+             
+             resTrans_R[j+block_y][i+block_x] = residue_R-residue_B;
+             temp = residue_B+(resTrans_R[j+block_y][i+block_x]>>1);
+             resTrans_B[j+block_y][i+block_x] = residue_G-temp;
+             resTrans_G[j+block_y][i+block_x] = temp+(resTrans_B[j+block_y][i+block_x]>>1);
+             
+             if(!need_8x8_transform)
+               img->m7[j][i] = resTrans_G[j+block_y][i+block_x];
+             else
+               img->m7[j+byy][i+bxx] = resTrans_G[j+block_y][i+block_x];
+           }
+           // note: the code below runs once per 4x4 block, after the loop above (indentation is misleading)
+           // Residue Color Transform
+           //===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====      
+           if (img->NoResidueDirect != 1 && !skipped && !need_8x8_transform )
+           {
+             //===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====
+             if (img->type!=SP_SLICE)  nonzero = dct_luma   (block_x, block_y, &coeff_cost, 0);
+             else                      nonzero = dct_luma_sp(block_x, block_y, &coeff_cost);
+             
+             for (j=0; j<4; j++)
+               for (i=0; i<4; i++)
+                 rec_resG[j+block_y][i+block_x] = img->m7[j][i];
+               // note: the if below is not part of the copy loop above despite its indentation
+               if (nonzero)
+           {
+             (*cbp_blk) |= 1 << cbp_blk_mask;  // one bit for every 4x4 block
+             (*cbp)     |= cbp_mask;           // one bit for the 4x4 blocks of an 8x8 block
+           }
+         }
+       }
+     }
+   }
+ 
+   if(need_8x8_transform)
+   {
+     if (img->NoResidueDirect != 1 && !skipped)
+     {
+       if (img->type!=SP_SLICE)   // no 8x8 transform call for SP slices; nonzero then keeps its initial value 0
+         nonzero = dct_luma8x8   (block8x8, &coeff_cost, 0);
+ 
+       // Residue Color Transform
+       if(img->residue_transform_flag)
+       {
+         for (j=0; j<8; j++)
+           for (i=0; i<8; i++)
+             rec_resG[mb_y+j][mb_x+i] = img->m7[j][i];
+       }
+ 
+       if (nonzero)
+       {
+         (*cbp_blk) |= 51 << (4*block8x8-2*(block8x8 & 0x01)); // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position
+         (*cbp)     |= cbp_mask;           // one bit for the 4x4 blocks of an 8x8 block
+       }
+     }
+   }
+ 
+   /*
+   The purpose of the action below is to prevent that single or 'expensive' coefficients are coded.
+   With 4x4 transform there is larger chance that a single coefficient in a 8x8 or 16x16 block may be nonzero.
+   A single small (level=1) coefficient in a 8x8 block will cost: 3 or more bits for the coefficient,
+   4 bits for EOBs for the 4x4 blocks, possibly also more bits for CBP.  Hence the total 'cost' of that single
+   coefficient will typically be 10-12 bits which in a RD consideration is too much to justify the distortion improvement.
+   The action below is to watch such 'single' coefficients and set the reconstructed block equal to the prediction according
+   to a given criterion.  The action is taken only for inter luma blocks.
+ 
+   Notice that this is a pure encoder issue and hence does not have any implication on the standard.
+   coeff_cost is a parameter set in dct_luma() and accumulated for each 8x8 block.  If level=1 for a coefficient,
+   coeff_cost is increased by a number depending on RUN for that coefficient. The numbers are (see also dct_luma()): 3,2,2,1,1,1,0,0,...
+   when RUN equals 0,1,2,3,4,5,6, etc.
+   If level >1 coeff_cost is increased by 9 (or any number above 3). The threshold is set to 3. This means for example:
+   1: If there is one coefficient with (RUN,level)=(0,1) in a 8x8 block this coefficient is discarded.
+   2: If there are two coefficients with (RUN,level)=(1,1) and (4,1) the coefficients are also discarded
+   sum_cnt_nonz is the accumulation of coeff_cost over a whole macro block.  If sum_cnt_nonz is 5 or less for the whole MB,
+   all nonzero coefficients are discarded for the MB and the reconstructed block is set equal to the prediction.
+   (In this function the threshold actually compared against is the macro _LUMA_COEFF_COST_.)
+   */
+ 
+   if (img->NoResidueDirect != 1 && !skipped && coeff_cost <= _LUMA_COEFF_COST_ &&
+       ((img->qp + img->bitdepth_luma_qp_scale)!=0 || img->lossless_qpprime_flag==0))
+   {
+     coeff_cost  = 0;
+     (*cbp)     &=  (63 - cbp_mask);                           // clear the bit of this 8x8 block
+     (*cbp_blk) &= ~(51 << (4*block8x8-2*(block8x8 & 0x01)));  // clear the bits of its four 4x4 blocks
+ 
+     if(!img->residue_transform_flag)
+     {
+       for (j=mb_y; j<mb_y+8; j++)   // reconstruction = prediction, one 8-pixel row at a time
+         memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x + mb_x], &img->mpr[j][mb_x], 2 * BLOCK_SIZE * sizeof(imgpel));
+     }
+     else        // Residue Color Transform
+     {
+       for (i=mb_x; i<mb_x+8; i++)
+         for (j=mb_y; j<mb_y+8; j++)
+         {
+           rec_resG[j][i] = 0;
+         }
+     }
+     if (img->type==SP_SLICE)
+     {
+       for (i=mb_x; i < mb_x+BLOCK_SIZE*2; i+=BLOCK_SIZE)
+         for (j=mb_y; j < mb_y+BLOCK_SIZE*2; j+=BLOCK_SIZE)
+           copyblock_sp(i,j);
+     }
+   }
+ 
+   return coeff_cost;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Derive prediction direction, list0/list1 prediction modes and
+  *    reference indices for one 8x8 block of the current macroblock
+  *
+  * \param b8       index of the 8x8 block (indexes b8pdir[] and b8mode[])
+  * \param p_dir    Output: b8pdir[b8] of the current macroblock
+  * \param fw_mode  Output: b8mode[b8] if list0 is used, otherwise 0
+  * \param bw_mode  Output: b8mode[b8] if list1 is used, otherwise 0
+  * \param fw_ref   Output: list0 reference index taken from enc_picture->ref_idx
+  *                 (0 if only list1 is used, -1 if b8pdir[b8] is -1 in a B slice)
+  * \param bw_ref   Output: list1 reference index taken from enc_picture->ref_idx
+  *                 (0 if only list0 is used, -1 if b8pdir[b8] is -1 in a B slice)
+  ************************************************************************
+  */
+ void SetModesAndRefframe (int b8, short* p_dir, int* fw_mode, int* bw_mode, short* fw_ref, short* bw_ref)
+ {
+   Macroblock* mb      = &img->mb_data[img->current_mb_nr];
+   int         blk_row = 2*(b8>>1);      // offset added to img->block_y when indexing ref_idx
+   int         blk_col = 2*(b8 & 0x01);  // offset added to img->block_x when indexing ref_idx
+   int         uses_list0;
+   int         uses_list1;
+ 
+   // defaults; every path below overwrites all four outputs
+   *bw_ref  = -1;
+   *fw_ref  = -1;
+   *bw_mode = -1;
+   *fw_mode = -1;
+ 
+   *p_dir = mb->b8pdir[b8];
+ 
+   if (img->type != B_SLICE)
+   {
+     // outside B slices only list0 is used, whatever b8pdir says
+     uses_list0 = 1;
+     uses_list1 = 0;
+   }
+   else
+   {
+     switch (mb->b8pdir[b8])
+     {
+     case -1:
+       // no list is used: invalid references, mode 0 for both lists
+       *fw_ref   = -1;
+       *bw_ref   = -1;
+       *fw_mode  =  0;
+       *bw_mode  =  0;
+       return;
+     case 0:
+       uses_list0 = 1;
+       uses_list1 = 0;
+       break;
+     case 1:
+       uses_list0 = 0;
+       uses_list1 = 1;
+       break;
+     default:
+       uses_list0 = 1;
+       uses_list1 = 1;
+       break;
+     }
+   }
+ 
+   if (uses_list0)
+   {
+     *fw_ref  = enc_picture->ref_idx[LIST_0][img->block_y+blk_row][img->block_x+blk_col];
+     *fw_mode = mb->b8mode[b8];
+   }
+   else
+   {
+     *fw_ref  = 0;
+     *fw_mode = 0;
+   }
+ 
+   if (uses_list1)
+   {
+     *bw_ref  = enc_picture->ref_idx[LIST_1][img->block_y+blk_row][img->block_x+blk_col];
+     *bw_mode = mb->b8mode[b8];
+   }
+   else
+   {
+     *bw_ref  = 0;
+     *bw_mode = 0;
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Residual Coding of a Luma macroblock (not for intra)
+  *
+  *    Codes the four 8x8 blocks with LumaResidualCoding8x8() and sums up
+  *    their coefficient costs.  If the sum does not exceed
+  *    _LUMA_MB_COEFF_COST_ the luma coefficients of the whole macroblock
+  *    are dropped: the luma cbp bits are cleared and the reconstruction is
+  *    replaced by the prediction (or, with the residue colour transform,
+  *    rec_resG is zeroed).
+  ************************************************************************
+  */
+ void LumaResidualCoding ()
+ {
+   Macroblock *mb = &img->mb_data[img->current_mb_nr];
+   int   total_cost = 0;
+   int   b8, row, col, x0, y0, x, y;
+   int   fw_mode, bw_mode;
+   short p_dir, fw_ref, bw_ref;
+ 
+   mb->cbp     = 0 ;
+   mb->cbp_blk = 0 ;
+ 
+   for (b8=0; b8<4; b8++)
+   {
+     SetModesAndRefframe (b8, &p_dir, &fw_mode, &bw_mode, &fw_ref, &bw_ref);
+ 
+     total_cost += LumaResidualCoding8x8 (&(mb->cbp), &(mb->cbp_blk), b8,
+                                          p_dir, fw_mode, bw_mode, fw_ref, bw_ref);
+   }
+ 
+   // coefficients are kept when they are expensive enough ...
+   if (total_cost > _LUMA_MB_COEFF_COST_)
+     return;
+ 
+   // ... or when (qp + qp scale) is 0 while lossless_qpprime_flag is set
+   if ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag!=0)
+     return;
+ 
+   //===== discard all luma coefficients of the macroblock =====
+   mb->cbp     &= 0xfffff0 ;
+   mb->cbp_blk &= 0xff0000 ;
+ 
+   if (img->residue_transform_flag)
+   {
+     for (row=0; row < MB_BLOCK_SIZE; row++)
+       for (col=0; col < MB_BLOCK_SIZE; col++)
+         rec_resG[row][col] = 0;
+   }
+   else
+   {
+     // reconstruction = prediction, one macroblock row at a time
+     for (row=0; row < MB_BLOCK_SIZE; row++)
+       memcpy(&enc_picture->imgY[img->pix_y+row][img->pix_x], img->mpr[row], MB_BLOCK_SIZE * sizeof (imgpel));
+   }
+ 
+   if (img->type!=SP_SLICE)
+     return;
+ 
+   // SP slices: call copyblock_sp() for every 4x4 block, 8x8 block by 8x8 block
+   for (b8=0; b8<4; b8++)
+   {
+     x0 = (b8&1)<<3;
+     y0 = (b8&2)<<2;
+     for (x=x0; x<x0+8; x+=4)
+       for (y=y0; y<y0+8; y+=4)
+         copyblock_sp(x,y);
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Makes the decision if 8x8 transform will be used (for RD-off) by
+  *    comparing SATD()-based costs of the luma prediction error for the
+  *    4x4 and the 8x8 block size
+  *
+  * \param block_check
+  *    8x8 block to examine, or -1 to examine all four 8x8 blocks
+  * \param cost
+  *    In/Out: only when 0 is returned, updated to *cost - cost8x8 + cost4x4
+  *
+  * \return
+  *    1 if input->Transform8x8Mode is 2 or the 8x8 cost is smaller than the
+  *    4x4 cost, 0 otherwise
+  ************************************************************************
+  */
+ int TransformDecision (int block_check, int *cost)
+ {
+   int    residual[64];
+   int    sum4x4 = 0;
+   int    sum8x8 = 0;
+   int    b8, b8_end;
+   int    fw_mode, bw_mode;
+   short  p_dir, fw_ref, bw_ref;
+ 
+   if (block_check == -1)
+   {
+     b8     = 0;
+     b8_end = 4;
+   }
+   else
+   {
+     b8     = block_check;
+     b8_end = block_check + 1;
+   }
+ 
+   while (b8 < b8_end)
+   {
+     int  y0  = (b8 >> 1) << 3;
+     int  x0  = (b8 & 0x01) << 3;
+     int *out = residual;          // write position inside residual[]
+     int  sub_y, sub_x, row, col;
+ 
+     SetModesAndRefframe (b8, &p_dir, &fw_mode, &bw_mode, &fw_ref, &bw_ref);
+ 
+     //===== loop over 4x4 blocks =====
+     for (sub_y = y0; sub_y < y0 + 8; sub_y += 4)
+     {
+       int org_y = img->opix_y + sub_y;
+ 
+       for (sub_x = x0; sub_x < x0 + 8; sub_x += 4)
+       {
+         int  org_x     = img->opix_x + sub_x;
+         int *sub_block = out;     // start of the 16 values of this 4x4 block
+ 
+         //===== prediction of 4x4 block =====
+         LumaPrediction4x4 (sub_x, sub_y, p_dir, fw_mode, bw_mode, fw_ref, bw_ref);
+ 
+         //===== get displaced frame difference ======
+         for (row = 0; row < 4; row++)
+           for (col = 0; col < 4; col++)
+             *out++ = imgY_org[org_y + row][org_x + col] - img->mpr[row + sub_y][col + sub_x];
+ 
+         sum4x4 += SATD (sub_block, input->hadamard);
+       }
+     }
+ 
+     // NOTE(review): residual[] holds the four 4x4 blocks one after another
+     // (16 consecutive values each), not the 8x8 block in raster order;
+     // confirm that SATD8X8() expects this layout.
+     sum8x8 += SATD8X8 (residual, input->hadamard);
+     b8++;
+   }
+ 
+   // Transform8x8Mode 2: always allow 8x8 transform
+   if (input->Transform8x8Mode == 2 || sum8x8 < sum4x4)
+     return 1;
+ 
+   *cost = (*cost - sum8x8 + sum4x4);
+   return 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Predict one component of a chroma 4x4 block
+  *
+  *    For every sample of the 4x4 block the motion vector is looked up in
+  *    mv, the displaced position is clipped to the reference picture and
+  *    the prediction is the rounded weighted average of the four
+  *    surrounding reference samples.  The BLOCK_SIZE*BLOCK_SIZE results are
+  *    written consecutively (row by row) to mpred.
+  ************************************************************************
+  */
+ void OneComponentChromaPrediction4x4 (int*        mpred,      //!< array to store prediction values
+                                       int         block_c_x,  //!< horizontal pixel coordinate of 4x4 block
+                                       int         block_c_y,  //!< vertical   pixel coordinate of 4x4 block
+                                       short****** mv,         //!< motion vector array
+                                       int         list_idx,   //!< reference picture list
+                                       short       ref,        //!< reference index
+                                       int         blocktype,  //!< block type
+                                       int         uv)         //!< chroma component
+ {
+   int     i, j, ii, jj, ii0, jj0, ii1, jj1, if0, if1, jf0, jf1;
+   short*  mvb;
+   
+   int     f1_x = 64/img->mb_cr_size_x;   // sub-sample positions per chroma sample, horizontal
+   int     f2_x=f1_x-1;                   // mask for the horizontal fractional part (assumes f1_x is a power of two - TODO confirm)
+ 
+   int     f1_y = 64/img->mb_cr_size_y;   // sub-sample positions per chroma sample, vertical
+   int     f2_y=f1_y-1;                   // mask for the vertical fractional part (assumes f1_y is a power of two - TODO confirm)
+ 
+   int     f3=f1_x*f1_y, f4=f3>>1;        // f3: sum of the four weights, f4: rounding offset
+   int     list_offset = img->mb_data[img->current_mb_nr].list_offset;  
+   int     max_y_cr = (int) (list_offset ? (img->height_cr >> 1) - 1 : img->height_cr - 1); // half height when list_offset != 0 (presumably field reference lists)
+   int     max_x_cr = (int) (img->width_cr - 1);
+   int     jjx, iix;
+   int     mb_cr_y_div4 = img->mb_cr_size_y>>2;
+   int     mb_cr_x_div4 = img->mb_cr_size_x>>2;
+   int     jpos;
+ 
+   StorablePicture **list = listX[list_idx + list_offset];
+   pel_t** refimage = list[ref]->imgUV[uv];
+ 
+   for (j=block_c_y; j < block_c_y + BLOCK_SIZE; j++)
+   {
+     jjx = j/mb_cr_y_div4;                  // first index into mv for this chroma row
+     jpos = (j + img->opix_c_y)*f1_y;       // vertical position in sub-sample units
+ 
+     for (i=block_c_x; i < block_c_x + BLOCK_SIZE; i++)
+     {
+       iix = i/mb_cr_x_div4;                // second index into mv for this chroma column
+       mvb  = mv [jjx][iix][list_idx][ref][blocktype];
+ 
+       ii   = (i + img->opix_c_x)*f1_x + mvb[0];   // displaced position in sub-sample units
+       jj   = jpos + mvb[1];
+ 
+       if (active_sps->chroma_format_idc == 1)
+         jj  += list[ref]->chroma_vector_adjustment;
+ 
+       ii0  = Clip3 (0, max_x_cr, ii/f1_x);          // left / upper neighbour, clipped to the picture
+       jj0  = Clip3 (0, max_y_cr, jj/f1_y);
+       ii1  = Clip3 (0, max_x_cr, (ii+f2_x)/f1_x);   // right / lower neighbour, clipped to the picture
+       jj1  = Clip3 (0, max_y_cr, (jj+f2_y)/f1_y);
+ 
+       if1  = (ii&f2_x);  if0 = f1_x-if1;            // horizontal weights of right / left neighbour
+       jf1  = (jj&f2_y);  jf0 = f1_y-jf1;            // vertical   weights of lower / upper neighbour
+       
+       *mpred++ = (if0 * jf0 * refimage[jj0][ii0] +
+                   if1 * jf0 * refimage[jj0][ii1] +
+                   if0 * jf1 * refimage[jj1][ii0] +
+                   if1 * jf1 * refimage[jj1][ii1] + f4) / f3;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Predict an intra chroma 4x4 block: copies the block from the
+  *    prediction stored in img->mprr_c for the chroma intra prediction
+  *    mode of the current macroblock (c_ipred_mode) into img->mpr
+  ************************************************************************
+  */
+ void IntraChromaPrediction4x4 (int  uv,       // <-- colour component
+                                int  block_x,  // <-- relative horizontal block coordinate of 4x4 block
+                                int  block_y)  // <-- relative vertical   block coordinate of 4x4 block
+ {
+   const int pred_mode = img->mb_data[img->current_mb_nr].c_ipred_mode;
+   const int row_end   = block_y + 4;
+   int       row       = block_y;
+ 
+   //===== prediction =====
+   // NOTE(review): the copy width is BLOCK_MULTIPLE samples; presumably this
+   // equals the 4x4 block width - confirm against the definition.
+   while (row < row_end)
+   {
+     memcpy(&img->mpr[row][block_x], &img->mprr_c[uv][pred_mode][row][block_x], BLOCK_MULTIPLE * sizeof(imgpel));
+     row++;
+   }
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Predict one chroma 4x4 block
+  *
+  *    The result is written to img->mpr[block_y..block_y+3][block_x..block_x+3].
+  *    p_dir==-1 selects intra prediction (IntraChromaPrediction4x4);
+  *    otherwise the list0 and/or list1 prediction is generated with
+  *    OneComponentChromaPrediction4x4() and combined, using the weighted
+  *    prediction tables when apply_weights is set.
+  ************************************************************************
+  */
+ void
+ ChromaPrediction4x4 (int   uv,           // <-- colour component
+                      int   block_x,      // <-- relative horizontal block coordinate of 4x4 block
+                      int   block_y,      // <-- relative vertical   block coordinate of 4x4 block
+                      int   p_dir,        // <-- prediction direction (-1: intra, 0: list0, 1: list1, 2: both lists)
+                      int   fw_mode,      // <-- list0  prediction mode (1-7, 0=DIRECT if bw_mode=0)
+                      int   bw_mode,      // <-- list1 prediction mode (1-7, 0=DIRECT if fw_mode=0)
+                      short fw_ref_idx,   // <-- reference frame for list0 prediction (intra is selected by p_dir==-1)
+                      short bw_ref_idx)   // <-- reference frame for list1 prediction 
+ {
+   static int fw_pred[MB_BLOCK_SIZE];   // receives BLOCK_SIZE*BLOCK_SIZE values; assumes that fits in MB_BLOCK_SIZE - TODO confirm
+   static int bw_pred[MB_BLOCK_SIZE];   // same assumption as fw_pred
+ 
+   int  i, j;
+   int  block_x4  = block_x+4;
+   int  block_y4  = block_y+4;
+   int* fpred     = fw_pred;            // read cursor over the list0 prediction
+   int* bpred     = bw_pred;            // read cursor over the list1 prediction
+   short****** mv_array = img->all_mv;
+ 
+   Macroblock*    currMB     = &img->mb_data[img->current_mb_nr];
+ 
+   int  apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                          (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));  
+   // bi-predictive ME result: use its motion vectors instead of img->all_mv
+   if (currMB->bi_pred_me && fw_ref_idx == 0 && bw_ref_idx == 0 && p_dir == 2 && fw_mode==1 && bw_mode==1)
+     mv_array = currMB->bi_pred_me == 1? img->bipred_mv1 : img->bipred_mv2 ;
+ 
+   //===== INTRA PREDICTION =====
+   if (p_dir==-1)
+   {
+     IntraChromaPrediction4x4 (uv, block_x, block_y);
+     return;
+   }
+   
+   //===== INTER PREDICTION =====
+   if ((p_dir==0) || (p_dir==2))
+   {
+     OneComponentChromaPrediction4x4 (fw_pred, block_x, block_y, mv_array, LIST_0, fw_ref_idx, fw_mode, uv);
+   }
+   if ((p_dir==1) || (p_dir==2))
+   {
+     OneComponentChromaPrediction4x4 (bw_pred, block_x, block_y, mv_array, LIST_1, bw_ref_idx, bw_mode, uv);
+   }
+ 
+   if (apply_weights)   // weight tables are indexed with uv+1 for the chroma components
+   {
+     if (p_dir==2)
+     {
+       for (j=block_y; j<block_y4; j++)
+         for (i=block_x; i<block_x4; i++)  
+             img->mpr[j][i] =  clip1a_chr(((wbp_weight[0][fw_ref_idx][bw_ref_idx][uv+1] * *fpred++ + wbp_weight[1][fw_ref_idx][bw_ref_idx][uv+1] * *bpred++ 
+                   + 2*wp_chroma_round) >> (chroma_log_weight_denom + 1)) + ((wp_offset[0][fw_ref_idx][uv+1] + wp_offset[1][bw_ref_idx][uv+1] + 1)>>1) );
+     }
+     else if (p_dir==0)
+     {
+       for (j=block_y; j<block_y4; j++)
+         for (i=block_x; i<block_x4; i++)  
+            img->mpr[j][i] = clip1a_chr(((wp_weight[0][fw_ref_idx][uv+1] * *fpred++ + wp_chroma_round) >> chroma_log_weight_denom) +  wp_offset[0][fw_ref_idx][uv+1]);
+     }
+     else // (p_dir==1)
+     {
+       for (j=block_y; j<block_y4; j++)
+         for (i=block_x; i<block_x4; i++)  
+           img->mpr[j][i] = clip1a_chr(((wp_weight[1][bw_ref_idx][uv+1] * *bpred++ + wp_chroma_round) >> chroma_log_weight_denom) + wp_offset[1][bw_ref_idx][uv+1]);
+     }
+   }
+   else
+   {
+     if (p_dir==2)
+     {
+       for (j=block_y; j<block_y4; j++)
+         for (i=block_x; i<block_x4; i++)  
+           img->mpr[j][i] = (*fpred++ + *bpred++ + 1) >> 1;   // rounded average of both predictions
+     }
+     else if (p_dir==0)
+     {
+       for (j=block_y; j<block_y4; j++)
+         for (i=block_x; i<block_x4; i++)  
+           img->mpr[j][i] = *fpred++;
+     }
+     else // (p_dir==1)
+     {
+       for (j=block_y; j<block_y4; j++)
+         for (i=block_x; i<block_x4; i++)  
+           img->mpr[j][i] = *bpred++;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Chroma residual coding for a macroblock
+  *
+  *    For both chroma components: generate the prediction of all 4x4
+  *    blocks, build the residual in img->m7 (or copy the prediction /
+  *    zero the residual when no residual is coded) and call
+  *    dct_chroma()/dct_chroma_sp().  Afterwards the chroma cbp is added to
+  *    the cbp of the current macroblock and, with the residue colour
+  *    transform, the inverse residue transform reconstructs all three
+  *    planes.
+  *
+  * \param cr_cbp
+  *    Output: set to 0 first, then updated by dct_chroma()/dct_chroma_sp()
+  ************************************************************************
+  */
+ void ChromaResidualCoding (int* cr_cbp)
+ {
+   int   uv, block8, block_y, block_x, j, i;
+   int   fw_mode, bw_mode;
+   short p_dir, refframe, bw_ref;
+   int   skipped = (img->mb_data[img->current_mb_nr].mb_type == 0 && (img->type == P_SLICE || img->type == SP_SLICE));
+   int   yuv = img->yuv_format - 1; //ADD-VG-15052004
+ 
+   int   block8x8_idx[3][4][4] =     //ADD-VG-15052004: [yuv][block_y>>2][block_x>>2] -> 8x8 block index passed to SetModesAndRefframe
+   { { {0, 1, 0, 0}, 
+       {2, 3, 0, 0},
+       {0, 0, 0, 0},
+       {0, 0, 0, 0}, },
+ 
+     { {0, 1, 0, 0}, 
+       {0, 1, 0, 0}, 
+       {2, 3, 0, 0},
+       {2, 3, 0, 0}  },
+ 
+     { {0, 0, 1, 1},
+       {0, 0, 1, 1},
+       {2, 2, 3, 3},
+       {2, 2, 3, 3}  }
+   };
+   int residue_R, residue_G, residue_B, temp;
+ 
+   for (*cr_cbp=0, uv=0; uv<2; uv++)
+   {
+     //===== prediction of chrominance blocks =====
+     block8 = 0;
+     for (block_y=0; block_y < img->mb_cr_size_y; block_y+=4)
+     for (block_x=0; block_x < img->mb_cr_size_x; block_x+=4)
+     {
+       block8 = block8x8_idx[yuv][block_y>>2][block_x>>2];
+       SetModesAndRefframe (block8, &p_dir, &fw_mode, &bw_mode, &refframe, &bw_ref);
+ 
+       ChromaPrediction4x4 (uv, block_x, block_y, p_dir, fw_mode, bw_mode, refframe, bw_ref);
+     }
+ 
+     // ==== no residual is coded (NoResidueDirect): reconstruction = prediction
+     if (img->NoResidueDirect)
+     {
+       // Residue Color Transform
+       if(!img->residue_transform_flag)
+       {
+         for (j=0; j<img->mb_cr_size_y; j++)
+           memcpy(&enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x], img->mpr[j], img->mb_cr_size_x * sizeof(imgpel));             
+         ;   // stray empty statement (no effect)
+       } 
+       else 
+       {
+         if(uv==0) 
+         {
+           for (j=0; j<img->mb_cr_size_y; j++)
+             memset(rec_resB[j], 0, img->mb_cr_size_x * sizeof(int));
+         }
+         else
+         {
+           for (j=0; j<img->mb_cr_size_y; j++)
+             memset(rec_resR[j], 0, img->mb_cr_size_x * sizeof(int));
+         }
+       }      
+     }
+     else if (skipped && img->type==SP_SLICE)
+     {
+       for (j=0; j<8; j++)   // zero residual for an 8x8 chroma block; it is passed to dct_chroma_sp() below
+         memset(img->m7[j], 0 , 8 * sizeof(int));
+     }
+     else
+     if (skipped)
+     {
+       // Residue Color Transform
+       if(!img->residue_transform_flag)
+       {
+         for (j=0; j<img->mb_cr_size_y; j++)
+           memcpy(&enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x], img->mpr[j], img->mb_cr_size_x * sizeof(imgpel));
+       }
+       else
+       {
+         if(uv==0) 
+         {
+           for (j=0; j<img->mb_cr_size_y; j++)
+             memset(rec_resB[j],0, img->mb_cr_size_x * sizeof(int));
+         }
+         else      
+         {
+           for (j=0; j<img->mb_cr_size_y; j++)
+             memset(rec_resR[j],0, img->mb_cr_size_x * sizeof(int));
+         }        
+       }
+     }
+     else
+     {
+       for (j=0; j<img->mb_cr_size_y; j++)   // regular case: build the residual in img->m7
+         for (i=0; i<img->mb_cr_size_x; i++)
+         {
+           // Residue Color Transform
+           if(!img->residue_transform_flag)
+           {
+             img->m7[j][i] = imgUV_org[uv][img->opix_c_y+j][img->opix_c_x+i] - img->mpr[j][i];
+           } 
+           else 
+           {
+             if(uv==0) img->m7[j][i] = resTrans_B[j][i];
+             else      img->m7[j][i] = resTrans_R[j][i];
+           }
+         }
+     }
+ 
+     //===== DCT, Quantization, inverse Quantization, IDCT, and Reconstruction =====
+     //===== Call function for skip mode in SP frames to properly process frame ====
+     
+     if (skipped && img->type==SP_SLICE)
+     {
+         *cr_cbp=dct_chroma_sp(uv,*cr_cbp);
+     }
+     else
+     {
+       if (!img->NoResidueDirect && !skipped)
+       {
+         if (img->type!=SP_SLICE || IS_INTRA (&img->mb_data[img->current_mb_nr]))
+           *cr_cbp=dct_chroma   (uv,*cr_cbp);
+         else
+           *cr_cbp=dct_chroma_sp(uv,*cr_cbp);
+ 
+         if(img->residue_transform_flag){
+           for (j=0; j < img->mb_cr_size_y; j++)
+             for (i=0; i < img->mb_cr_size_x; i++)
+             {
+               if(uv==0)
+                 rec_resB[j][i] = img->m7[j][i];
+               else
+                 rec_resR[j][i] = img->m7[j][i];
+             }
+         }
+       }
+     }
+   }
+ 
+   //===== update currMB->cbp =====
+   img->mb_data[img->current_mb_nr].cbp += ((*cr_cbp)<<4);  
+ 
+   // Residue Color Transform
+   /* Inverse Residue Transform */
+   if(img->residue_transform_flag)
+   {
+     for (j=0; j<MB_BLOCK_SIZE; j++)   // NOTE(review): chroma planes are addressed with luma coordinates and MB_BLOCK_SIZE here; presumably chroma has luma resolution in this mode - confirm
+       for (i=0; i<MB_BLOCK_SIZE; i++)
+       {
+         /* YCoCg-R */
+         temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+         residue_G = rec_resB[j][i]+temp;
+         residue_B = temp - (rec_resR[j][i]>>1);
+         residue_R = residue_B+rec_resR[j][i];
+         enc_picture->imgUV[0][img->pix_y+j][img->pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+mprRGB[1][j][i]));
+         enc_picture->imgY[img->pix_y+j][img->pix_x+i]     = min(img->max_imgpel_value, max(0,residue_G+mprRGB[0][j][i]));
+         enc_picture->imgUV[1][img->pix_y+j][img->pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+mprRGB[2][j][i]));
+       }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Intra prediction of the chrominance layers of one macroblock
+  ************************************************************************
+  */
+ void IntraChromaPrediction (int *mb_up, int *mb_left, int*mb_up_left)
+ {
+ 
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   int      s, s0, s1, s2, s3, i, j, k;
+   pel_t**  image;
+   int      block_x, block_y;
+   int      mb_nr = img->current_mb_nr;
+   int      mb_available_up;
+   int      mb_available_left[2];
+   int      mb_available_up_left;
+   int      ih,iv;
+   int      ib,ic,iaa;
+   int      uv;
+   imgpel   hline[16], vline[16];
+   int      mode;
+   int      best_mode = DC_PRED_8;  //just an initilaization here, should always be overwritten
+   int      cost;
+   int      min_cost;
+   int      diff[16];
+   PixelPos up;        //!< pixel position  p(0,-1)
+   PixelPos left[17];  //!< pixel positions p(-1, -1..15)
+   int      cr_MB_x = img->mb_cr_size_x;
+   int      cr_MB_y = img->mb_cr_size_y;
+ 
+   //ADD-VG-07062004 
+   int      blk_x;
+   int      blk_y;
+   int      b8,b4;
+   int      yuv = img->yuv_format - 1;
+   
+   static int block_pos[3][4][4]= //[yuv][b8][b4]
+   {
+     { {0, 1, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0},{0, 0, 0, 0}},
+     { {0, 1, 2, 3},{2, 3, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0}},
+     { {0, 1, 2, 3},{1, 1, 3, 3},{2, 3, 2, 3},{3, 3, 3, 3}}
+   };
+   
+   //ADD-VG-07062004-END 
+   
+ 
+   for (i=0;i<cr_MB_y+1;i++)
+   {
+     getNeighbour(mb_nr, -1 , i-1 , 0, &left[i]);
+   }
+   
+   getNeighbour(mb_nr, 0 , -1 , 0, &up);
+ 
+ 
+   mb_available_up                             = up.available;
+   mb_available_up_left                        = left[0].available;
+   mb_available_left[0] = mb_available_left[1] = left[1].available;
+ 
+   if(input->UseConstrainedIntraPred)
+   {
+     mb_available_up = up.available ? img->intra_block[up.mb_addr] : 0;
+     for (i=0, mb_available_left[0]=1; i<(cr_MB_y>>1);i++)
+       mb_available_left[0]  &= left[i+1].available ? img->intra_block[left[i+1].mb_addr]: 0;
+     for (i=(cr_MB_y>>1), mb_available_left[1]=1; i<cr_MB_y;i++)
+       mb_available_left[1] &= left[i+1].available ? img->intra_block[left[i+1].mb_addr]: 0;
+     mb_available_up_left = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+   }
+ 
+   if (mb_up)
+     *mb_up = mb_available_up;
+   if (mb_left)
+     *mb_left = mb_available_left[0] && mb_available_left[1];
+   if (mb_up_left)
+     *mb_up_left = mb_available_up_left;
+ 
+ 
+   // compute all chroma intra prediction modes for both U and V
+   for (uv=0; uv<2; uv++)
+   {
+     image = enc_picture->imgUV[uv];
+ 
+     // DC prediction
+     for(b8=0; b8<img->num_blk8x8_uv >> 1;b8++)
+     {
+       for (b4=0; b4<4; b4++)
+       {
+         block_y = subblk_offset_y[yuv][b8][b4];
+         block_x = subblk_offset_x[yuv][b8][b4];
+         blk_x = block_x;
+         blk_y = block_y + 1;
+ 
+         s=img->dc_pred_value;
+         s0=s1=s2=s3=0;
+ 
+         //===== get prediction value =====
+         switch (block_pos[yuv][b8][b4])
+         {
+         case 0:  //===== TOP LEFT =====
+           if      (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s0 += image[up.pos_y][up.pos_x + i];
+           if      (mb_available_left[0])  for (i=blk_y;i<(blk_y+4);i++)  s2 += image[left[i].pos_y][left[i].pos_x];
+           if      (mb_available_up && mb_available_left[0])  s  = (s0+s2+4) >> 3;
+           else if (mb_available_up)                          s  = (s0   +2) >> 2;
+           else if (mb_available_left[0])                     s  = (s2   +2) >> 2;
+           break;
+         case 1: //===== TOP RIGHT =====
+           if      (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s1 += image[up.pos_y][up.pos_x + i];
+           else if (mb_available_left[0])  for (i=blk_y;i<(blk_y+4);i++)  s2 += image[left[i].pos_y][left[i].pos_x];
+           if      (mb_available_up)                          s  = (s1   +2) >> 2;
+           else if (mb_available_left[0])                     s  = (s2   +2) >> 2;
+           break;
+         case 2: //===== BOTTOM LEFT =====
+           if      (mb_available_left[1])  for (i=blk_y;i<(blk_y+4);i++)  s3 += image[left[i].pos_y][left[i].pos_x];
+           else if (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s0 += image[up.pos_y][up.pos_x + i];
+           if      (mb_available_left[1])                     s  = (s3   +2) >> 2;
+           else if (mb_available_up)                          s  = (s0   +2) >> 2;
+           break;
+         case 3: //===== BOTTOM RIGHT =====
+           if      (mb_available_up)       for (i=blk_x;i<(blk_x+4);i++)  s1 += image[up.pos_y][up.pos_x + i];
+           if      (mb_available_left[1])  for (i=blk_y;i<(blk_y+4);i++)  s3 += image[left[i].pos_y][left[i].pos_x];
+           if      (mb_available_up && mb_available_left[1])  s  = (s1+s3+4) >> 3;
+           else if (mb_available_up)                          s  = (s1   +2) >> 2;
+           else if (mb_available_left[1])                     s  = (s3   +2) >> 2;
+           break;
+         }
+         
+         //===== prediction =====
+         for (j=block_y; j<block_y+4; j++)
+           for (i=block_x; i<block_x+4; i++)
+           {
+             img->mprr_c[uv][DC_PRED_8][j][i] = s;
+           }
+       }
+     }
+ 
+     // vertical prediction
+     if (mb_available_up)
+     {
+       memcpy(hline,&image[up.pos_y][up.pos_x], cr_MB_x * sizeof(imgpel));
+       for (j=0; j<cr_MB_y; j++)
+         memcpy(img->mprr_c[uv][VERT_PRED_8][j], hline, cr_MB_x * sizeof(imgpel));
+     }
+ 
+     // horizontal prediction 
+     if (mb_available_left[0] && mb_available_left[1])
+     {
+       for (i=0; i<cr_MB_y; i++)
+         vline[i] = image[left[i+1].pos_y][left[i+1].pos_x];
+       for (i=0; i<cr_MB_x; i++)
+         for (j=0; j<cr_MB_y; j++)
+           img->mprr_c[uv][HOR_PRED_8][j][i] = vline[j];
+     }
+ 
+     // plane prediction
+     if (mb_available_left[0] && mb_available_left[1] && mb_available_up && mb_available_up_left)
+     {
+       ih = (cr_MB_x>>1)*(hline[cr_MB_x-1] - image[left[0].pos_y][left[0].pos_x]);
+       for (i=0;i<(cr_MB_x>>1)-1;i++)
+         ih += (i+1)*(hline[(cr_MB_x>>1)+i] - hline[(cr_MB_x>>1)-2-i]);
+ 
+       iv = (cr_MB_y>>1)*(vline[cr_MB_y-1] - image[left[0].pos_y][left[0].pos_x]);
+       for (i=0;i<(cr_MB_y>>1)-1;i++)
+         iv += (i+1)*(vline[(cr_MB_y>>1)+i] - vline[(cr_MB_y>>1)-2-i]);
+ 
+       ib= ((cr_MB_x == 8?17:5)*ih+2*cr_MB_x)>>(cr_MB_x == 8?5:6);
+       ic= ((cr_MB_y == 8?17:5)*iv+2*cr_MB_y)>>(cr_MB_y == 8?5:6);
+ 
+       iaa=16*(hline[cr_MB_x-1]+vline[cr_MB_y-1]);
+       for (j=0; j<cr_MB_y; j++)
+         for (i=0; i<cr_MB_x; i++)
+           img->mprr_c[uv][PLANE_8][j][i]=max(0,min(img->max_imgpel_value_uv,
+                                                    (iaa+(i-(cr_MB_x>>1)+1)*ib+(j-(cr_MB_y>>1)+1)*ic+16)>>5));
+     }
+   }
+ 
+   if (!input->rdopt)      // the rd-opt part does not work correctly (see encode_one_macroblock)
+   {                       // since ipredmodes could be overwritten => encoder-decoder-mismatches
+     // pick lowest cost prediction mode
+     min_cost = INT_MAX;
+     for (i=0;i<cr_MB_y;i++)
+     {
+       getNeighbour(mb_nr, 0 , i, 0, &left[i]);
+     }
+     for (mode=DC_PRED_8; mode<=PLANE_8; mode++)
+     {
+       if ((img->type != I_SLICE || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && mode!=DC_PRED_8)
+         continue;
+       
+       if ((mode==VERT_PRED_8 && !mb_available_up) ||
+         (mode==HOR_PRED_8 && (!mb_available_left[0] || !mb_available_left[1])) ||
+         (mode==PLANE_8 && (!mb_available_left[0] || !mb_available_left[1] || !mb_available_up || !mb_available_up_left)))
+         continue;
+       
+       cost = 0;
+       for (uv=0; uv<2; uv++)
+       {
+         image = imgUV_org[uv];
+         for (block_y=0; block_y<cr_MB_y; block_y+=4)
+           for (block_x=0; block_x<cr_MB_x; block_x+=4)
+           {
+             for (k=0,j=block_y; j<block_y+4; j++)
+             {
+               for (i=block_x; i<block_x+4; i++,k++)
+                 diff[k] = image[left[j].pos_y][left[j].pos_x+i] - img->mprr_c[uv][mode][j][i];
+             }
+             cost += SATD(diff, input->hadamard);
+           }
+       }
+       if (cost < min_cost)
+       {
+         best_mode = mode;
+         min_cost = cost;
+       }
+     }    
+     currMB->c_ipred_mode = best_mode;
+   }  
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Tests whether every LIST_0 reference index of the current
+  *    macroblock is zero
+  * \param currMB
+  *    not read by this function; the scanned area is taken from
+  *    img->block_x / img->block_y
+  * \return
+  *    1 if all BLOCK_MULTIPLE x BLOCK_MULTIPLE entries of
+  *    enc_picture->ref_idx[LIST_0] starting at that position are zero,
+  *    0 as soon as a non-zero entry is found
+  ************************************************************************
+  */
+ int
+ ZeroRef (Macroblock* currMB)
+ {
+   const int first_row = img->block_y;
+   const int first_col = img->block_x;
+   int row, col;
+ 
+   for (row = first_row; row < first_row + BLOCK_MULTIPLE; row++)
+   {
+     for (col = first_col; col < first_col + BLOCK_MULTIPLE; col++)
+     {
+       if (enc_picture->ref_idx[LIST_0][row][col])
+         return 0;
+     }
+   }
+ 
+   return 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Maps the macroblock type of currMB to the value that is coded
+  *    as mb_type
+  * \param currMB
+  *    macroblock whose mb_type (and, in B slices, b8pdir[0] and
+  *    b8pdir[3]) is read
+  * \return
+  *    coding value; depends on img->type, and for P8x8 outside B slices
+  *    also on input->symbol_mode and ZeroRef()
+  ************************************************************************
+  */
+ int
+ MBType2Value (Macroblock* currMB)
+ {
+   static const int dir1offset[3]    =  { 1,  2, 3};
+   static const int dir2offset[3][3] = {{ 0,  4,  8},   // 1. block forward
+                                        { 6,  2, 10},   // 1. block backward
+                                        {12, 14, 16}};  // 1. block bi-directional
+ 
+   const int mbtype = currMB->mb_type;
+ 
+   if (img->type != B_SLICE)
+   {
+     // intra types are offset by 6 when they appear outside an I slice
+     const int intra_base = (img->type == I_SLICE) ? 0 : 6;
+ 
+     if (mbtype == I8MB || mbtype == I4MB)
+       return intra_base;
+     if (mbtype == I16MB)
+       return intra_base + img->i16offset;
+     if (mbtype == IPCM)
+       return (img->type == I_SLICE) ? 25 : 31;
+     if (mbtype == P8x8)
+       return (input->symbol_mode == UVLC && ZeroRef (currMB)) ? 5 : 4;
+ 
+     return mbtype;
+   }
+ 
+   //----- B slice -----
+   if (mbtype == 0)
+     return 0;
+   if (mbtype == I4MB || mbtype == I8MB)
+     return 23;
+   if (mbtype == I16MB)
+     return 23 + img->i16offset;
+   if (mbtype == IPCM)
+     return 48;
+   if (mbtype == P8x8)
+     return 22;
+ 
+   {
+     // prediction directions of the first and the last 8x8 block
+     const int pdir0 = currMB->b8pdir[0];
+     const int pdir1 = currMB->b8pdir[3];
+ 
+     if (mbtype == 1)
+       return dir1offset[pdir0];
+ 
+     return (mbtype == 2 ? 4 : 5) + dir2offset[pdir0][pdir1];
+   }
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes the intra prediction modes of the 8x8 blocks of the
+  *    current macroblock
+  * \param only_this_block
+  *    index of the single 8x8 block to write; a negative value selects
+  *    all four 8x8 blocks
+  * \return
+  *    sum of currSE->len over all prediction mode elements written
+  * \note
+  *    Only 8x8 blocks whose b8mode is IBLOCK or I8MB contribute modes.
+  *    Every element written adds its length to
+  *    bitcounter[BITS_COEFF_Y_MB] and advances currMB->currSEnr.
+  ************************************************************************
+  */
+ int writeIntra4x4Modes(int only_this_block)
+ {
+   int i,j,bs_x,bs_y,ii,jj;
+   int block8x8;
+   int rate;
+   char ipred_array[16];
+   int cont_array[16],ipred_number;
+   Macroblock    *currMB     = &img->mb_data[img->current_mb_nr];
+   SyntaxElement *currSE     = &img->MB_SyntaxElements[currMB->currSEnr];
+   int           *bitCount   = currMB->bitcounter;
+   Slice         *currSlice  = img->currentSlice;
+   DataPartition *dataPart;
+   const int     *partMap    = assignSE2partition[input->partition_mode];
+ 
+   ipred_number=0; // number of modes collected in ipred_array / cont_array
+   for(block8x8=0;block8x8<4;block8x8++)
+   {
+     if((( currMB->b8mode[block8x8]==IBLOCK && (only_this_block<0||only_this_block==block8x8)) ||
+       ((currMB->b8mode[block8x8]==I8MB) && (only_this_block<0||only_this_block==block8x8))))
+     {
+       bs_x = bs_y = (currMB->b8mode[block8x8] == I8MB)?8:4; // 8 for I8MB, 4 for IBLOCK
+       ii=(bs_x>>2); // bug fix for solaris. mwi 
+       jj=(bs_y>>2); // bug fix for solaris. mwi
+       
+       for(j=0;j<4;j+=(jj<<1)) // IBLOCK: j = 0,2   I8MB: j = 0 only
+       {
+         for(i=0;i<2;i+=ii) // IBLOCK: i = 0,1   I8MB: i = 0 only
+         {
+           ipred_array[ipred_number]=currMB->intra_pred_modes[(block8x8<<2)|j|i]; // mode to write
+           cont_array[ipred_number]=(block8x8<<2)+j+i; // same index, later used as currSE->context
+           ipred_number++;
+         }
+       }
+     }
+   }
+   rate=0;
+ 
+   for(i=0;i<ipred_number;i++)
+   {
+     currMB->IntraChromaPredModeFlag = 1; // set as soon as at least one mode is written
+     currSE->context = cont_array[i];
+     currSE->value1  = ipred_array[i];
+     currSE->value2  = 0;
+ 
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "Intra mode     = %3d %d",currSE->value1,currSE->context);
+ #endif
+ 
+     /*--- set symbol type and function pointers ---*/
+     if (input->symbol_mode != UVLC)    
+       currSE->writing = writeIntraPredMode_CABAC;
+     currSE->type = SE_INTRAPREDMODE;
+ 
+     /*--- choose data partition ---*/
+     dataPart = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]);   
+     /*--- encode and update rate ---*/
+     if (input->symbol_mode == UVLC)    
+       writeSyntaxElement_Intra4x4PredictionMode(currSE, dataPart); // UVLC uses this dedicated writer directly
+     else
+       dataPart->writeSyntaxElement (currSE, dataPart);
+     bitCount[BITS_COEFF_Y_MB]+=currSE->len;
+     rate += currSE->len;
+     currSE++; // next syntax element slot for the following mode
+     currMB->currSEnr++;
+   }
+ 
+   return rate;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Maps an 8x8 block mode (and prediction direction) to the value
+  *    that is coded for it
+  * \param b8mode
+  *    8x8 block mode; used as index into two 8-entry tables in B slices
+  * \param b8pdir
+  *    prediction direction; only used in B slices
+  * \return
+  *    b8mode-4 outside B slices, otherwise
+  *    first_value[b8mode] + pdir_step[b8mode] * b8pdir
+  ************************************************************************
+  */
+ int B8Mode2Value (int b8mode, int b8pdir)
+ {
+   static const int first_value[8] = {0,0,0,0, 1, 4, 5, 10};
+   static const int pdir_step  [8] = {0,0,0,0, 1, 2, 2, 1};
+ 
+   if (img->type == B_SLICE)
+   {
+     return first_value[b8mode] + pdir_step[b8mode] * b8pdir;
+   }
+ 
+   return (b8mode-4);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Codes macroblock header
+ * \param rdopt
+ *    true for calls during RD-optimization; when false and no chroma
+ *    intra prediction mode is written, c_ipred_mode is reset to DC_PRED_8
+ * \param coeff_rate
+ *    bitrate of Luma and Chroma coeff; only written when the macroblock
+ *    is not skipped and is not IPCM (IPCM returns before that point)
+ * \return
+ *    number of bits counted for this macroblock: the lengths of all
+ *    syntax elements written here plus the values returned by
+ *    writeMotionInfo2NAL, writeIntra4x4Modes, writeChromaIntraPredMode
+ *    and the coefficient rate
+ * \note
+ *    Every element written advances currMB->currSEnr; bit lengths are
+ *    also accumulated in currMB->bitcounter. img->cod_counter is updated
+ *    in the UVLC (non-CABAC) inter path.
+ ************************************************************************
+ */
+ int writeMBLayer (int rdopt, int *coeff_rate)
+ {
+   int             i,j;
+   int             mb_nr     = img->current_mb_nr;
+   Macroblock*     currMB    = &img->mb_data[mb_nr];
+   Macroblock*     prevMB    = mb_nr ? (&img->mb_data[mb_nr-1]) : NULL; // NULL for macroblock 0
+   SyntaxElement  *currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   int*            bitCount  = currMB->bitcounter;
+   Slice*          currSlice = img->currentSlice;
+   DataPartition*  dataPart;
+   const int*      partMap   = assignSE2partition[input->partition_mode];
+   int             no_bits   = 0;
+   int             skip      = currMB->mb_type ? 0:((img->type == B_SLICE) ? !currMB->cbp:1); // 1 if mb_type is 0 and (B slices only) cbp is 0
+   int             mb_type;
+   int             prevMbSkipped = 0;
+   int             mb_field_tmp;
+   Macroblock      *topMB = NULL;
+   
+   int             WriteFrameFieldMBInHeader = 0; // 1 if mb_field is to be written for this macroblock
+ 
+   if (img->MbaffFrameFlag)
+   {
+     if (0==(mb_nr & 0x01))
+     {
+       WriteFrameFieldMBInHeader = 1; // top field
+ 
+       prevMbSkipped = 0;
+     }
+     else
+     {
+       if (prevMB->mb_type ? 0:((img->type == B_SLICE) ? !prevMB->cbp:1)) // same skip test as above, applied to the previous macroblock
+       {
+         WriteFrameFieldMBInHeader = 1; // bottom, if top was skipped
+       }
+ 
+       topMB= &img->mb_data[img->current_mb_nr-1]; // same element as prevMB
+       prevMbSkipped = topMB->skip_flag;
+     }
+   }
+   currMB->IntraChromaPredModeFlag = IS_INTRA(currMB);
+ 
+   // choose the appropriate data partition
+   dataPart = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+   
+   if(img->type == I_SLICE)
+   {
+     //========= write mb_aff (I_SLICE) =========
+     if(WriteFrameFieldMBInHeader)
+     {
+       currSE->value1 = currMB->mb_field;
+       currSE->value2 = 0;
+       currSE->type   = SE_MBTYPE;      
+       
+ #if TRACE
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_field_decoding_flag");
+ #endif
+       if( input->symbol_mode==UVLC)
+       {
+         currSE->mapping = ue_linfo;
+         currSE->bitpattern = (currMB->mb_field ? 1 : 0);
+         currSE->len = 1; // written as a single fixed bit
+         writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream);
+       }
+       else
+       {
+         currSE->writing = writeFieldModeInfo_CABAC;
+         dataPart->writeSyntaxElement(currSE, dataPart);
+       }
+       
+       bitCount[BITS_MB_MODE] += currSE->len;
+       no_bits                += currSE->len;
+       currSE++;
+       currMB->currSEnr++;
+     }
+     
+     //========= write mb_type (I_SLICE) =========
+     currSE->value1  = MBType2Value (currMB);
+     currSE->value2  = 0;
+     currSE->type    = SE_MBTYPE;
+ 
+     if (input->symbol_mode == UVLC)  
+       currSE->mapping = ue_linfo;
+     else
+       currSE->writing = writeMB_typeInfo_CABAC;
+ 
+     dataPart->writeSyntaxElement( currSE, dataPart);
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE,   "mb_type (I_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+ #endif
+     bitCount[BITS_MB_MODE] += currSE->len;
+     no_bits                += currSE->len;
+     currSE++;
+     currMB->currSEnr++;
+   }
+   // not I_SLICE, CABAC
+   else if (input->symbol_mode == CABAC)
+   {
+     if (img->MbaffFrameFlag && ((img->current_mb_nr & 0x01) == 0||prevMbSkipped))
+     {
+       mb_field_tmp = currMB->mb_field; // save, mb_field is overridden only for the availability check
+       currMB->mb_field = field_flag_inference();
+       CheckAvailabilityOfNeighborsCABAC();
+       currMB->mb_field = mb_field_tmp; // restore the real value
+     }
+     
+     //========= write mb_skip_flag (CABAC) =========
+     mb_type         = MBType2Value (currMB);
+     currSE->value1  = mb_type;
+     currSE->value2  = currMB->cbp; // the skip flag writer receives mb_type and cbp
+     currSE->type    = SE_MBTYPE;
+     currSE->writing = writeMB_skip_flagInfo_CABAC;
+     dataPart->writeSyntaxElement( currSE, dataPart);
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_skip_flag");
+ #endif
+     bitCount[BITS_MB_MODE] += currSE->len;
+     no_bits                += currSE->len;
+     currSE++;
+     currMB->currSEnr++;
+ 
+     CheckAvailabilityOfNeighborsCABAC(); // repeated with the macroblock's own mb_field
+     
+     //========= write mb_aff (CABAC) =========
+     if(img->MbaffFrameFlag && !skip) // check for copy mode
+     {
+       if(WriteFrameFieldMBInHeader)
+       {
+         currSE->value1 = currMB->mb_field;
+         currSE->value2 = 0;
+         currSE->type   =  SE_MBTYPE;
+ 
+         currSE->writing = writeFieldModeInfo_CABAC;
+         dataPart->writeSyntaxElement(currSE, dataPart);
+ #if TRACE
+         snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_field_decoding_flag");
+ #endif
+         bitCount[BITS_MB_MODE] += currSE->len;
+         no_bits                += currSE->len;
+         currSE++;
+         currMB->currSEnr++;
+       }
+     }
+     
+     //========= write mb_type (CABAC) =========
+     if (currMB->mb_type != 0 || ((img->type == B_SLICE) && currMB->cbp != 0)) // i.e. not skipped
+     {
+       currSE->value1  = mb_type;
+       currSE->value2  = 0;
+       currSE->type    = SE_MBTYPE;
+       currSE->writing = writeMB_typeInfo_CABAC;
+       dataPart->writeSyntaxElement( currSE, dataPart);
+ #if TRACE
+     if (img->type == B_SLICE) 
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_type (B_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+     else                      
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_type (P_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+ #endif
+       bitCount[BITS_MB_MODE] += currSE->len;
+       no_bits                += currSE->len;
+       currSE++;
+       currMB->currSEnr++;
+     }
+   }
+   // VLC not intra
+   else if (currMB->mb_type != 0 || ((img->type == B_SLICE) && currMB->cbp != 0))
+   {
+     //===== Run Length Coding: Non-Skipped macroblock =====
+     currSE->value1  = img->cod_counter; // skip count accumulated by the skipped-macroblock branch below
+     currSE->value2  = 0;
+     currSE->mapping = ue_linfo;
+     currSE->type    = SE_MBTYPE;
+     dataPart->writeSyntaxElement( currSE, dataPart);
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_skip_run");
+ #endif
+     bitCount[BITS_MB_MODE] += currSE->len;
+     no_bits                += currSE->len;
+     currSE++;
+     currMB->currSEnr++;
+     
+     // Reset cod counter
+     img->cod_counter = 0;
+     
+     // write mb_aff
+     if(img->MbaffFrameFlag && !skip) // check for copy mode
+     {
+       if(WriteFrameFieldMBInHeader)
+       {
+         currSE->value1 = currMB->mb_field;
+         currSE->type   =  SE_MBTYPE;
+         currSE->mapping = ue_linfo;
+         
+         //dataPart->writeSyntaxElement(currSE, dataPart);
+         currSE->bitpattern = (currMB->mb_field ? 1 : 0);
+         currSE->len = 1; // written as a single fixed bit
+         writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream);
+ 
+ #if TRACE
+         snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_field_decoding_flag");
+ #endif
+         bitCount[BITS_MB_MODE] += currSE->len;
+         no_bits                += currSE->len;
+         currSE++;
+         currMB->currSEnr++;
+       }
+     }
+     // Put out mb mode
+     currSE->value1  = MBType2Value (currMB);
+ 
+     if (img->type != B_SLICE)
+     {
+       currSE->value1--; // outside B slices the UVLC value is one less than MBType2Value()
+     }
+     currSE->mapping = ue_linfo;
+     currSE->type    = SE_MBTYPE;
+     currSE->value2  = 0;
+ 
+     dataPart->writeSyntaxElement( currSE, dataPart);
+ #if TRACE
+     if (img->type == B_SLICE) 
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_type (B_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+     else                      
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_type (P_SLICE) (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->mb_type);
+ #endif
+     bitCount[BITS_MB_MODE] += currSE->len;
+     no_bits                += currSE->len;
+     currSE++;
+     currMB->currSEnr++;
+   }
+   else
+   {
+     //Run Length Coding: Skipped macroblock
+     img->cod_counter++;
+     
+     currMB->skip_flag = 1;
+     // CAVLC
+     for (j=0; j < (4 + img->num_blk8x8_uv); j++)
+       for (i=0; i < 4; i++)
+         img->nz_coeff [img->current_mb_nr][i][j]=0; // clear all coefficient counts of this macroblock
+ 
+ 
+     if(FmoGetNextMBNr(img->current_mb_nr) == -1 && img->cod_counter>0) // NOTE(review): -1 presumably means no following macroblock - confirm in fmo.c
+     {
+       // Put out run
+       currSE->value1  = img->cod_counter;
+       currSE->value2  = 0;
+       currSE->mapping = ue_linfo;
+       currSE->type    = SE_MBTYPE;
+ 
+       dataPart->writeSyntaxElement( currSE, dataPart);
+ #if TRACE
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "mb_skip_run");
+ #endif
+       bitCount[BITS_MB_MODE] += currSE->len;
+       no_bits                += currSE->len;
+       currSE++;
+       currMB->currSEnr++;
+ 
+       // Reset cod counter
+       img->cod_counter = 0;
+     }
+   }
+ 
+   //init NoMbPartLessThan8x8Flag
+   currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(active_sps->direct_8x8_inference_flag))? 0: 1;
+   
+   if (currMB->mb_type == IPCM)
+   {
+     int jj, uv;
+ 
+     if (dataPart->bitstream->bits_to_go < 8)
+     {
+       if (input->symbol_mode == CABAC) { // nothing is written here for CABAC
+       }
+       else
+       {
+         currSE->len = dataPart->bitstream->bits_to_go;  // write bits_to_go zero bits (traced as alignment bits)
+         no_bits += currSE->len;
+         currSE->bitpattern = 0;       
+         bitCount[BITS_COEFF_Y_MB]+= currSE->len;
+         writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream);
+         currSE->mapping = ue_linfo;
+ #if TRACE
+         snprintf(currSE->tracestring, TRACESTRING_SIZE, "IPCM aligment bits = %d", currSE->len);
+ #endif
+         currSE++;
+         currMB->currSEnr++;
+       }
+     }
+     for (j=0;j<MB_BLOCK_SIZE;j++)
+     {
+       jj = img->pix_y+j;
+       for (i=0;i<MB_BLOCK_SIZE;i++)
+       {
+         if (input->symbol_mode == CABAC) { // nothing is written here for CABAC
+         }
+         else
+         {
+           currSE->mapping = ue_linfo;
+           currSE->len = img->bitdepth_luma;  // one fixed-length element per luma sample
+           no_bits += currSE->len;
+           currSE->bitpattern = enc_picture->imgY[jj][img->pix_x+i];       
+           bitCount[BITS_COEFF_Y_MB]+=currSE->len;
+           writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream);        
+ #if TRACE
+           snprintf(currSE->tracestring, TRACESTRING_SIZE, "IPCM Luma (%d %d) = %d", j,i,currSE->bitpattern);
+ #endif
+           currSE++;
+           currMB->currSEnr++;
+         }
+       }
+     }
+     if (img->yuv_format != YUV400)
+     {
+       for (uv = 0; uv < 2; uv ++)
+       {
+         for (j=0;j<img->mb_cr_size_y;j++)
+         {
+           jj = img->pix_c_y+j;
+           for (i=0;i<img->mb_cr_size_x;i++)
+           {
+             if (input->symbol_mode == CABAC) { // nothing is written here for CABAC
+             }
+             else
+             {              
+               currSE->mapping = ue_linfo;
+               currSE->len = img->bitdepth_chroma;  // one fixed-length element per chroma sample
+               no_bits += currSE->len;
+               currSE->bitpattern = enc_picture->imgUV[uv][jj][img->pix_c_x+i];       
+               writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream);        
+               bitCount[BITS_COEFF_UV_MB]+=currSE->len;
+ #if TRACE
+               snprintf(currSE->tracestring, TRACESTRING_SIZE, "IPCM chroma(%d) (%d %d) = %d", uv, j,i,currSE->bitpattern);
+ #endif
+               currSE++;
+               currMB->currSEnr++;
+             }
+           }
+         }
+       }
+     }
+     return no_bits; // IPCM: nothing further is written and *coeff_rate is left untouched
+   }
+ 
+   //===== BITS FOR 8x8 SUB-PARTITION MODES =====
+   if (IS_P8x8 (currMB))
+   {
+     dataPart = &(currSlice->partArr[partMap[SE_MBTYPE]]);
+     
+     for (i=0; i<4; i++)
+     {
+       if (input->symbol_mode==UVLC)   
+         currSE->mapping = ue_linfo;
+       else
+         currSE->writing = writeB8_typeInfo_CABAC;
+ 
+       currSE->value1  = B8Mode2Value (currMB->b8mode[i], currMB->b8pdir[i]);
+       currSE->value2  = 0;
+       currSE->type    = SE_MBTYPE;
+       dataPart->writeSyntaxElement (currSE, dataPart);
+ #if TRACE
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "8x8 mode/pdir(%2d) = %3d/%d", i, currMB->b8mode[i], currMB->b8pdir[i]);
+ #endif
+       bitCount[BITS_MB_MODE]+= currSE->len;
+       no_bits               += currSE->len;
+       currSE++;
+       currMB->currSEnr++;
+ 
+       //set NoMbPartLessThan8x8Flag for P8x8 mode
+       currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && active_sps->direct_8x8_inference_flag) || 
+                                          (currMB->b8mode[i]==4);
+     }
+     no_bits += writeMotionInfo2NAL  ();
+     currSE   = &img->MB_SyntaxElements[currMB->currSEnr]; // re-sync: presumably writeMotionInfo2NAL advanced currSEnr - confirm
+   }
+ 
+   //============= Transform size flag for INTRA MBs =============
+   //-------------------------------------------------------------
+   //transform size flag for INTRA_4x4 and INTRA_8x8 modes
+   if ((currMB->mb_type == I8MB || currMB->mb_type == I4MB) && input->Transform8x8Mode)
+   {
+     currSE->value1 = currMB->luma_transform_size_8x8_flag;
+     currSE->type   = SE_HEADER;
+         
+     if( input->symbol_mode==UVLC)
+     {
+       currSE->mapping = ue_linfo;
+       currSE->bitpattern = currMB->luma_transform_size_8x8_flag;
+       currSE->len = 1; // written as a single fixed bit
+       writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream);
+     }
+     else
+     {
+       currSE->writing = writeMB_transform_size_CABAC;
+       dataPart->writeSyntaxElement(currSE, dataPart);
+     }
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "transform size 8x8 flag = %3d", currMB->luma_transform_size_8x8_flag);
+ #endif
+     
+     bitCount[BITS_MB_MODE] += currSE->len;
+     no_bits                += currSE->len;
+     currSE++;
+     currMB->currSEnr++;
+   }
+   
+     
+  //===== BITS FOR INTRA PREDICTION MODES ====
+   no_bits += writeIntra4x4Modes(-1); // -1: all four 8x8 blocks
+   //===== BITS FOR CHROMA INTRA PREDICTION MODE ====
+   if (currMB->IntraChromaPredModeFlag && img->yuv_format != YUV400)
+     no_bits += writeChromaIntraPredMode();
+   else if(!rdopt) //GB CHROMA !!!!!
+     currMB->c_ipred_mode = DC_PRED_8; //setting c_ipred_mode to default is not the right place here
+                                       //resetting in rdopt.c (but where ??)
+                                       //with cabac and bframes maybe it could crash without this default
+                                       //since cabac needs the right neighborhood for the later MBs
+ 
+   //----- motion information -----
+   if (currMB->mb_type !=0 && currMB->mb_type !=P8x8) // for P8x8 the motion info was already written above
+   {
+     no_bits  += writeMotionInfo2NAL  ();
+   }
+   
+   if ((currMB->mb_type!=0) || (img->type==B_SLICE && (currMB->cbp!=0))) // not skipped: write CBP and coefficients
+   {
+     *coeff_rate = writeCBPandLumaCoeff ();
+     if (img->yuv_format != YUV400)
+       *coeff_rate  += writeChromaCoeff ();
+ 
+     no_bits  += *coeff_rate;
+   }
+   
+   return no_bits;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Encodes a terminating bit with the arithmetic coder of the data
+  *    partition that carries SE_MBTYPE
+  * \param bit
+  *    value passed to biari_encode_symbol_final
+  * \note
+  *    Also sets write_flag of that partition's bitstream to 1.
+  ************************************************************************
+  */
+ void write_terminating_bit (short bit)
+ {
+   const int*     se_to_partition = assignSE2partition[input->partition_mode];
+   DataPartition* mbtype_part     = &(img->currentSlice->partArr[se_to_partition[SE_MBTYPE]]);
+ 
+   mbtype_part->bitstream->write_flag = 1;
+ 
+   biari_encode_symbol_final(&(mbtype_part->ee_cabac), bit);
+ #if TRACE
+   fprintf (p_trace, "      CABAC terminating bit = %d\n",bit);
+ #endif
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes the chroma intra prediction mode (c_ipred_mode) of the
+  *    current macroblock
+  * \return
+  *    length (currSE->len) of the syntax element written
+  * \note
+  *    Adds that length to bitcounter[BITS_COEFF_UV_MB] and advances
+  *    currMB->currSEnr by one.
+  ************************************************************************
+  */
+ int writeChromaIntraPredMode()
+ {
+   Macroblock*     currMB   = &img->mb_data[img->current_mb_nr];
+   SyntaxElement*  se       = &img->MB_SyntaxElements[currMB->currSEnr];
+   int*            bitCount = currMB->bitcounter;
+   const int*      partMap  = assignSE2partition[input->partition_mode];
+   DataPartition*  part     = &(img->currentSlice->partArr[partMap[SE_INTRAPREDMODE]]);
+   int             rate;
+ 
+   //===== fill in the syntax element =====
+   se->value1 = currMB->c_ipred_mode;
+   se->value2 = 0;
+   se->type   = SE_INTRAPREDMODE;
+ 
+   if (input->symbol_mode != UVLC)
+     se->writing = writeCIPredMode_CABAC;
+   else
+     se->mapping = ue_linfo;
+ 
+   //===== write it and account for its bits =====
+   part->writeSyntaxElement (se, part);
+   rate = se->len;
+   bitCount[BITS_COEFF_UV_MB] += rate;
+ #if TRACE
+   snprintf(se->tracestring, TRACESTRING_SIZE, "Chroma intra pred mode");
+ #endif
+   currMB->currSEnr++;
+ 
+   return rate;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Updates the global last_dquant from the current macroblock:
+  *    delta_qp for a non-skipped macroblock, 0 for a skipped one
+  * \note
+  *    A macroblock counts as non-skipped when IS_INTERMV or IS_INTRA
+  *    holds, or when it lies in a B slice and has a non-zero cbp.
+  ************************************************************************
+  */
+ 
+ extern int last_dquant;
+ 
+ void set_last_dquant()
+ {
+   Macroblock *currMB   = &img->mb_data[img->current_mb_nr];
+   const int   non_skip = IS_INTERMV (currMB) || IS_INTRA (currMB)
+                       || ((img->type==B_SLICE) && currMB->cbp != 0);
+ 
+   last_dquant = non_skip ? currMB->delta_qp : 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Passes the chosen syntax elements to the NAL
+  * \param eos_bit
+  *    when non-zero, and CABAC is used, and the macroblock is not the
+  *    first one of its slice, a terminating bit 0 is written before the
+  *    macroblock itself
+  * \note
+  *    Besides writing the macroblock via writeMBLayer this updates the
+  *    intras counter, last_dquant, img->intra_block (constrained intra
+  *    prediction), the per-macroblock bit counters, the rate control
+  *    totals in img and stats->bit_slice.
+  ************************************************************************
+  */
+ void write_one_macroblock (int eos_bit)
+ {
+   Macroblock* currMB   = &img->mb_data[img->current_mb_nr];
+   int*        bitCount = currMB->bitcounter;
+   int i;
+ 
+   extern int cabac_encoding;
+ 
+   //===== init and update number of intra macroblocks =====
+   if (img->current_mb_nr==0)
+     intras=0;
+ 
+   if (IS_INTRA(currMB))
+     intras++;
+ 
+   //--- write non-slice termination symbol if the macroblock is not the first one in its slice ---
+   if (input->symbol_mode==CABAC && img->current_mb_nr!=img->currentSlice->start_mb_nr && eos_bit)
+   {
+     write_terminating_bit (0);
+   }
+ 
+   cabac_encoding = 1; // held at 1 until it is cleared at the end of this function
+ 
+   //--- write macroblock ---
+   writeMBLayer (0, &i);  // i only receives the coefficient rate, which is not used here
+ 
+   if (!((currMB->mb_type !=0 ) || ((img->type==B_SLICE) && currMB->cbp != 0) )) // skipped: mb_type 0 and (B slices) cbp 0
+   { 
+     for (i=0; i < 4; i++)
+       memset(img->nz_coeff [img->current_mb_nr][i], 0, (4 + img->num_blk8x8_uv) * sizeof(int));  // CAVLC
+   }
+ 
+   set_last_dquant();
+ 
+   //--- constrain intra prediction ---
+   if(input->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
+   {
+     if( !IS_INTRA(currMB) )
+     {
+       img->intra_block[img->current_mb_nr] = 0; // mark this macroblock as not intra coded
+     }
+   }
+ 
+   //--- set total bit-counter ---
+   bitCount[BITS_TOTAL_MB] = bitCount[BITS_MB_MODE]  + bitCount[BITS_COEFF_Y_MB]     
+                           + bitCount[BITS_INTER_MB] + bitCount[BITS_CBP_MB]  
+                           + bitCount[BITS_DELTA_QUANT_MB] + bitCount[BITS_COEFF_UV_MB];
+ 
+   //Rate control
+   img->NumberofMBHeaderBits=bitCount[BITS_MB_MODE]   + bitCount[BITS_INTER_MB]
+     + bitCount[BITS_CBP_MB]  + bitCount[BITS_DELTA_QUANT_MB]; // header = mode + motion + cbp + delta quant bits
+   img->NumberofMBTextureBits= bitCount[BITS_COEFF_Y_MB]+ bitCount[BITS_COEFF_UV_MB]; // texture = luma + chroma coefficient bits
+   img->NumberofTextureBits +=img->NumberofMBTextureBits;
+   img->NumberofHeaderBits +=img->NumberofMBHeaderBits;
+   /*basic unit layer rate control*/
+   if(img->BasicUnit<img->Frame_Total_Number_MB)
+   {
+       img->NumberofBasicUnitHeaderBits +=img->NumberofMBHeaderBits;
+       img->NumberofBasicUnitTextureBits +=img->NumberofMBTextureBits;
+   }
+   /*record the total number of MBs*/
+   img->NumberofCodedMacroBlocks++;
+   
+   stats->bit_slice += bitCount[BITS_TOTAL_MB];
+ 
+   cabac_encoding = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Selects the context used for coding a reference frame parameter
+  * \param btype
+  *    block type
+  * \return
+  *    0 for block types below 4, 1 for all others
+  ************************************************************************
+  */
+ int BType2CtxRef (int btype)
+ {
+   if (btype >= 4)
+     return 1;
+ 
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Codes the reference frame index of one partition
+  * \param mode
+  *    block mode; selects the CABAC context through BType2CtxRef
+  * \param i
+  *    stored in img->subblock_x for CABAC context determination
+  * \param j
+  *    stored in img->subblock_y for CABAC context determination
+  * \param fwd_flag
+  *    non-zero: index refers to LIST_0, zero: index refers to LIST_1
+  * \param ref
+  *    reference index to be written
+  * \return
+  *    length (currSE->len) of the element written; 0 if the selected
+  *    list holds exactly one reference, in which case nothing is written
+  * \note
+  *    When an element is written its length is added to
+  *    bitcounter[BITS_INTER_MB] and currMB->currSEnr is advanced.
+  ************************************************************************
+  */
+ int writeReferenceFrame (int mode, int i, int j, int fwd_flag, int  ref)
+ {
+   Macroblock*     currMB   = &img->mb_data[img->current_mb_nr];
+   SyntaxElement*  se       = &img->MB_SyntaxElements[currMB->currSEnr];
+   Slice*          slice    = img->currentSlice;
+   int*            bitCount = currMB->bitcounter;
+   const int*      partMap  = assignSE2partition[input->partition_mode];
+   const int       list     = fwd_flag ? LIST_0 : LIST_1;
+   const int       num_ref  = listXsize[list + currMB->list_offset];
+   DataPartition*  part;
+   int             rate;
+ 
+   // a list with a single entry needs no index in the bitstream
+   if (num_ref == 1)
+     return 0;
+ 
+   se->value1 = ref;
+   se->value2 = 0;
+   se->type   = SE_REFFRAME;
+ 
+   part = &(slice->partArr[partMap[SE_REFFRAME]]);
+ 
+   if (input->symbol_mode != UVLC)
+   {
+     //----- CABAC -----
+     se->context     = BType2CtxRef (mode);
+     img->subblock_x = i; // position used for context determination
+     img->subblock_y = j; // position used for context determination
+     se->writing     = writeRefFrame_CABAC;
+     se->value2      = list;
+     part->writeSyntaxElement (se, part);
+   }
+   else if (num_ref == 2)
+   {
+     //----- UVLC, two references: one inverted bit -----
+     se->bitpattern = 1 - se->value1;
+     se->len        = 1;
+     writeSyntaxElement2Buf_Fixed(se, part->bitstream);
+   }
+   else
+   {
+     //----- UVLC, more references: ue_linfo mapping -----
+     se->mapping = ue_linfo;
+     part->writeSyntaxElement (se, part);
+   }
+ 
+   rate = se->len;
+   bitCount[BITS_INTER_MB] += rate;
+ #if TRACE
+   if (fwd_flag)
+     snprintf(se->tracestring, TRACESTRING_SIZE, "ref_idx_l0 = %d", se->value1);
+   else
+     snprintf(se->tracestring, TRACESTRING_SIZE, "ref_idx_l1 = %d", se->value1);
+ #endif
+   currMB->currSEnr++;
+ 
+   return rate;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes motion vectors of an 8x8 block
+  *
+  *    Visits every position (i,j) with j in [j0,j1) stepping by
+  *    input->part_size[mv_mode][1] and i in [i0,i1) stepping by
+  *    input->part_size[mv_mode][0]. For both components k = 0,1 the
+  *    difference all_mv - pred_mv is stored in currMB->mvd for every
+  *    position covered by the step and written as one SE_MVD element.
+  *
+  * \param i0
+  *    first value of the horizontal loop
+  * \param j0
+  *    first value of the vertical loop
+  * \param i1
+  *    exclusive upper bound of the horizontal loop
+  * \param j1
+  *    exclusive upper bound of the vertical loop
+  * \param refframe
+  *    reference index used to address all_mv and pred_mv
+  * \param list_idx
+  *    list index used to address all_mv, pred_mv and currMB->mvd
+  * \param mv_mode
+  *    selects the step sizes and the mode entry of all_mv and pred_mv
+  *
+  * \return
+  *    sum of currSE->len over all elements written by this call
+  ************************************************************************
+  */
+ int writeMotionVector8x8 (int  i0,
+                           int  j0,
+                           int  i1,
+                           int  j1,
+                           int  refframe,
+                           int  list_idx,
+                           int  mv_mode)
+ {
+   int            i, j, k, l, m;
+   int            curr_mvd;
+   DataPartition* dataPart;
+ 
+   int            rate       = 0;
+   int            step_h     = input->part_size[mv_mode][0];
+   int            step_v     = input->part_size[mv_mode][1];
+   Macroblock*    currMB     = &img->mb_data[img->current_mb_nr];
+   SyntaxElement* currSE     = &img->MB_SyntaxElements[currMB->currSEnr];
+   Slice*         currSlice  = img->currentSlice;
+   int*           bitCount   = currMB->bitcounter;
+   const int*     partMap    = assignSE2partition[input->partition_mode];
+   int            refindex   = refframe;
+ 
+   short******    all_mv     = img->all_mv;
+   short******    pred_mv    = img->pred_mv;
+ 
+   if (currMB->bi_pred_me && currMB->b8pdir[0]==2 && mv_mode == 1 && refindex == 0)
+     all_mv = currMB->bi_pred_me == 1? img->bipred_mv1 : img->bipred_mv2 ; // take vectors from the bipred_mv set selected by bi_pred_me
+ 
+   for (j=j0; j<j1; j+=step_v)
+   {
+     for (i=i0; i<i1; i+=step_h)
+     {
+       for (k=0; k<2; k++) 
+       {        
+         curr_mvd = all_mv[j][i][list_idx][refindex][mv_mode][k] - pred_mv[j][i][list_idx][refindex][mv_mode][k];
+         
+         //--- store (oversampled) mvd ---
+         for (l=0; l < step_v; l++)
+           for (m=0; m < step_h; m++)
+           {
+             currMB->mvd[list_idx][j+l][i+m][k] = curr_mvd;
+           }
+         currSE->value1 = curr_mvd;
+         currSE->value2 = 0;
+         currSE->type   = SE_MVD;
+         if (input->symbol_mode == UVLC)
+         {
+           currSE->mapping = se_linfo;
+         }
+         else
+         {
+           img->subblock_x = i; // position used for context determination
+           img->subblock_y = j; // position used for context determination
+           currSE->value2  = 2*k+list_idx; // identifies the component and the direction; only used for context determination
+           currSE->writing = writeMVD_CABAC;
+         }  
+         dataPart = &(currSlice->partArr[partMap[SE_MVD]]);
+         dataPart->writeSyntaxElement (currSE, dataPart);
+ #if TRACE
+         if (!list_idx)
+         {
+           snprintf(currSE->tracestring, TRACESTRING_SIZE, "mvd_l0 (%d) = %3d  (org_mv %3d pred_mv %3d)",k, curr_mvd, all_mv[j][i][list_idx][refindex][mv_mode][k], pred_mv[j][i][list_idx][refindex][mv_mode][k]);
+         }
+         else
+         {
+           snprintf(currSE->tracestring, TRACESTRING_SIZE, "mvd_l1 (%d) = %3d  (org_mv %3d pred_mv %3d)",k, curr_mvd, all_mv[j][i][list_idx][refindex][mv_mode][k], pred_mv[j][i][list_idx][refindex][mv_mode][k]);
+         }
+           
+ #endif
+         bitCount[BITS_INTER_MB] += currSE->len;
+         rate                    += currSE->len;
+         currSE++;  
+         currMB->currSEnr++;
+       }
+     }
+   }
+   return rate;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes motion info
+  *
+  *    Nothing is written unless IS_INTERMV(currMB) holds. The order is:
+  *    list 0 reference indices, list 1 reference indices, list 0 motion
+  *    vectors and, for B slices only, list 1 motion vectors. Reference
+  *    indices are skipped when the macroblock is P8x8 with ZeroRef() true,
+  *    the symbol mode is not CABAC and the slice is not a B slice.
+  *
+  *    Each loop walks the macroblock in steps derived from
+  *    input->blc_size (entry 4 for P8x8, otherwise currMB->mb_type) and
+  *    uses k = j0 + (i0 >> 1) to index b8pdir[] and b8mode[].
+  *
+  * \return
+  *    sum of the values returned by writeReferenceFrame() and
+  *    writeMotionVector8x8()
+  ************************************************************************
+  */
+ int writeMotionInfo2NAL ()
+ {
+   int k, j0, i0, refframe;
+   int jj;
+   Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+   int             no_bits   = 0;
+   int   bframe          = (img->type==B_SLICE);
+   int   step_h0         = (input->blc_size[IS_P8x8(currMB) ? 4 : currMB->mb_type][0] >> 2);
+   int   step_v0         = (input->blc_size[IS_P8x8(currMB) ? 4 : currMB->mb_type][1] >> 2);
+ 
+   //=== If multiple ref. frames, write reference frame for the MB ===
+   if (IS_INTERMV (currMB))
+   {
+     // if UVLC is turned on, a 8x8 macroblock with all ref=0 in a P-frame is signalled in macroblock mode
+     if (!IS_P8x8 (currMB) || !ZeroRef (currMB) || input->symbol_mode==CABAC || bframe)
+     {
+       for (j0=0; j0<4; j0+=step_v0)
+       {
+         jj = img->block_y+j0;
+         for (i0=0; i0<4; i0+=step_h0)
+         {
+           k=j0+(i0 >> 1);
+           
+           if ((currMB->b8pdir[k]==0 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0)//has forward vector
+           {
+             no_bits += writeReferenceFrame (currMB->b8mode[k], i0, j0, 1, enc_picture->ref_idx[LIST_0][jj][img->block_x+i0]);
+           }
+         }
+       }
+       for (j0=0; j0<4; j0+=step_v0)
+       {
+         jj = img->block_y+j0;
+         for (i0=0; i0<4; i0+=step_h0)
+         {
+           k=j0+(i0 >> 1);
+           if ((currMB->b8pdir[k]==1 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0)//has backward vector
+           {
+             no_bits += writeReferenceFrame (currMB->b8mode[k], i0, j0, 0, enc_picture->ref_idx[LIST_1][jj][img->block_x+i0]);
+           }
+         }
+       }
+     }
+   }
+ 
+   //===== write forward motion vectors =====
+   if (IS_INTERMV (currMB))
+   {
+     for (j0=0; j0<4; j0+=step_v0)
+     {
+       jj = img->block_y+j0;
+       for (i0=0; i0<4; i0+=step_h0)
+       {
+         k=j0+(i0 >> 1);
+         if ((currMB->b8pdir[k]==0 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0)//has forward vector
+         {
+           refframe  = enc_picture->ref_idx[LIST_0][jj][img->block_x+i0];
+           no_bits  += writeMotionVector8x8 (i0, j0, i0+step_h0, j0+step_v0, refframe, LIST_0, currMB->b8mode[k]);
+         }
+       }
+     }
+   }
+ 
+ 
+   //===== write backward motion vectors =====
+   if (IS_INTERMV (currMB) && bframe)
+   {
+     for (j0=0; j0<4; j0+=step_v0)
+     {
+       jj = img->block_y+j0;
+       for (i0=0; i0<4; i0+=step_h0)
+       {
+         k=j0+(i0 >> 1);
+         if ((currMB->b8pdir[k]==1 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0)//has backward vector
+         {
+           refframe  = enc_picture->ref_idx[LIST_1][jj][img->block_x+i0];
+           no_bits  += writeMotionVector8x8 (i0, j0, i0+step_h0, j0+step_v0, refframe, LIST_1, currMB->b8mode[k]);
+         }
+       }
+     }
+   }
+   return no_bits;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes chrominance coefficients
+  *
+  *    DC coefficients of both chroma components are written when
+  *    currMB->cbp > 15; AC coefficients are written when (cbp >> 4) == 2.
+  *    In UVLC mode the work is delegated to writeCoeff4x4_CAVLC().
+  *    Otherwise (level,run) pairs are written one syntax element at a
+  *    time through writeRunLevel_CABAC; each loop stops after writing a
+  *    pair whose level is 0 or when its index bound is reached.
+  *
+  *    chroma_ac_param[yuv][b8-4][b4] supplies the param argument for the
+  *    CAVLC AC call, where yuv is img->yuv_format - 1.
+  *
+  * \return
+  *    accumulated bit count: the values returned by writeCoeff4x4_CAVLC()
+  *    plus currSE->len of every element written here
+  ************************************************************************
+  */
+ int writeChromaCoeff ()
+ {
+   int             rate      = 0;
+   Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+   SyntaxElement*  currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   int*            bitCount  = currMB->bitcounter;
+   Slice*          currSlice = img->currentSlice;
+   const int*      partMap   = assignSE2partition[input->partition_mode];
+   int             cbp       = currMB->cbp;
+   DataPartition*  dataPart;
+ 
+   int   level, run;
+   int   k, uv;
+   int   b8, b4, param;
+   int*  ACLevel;
+   int*  ACRun;
+   int*  DCLevel;
+   int*  DCRun;
+   //ADD-VG-14052004
+   int   chroma_dc_context[3]={CHROMA_DC, CHROMA_DC_2x4, CHROMA_DC_4x4};
+   int   yuv = img->yuv_format - 1;
+ 
+   static unsigned char chroma_ac_param[3][8][4] =
+   {
+    {{ 4, 20,  5, 21},
+     {36, 52, 37, 53},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0}},
+     
+    {{ 4, 20,  5, 21},
+     { 6, 22,  7, 23},
+     {36, 52, 37, 53},
+     {38, 54, 39, 55},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0},
+     { 0,  0,  0,  0}},
+     
+    {{ 4, 20,  5, 21},
+     {36, 52, 37, 53},
+     { 6, 22,  7, 23},
+     {38, 54, 39, 55},
+     { 8, 24,  9, 25},
+     {40, 56, 41, 57},
+     {10, 26, 11, 27},
+     {42, 58, 43, 59}}};
+    //ADD-VG-14052004-END
+ 
+   //=====
+   //=====   D C - C O E F F I C I E N T S
+   //=====
+   if (cbp > 15)  // check if any chroma bits in coded block pattern is set
+   {
+     for (uv=0; uv < 2; uv++)
+     {
+       if (input->symbol_mode == UVLC)
+       {
+         param = uv;
+         rate += writeCoeff4x4_CAVLC (CHROMA_DC, 0, 0, param);
+         // CAVLC
+       }
+       else
+       {
+         
+         DCLevel = img->cofDC[uv+1][0];
+         DCRun   = img->cofDC[uv+1][1];
+         
+         level=1; // forces the first loop iteration
+         for (k=0; k <= img->num_cdc_coeff && level != 0; ++k)
+         {
+           level = currSE->value1 = DCLevel[k]; // level
+           run   = currSE->value2 = DCRun  [k]; // run
+           
+           if (input->symbol_mode == UVLC)   // never true here: the enclosing else already excluded UVLC
+             currSE->mapping = levrun_linfo_c2x2;
+           else                              
+             currSE->writing = writeRunLevel_CABAC;
+           
+           currSE->context     = chroma_dc_context[yuv];
+           currSE->type        = (IS_INTRA(currMB) ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER);
+           img->is_intra_block =  IS_INTRA(currMB);
+           img->is_v_block     = uv;
+           
+           // choose the appropriate data partition
+           dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+           dataPart->writeSyntaxElement (currSE, dataPart);
+           bitCount[BITS_COEFF_UV_MB] += currSE->len;
+           rate                       += currSE->len;
+ #if TRACE
+           snprintf(currSE->tracestring, TRACESTRING_SIZE, "DC Chroma %2d: level =%3d run =%2d",k, level, run);
+ #endif
+           // proceed to next SE 
+           currSE++;  
+           currMB->currSEnr++;
+         }
+       }
+     }
+   }
+ 
+   //=====
+   //=====   A C - C O E F F I C I E N T S
+   //=====
+   uv=-1;   
+   if (cbp >> 4 == 2) // check if chroma bits in coded block pattern = 10b
+   {  
+     for (b8=4; b8 < (4+img->num_blk8x8_uv); b8++)
+     for (b4=0; b4 < 4; b4++)
+     {
+       if (input->symbol_mode == UVLC)
+       {
+         param = chroma_ac_param[yuv][b8-4][b4];
+         rate += writeCoeff4x4_CAVLC (CHROMA_AC, b8, b4, param);
+         // CAVLC
+       }
+       else
+       {
+ 
+         ACLevel = img->cofAC[b8][b4][0];
+         ACRun   = img->cofAC[b8][b4][1];
+ 
+         level=1; // forces the first loop iteration
+         uv++; // counts the 4x4 blocks visited so far in this branch
+ 
+         img->subblock_y = subblk_offset_y[yuv][b8-4][b4]>>2;
+         img->subblock_x = subblk_offset_x[yuv][b8-4][b4]>>2;
+ 
+         for (k=0; k < 16 && level != 0; k++)
+         {
+           level = currSE->value1 = ACLevel[k]; // level
+           run   = currSE->value2 = ACRun  [k]; // run
+ 
+           if (input->symbol_mode == UVLC)   currSE->mapping = levrun_linfo_inter; // never true here: the enclosing else already excluded UVLC
+           else                              currSE->writing = writeRunLevel_CABAC;
+         
+           currSE->context     = CHROMA_AC;
+           currSE->type        = (IS_INTRA(currMB) ? SE_CHR_AC_INTRA : SE_CHR_AC_INTER);
+           img->is_intra_block =  IS_INTRA(currMB);
+           img->is_v_block     = (uv>=(img->num_blk8x8_uv<<1)); // true once the block counter reaches 2*num_blk8x8_uv
+ 
+           // choose the appropriate data partition
+           dataPart = &(currSlice->partArr[partMap[currSE->type]]); 
+           dataPart->writeSyntaxElement (currSE, dataPart);
+           bitCount[BITS_COEFF_UV_MB] += currSE->len;
+           rate                       += currSE->len;
+ #if TRACE
+           snprintf(currSE->tracestring, TRACESTRING_SIZE, "AC Chroma %2d: level =%3d run =%2d",k, level, run);
+ #endif
+ 
+           // proceed to next SE 
+           currSE++;  
+           currMB->currSEnr++;
+         }
+       }
+     }
+   }
+ 
+   return rate;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes Luma coeff of a 4x4 block
+  *
+  *    (level,run) pairs from img->cofAC[b8][b4] are written through
+  *    writeRunLevel_CABAC until a pair with level 0 has been written or
+  *    the index k has run from 0 through 16. The pair at k == 0 is typed
+  *    as a luma DC element, all later pairs as luma AC elements.
+  *
+  * \param b8
+  *    8x8 block index; together with b4 it selects the cofAC entry and
+  *    the values stored in img->subblock_x / img->subblock_y
+  * \param b4
+  *    4x4 block index inside the 8x8 block
+  * \param intra4x4mode
+  *    non-zero selects the INTRA syntax element types; also copied to
+  *    img->is_intra_block
+  *
+  * \return
+  *    sum of currSE->len over all elements written by this call
+  ************************************************************************
+  */
+ int writeLumaCoeff4x4_CABAC (int b8, int b4, int intra4x4mode)
+ {
+   int             rate      = 0;
+   Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+   SyntaxElement*  currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   Slice*          currSlice = img->currentSlice;
+   const int*      partMap   = assignSE2partition[input->partition_mode];
+   int*            bitCount  = currMB->bitcounter;
+   DataPartition*  dataPart;
+ 
+   int   level, run;
+   int   k;
+   int*  ACLevel = img->cofAC[b8][b4][0];
+   int*  ACRun   = img->cofAC[b8][b4][1];
+ 
+   img->subblock_x = ((b8&0x1)==0) ? (((b4&0x1)==0)? 0: 1) : (((b4&0x1)==0)? 2: 3); // horiz. position for coeff_count context
+   img->subblock_y = (b8<2)        ? ((b4<2)       ? 0: 1) : ((b4<2)       ? 2: 3); // vert.  position for coeff_count context
+ 
+   level=1; // get inside loop
+   for(k=0; k<=16 && level !=0; k++)
+   {
+     level = currSE->value1 = ACLevel[k]; // level
+     run   = currSE->value2 = ACRun  [k]; // run
+       
+     currSE->writing = writeRunLevel_CABAC;
+ 
+     currSE->context     = LUMA_4x4;
+     currSE->type        = (k==0 
+       ? (intra4x4mode ? SE_LUM_DC_INTRA : SE_LUM_DC_INTER) 
+       : (intra4x4mode ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER));
+     img->is_intra_block = intra4x4mode;
+ 
+     // choose the appropriate data partition
+     dataPart = &(currSlice->partArr[partMap[currSE->type]]);          
+     dataPart->writeSyntaxElement (currSE, dataPart);
+     bitCount[BITS_COEFF_Y_MB] += currSE->len;
+     rate                      += currSE->len;
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "Luma sng(%2d) level =%3d run =%2d", k, level,run);
+ #endif
+     /* proceed to next SE */
+     currSE++;  
+     currMB->currSEnr++;
+   }
+ 
+   return rate;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes Luma coeff of an 8x8 block
+  *
+  *    (level,run) pairs from img->cofAC[b8][0] are written through
+  *    writeRunLevel_CABAC until a pair with level 0 has been written or
+  *    the index k has run from 0 through 64. The pair at k == 0 is typed
+  *    as a luma DC element, all later pairs as luma AC elements.
+  *
+  *    The data partition is looked up with currSE->type except in B
+  *    slices, where SE_BFRAME is used for the lookup instead.
+  *
+  * \param b8
+  *    8x8 block index; selects the cofAC entry and the values stored in
+  *    img->subblock_x / img->subblock_y
+  * \param intra_mode
+  *    non-zero selects the INTRA syntax element types; also copied to
+  *    img->is_intra_block
+  *
+  * \return
+  *    sum of currSE->len over all elements written by this call
+  ************************************************************************
+  */
+ int writeLumaCoeff8x8_CABAC (int b8, int intra_mode)
+ {
+   int             rate      = 0;
+   Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+   SyntaxElement*  currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   Slice*          currSlice = img->currentSlice;
+   const int*      partMap   = assignSE2partition[input->partition_mode];
+   int*            bitCount  = currMB->bitcounter;
+   DataPartition*  dataPart;
+   
+   int   level, run;
+   int   k;
+   int*  ACLevel = img->cofAC[b8][0][0];
+   int*  ACRun   = img->cofAC[b8][0][1];
+ 
+   img->subblock_x = ((b8&0x1)==0)?0:2;  // horiz. position for coeff_count context
+   img->subblock_y = (b8<2)?0:2;     // vert.  position for coeff_count context
+   
+   
+   level=1; // get inside loop
+   for(k=0; k<=64 && level !=0; k++)
+   {
+     level = currSE->value1 = ACLevel[k]; // level
+     run   = currSE->value2 = ACRun  [k]; // run
+ 
+     currSE->writing = writeRunLevel_CABAC;
+     
+     currSE->context     = LUMA_8x8;
+     currSE->type        = (k==0 
+       ? (intra_mode ? SE_LUM_DC_INTRA : SE_LUM_DC_INTER) 
+       : (intra_mode ? SE_LUM_AC_INTRA : SE_LUM_AC_INTER));
+     img->is_intra_block = intra_mode;
+     
+     // choose the appropriate data partition
+     dataPart = &(currSlice->partArr[partMap[img->type != B_SLICE ? currSE->type : SE_BFRAME]]);
+     
+     dataPart->writeSyntaxElement (currSE, dataPart);
+     bitCount[BITS_COEFF_Y_MB] += currSE->len;
+     rate                      += currSE->len;
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "Luma8x8 sng(%2d) level =%3d run =%2d", k, level,run);
+ #endif
+     /* proceed to next SE */
+     currSE++;  
+     currMB->currSEnr++;
+     
+   }
+     
+   return rate;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Dispatches the luma coefficients of one 8x8 block to the matching
+ *    entropy-coding writer.
+ *
+ *    UVLC mode always codes four 4x4 blocks with writeCoeff4x4_CAVLC().
+ *    In the other symbol mode the 8x8 writer is used when
+ *    transform_size_flag is set, otherwise four 4x4 blocks are written
+ *    with writeLumaCoeff4x4_CABAC().
+ *
+ * \param block8x8
+ *    index of the 8x8 block, forwarded to the writers
+ * \param block_mode
+ *    mode of the block; compared against IBLOCK and I8MB
+ * \param transform_size_flag
+ *    non-zero when the 8x8 transform is in use (asserted for I8MB)
+ *
+ * \return
+ *    sum of the values returned by the writers that were called
+ ************************************************************************
+ */
+ int writeLumaCoeff8x8 (int block8x8, int block_mode, int transform_size_flag)
+ {
+   const int is_i8mb   = (block_mode == I8MB);
+   const int is_iblock = (block_mode == IBLOCK);
+   int       sub;
+   int       bits = 0;
+ 
+   if (is_i8mb)
+   {
+     assert(transform_size_flag == 1);
+   }
+ 
+   if (input->symbol_mode == UVLC)
+   {
+     // CAVLC: the intra flag depends on which transform is in use
+     const int intra_flag = transform_size_flag ? is_i8mb : is_iblock;
+ 
+     for (sub = 0; sub < 4; sub++)
+     {
+       bits += writeCoeff4x4_CAVLC (LUMA, block8x8, sub, intra_flag);
+     }
+     return bits;
+   }
+ 
+   if (transform_size_flag)
+   {
+     return writeLumaCoeff8x8_CABAC (block8x8, is_i8mb);
+   }
+ 
+   for (sub = 0; sub < 4; sub++)
+   {
+     bits += writeLumaCoeff4x4_CABAC (block8x8, sub, is_iblock);
+   }
+ 
+   return bits;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes CBP, DQUANT, and Luma Coefficients of a macroblock
+  *
+  *    Elements are written in this order, each one advancing
+  *    currMB->currSEnr:
+  *     - CBP and, when need_transform_size_flag evaluates true, the 8x8
+  *       transform size flag (both skipped for IS_NEWINTRA macroblocks)
+  *     - delta QP, when cbp != 0 or the macroblock is IS_NEWINTRA
+  *     - luma coefficients: one writeLumaCoeff8x8() call per 8x8 block
+  *       whose cbp bit is set, or, for IS_NEWINTRA, the 16x16 DC levels
+  *       followed (if cbp & 15) by the AC levels of all sixteen 4x4 blocks
+  *    Before the luma coefficients are written, the first four rows of
+  *    img->nz_coeff[img->current_mb_nr] are zeroed.
+  *
+  * \return
+  *    accumulated bit count of everything written by this call
+  ************************************************************************
+  */
+ int writeCBPandLumaCoeff ()
+ {
+   int             mb_x, mb_y, i, j, k;
+   int             level, run;
+   int             rate      = 0;
+   Macroblock*     currMB    = &img->mb_data[img->current_mb_nr];
+   int*            bitCount  = currMB->bitcounter;
+   SyntaxElement*  currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   Slice*          currSlice = img->currentSlice;
+   const int*      partMap   = assignSE2partition[input->partition_mode];
+   int             cbp       = currMB->cbp;
+   DataPartition*  dataPart;
+   int             need_transform_size_flag;   //ADD-VG-24062004
+  
+   int   b8, b4;
+   int*  DCLevel = img->cofDC[0][0];
+   int*  DCRun   = img->cofDC[0][1];
+   int*  ACLevel;
+   int*  ACRun;
+ 
+   if (!IS_NEWINTRA (currMB))
+   {
+     //=====   C B P   =====
+     //---------------------
+     currSE->value1 = cbp;
+     
+     if (IS_OLDINTRA (currMB) || currMB->mb_type == SI4MB ||  currMB->mb_type == I8MB)
+     {
+       if (input->symbol_mode == UVLC)  currSE->mapping = cbp_linfo_intra;
+       currSE->type = SE_CBP_INTRA;
+     }
+     else
+     {
+       if (input->symbol_mode == UVLC)  currSE->mapping = cbp_linfo_inter;
+       currSE->type = SE_CBP_INTER;
+     }
+     if (input->symbol_mode == CABAC)   currSE->writing = writeCBP_CABAC;
+                       
+     // choose the appropriate data partition
+     dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+     
+     dataPart->writeSyntaxElement(currSE, dataPart);
+     bitCount[BITS_CBP_MB] += currSE->len;
+     rate                  += currSE->len;
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "CBP (%2d,%2d) = %3d",img->mb_x, img->mb_y, cbp);
+ #endif
+     // proceed to next SE
+     currSE++;  
+     currMB->currSEnr++;
+ 
+ 
+     //============= Transform Size Flag for INTER MBs =============
+     //-------------------------------------------------------------
+     need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)||
+                                 (IS_DIRECT(currMB) && active_sps->direct_8x8_inference_flag) ||
+                                 (currMB->NoMbPartLessThan8x8Flag))
+                                 && currMB->mb_type != I8MB && currMB->mb_type != I4MB
+                                 && (currMB->cbp&15)
+                                 && input->Transform8x8Mode);
+ 
+     if (need_transform_size_flag)
+     {
+       currSE->value1 = currMB->luma_transform_size_8x8_flag;
+       currSE->type   = SE_HEADER;
+     
+       if (input->symbol_mode==UVLC)   currSE->mapping = ue_linfo;
+       else                            currSE->writing = writeMB_transform_size_CABAC;
+     
+       if( input->symbol_mode==UVLC)
+       {
+         currSE->bitpattern = currMB->luma_transform_size_8x8_flag;
+         currSE->len = 1;
+         writeSyntaxElement2Buf_Fixed(currSE, dataPart->bitstream); // dataPart is still the partition chosen for the CBP element
+       }
+       else
+       {
+         dataPart->writeSyntaxElement(currSE, dataPart); // dataPart is still the partition chosen for the CBP element
+       }
+   #if TRACE
+       snprintf(currSE->tracestring, TRACESTRING_SIZE, "transform size 8x8 flag = %3d", currMB->luma_transform_size_8x8_flag);
+   #endif
+     
+       bitCount[BITS_MB_MODE] += currSE->len;
+       rate                   += currSE->len;
+       currSE++;
+       currMB->currSEnr++;
+     }
+   }
+   
+   //=====   DQUANT   =====
+   //----------------------
+   if (cbp!=0 || IS_NEWINTRA (currMB))
+   {
+     currSE->value1 = currMB->delta_qp;
+ 
+     if (input->symbol_mode==UVLC)   currSE->mapping = se_linfo;
+     else                            currSE->writing = writeDquant_CABAC;
+ 
+     if (IS_INTER (currMB))  currSE->type = SE_DELTA_QUANT_INTER;
+     else                    currSE->type = SE_DELTA_QUANT_INTRA;
+ 
+ 
+     // choose the appropriate data partition
+     dataPart = &(img->currentSlice->partArr[partMap[currSE->type]]);
+     dataPart->writeSyntaxElement(  currSE, dataPart);
+     bitCount[BITS_DELTA_QUANT_MB] += currSE->len;
+     rate                          += currSE->len;
+ #if TRACE
+     snprintf(currSE->tracestring, TRACESTRING_SIZE, "Delta QP (%2d,%2d) = %3d",img->mb_x, img->mb_y, currMB->delta_qp);
+ #endif
+     // proceed to next SE
+     currSE++;
+     currMB->currSEnr++;
+   }
+ 
+   for (i=0; i < 4; i++)
+     memset(img->nz_coeff [img->current_mb_nr][i], 0, (4 + img->num_blk8x8_uv) * sizeof(int)); // zero 4 + num_blk8x8_uv counters in each of the four rows
+ 
+   if (!IS_NEWINTRA (currMB))
+   {
+     //=====  L U M I N A N C E   =====
+     //--------------------------------
+     for (i=0; i<4; i++)  if (cbp & (1<<i))
+     {
+       rate += writeLumaCoeff8x8 (i, currMB->b8mode[i], currMB->luma_transform_size_8x8_flag);
+     }
+   }
+   else
+   {
+     //=====  L U M I N A N C E   f o r   1 6 x 1 6   =====
+     //----------------------------------------------------
+     // DC coeffs
+     if (input->symbol_mode == UVLC)
+     {
+       rate += writeCoeff4x4_CAVLC (LUMA_INTRA16x16DC, 0, 0, 0);  // CAVLC
+     }
+     else
+     {
+       level=1; // get inside loop
+       for (k=0; k<=16 && level!=0; k++)
+       {
+         level = currSE->value1 = DCLevel[k]; // level
+         run   = currSE->value2 = DCRun  [k]; // run
+ 
+         if (input->symbol_mode == UVLC) // never true here: the enclosing else already excluded UVLC
+         {
+           currSE->mapping = levrun_linfo_inter;
+         }
+         else
+         {
+           currSE->writing = writeRunLevel_CABAC;
+         }
+ 
+         currSE->context     = LUMA_16DC;
+         currSE->type        = SE_LUM_DC_INTRA;   // element is of type DC
+         img->is_intra_block = 1;
+ 
+         // choose the appropriate data partition
+         dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+     
+         dataPart->writeSyntaxElement (currSE, dataPart);
+         bitCount[BITS_COEFF_Y_MB] += currSE->len;
+         rate                      += currSE->len;
+ #if TRACE
+         snprintf(currSE->tracestring, TRACESTRING_SIZE, "DC luma 16x16 sng(%2d) level =%3d run =%2d", k, level, run);
+ #endif
+         // proceed to next SE
+         currSE++;
+         currMB->currSEnr++;
+       }
+     }
+ 
+     // AC coeffs
+     if (cbp & 15)
+     {
+       for (mb_y=0; mb_y < 4; mb_y += 2)
+       for (mb_x=0; mb_x < 4; mb_x += 2)
+       for (j=mb_y; j < mb_y+2; j++)
+       for (i=mb_x; i < mb_x+2; i++)
+       {
+         b8      = 2*(j >> 1) + (i >> 1);
+         b4      = 2*(j & 0x01) + (i & 0x01);
+         if (input->symbol_mode == UVLC)
+         {
+           rate += writeCoeff4x4_CAVLC (LUMA_INTRA16x16AC, b8, b4, 0);  // CAVLC
+         }
+         else
+         {
+           ACLevel = img->cofAC[b8][b4][0];
+           ACRun   = img->cofAC[b8][b4][1];
+ 
+           img->subblock_y = j;
+           img->subblock_x = i;
+ 
+           level=1; // get inside loop
+           for (k=0;k<16 && level !=0;k++)
+           {
+             level = currSE->value1 = ACLevel[k]; // level
+             run   = currSE->value2 = ACRun  [k]; // run
+ 
+             if (input->symbol_mode == UVLC) // never true here: the enclosing else already excluded UVLC
+             {
+               currSE->mapping = levrun_linfo_inter;
+             }
+             else
+             {
+               currSE->writing = writeRunLevel_CABAC;
+             }
+             currSE->context     = LUMA_16AC;
+             currSE->type        = SE_LUM_AC_INTRA;   // element is of type AC
+             img->is_intra_block = 1;
+ 
+             // choose the appropriate data partition
+            dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+ 
+             dataPart->writeSyntaxElement (currSE, dataPart);
+             bitCount[BITS_COEFF_Y_MB] += currSE->len;
+             rate                      += currSE->len;
+ #if TRACE
+             snprintf(currSE->tracestring, TRACESTRING_SIZE, "AC luma 16x16 sng(%2d) level =%3d run =%2d", k, level, run);
+ #endif
+             // proceed to next SE
+             currSE++;
+             currMB->currSEnr++;
+           }
+         }
+       }
+     }
+   }
+ 
+   return rate;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Predicts the number of nonzero coefficients of a luma 4x4 block
+  *    from its left and upper neighbours.
+  *
+  *    A neighbour contributes only if getLuma4x4Neighbour() reports it
+  *    as available; when constrained intra prediction is active and
+  *    input->partition_mode is non-zero, availability is additionally
+  *    masked with img->intra_block[] of the neighbour's macroblock.
+  *
+  * \param i
+  *    first block coordinate handed to getLuma4x4Neighbour()
+  * \param j
+  *    second block coordinate handed to getLuma4x4Neighbour()
+  *
+  * \return
+  *    0 with no usable neighbour, that neighbour's count with one, and
+  *    (left + top + 1) / 2 with two
+  ************************************************************************
+  */
+ int predict_nnz(int i,int j)
+ {
+   static const int neighbour_offset[2][2] = { {-1, 0}, {0, -1} };  // left, then top
+   PixelPos  nb;
+   int       which;
+   int       total  = 0;
+   int       found  = 0;
+   const int cur_mb = img->current_mb_nr;
+ 
+   for (which = 0; which < 2; which++)
+   {
+     getLuma4x4Neighbour(cur_mb, i, j, neighbour_offset[which][0], neighbour_offset[which][1], &nb);
+ 
+     if (nb.available && active_pps->constrained_intra_pred_flag && (input->partition_mode != 0))
+     {
+       nb.available &= img->intra_block[nb.mb_addr];
+     }
+ 
+     if (!nb.available)
+       continue;
+ 
+     total += img->nz_coeff [nb.mb_addr ][nb.x][nb.y];
+     found++;
+   }
+ 
+   // average with rounding only when both neighbours contributed
+   if (found == 2)
+   {
+     total = (total + 1) / found;
+   }
+ 
+   return total;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Get the Prediction from the Neighboring Blocks for Number of Nonzero Coefficients 
+  *    
+  *    Chroma Blocks   
+  *
+  *    For formats other than YUV444 the neighbours are queried with
+  *    (i & 1, j - 4) and their counts are read from
+  *    img->nz_coeff[mb][2 * (i >> 1) + pix.x][4 + pix.y]. For YUV444 the
+  *    neighbours are queried with (i, j - j_off) and read from
+  *    img->nz_coeff[mb][pix.x][j_off + pix.y], where j_off comes from
+  *    j_off_tab[j].
+  *
+  *    A neighbour contributes only if it is reported available; when
+  *    constrained intra prediction is active and input->partition_mode
+  *    is non-zero, availability is additionally masked with
+  *    img->intra_block[] of the neighbour's macroblock.
+  *
+  * \param i
+  *    first block coordinate (see the index arithmetic above)
+  * \param j
+  *    second block coordinate; indexes the 12-entry j_off_tab, so it
+  *    must lie in 0..11
+  *
+  * \return
+  *    0 with no usable neighbour, that neighbour's count with one, and
+  *    (left + top + 1) / 2 with two
+  ************************************************************************
+  */
+ int predict_nnz_chroma(int i,int j)
+ {
+   PixelPos pix;
+   
+   int pred_nnz = 0;
+   int cnt      = 0;
+   int mb_nr    = img->current_mb_nr;
+   static int j_off_tab [12] = {0,0,0,0,4,4,4,4,8,8,8,8};
+   int j_off = j_off_tab[j]; // read for every format, but only used in the YUV444 branch
+   
+ 
+   if (img->yuv_format != YUV444)
+   {
+     //YUV420 and YUV422
+     // left block
+     getChroma4x4Neighbour(mb_nr, i & 0x01, j-4, -1, 0, &pix);
+ 
+     if (pix.available && active_pps->constrained_intra_pred_flag && (input->partition_mode != 0))
+     {
+       pix.available &= img->intra_block[pix.mb_addr];
+     }
+ 
+     if (pix.available)
+     {
+       pred_nnz = img->nz_coeff [pix.mb_addr ][2 * (i >> 1) + pix.x][4 + pix.y];
+       cnt++;
+     }
+     
+     // top block
+     getChroma4x4Neighbour(mb_nr, i & 0x01, j-4, 0, -1, &pix);
+ 
+     if (pix.available && active_pps->constrained_intra_pred_flag && (input->partition_mode != 0))
+     {
+       pix.available &= img->intra_block[pix.mb_addr];
+     }
+ 
+     if (pix.available)
+     {
+       pred_nnz += img->nz_coeff [pix.mb_addr ][2 * (i >> 1) + pix.x][4 + pix.y];
+       cnt++;
+     }
+   }
+   else
+   {
+     //YUV444
+     // left block
+     getChroma4x4Neighbour(mb_nr, i, j-j_off, -1, 0, &pix);
+ 
+     if (pix.available && active_pps->constrained_intra_pred_flag && (input->partition_mode != 0))
+     {
+       pix.available &= img->intra_block[pix.mb_addr];
+     }
+ 
+     if (pix.available)
+     {
+       pred_nnz = img->nz_coeff [pix.mb_addr ][pix.x][j_off + pix.y];
+       cnt++;
+     }
+     
+     // top block
+     getChroma4x4Neighbour(mb_nr, i, j-j_off, 0, -1, &pix);
+ 
+     if (pix.available && active_pps->constrained_intra_pred_flag && (input->partition_mode != 0))
+     {
+       pix.available &= img->intra_block[pix.mb_addr];
+     }
+ 
+     if (pix.available)
+     {
+       pred_nnz += img->nz_coeff [pix.mb_addr ][pix.x][j_off + pix.y];
+       cnt++;
+     }
+     
+   }
+   
+   if (cnt==2)
+   {
+     pred_nnz++; // +1 so the division below rounds half up
+     pred_nnz/=cnt; 
+   }
+ 
+   return pred_nnz;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes coeff of an 4x4 block (CAVLC)
+  *
+  * \author
+  *    Karl Lillevold <karll at real.com>
+  *    contributions by James Au <james at ubvideo.com>
+  ************************************************************************
+  */
+ 
+ int writeCoeff4x4_CAVLC (int block_type, int b8, int b4, int param)
+ {
+   int           no_bits    = 0;
+   Macroblock    *currMB    = &img->mb_data[img->current_mb_nr];
+   SyntaxElement *currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   int           *bitCount  = currMB->bitcounter;
+   Slice         *currSlice = img->currentSlice;
+   DataPartition *dataPart;
+   int           *partMap   = assignSE2partition[input->partition_mode];
+ 
+   int k,level,run,vlcnum;
+   int numcoeff, lastcoeff, numtrailingones; 
+   int numones, totzeros, zerosleft, numcoef;
+   int numcoeff_vlc;
+   int code, level_two_or_higher;
+   int dptype = 0, bitcounttype = 0;
+   int nnz, max_coeff_num = 0, cdc=0, cac=0;
+   int subblock_x, subblock_y;
+   char type[15];
+ 
+   static int incVlc[] = {0,3,6,12,24,48,32768};  // maximum vlc = 6
+ 
+ 
+   int*  pLevel = NULL;
+   int*  pRun = NULL;
+ 
+   switch (block_type)
+   {
+   case LUMA:
+     max_coeff_num = 16;
+     bitcounttype = BITS_COEFF_Y_MB;
+ 
+     pLevel = img->cofAC[b8][b4][0];
+     pRun   = img->cofAC[b8][b4][1];
+ 
+     sprintf(type, "%s", "Luma");
+     if (IS_INTRA (currMB))
+     {
+       dptype = SE_LUM_AC_INTRA;
+     }
+     else
+     {
+       dptype = SE_LUM_AC_INTER;
+     }
+     break;
+   case LUMA_INTRA16x16DC:
+     max_coeff_num = 16;
+     bitcounttype = BITS_COEFF_Y_MB;
+ 
+     pLevel = img->cofDC[0][0];
+     pRun   = img->cofDC[0][1];
+ 
+     sprintf(type, "%s", "Lum16DC");
+     dptype = SE_LUM_DC_INTRA;
+     break;
+   case LUMA_INTRA16x16AC:
+     max_coeff_num = 15;
+     bitcounttype = BITS_COEFF_Y_MB;
+ 
+     pLevel = img->cofAC[b8][b4][0];
+     pRun   = img->cofAC[b8][b4][1];
+ 
+     sprintf(type, "%s", "Lum16AC");
+     dptype = SE_LUM_AC_INTRA;
+     break;
+ 
+   case CHROMA_DC:
+     max_coeff_num = img->num_cdc_coeff;
+     bitcounttype = BITS_COEFF_UV_MB;
+     cdc = 1;
+ 
+     pLevel = img->cofDC[param+1][0];
+     pRun   = img->cofDC[param+1][1];
+ 
+     sprintf(type, "%s", "ChrDC");
+     if (IS_INTRA (currMB))
+     {
+       dptype = SE_CHR_DC_INTRA;
+     }
+     else
+     {
+       dptype = SE_CHR_DC_INTER;
+     }
+     break;
+   case CHROMA_AC:
+     max_coeff_num = 15;
+     bitcounttype = BITS_COEFF_UV_MB;
+     cac = 1;
+ 
+     pLevel = img->cofAC[b8][b4][0];
+     pRun   = img->cofAC[b8][b4][1];
+ 
+     sprintf(type, "%s", "ChrAC");
+     if (IS_INTRA (currMB))
+     {
+       dptype = SE_CHR_AC_INTRA;
+     }
+     else
+     {
+       dptype = SE_CHR_AC_INTER;
+     }
+     break;
+   default:
+     error("writeCoeff4x4_CAVLC: Invalid block type", 600);
+     break;
+   }
+ 
+   dataPart = &(currSlice->partArr[partMap[dptype]]);
+ 
+   numcoeff = 0;
+   numtrailingones = 0;
+   numones = 0;
+   lastcoeff = 0;
+   totzeros = 0;
+   level = 1;
+ 
+   for(k = 0; (k <= ((cdc)?img->num_cdc_coeff:16))&& level !=0; k++)
+   {
+     level = pLevel[k]; // level
+     run   = pRun[k];   // run
+ 
+     if (level)
+     {
+       if (run)
+         totzeros += run;
+       if (abs(level) == 1)
+       {
+         numtrailingones ++;
+         numones ++;
+         if (numtrailingones > 3)
+         {
+           numtrailingones = 3; /* clip to 3 */
+         }
+       }
+       else
+       {
+         numtrailingones = 0;
+       }
+       numcoeff ++;
+       lastcoeff = k;
+     }
+   }
+ 
+   if (!cdc)
+   {
+     if (!cac)
+     {
+       // luma
+       subblock_x = ((b8&0x1)==0)?(((b4&0x1)==0)?0:1):(((b4&0x1)==0)?2:3); 
+         // horiz. position for coeff_count context
+       subblock_y = (b8<2)?((b4<2)?0:1):((b4<2)?2:3); 
+         // vert.  position for coeff_count context
+       nnz = predict_nnz(subblock_x,subblock_y);
+     }
+     else
+     {
+       // chroma AC
+       subblock_x = param >> 4;
+       subblock_y = param & 15;
+       nnz = predict_nnz_chroma(subblock_x,subblock_y);
+     }
+ 
+     img->nz_coeff [img->current_mb_nr ][subblock_x][subblock_y] = numcoeff;
+ 
+ 
+     if (nnz < 2)
+     {
+       numcoeff_vlc = 0;
+     }
+     else if (nnz < 4)
+     {
+       numcoeff_vlc = 1;
+     }
+     else if (nnz < 8)
+     {
+       numcoeff_vlc = 2;
+     }
+     else 
+     {
+       numcoeff_vlc = 3;
+     }
+ 
+   }
+   else
+   {
+     // chroma DC (has its own VLC)
+     // numcoeff_vlc not relevant
+     numcoeff_vlc = 0;
+ 
+     subblock_x = param;
+     subblock_y = param;
+   }
+ 
+   currSE->type  = dptype;   
+ 
+   currSE->value1 = numcoeff;
+   currSE->value2 = numtrailingones;
+   currSE->len = numcoeff_vlc; /* use len to pass vlcnum */
+ 
+ #if TRACE
+   snprintf(currSE->tracestring, 
+     TRACESTRING_SIZE, "%s # c & tr.1s(%d,%d) vlc=%d #c=%d #t1=%d",
+     type, subblock_x, subblock_y, numcoeff_vlc, numcoeff, numtrailingones);
+ #endif
+ 
+   if (!cdc)
+     writeSyntaxElement_NumCoeffTrailingOnes(currSE, dataPart);
+   else
+     writeSyntaxElement_NumCoeffTrailingOnesChromaDC(currSE, dataPart);
+ 
+   bitCount[bitcounttype]+=currSE->len;
+   no_bits               +=currSE->len;
+ 
+   // proceed to next SE
+   currSE++;
+   currMB->currSEnr++;
+ 
+ 
+   if (!numcoeff)
+     return no_bits;
+ 
+   if (numcoeff)
+   {
+     code = 0;
+     for (k = lastcoeff; k > lastcoeff-numtrailingones; k--)
+     {
+       level = pLevel[k]; // level
+       if (abs(level) > 1)
+       {
+         printf("ERROR: level > 1\n");
+         exit(-1);
+       }
+       code <<= 1;
+       if (level < 0)
+       {
+         code |= 0x1;
+       }
+     }
+ 
+     if (numtrailingones)
+     {
+       currSE->type  = dptype;   
+ 
+       currSE->value2 = numtrailingones;
+       currSE->value1 = code;
+ 
+ #if TRACE
+       snprintf(currSE->tracestring, 
+         TRACESTRING_SIZE, "%s trailing ones sign (%d,%d)", 
+         type, subblock_x, subblock_y);
+ #endif
+ 
+       writeSyntaxElement_VLC (currSE, dataPart);
+       bitCount[bitcounttype]+=currSE->len;
+       no_bits               +=currSE->len;
+ 
+       // proceed to next SE
+       currSE++;
+       currMB->currSEnr++;
+     }
+ 
+     // encode levels
+     level_two_or_higher = 1;
+     if (numcoeff > 3 && numtrailingones == 3)
+       level_two_or_higher = 0;
+ 
+     if (numcoeff > 10 && numtrailingones < 3)
+       vlcnum = 1;
+     else
+       vlcnum = 0;
+ 
+     for (k = lastcoeff - numtrailingones; k >= 0; k--)
+     {
+       level = pLevel[k]; // level
+ 
+       currSE->value1 = level;
+       currSE->type  = dptype;   
+ 
+   #if TRACE
+         snprintf(currSE->tracestring, 
+           TRACESTRING_SIZE, "%s lev (%d,%d) k=%d vlc=%d lev=%3d",
+             type, subblock_x, subblock_y, k, vlcnum, level);
+   #endif
+ 
+           if (level_two_or_higher)
+           {
+             if (currSE->value1 > 0)
+               currSE->value1 --;
+             else
+               currSE->value1 ++;
+             level_two_or_higher = 0;
+           }
+ 
+       //    encode level
+       if (vlcnum == 0)
+         writeSyntaxElement_Level_VLC1(currSE, dataPart);
+       else
+         writeSyntaxElement_Level_VLCN(currSE, vlcnum, dataPart);
+ 
+       // update VLC table
+       if (abs(level)>incVlc[vlcnum])
+         vlcnum++;
+ 
+       if (k == lastcoeff - numtrailingones && abs(level)>3)
+         vlcnum = 2;
+ 
+       bitCount[bitcounttype]+=currSE->len;
+       no_bits               +=currSE->len;
+ 
+       // proceed to next SE
+       currSE++;
+       currMB->currSEnr++;
+     }
+ 
+     // encode total zeroes
+     if (numcoeff < max_coeff_num)
+     {
+ 
+       currSE->type  = dptype;   
+       currSE->value1 = totzeros;
+ 
+       vlcnum = numcoeff-1;
+ 
+       currSE->len = vlcnum;
+ 
+ #if TRACE
+       snprintf(currSE->tracestring, 
+         TRACESTRING_SIZE, "%s totalrun (%d,%d) vlc=%d totzeros=%3d",
+           type, subblock_x, subblock_y, vlcnum, totzeros);
+ #endif
+       if (!cdc)
+         writeSyntaxElement_TotalZeros(currSE, dataPart);
+       else
+         writeSyntaxElement_TotalZerosChromaDC(currSE, dataPart);
+ 
+       bitCount[bitcounttype]+=currSE->len;
+       no_bits               +=currSE->len;
+ 
+       // proceed to next SE
+       currSE++;
+       currMB->currSEnr++;
+     }
+ 
+     // encode run before each coefficient
+     zerosleft = totzeros;
+     numcoef = numcoeff;
+     for (k = lastcoeff; k >= 0; k--)
+     {
+       run = pRun[k]; // run
+ 
+       currSE->value1 = run;
+       currSE->type  = dptype;   
+ 
+       // for last coeff, run is remaining totzeros
+       // when zerosleft is zero, remaining coeffs have 0 run
+       if (numcoeff <= 1 || !zerosleft)
+         break;
+ 
+       if (numcoef > 1 && zerosleft) 
+       {
+ 
+         vlcnum = zerosleft - 1;
+         if (vlcnum > RUNBEFORE_NUM-1)
+           vlcnum = RUNBEFORE_NUM-1;
+ 
+         currSE->len = vlcnum;
+ 
+ #if TRACE
+         snprintf(currSE->tracestring, 
+           TRACESTRING_SIZE, "%s run (%d,%d) k=%d vlc=%d run=%2d",
+             type, subblock_x, subblock_y, k, vlcnum, run);
+ #endif
+ 
+         writeSyntaxElement_Run(currSE, dataPart);
+ 
+         bitCount[bitcounttype]+=currSE->len;
+         no_bits               +=currSE->len;
+ 
+         zerosleft -= run;
+         numcoef --;
+ 
+         // proceed to next SE
+         currSE++;
+         currMB->currSEnr++;
+       }
+     }
+   }
+ 
+   return no_bits;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Find best 16x16 based intra mode
+  *
+  *    Each of the four 16x16 intra modes (k = 0..3) is evaluated unless it
+  *    is skipped via input->Intra16x16ParDisable / Intra16x16PlaneDisable
+  *    (honoured when input->IntraDisableInterOnly is 0 or the slice is not
+  *    an I slice) or unless the neighbours it predicts from are missing.
+  *    For an evaluated mode the residual between the original luma
+  *    macroblock (imgY_org) and the prediction img->mprr_2[k] is
+  *    transformed per 4x4 block with add/subtract butterflies. The cost is
+  *    the sum of absolute values of all transformed terms except the (0,0)
+  *    term of every block, plus the absolute values obtained by applying
+  *    the same butterflies to the sixteen (0,0) terms after each has been
+  *    divided by 4.
+  *
+  * \par Input:
+  *    Image parameters, pointer to best 16x16 intra mode
+  *
+  * \param intra_mode
+  *    receives the mode with the lowest cost; it is preset to DC_PRED_16
+  *    before the search starts
+  *
+  * \par Output:
+  *    best 16x16 based SAD
+  *
+  * \return
+  *    lowest cost found, divided by 2 (MAX_VALUE/2 if no mode was evaluated)
+  ************************************************************************/
+ int find_sad_16x16(int *intra_mode)
+ {
+   int current_intra_sad_2,best_intra_sad2;
+   int M1[16][16],M0[4][4][4][4],M3[4],M4[4][4];   // M1: residual, M0: residual regrouped per 4x4 block, M3: butterfly scratch, M4: per-block (0,0) terms
+ 
+   int i,j,k;
+   int ii,jj;
+   int mb_nr = img->current_mb_nr;
+   
+   PixelPos up;          //!< pixel position p(0,-1)
+   PixelPos left[17];    //!< pixel positions p(-1, -1..15)
+ 
+   int up_avail, left_avail, left_up_avail;
+ 
+   for (i=0;i<17;i++)
+   {
+     getNeighbour(mb_nr, -1 ,  i-1 , 1, &left[i]);
+   }
+   
+   getNeighbour(mb_nr, 0     ,  -1 , 1, &up);
+ 
+   if (!(input->UseConstrainedIntraPred))
+   {
+     up_avail   = up.available;
+     left_avail = left[1].available;
+     left_up_avail = left[0].available;
+   }
+   else
+   {
+     up_avail      = up.available ? img->intra_block[up.mb_addr] : 0;
+     for (i=1, left_avail=1; i<17;i++)   // all 16 left positions must be available and flagged in img->intra_block
+       left_avail  &= left[i].available ? img->intra_block[left[i].mb_addr]: 0;
+     left_up_avail = left[0].available ? img->intra_block[left[0].mb_addr]: 0;
+   }
+ 
+   best_intra_sad2=MAX_VALUE;
+   *intra_mode = DC_PRED_16;
+ 
+   for (k=0;k<4;k++)
+   {
+     if (input->IntraDisableInterOnly == 0 || img->type != I_SLICE)
+     {
+       if (input->Intra16x16ParDisable && (k==VERT_PRED_16||k==HOR_PRED_16))
+         continue;
+       
+       if (input->Intra16x16PlaneDisable && k==PLANE_16)
+         continue;
+     }
+     // check if there are neighbours to predict from: k==0 needs up, k==1 needs left, k==3 needs up, left and up-left
+     if ((k==0 && !up_avail) || (k==1 && !left_avail) || (k==3 && (!left_avail || !up_avail || !left_up_avail)))
+     {
+       ; // edge, do nothing
+     }
+     else
+     {
+       for (j=0;j<16;j++)
+       {
+         for (i=0;i<16;i++)
+         {
+           M1[j][i]=imgY_org[img->opix_y+j][img->opix_x+i]-img->mprr_2[k][j][i];
+           M0[i & 0x03][i >> 2][j & 0x03][j >> 2]=M1[j][i];   // index order: [x in block][block column][y in block][block row]
+         }
+       }
+       current_intra_sad_2=0;              // no SAD start handicap here
+       for (jj=0;jj<4;jj++)
+       {
+         for (ii=0;ii<4;ii++)
+         {
+           for (j=0;j<4;j++)   // butterfly along the first index (x within the block)
+           {
+             M3[0]=M0[0][ii][j][jj]+M0[3][ii][j][jj];
+             M3[1]=M0[1][ii][j][jj]+M0[2][ii][j][jj];
+             M3[2]=M0[1][ii][j][jj]-M0[2][ii][j][jj];
+             M3[3]=M0[0][ii][j][jj]-M0[3][ii][j][jj];
+ 
+             M0[0][ii][j][jj]=M3[0]+M3[1];
+             M0[2][ii][j][jj]=M3[0]-M3[1];
+             M0[1][ii][j][jj]=M3[2]+M3[3];
+             M0[3][ii][j][jj]=M3[3]-M3[2];
+           }
+ 
+           for (i=0;i<4;i++)   // butterfly along the third index (y within the block), then accumulate
+           {
+             M3[0]=M0[i][ii][0][jj]+M0[i][ii][3][jj];
+             M3[1]=M0[i][ii][1][jj]+M0[i][ii][2][jj];
+             M3[2]=M0[i][ii][1][jj]-M0[i][ii][2][jj];
+             M3[3]=M0[i][ii][0][jj]-M0[i][ii][3][jj];
+ 
+             M0[i][ii][0][jj]=M3[0]+M3[1];
+             M0[i][ii][2][jj]=M3[0]-M3[1];
+             M0[i][ii][1][jj]=M3[2]+M3[3];
+             M0[i][ii][3][jj]=M3[3]-M3[2];
+             for (j=0;j<4;j++)
+               if ((i+j)!=0)   // the (0,0) term is left out here; it enters the second stage below
+                 current_intra_sad_2 += abs(M0[i][ii][j][jj]);
+           }
+         }
+       }
+ 
+       for (j=0;j<4;j++)
+         for (i=0;i<4;i++)
+           M4[j][i]=M0[0][i][0][j]/4;   // (0,0) term of the block in column i, row j
+ 
+         // Hadamard of DC coefficients
+         for (j=0;j<4;j++)
+         {
+           M3[0]=M4[j][0]+M4[j][3];
+           M3[1]=M4[j][1]+M4[j][2];
+           M3[2]=M4[j][1]-M4[j][2];
+           M3[3]=M4[j][0]-M4[j][3];
+ 
+           M4[j][0]=M3[0]+M3[1];
+           M4[j][2]=M3[0]-M3[1];
+           M4[j][1]=M3[2]+M3[3];
+           M4[j][3]=M3[3]-M3[2];
+         }
+ 
+         for (i=0;i<4;i++)
+         {
+           M3[0]=M4[0][i]+M4[3][i];
+           M3[1]=M4[1][i]+M4[2][i];
+           M3[2]=M4[1][i]-M4[2][i];
+           M3[3]=M4[0][i]-M4[3][i];
+ 
+           M4[0][i]=M3[0]+M3[1];
+           M4[2][i]=M3[0]-M3[1];
+           M4[1][i]=M3[2]+M3[3];
+           M4[3][i]=M3[3]-M3[2];
+ 
+           for (j=0;j<4;j++)
+             current_intra_sad_2 += abs(M4[j][i]);
+         }
+         if(current_intra_sad_2 < best_intra_sad2)
+         {
+           best_intra_sad2=current_intra_sad_2;
+           *intra_mode = k; // update best intra mode
+ 
+         }
+     }
+   }
+   best_intra_sad2 = best_intra_sad2/2;   // the returned cost is half of the accumulated sum
+ 
+   return best_intra_sad2;
+ 
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/macroblock.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/macroblock.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/macroblock.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,112 ----
+ 
+ /*!
+  ************************************************************************
+  * \file
+  *    macroblock.h
+  *
+  * \brief
+  *    Arrays for macroblock processing
+  *
+  * \author
+  *    Inge Lille-Langoy               <inge.lille-langoy at telenor.com>     \n
+  *    Telenor Satellite Services                                          \n
+  *    P.O.Box 6914 St.Olavs plass                                         \n
+  *    N-0130 Oslo, Norway
+  *
+  ************************************************************************/
+ 
+ #ifndef _MACROBLOCK_H_
+ #define _MACROBLOCK_H_
+ 
+ 
+ //! just to make new temp intra mode table (3 rows x 2 columns; NOTE(review): the meaning of the row and column index is not visible in this header - confirm against the code that reads MODTAB)
+ const int  MODTAB[3][2]=   // NOTE(review): this is a definition inside a header - confirm the header is included by a single source file only
+ {
+   { 0, 4},
+   {16,12},
+   { 8,20}
+ };
+ 
+ //! gives codeword number from CBP value, both for intra and inter
+ const unsigned char NCBP[2][48][2]=   // [table][CBP value 0..47][pair]; NOTE(review): which pair entry is intra and which is inter, and what selects the table, is not visible here - confirm against users
+ {
+   {  // 0      1        2       3       4       5       6       7       8       9      10      11
+     { 1, 0},{10, 1},{11, 2},{ 6, 5},{12, 3},{ 7, 6},{14,14},{ 2,10},{13, 4},{15,15},{ 8, 7},{ 3,11},
+     { 9, 8},{ 4,12},{ 5,13},{ 0, 9},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},
+     { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},
+     { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0}
+   },   // first table: entries 16..47 are all {0,0}
+   {   // second table: all 48 entries are populated
+     { 3, 0},{29, 2},{30, 3},{17, 7},{31, 4},{18, 8},{37,17},{ 8,13},{32, 5},{38,18},{19, 9},{ 9,14},
+     {20,10},{10,15},{11,16},{ 2,11},{16, 1},{33,32},{34,33},{21,36},{35,34},{22,37},{39,44},{ 4,40},
+     {36,35},{40,45},{23,38},{ 5,41},{24,39},{ 6,42},{ 7,43},{ 1,19},{41, 6},{42,24},{43,25},{25,20},
+     {44,26},{26,21},{46,46},{12,28},{45,27},{47,47},{27,22},{13,29},{28,23},{14,30},{15,31},{ 0,12}
+   },
+ };
+ 
+ 
+ extern int QP2QUANT[40];          // declaration only; the array is not defined in this header
+ extern int ver_offset[4][8][4];   // declaration only; the array is not defined in this header
+ extern int hor_offset[4][8][4];   // declaration only; the array is not defined in this header
+ 
+ const unsigned char subblk_offset_x[3][8][4] =   // x offsets (0, 4, 8 or 12), three tables of 8 rows x 4 entries; NOTE(review): presumably indexed [chroma format][8x8 block][4x4 sub-block] - confirm against users
+ {
+   { {0, 4, 0, 4},    // table 0: rows 2..7 are all zero
+     {0, 4, 0, 4}, 
+     {0, 0, 0, 0}, 
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},  
+     {0, 0, 0, 0},  
+     {0, 0, 0, 0},  
+     {0, 0, 0, 0}, },
+   
+   { {0, 4, 0, 4},    // table 1: rows 4..7 are all zero
+     {0, 4, 0, 4}, 
+     {0, 4, 0, 4}, 
+     {0, 4, 0, 4},
+     {0, 0, 0, 0},    
+     {0, 0, 0, 0},    
+     {0, 0, 0, 0},    
+     {0, 0, 0, 0}, },
+   
+   { {0, 4, 0, 4},    // table 2: all 8 rows are populated
+     {8,12, 8,12},
+     {0, 4, 0, 4},
+     {8,12, 8,12},
+     {0, 4, 0, 4},  
+     {8,12, 8,12},  
+     {0, 4, 0, 4},  
+     {8,12, 8,12}  }
+ };
+     
+ const unsigned char subblk_offset_y[3][8][4] =   // y offsets (0, 4, 8 or 12), three tables of 8 rows x 4 entries; NOTE(review): presumably indexed like subblk_offset_x - confirm against users
+ { { {0, 0, 4, 4},    // table 0: rows 2..7 are all zero
+     {0, 0, 4, 4},
+     {0, 0, 0, 0}, 
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0}, 
+     {0, 0, 0, 0},
+     {0, 0, 0, 0}, },
+ 
+   { {0, 0, 4, 4},    // table 1: rows 4..7 are all zero
+     {8, 8,12,12}, 
+     {0, 0, 4, 4},
+     {8, 8,12,12},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0}, 
+     {0, 0, 0, 0},
+     {0, 0, 0, 0}  },
+ 
+   { {0, 0, 4, 4},    // table 2: all 8 rows are populated
+     {0, 0, 4, 4},
+     {8, 8,12,12},
+     {8, 8,12,12}, 
+     {0, 0, 4, 4},
+     {0, 0, 4, 4},
+     {8, 8,12,12},
+     {8, 8,12,12} }
+ };
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/mb_access.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mb_access.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mb_access.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,683 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file mb_access.c
+  *
+  * \brief
+  *    Functions for macroblock neighborhoods
+  *
+  *  \author
+  *      Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Karsten Sühring          <suehring at hhi.de>
+  *************************************************************************************
+  */
+ #include <assert.h>
+ 
+ #include "global.h"
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    tells whether the macroblock at the given address is available
+  * \param mbAddr
+  *    address of the candidate macroblock
+  * \param currMbAddr
+  *    address of the macroblock the candidate is compared against
+  * \return
+  *    0 if mbAddr lies outside 0 .. PicSizeInMbs-1, or if (outside of a
+  *    deblocking call) both macroblocks carry different slice numbers;
+  *    1 otherwise
+  ************************************************************************
+  */
+ int mb_is_available(int mbAddr, int currMbAddr)
+ {
+   // addresses outside the picture are never available
+   if (mbAddr < 0)
+     return 0;
+   if (mbAddr > ((int)img->PicSizeInMbs - 1))
+     return 0;
+ 
+   // while img->DeblockCall is set, slice membership is not examined
+   if (img->DeblockCall)
+     return 1;
+ 
+   // original note: comparing the slice numbers checks both the slice
+   // membership and whether the macroblock has been processed already
+   return (img->mb_data[mbAddr].slice_nr == img->mb_data[currMbAddr].slice_nr) ? 1 : 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Determines, for the current macroblock (img->current_mb_nr), the
+  *    addresses and availability of its neighbours A (left), B (up),
+  *    C (up-right) and D (up-left), and stores pointers to the left and
+  *    upper macroblock (NULL when not available).
+  ************************************************************************
+  */
+ void CheckAvailabilityOfNeighbors()
+ {
+   const int cur_addr = img->current_mb_nr;
+   Macroblock *mb = &img->mb_data[cur_addr];
+ 
+   // with MbaffFrameFlag set, neighbour addresses are derived from the
+   // index of the macroblock pair (address / 2) and scaled back by two;
+   // otherwise they are derived from the address itself
+   const int unit  = img->MbaffFrameFlag ? cur_addr / 2 : cur_addr;
+   const int scale = img->MbaffFrameFlag ? 2 : 1;
+ 
+   mb->mbAddrA = scale * (unit - 1);
+   mb->mbAddrB = scale * (unit - img->PicWidthInMbs);
+   mb->mbAddrC = scale * (unit - img->PicWidthInMbs + 1);
+   mb->mbAddrD = scale * (unit - img->PicWidthInMbs - 1);
+ 
+   // A and D do not exist in the first column, C does not exist in the last one
+   mb->mbAvailA = mb_is_available(mb->mbAddrA, cur_addr) && ((unit % img->PicWidthInMbs) != 0);
+   mb->mbAvailB = mb_is_available(mb->mbAddrB, cur_addr);
+   mb->mbAvailC = mb_is_available(mb->mbAddrC, cur_addr) && (((unit + 1) % img->PicWidthInMbs) != 0);
+   mb->mbAvailD = mb_is_available(mb->mbAddrD, cur_addr) && ((unit % img->PicWidthInMbs) != 0);
+ 
+   // unavailable neighbours are represented by NULL pointers
+   mb->mb_available_left = mb->mbAvailA ? &(img->mb_data[mb->mbAddrA]) : NULL;
+   mb->mb_available_up   = mb->mbAvailB ? &(img->mb_data[mb->mbAddrB]) : NULL;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    converts a macroblock address into macroblock coordinates
+  * \param mb_addr
+  *    macroblock address
+  * \param x
+  *    receives the macroblock column
+  * \param y
+  *    receives the macroblock row
+  ************************************************************************
+  */
+ void get_mb_block_pos (int mb_addr, int *x, int*y)
+ {
+   const int pair = mb_addr / 2;
+ 
+   if (!img->MbaffFrameFlag)
+   {
+     // row by row addressing, PicWidthInMbs macroblocks per row
+     *x = (mb_addr % img->PicWidthInMbs);
+     *y = (mb_addr / img->PicWidthInMbs);
+     return;
+   }
+ 
+   // MBAFF: addresses 2p and 2p+1 share a column and sit in adjacent rows
+   *x = (pair % img->PicWidthInMbs);
+   *y = ((pair / img->PicWidthInMbs) * 2 + (mb_addr % 2));
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    converts a macroblock address into sample coordinates, i.e. the
+  *    macroblock coordinates multiplied by MB_BLOCK_SIZE
+  * \param mb_addr
+  *    macroblock address
+  * \param x
+  *    receives the x sample coordinate
+  * \param y
+  *    receives the y sample coordinate
+  ************************************************************************
+  */
+ void get_mb_pos (int mb_addr, int *x, int*y)
+ {
+   // macroblock coordinates first ...
+   get_mb_block_pos(mb_addr, x, y);
+ 
+   // ... then scaled to samples
+   *x = (*x) * (MB_BLOCK_SIZE);
+   *y = (*y) * (MB_BLOCK_SIZE);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    get neighbouring positions for non-aff coding
+  *
+  *    The position (xN, yN) is given relative to the current macroblock.
+  *    Depending on the region it falls into, the neighbour is D (up-left),
+  *    B (up), C (up-right), A (left) or the current macroblock itself;
+  *    every other position is reported as not available.
+  * \param curr_mb_nr
+  *    current macroblock number (decoding order)
+  * \param xN
+  *    input x position
+  * \param yN
+  *    input y position
+  * \param luma
+  *    1 if luma coding, 0 for chroma
+  * \param pix
+  *    returns position information; x, y, pos_x and pos_y are only
+  *    written when the neighbour is available or img->DeblockCall is set
+  ************************************************************************
+  */
+ void getNonAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix)
+ {
+   Macroblock *mb = &img->mb_data[curr_mb_nr];
+   int width  = 16;
+   int height = 16;
+ 
+   if (!luma)
+   {
+     assert(img->yuv_format != 0);
+     width  = img->mb_cr_size_x;
+     height = img->mb_cr_size_y;
+   }
+ 
+   if (yN < 0)
+   {
+     // row above the current macroblock
+     if (xN < 0)
+     {
+       pix->mb_addr   = mb->mbAddrD;
+       pix->available = mb->mbAvailD;
+     }
+     else if (xN < width)
+     {
+       pix->mb_addr   = mb->mbAddrB;
+       pix->available = mb->mbAvailB;
+     }
+     else
+     {
+       pix->mb_addr   = mb->mbAddrC;
+       pix->available = mb->mbAvailC;
+     }
+   }
+   else if (yN < height)
+   {
+     // rows of the current macroblock
+     if (xN < 0)
+     {
+       pix->mb_addr   = mb->mbAddrA;
+       pix->available = mb->mbAvailA;
+     }
+     else if (xN < width)
+     {
+       pix->mb_addr   = curr_mb_nr;
+       pix->available = 1;
+     }
+     else
+     {
+       // right of the current macroblock
+       pix->available = 0;
+     }
+   }
+   else
+   {
+     // below the current macroblock
+     pix->available = 0;
+   }
+ 
+   if (!pix->available && !img->DeblockCall)
+     return;
+ 
+   // position inside the selected macroblock
+   pix->x = (xN + width) % width;
+   pix->y = (yN + height) % height;
+ 
+   // position inside the picture
+   get_mb_pos(pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+ 
+   if (luma)
+   {
+     pix->pos_x = pix->pos_x + pix->x;
+     pix->pos_y = pix->pos_y + pix->y;
+   }
+   else
+   {
+     // scale the macroblock origin by mb_cr_size / 16 before adding the offset
+     pix->pos_x = ((img->mb_cr_size_x * pix->pos_x) >> 4) + pix->x;
+     pix->pos_y = ((img->mb_cr_size_y * pix->pos_y) >> 4) + pix->y;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    get neighbouring positions for aff coding
+  *
+  *    An even curr_mb_nr is handled as the top and an odd one as the
+  *    bottom macroblock of a pair. The neighbour macroblock and the row
+  *    yM inside it are selected from the region (xN, yN) falls into, from
+  *    currMb->mb_field and from the mb_field flag of the neighbouring
+  *    macroblock. Positions with yN beyond the last row, or with xN beyond
+  *    the last column while yN lies inside the macroblock, are reported as
+  *    not available.
+  * \param curr_mb_nr
+  *   current macroblock number (decoding order)
+  * \param xN
+  *    input x position
+  * \param yN
+  *    input y position
+  * \param luma
+  *    1 if luma coding, 0 for chroma
+  * \param pix
+  *    returns position information; x, y, pos_x and pos_y are only
+  *    written when the neighbour is available or img->DeblockCall is set
+  ************************************************************************
+  */
+ void getAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix)
+ {
+   Macroblock *currMb = &img->mb_data[curr_mb_nr];
+   int maxW, maxH;
+   int yM = -1;   // row inside the selected neighbour macroblock; remains -1 if no branch below assigns it
+ 
+   if (luma)
+   {
+     maxW = 16;
+     maxH = 16;
+   }
+   else
+   {
+     assert(img->yuv_format != 0);
+     maxW = img->mb_cr_size_x;
+     maxH = img->mb_cr_size_y;
+   }
+ 
+   // initialize to "not available"
+   pix->available = 0;
+ 
+   if(yN > (maxH - 1))   // below the last row
+   {
+     return;
+   }
+   if(xN > (maxW - 1) && yN >= 0 && yN < maxH)   // right of the last column, inside the macroblock rows
+   {
+     return;
+   }
+ 
+   if (xN < 0)
+   {
+     if (yN < 0)
+     {
+       if(!currMb->mb_field)
+       {
+         // frame
+         if (curr_mb_nr%2 == 0)
+         {
+           // top
+           pix->mb_addr   = currMb->mbAddrD  + 1;
+           pix->available = currMb->mbAvailD;
+           yM = yN;
+         }
+         else
+         {
+           // bottom
+           pix->mb_addr   = currMb->mbAddrA;
+           pix->available = currMb->mbAvailA;
+           if (currMb->mbAvailA)
+           {
+             if(!img->mb_data[currMb->mbAddrA].mb_field)
+             {
+                yM = yN;
+             }
+             else
+             {
+               (pix->mb_addr)++;
+                yM = (yN + maxH) >> 1;
+             }
+           }
+         }
+       }
+       else
+       {
+         // field
+         if(curr_mb_nr % 2 == 0)
+         {
+           // top
+           pix->mb_addr   = currMb->mbAddrD;
+           pix->available = currMb->mbAvailD;
+           if (currMb->mbAvailD)
+           {
+             if(!img->mb_data[currMb->mbAddrD].mb_field)
+             {
+               (pix->mb_addr)++;
+                yM = 2 * yN;
+             }
+             else
+             {
+                yM = yN;
+             }
+           }
+         }
+         else
+         {
+           // bottom
+           pix->mb_addr   = currMb->mbAddrD+1;
+           pix->available = currMb->mbAvailD;
+           yM = yN;
+         }
+       }
+     }
+     else
+     { // xN < 0 && yN >= 0
+       if (yN >= 0 && yN <maxH)
+       {
+         if (!currMb->mb_field)
+         {
+           // frame
+           if(curr_mb_nr % 2 == 0)
+           {
+             // top
+             pix->mb_addr   = currMb->mbAddrA;
+             pix->available = currMb->mbAvailA;
+             if (currMb->mbAvailA)
+             {
+               if(!img->mb_data[currMb->mbAddrA].mb_field)
+               {
+                  yM = yN;
+               }
+               else
+               {
+                 if (yN %2 == 0)
+                 {
+                    yM = yN>> 1;
+                 }
+                 else
+                 {
+                   (pix->mb_addr)++;
+                    yM = yN>> 1;
+                 }
+               }
+             }
+           }
+           else
+           {
+             // bottom
+             pix->mb_addr   = currMb->mbAddrA;
+             pix->available = currMb->mbAvailA;
+             if (currMb->mbAvailA)
+             {
+               if(!img->mb_data[currMb->mbAddrA].mb_field)
+               {
+                 (pix->mb_addr)++;
+                  yM = yN;
+               }
+               else
+               {
+                 if (yN %2 == 0)
+                 {
+                    yM = (yN + maxH) >> 1;
+                 }
+                 else
+                 {
+                   (pix->mb_addr)++;
+                    yM = (yN + maxH) >> 1;
+                 }
+               }
+             }
+           }
+         }
+         else
+         {
+           // field
+           if (curr_mb_nr % 2 == 0)
+           {
+             // top
+             pix->mb_addr  = currMb->mbAddrA;
+             pix->available = currMb->mbAvailA;
+             if (currMb->mbAvailA)
+             {
+               if(!img->mb_data[currMb->mbAddrA].mb_field)
+               {
+                 if (yN < (maxH / 2))
+                 {
+                    yM = yN << 1;
+                 }
+                 else
+                 {
+                   (pix->mb_addr)++;
+                    yM = (yN << 1 ) - maxH;
+                 }
+               }
+               else
+               {
+                  yM = yN;
+               }
+             }
+           }
+           else
+           {
+             // bottom
+             pix->mb_addr  = currMb->mbAddrA;
+             pix->available = currMb->mbAvailA;
+             if (currMb->mbAvailA)
+             {
+               if(!img->mb_data[currMb->mbAddrA].mb_field)
+               {
+                 if (yN < (maxH / 2))
+                 {
+                   yM = (yN << 1) + 1;
+                 }
+                 else
+                 {
+                   (pix->mb_addr)++;
+                    yM = (yN << 1 ) + 1 - maxH;
+                 }
+               }
+               else
+               {
+                 (pix->mb_addr)++;
+                  yM = yN;
+               }
+             }
+           }
+         }
+       }
+     }
+   }
+   else
+   { // xN >= 0
+     if (xN >= 0 && xN < maxW)
+     {
+       if (yN<0)
+       {
+         if (!currMb->mb_field)
+         {
+           //frame
+           if (curr_mb_nr % 2 == 0)
+           {
+             //top
+             pix->mb_addr  = currMb->mbAddrB;
+             // for the deblocker if the current MB is a frame and the one above is a field
+             // then the neighbor is the top MB of the pair
+             if (currMb->mbAvailB)
+             {
+               if (!(img->DeblockCall == 1 && (img->mb_data[currMb->mbAddrB]).mb_field))
+                 pix->mb_addr  += 1;
+             }
+             
+             pix->available = currMb->mbAvailB;
+             yM = yN;
+           }
+           else
+           {
+             // bottom
+             pix->mb_addr   = curr_mb_nr - 1;
+             pix->available = 1;
+             yM = yN;
+           }
+         }
+         else
+         {
+           // field
+           if (curr_mb_nr % 2 == 0)
+           {
+             // top
+             pix->mb_addr   = currMb->mbAddrB;
+             pix->available = currMb->mbAvailB;
+             if (currMb->mbAvailB)
+             {
+               if(!img->mb_data[currMb->mbAddrB].mb_field)
+               {
+                 (pix->mb_addr)++;
+                  yM = 2* yN;
+               }
+               else
+               {
+                  yM = yN;
+               }
+             }
+           }
+           else
+           {
+             // bottom
+             pix->mb_addr   = currMb->mbAddrB + 1;
+             pix->available = currMb->mbAvailB;
+             yM = yN;
+           }
+         }
+       }
+       else
+       {
+         // yN >=0
+         // for the deblocker if this is the extra edge then do this special stuff
+         if (yN == 0 && img->DeblockCall == 2)
+         {
+           pix->mb_addr  = currMb->mbAddrB + 1;
+           pix->available = 1;
+           yM = yN - 1;
+         }
+ 
+         else if ((yN >= 0) && (yN <maxH))
+         {
+           pix->mb_addr   = curr_mb_nr;
+           pix->available = 1;
+           yM = yN;
+         }
+       }
+     }
+     else
+     { // xN >= maxW
+       if(yN < 0)
+       {
+         if (!currMb->mb_field)
+         {
+           // frame
+           if (curr_mb_nr % 2 == 0)
+           {
+             // top
+             pix->mb_addr  = currMb->mbAddrC + 1;
+             pix->available = currMb->mbAvailC;
+             yM = yN;
+           }
+           else
+           {
+             // bottom
+             pix->available = 0;
+           }
+         }
+         else
+         {
+           // field
+           if (curr_mb_nr % 2 == 0)
+           {
+             // top
+             pix->mb_addr   = currMb->mbAddrC;
+             pix->available = currMb->mbAvailC;
+             if (currMb->mbAvailC)
+             {
+               if(!img->mb_data[currMb->mbAddrC].mb_field)
+               {
+                 (pix->mb_addr)++;
+                  yM = 2* yN;
+               }
+               else
+               {
+                 yM = yN;
+               }
+             }
+           }
+           else
+           {
+             // bottom
+             pix->mb_addr   = currMb->mbAddrC + 1;
+             pix->available = currMb->mbAvailC;
+             yM = yN;
+           }
+         }
+       }
+     }
+   }
+   if (pix->available || img->DeblockCall)   // x, y, pos_x and pos_y are only filled in here
+   {
+     pix->x = (xN + maxW) % maxW;
+     pix->y = (yM + maxH) % maxH;
+     get_mb_pos(pix->mb_addr, &(pix->pos_x), &(pix->pos_y));
+     if (luma)
+     {
+       pix->pos_x += pix->x;
+       pix->pos_y += pix->y;
+     }
+     else
+     {
+       //pix->pos_x = pix->pos_x/(16/img->mb_cr_size_x) + pix->x;
+       //pix->pos_y = pix->pos_y/(16/img->mb_cr_size_y) + pix->y;        
+       pix->pos_x = ((img->mb_cr_size_x * pix->pos_x) >> 4) + pix->x;
+       pix->pos_y = ((img->mb_cr_size_y * pix->pos_y) >> 4) + pix->y;      
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    get neighbouring positions; dispatches to getAffNeighbour() when
+  *    img->MbaffFrameFlag is set and to getNonAffNeighbour() otherwise
+  * \param curr_mb_nr
+  *    current macroblock number (decoding order); a negative number is
+  *    reported through error()
+  * \param xN
+  *    input x position
+  * \param yN
+  *    input y position
+  * \param luma
+  *    1 if luma coding, 0 for chroma
+  * \param pix
+  *    returns position information
+  ************************************************************************
+  */
+ void getNeighbour(int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix)
+ {
+   if (curr_mb_nr < 0)
+   {
+     error ("getNeighbour: invalid macroblock number", 100);
+   }
+ 
+   if (!img->MbaffFrameFlag)
+   {
+     getNonAffNeighbour(curr_mb_nr, xN, yN, luma, pix);
+     return;
+   }
+ 
+   getAffNeighbour(curr_mb_nr, xN, yN, luma, pix);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    get neighbouring 4x4 luma block
+  *
+  *    The sample position 4*block + rel is looked up with getNeighbour();
+  *    if it is available, the returned coordinates are converted from
+  *    sample units to 4x4 block units (shifted right by two).
+  * \param curr_mb_nr
+  *    current macroblock number (decoding order)
+  * \param block_x
+  *    input x block position
+  * \param block_y
+  *    input y block position
+  * \param rel_x
+  *    relative x position of neighbor
+  * \param rel_y
+  *    relative y position of neighbor
+  * \param pix
+  *    returns position information
+  ************************************************************************
+  */
+ void getLuma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix)
+ {
+   const int sample_x = 4 * block_x + rel_x;
+   const int sample_y = 4 * block_y + rel_y;
+ 
+   getNeighbour(curr_mb_nr, sample_x, sample_y, 1, pix);
+ 
+   if (!pix->available)
+     return;
+ 
+   // sample units -> 4x4 block units
+   pix->x     = pix->x     >> 2;
+   pix->y     = pix->y     >> 2;
+   pix->pos_x = pix->pos_x >> 2;
+   pix->pos_y = pix->pos_y >> 2;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    get neighbouring 4x4 chroma block
+  * \param curr_mb_nr
+  *   current macroblock number (decoding order)
+  * \param block_x
+  *    input x block position
+  * \param block_y
+  *    input y block position
+  * \param rel_x
+  *    relative x position of neighbor
+  * \param rel_y
+  *    relative y position of neighbor
+  * \param pix
+  *    receives the position information; when the neighbour is available
+  *    its x, y, pos_x and pos_y are shifted right by two
+  ************************************************************************
+  */
+ void getChroma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix)
+ {
+   // four positions per block plus the relative offset; luma flag cleared
+   getNeighbour(curr_mb_nr, 4 * block_x + rel_x, 4 * block_y + rel_y, 0, pix);
+ 
+   if (!pix->available)
+     return;
+ 
+   // scale the returned coordinates back down by four
+   pix->pos_y >>= 2;
+   pix->pos_x >>= 2;
+   pix->y     >>= 2;
+   pix->x     >>= 2;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/mb_access.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mb_access.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mb_access.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,30 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file mb_access.h
+  *
+  * \brief
+  *    Functions for macroblock neighborhoods
+  *
+  * \author
+  *     Main contributors (see contributors.h for copyright, address and affiliation details)
+  *     - Karsten Sühring          <suehring at hhi.de>
+  *************************************************************************************
+  */
+ 
+ #ifndef _MB_ACCESS_H_  /* include guard */
+ #define _MB_ACCESS_H_
+ 
+ void CheckAvailabilityOfNeighbors();
+ 
+ void getNeighbour(int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix);  /* luma: 1 for luma, 0 for chroma; fills *pix */
+ void getLuma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix);  /* getNeighbour() on 4*block + rel with luma=1 */
+ void getChroma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix);  /* getNeighbour() on 4*block + rel with luma=0 */
+ 
+ int  mb_is_available(int mbAddr, int currMbAddr);
+ void get_mb_pos (int mb_addr, int *x, int*y);  /* results are written through x and y */
+ void get_mb_block_pos (int mb_addr, int *x, int*y);  /* NOTE(review): presumably the block-unit counterpart of get_mb_pos -- confirm in mb_access.c */
+ 
+ /* NOTE(review): PixelPos is not declared in this header and nothing is included here, so the including file has to declare it first. */
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/mbuffer.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mbuffer.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mbuffer.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,3865 ----
+ 
+ /*!
+  ***********************************************************************
+  *  \file
+  *      mbuffer.c
+  *
+  *  \brief
+  *      Frame buffer functions
+  *
+  *  \author
+  *      Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Karsten Sühring                 <suehring at hhi.de>
+  *      - Alexis Tourapis                 <alexismt at ieee.org>
+  ***********************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "mbuffer.h"
+ #include "memalloc.h"
+ #include "output.h"
+ #include "image.h"
+ 
+ static void insert_picture_in_dpb(FrameStore* fs, StorablePicture* p);  // forward declarations of file-local (static) helpers
+ static void output_one_frame_from_dpb();
+ static int  is_used_for_reference(FrameStore* fs);
+ static void get_smallest_poc(int *poc,int * pos);
+ static int  remove_unused_frame_from_dpb();
+ static int  is_short_term_reference(FrameStore* fs);
+ static int  is_long_term_reference(FrameStore* fs);
+ void gen_field_ref_ids(StorablePicture *p);  // not static: external linkage
+ 
+ DecodedPictureBuffer dpb;  // decoded picture buffer state shared by the functions in this file
+ 
+ StorablePicture **listX[6];  // six picture lists; init_dpb() allocates MAX_LIST_SIZE entries for each
+ 
+ ColocatedParams *Co_located = NULL;  // initially NULL
+ 
+ 
+ int listXsize[6];  // number of entries in use in the matching listX[] list
+ 
+ #define MAX_LIST_SIZE 33  /* entries allocated per listX[] list */
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Print out list of pictures in DPB. Used for debug purposes.
+  *
+  * \note
+  *    The dump is switched off by the unconditional return at the top of
+  *    the body; remove that statement to get the output.
+  *
+  *    For every used frame store one line is printed with: frame_num, the
+  *    top/bottom POC (taken from the field picture when it exists, else
+  *    from the frame), the frame POC when both fields are used, the frame
+  *    store POC, and the reference / long term / output / non-existing
+  *    markers.
+  ************************************************************************
+  */
+ void dump_dpb()
+ {
+   unsigned i;
+ 
+   return;  // debug output disabled
+   
+   for (i=0; i<dpb.used_size;i++)
+   {
+     printf("(");
+     printf("fn=%d  ", dpb.fs[i]->frame_num);
+     if (dpb.fs[i]->is_used & 1)
+     {
+       if (dpb.fs[i]->top_field)
+         printf("T: poc=%d  ", dpb.fs[i]->top_field->poc);
+       else
+         printf("T: poc=%d  ", dpb.fs[i]->frame->top_poc);
+     }
+     if (dpb.fs[i]->is_used & 2)
+     {
+       if (dpb.fs[i]->bottom_field)
+         printf("B: poc=%d  ", dpb.fs[i]->bottom_field->poc);
+       else
+         printf("B: poc=%d  ", dpb.fs[i]->frame->bottom_poc);
+     }
+     if (dpb.fs[i]->is_used == 3)
+       printf("F: poc=%d  ", dpb.fs[i]->frame->poc);
+     printf("G: poc=%d)  ", dpb.fs[i]->poc);
+     if (dpb.fs[i]->is_reference) printf ("ref (%d) ", dpb.fs[i]->is_reference);
+     // print the long term marking itself (the value used to be is_reference)
+     if (dpb.fs[i]->is_long_term) printf ("lt_ref (%d) ", dpb.fs[i]->is_long_term);
+     if (dpb.fs[i]->is_output) printf ("out  ");
+     if (dpb.fs[i]->is_used == 3)
+     {
+       if (dpb.fs[i]->frame->non_existing) printf ("ne  ");
+     }
+     printf ("\n");
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Returns the size of the dpb depending on level and picture size
+  *
+  *    A per-level size is selected from active_sps->level_idc (for level
+  *    42 it also depends on the profile) and divided by the picture size
+  *    computed from the active sequence parameter set.
+  *
+  * \return
+  *    the resulting quotient, limited to at most 16. An unknown level is
+  *    reported through error().
+  ************************************************************************
+  */
+ int getDpbSize()
+ {
+   int size = 0;
+   int pic_size = (active_sps->pic_width_in_mbs_minus1 + 1) * (active_sps->pic_height_in_map_units_minus1 + 1) * (active_sps->frame_mbs_only_flag?1:2) * 384;
+ 
+   switch (active_sps->level_idc)
+   {
+   case 10:
+     size = 152064;
+     break;
+   case 11:
+     size = 345600;
+     break;
+   case 12:
+   case 13:
+   case 20:
+     size = 912384;
+     break;
+   case 21:
+     size = 1824768;
+     break;
+   case 22:
+   case 30:
+     size = 3110400;
+     break;
+   case 31:
+     size = 6912000;
+     break;
+   case 32:
+     size = 7864320;
+     break;
+   case 40:
+   case 41:
+     size = 12582912;
+     break;
+   case 42:
+     // the FRExt profiles get the larger size at this level
+     size = (  (active_sps->profile_idc==FREXT_HP   ) || (active_sps->profile_idc==FREXT_Hi10P)
+            || (active_sps->profile_idc==FREXT_Hi422) || (active_sps->profile_idc==FREXT_Hi444))
+          ? 13369344 : 12582912;
+     break;
+   case 50:
+     size = 42393600;
+     break;
+   case 51:
+     size = 70778880;
+     break;
+   default:
+     error ("undefined level", 500);
+     break;
+   }
+ 
+   size /= pic_size;
+   return min( size, 16);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Check the number of frames marked "used for reference" (short term
+  *    plus long term) and report an error if it exceeds
+  *    max(1, img->num_ref_frames)
+  *
+  ************************************************************************
+  */
+ void check_num_ref()
+ {
+   // within the limit: nothing to report
+   if ((int)(dpb.ltref_frames_in_buffer +  dpb.ref_frames_in_buffer ) <= (max(1,img->num_ref_frames)))
+     return;
+ 
+   error ("Max. number of reference frames exceeded. Invalid stream.", 500);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate memory for decoded picture buffer and initialize with sane values.
+  *
+  *    A buffer that was initialized before is released first. The buffer
+  *    size comes from getDpbSize(); a size below input->num_ref_frames is
+  *    reported through error(). All six listX[] lists are allocated with
+  *    MAX_LIST_SIZE entries and emptied.
+  *
+  ************************************************************************
+  */
+ void init_dpb()
+ {
+   unsigned slot, list;
+ 
+   // release a previous setup before building a new one
+   if (dpb.init_done)
+     free_dpb();
+ 
+   dpb.size = getDpbSize();
+   if (dpb.size < (unsigned int)input->num_ref_frames)
+     error ("DPB size at specified level is smaller than the specified number of reference frames. This is not allowed.\n", 1000);
+ 
+   dpb.last_picture           = NULL;
+   dpb.used_size              = 0;
+   dpb.ltref_frames_in_buffer = 0;
+   dpb.ref_frames_in_buffer   = 0;
+ 
+   // the three frame store pointer tables
+   dpb.fs = calloc(dpb.size, sizeof (FrameStore*));
+   if (NULL == dpb.fs)
+     no_mem_exit("init_dpb: dpb->fs");
+ 
+   dpb.fs_ref = calloc(dpb.size, sizeof (FrameStore*));
+   if (NULL == dpb.fs_ref)
+     no_mem_exit("init_dpb: dpb->fs_ref");
+ 
+   dpb.fs_ltref = calloc(dpb.size, sizeof (FrameStore*));
+   if (NULL == dpb.fs_ltref)
+     no_mem_exit("init_dpb: dpb->fs_ltref");
+ 
+   // one frame store per slot; the reference tables start out empty
+   for (slot = 0; slot < dpb.size; slot++)
+   {
+     dpb.fs_ltref[slot] = NULL;
+     dpb.fs_ref[slot]   = NULL;
+     dpb.fs[slot]       = alloc_frame_store();
+   }
+ 
+   for (list = 0; list < 6; list++)
+   {
+     listX[list] = calloc(MAX_LIST_SIZE, sizeof (StorablePicture*)); // +1 for reordering
+     if (NULL == listX[list])
+       no_mem_exit("init_dpb: listX[i]");
+   }
+ 
+   for (list = 0; list < 6; list++)
+   {
+     listXsize[list] = 0;
+     for (slot = 0; slot < MAX_LIST_SIZE; slot++)
+       listX[list][slot] = NULL;
+   }
+ 
+   img->last_has_mmco_5 = 0;
+   dpb.last_output_poc  = INT_MIN;
+   dpb.init_done        = 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Free memory for decoded picture buffer.
+  *
+  *    Releases every frame store, the three frame store pointer tables
+  *    and the six listX[] lists, and clears dpb.init_done. Every freed
+  *    pointer is reset to NULL, so calling this function again without an
+  *    init_dpb() in between is harmless.
+  ************************************************************************
+  */
+ void free_dpb()
+ {
+   unsigned i;
+   if (dpb.fs)
+   {
+     for (i=0; i<dpb.size; i++)
+     {
+       free_frame_store(dpb.fs[i]);
+     }
+     free (dpb.fs);
+     dpb.fs=NULL;
+   }
+ 
+   // free(NULL) is a no-op; reset the pointers so they cannot be freed twice
+   free (dpb.fs_ref);
+   dpb.fs_ref = NULL;
+   free (dpb.fs_ltref);
+   dpb.fs_ltref = NULL;
+ 
+   dpb.last_output_poc = INT_MIN;
+ 
+   for (i=0; i<6; i++)
+   {
+     free (listX[i]);
+     listX[i] = NULL;
+   }
+ 
+   dpb.init_done = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate one decoded picture buffer frame store and initialize it:
+  *    all status flags cleared, no frame or field pictures attached.
+  *
+  * \return
+  *    the allocated FrameStore structure
+  ************************************************************************
+  */
+ FrameStore* alloc_frame_store()
+ {
+   FrameStore *store = calloc (1, sizeof(FrameStore));
+ 
+   if (NULL == store)
+     no_mem_exit("alloc_frame_store: f");
+ 
+   // no pictures attached yet
+   store->bottom_field = NULL;
+   store->top_field    = NULL;
+   store->frame        = NULL;
+ 
+   // status flags
+   store->is_output         = 0;
+   store->is_orig_reference = 0;
+   store->is_long_term      = 0;
+   store->is_reference      = 0;
+   store->is_used           = 0;
+ 
+   return store;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate memory for a stored picture. 
+  *
+  *    The luma plane is always allocated, the chroma planes unless
+  *    img->yuv_format is YUV400. The reference index, reference id, motion
+  *    vector, moving_block and field_frame arrays are sized in units of
+  *    BLOCK_SIZE in both directions.
+  *
+  * \param structure
+  *    picture structure
+  * \param size_x
+  *    horizontal luma size
+  * \param size_y
+  *    vertical luma size
+  * \param size_x_cr
+  *    horizontal chroma size
+  * \param size_y_cr
+  *    vertical chroma size
+  *
+  * \return
+  *    the allocated StorablePicture structure
+  ************************************************************************
+  */
+ StorablePicture* alloc_storable_picture(PictureStructure structure, int size_x, int size_y, int size_x_cr, int size_y_cr)
+ {
+   StorablePicture *pic = calloc (1, sizeof(StorablePicture));
+ 
+   if (NULL == pic)
+     no_mem_exit("alloc_storable_picture: s");
+ 
+   // these planes are not allocated here
+   pic->imgUV    = NULL;
+   pic->imgY_ups = NULL;
+   pic->imgY_11  = NULL;
+ 
+   if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+   {
+     pic->imgY_ups_w = NULL;
+     pic->imgY_11_w  = NULL;
+   }
+ 
+   // sample planes
+   get_mem2Dpel (&(pic->imgY), size_y, size_x);
+   if (img->yuv_format != YUV400)
+   {
+     get_mem3Dpel (&(pic->imgUV), 2, size_y_cr, size_x_cr);
+   }
+ 
+   // one entry per macroblock
+   pic->mb_field = calloc (img->PicSizeInMbs, sizeof(int));
+   if (NULL == pic->mb_field)
+     no_mem_exit("alloc_storable_picture: s->mb_field");
+ 
+   // arrays sized in BLOCK_SIZE units
+   get_mem3D      ((byte****)(&(pic->ref_idx)),  2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+   get_mem3Dint64 (&(pic->ref_pic_id),           6, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+   get_mem3Dint64 (&(pic->ref_id),               6, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+   get_mem4Dshort (&(pic->mv),                   2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE, 2);
+ 
+   get_mem2D (&(pic->moving_block), size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+   get_mem2D (&(pic->field_frame),  size_y / BLOCK_SIZE, size_x / BLOCK_SIZE);
+ 
+   // geometry and structure as requested by the caller
+   pic->structure = structure;
+   pic->size_x    = size_x;
+   pic->size_y    = size_y;
+   pic->size_x_cr = size_x_cr;
+   pic->size_y_cr = size_y_cr;
+ 
+   // numbering and marking start out cleared
+   pic->pic_num             = 0;
+   pic->frame_num           = 0;
+   pic->long_term_frame_idx = 0;
+   pic->long_term_pic_num   = 0;
+   pic->used_for_reference  = 0;
+   pic->is_long_term        = 0;
+   pic->non_existing        = 0;
+   pic->is_output           = 0;
+   pic->coded_frame         = 0;
+   pic->MbaffFrameFlag      = 0;
+ 
+   // no links to related pictures yet
+   pic->frame        = NULL;
+   pic->bottom_field = NULL;
+   pic->top_field    = NULL;
+ 
+   return pic;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Free frame store memory, including the frame and field pictures
+  *    attached to it.
+  *
+  * \param f
+  *    FrameStore to be freed; NULL is accepted and ignored
+  *
+  ************************************************************************
+  */
+ void free_frame_store(FrameStore* f)
+ {
+   if (!f)
+     return;
+ 
+   if (f->frame)
+   {
+     free_storable_picture(f->frame);
+     f->frame = NULL;
+   }
+ 
+   if (f->top_field)
+   {
+     free_storable_picture(f->top_field);
+     f->top_field = NULL;
+   }
+ 
+   if (f->bottom_field)
+   {
+     free_storable_picture(f->bottom_field);
+     f->bottom_field = NULL;
+   }
+ 
+   free(f);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Free picture memory: every array still attached to the picture and
+  *    then the picture structure itself.
+  *
+  * \param p
+  *    Picture to be freed; NULL is accepted and ignored
+  *
+  ************************************************************************
+  */
+ void free_storable_picture(StorablePicture* p)
+ {
+   if (!p)
+     return;
+ 
+   // reference and motion data
+   if (p->ref_idx)
+   {
+     free_mem3D ((byte***)p->ref_idx, 2);
+     p->ref_idx = NULL;
+   }
+   if (p->ref_pic_id)
+   {
+     free_mem3Dint64 (p->ref_pic_id, 6);
+     p->ref_pic_id = NULL;
+   }
+   if (p->ref_id)
+   {
+     free_mem3Dint64 (p->ref_id, 6);
+     p->ref_id = NULL;
+   }
+   if (p->mv)
+   {
+     free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE);
+     p->mv = NULL;
+   }
+   if (p->moving_block)
+   {
+     free_mem2D (p->moving_block);
+     p->moving_block = NULL;
+   }
+   if (p->field_frame)
+   {
+     free_mem2D (p->field_frame);
+     p->field_frame = NULL;
+   }
+ 
+   // sample planes
+   if (p->imgY)
+   {
+     free_mem2Dpel (p->imgY);
+     p->imgY = NULL;
+   }
+   if (p->imgY_11)
+   {
+     free (p->imgY_11);
+     p->imgY_11 = NULL;
+   }
+   if (p->imgY_ups)
+   {
+     free_mem2Dpel (p->imgY_ups);
+     p->imgY_ups = NULL;
+   }
+   if (p->imgUV)
+   {
+     free_mem3Dpel (p->imgUV, 2);
+     p->imgUV = NULL;
+   }
+ 
+   // the weighted planes are only looked at when one of these options is set
+   if (input->WeightedPrediction || input->WeightedBiprediction || input->GenerateMultiplePPS)
+   {
+     if (p->imgY_11_w)
+     {
+       free (p->imgY_11_w);
+       p->imgY_11_w = NULL;
+     }
+     if (p->imgY_ups_w)
+     {
+       free_mem2Dpel (p->imgY_ups_w);
+       p->imgY_ups_w = NULL;
+     }
+   }
+ 
+   if (p->mb_field)
+   {
+     free(p->mb_field);
+     p->mb_field = NULL;
+   }
+ 
+   free(p);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    release the upsampled / weighted luma planes and the reference id
+  *    arrays of one picture (helper for unmark_for_reference)
+  *
+  * \param pic
+  *    picture to strip; NULL is accepted and ignored
+  ************************************************************************
+  */
+ static void unmark_release_ref_buffers(StorablePicture* pic)
+ {
+   if (!pic)
+     return;
+ 
+   if (pic->imgY_ups_w)
+   {
+     free_mem2Dpel (pic->imgY_ups_w);
+     pic->imgY_ups_w = NULL;
+   }
+   if (pic->imgY_ups)
+   {
+     free_mem2Dpel (pic->imgY_ups);
+     pic->imgY_ups = NULL;
+   }
+   if (pic->imgY_11_w)
+   {
+     free (pic->imgY_11_w);
+     pic->imgY_11_w = NULL;
+   }
+   if (pic->ref_pic_id)
+   {
+     free_mem3Dint64 (pic->ref_pic_id, 6);
+     pic->ref_pic_id = NULL;
+   }
+   if (pic->ref_id)
+   {
+     free_mem3Dint64 (pic->ref_id, 6);
+     pic->ref_id = NULL;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    mark FrameStore unused for reference
+  *
+  *    Clears used_for_reference on the pictures selected by fs->is_used,
+  *    clears fs->is_reference, and releases the upsampled / weighted luma
+  *    planes and reference id arrays of the frame and both fields.
+  *
+  ************************************************************************
+  */
+ static void unmark_for_reference(FrameStore* fs)
+ {
+   // the two field checks also cover both fields when is_used == 3
+   if ((fs->is_used & 1) && fs->top_field)
+     fs->top_field->used_for_reference = 0;
+ 
+   if ((fs->is_used & 2) && fs->bottom_field)
+     fs->bottom_field->used_for_reference = 0;
+ 
+   if (fs->is_used == 3)
+     fs->frame->used_for_reference = 0;
+ 
+   fs->is_reference = 0;
+ 
+   unmark_release_ref_buffers(fs->frame);
+   unmark_release_ref_buffers(fs->top_field);
+   unmark_release_ref_buffers(fs->bottom_field);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    mark FrameStore unused for reference and reset long term flags
+  *
+  *    Both used_for_reference and is_long_term are cleared on the pictures
+  *    selected by fs->is_used, and is_reference / is_long_term are cleared
+  *    on the frame store itself.
+  *
+  ************************************************************************
+  */
+ static void unmark_for_long_term_reference(FrameStore* fs)
+ {
+   StorablePicture *top    = fs->top_field;
+   StorablePicture *bottom = fs->bottom_field;
+ 
+   // the two field checks also cover both fields when is_used == 3
+   if ((fs->is_used & 1) && top)
+   {
+     top->is_long_term       = 0;
+     top->used_for_reference = 0;
+   }
+ 
+   if ((fs->is_used & 2) && bottom)
+   {
+     bottom->is_long_term       = 0;
+     bottom->used_for_reference = 0;
+   }
+ 
+   if (fs->is_used == 3)
+   {
+     fs->frame->is_long_term       = 0;
+     fs->frame->used_for_reference = 0;
+   }
+ 
+   fs->is_long_term = 0;
+   fs->is_reference = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders stored pictures by pic_num, largest first
+  *
+  * \return
+  *    1 if the first pic_num is smaller, -1 if it is larger, 0 if equal
+  ************************************************************************
+  */
+ static int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 )
+ {
+   StorablePicture *first  = *(StorablePicture**)arg1;
+   StorablePicture *second = *(StorablePicture**)arg2;
+ 
+   if (first->pic_num == second->pic_num)
+     return 0;
+ 
+   return (first->pic_num < second->pic_num) ? 1 : -1;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders stored pictures by long_term_pic_num,
+  *    smallest first
+  *
+  * \return
+  *    -1 if the first long_term_pic_num is smaller, 1 if it is larger,
+  *    0 if equal
+  ************************************************************************
+  */
+ static int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 )
+ {
+   StorablePicture *first  = *(StorablePicture**)arg1;
+   StorablePicture *second = *(StorablePicture**)arg2;
+ 
+   if (first->long_term_pic_num == second->long_term_pic_num)
+     return 0;
+ 
+   return (first->long_term_pic_num < second->long_term_pic_num) ? -1 : 1;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders frame stores by frame_num_wrap, largest
+  *    first
+  *
+  * \return
+  *    1 if the first frame_num_wrap is smaller, -1 if it is larger,
+  *    0 if equal
+  ************************************************************************
+  */
+ static int compare_fs_by_frame_num_desc( const void *arg1, const void *arg2 )
+ {
+   FrameStore *first  = *(FrameStore**)arg1;
+   FrameStore *second = *(FrameStore**)arg2;
+ 
+   if (first->frame_num_wrap == second->frame_num_wrap)
+     return 0;
+ 
+   return (first->frame_num_wrap < second->frame_num_wrap) ? 1 : -1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders frame stores by long_term_frame_idx,
+  *    smallest first
+  *
+  * \return
+  *    -1 if the first long_term_frame_idx is smaller, 1 if it is larger,
+  *    0 if equal
+  ************************************************************************
+  */
+ static int compare_fs_by_lt_pic_idx_asc( const void *arg1, const void *arg2 )
+ {
+   FrameStore *first  = *(FrameStore**)arg1;
+   FrameStore *second = *(FrameStore**)arg2;
+ 
+   if (first->long_term_frame_idx == second->long_term_frame_idx)
+     return 0;
+ 
+   return (first->long_term_frame_idx < second->long_term_frame_idx) ? -1 : 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders stored pictures by poc, smallest first
+  *
+  * \return
+  *    -1 if the first poc is smaller, 1 if it is larger, 0 if equal
+  ************************************************************************
+  */
+ static int compare_pic_by_poc_asc( const void *arg1, const void *arg2 )
+ {
+   StorablePicture *first  = *(StorablePicture**)arg1;
+   StorablePicture *second = *(StorablePicture**)arg2;
+ 
+   if (first->poc == second->poc)
+     return 0;
+ 
+   return (first->poc < second->poc) ? -1 : 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders stored pictures by poc, largest first
+  *
+  * \return
+  *    1 if the first poc is smaller, -1 if it is larger, 0 if equal
+  ************************************************************************
+  */
+ static int compare_pic_by_poc_desc( const void *arg1, const void *arg2 )
+ {
+   StorablePicture *first  = *(StorablePicture**)arg1;
+   StorablePicture *second = *(StorablePicture**)arg2;
+ 
+   if (first->poc == second->poc)
+     return 0;
+ 
+   return (first->poc < second->poc) ? 1 : -1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders frame stores by poc, smallest first
+  *
+  * \return
+  *    -1 if the first poc is smaller, 1 if it is larger, 0 if equal
+  ************************************************************************
+  */
+ static int compare_fs_by_poc_asc( const void *arg1, const void *arg2 )
+ {
+   FrameStore *first  = *(FrameStore**)arg1;
+   FrameStore *second = *(FrameStore**)arg2;
+ 
+   if (first->poc == second->poc)
+     return 0;
+ 
+   return (first->poc < second->poc) ? -1 : 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    qsort comparator: orders frame stores by poc, largest first
+  *
+  * \return
+  *    1 if the first poc is smaller, -1 if it is larger, 0 if equal
+  ************************************************************************
+  */
+ static int compare_fs_by_poc_desc( const void *arg1, const void *arg2 )
+ {
+   FrameStore *first  = *(FrameStore**)arg1;
+   FrameStore *second = *(FrameStore**)arg2;
+ 
+   if (first->poc == second->poc)
+     return 0;
+ 
+   return (first->poc < second->poc) ? 1 : -1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    returns true (1), if picture is short term reference picture, i.e.
+  *    used_for_reference is set and is_long_term is not
+  *
+  ************************************************************************
+  */
+ int is_short_ref(StorablePicture *s)
+ {
+   if (!s->used_for_reference)
+     return 0;
+ 
+   return s->is_long_term ? 0 : 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    returns true (1), if picture is long term reference picture, i.e.
+  *    both used_for_reference and is_long_term are set
+  *
+  ************************************************************************
+  */
+ int is_long_ref(StorablePicture *s)
+ {
+   if (!s->used_for_reference)
+     return 0;
+ 
+   return s->is_long_term ? 1 : 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generates an alternating field list from a given FrameStore list
+  *
+  *    Fields are appended to list in turns, starting with the parity of
+  *    the current picture: the next not yet visited frame store that has a
+  *    matching reference field of one parity, then the next one of the
+  *    other parity, until both scans have reached the end of fs_list.
+  *    Nothing is appended for any other picture structure.
+  *
+  * \param currStrcture
+  *    structure of the current picture (TOP_FIELD or BOTTOM_FIELD)
+  * \param fs_list
+  *    frame stores to take the fields from
+  * \param list_idx
+  *    number of entries in fs_list
+  * \param list
+  *    output list the fields are appended to
+  * \param list_size
+  *    number of entries in list; incremented for every appended field
+  * \param long_term
+  *    non-zero selects long term reference fields, zero short term ones
+  ************************************************************************
+  */
+ static void gen_pic_list_from_frame_list(PictureStructure currStrcture, FrameStore **fs_list, int list_idx, StorablePicture **list, int *list_size, int long_term)
+ {
+   int (*is_ref)(StorablePicture *s) = long_term ? is_long_ref : is_short_ref;
+   int cursor[2];   // scan position per parity: [0] top fields, [1] bottom fields
+   int first;       // parity that is served first in every round
+   int turn;
+ 
+   if (currStrcture == TOP_FIELD)
+     first = 0;
+   else if (currStrcture == BOTTOM_FIELD)
+     first = 1;
+   else
+     return;
+ 
+   cursor[0] = 0;
+   cursor[1] = 0;
+ 
+   while ((cursor[0] < list_idx) || (cursor[1] < list_idx))
+   {
+     for (turn = 0; turn < 2; turn++)
+     {
+       int parity = first ^ turn;
+ 
+       // advance this parity's scan up to and including the next match
+       while (cursor[parity] < list_idx)
+       {
+         FrameStore      *store = fs_list[cursor[parity]++];
+         StorablePicture *field;
+ 
+         if (!(store->is_used & (parity ? 2 : 1)))
+           continue;
+ 
+         field = parity ? store->bottom_field : store->top_field;
+         if (is_ref(field))
+         {
+           list[(*list_size)++] = field;
+           break;
+         }
+       }
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initialize listX[0] and list 1 depending on current picture type
+  *
+  ************************************************************************
+  */
+ void init_lists(int currSliceType, PictureStructure currPicStructure)
+ {
+   int add_top = 0, add_bottom = 0;
+   unsigned i;
+   int j;
+   int MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+   int diff;
+ 
+   int list0idx = 0;
+   int list0idx_1 = 0;
+   int listltidx = 0;
+ 
+   FrameStore **fs_list0;
+   FrameStore **fs_list1;
+   FrameStore **fs_listlt;
+ 
+   StorablePicture *tmp_s;
+ 
+   if (currPicStructure == FRAME)  
+   {
+     for (i=0; i<dpb.ref_frames_in_buffer; i++)
+     {
+       if (dpb.fs_ref[i]->is_used==3)
+       {
+         if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+         {
+           if( dpb.fs_ref[i]->frame_num > img->frame_num )
+           {
+             dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num - MaxFrameNum;
+           }
+           else
+           {
+             dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num;
+           }
+           dpb.fs_ref[i]->frame->pic_num = dpb.fs_ref[i]->frame_num_wrap;
+         }
+       }
+     }
+     // update long_term_pic_num
+     for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+     {
+       if (dpb.fs_ltref[i]->is_used==3)
+       {
+         if (dpb.fs_ltref[i]->frame->is_long_term)
+         {
+           dpb.fs_ltref[i]->frame->long_term_pic_num = dpb.fs_ltref[i]->frame->long_term_frame_idx;
+         }
+       }
+     }
+   }
+   else
+   {
+     if (currPicStructure == TOP_FIELD)
+     {
+       add_top    = 1;
+       add_bottom = 0;
+     }
+     else
+     {
+       add_top    = 0;
+       add_bottom = 1;
+     }
+     
+     for (i=0; i<dpb.ref_frames_in_buffer; i++)
+     {
+       if (dpb.fs_ref[i]->is_reference)
+       {
+         if( dpb.fs_ref[i]->frame_num > img->frame_num )
+         {
+           dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num - MaxFrameNum;
+         }
+         else
+         {
+           dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num;
+         }
+         if (dpb.fs_ref[i]->is_reference & 1)
+         {
+           dpb.fs_ref[i]->top_field->pic_num = (2 * dpb.fs_ref[i]->frame_num_wrap) + add_top;
+         }
+         if (dpb.fs_ref[i]->is_reference & 2)
+         {
+           dpb.fs_ref[i]->bottom_field->pic_num = (2 * dpb.fs_ref[i]->frame_num_wrap) + add_bottom;
+         }
+       }
+     }
+     // update long_term_pic_num
+     for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+     {
+       if (dpb.fs_ltref[i]->is_long_term & 1)
+       {
+         dpb.fs_ltref[i]->top_field->long_term_pic_num = 2 * dpb.fs_ltref[i]->top_field->long_term_frame_idx + add_top;
+   }
+       if (dpb.fs_ltref[i]->is_long_term & 2)
+       {
+         dpb.fs_ltref[i]->bottom_field->long_term_pic_num = 2 * dpb.fs_ltref[i]->bottom_field->long_term_frame_idx + add_bottom;
+       }
+     }
+   }
+ 
+ 
+ 
+   if ((currSliceType == I_SLICE)||(currSliceType == SI_SLICE))
+   {
+     listXsize[0] = 0;
+     listXsize[1] = 0;
+     return;
+   }
+ 
+   if ((currSliceType == P_SLICE)||(currSliceType == SP_SLICE))
+   {
+     // Calculate FrameNumWrap and PicNum
+     if (currPicStructure == FRAME)  
+     {
+       for (i=0; i<dpb.ref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ref[i]->is_used==3)
+         {
+           if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+           {
+             listX[0][list0idx++] = dpb.fs_ref[i]->frame;
+           }
+         }
+       }
+       // order list 0 by PicNum
+       qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc);
+       listXsize[0] = list0idx;
+ //      printf("listX[0] (PicNum): "); for (i=0; i<list0idx; i++){printf ("%d  ", listX[0][i]->pic_num);} printf("\n");
+ 
+       // long term handling
+       for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ltref[i]->is_used==3)
+         {
+           if (dpb.fs_ltref[i]->frame->is_long_term)
+           {
+             listX[0][list0idx++]=dpb.fs_ltref[i]->frame;
+           }
+         }
+       }
+       qsort((void *)&listX[0][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+       listXsize[0] = list0idx;
+     }
+     else
+     {
+       fs_list0 = calloc(dpb.size, sizeof (FrameStore*));
+       if (NULL==fs_list0) 
+          no_mem_exit("init_lists: fs_list0");
+       fs_listlt = calloc(dpb.size, sizeof (FrameStore*));
+       if (NULL==fs_listlt) 
+          no_mem_exit("init_lists: fs_listlt");
+ 
+       for (i=0; i<dpb.ref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ref[i]->is_reference)
+         {
+           fs_list0[list0idx++] = dpb.fs_ref[i];
+         }
+       }
+ 
+       qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_frame_num_desc);
+ 
+ //      printf("fs_list0 (FrameNum): "); for (i=0; i<list0idx; i++){printf ("%d  ", fs_list0[i]->frame_num_wrap);} printf("\n");
+ 
+       listXsize[0] = 0;
+       gen_pic_list_from_frame_list(currPicStructure, fs_list0, list0idx, listX[0], &listXsize[0], 0);
+ 
+ //      printf("listX[0] (PicNum): "); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->pic_num);} printf("\n");
+ 
+       // long term handling
+       for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+       {
+         fs_listlt[listltidx++]=dpb.fs_ltref[i];
+       }
+ 
+       qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc);
+ 
+       gen_pic_list_from_frame_list(currPicStructure, fs_listlt, listltidx, listX[0], &listXsize[0], 1);
+ 
+       free(fs_list0);
+       free(fs_listlt);
+     }
+     listXsize[1] = 0;
+   }
+   else
+   {
+     // B-Slice
+     if (currPicStructure == FRAME)  
+     {
+       for (i=0; i<dpb.ref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ref[i]->is_used==3)
+         {
+           if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+           {
+             if (img->framepoc > dpb.fs_ref[i]->frame->poc)
+             {
+               listX[0][list0idx++] = dpb.fs_ref[i]->frame;
+             }
+           }
+         }
+       }
+       qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc);
+       list0idx_1 = list0idx;
+       for (i=0; i<dpb.ref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ref[i]->is_used==3)
+         {
+           if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+           {
+             if (img->framepoc < dpb.fs_ref[i]->frame->poc)
+             {
+               listX[0][list0idx++] = dpb.fs_ref[i]->frame;
+             }
+           }
+         }
+       }
+       qsort((void *)&listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc);
+ 
+       for (j=0; j<list0idx_1; j++)
+       {
+         listX[1][list0idx-list0idx_1+j]=listX[0][j];
+       }
+       for (j=list0idx_1; j<list0idx; j++)
+       {
+         listX[1][j-list0idx_1]=listX[0][j];
+       }
+ 
+       listXsize[0] = listXsize[1] = list0idx;
+ 
+ //      printf("listX[0] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->poc);} printf("\n");
+ //      printf("listX[1] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[1]; i++){printf ("%d  ", listX[1][i]->poc);} printf("\n");
+ 
+       // long term handling
+       for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ltref[i]->is_used==3)
+         {
+           if (dpb.fs_ltref[i]->frame->is_long_term)
+           {
+             listX[0][list0idx]  =dpb.fs_ltref[i]->frame;
+             listX[1][list0idx++]=dpb.fs_ltref[i]->frame;
+           }
+         }
+       }
+       qsort((void *)&listX[0][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+       qsort((void *)&listX[1][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc);
+       listXsize[0] = listXsize[1] = list0idx;
+     }
+     else
+     {
+       fs_list0 = calloc(dpb.size, sizeof (FrameStore*));
+       if (NULL==fs_list0) 
+          no_mem_exit("init_lists: fs_list0");
+       fs_list1 = calloc(dpb.size, sizeof (FrameStore*));
+       if (NULL==fs_list1) 
+          no_mem_exit("init_lists: fs_list1");
+       fs_listlt = calloc(dpb.size, sizeof (FrameStore*));
+       if (NULL==fs_listlt) 
+          no_mem_exit("init_lists: fs_listlt");
+ 
+       listXsize[0] = 0;
+       listXsize[1] = 1;
+ 
+       for (i=0; i<dpb.ref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ref[i]->is_used)
+         {
+           if (img->ThisPOC >= dpb.fs_ref[i]->poc)
+           {
+             fs_list0[list0idx++] = dpb.fs_ref[i];
+           }
+         }
+       }
+       qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_poc_desc);
+       list0idx_1 = list0idx;
+       for (i=0; i<dpb.ref_frames_in_buffer; i++)
+       {
+         if (dpb.fs_ref[i]->is_used)
+         {
+           if (img->ThisPOC < dpb.fs_ref[i]->poc)
+           {
+             fs_list0[list0idx++] = dpb.fs_ref[i];
+           }
+         }
+       }
+       qsort((void *)&fs_list0[list0idx_1], list0idx-list0idx_1, sizeof(FrameStore*), compare_fs_by_poc_asc);
+ 
+       for (j=0; j<list0idx_1; j++)
+       {
+         fs_list1[list0idx-list0idx_1+j]=fs_list0[j];
+       }
+       for (j=list0idx_1; j<list0idx; j++)
+       {
+         fs_list1[j-list0idx_1]=fs_list0[j];
+       }
+       
+ //      printf("fs_list0 currPoc=%d (Poc): ", img->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d  ", fs_list0[i]->poc);} printf("\n");
+ //      printf("fs_list1 currPoc=%d (Poc): ", img->ThisPOC); for (i=0; i<list0idx; i++){printf ("%d  ", fs_list1[i]->poc);} printf("\n");
+ 
+       listXsize[0] = 0;
+       listXsize[1] = 0;
+       gen_pic_list_from_frame_list(currPicStructure, fs_list0, list0idx, listX[0], &listXsize[0], 0);
+       gen_pic_list_from_frame_list(currPicStructure, fs_list1, list0idx, listX[1], &listXsize[1], 0);
+ 
+ //      printf("listX[0] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[0]; i++){printf ("%d  ", listX[0][i]->poc);} printf("\n");
+ //      printf("listX[1] currPoc=%d (Poc): ", img->framepoc); for (i=0; i<listXsize[1]; i++){printf ("%d  ", listX[1][i]->poc);} printf("\n");
+ 
+       // long term handling
+       for (i=0; i<dpb.ltref_frames_in_buffer; i++)
+       {
+         fs_listlt[listltidx++]=dpb.fs_ltref[i];
+       }
+ 
+       qsort((void *)fs_listlt, listltidx, sizeof(FrameStore*), compare_fs_by_lt_pic_idx_asc);
+ 
+       gen_pic_list_from_frame_list(currPicStructure, fs_listlt, listltidx, listX[0], &listXsize[0], 1);
+       gen_pic_list_from_frame_list(currPicStructure, fs_listlt, listltidx, listX[1], &listXsize[1], 1);
+ 
+       free(fs_list0);
+       free(fs_list1);
+       free(fs_listlt);
+     }
+   } 
+ 
+   if ((listXsize[0] == listXsize[1]) && (listXsize[0] > 1))
+   {
+     // check if lists are identical, if yes swap first two elements of listX[1]
+     diff=0;
+     for (j = 0; j< listXsize[0]; j++)
+     {
+       if (listX[0][j]!=listX[1][j])
+         diff=1;
+     }
+     if (!diff)
+     {
+       tmp_s = listX[1][0];
+       listX[1][0]=listX[1][1];
+       listX[1][1]=tmp_s;
+     }
+   }
+   // set max size
+   listXsize[0] = min (listXsize[0], img->num_ref_idx_l0_active);
+   listXsize[1] = min (listXsize[1], img->num_ref_idx_l1_active);
+ 
+   // set the unused list entries to NULL
+   for (i=listXsize[0]; i< (MAX_LIST_SIZE) ; i++)
+   {
+     listX[0][i] = NULL;
+   }
+   for (i=listXsize[1]; i< (MAX_LIST_SIZE) ; i++)
+   {
+     listX[1][i] = NULL;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Derive the field lists listX[2..5] from the frame lists 0 and 1.
+  *    Each frame entry contributes its two fields as consecutive entries:
+  *    listX[2]: list0 for current_field==top    (top field first)
+  *    listX[3]: list1 for current_field==top    (top field first)
+  *    listX[4]: list0 for current_field==bottom (bottom field first)
+  *    listX[5]: list1 for current_field==bottom (bottom field first)
+  *
+  ************************************************************************
+  */
+ void init_mbaff_lists()
+ {
+   unsigned slot;
+   int list, pic;
+ 
+   // start from empty field lists
+   for (list = 2; list < 6; list++)
+   {
+     listXsize[list] = 0;
+     for (slot = 0; slot < MAX_LIST_SIZE; slot++)
+     {
+       listX[list][slot] = NULL;
+     }
+   }
+ 
+   // list 0 -> listX[2] (top parity) and listX[4] (bottom parity)
+   pic = 0;
+   while (pic < listXsize[0])
+   {
+     listX[2][2*pic]     = listX[0][pic]->top_field;
+     listX[4][2*pic]     = listX[0][pic]->bottom_field;
+     listX[2][2*pic + 1] = listX[0][pic]->bottom_field;
+     listX[4][2*pic + 1] = listX[0][pic]->top_field;
+     pic++;
+   }
+   listXsize[4] = listXsize[0] * 2;
+   listXsize[2] = listXsize[4];
+ 
+   // list 1 -> listX[3] (top parity) and listX[5] (bottom parity)
+   pic = 0;
+   while (pic < listXsize[1])
+   {
+     listX[3][2*pic]     = listX[1][pic]->top_field;
+     listX[5][2*pic]     = listX[1][pic]->bottom_field;
+     listX[3][2*pic + 1] = listX[1][pic]->bottom_field;
+     listX[5][2*pic + 1] = listX[1][pic]->top_field;
+     pic++;
+   }
+   listXsize[5] = listXsize[1] * 2;
+   listXsize[3] = listXsize[5];
+ }
+  
+  /*!
+  ************************************************************************
+  * \brief
+  *    Look up a short-term reference picture by its picture number.
+  *
+  *    Scans dpb.fs_ref. When img->structure is FRAME only frame stores
+  *    with is_reference==3 are considered; otherwise the top and bottom
+  *    fields are tested individually.
+  *
+  * \param picNum
+  *    pic_num value to search for
+  * \return
+  *    the first matching picture that is not long-term, or NULL
+  ************************************************************************
+  */
+ static StorablePicture*  get_short_term_pic(int picNum)
+ {
+   unsigned idx = 0;
+ 
+   while (idx < dpb.ref_frames_in_buffer)
+   {
+     if (img->structure != FRAME)
+     {
+       // field mode: bit 0 of is_reference guards the top field, bit 1 the bottom field
+       if ((dpb.fs_ref[idx]->is_reference & 1)
+           && !dpb.fs_ref[idx]->top_field->is_long_term
+           && dpb.fs_ref[idx]->top_field->pic_num == picNum)
+       {
+         return dpb.fs_ref[idx]->top_field;
+       }
+       if ((dpb.fs_ref[idx]->is_reference & 2)
+           && !dpb.fs_ref[idx]->bottom_field->is_long_term
+           && dpb.fs_ref[idx]->bottom_field->pic_num == picNum)
+       {
+         return dpb.fs_ref[idx]->bottom_field;
+       }
+     }
+     else if (dpb.fs_ref[idx]->is_reference == 3
+              && !dpb.fs_ref[idx]->frame->is_long_term
+              && dpb.fs_ref[idx]->frame->pic_num == picNum)
+     {
+       return dpb.fs_ref[idx]->frame;
+     }
+     idx++;
+   }
+   return NULL;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Look up a long-term reference picture by its LongTermPicNum.
+  *
+  *    Scans dpb.fs_ltref. When img->structure is FRAME only frame stores
+  *    with is_reference==3 are considered; otherwise the top and bottom
+  *    fields are tested individually.
+  *
+  * \param LongtermPicNum
+  *    long_term_pic_num value to search for
+  * \return
+  *    the first matching picture that is marked long-term, or NULL
+  ************************************************************************
+  */
+ static StorablePicture*  get_long_term_pic(int LongtermPicNum)
+ {
+   unsigned idx = 0;
+ 
+   while (idx < dpb.ltref_frames_in_buffer)
+   {
+     if (img->structure != FRAME)
+     {
+       // field mode: bit 0 of is_reference guards the top field, bit 1 the bottom field
+       if ((dpb.fs_ltref[idx]->is_reference & 1)
+           && dpb.fs_ltref[idx]->top_field->is_long_term
+           && dpb.fs_ltref[idx]->top_field->long_term_pic_num == LongtermPicNum)
+       {
+         return dpb.fs_ltref[idx]->top_field;
+       }
+       if ((dpb.fs_ltref[idx]->is_reference & 2)
+           && dpb.fs_ltref[idx]->bottom_field->is_long_term
+           && dpb.fs_ltref[idx]->bottom_field->long_term_pic_num == LongtermPicNum)
+       {
+         return dpb.fs_ltref[idx]->bottom_field;
+       }
+     }
+     else if (dpb.fs_ltref[idx]->is_reference == 3
+              && dpb.fs_ltref[idx]->frame->is_long_term
+              && dpb.fs_ltref[idx]->frame->long_term_pic_num == LongtermPicNum)
+     {
+       return dpb.fs_ltref[idx]->frame;
+     }
+     idx++;
+   }
+   return NULL;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reordering process for short-term reference pictures
+  *
+  *    Inserts the short-term picture with pic_num picNumLX at position
+  *    *refIdxLX, shifting the following entries up by one, and then
+  *    removes the other occurrence of that picture from the rest of
+  *    the list.
+  *
+  * \param RefPicListX
+  *    list to modify; entries up to index
+  *    num_ref_idx_lX_active_minus1+1 are written
+  * \param num_ref_idx_lX_active_minus1
+  *    number of active list entries minus one
+  * \param picNumLX
+  *    pic_num of the picture to move
+  * \param refIdxLX
+  *    in: insert position, out: insert position + 1
+  ************************************************************************
+  */
+ static void reorder_short_term(StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int picNumLX, int *refIdxLX)
+ {
+   StorablePicture *target;
+   int src, dst;
+   int last = num_ref_idx_lX_active_minus1 + 1;
+ 
+   target = get_short_term_pic(picNumLX);
+ 
+   // open a gap at the insert position
+   src = last;
+   while (src > *refIdxLX)
+   {
+     RefPicListX[src] = RefPicListX[src - 1];
+     src--;
+   }
+ 
+   RefPicListX[*refIdxLX] = target;
+   (*refIdxLX)++;
+ 
+   // compact the tail: keep long-term pictures and pictures with another pic_num
+   dst = *refIdxLX;
+   for (src = *refIdxLX; src <= last; src++)
+   {
+     if (RefPicListX[src] == NULL)
+       continue;
+     if (RefPicListX[src]->is_long_term || RefPicListX[src]->pic_num != picNumLX)
+     {
+       RefPicListX[dst] = RefPicListX[src];
+       dst++;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reordering process for long-term reference pictures
+  *
+  *    Inserts the long-term picture with the given LongTermPicNum at
+  *    position *refIdxLX, shifting the following entries up by one, and
+  *    then removes the other occurrence of that picture from the rest
+  *    of the list.
+  *
+  * \param RefPicListX
+  *    list to modify; entries up to index
+  *    num_ref_idx_lX_active_minus1+1 are written
+  * \param num_ref_idx_lX_active_minus1
+  *    number of active list entries minus one
+  * \param LongTermPicNum
+  *    long_term_pic_num of the picture to move
+  * \param refIdxLX
+  *    in: insert position, out: insert position + 1
+  ************************************************************************
+  */
+ static void reorder_long_term(StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int LongTermPicNum, int *refIdxLX)
+ {
+   int cIdx, nIdx;
+ 
+   StorablePicture *picLX;
+ 
+   picLX = get_long_term_pic(LongTermPicNum);
+ 
+   // open a gap at the insert position
+   for( cIdx = num_ref_idx_lX_active_minus1+1; cIdx > *refIdxLX; cIdx-- )
+     RefPicListX[ cIdx ] = RefPicListX[ cIdx - 1];
+   
+   RefPicListX[ (*refIdxLX)++ ] = picLX;
+ 
+   nIdx = *refIdxLX;
+ 
+   // compact the tail: keep short-term pictures and pictures with another LongTermPicNum
+   for( cIdx = *refIdxLX; cIdx <= num_ref_idx_lX_active_minus1+1; cIdx++ )
+   {
+     // unused list entries are NULL, skip them instead of dereferencing
+     // (same guard as in reorder_short_term)
+     if (RefPicListX[ cIdx ] == NULL)
+       continue;
+ 
+     if( (!RefPicListX[ cIdx ]->is_long_term ) ||  (RefPicListX[ cIdx ]->long_term_pic_num != LongTermPicNum ))
+       RefPicListX[ nIdx++ ] = RefPicListX[ cIdx ];
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reordering process for reference picture lists
+  *
+  *    Walks the command array until the terminating value 3 is found.
+  *    Commands 0 and 1 subtract or add abs_diff_pic_num_minus1+1
+  *    from/to the running picture number prediction (wrapping at
+  *    maxPicNum) and move the resulting short-term picture; command 2
+  *    moves the long-term picture given by long_term_pic_idx.
+  *
+  * \param list
+  *    reference picture list to reorder
+  * \param list_size
+  *    out: set to num_ref_idx_lX_active_minus1 + 1
+  * \param num_ref_idx_lX_active_minus1
+  *    number of active list entries minus one
+  * \param reordering_of_pic_nums_idc
+  *    command array, terminated by the value 3
+  * \param abs_diff_pic_num_minus1
+  *    per-command operand for commands 0 and 1
+  * \param long_term_pic_idx
+  *    per-command operand for command 2
+  ************************************************************************
+  */
+ void reorder_ref_pic_list(StorablePicture **list, int *list_size, int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx)
+ {
+   int cmd, idc, delta;
+   int maxPicNum, currPicNum, picNumLXNoWrap, picNumLXPred, picNumLX;
+   int refIdxLX = 0;
+   int MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+ 
+   if (img->structure != FRAME)
+   {
+     // field picture
+     currPicNum = 2 * img->frame_num + 1;
+     maxPicNum  = 2 * MaxFrameNum;
+   }
+   else
+   {
+     currPicNum = img->frame_num;
+     maxPicNum  = MaxFrameNum;
+   }
+ 
+   picNumLXPred = currPicNum;
+ 
+   for (cmd = 0; (idc = reordering_of_pic_nums_idc[cmd]) != 3; cmd++)
+   {
+     if (idc > 3)
+       error ("Invalid remapping_of_pic_nums_idc command", 500);
+ 
+     if (idc >= 2)
+     {
+       // long-term reordering command
+       reorder_long_term(list, num_ref_idx_lX_active_minus1, long_term_pic_idx[cmd], &refIdxLX);
+       continue;
+     }
+ 
+     // short-term reordering command: update the prediction with wrap-around
+     delta = abs_diff_pic_num_minus1[cmd] + 1;
+     if (idc == 0)
+     {
+       picNumLXNoWrap = picNumLXPred - delta;
+       if (picNumLXNoWrap < 0)
+         picNumLXNoWrap += maxPicNum;
+     }
+     else
+     {
+       picNumLXNoWrap = picNumLXPred + delta;
+       if (picNumLXNoWrap >= maxPicNum)
+         picNumLXNoWrap -= maxPicNum;
+     }
+     picNumLXPred = picNumLXNoWrap;
+ 
+     picNumLX = picNumLXNoWrap;
+     if (picNumLXNoWrap > currPicNum)
+       picNumLX -= maxPicNum;
+ 
+     reorder_short_term(list, num_ref_idx_lX_active_minus1, picNumLX, &refIdxLX);
+   }
+ 
+   // that's a definition
+   *list_size = num_ref_idx_lX_active_minus1 + 1;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Rebuild dpb.fs_ref: collect every frame store of dpb.fs for which
+  *    is_short_term_reference() is true, store the count in
+  *    dpb.ref_frames_in_buffer and set the remaining entries (up to
+  *    dpb.size) to NULL
+  *
+  ************************************************************************
+  */
+ void update_ref_list()
+ {
+   unsigned src;
+   unsigned count = 0;
+ 
+   for (src = 0; src < dpb.used_size; src++)
+   {
+     if (!is_short_term_reference(dpb.fs[src]))
+       continue;
+ 
+     dpb.fs_ref[count] = dpb.fs[src];
+     count++;
+   }
+ 
+   dpb.ref_frames_in_buffer = count;
+ 
+   // clear the unused tail
+   for (; count < dpb.size; count++)
+   {
+     dpb.fs_ref[count] = NULL;
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Rebuild dpb.fs_ltref: collect every frame store of dpb.fs for
+  *    which is_long_term_reference() is true, store the count in
+  *    dpb.ltref_frames_in_buffer and set the remaining entries (up to
+  *    dpb.size) to NULL
+  *
+  ************************************************************************
+  */
+ void update_ltref_list()
+ {
+   unsigned src;
+   unsigned count = 0;
+ 
+   for (src = 0; src < dpb.used_size; src++)
+   {
+     if (!is_long_term_reference(dpb.fs[src]))
+       continue;
+ 
+     dpb.fs_ltref[count] = dpb.fs[src];
+     count++;
+   }
+ 
+   dpb.ltref_frames_in_buffer = count;
+ 
+   // clear the unused tail
+   for (; count < dpb.size; count++)
+   {
+     dpb.fs_ltref[count] = NULL;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Perform Memory management for idr pictures
+  *
+  *    Empties the buffer (either by replacing all used frame stores when
+  *    img->no_output_of_prior_pics_flag is set, or via flush_dpb()),
+  *    rebuilds both reference lists and initializes the long-term state
+  *    of the current picture from img->long_term_reference_flag.
+  *
+  * \param p
+  *    current picture; its is_long_term (and, for long-term,
+  *    long_term_frame_idx) fields are written
+  ************************************************************************
+  */
+ static void idr_memory_management(StorablePicture* p)
+ {
+   unsigned k;
+ 
+   assert (img->currentPicture->idr_flag);
+ 
+   if (!img->no_output_of_prior_pics_flag)
+   {
+     flush_dpb();
+   }
+   else
+   {
+     // free all stored pictures and replace them by fresh frame stores
+     k = 0;
+     while (k < dpb.used_size)
+     {
+       free_frame_store(dpb.fs[k]);
+       dpb.fs[k] = alloc_frame_store();
+       k++;
+     }
+ 
+     // reset all reference settings
+     k = 0;
+     while (k < dpb.ref_frames_in_buffer)
+     {
+       dpb.fs_ref[k] = NULL;
+       k++;
+     }
+     k = 0;
+     while (k < dpb.ltref_frames_in_buffer)
+     {
+       dpb.fs_ltref[k] = NULL;
+       k++;
+     }
+     dpb.used_size = 0;
+   }
+   dpb.last_picture = NULL;
+ 
+   update_ref_list();
+   update_ltref_list();
+   dpb.last_output_poc = INT_MIN;
+ 
+   if (!img->long_term_reference_flag)
+   {
+     p->is_long_term           = 0;
+     dpb.max_long_term_pic_idx = -1;
+     return;
+   }
+ 
+   // the IDR picture itself becomes long-term picture 0
+   p->is_long_term           = 1;
+   p->long_term_frame_idx    = 0;
+   dpb.max_long_term_pic_idx = 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Perform Sliding window decoded reference picture marking process
+  *
+  *    When the number of short-term reference frames equals
+  *    active_sps->num_ref_frames minus the number of long-term frames,
+  *    the first frame store in dpb.fs that is a non-long-term reference
+  *    is unmarked and the short-term list is rebuilt.
+  *
+  * \param p
+  *    current picture; its is_long_term field is cleared
+  ************************************************************************
+  */
+ static void sliding_window_memory_management(StorablePicture* p)
+ {
+   unsigned k;
+ 
+   assert (!img->currentPicture->idr_flag);
+ 
+   // window full: drop the first short-term reference frame
+   if (dpb.ref_frames_in_buffer==active_sps->num_ref_frames - dpb.ltref_frames_in_buffer)
+   {
+     k = 0;
+     while (k < dpb.used_size
+            && (!dpb.fs[k]->is_reference || dpb.fs[k]->is_long_term))
+     {
+       k++;
+     }
+ 
+     if (k < dpb.used_size)
+     {
+       unmark_for_reference(dpb.fs[k]);
+       update_ref_list();
+     }
+   }
+ 
+   p->is_long_term = 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Calculate picNumX
+  *
+  * \param p
+  *    current picture; frame_num is used directly for frames and as
+  *    2*frame_num+1 for fields
+  * \param difference_of_pic_nums_minus1
+  *    distance to the current picture number, minus one
+  * \return
+  *    current picture number minus (difference_of_pic_nums_minus1 + 1)
+  ************************************************************************
+  */
+ static int get_pic_num_x (StorablePicture *p, int difference_of_pic_nums_minus1)
+ {
+   int currPicNum = p->frame_num;
+ 
+   if (p->structure != FRAME)
+   {
+     currPicNum = 2 * p->frame_num + 1;
+   }
+ 
+   return currPicNum - (difference_of_pic_nums_minus1 + 1);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Adaptive Memory Management: Mark short term picture unused
+  *
+  *    Searches dpb.fs_ref for the short-term picture whose pic_num
+  *    equals picNumX (derived from p and difference_of_pic_nums_minus1)
+  *    and unmarks the first match. For field pictures only the matching
+  *    field is unmarked; the frame picture is also unmarked when the
+  *    frame store holds both fields (is_used==3).
+  *
+  * \param p
+  *    current picture
+  * \param difference_of_pic_nums_minus1
+  *    operand used to compute picNumX
+  ************************************************************************
+  */
+ static void mm_unmark_short_term_for_reference(StorablePicture *p, int difference_of_pic_nums_minus1)
+ {
+   unsigned k;
+   int picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1);
+ 
+   for (k = 0; k < dpb.ref_frames_in_buffer; k++)
+   {
+     if (p->structure == FRAME)
+     {
+       if (dpb.fs_ref[k]->is_reference == 3
+           && dpb.fs_ref[k]->is_long_term == 0
+           && dpb.fs_ref[k]->frame->pic_num == picNumX)
+       {
+         unmark_for_reference(dpb.fs_ref[k]);
+         return;
+       }
+       continue;
+     }
+ 
+     // top field: short-term reference with matching pic_num
+     if ((dpb.fs_ref[k]->is_reference & 1)
+         && !(dpb.fs_ref[k]->is_long_term & 1)
+         && dpb.fs_ref[k]->top_field->pic_num == picNumX)
+     {
+       dpb.fs_ref[k]->is_reference &= 2;
+       dpb.fs_ref[k]->top_field->used_for_reference = 0;
+       if (dpb.fs_ref[k]->is_used == 3)
+         dpb.fs_ref[k]->frame->used_for_reference = 0;
+       return;
+     }
+ 
+     // bottom field: short-term reference with matching pic_num
+     if ((dpb.fs_ref[k]->is_reference & 2)
+         && !(dpb.fs_ref[k]->is_long_term & 2)
+         && dpb.fs_ref[k]->bottom_field->pic_num == picNumX)
+     {
+       dpb.fs_ref[k]->is_reference &= 1;
+       dpb.fs_ref[k]->bottom_field->used_for_reference = 0;
+       if (dpb.fs_ref[k]->is_used == 3)
+         dpb.fs_ref[k]->frame->used_for_reference = 0;
+       return;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Adaptive Memory Management: Mark long term picture unused
+  *
+  *    Searches dpb.fs_ltref for the long-term picture with the given
+  *    long_term_pic_num. In frame mode every matching frame store is
+  *    unmarked; in field mode the first matching field is unmarked and
+  *    the search stops.
+  *
+  * \param p
+  *    current picture (only its structure is read)
+  * \param long_term_pic_num
+  *    long_term_pic_num of the picture to unmark
+  ************************************************************************
+  */
+ static void mm_unmark_long_term_for_reference(StorablePicture *p, int long_term_pic_num)
+ {
+   unsigned k;
+ 
+   for (k = 0; k < dpb.ltref_frames_in_buffer; k++)
+   {
+     if (p->structure == FRAME)
+     {
+       // no early exit here: the scan continues after a match
+       if (dpb.fs_ltref[k]->is_reference == 3
+           && dpb.fs_ltref[k]->is_long_term == 3
+           && dpb.fs_ltref[k]->frame->long_term_pic_num == long_term_pic_num)
+       {
+         unmark_for_long_term_reference(dpb.fs_ltref[k]);
+       }
+       continue;
+     }
+ 
+     // top field
+     if ((dpb.fs_ltref[k]->is_reference & 1)
+         && (dpb.fs_ltref[k]->is_long_term & 1)
+         && dpb.fs_ltref[k]->top_field->long_term_pic_num == long_term_pic_num)
+     {
+       dpb.fs_ltref[k]->top_field->used_for_reference = 0;
+       dpb.fs_ltref[k]->top_field->is_long_term = 0;
+       dpb.fs_ltref[k]->is_reference &= 2;
+       dpb.fs_ltref[k]->is_long_term &= 2;
+       if (dpb.fs_ltref[k]->is_used == 3)
+       {
+         dpb.fs_ltref[k]->frame->used_for_reference = 0;
+         dpb.fs_ltref[k]->frame->is_long_term = 0;
+       }
+       return;
+     }
+ 
+     // bottom field
+     if ((dpb.fs_ltref[k]->is_reference & 2)
+         && (dpb.fs_ltref[k]->is_long_term & 2)
+         && dpb.fs_ltref[k]->bottom_field->long_term_pic_num == long_term_pic_num)
+     {
+       dpb.fs_ltref[k]->bottom_field->used_for_reference = 0;
+       dpb.fs_ltref[k]->bottom_field->is_long_term = 0;
+       dpb.fs_ltref[k]->is_reference &= 1;
+       dpb.fs_ltref[k]->is_long_term &= 1;
+       if (dpb.fs_ltref[k]->is_used == 3)
+       {
+         dpb.fs_ltref[k]->frame->used_for_reference = 0;
+         dpb.fs_ltref[k]->frame->is_long_term = 0;
+       }
+       return;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Mark a long-term reference frame or complementary field pair unused for reference
+  *
+  * \param long_term_frame_idx
+  *    every entry of dpb.fs_ltref with this long_term_frame_idx is unmarked
+  ************************************************************************
+  */
+ static void unmark_long_term_frame_for_reference_by_frame_idx(int long_term_frame_idx)
+ {
+   unsigned k = 0;
+ 
+   while (k < dpb.ltref_frames_in_buffer)
+   {
+     if (dpb.fs_ltref[k]->long_term_frame_idx == long_term_frame_idx)
+     {
+       unmark_for_long_term_reference(dpb.fs_ltref[k]);
+     }
+     k++;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Mark a long-term reference field unused for reference only if it's not
+  *    the complementary field of the picture indicated by picNumX
+  *
+  *    For every entry of dpb.fs_ltref with the given long_term_frame_idx:
+  *    the entry is unmarked if both fields, or the field of the same
+  *    parity as \a structure, are long-term. Otherwise it is unmarked
+  *    unless it is recognized as the complementary field: with
+  *    mark_current set, it must be dpb.last_picture with frame_num equal
+  *    to curr_frame_num; without, its frame_num must equal curr_pic_num/2.
+  *
+  * \param structure
+  *    TOP_FIELD or BOTTOM_FIELD (FRAME is rejected by an assert)
+  * \param long_term_frame_idx
+  *    long-term frame index to clear
+  * \param mark_current
+  *    selects which complementary-field test is used
+  * \param curr_frame_num
+  *    frame_num compared against dpb.last_picture when mark_current is set
+  * \param curr_pic_num
+  *    picture number; a negative value is wrapped by 2*MaxFrameNum
+  ************************************************************************
+  */
+ static void unmark_long_term_field_for_reference_by_frame_idx(PictureStructure structure, int long_term_frame_idx, int mark_current, unsigned curr_frame_num, int curr_pic_num)
+ {
+   unsigned k;
+   int same_parity;   // is_long_term value meaning "only the field of this parity is long-term"
+   int drop;
+   int MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+ 
+   assert(structure!=FRAME);
+   if (curr_pic_num<0)
+     curr_pic_num+=(2*MaxFrameNum);
+ 
+   if (structure == TOP_FIELD)
+     same_parity = 1;
+   else if (structure == BOTTOM_FIELD)
+     same_parity = 2;
+   else
+     return;   // neither field parity: nothing is unmarked
+ 
+   for (k = 0; k < dpb.ltref_frames_in_buffer; k++)
+   {
+     if (dpb.fs_ltref[k]->long_term_frame_idx != long_term_frame_idx)
+       continue;
+ 
+     if (dpb.fs_ltref[k]->is_long_term == 3 || dpb.fs_ltref[k]->is_long_term == same_parity)
+     {
+       drop = 1;
+     }
+     else if (mark_current)
+     {
+       // keep only the frame store of the last picture with the current frame_num
+       drop = (!dpb.last_picture)
+           || (dpb.last_picture != dpb.fs_ltref[k])
+           || (dpb.last_picture->frame_num != curr_frame_num);
+     }
+     else
+     {
+       // keep only the frame store that holds the picture given by curr_pic_num
+       drop = ((dpb.fs_ltref[k]->frame_num) != (unsigned)(curr_pic_num/2));
+     }
+ 
+     if (drop)
+     {
+       unmark_for_long_term_reference(dpb.fs_ltref[k]);
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    mark a picture as long-term reference
+  *
+  *    Searches dpb.fs_ref for the short-term picture with pic_num equal
+  *    to picNumX and turns it into a long-term picture with the given
+  *    long_term_frame_idx. For frames, long_term_pic_num equals
+  *    long_term_frame_idx; for fields it is 2*long_term_frame_idx plus 1
+  *    for the field with the same parity as \a p and plus 0 for the
+  *    opposite parity. A warning is printed if no picture matches.
+  *
+  * \param p
+  *    current picture (only its structure is read)
+  * \param long_term_frame_idx
+  *    long-term frame index to assign
+  * \param picNumX
+  *    pic_num of the short-term picture to convert
+  ************************************************************************
+  */
+ static void mark_pic_long_term(StorablePicture* p, int long_term_frame_idx, int picNumX)
+ {
+   unsigned i;
+   int add_top, add_bottom;
+ 
+   if (p->structure == FRAME)
+   {
+     for (i=0; i<dpb.ref_frames_in_buffer; i++)
+     {
+       if (dpb.fs_ref[i]->is_reference == 3)
+       {
+         if ((!dpb.fs_ref[i]->frame->is_long_term)&&(dpb.fs_ref[i]->frame->pic_num == picNumX))
+         {
+           dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_frame_idx
+                                              = long_term_frame_idx;
+           dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx;
+           dpb.fs_ref[i]->frame->is_long_term = 1;
+ 
+           // keep the field pictures of the frame store in sync, if present
+           if (dpb.fs_ref[i]->top_field && dpb.fs_ref[i]->bottom_field)
+           {
+             dpb.fs_ref[i]->top_field->long_term_frame_idx = dpb.fs_ref[i]->bottom_field->long_term_frame_idx
+                                                           = long_term_frame_idx;
+             dpb.fs_ref[i]->top_field->long_term_pic_num = long_term_frame_idx;
+             dpb.fs_ref[i]->bottom_field->long_term_pic_num = long_term_frame_idx;
+ 
+             dpb.fs_ref[i]->top_field->is_long_term = dpb.fs_ref[i]->bottom_field->is_long_term
+                                                    = 1;
+ 
+           }
+           dpb.fs_ref[i]->is_long_term = 3;
+           return;
+         }
+       }
+     }
+     printf ("Warning: reference frame for long term marking not found\n");
+   }
+   else
+   {
+     // parity offset: 1 for the field with the same parity as the current picture
+     if (p->structure == TOP_FIELD)
+     {
+       add_top    = 1;
+       add_bottom = 0;
+     }
+     else
+     {
+       add_top    = 0;
+       add_bottom = 1;
+     }
+     for (i=0; i<dpb.ref_frames_in_buffer; i++)
+     {
+       if (dpb.fs_ref[i]->is_reference & 1)
+       {
+         if ((!dpb.fs_ref[i]->top_field->is_long_term)&&(dpb.fs_ref[i]->top_field->pic_num == picNumX))
+         {
+           if ((dpb.fs_ref[i]->is_long_term) && (dpb.fs_ref[i]->long_term_frame_idx != long_term_frame_idx))
+           {
+               printf ("Warning: assigning long_term_frame_idx different from other field\n");
+           }
+ 
+           dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->top_field->long_term_frame_idx 
+                                              = long_term_frame_idx;
+           dpb.fs_ref[i]->top_field->long_term_pic_num = 2 * long_term_frame_idx + add_top;
+           dpb.fs_ref[i]->top_field->is_long_term = 1;
+           dpb.fs_ref[i]->is_long_term |= 1;
+           // both fields long-term now: the frame picture becomes long-term as well
+           if (dpb.fs_ref[i]->is_long_term == 3)
+           {
+             dpb.fs_ref[i]->frame->is_long_term = 1;
+             dpb.fs_ref[i]->frame->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx;
+           }
+           return;
+         }
+       }
+       if (dpb.fs_ref[i]->is_reference & 2)
+       {
+         if ((!dpb.fs_ref[i]->bottom_field->is_long_term)&&(dpb.fs_ref[i]->bottom_field->pic_num == picNumX))
+         {
+           if ((dpb.fs_ref[i]->is_long_term) && (dpb.fs_ref[i]->long_term_frame_idx != long_term_frame_idx))
+           {
+               printf ("Warning: assigning long_term_frame_idx different from other field\n");
+           }
+ 
+           dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->bottom_field->long_term_frame_idx 
+                                              = long_term_frame_idx;
+           // the bottom field takes the bottom parity offset (was add_top)
+           dpb.fs_ref[i]->bottom_field->long_term_pic_num = 2 * long_term_frame_idx + add_bottom;
+           dpb.fs_ref[i]->bottom_field->is_long_term = 1;
+           dpb.fs_ref[i]->is_long_term |= 2;
+           // both fields long-term now: the frame picture becomes long-term as well
+           if (dpb.fs_ref[i]->is_long_term == 3)
+           {
+             dpb.fs_ref[i]->frame->is_long_term = 1;
+             dpb.fs_ref[i]->frame->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx;
+           }
+           return;
+         }
+       }
+     }
+     printf ("Warning: reference field for long term marking not found\n");
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Assign a long term frame index to a short term picture
+  *
+  *    First releases whatever currently holds long_term_frame_idx (for
+  *    field pictures the complementary field of the target is spared),
+  *    then marks the picture identified by picNumX as long-term.
+  *
+  * \param p
+  *    current picture
+  * \param difference_of_pic_nums_minus1
+  *    operand used to compute picNumX
+  * \param long_term_frame_idx
+  *    long-term frame index to assign
+  ************************************************************************
+  */
+ static void mm_assign_long_term_frame_idx(StorablePicture* p, int difference_of_pic_nums_minus1, int long_term_frame_idx)
+ {
+   int picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1);
+ 
+   // remove frames/fields with same long_term_frame_idx
+   if (p->structure != FRAME)
+   {
+     PictureStructure structure = FRAME;   // FRAME doubles as "not found yet"
+     unsigned k;
+ 
+     // find out whether picNumX refers to a top or a bottom field
+     for (k = 0; structure == FRAME && k < dpb.ref_frames_in_buffer; k++)
+     {
+       if ((dpb.fs_ref[k]->is_reference & 1)
+           && dpb.fs_ref[k]->top_field->pic_num == picNumX)
+       {
+         structure = TOP_FIELD;
+       }
+       else if ((dpb.fs_ref[k]->is_reference & 2)
+                && dpb.fs_ref[k]->bottom_field->pic_num == picNumX)
+       {
+         structure = BOTTOM_FIELD;
+       }
+     }
+ 
+     if (structure == FRAME)
+     {
+       error ("field for long term marking not found",200);
+     }
+ 
+     unmark_long_term_field_for_reference_by_frame_idx(structure, long_term_frame_idx, 0, 0, picNumX);
+   }
+   else
+   {
+     unmark_long_term_frame_for_reference_by_frame_idx(long_term_frame_idx);
+   }
+ 
+   mark_pic_long_term(p, long_term_frame_idx, picNumX);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Store the new maximum long term frame index in the DPB and unmark
+  *    every long term frame store whose index now exceeds it.
+  *
+  * \param max_long_term_frame_idx_plus1
+  *    New maximum plus one; 0 therefore invalidates all long term indices
+  ************************************************************************
+  */
+ void mm_update_max_long_term_frame_idx(int max_long_term_frame_idx_plus1)
+ {
+   unsigned slot = 0;
+ 
+   dpb.max_long_term_pic_idx = max_long_term_frame_idx_plus1 - 1;
+ 
+   // drop long term frame stores that are no longer within the limit
+   while (slot < dpb.ltref_frames_in_buffer)
+   {
+     if (dpb.fs_ltref[slot]->long_term_frame_idx > dpb.max_long_term_pic_idx)
+     {
+       unmark_for_long_term_reference(dpb.fs_ltref[slot]);
+     }
+     slot++;
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Unmark every long term reference picture. Implemented by setting
+  *    max_long_term_frame_idx_plus1 to 0, which leaves no valid index.
+  ************************************************************************
+  */
+ static void mm_unmark_all_long_term_for_reference ()
+ {
+   // a "plus1" value of 0 makes the maximum index -1
+   mm_update_max_long_term_frame_idx(0);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Unmark every frame store in the short term reference list and
+  *    rebuild that list afterwards.
+  ************************************************************************
+  */
+ static void mm_unmark_all_short_term_for_reference ()
+ {
+   unsigned int slot = 0;
+ 
+   while (slot < dpb.ref_frames_in_buffer)
+   {
+     unmark_for_reference(dpb.fs_ref[slot]);
+     slot++;
+   }
+ 
+   update_ref_list();
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Mark the current picture as a long term reference
+  *    (memory_management_control_operation 6).
+  *
+  * \param p
+  *    Current picture
+  * \param long_term_frame_idx
+  *    Long term frame index to assign to p
+  ************************************************************************
+  */
+ static void mm_mark_current_picture_long_term(StorablePicture *p, int long_term_frame_idx)
+ {
+   // release pictures that already use this long_term_frame_idx
+   if (p->structure != FRAME)
+   {
+     unmark_long_term_field_for_reference_by_frame_idx(p->structure, long_term_frame_idx, 1, p->pic_num, 0);
+   }
+   else
+   {
+     unmark_long_term_frame_for_reference_by_frame_idx(long_term_frame_idx);
+   }
+ 
+   p->long_term_frame_idx = long_term_frame_idx;
+   p->is_long_term = 1;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Adaptive memory control decoded reference picture marking process.
+  *    Executes and frees every queued memory management control
+  *    operation in img->dec_ref_pic_marking_buffer. If operation 5 was
+  *    among them, the picture numbering and POC values of p are reset
+  *    and the DPB is flushed.
+  *
+  * \param p
+  *    Current picture
+  ************************************************************************
+  */
+ static void adaptive_memory_management(StorablePicture* p)
+ {
+   DecRefPicMarking_t *cmd;
+ 
+   img->last_has_mmco_5 = 0;
+ 
+   assert (!img->currentPicture->idr_flag);
+   assert (img->adaptive_ref_pic_buffering_flag);
+ 
+   // consume the list head first; each node is unlinked and freed once executed
+   for (cmd = img->dec_ref_pic_marking_buffer; cmd != NULL; cmd = img->dec_ref_pic_marking_buffer)
+   {
+     switch (cmd->memory_management_control_operation)
+     {
+       case 0:
+         // operation 0 terminates the list, so nothing may follow it
+         if (cmd->Next != NULL)
+         {
+           error ("memory_management_control_operation = 0 not last operation in buffer", 500);
+         }
+         break;
+       case 1:
+         mm_unmark_short_term_for_reference(p, cmd->difference_of_pic_nums_minus1);
+         update_ref_list();
+         break;
+       case 2:
+         mm_unmark_long_term_for_reference(p, cmd->long_term_pic_num);
+         update_ltref_list();
+         break;
+       case 3:
+         mm_assign_long_term_frame_idx(p, cmd->difference_of_pic_nums_minus1, cmd->long_term_frame_idx);
+         update_ref_list();
+         update_ltref_list();
+         break;
+       case 4:
+         mm_update_max_long_term_frame_idx (cmd->max_long_term_frame_idx_plus1);
+         update_ltref_list();
+         break;
+       case 5:
+         mm_unmark_all_short_term_for_reference();
+         mm_unmark_all_long_term_for_reference();
+         img->last_has_mmco_5 = 1;
+         break;
+       case 6:
+         mm_mark_current_picture_long_term(p, cmd->long_term_frame_idx);
+         check_num_ref();
+         break;
+       default:
+         error ("invalid memory_management_control_operation in buffer", 500);
+         break;
+     }
+     img->dec_ref_pic_marking_buffer = cmd->Next;
+     free (cmd);
+   }
+ 
+   if (!img->last_has_mmco_5)
+   {
+     return;
+   }
+ 
+   // operation 5 was executed: restart numbering and rebase the POC values
+   p->pic_num = p->frame_num = 0;
+ 
+   if (p->structure == TOP_FIELD)
+   {
+     p->poc = p->top_poc = img->toppoc =0;
+   }
+   else if (p->structure == BOTTOM_FIELD)
+   {
+     p->poc = p->bottom_poc = img->bottompoc = 0;
+   }
+   else if (p->structure == FRAME)
+   {
+     // shift both field POCs by the old frame POC
+     p->top_poc    -= p->poc;
+     p->bottom_poc -= p->poc;
+ 
+     img->toppoc = p->top_poc;
+     img->bottompoc = p->bottom_poc;
+ 
+     p->poc = min (p->top_poc, p->bottom_poc);
+     img->framepoc = p->poc;
+   }
+ 
+   img->ThisPOC = p->poc;
+   flush_dpb();
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Store a picture in the DPB. This includes checking for space in
+  *    the DPB and flushing frames.
+  *    A field that completes the frame store remembered in
+  *    dpb.last_picture is inserted there; any other picture gets a new
+  *    frame store at the end of the buffer, after room has been made by
+  *    removing unused frames and outputting frames.
+  *
+  * \param p
+  *    Picture to be stored
+  *
+  ************************************************************************
+  */
+ void store_picture_in_dpb(StorablePicture* p)
+ {
+   unsigned ref_slot;
+   int lowest_poc, lowest_pos;
+ 
+   assert (p!=NULL);
+ 
+   p->used_for_reference = (img->nal_reference_idc != 0);
+ 
+   img->last_has_mmco_5=0;
+   img->last_pic_bottom_field = (img->structure == BOTTOM_FIELD);
+ 
+   // reference marking: IDR handling, or adaptive memory management
+   if (img->currentPicture->idr_flag)
+   {
+     idr_memory_management(p);
+   }
+   else if (p->used_for_reference && (img->adaptive_ref_pic_buffering_flag))
+   {
+     adaptive_memory_management(p);
+   }
+ 
+   // second field of the previously stored picture: it must have the same
+   // number, the opposite parity must be the one already present, and the
+   // reference status must agree with the first field
+   if (((p->structure==TOP_FIELD)||(p->structure==BOTTOM_FIELD))
+       && dpb.last_picture
+       && ((int)dpb.last_picture->frame_num == p->pic_num)
+       && (((p->structure==TOP_FIELD)&&(dpb.last_picture->is_used==2))
+           ||((p->structure==BOTTOM_FIELD)&&(dpb.last_picture->is_used==1)))
+       && ((p->used_for_reference != 0) == (dpb.last_picture->is_orig_reference != 0)))
+   {
+     insert_picture_in_dpb(dpb.last_picture, p);
+     update_ref_list();
+     update_ltref_list();
+     dump_dpb();
+     dpb.last_picture = NULL;
+     return;
+   }
+ 
+   // from here on: a frame, or a field without a stored complementary field
+ 
+   // sliding window marking for non-IDR reference pictures without adaptive marking
+   if ((!img->currentPicture->idr_flag) && p->used_for_reference && (!img->adaptive_ref_pic_buffering_flag))
+   {
+     sliding_window_memory_management(p);
+   }
+ 
+   // buffer full: first try to drop a frame that is no longer needed
+   if (dpb.used_size==dpb.size)
+   {
+     remove_unused_frame_from_dpb();
+   }
+ 
+   // still full: output frames until a slot becomes free
+   while (dpb.used_size==dpb.size)
+   {
+     if (!p->used_for_reference)
+     {
+       // a non-reference picture that precedes everything waiting for
+       // output is written out directly and never stored
+       get_smallest_poc(&lowest_poc, &lowest_pos);
+       if ((-1==lowest_pos) || (p->poc < lowest_poc))
+       {
+         direct_output(p, p_dec);
+         return;
+       }
+     }
+     output_one_frame_from_dpb();
+   }
+ 
+   // a short term reference picture must not reuse a frame_num already in the list
+   if ((p->used_for_reference)&&(!p->is_long_term))
+   {
+     for (ref_slot = 0; ref_slot < dpb.ref_frames_in_buffer; ref_slot++)
+     {
+       if (dpb.fs_ref[ref_slot]->frame_num == p->frame_num)
+       {
+         error("duplicate frame_num im short-term reference picture buffer", 500);
+       }
+     }
+   }
+ 
+   // store in the first free frame store at the end of the buffer
+   insert_picture_in_dpb(dpb.fs[dpb.used_size],p);
+ 
+   // remember a lone field so that its complementary field can join it
+   dpb.last_picture = (p->structure != FRAME) ? dpb.fs[dpb.used_size] : NULL;
+ 
+   dpb.used_size++;
+ 
+   update_ref_list();
+   update_ltref_list();
+ 
+   check_num_ref();
+ 
+   dump_dpb();
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Insert a frame picture into the frame store that so far holds only
+  *    a top field with the current frame_num (is_used == 1). The stored
+  *    top field is freed and replaced by the frame.
+  *
+  * \param p
+  *    StorablePicture to be inserted; must have FRAME structure
+  *
+  ************************************************************************
+  */
+ void replace_top_pic_with_frame(StorablePicture* p)
+ {
+   FrameStore* target = NULL;
+   unsigned slot;
+ 
+   assert (p!=NULL);
+   assert (p->structure==FRAME);
+ 
+   p->used_for_reference = (img->nal_reference_idc != 0);
+ 
+   // upsample a reference picture
+   if (p->used_for_reference)
+   {
+     UnifiedOneForthPix(p);
+   }
+ 
+   // first frame store holding only a top field with the current frame_num
+   for (slot = 0; (slot < dpb.used_size) && (target == NULL); slot++)
+   {
+     if ((dpb.fs[slot]->is_used==1) && (dpb.fs[slot]->frame_num == img->frame_num))
+     {
+       target = dpb.fs[slot];
+     }
+   }
+ 
+   if (target == NULL)
+   {
+     error("replace_top_pic_with_frame: error storing reference frame (top field not found)",500);
+   }
+ 
+   free_storable_picture(target->top_field);
+   target->top_field = NULL;
+   target->frame = p;
+   target->is_used = 3;
+ 
+   if (p->used_for_reference)
+   {
+     target->is_reference = 3;
+     if (p->is_long_term)
+     {
+       target->is_long_term = 3;
+     }
+   }
+ 
+   // generate field views
+   dpb_split_field(target);
+   update_ref_list();
+   update_ltref_list();
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Insert a picture into a frame store. A frame is split into field
+  *    views; a field that completes the frame store is combined with
+  *    its partner into a frame view.
+  *
+  * \param fs
+  *    FrameStore into which the picture will be inserted
+  * \param p
+  *    StorablePicture to be inserted
+  *
+  ************************************************************************
+  */
+ static void insert_picture_in_dpb(FrameStore* fs, StorablePicture* p)
+ {
+   assert (p!=NULL);
+   assert (fs!=NULL);
+ 
+   // upsample a reference picture
+   if (p->used_for_reference)
+   {
+     UnifiedOneForthPix(p);
+   }
+ 
+   if (p->structure == FRAME)
+   {
+     fs->frame = p;
+     fs->is_used = 3;
+     if (p->used_for_reference)
+     {
+       fs->is_reference = 3;
+       fs->is_orig_reference = 3;
+       if (p->is_long_term)
+       {
+         fs->is_long_term = 3;
+         fs->long_term_frame_idx = p->long_term_frame_idx;
+       }
+     }
+     // generate field views
+     dpb_split_field(fs);
+   }
+   else if ((p->structure == TOP_FIELD) || (p->structure == BOTTOM_FIELD))
+   {
+     // bit 0 of the status words stands for the top field, bit 1 for the bottom field
+     int parity_bit = (p->structure == TOP_FIELD) ? 1 : 2;
+ 
+     if (parity_bit == 1)
+     {
+       fs->top_field = p;
+     }
+     else
+     {
+       fs->bottom_field = p;
+     }
+     fs->is_used |= parity_bit;
+ 
+     if (p->used_for_reference)
+     {
+       fs->is_reference |= parity_bit;
+       fs->is_orig_reference |= parity_bit;
+       if (p->is_long_term)
+       {
+         fs->is_long_term |= parity_bit;
+         fs->long_term_frame_idx = p->long_term_frame_idx;
+       }
+     }
+ 
+     if (fs->is_used == 3)
+     {
+       // both fields present: generate frame view
+       dpb_combine_field(fs);
+     }
+     else
+     {
+       fs->poc = p->poc;
+       gen_field_ref_ids(p);
+     }
+   }
+ 
+   fs->frame_num = p->pic_num;
+   fs->is_output = p->is_output;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Check whether the frame store, or any frame/field stored in it,
+  *    is used for reference.
+  *
+  * \return
+  *    1 if used for reference, 0 otherwise
+  ************************************************************************
+  */
+ static int is_used_for_reference(FrameStore* fs)
+ {
+   // the frame store itself is marked
+   if (fs->is_reference)
+     return 1;
+ 
+   // complete frame
+   if ((fs->is_used == 3) && fs->frame->used_for_reference)
+     return 1;
+ 
+   // top field
+   if ((fs->is_used & 1) && fs->top_field && fs->top_field->used_for_reference)
+     return 1;
+ 
+   // bottom field
+   if ((fs->is_used & 2) && fs->bottom_field && fs->bottom_field->used_for_reference)
+     return 1;
+ 
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Check whether any frame/field in the frame store is used for
+  *    short term reference, i.e. used for reference and not long term.
+  *
+  * \return
+  *    1 if such a picture exists, 0 otherwise
+  ************************************************************************
+  */
+ static int is_short_term_reference(FrameStore* fs)
+ {
+   // complete frame
+   if ((fs->is_used==3) && (fs->frame->used_for_reference) && (!fs->frame->is_long_term))
+     return 1;
+ 
+   // top field
+   if ((fs->is_used & 1) && fs->top_field
+       && (fs->top_field->used_for_reference) && (!fs->top_field->is_long_term))
+     return 1;
+ 
+   // bottom field
+   if ((fs->is_used & 2) && fs->bottom_field
+       && (fs->bottom_field->used_for_reference) && (!fs->bottom_field->is_long_term))
+     return 1;
+ 
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Check whether any frame/field in the frame store is used for
+  *    long term reference, i.e. used for reference and marked long term.
+  *
+  * \return
+  *    1 if such a picture exists, 0 otherwise
+  ************************************************************************
+  */
+ static int is_long_term_reference(FrameStore* fs)
+ {
+   // complete frame
+   if ((fs->is_used==3) && (fs->frame->used_for_reference) && (fs->frame->is_long_term))
+     return 1;
+ 
+   // top field
+   if ((fs->is_used & 1) && fs->top_field
+       && (fs->top_field->used_for_reference) && (fs->top_field->is_long_term))
+     return 1;
+ 
+   // bottom field
+   if ((fs->is_used & 2) && fs->bottom_field
+       && (fs->bottom_field->used_for_reference) && (fs->bottom_field->is_long_term))
+     return 1;
+ 
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Remove one frame from the DPB: free the pictures held by the frame
+  *    store at position pos, clear its status and move the emptied store
+  *    behind the remaining used entries.
+  *
+  * \param pos
+  *    Index of the frame store in dpb.fs
+  ************************************************************************
+  */
+ static void remove_frame_from_dpb(int pos)
+ {
+   FrameStore* fs = dpb.fs[pos];
+   FrameStore* vacated;
+   unsigned k;
+ 
+   // release whatever the store holds, according to its usage bits
+   if (fs->is_used == 3)
+   {
+     free_storable_picture(fs->frame);
+     free_storable_picture(fs->top_field);
+     free_storable_picture(fs->bottom_field);
+     fs->frame=NULL;
+     fs->top_field=NULL;
+     fs->bottom_field=NULL;
+   }
+   else if (fs->is_used == 2)
+   {
+     free_storable_picture(fs->bottom_field);
+     fs->bottom_field=NULL;
+   }
+   else if (fs->is_used == 1)
+   {
+     free_storable_picture(fs->top_field);
+     fs->top_field=NULL;
+   }
+   else if (fs->is_used != 0)
+   {
+     error("invalid frame store type",500);
+   }
+ 
+   fs->is_used = 0;
+   fs->is_long_term = 0;
+   fs->is_reference = 0;
+   fs->is_orig_reference = 0;
+ 
+   // close the gap and park the empty frame store at the end of the used range
+   vacated = dpb.fs[pos];
+ 
+   k = pos;
+   while (k < dpb.used_size-1)
+   {
+     dpb.fs[k] = dpb.fs[k+1];
+     k++;
+   }
+   dpb.fs[dpb.used_size-1] = vacated;
+   dpb.used_size--;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Find the frame store with the smallest POC among those in the DPB
+  *    that have not been output yet.
+  *
+  * \param poc
+  *    Receives the smallest POC found (INT_MAX if there is none)
+  * \param pos
+  *    Receives the index of that frame store in dpb.fs (-1 if none)
+  ************************************************************************
+  */
+ static void get_smallest_poc(int *poc,int * pos)
+ {
+   unsigned slot;
+ 
+   if (dpb.used_size<1)
+   {
+     error("Cannot determine smallest POC, DPB empty.",150);
+   }
+ 
+   *poc = INT_MAX;
+   *pos = -1;
+ 
+   for (slot = 0; slot < dpb.used_size; slot++)
+   {
+     // only a strictly smaller POC replaces the current candidate
+     if (!(*poc > dpb.fs[slot]->poc))
+       continue;
+     // frame stores that were already output do not take part
+     if (dpb.fs[slot]->is_output)
+       continue;
+ 
+     *poc = dpb.fs[slot]->poc;
+     *pos = slot;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Remove the first frame store from the DPB that has already been
+  *    output and is no longer used for reference.
+  *
+  * \return
+  *    1 if a frame store was removed, 0 if none qualified
+  ************************************************************************
+  */
+ static int remove_unused_frame_from_dpb()
+ {
+   unsigned slot = 0;
+ 
+   while (slot < dpb.used_size)
+   {
+     if (dpb.fs[slot]->is_output && (!is_used_for_reference(dpb.fs[slot])))
+     {
+       // at most one frame store is removed per call
+       remove_frame_from_dpb(slot);
+       return 1;
+     }
+     slot++;
+   }
+ 
+   return 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Output the picture with the smallest POC that is still waiting in
+  *    the DPB. Its frame store is removed afterwards unless it is still
+  *    used for reference.
+  ************************************************************************
+  */
+ static void output_one_frame_from_dpb()
+ {
+   int next_poc, next_pos;
+ 
+   if (dpb.used_size<1)
+   {
+     error("Cannot output frame, DPB empty.",150);
+   }
+ 
+   // the picture with the smallest POC goes out next
+   get_smallest_poc(&next_poc, &next_pos);
+ 
+   if (next_pos == -1)
+   {
+     error("no frames for output available", 150);
+   }
+ 
+   write_stored_frame(dpb.fs[next_pos], p_dec);
+ 
+   // output order must be strictly increasing in POC
+   if (dpb.last_output_poc >= next_poc)
+   {
+     error ("output POC must be in ascending order", 150);
+   }
+   dpb.last_output_poc = next_poc;
+ 
+   // free the frame store only when nothing references it any more
+   if (!is_used_for_reference(dpb.fs[next_pos]))
+   {
+     remove_frame_from_dpb(next_pos);
+   }
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Empty the DPB: all stored pictures are unmarked for reference,
+  *    unused ones are removed and the remaining ones are output.
+  *    Should be called to empty the buffer.
+  ************************************************************************
+  */
+ void flush_dpb()
+ {
+   unsigned slot = 0;
+ 
+   // nothing stays marked as reference
+   while (slot < dpb.used_size)
+   {
+     unmark_for_reference (dpb.fs[slot]);
+     slot++;
+   }
+ 
+   // drop everything that has already been output
+   for (;;)
+   {
+     if (!remove_unused_frame_from_dpb())
+       break;
+   }
+ 
+   // output the remaining frames in POC order
+   while (dpb.used_size)
+   {
+     output_one_frame_from_dpb();
+   }
+ 
+   // restart the ascending-POC check for whatever is stored next
+   dpb.last_output_poc = INT_MIN;
+ }
+ 
+ // RSD(x): copy bit 1 of x into bit 0 (..00/..01 -> ..00, ..10/..11 -> ..11),
+ // leaving all higher bits unchanged. The argument is parenthesized so that
+ // expressions containing lower-precedence operators expand correctly.
+ // Note: x is evaluated more than once; do not pass expressions with side effects.
+ #define RSD(x) ((((x)&2))?((x)|1):((x)&(~1)))
+ 
+ 
+ // For every 4x4 block of field picture p, translate the stored reference
+ // indices of both lists into ref_id values taken from ref_pic_num (0 when the
+ // index is negative) and flag the block as field coded.
+ void gen_field_ref_ids(StorablePicture *p)
+ {
+   int col, row;
+   int idx0, idx1;
+ 
+   for (col = 0; col < p->size_x/4; col++)
+   {
+     for (row = 0; row < p->size_y/4; row++)
+     {
+       idx0 = p->ref_idx[LIST_0][row][col];
+       idx1 = p->ref_idx[LIST_1][row][col];
+ 
+       //! association with id already known for fields.
+       if (idx0 >= 0)
+         p->ref_id[LIST_0][row][col] = p->ref_pic_num[LIST_0][idx0];
+       else
+         p->ref_id[LIST_0][row][col] = 0;
+ 
+       if (idx1 >= 0)
+         p->ref_id[LIST_1][row][col] = p->ref_pic_num[LIST_1][idx1];
+       else
+         p->ref_id[LIST_1][row][col] = 0;
+ 
+       p->field_frame[row][col] = 1;
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate the field views of a stored frame.
+  *    Unless the active SPS has frame_mbs_only_flag set, a top and a
+  *    bottom field picture are allocated and filled from the even and
+  *    odd sample rows of fs->frame, and the frame's status, POC values,
+  *    reference picture numbers and motion data are propagated to them.
+  *    Otherwise the field pointers are cleared. In both cases the
+  *    ref_id and field_frame arrays of the frame are (re)generated.
+  *
+  * \param fs
+  *    FrameStore whose frame member holds the picture to split
+  ************************************************************************
+  */
+ void dpb_split_field(FrameStore *fs)
+ {
+   int i, j, ii, jj, jj4;
+   int idiv,jdiv;
+   int currentmb;
+   int dummylist0,dummylist1;
+   // twice the number of 16-sample columns of the frame; used as row stride
+   // when deriving currentmb below
+   int twosz16 = 2*(fs->frame->size_x/16);
+ 
+   fs->poc = fs->frame->poc;
+ 
+   if (!active_sps->frame_mbs_only_flag)
+   {
+     // field pictures have the full width and half the height of the frame
+     fs->top_field    = alloc_storable_picture(TOP_FIELD,    fs->frame->size_x, fs->frame->size_y/2, fs->frame->size_x_cr, fs->frame->size_y_cr/2);
+     fs->bottom_field = alloc_storable_picture(BOTTOM_FIELD, fs->frame->size_x, fs->frame->size_y/2, fs->frame->size_x_cr, fs->frame->size_y_cr/2);
+ 
+     // top field: even luma rows of the frame
+     for (i=0; i<fs->frame->size_y/2; i++)
+     {
+       memcpy(fs->top_field->imgY[i], fs->frame->imgY[i*2], fs->frame->size_x*sizeof(imgpel));
+     }
+     
+     // top field: even rows of both chroma planes
+     for (i=0; i<fs->frame->size_y_cr/2; i++)
+     {
+       memcpy(fs->top_field->imgUV[0][i], fs->frame->imgUV[0][i*2], fs->frame->size_x_cr*sizeof(imgpel));
+       memcpy(fs->top_field->imgUV[1][i], fs->frame->imgUV[1][i*2], fs->frame->size_x_cr*sizeof(imgpel));
+     }
+     
+     // bottom field: odd luma rows of the frame
+     for (i=0; i<fs->frame->size_y/2; i++)
+     {
+       memcpy(fs->bottom_field->imgY[i], fs->frame->imgY[i*2 + 1], fs->frame->size_x*sizeof(imgpel));
+     }
+     
+     // bottom field: odd rows of both chroma planes
+     for (i=0; i<fs->frame->size_y_cr/2; i++)
+     {
+       memcpy(fs->bottom_field->imgUV[0][i], fs->frame->imgUV[0][i*2 + 1], fs->frame->size_x_cr*sizeof(imgpel));
+       memcpy(fs->bottom_field->imgUV[1][i], fs->frame->imgUV[1][i*2 + 1], fs->frame->size_x_cr*sizeof(imgpel));
+     }
+     
+     // same processing that reference pictures receive on insertion into the DPB
+     UnifiedOneForthPix(fs->top_field);
+     UnifiedOneForthPix(fs->bottom_field);
+ 
+     // each field gets its own POC; top/bottom/frame POC are mirrored into both
+     fs->top_field->poc = fs->frame->top_poc;
+     fs->bottom_field->poc =  fs->frame->bottom_poc;
+     
+     fs->top_field->frame_poc =  fs->frame->frame_poc;
+     
+     fs->top_field->bottom_poc =fs->bottom_field->bottom_poc =  fs->frame->bottom_poc;
+     fs->top_field->top_poc =fs->bottom_field->top_poc =  fs->frame->top_poc;
+     fs->bottom_field->frame_poc =  fs->frame->frame_poc;
+     
+     // the fields inherit the reference and long term status of the frame
+     fs->top_field->used_for_reference = fs->bottom_field->used_for_reference 
+                                       = fs->frame->used_for_reference;
+     fs->top_field->is_long_term = fs->bottom_field->is_long_term 
+                                 = fs->frame->is_long_term;
+     fs->long_term_frame_idx = fs->top_field->long_term_frame_idx 
+                             = fs->bottom_field->long_term_frame_idx 
+                             = fs->frame->long_term_frame_idx;
+ 
+     // both fields originate from a picture coded as a frame
+     fs->top_field->coded_frame = fs->bottom_field->coded_frame = 1;
+     fs->top_field->MbaffFrameFlag = fs->bottom_field->MbaffFrameFlag
+                                   = fs->frame->MbaffFrameFlag;
+ 
+     // cross-link frame, top field and bottom field
+     fs->frame->top_field    = fs->top_field;
+     fs->frame->bottom_field = fs->bottom_field;
+ 
+     fs->top_field->bottom_field = fs->bottom_field;
+     fs->top_field->frame        = fs->frame;
+     fs->bottom_field->top_field = fs->top_field;
+     fs->bottom_field->frame     = fs->frame;
+   
+     fs->top_field->chroma_format_idc = fs->bottom_field->chroma_format_idc = fs->frame->chroma_format_idc;
+  
+     //store reference picture index
+     // frame lists 2+LIST_x feed the top field, frame lists 4+LIST_x the bottom field
+     memcpy(fs->top_field->ref_pic_num[LIST_1]   , fs->frame->ref_pic_num[2 + LIST_1], 2*listXsize[LIST_1] * sizeof(int64));
+     memcpy(fs->bottom_field->ref_pic_num[LIST_1], fs->frame->ref_pic_num[4 + LIST_1], 2*listXsize[LIST_1] * sizeof(int64));
+     memcpy(fs->top_field->ref_pic_num[LIST_0]   , fs->frame->ref_pic_num[2 + LIST_0], 2*listXsize[LIST_0] * sizeof(int64));
+     memcpy(fs->bottom_field->ref_pic_num[LIST_0], fs->frame->ref_pic_num[4 + LIST_0], 2*listXsize[LIST_0] * sizeof(int64));
+     
+   }
+   else
+   {
+     // frame_mbs_only_flag set: no field views are created
+     fs->top_field=NULL;
+     fs->bottom_field=NULL;
+     fs->frame->top_field=NULL;
+     fs->frame->bottom_field=NULL;
+   }
+   
+   // derive the ref_id entries of the frame for every 4x4 block
+   for (j=0 ; j<fs->frame->size_y/4 ; j++)      
+   {           
+     jdiv=j/4;
+     for (i=0 ; i<fs->frame->size_x/4 ; i++)          
+     {   
+       idiv=i/4;
+       // index into mb_field[] derived from the 4x4 block position
+       currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2);
+ 
+       if (fs->frame->MbaffFrameFlag  && fs->frame->mb_field[currentmb])
+       {
+         // odd currentmb uses the lists at offset 4, even currentmb those at offset 2
+         int list_offset = currentmb%2? 4: 2;
+         dummylist0 = fs->frame->ref_idx[LIST_0][j][i];
+         dummylist1 = fs->frame->ref_idx[LIST_1][j][i];        
+         //! association with id already known for fields.
+         fs->frame->ref_id[LIST_0 + list_offset][j][i] = (dummylist0>=0)? fs->frame->ref_pic_num[LIST_0 + list_offset][dummylist0] : 0;
+         fs->frame->ref_id[LIST_1 + list_offset][j][i] = (dummylist1>=0)? fs->frame->ref_pic_num[LIST_1 + list_offset][dummylist1] : 0;          
+         //! need to make association with frames
+         fs->frame->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->frm_ref_pic_num[LIST_0 + list_offset][dummylist0] : 0;
+         fs->frame->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->frm_ref_pic_num[LIST_1 + list_offset][dummylist1] : 0;                   
+         
+       }
+       else
+       {
+         // blocks not flagged in mb_field: a negative reference index yields ref_id -1
+         dummylist0 = fs->frame->ref_idx[LIST_0][j][i];
+         dummylist1 = fs->frame->ref_idx[LIST_1][j][i];        
+         fs->frame->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->ref_pic_num[LIST_0][dummylist0] : -1;
+         fs->frame->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->ref_pic_num[LIST_1][dummylist1] : -1;    
+       }
+     }      
+   }
+   
+   // MBAFF frames: copy the motion data of blocks flagged in mb_field into the field pictures
+   if (!active_sps->frame_mbs_only_flag && fs->frame->MbaffFrameFlag)
+   {    
+     for (j=0 ; j<fs->frame->size_y/8; j++)      
+     { 
+       // jj is the frame row read for the top field, jj4 (four rows further down)
+       // the one read for the bottom field
+       jj = (j/4)*8 + j%4;
+       jj4 = jj + 4;
+       jdiv=j/2;
+       for (i=0 ; i<fs->frame->size_x/4 ; i++)
+       {                
+         idiv=i/4;
+         
+         currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2);
+         // Assign field mvs attached to MB-Frame buffer to the proper buffer
+         if (fs->frame->mb_field[currentmb])
+         {
+           fs->bottom_field->field_frame[j][i] = fs->top_field->field_frame[j][i]=1;
+           fs->frame->field_frame[2*j][i] = fs->frame->field_frame[2*j+1][i]=1;
+           
+           fs->bottom_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj4][i][0];
+           fs->bottom_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj4][i][1];
+           fs->bottom_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj4][i][0];
+           fs->bottom_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj4][i][1];
+           fs->bottom_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj4][i];
+           fs->bottom_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj4][i];
+           fs->bottom_field->ref_id[LIST_0][j][i] = fs->frame->ref_id[LIST_0+4][jj4][i];
+           fs->bottom_field->ref_id[LIST_1][j][i] = fs->frame->ref_id[LIST_1+4][jj4][i];
+           
+           
+           fs->top_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj][i][0];
+           fs->top_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj][i][1];
+           fs->top_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj][i][0];
+           fs->top_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj][i][1];
+           fs->top_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj][i];
+           fs->top_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj][i];
+           fs->top_field->ref_id[LIST_0][j][i] = fs->frame->ref_id[LIST_0+2][jj][i];
+           fs->top_field->ref_id[LIST_1][j][i] = fs->frame->ref_id[LIST_1+2][jj][i];          
+         }
+       }
+     }             
+   }
+     
+   //! Generate field MVs from Frame MVs
+   if (!active_sps->frame_mbs_only_flag)
+   {
+     for (j=0 ; j<fs->frame->size_y/8 ; j++)
+     {
+       // RSD() selects the frame block position (jj, ii) that is read for field block (j, i)
+       jj = 2* RSD(j);
+       jdiv = j/2;
+       for (i=0 ; i<fs->frame->size_x/4 ; i++)
+       {
+         ii = RSD(i);
+         idiv = i/4;
+         
+         currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2);        
+         
+         // blocks not flagged in mb_field: both fields receive the same frame motion data
+         if (!fs->frame->MbaffFrameFlag  || !fs->frame->mb_field[currentmb])    
+         {
+           fs->frame->field_frame[2*j+1][i] = fs->frame->field_frame[2*j][i]=0;
+           
+           fs->top_field->field_frame[j][i] = fs->bottom_field->field_frame[j][i] = 0;
+           
+           fs->top_field->mv[LIST_0][j][i][0] = fs->bottom_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj][ii][0];
+           fs->top_field->mv[LIST_0][j][i][1] = fs->bottom_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj][ii][1];
+           fs->top_field->mv[LIST_1][j][i][0] = fs->bottom_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj][ii][0];
+           fs->top_field->mv[LIST_1][j][i][1] = fs->bottom_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj][ii][1];
+           
+           // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+           if (fs->frame->ref_idx[LIST_0][jj][ii] == -1)      
+             fs->top_field->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][j][i] = - 1;
+           else
+           {
+             dummylist0=fs->top_field->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj][ii];
+             fs->top_field   ->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->top_ref_pic_num[LIST_0][dummylist0] : 0;
+             fs->bottom_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->bottom_ref_pic_num[LIST_0][dummylist0] : 0;
+           }
+           
+           if (fs->frame->ref_idx[LIST_1][jj][ii] == -1)      
+             fs->top_field->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][j][i] = - 1;
+           else
+           {
+             dummylist1=fs->top_field->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj][ii];
+             
+             fs->top_field   ->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->top_ref_pic_num[LIST_1][dummylist1] : 0;
+             fs->bottom_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->bottom_ref_pic_num[LIST_1][dummylist1] : 0;
+           }
+         }
+         else
+         {
+           // blocks flagged in mb_field: only record the flag in the frame's field_frame map
+           fs->frame->field_frame[2*j+1][i] = fs->frame->field_frame[2*j][i]= fs->frame->mb_field[currentmb];
+         }
+       }
+     }
+   }
+   else
+   {    
+     // frame_mbs_only_flag set: clear the field_frame map, one byte per 4x4 block
+     memset( &(fs->frame->field_frame[0][0]), 0, fs->frame->size_y * fs->frame->size_x /16 * sizeof(byte));
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Build the frame picture of a frame store by interleaving the rows
+  *    of its top and bottom fields. Only the YUV samples, POC values,
+  *    reference status and display (cropping) information are set up.
+  *
+  * \param fs
+  *    FrameStore holding both fields; receives the newly allocated frame
+  ************************************************************************
+  */
+ void dpb_combine_field_yuv(FrameStore *fs)
+ {
+   StorablePicture *top = fs->top_field;
+   StorablePicture *bot = fs->bottom_field;
+   StorablePicture *frm;
+   int row;
+ 
+   // the frame has the width of a field and twice its height
+   frm = alloc_storable_picture(FRAME, top->size_x, top->size_y*2, top->size_x_cr, top->size_y_cr*2);
+   fs->frame = frm;
+ 
+   // luma: top field fills the even rows, bottom field the odd rows
+   for (row = 0; row < top->size_y; row++)
+   {
+     memcpy(frm->imgY[row*2],     top->imgY[row], top->size_x*sizeof(imgpel));
+     memcpy(frm->imgY[row*2 + 1], bot->imgY[row], bot->size_x*sizeof(imgpel));
+   }
+ 
+   // chroma: same interleaving for both planes
+   for (row = 0; row < top->size_y_cr; row++)
+   {
+     memcpy(frm->imgUV[0][row*2],     top->imgUV[0][row], top->size_x_cr*sizeof(imgpel));
+     memcpy(frm->imgUV[0][row*2 + 1], bot->imgUV[0][row], bot->size_x_cr*sizeof(imgpel));
+     memcpy(frm->imgUV[1][row*2],     top->imgUV[1][row], top->size_x_cr*sizeof(imgpel));
+     memcpy(frm->imgUV[1][row*2 + 1], bot->imgUV[1][row], bot->size_x_cr*sizeof(imgpel));
+   }
+ 
+   // the frame POC is the smaller of the two field POCs
+   fs->poc=frm->poc =frm->frame_poc = min (top->poc, bot->poc);
+ 
+   bot->frame_poc=top->frame_poc=frm->poc;
+ 
+   bot->top_poc=frm->top_poc=top->poc;
+   top->bottom_poc=frm->bottom_poc=bot->poc;
+ 
+   // the frame is reference / long term only if both fields are
+   frm->used_for_reference = (top->used_for_reference && bot->used_for_reference );
+   frm->is_long_term = (top->is_long_term && bot->is_long_term );
+ 
+   if (frm->is_long_term)
+     frm->long_term_frame_idx = fs->long_term_frame_idx;
+ 
+   frm->top_field    = top;
+   frm->bottom_field = bot;
+ 
+   // the frame was assembled from fields, not coded as a frame
+   frm->coded_frame = 0;
+ 
+   // format and cropping information is taken from the top field
+   frm->chroma_format_idc = top->chroma_format_idc;
+   frm->frame_cropping_flag = top->frame_cropping_flag;
+   if (frm->frame_cropping_flag)
+   {
+     frm->frame_cropping_rect_top_offset = top->frame_cropping_rect_top_offset;
+     frm->frame_cropping_rect_bottom_offset = top->frame_cropping_rect_bottom_offset;
+     frm->frame_cropping_rect_left_offset = top->frame_cropping_rect_left_offset;
+     frm->frame_cropping_rect_right_offset = top->frame_cropping_rect_right_offset;
+   }
+ 
+   top->frame = bot->frame = frm;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Generate a complete frame from the top and bottom fields of a
+  *    frame store: sample data (via dpb_combine_field_yuv), reference
+  *    picture numbers, motion vectors, reference indices and reference
+  *    ids.
+  *
+  * \param fs
+  *    frame store whose top_field and bottom_field are combined into
+  *    fs->frame
+  ************************************************************************
+  */
+ void dpb_combine_field(FrameStore *fs)
+ {
+   const int lists[2] = { LIST_0, LIST_1 };
+   StorablePicture *top, *bot, *frm;
+   int n, list, k;
+   int x, y, y_top, y_bot;
+   int ref_top, ref_bot;
+ 
+   dpb_combine_field_yuv(fs);
+ 
+   UnifiedOneForthPix(fs->frame);
+ 
+   top = fs->top_field;
+   bot = fs->bottom_field;
+   frm = fs->frame;
+ 
+   // frame reference picture numbers: every second field entry, rounded
+   // down to an even number, smaller value of the two fields
+   for (n = 0; n < 2; n++)
+   {
+     list = lists[n];
+     for (k = 0; k < (listXsize[list]+1)/2; k++)
+     {
+       frm->ref_pic_num[list][k] = min ((top->ref_pic_num[list][2*k]/2)*2, (bot->ref_pic_num[list][2*k]/2)*2);
+     }
+   }
+ 
+   // motion data: within each group of 8 frame rows (4x4 block units) the
+   // first 4 rows take the top field data, the last 4 the bottom field data
+   for (y = 0; y < top->size_y/4; y++)
+   {
+     y_top = 8*(y/4) + (y%4);
+     y_bot = y_top + 4;
+ 
+     for (x = 0; x < top->size_x/4; x++)
+     {
+       frm->field_frame[y_bot][x] = 1;
+       frm->field_frame[y_top][x] = 1;
+ 
+       for (n = 0; n < 2; n++)
+       {
+         list = lists[n];
+ 
+         // --- rows fed by the top field ---
+         frm->mv[list][y_top][x][0] = top->mv[list][y][x][0];
+         frm->mv[list][y_top][x][1] = top->mv[list][y][x][1];
+ 
+         frm->ref_idx[list][y_top][x] = top->ref_idx[list][y][x];
+         ref_top = frm->ref_idx[list][y_top][x];
+ 
+         if (ref_top >= 0)
+         {
+           // field association is already known; frame association is looked up
+           top->ref_id[list][y][x]     = top->ref_pic_num[list][ref_top];
+           frm->ref_id[list][y_top][x] = top->frm_ref_pic_num[list][ref_top];
+         }
+         else
+         {
+           top->ref_id[list][y][x]     = 0;
+           frm->ref_id[list][y_top][x] = 0;
+         }
+ 
+         // --- rows fed by the bottom field ---
+         frm->mv[list][y_bot][x][0] = bot->mv[list][y][x][0];
+         frm->mv[list][y_bot][x][1] = bot->mv[list][y][x][1];
+ 
+         frm->ref_idx[list][y_bot][x] = bot->ref_idx[list][y][x];
+         ref_bot = frm->ref_idx[list][y_bot][x];
+ 
+         if (ref_bot >= 0)
+         {
+           bot->ref_id[list][y][x]     = bot->ref_pic_num[list][ref_bot];
+           frm->ref_id[list][y_bot][x] = bot->frm_ref_pic_num[list][ref_bot];
+         }
+         else
+         {
+           bot->ref_id[list][y][x]     = 0;
+           // NOTE(review): unused entries get -1 here but 0 for the top rows - confirm intent
+           frm->ref_id[list][y_bot][x] = -1;
+         }
+       }
+ 
+       top->field_frame[y][x] = 1;
+       bot->field_frame[y][x] = 1;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate the per-slice buffers that hold the reference picture
+  *    list reordering commands.
+  *
+  *    List 0 buffers are allocated for every slice type except I and SI,
+  *    list 1 buffers only for B slices. Buffers that are not needed are
+  *    set to NULL. Each buffer holds (number of active references + 1)
+  *    zero-initialised ints; no_mem_exit() is called if an allocation
+  *    fails.
+  *
+  * \param currSlice
+  *    slice that receives the buffers
+  ************************************************************************
+  */
+ void alloc_ref_pic_list_reordering_buffer(Slice *currSlice)
+ {
+   int size;
+ 
+   // ---- list 0 ----
+   size = img->num_ref_idx_l0_active+1;
+ 
+   if (img->type==I_SLICE || img->type==SI_SLICE)
+   {
+     currSlice->reordering_of_pic_nums_idc_l0 = NULL;
+     currSlice->abs_diff_pic_num_minus1_l0 = NULL;
+     currSlice->long_term_pic_idx_l0 = NULL;
+   }
+   else
+   {
+     currSlice->reordering_of_pic_nums_idc_l0 = calloc(size,sizeof(int));
+     if (NULL == currSlice->reordering_of_pic_nums_idc_l0)
+       no_mem_exit("alloc_ref_pic_list_reordering_buffer: remapping_of_pic_nums_idc_l0");
+ 
+     currSlice->abs_diff_pic_num_minus1_l0 = calloc(size,sizeof(int));
+     if (NULL == currSlice->abs_diff_pic_num_minus1_l0)
+       no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l0");
+ 
+     currSlice->long_term_pic_idx_l0 = calloc(size,sizeof(int));
+     if (NULL == currSlice->long_term_pic_idx_l0)
+       no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l0");
+   }
+ 
+   // ---- list 1 ----
+   size = img->num_ref_idx_l1_active+1;
+ 
+   if (img->type!=B_SLICE)
+   {
+     currSlice->reordering_of_pic_nums_idc_l1 = NULL;
+     currSlice->abs_diff_pic_num_minus1_l1 = NULL;
+     currSlice->long_term_pic_idx_l1 = NULL;
+   }
+   else
+   {
+     currSlice->reordering_of_pic_nums_idc_l1 = calloc(size,sizeof(int));
+     if (NULL == currSlice->reordering_of_pic_nums_idc_l1)
+       no_mem_exit("alloc_ref_pic_list_reordering_buffer: remapping_of_pic_nums_idc_l1");
+ 
+     currSlice->abs_diff_pic_num_minus1_l1 = calloc(size,sizeof(int));
+     if (NULL == currSlice->abs_diff_pic_num_minus1_l1)
+       no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l1");
+ 
+     currSlice->long_term_pic_idx_l1 = calloc(size,sizeof(int));
+     if (NULL == currSlice->long_term_pic_idx_l1)
+       no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l1");
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release the per-slice buffers that hold the reference picture list
+  *    reordering commands and reset all six pointers to NULL.
+  *
+  * \param currSlice
+  *    slice whose buffers are released; pointers that are already NULL
+  *    are accepted
+  ************************************************************************
+  */
+ void free_ref_pic_list_reordering_buffer(Slice *currSlice)
+ {
+   // free(NULL) is a no-op, so no pointer needs to be tested first
+ 
+   // list 0
+   free(currSlice->reordering_of_pic_nums_idc_l0);
+   currSlice->reordering_of_pic_nums_idc_l0 = NULL;
+ 
+   free(currSlice->abs_diff_pic_num_minus1_l0);
+   currSlice->abs_diff_pic_num_minus1_l0 = NULL;
+ 
+   free(currSlice->long_term_pic_idx_l0);
+   currSlice->long_term_pic_idx_l0 = NULL;
+ 
+   // list 1
+   free(currSlice->reordering_of_pic_nums_idc_l1);
+   currSlice->reordering_of_pic_nums_idc_l1 = NULL;
+ 
+   free(currSlice->abs_diff_pic_num_minus1_l1);
+   currSlice->abs_diff_pic_num_minus1_l1 = NULL;
+ 
+   free(currSlice->long_term_pic_idx_l1);
+   currSlice->long_term_pic_idx_l1 = NULL;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *      Tian Dong
+  *          June 13, 2002, Modified on July 30, 2003
+  *
+  *      If a gap in frame_num is found, fill it: for every frame number
+  *      between img->pre_frame_num + 1 and img->frame_num (exclusive,
+  *      counted modulo MaxFrameNum) a picture marked as non-existing is
+  *      stored in the decoded picture buffer.
+  * \param img
+  *      image parameters; nal_reference_idc is forced to 1 while the gap
+  *      is filled and restored afterwards, adaptive_ref_pic_buffering_flag
+  *      is cleared for every inserted picture
+  ************************************************************************
+  */
+ void fill_frame_num_gap(ImageParameters *img)
+ {
+   const int max_frame_num = 1 << (log2_max_frame_num_minus4 + 4);
+   const int saved_nal_ref_idc = img->nal_reference_idc;
+   const int target_frame_num = img->frame_num;
+   StorablePicture *filler;
+   int missing;
+ 
+ //  printf("A gap in frame number is found, try to fill it.\n");
+ 
+   // the inserted pictures are stored while nal_reference_idc is non-zero
+   img->nal_reference_idc = 1;
+ 
+   for (missing = (img->pre_frame_num + 1) % max_frame_num;
+        missing != target_frame_num;
+        missing = (missing + 1) % max_frame_num)
+   {
+     filler = alloc_storable_picture (FRAME, img->width, img->height, img->width_cr, img->height_cr);
+ 
+     filler->coded_frame  = 1;
+     filler->pic_num      = missing;
+     filler->non_existing = 1;
+     filler->is_output    = 1;
+ 
+     img->adaptive_ref_pic_buffering_flag = 0;
+ 
+     store_picture_in_dpb(filler);
+   }
+ 
+   img->nal_reference_idc = saved_nal_ref_idc;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a co-located parameter set together with its motion
+  *    arrays (reference indices, reference picture ids, motion vectors,
+  *    moving-block and field/frame maps), all sized in BLOCK_SIZE units.
+  *
+  * \param size_x
+  *    horizontal luma size
+  * \param size_y
+  *    vertical luma size
+  * \param mb_adaptive_frame_field_flag
+  *    flag that indicates macroblock adaptive frame/field coding; when
+  *    set, separate top and bottom field arrays with half the number of
+  *    rows are allocated as well
+  *
+  * \return
+  *    the allocated ColocatedParams structure
+  ************************************************************************
+  */
+ ColocatedParams* alloc_colocated(int size_x, int size_y, int mb_adaptive_frame_field_flag)
+ {
+   const int blk_rows = size_y / BLOCK_SIZE;
+   const int blk_cols = size_x / BLOCK_SIZE;
+   const int fld_rows = blk_rows / 2;       // rows of one field
+   ColocatedParams *s = calloc(1, sizeof(ColocatedParams));
+ 
+   if (s == NULL)
+     no_mem_exit("alloc_colocated: s");
+ 
+   s->size_x = size_x;
+   s->size_y = size_y;
+ 
+   // frame-sized arrays, two lists each
+   get_mem3D      ((byte****)(&(s->ref_idx)), 2, blk_rows, blk_cols);
+   get_mem3Dint64 (&(s->ref_pic_id),          2, blk_rows, blk_cols);
+   get_mem4Dshort (&(s->mv),                  2, blk_rows, blk_cols, 2);
+ 
+   get_mem2D      (&(s->moving_block), blk_rows, blk_cols);
+   get_mem2D      (&(s->field_frame),  blk_rows, blk_cols);
+ 
+   if (mb_adaptive_frame_field_flag)
+   {
+     // top field arrays
+     get_mem3D      ((byte****)(&(s->top_ref_idx)), 2, fld_rows, blk_cols);
+     get_mem3Dint64 (&(s->top_ref_pic_id),          2, fld_rows, blk_cols);
+     get_mem4Dshort (&(s->top_mv),                  2, fld_rows, blk_cols, 2);
+     get_mem2D      (&(s->top_moving_block),           fld_rows, blk_cols);
+ 
+     // bottom field arrays
+     get_mem3D      ((byte****)(&(s->bottom_ref_idx)), 2, fld_rows, blk_cols);
+     get_mem3Dint64 (&(s->bottom_ref_pic_id),          2, fld_rows, blk_cols);
+     get_mem4Dshort (&(s->bottom_mv),                  2, fld_rows, blk_cols, 2);
+     get_mem2D      (&(s->bottom_moving_block),           fld_rows, blk_cols);
+   }
+ 
+   // remembered so that free_colocated knows which arrays exist
+   s->mb_adaptive_frame_field_flag = mb_adaptive_frame_field_flag;
+ 
+   return s;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Free a co-located parameter set and all arrays attached to it.
+  *
+  * \param p
+  *    co-located parameter set to be freed; a NULL pointer is ignored.
+  *    The top/bottom field arrays are released only if the set was
+  *    created with mb_adaptive_frame_field_flag set.
+  *
+  ************************************************************************
+  */
+ void free_colocated(ColocatedParams* p)
+ {
+   int blk_rows;
+ 
+   if (!p)
+     return;
+ 
+   blk_rows = p->size_y / BLOCK_SIZE;
+ 
+   // frame-sized arrays
+   free_mem3D      ((byte***)p->ref_idx, 2);
+   free_mem3Dint64 (p->ref_pic_id, 2);
+   free_mem4Dshort (p->mv, 2, blk_rows);
+ 
+   if (p->moving_block)
+   {
+     free_mem2D (p->moving_block);
+     p->moving_block = NULL;
+   }
+   if (p->field_frame)
+   {
+     free_mem2D (p->field_frame);
+     p->field_frame = NULL;
+   }
+ 
+   if (p->mb_adaptive_frame_field_flag)
+   {
+     // top field arrays (half the number of rows)
+     free_mem3D      ((byte***)p->top_ref_idx, 2);
+     free_mem3Dint64 (p->top_ref_pic_id, 2);
+     free_mem4Dshort (p->top_mv, 2, blk_rows / 2);
+ 
+     if (p->top_moving_block)
+     {
+       free_mem2D (p->top_moving_block);
+       p->top_moving_block = NULL;
+     }
+ 
+     // bottom field arrays (half the number of rows)
+     free_mem3D      ((byte***)p->bottom_ref_idx, 2);
+     free_mem3Dint64 (p->bottom_ref_pic_id, 2);
+     free_mem4Dshort (p->bottom_mv, 2, blk_rows / 2);
+ 
+     if (p->bottom_moving_block)
+     {
+       free_mem2D (p->bottom_moving_block);
+       p->bottom_moving_block = NULL;
+     }
+   }
+ 
+   free(p);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Compute co-located motion info
+  *
+  *    Fills p with motion vectors, reference indices and reference
+  *    picture ids copied from the first entry of reference list 1
+  *    (listX[LIST_1][0], or its field / MBAFF variants). When
+  *    img->direct_spatial_mv_pred_flag is 1 the moving_block maps are
+  *    derived as well; when it is 0 the vertical mv component is rescaled
+  *    between field and frame units and img->mvscale is filled.
+  *
+  *    The function works in several passes that overwrite p->mv,
+  *    p->ref_idx and p->ref_pic_id in place, so the order of the passes
+  *    matters.
+  *
+  * \param p
+  *    co-located parameter set to be filled (written in place)
+  * \param listX
+  *    reference picture lists; entries LIST_1, LIST_1 + 2 and LIST_1 + 4
+  *    supply the co-located picture, entries LIST_0 + j and LIST_1 + j
+  *    are read for the mv scale factors
+  *
+  * \note
+  *    Besides its parameters the function reads img, active_sps,
+  *    enc_picture and listXsize, and writes img->mvscale.
+  *
+  ************************************************************************
+  */
+ 
+ void compute_colocated(ColocatedParams* p, StorablePicture **listX[6])
+ {
+   StorablePicture *fs, *fs_top, *fs_bottom;
+   int i,j, ii, jj, jdiv;
+ 
+   fs_top=fs_bottom=fs = listX[LIST_1 ][0]; // default: all three refer to the first list 1 picture
+ 
+   if (img->MbaffFrameFlag)
+   {
+     fs_top= listX[LIST_1 + 2][0];
+     fs_bottom= listX[LIST_1 + 4][0];
+   }
+   else
+   {
+     if (img->structure!=FRAME)
+     {
+       if ((img->structure != fs->structure) && (fs->coded_frame)) // field picture, co-located stored as coded frame: use the matching field
+       {
+         if (img->structure==TOP_FIELD)
+         {
+           fs_top=fs_bottom=fs = listX[LIST_1 ][0]->top_field;
+         }
+         else
+         {
+           fs_top=fs_bottom=fs = listX[LIST_1 ][0]->bottom_field;
+         }
+       }
+     }
+   }
+   
+   if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)      
+   {
+     for (j=0 ; j<fs->size_y/4 ; j++)      
+     {
+       jdiv = j/2; // row used to index the field pictures
+       jj = j/2 + 4 * (j/8); // row used to index fs->ref_id for the top field; the bottom field uses jj + 4
+       for (i=0 ; i<fs->size_x/4 ; i++)          
+       {                
+ 
+         if (img->MbaffFrameFlag && fs->field_frame[j][i])
+         { 
+           //! Assign frame buffers for field MBs   
+           //! Check whether we should use top or bottom field mvs.
+           //! Depending on the assigned poc values.          
+           
+           if (abs(enc_picture->poc - fs_bottom->poc) > abs(enc_picture->poc - fs_top->poc) ) // top field is closer in POC
+           {
+             p->mv[LIST_0][j][i][0]    = fs_top->mv[LIST_0][jdiv][i][0];
+             p->mv[LIST_0][j][i][1]    = fs_top->mv[LIST_0][jdiv][i][1] ;          
+             p->mv[LIST_1][j][i][0]    = fs_top->mv[LIST_1][jdiv][i][0];
+             p->mv[LIST_1][j][i][1]    = fs_top->mv[LIST_1][jdiv][i][1] ;           
+             p->ref_idx[LIST_0][j][i]  = fs_top->ref_idx[LIST_0][jdiv][i];         
+             p->ref_idx[LIST_1][j][i]  = fs_top->ref_idx[LIST_1][jdiv][i];  
+             p->ref_pic_id[LIST_0][j][i]   = fs->ref_id[LIST_0][jj][i];
+             p->ref_pic_id[LIST_1][j][i]   = fs->ref_id[LIST_1][jj][i];
+             
+             p->is_long_term             = fs_top->is_long_term;
+           }
+           else
+           {
+             p->mv[LIST_0][j][i][0]      = fs_bottom->mv[LIST_0][jdiv][i][0];
+             p->mv[LIST_0][j][i][1]      = fs_bottom->mv[LIST_0][jdiv][i][1] ;          
+             p->mv[LIST_1][j][i][0]      = fs_bottom->mv[LIST_1][jdiv][i][0];
+             p->mv[LIST_1][j][i][1]      = fs_bottom->mv[LIST_1][jdiv][i][1] ;           
+             p->ref_idx[LIST_0][j][i]    = fs_bottom->ref_idx[LIST_0][jdiv][i];
+             p->ref_idx[LIST_1][j][i]    = fs_bottom->ref_idx[LIST_1][jdiv][i];
+             p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj + 4][i];
+             p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj + 4][i];
+             
+             p->is_long_term             = fs_bottom->is_long_term;
+           }          
+         }
+         else
+         {
+           p->mv[LIST_0][j][i][0]      = fs->mv[LIST_0][j][i][0];
+           p->mv[LIST_0][j][i][1]      = fs->mv[LIST_0][j][i][1] ;          
+           p->mv[LIST_1][j][i][0]      = fs->mv[LIST_1][j][i][0];
+           p->mv[LIST_1][j][i][1]      = fs->mv[LIST_1][j][i][1] ;           
+           p->ref_idx[LIST_0][j][i]    = fs->ref_idx[LIST_0][j][i];         
+           p->ref_idx[LIST_1][j][i]    = fs->ref_idx[LIST_1][j][i];                   
+           p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][j][i];
+           p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][j][i];
+           
+           p->is_long_term             = fs->is_long_term;
+         }
+       }      
+     }
+   }    
+         
+ 
+   //! Generate field MVs from Frame MVs
+   if (img->structure || img->MbaffFrameFlag) // presumably FRAME == 0, i.e. field picture or MBAFF frame - TODO confirm
+   {    
+     for (j=0 ; j<fs->size_y/8 ; j++) // only half the number of 4x4 rows is visited
+     {       
+       jj = RSD(j);
+       for (i=0 ; i<fs->size_x/4 ; i++)
+       {
+         ii = RSD(i);
+         //! Do nothing if macroblock as field coded in MB-AFF        
+         if (!img->MbaffFrameFlag )
+         {
+           p->mv[LIST_0][j][i][0] = fs->mv[LIST_0][jj][ii][0];
+           p->mv[LIST_0][j][i][1] = fs->mv[LIST_0][jj][ii][1];
+           p->mv[LIST_1][j][i][0] = fs->mv[LIST_1][jj][ii][0];
+           p->mv[LIST_1][j][i][1] = fs->mv[LIST_1][jj][ii][1];
+           
+           // Scaling of references is done here since it will not affect spatial direct (2*0 =0)
+ 
+           if (fs->ref_idx[LIST_0][jj][ii] == -1)      
+           {
+             p->ref_idx   [LIST_0][j][i] = -1;
+             p->ref_pic_id[LIST_0][j][i] = -1;
+           }
+           else
+           {
+             p->ref_idx   [LIST_0][j][i] = fs->ref_idx[LIST_0][jj][ii] ;
+             p->ref_pic_id[LIST_0][j][i] = fs->ref_id [LIST_0][jj][ii];
+           }
+           
+           if (fs->ref_idx[LIST_1][jj][ii] == -1)      
+           {
+             p->ref_idx   [LIST_1][j][i] = -1;
+             p->ref_pic_id[LIST_1][j][i] = -1;
+           }
+           else
+           {
+             p->ref_idx   [LIST_1][j][i] = fs->ref_idx[LIST_1][jj][ii];
+             p->ref_pic_id[LIST_1][j][i] = fs->ref_id [LIST_1][jj][ii];
+           }
+           
+           p->is_long_term = fs->is_long_term;
+ 
+           if (img->direct_spatial_mv_pred_flag == 1)
+           {
+             p->moving_block[j][i] = 
+               !((!p->is_long_term 
+               && ((p->ref_idx[LIST_0][j][i] == 0) 
+               &&  (abs(p->mv[LIST_0][j][i][0])>>1 == 0) 
+               &&  (abs(p->mv[LIST_0][j][i][1])>>1 == 0))) 
+               || ((p->ref_idx[LIST_0][j][i] == -1) 
+               &&  (p->ref_idx[LIST_1][j][i] == 0) 
+               &&  (abs(p->mv[LIST_1][j][i][0])>>1 == 0) 
+               &&  (abs(p->mv[LIST_1][j][i][1])>>1 == 0)));
+           }
+         }
+         else
+         {
+           p->bottom_mv[LIST_0][j][i][0] = fs_bottom->mv[LIST_0][jj][ii][0];
+           p->bottom_mv[LIST_0][j][i][1] = fs_bottom->mv[LIST_0][jj][ii][1];
+           p->bottom_mv[LIST_1][j][i][0] = fs_bottom->mv[LIST_1][jj][ii][0];
+           p->bottom_mv[LIST_1][j][i][1] = fs_bottom->mv[LIST_1][jj][ii][1];
+           p->bottom_ref_idx[LIST_0][j][i] = fs_bottom->ref_idx[LIST_0][jj][ii]; 
+           p->bottom_ref_idx[LIST_1][j][i] = fs_bottom->ref_idx[LIST_1][jj][ii]; 
+           p->bottom_ref_pic_id[LIST_0][j][i] = fs_bottom->ref_id[LIST_0][jj][ii];
+           p->bottom_ref_pic_id[LIST_1][j][i] = fs_bottom->ref_id[LIST_1][jj][ii];
+ 
+           if (img->direct_spatial_mv_pred_flag == 1)
+           {
+             p->bottom_moving_block[j][i] = 
+               !((!fs_bottom->is_long_term 
+               && ((p->bottom_ref_idx[LIST_0][j][i] == 0) 
+               &&  (abs(p->bottom_mv[LIST_0][j][i][0])>>1 == 0) 
+               &&  (abs(p->bottom_mv[LIST_0][j][i][1])>>1 == 0))) 
+               || ((p->bottom_ref_idx[LIST_0][j][i] == -1) 
+               &&  (p->bottom_ref_idx[LIST_1][j][i] == 0) 
+               &&  (abs(p->bottom_mv[LIST_1][j][i][0])>>1 == 0) 
+               &&  (abs(p->bottom_mv[LIST_1][j][i][1])>>1 == 0)));
+           }
+ 
+           p->top_mv[LIST_0][j][i][0] = fs_top->mv[LIST_0][jj][ii][0];
+           p->top_mv[LIST_0][j][i][1] = fs_top->mv[LIST_0][jj][ii][1];
+           p->top_mv[LIST_1][j][i][0] = fs_top->mv[LIST_1][jj][ii][0];
+           p->top_mv[LIST_1][j][i][1] = fs_top->mv[LIST_1][jj][ii][1];
+           p->top_ref_idx[LIST_0][j][i] = fs_top->ref_idx[LIST_0][jj][ii]; 
+           p->top_ref_idx[LIST_1][j][i] = fs_top->ref_idx[LIST_1][jj][ii]; 
+           p->top_ref_pic_id[LIST_0][j][i] = fs_top->ref_id[LIST_0][jj][ii];
+           p->top_ref_pic_id[LIST_1][j][i] = fs_top->ref_id[LIST_1][jj][ii]; 
+ 
+           if (img->direct_spatial_mv_pred_flag == 1)
+           {
+             p->top_moving_block[j][i] = 
+               !((!fs_top->is_long_term 
+               && ((p->top_ref_idx[LIST_0][j][i] == 0) 
+               &&  (abs(p->top_mv[LIST_0][j][i][0])>>1 == 0) 
+               &&  (abs(p->top_mv[LIST_0][j][i][1])>>1 == 0))) 
+               || ((p->top_ref_idx[LIST_0][j][i] == -1) 
+               &&  (p->top_ref_idx[LIST_1][j][i] == 0) 
+               &&  (abs(p->top_mv[LIST_1][j][i][0])>>1 == 0) 
+               &&  (abs(p->top_mv[LIST_1][j][i][1])>>1 == 0)));
+           }
+ 
+           if ((img->direct_spatial_mv_pred_flag == 0 ) && !fs->field_frame[2*j][i]) // co-located block is frame coded: halve the vertical component
+           {
+             p->top_mv[LIST_0][j][i][1] /= 2;        
+             p->top_mv[LIST_1][j][i][1] /= 2;
+             p->bottom_mv[LIST_0][j][i][1] /= 2;        
+             p->bottom_mv[LIST_1][j][i][1] /= 2;
+           }
+ 
+         }
+       }
+     }
+   }
+ 
+   
+   if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)      
+   {       
+     //! Use inference flag to remap mvs/references
+     //! Frame with field co-located
+     
+     if (!img->structure)
+     {
+       for (j=0 ; j<fs->size_y/4 ; j++)      
+       {                
+         jdiv = j/2;
+         jj = j/2 + 4*(j/8);
+         for (i=0 ; i<fs->size_x/4 ; i++)          
+         {                
+           
+           if (fs->field_frame[j][i])
+           {
+             if (abs(enc_picture->poc - fs->bottom_field->poc) > abs(enc_picture->poc - fs->top_field->poc)) // top field is closer in POC
+             {
+               p->mv[LIST_0][j][i][0] = fs->top_field->mv[LIST_0][jdiv][i][0];
+               p->mv[LIST_0][j][i][1] = fs->top_field->mv[LIST_0][jdiv][i][1] ;
+               p->mv[LIST_1][j][i][0] = fs->top_field->mv[LIST_1][jdiv][i][0];
+               p->mv[LIST_1][j][i][1] = fs->top_field->mv[LIST_1][jdiv][i][1] ; 
+               
+               p->ref_idx[LIST_0][j][i]  = fs->top_field->ref_idx[LIST_0][jdiv][i];
+               p->ref_idx[LIST_1][j][i]  = fs->top_field->ref_idx[LIST_1][jdiv][i];
+               p->ref_pic_id[LIST_0][j][i]   = fs->ref_id[LIST_0][jj][i];
+               p->ref_pic_id[LIST_1][j][i]   = fs->ref_id[LIST_1][jj][i];                     
+               p->is_long_term               = fs->top_field->is_long_term;
+             }
+             else
+             {
+               p->mv[LIST_0][j][i][0] = fs->bottom_field->mv[LIST_0][jdiv][i][0];
+               p->mv[LIST_0][j][i][1] = fs->bottom_field->mv[LIST_0][jdiv][i][1] ;
+               p->mv[LIST_1][j][i][0] = fs->bottom_field->mv[LIST_1][jdiv][i][0];
+               p->mv[LIST_1][j][i][1] = fs->bottom_field->mv[LIST_1][jdiv][i][1] ; 
+               
+               p->ref_idx[LIST_0][j][i]  = fs->bottom_field->ref_idx[LIST_0][jdiv][i];
+               p->ref_idx[LIST_1][j][i]  = fs->bottom_field->ref_idx[LIST_1][jdiv][i];
+               p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj + 4][i];
+               p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj + 4][i];                     
+               p->is_long_term             = fs->bottom_field->is_long_term;
+             }
+           }
+         }
+       }      
+     }
+   }
+ 
+ 
+   p->is_long_term = fs->is_long_term; // overrides whatever the per-block passes above stored
+   
+   if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag)      
+   {
+     for (j=0 ; j<fs->size_y/4 ; j++)      
+     {                
+       jj = RSD(j);
+       for (i=0 ; i<fs->size_x/4 ; i++)          
+       {           
+         ii = RSD(i);
+         
+         p->mv[LIST_0][j][i][0]=p->mv[LIST_0][jj][ii][0]; // in-place remap: source and destination are both p
+         p->mv[LIST_0][j][i][1]=p->mv[LIST_0][jj][ii][1];
+         p->mv[LIST_1][j][i][0]=p->mv[LIST_1][jj][ii][0];
+         p->mv[LIST_1][j][i][1]=p->mv[LIST_1][jj][ii][1];        
+         
+         p->ref_idx[LIST_0][j][i]=p->ref_idx[LIST_0][jj][ii];
+         p->ref_idx[LIST_1][j][i]=p->ref_idx[LIST_1][jj][ii];
+         p->ref_pic_id[LIST_0][j][i] = p->ref_pic_id[LIST_0][jj][ii];
+         p->ref_pic_id[LIST_1][j][i] = p->ref_pic_id[LIST_1][jj][ii];
+         
+         if (img->direct_spatial_mv_pred_flag == 1)
+         {
+           p->moving_block[j][i]= 
+             !((!p->is_long_term 
+             && ((p->ref_idx[LIST_0][j][i] == 0) 
+             &&  (abs(p->mv[LIST_0][j][i][0])>>1 == 0) 
+             &&  (abs(p->mv[LIST_0][j][i][1])>>1 == 0))) 
+             || ((p->ref_idx[LIST_0][j][i] == -1) 
+             &&  (p->ref_idx[LIST_1][j][i] == 0) 
+             &&  (abs(p->mv[LIST_1][j][i][0])>>1 == 0) 
+             &&  (abs(p->mv[LIST_1][j][i][1])>>1 == 0)));
+         }
+       }
+     }
+   }
+   else
+   {
+     for (j=0 ; j<fs->size_y/4 ; j++)      
+     {                
+       jj = RSD(j); // NOTE(review): jj is not read in this branch
+       for (i=0 ; i<fs->size_x/4 ; i++)          
+       {           
+         ii = RSD(i); // NOTE(review): ii is not read in this branch
+         //! Use inference flag to remap mvs/references
+         p->mv[LIST_0][j][i][0]=fs->mv[LIST_0][j][i][0];
+         p->mv[LIST_0][j][i][1]=fs->mv[LIST_0][j][i][1];
+         p->mv[LIST_1][j][i][0]=fs->mv[LIST_1][j][i][0];
+         p->mv[LIST_1][j][i][1]=fs->mv[LIST_1][j][i][1];        
+         
+         p->ref_idx[LIST_0][j][i]=fs->ref_idx[LIST_0][j][i];
+         p->ref_idx[LIST_1][j][i]=fs->ref_idx[LIST_1][j][i];
+         p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][j][i];
+         p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][j][i];
+         
+         if (img->direct_spatial_mv_pred_flag == 1)
+         {
+           p->moving_block[j][i]= 
+             !((!p->is_long_term 
+             && ((p->ref_idx[LIST_0][j][i] == 0) 
+             &&  (abs(p->mv[LIST_0][j][i][0])>>1 == 0) 
+             &&  (abs(p->mv[LIST_0][j][i][1])>>1 == 0))) 
+             || ((p->ref_idx[LIST_0][j][i] == -1) 
+             &&  (p->ref_idx[LIST_1][j][i] == 0) 
+             &&  (abs(p->mv[LIST_1][j][i][0])>>1 == 0) 
+             &&  (abs(p->mv[LIST_1][j][i][1])>>1 == 0)));
+         }
+       }
+     }      
+   }
+   
+   
+   if (img->direct_spatial_mv_pred_flag ==0)
+   {
+     for (j=0 ; j<fs->size_y/4 ; j++)      
+     {                
+       for (i=0 ; i<fs->size_x/4 ; i++)          
+       {                        
+         if ((!img->MbaffFrameFlag &&!img->structure && fs->field_frame[j][i]) || (img->MbaffFrameFlag && fs->field_frame[j][i]))
+         {
+           p->mv[LIST_0][j][i][1] *= 2; // field block used from a frame / MBAFF picture: double the vertical component
+           p->mv[LIST_1][j][i][1] *= 2;
+         }
+         else  if (img->structure && !fs->field_frame[j][i])
+         {
+           p->mv[LIST_0][j][i][1] /= 2; // frame block used from a field picture: halve the vertical component
+           p->mv[LIST_1][j][i][1] /= 2;
+         }
+         
+       }      
+     }
+     
+     for (j=0; j<2 + (img->MbaffFrameFlag * 4);j+=2) // j = 0 only, or j = 0, 2, 4 if MbaffFrameFlag is 1
+     {
+       for (i=0; i<listXsize[j];i++)
+       {
+         int prescale, iTRb, iTRp;
+         
+         if (j==0)
+         {
+           iTRb = Clip3( -128, 127, enc_picture->poc - listX[LIST_0 + j][i]->poc );
+         }
+         else if (j == 2)
+         {          
+           iTRb = Clip3( -128, 127, enc_picture->top_poc - listX[LIST_0 + j][i]->poc );
+         }
+         else
+         {
+           iTRb = Clip3( -128, 127, enc_picture->bottom_poc - listX[LIST_0 + j][i]->poc );
+         }
+         
+         iTRp = Clip3( -128, 127,  listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc);
+         
+         if (iTRp!=0)
+         {
+           prescale = ( 16384 + abs( iTRp / 2 ) ) / iTRp;
+           img->mvscale[j][i] = Clip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ;
+         }
+         else
+         {
+           img->mvscale[j][i] = 9999; // marker value stored when iTRp is 0 (no division possible)
+         }
+       }
+     }
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/mbuffer.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mbuffer.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mbuffer.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,193 ----
+ 
+ /*!
+  ***********************************************************************
+  *  \file
+  *      mbuffer.h
+  *
+  *  \brief
+  *      Frame buffer functions
+  *
+  *  \author
+  *      Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Karsten Sühring          <suehring at hhi.de>
+  ***********************************************************************
+  */
+ #ifndef _MBUFFER_H_
+ #define _MBUFFER_H_
+ 
+ #define MAX_LIST_SIZE 33
+ 
+ //! definition of a picture (field or frame) as it is kept in the decoded picture buffer
+ typedef struct storable_picture
+ {
+   PictureStructure structure;
+ 
+   int         poc;           //!< picture order count; for a frame combined from fields: min of both field pocs
+   int         top_poc;       //!< poc of the top field
+   int         bottom_poc;    //!< poc of the bottom field
+   int         frame_poc;     //!< poc of the combined frame
+   int         order_num;
+   int64       ref_pic_num[6][MAX_LIST_SIZE];
+   int64       frm_ref_pic_num[6][MAX_LIST_SIZE];
+   int64       top_ref_pic_num[6][MAX_LIST_SIZE];
+   int64       bottom_ref_pic_num[6][MAX_LIST_SIZE];
+   unsigned    frame_num;
+   int         pic_num;
+   int         long_term_pic_num;
+   int         long_term_frame_idx;
+ 
+   int         is_long_term;
+   int         used_for_reference;
+   int         is_output;
+   int         non_existing;  //!< set to 1 for pictures inserted by fill_frame_num_gap
+ 
+   int         size_x, size_y, size_x_cr, size_y_cr; //!< luma and chroma dimensions
+   int         chroma_vector_adjustment;
+   int         coded_frame;   //!< set to 0 when the frame was combined from two fields
+   int         MbaffFrameFlag;
+ 
+   imgpel **   imgY;          //!< Y picture component
+   imgpel *    imgY_11;       //!< Y picture component with padded borders
+   imgpel *    imgY_11_w;     //!< Y picture component with padded borders for weighted prediction
+   imgpel **   imgY_ups;      //!< Y picture component upsampled (Quarter pel)
+   imgpel **   imgY_ups_w;    //!< Y picture component upsampled (Quarter pel) for weighted prediction
+   imgpel ***  imgUV;         //!< U and V picture components
+ 
+   byte *      mb_field;      //!< field macroblock indicator
+ 
+   char  ***   ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+ 
+   int64 ***   ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+                              //   (not  simply index) 
+ 
+   int64 ***   ref_id;        //!< reference picture identifier [list][subblock_y][subblock_x]
+                              //   (not  simply index) 
+ 
+   short ****  mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+   
+   byte **     moving_block;
+   byte **     field_frame;         //!< indicates if co_located is field or frame.
+   
+   struct storable_picture *top_field;     // for mb aff, if frame for referencing the top field
+   struct storable_picture *bottom_field;  // for mb aff, if frame for referencing the bottom field
+   struct storable_picture *frame;         // for mb aff, if field for referencing the combined frame
+ 
+   int         chroma_format_idc;
+   int         frame_mbs_only_flag;
+   int         frame_cropping_flag;
+   int         frame_cropping_rect_left_offset;   // cropping offsets are copied only if frame_cropping_flag is set
+   int         frame_cropping_rect_right_offset;
+   int         frame_cropping_rect_top_offset;
+   int         frame_cropping_rect_bottom_offset;
+ } StorablePicture;
+ 
+ 
+ //! Co-located picture parameters: one set of reference/motion members for the
+ //! frame (unprefixed) plus top_* and bottom_* counterparts for the two fields
+ typedef struct colocated_params
+ {
+   int         mb_adaptive_frame_field_flag;
+   int         size_x, size_y;
+ 
+   // Frame params (unprefixed members)
+   int64       ref_pic_num[6][MAX_LIST_SIZE];  
+ 
+   char  ***   ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+   int64 ***   ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+   short ****  mv;            //!< motion vector       [list][subblock_x][subblock_y][component]  
+   byte **     moving_block;
+ 
+   // Top field params
+   int64       top_ref_pic_num[6][MAX_LIST_SIZE];  
+   char  ***   top_ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+   int64 ***   top_ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+   short ****  top_mv;            //!< motion vector       [list][subblock_x][subblock_y][component]  
+   byte **     top_moving_block;
+ 
+   // Bottom field params
+   int64       bottom_ref_pic_num[6][MAX_LIST_SIZE];  
+   char  ***   bottom_ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+   int64 ***   bottom_ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+   short ****  bottom_mv;            //!< motion vector       [list][subblock_x][subblock_y][component] 
+   byte **     bottom_moving_block;
+   
+   // Members without a top_/bottom_ counterpart
+   byte        is_long_term;
+   byte **     field_frame;         //!< indicates if co_located is field or frame.
+ 
+ } ColocatedParams;
+ 
+ //! Frame Stores for Decoded Picture Buffer
+ typedef struct frame_store
+ {
+   int       is_used;                //!< 0=empty; 1=top; 2=bottom; 3=both fields (or frame)
+   int       is_reference;           //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+   // NOTE(review): the comment on is_long_term is word-for-word the one on
+   // is_reference; presumably the same 0..3 encoding applies to long-term
+   // marking - confirm against the code that sets it.
+   int       is_long_term;           //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+   int       is_orig_reference;      //!< original marking by nal_ref_idc: 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+ 
+   int       is_non_existent;
+ 
+   unsigned  frame_num;
+   int       frame_num_wrap;
+   int       long_term_frame_idx;
+   int       is_output;
+   int       poc;
+ 
+   // Pictures attached to this store: the frame and its two fields
+   StorablePicture *frame;
+   StorablePicture *top_field;
+   StorablePicture *bottom_field;
+ 
+ } FrameStore;
+ 
+ 
+ //! Decoded Picture Buffer
+ typedef struct decoded_picture_buffer
+ {
+   // Three tables of FrameStore pointers.
+   // NOTE(review): presumably fs holds all stores while fs_ref / fs_ltref list
+   // the short-term / long-term reference stores - confirm in mbuffer.c.
+   FrameStore  **fs;
+   FrameStore  **fs_ref;
+   FrameStore  **fs_ltref;
+   // NOTE(review): presumably capacity and fill counters for the tables above - confirm
+   unsigned      size;
+   unsigned      used_size;
+   unsigned      ref_frames_in_buffer;
+   unsigned      ltref_frames_in_buffer;
+   int           last_output_poc;
+   int           max_long_term_pic_idx;
+ 
+   int           init_done;
+ 
+   FrameStore   *last_picture;
+ } DecodedPictureBuffer;
+ 
+ 
+ extern DecodedPictureBuffer dpb;
+ extern StorablePicture **listX[6];
+ extern int listXsize[6];
+ 
+ void             init_dpb();
+ void             free_dpb();
+ FrameStore*      alloc_frame_store();
+ void             free_frame_store(FrameStore* f);
+ StorablePicture* alloc_storable_picture(PictureStructure type, int size_x, int size_y, int size_x_cr, int size_y_cr);
+ void             free_storable_picture(StorablePicture* p);
+ void             store_picture_in_dpb(StorablePicture* p);
+ void             replace_top_pic_with_frame(StorablePicture* p);
+ void             flush_dpb();
+ 
+ void             dpb_split_field(FrameStore *fs);
+ void             dpb_combine_field(FrameStore *fs);
+ void             dpb_combine_field_yuv(FrameStore *fs);
+ 
+ void             init_lists(int currSliceType, PictureStructure currPicStructure);
+ void             reorder_ref_pic_list(StorablePicture **list, int *list_size, 
+                                       int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, 
+                                       int *abs_diff_pic_num_minus1, int *long_term_pic_idx);
+ 
+ void             init_mbaff_lists();
+ void             alloc_ref_pic_list_reordering_buffer(Slice *currSlice);
+ void             free_ref_pic_list_reordering_buffer(Slice *currSlice);
+ 
+ void             fill_frame_num_gap(ImageParameters *img);
+ 
+ ColocatedParams* alloc_colocated(int size_x, int size_y,int mb_adaptive_frame_field_flag);
+ void free_colocated(ColocatedParams* p);
+ void compute_colocated(ColocatedParams* p, StorablePicture **listX[6]);
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/memalloc.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/memalloc.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/memalloc.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,763 ----
+ 
+ /*!
+  ************************************************************************
+  * \file  memalloc.c
+  *
+  * \brief
+  *    Memory allocation and free helper functions
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  ************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ 
+ #include "global.h"
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Build row-pointer tables for the top and bottom field that alias
+  *    the even and odd rows of an existing frame. No pixel data is
+  *    allocated or copied.
+  *
+  * \param imgFrame
+  *    row pointers of the frame; rows 2*i and 2*i+1 are referenced
+  * \param rows
+  *    number of frame rows; each field table gets rows/2 entries
+  * \param columns
+  *    not used by this function
+  * \param imgTopField
+  *    receives the newly allocated table of even-row pointers
+  * \param imgBotField
+  *    receives the newly allocated table of odd-row pointers
+  *
+  * \return
+  *    rows*sizeof(imgpel*)
+  ************************************************************************/
+ int init_top_bot_planes(imgpel **imgFrame, int rows, int columns, imgpel ***imgTopField, imgpel ***imgBotField)
+ {
+   const int field_rows = rows/2;
+   int       line;
+ 
+   *imgTopField = (imgpel**)calloc(field_rows, sizeof(imgpel*));
+   if (*imgTopField == NULL)
+     no_mem_exit("init_top_bot_planes: imgTopField");
+ 
+   *imgBotField = (imgpel**)calloc(field_rows, sizeof(imgpel*));
+   if (*imgBotField == NULL)
+     no_mem_exit("init_top_bot_planes: imgBotField");
+ 
+   // interleaved frame rows: even -> top field, odd -> bottom field
+   for (line = 0; line < field_rows; line++)
+   {
+     imgpel **pair = imgFrame + 2*line;
+ 
+     (*imgTopField)[line] = pair[0];
+     (*imgBotField)[line] = pair[1];
+   }
+ 
+   return rows*sizeof(imgpel*);
+ }
+ 
+  /*!
+  ************************************************************************
+  * \brief
+  *    free 2-dimensional top and bottom fields without freeing target memory
+  *    (only the two row-pointer tables passed in are released)
+  *
+  * \param imgTopField
+  *    top field row table, e.g. as allocated by init_top_bot_planes()
+  * \param imgBotField
+  *    bottom field row table, e.g. as allocated by init_top_bot_planes()
+  ************************************************************************/
+ void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField)
+ {
+   free (imgTopField);
+   free (imgBotField);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a zero-initialized 2D array usable as
+  *    imgpel array2D[rows][columns]. All elements live in one contiguous
+  *    block; a separate table holds one pointer per row.
+  *
+  * \param array2D
+  *    receives the row-pointer table
+  * \param rows
+  *    number of rows
+  * \param columns
+  *    number of elements per row
+  *
+  * \return
+  *    rows*columns*sizeof(imgpel) (the row-pointer table is not counted)
+  ************************************************************************/
+ int get_mem2Dpel(imgpel ***array2D, int rows, int columns)
+ {
+   imgpel **row_tab;
+   imgpel  *line;
+   int      r;
+ 
+   row_tab  = (imgpel**)calloc(rows, sizeof(imgpel*));
+   *array2D = row_tab;
+   if (row_tab == NULL)
+     no_mem_exit("get_mem2Dpel: array2D");
+ 
+   line       = (imgpel*)calloc(rows*columns, sizeof(imgpel));
+   row_tab[0] = line;
+   if (line == NULL)
+     no_mem_exit("get_mem2Dpel: array2D");
+ 
+   // every following row starts `columns` elements after the previous one
+   for (r = 1; r < rows; r++)
+   {
+     line      += columns;
+     row_tab[r] = line;
+   }
+ 
+   return rows*columns*sizeof(imgpel);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 3D array usable as imgpel array3D[frames][rows][columns].
+  *    A table of `frames` plane pointers is allocated first; each plane
+  *    is then obtained from get_mem2Dpel().
+  *
+  * \param array3D
+  *    receives the plane-pointer table
+  * \param frames
+  *    number of 2D planes
+  * \param rows
+  *    rows per plane
+  * \param columns
+  *    elements per row
+  *
+  * \return
+  *    frames*rows*columns*sizeof(imgpel) (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem3Dpel(imgpel ****array3D, int frames, int rows, int columns)
+ {
+   imgpel ***plane_tab = (imgpel***)calloc(frames, sizeof(imgpel**));
+   int       f;
+ 
+   *array3D = plane_tab;
+   if (plane_tab == NULL)
+     no_mem_exit("get_mem3Dpel: array3D");
+ 
+   for (f = 0; f < frames; f++)
+     get_mem2Dpel(&plane_tab[f], rows, columns);
+ 
+   return frames*rows*columns*sizeof(imgpel);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 2D array that was allocated with get_mem2Dpel():
+  *    first the element block referenced by array2D[0], then the
+  *    row-pointer table itself.
+  *
+  * \param array2D
+  *    row-pointer table; error() is called with code 100 when it is NULL
+  *    or when array2D[0] is NULL
+  ************************************************************************
+  */
+ void free_mem2Dpel(imgpel **array2D)
+ {
+   if (array2D == NULL)
+   {
+     error ("free_mem2Dpel: trying to free unused memory",100);
+     return;
+   }
+ 
+   // the element block hangs off row 0; release it before the row table
+   if (array2D[0] == NULL)
+     error ("free_mem2Dpel: trying to free unused memory",100);
+   else
+     free (array2D[0]);
+ 
+   free (array2D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 3D array that was allocated with get_mem3Dpel(): each of
+  *    the `frames` planes goes through free_mem2Dpel(), then the
+  *    plane-pointer table is freed.
+  *
+  * \param array3D
+  *    plane-pointer table; error() is called with code 100 when it is NULL
+  * \param frames
+  *    number of planes held in array3D
+  ************************************************************************
+  */
+ void free_mem3Dpel(imgpel ***array3D, int frames)
+ {
+   int f = 0;
+ 
+   if (array3D == NULL)
+   {
+     error ("free_mem3Dpel: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (f < frames)
+     free_mem2Dpel(array3D[f++]);
+ 
+   free (array3D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a zero-initialized 2D array usable as
+  *    byte array2D[rows][columns]. All elements live in one contiguous
+  *    block; a separate table holds one pointer per row.
+  *
+  * \param array2D
+  *    receives the row-pointer table
+  * \param rows
+  *    number of rows
+  * \param columns
+  *    number of elements per row
+  *
+  * \return
+  *    rows*columns (the row-pointer table is not counted)
+  ************************************************************************/
+ int get_mem2D(byte ***array2D, int rows, int columns)
+ {
+   byte **row_tab;
+   byte  *line;
+   int    r;
+ 
+   row_tab  = (byte**)calloc(rows, sizeof(byte*));
+   *array2D = row_tab;
+   if (row_tab == NULL)
+     no_mem_exit("get_mem2D: array2D");
+ 
+   line       = (byte*)calloc(columns*rows, sizeof(byte));
+   row_tab[0] = line;
+   if (line == NULL)
+     no_mem_exit("get_mem2D: array2D");
+ 
+   // every following row starts `columns` elements after the previous one
+   for (r = 1; r < rows; r++)
+   {
+     line      += columns;
+     row_tab[r] = line;
+   }
+ 
+   return rows*columns;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a zero-initialized 2D array usable as
+  *    int array2D[rows][columns]. All elements live in one contiguous
+  *    block; a separate table holds one pointer per row.
+  *
+  * \param array2D
+  *    receives the row-pointer table
+  * \param rows
+  *    number of rows
+  * \param columns
+  *    number of elements per row
+  *
+  * \return
+  *    rows*columns*sizeof(int) (the row-pointer table is not counted)
+  ************************************************************************
+  */
+ int get_mem2Dint(int ***array2D, int rows, int columns)
+ {
+   int **row_tab;
+   int  *line;
+   int   r;
+ 
+   row_tab  = (int**)calloc(rows, sizeof(int*));
+   *array2D = row_tab;
+   if (row_tab == NULL)
+     no_mem_exit("get_mem2Dint: array2D");
+ 
+   line       = (int*)calloc(rows*columns, sizeof(int));
+   row_tab[0] = line;
+   if (line == NULL)
+     no_mem_exit("get_mem2Dint: array2D");
+ 
+   // every following row starts `columns` elements after the previous one
+   for (r = 1; r < rows; r++)
+   {
+     line      += columns;
+     row_tab[r] = line;
+   }
+ 
+   return rows*columns*sizeof(int);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a zero-initialized 2D array usable as
+  *    int64 array2D[rows][columns]. All elements live in one contiguous
+  *    block; a separate table holds one pointer per row.
+  *
+  * \param array2D
+  *    receives the row-pointer table
+  * \param rows
+  *    number of rows
+  * \param columns
+  *    number of elements per row
+  *
+  * \return
+  *    rows*columns*sizeof(int64) (the row-pointer table is not counted)
+  ************************************************************************
+  */
+ int get_mem2Dint64(int64 ***array2D, int rows, int columns)
+ {
+   int64 **row_tab;
+   int64  *line;
+   int     r;
+ 
+   row_tab  = (int64**)calloc(rows, sizeof(int64*));
+   *array2D = row_tab;
+   if (row_tab == NULL)
+     no_mem_exit("get_mem2Dint64: array2D");
+ 
+   line       = (int64*)calloc(rows*columns, sizeof(int64));
+   row_tab[0] = line;
+   if (line == NULL)
+     no_mem_exit("get_mem2Dint64: array2D");
+ 
+   // every following row starts `columns` elements after the previous one
+   for (r = 1; r < rows; r++)
+   {
+     line      += columns;
+     row_tab[r] = line;
+   }
+ 
+   return rows*columns*sizeof(int64);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 3D array usable as byte array3D[frames][rows][columns].
+  *    A table of `frames` plane pointers is allocated first; each plane
+  *    is then obtained from get_mem2D().
+  *
+  * \param array3D
+  *    receives the plane-pointer table
+  * \param frames
+  *    number of 2D planes
+  * \param rows
+  *    rows per plane
+  * \param columns
+  *    elements per row
+  *
+  * \return
+  *    frames*rows*columns (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem3D(byte ****array3D, int frames, int rows, int columns)
+ {
+   byte ***plane_tab = (byte***)calloc(frames, sizeof(byte**));
+   int     f;
+ 
+   *array3D = plane_tab;
+   if (plane_tab == NULL)
+     no_mem_exit("get_mem3D: array3D");
+ 
+   for (f = 0; f < frames; f++)
+     get_mem2D(&plane_tab[f], rows, columns);
+ 
+   return frames*rows*columns;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 3D array usable as int array3D[frames][rows][columns].
+  *    A table of `frames` plane pointers is allocated first; each plane
+  *    is then obtained from get_mem2Dint().
+  *
+  * \param array3D
+  *    receives the plane-pointer table
+  * \param frames
+  *    number of 2D planes
+  * \param rows
+  *    rows per plane
+  * \param columns
+  *    elements per row
+  *
+  * \return
+  *    frames*rows*columns*sizeof(int) (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem3Dint(int ****array3D, int frames, int rows, int columns)
+ {
+   int ***plane_tab = (int***)calloc(frames, sizeof(int**));
+   int    f;
+ 
+   *array3D = plane_tab;
+   if (plane_tab == NULL)
+     no_mem_exit("get_mem3Dint: array3D");
+ 
+   for (f = 0; f < frames; f++)
+     get_mem2Dint(&plane_tab[f], rows, columns);
+ 
+   return frames*rows*columns*sizeof(int);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 3D array usable as int64 array3D[frames][rows][columns].
+  *    A table of `frames` plane pointers is allocated first; each plane
+  *    is then obtained from get_mem2Dint64().
+  *
+  * \param array3D
+  *    receives the plane-pointer table
+  * \param frames
+  *    number of 2D planes
+  * \param rows
+  *    rows per plane
+  * \param columns
+  *    elements per row
+  *
+  * \return
+  *    frames*rows*columns*sizeof(int64) (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem3Dint64(int64 ****array3D, int frames, int rows, int columns)
+ {
+   int64 ***plane_tab = (int64***)calloc(frames, sizeof(int64**));
+   int      f;
+ 
+   *array3D = plane_tab;
+   if (plane_tab == NULL)
+     no_mem_exit("get_mem3Dint64: array3D");
+ 
+   for (f = 0; f < frames; f++)
+     get_mem2Dint64(&plane_tab[f], rows, columns);
+ 
+   return frames*rows*columns*sizeof(int64);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 4D array usable as int array4D[idx][frames][rows][columns].
+  *    A table of `idx` pointers is allocated first; each entry is then
+  *    filled by get_mem3Dint().
+  *
+  * \param array4D
+  *    receives the top-level pointer table
+  * \param idx
+  *    size of the first (outermost) dimension
+  * \param frames
+  *    size of the second dimension
+  * \param rows
+  *    size of the third dimension
+  * \param columns
+  *    size of the fourth (innermost) dimension
+  *
+  * \return
+  *    idx*frames*rows*columns*sizeof(int) (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns )
+ {
+   int ****cube_tab = (int****)calloc(idx, sizeof(int***));
+   int     k;
+ 
+   *array4D = cube_tab;
+   if (cube_tab == NULL)
+     no_mem_exit("get_mem4Dint: array4D");
+ 
+   for (k = 0; k < idx; k++)
+     get_mem3Dint(&cube_tab[k], frames, rows, columns);
+ 
+   return idx*frames*rows*columns*sizeof(int);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 5D array usable as
+  *    int array5D[refs][blocktype][rows][columns][component].
+  *    A table of `refs` pointers is allocated first; each entry is then
+  *    filled by get_mem4Dint().
+  *
+  * \param array5D
+  *    receives the top-level pointer table
+  * \param refs
+  *    size of the first (outermost) dimension
+  * \param blocktype
+  *    size of the second dimension
+  * \param rows
+  *    size of the third dimension
+  * \param columns
+  *    size of the fourth dimension
+  * \param component
+  *    size of the fifth (innermost) dimension
+  *
+  * \return
+  *    refs*blocktype*rows*columns*component*sizeof(int)
+  *    (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem5Dint(int ******array5D, int refs, int blocktype, int rows, int columns, int component)
+ {
+   int *****ref_tab = (int*****)calloc(refs, sizeof(int****));
+   int      k;
+ 
+   *array5D = ref_tab;
+   if (ref_tab == NULL)
+     no_mem_exit("get_mem5Dint: array5D");
+ 
+   for (k = 0; k < refs; k++)
+     get_mem4Dint(&ref_tab[k], blocktype, rows, columns, component);
+ 
+   return refs*blocktype*rows*columns*component*sizeof(int);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 2D array that was allocated with get_mem2D():
+  *    first the element block referenced by array2D[0], then the
+  *    row-pointer table itself.
+  *
+  * \param array2D
+  *    row-pointer table; error() is called with code 100 when it is NULL
+  *    or when array2D[0] is NULL
+  ************************************************************************
+  */
+ void free_mem2D(byte **array2D)
+ {
+   if (array2D == NULL)
+   {
+     error ("free_mem2D: trying to free unused memory",100);
+     return;
+   }
+ 
+   // the element block hangs off row 0; release it before the row table
+   if (array2D[0] == NULL)
+     error ("free_mem2D: trying to free unused memory",100);
+   else
+     free (array2D[0]);
+ 
+   free (array2D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 2D array that was allocated with get_mem2Dint():
+  *    first the element block referenced by array2D[0], then the
+  *    row-pointer table itself.
+  *
+  * \param array2D
+  *    row-pointer table; error() is called with code 100 when it is NULL
+  *    or when array2D[0] is NULL
+  ************************************************************************
+  */
+ void free_mem2Dint(int **array2D)
+ {
+   if (array2D == NULL)
+   {
+     error ("free_mem2Dint: trying to free unused memory",100);
+     return;
+   }
+ 
+   // the element block hangs off row 0; release it before the row table
+   if (array2D[0] == NULL)
+     error ("free_mem2Dint: trying to free unused memory",100);
+   else
+     free (array2D[0]);
+ 
+   free (array2D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 2D array that was allocated with get_mem2Dint64():
+  *    first the element block referenced by array2D[0], then the
+  *    row-pointer table itself.
+  *
+  * \param array2D
+  *    row-pointer table; error() is called with code 100 when it is NULL
+  *    or when array2D[0] is NULL
+  ************************************************************************
+  */
+ void free_mem2Dint64(int64 **array2D)
+ {
+   if (array2D == NULL)
+   {
+     error ("free_mem2Dint64: trying to free unused memory",100);
+     return;
+   }
+ 
+   // the element block hangs off row 0; release it before the row table
+   if (array2D[0] == NULL)
+     error ("free_mem2Dint64: trying to free unused memory",100);
+   else
+     free (array2D[0]);
+ 
+   free (array2D);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 3D array that was allocated with get_mem3D(): each of
+  *    the `frames` planes goes through free_mem2D(), then the
+  *    plane-pointer table is freed.
+  *
+  * \param array3D
+  *    plane-pointer table; error() is called with code 100 when it is NULL
+  * \param frames
+  *    number of planes held in array3D
+  ************************************************************************
+  */
+ void free_mem3D(byte ***array3D, int frames)
+ {
+   int f = 0;
+ 
+   if (array3D == NULL)
+   {
+     error ("free_mem3D: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (f < frames)
+     free_mem2D(array3D[f++]);
+ 
+   free (array3D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 3D array that was allocated with get_mem3Dint(): each of
+  *    the `frames` planes goes through free_mem2Dint(), then the
+  *    plane-pointer table is freed.
+  *
+  * \param array3D
+  *    plane-pointer table; error() is called with code 100 when it is NULL
+  * \param frames
+  *    number of planes held in array3D
+  ************************************************************************
+  */
+ void free_mem3Dint(int ***array3D, int frames)
+ {
+   int f = 0;
+ 
+   if (array3D == NULL)
+   {
+     error ("free_mem3Dint: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (f < frames)
+     free_mem2Dint(array3D[f++]);
+ 
+   free (array3D);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 3D array that was allocated with get_mem3Dint64(): each
+  *    of the `frames` planes goes through free_mem2Dint64(), then the
+  *    plane-pointer table is freed.
+  *
+  * \param array3D
+  *    plane-pointer table; error() is called with code 100 when it is NULL
+  * \param frames
+  *    number of planes held in array3D
+  ************************************************************************
+  */
+ void free_mem3Dint64(int64 ***array3D, int frames)
+ {
+   int f = 0;
+ 
+   if (array3D == NULL)
+   {
+     error ("free_mem3Dint64: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (f < frames)
+     free_mem2Dint64(array3D[f++]);
+ 
+   free (array3D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 4D array that was allocated with get_mem4Dint(): each of
+  *    the `idx` entries goes through free_mem3Dint(), then the top-level
+  *    pointer table is freed.
+  *
+  * \param array4D
+  *    top-level pointer table; error() is called with code 100 when NULL
+  * \param idx
+  *    size of the first dimension (as passed to get_mem4Dint())
+  * \param frames
+  *    size of the second dimension (as passed to get_mem4Dint())
+  ************************************************************************
+  */
+ void free_mem4Dint(int ****array4D, int idx, int frames )
+ {
+   int k = 0;
+ 
+   if (array4D == NULL)
+   {
+     error ("free_mem4Dint: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (k < idx)
+     free_mem3Dint(array4D[k++], frames);
+ 
+   free (array4D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 5D int array that was allocated with get_mem5Dint(): each
+  *    of the `refs` entries goes through free_mem4Dint(), then the
+  *    top-level pointer table is freed.
+  *
+  * \param array5D
+  *    top-level pointer table; error() is called with code 100 when NULL
+  * \param refs
+  *    size of the first dimension
+  * \param blocktype
+  *    size of the second dimension
+  * \param height
+  *    size of the third dimension (the `rows` argument of get_mem5Dint())
+  ************************************************************************
+  */
+ void free_mem5Dint(int *****array5D, int refs, int blocktype, int height)
+ {
+   int k = 0;
+ 
+   if (array5D == NULL)
+   {
+     error ("free_mem5Dint: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (k < refs)
+     free_mem4Dint(array5D[k++], blocktype, height);
+ 
+   free (array5D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Report a failed memory allocation: formats
+  *    "Could not allocate memory: <where>" into errortext (bounded by
+  *    ET_SIZE) and passes it to error() with code 100.
+  *    NOTE(review): presumably error() terminates the program - confirm;
+  *    callers in this file keep using the pointer after this call.
+  * \param where
+  *    string indicating which memory allocation failed
+  ************************************************************************
+  */
+ void no_mem_exit(char *where)
+ {
+    snprintf(errortext, ET_SIZE, "Could not allocate memory: %s",where);
+    error (errortext, 100);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a zero-initialized 2D array usable as
+  *    short array2D[rows][columns]. All elements live in one contiguous
+  *    block; a separate table holds one pointer per row.
+  *
+  * \param array2D
+  *    receives the row-pointer table
+  * \param rows
+  *    number of rows
+  * \param columns
+  *    number of elements per row
+  *
+  * \return
+  *    rows*columns*sizeof(short) (the row-pointer table is not counted)
+  ************************************************************************
+  */
+ int get_mem2Dshort(short ***array2D, int rows, int columns)
+ {
+   short **row_tab;
+   short  *line;
+   int     r;
+ 
+   row_tab  = (short**)calloc(rows, sizeof(short*));
+   *array2D = row_tab;
+   if (row_tab == NULL)
+     no_mem_exit("get_mem2Dshort: array2D");
+ 
+   line       = (short*)calloc(rows*columns, sizeof(short));
+   row_tab[0] = line;
+   if (line == NULL)
+     no_mem_exit("get_mem2Dshort: array2D");
+ 
+   // every following row starts `columns` elements after the previous one
+   for (r = 1; r < rows; r++)
+   {
+     line      += columns;
+     row_tab[r] = line;
+   }
+ 
+   return rows*columns*sizeof(short);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 3D array usable as short array3D[frames][rows][columns].
+  *    A table of `frames` plane pointers is allocated first; each plane
+  *    is then obtained from get_mem2Dshort().
+  *
+  * \param array3D
+  *    receives the plane-pointer table
+  * \param frames
+  *    number of 2D planes
+  * \param rows
+  *    rows per plane
+  * \param columns
+  *    elements per row
+  *
+  * \return
+  *    frames*rows*columns*sizeof(short) (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem3Dshort(short ****array3D, int frames, int rows, int columns)
+ {
+   short ***plane_tab = (short***)calloc(frames, sizeof(short**));
+   int      f;
+ 
+   *array3D = plane_tab;
+   if (plane_tab == NULL)
+     no_mem_exit("get_mem3Dshort: array3D");
+ 
+   for (f = 0; f < frames; f++)
+     get_mem2Dshort(&plane_tab[f], rows, columns);
+ 
+   return frames*rows*columns*sizeof(short);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 4D array usable as
+  *    short array4D[idx][frames][rows][columns].
+  *    A table of `idx` pointers is allocated first; each entry is then
+  *    filled by get_mem3Dshort().
+  *
+  * \param array4D
+  *    receives the top-level pointer table
+  * \param idx
+  *    size of the first (outermost) dimension
+  * \param frames
+  *    size of the second dimension
+  * \param rows
+  *    size of the third dimension
+  * \param columns
+  *    size of the fourth (innermost) dimension
+  *
+  * \return
+  *    idx*frames*rows*columns*sizeof(short) (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem4Dshort(short *****array4D, int idx, int frames, int rows, int columns )
+ {
+   int  j;
+ 
+   // the table elements are short***, so size them as such (the original
+   // used sizeof(short**), which only worked because all object pointers
+   // have the same size on the supported platforms)
+   if(((*array4D) = (short****)calloc(idx,sizeof(short***))) == NULL)
+     no_mem_exit("get_mem4Dshort: array4D");
+ 
+   for(j=0;j<idx;j++)
+     get_mem3Dshort( (*array4D)+j, frames, rows, columns ) ;
+ 
+   return idx*frames*rows*columns*sizeof(short);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 5D array usable as
+  *    short array5D[refs][blocktype][rows][columns][component].
+  *    A table of `refs` pointers is allocated first; each entry is then
+  *    filled by get_mem4Dshort().
+  *
+  * \param array5D
+  *    receives the top-level pointer table
+  * \param refs
+  *    size of the first (outermost) dimension
+  * \param blocktype
+  *    size of the second dimension
+  * \param rows
+  *    size of the third dimension
+  * \param columns
+  *    size of the fourth dimension
+  * \param component
+  *    size of the fifth (innermost) dimension
+  *
+  * \return
+  *    refs*blocktype*rows*columns*component*sizeof(short)
+  *    (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem5Dshort(short ******array5D, int refs, int blocktype, int rows, int columns, int component)
+ {
+   short *****ref_tab = (short*****)calloc(refs, sizeof(short****));
+   int        k;
+ 
+   *array5D = ref_tab;
+   if (ref_tab == NULL)
+     no_mem_exit("get_mem5Dshort: array5D");
+ 
+   for (k = 0; k < refs; k++)
+     get_mem4Dshort(&ref_tab[k], blocktype, rows, columns, component);
+ 
+   return refs*blocktype*rows*columns*component*sizeof(short);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate a 6D array usable as
+  *    short array6D[list][refs][blocktype][rows][columns][component].
+  *    A table of `list` pointers is allocated first; each entry is then
+  *    filled by get_mem5Dshort().
+  *
+  * \param array6D
+  *    receives the top-level pointer table
+  * \param list
+  *    size of the first (outermost) dimension
+  * \param refs
+  *    size of the second dimension
+  * \param blocktype
+  *    size of the third dimension
+  * \param rows
+  *    size of the fourth dimension
+  * \param columns
+  *    size of the fifth dimension
+  * \param component
+  *    size of the sixth (innermost) dimension
+  *
+  * \return
+  *    list*refs*blocktype*rows*columns*component*sizeof(short)
+  *    (pointer tables are not counted)
+  ************************************************************************
+  */
+ int get_mem6Dshort(short *******array6D, int list, int refs, int blocktype, int rows, int columns, int component)
+ {
+   short ******list_tab = (short******)calloc(list, sizeof(short*****));
+   int         k;
+ 
+   *array6D = list_tab;
+   if (list_tab == NULL)
+     no_mem_exit("get_mem6Dshort: array6D");
+ 
+   for (k = 0; k < list; k++)
+     get_mem5Dshort(&list_tab[k], refs, blocktype, rows, columns, component);
+ 
+   return list * refs * blocktype * rows * columns * component * sizeof(short);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 2D array that was allocated with get_mem2Dshort():
+  *    first the element block referenced by array2D[0], then the
+  *    row-pointer table itself.
+  *
+  * \param array2D
+  *    row-pointer table; error() is called with code 100 when it is NULL
+  *    or when array2D[0] is NULL
+  ************************************************************************
+  */
+ void free_mem2Dshort(short **array2D)
+ {
+   if (array2D == NULL)
+   {
+     error ("free_mem2Dshort: trying to free unused memory",100);
+     return;
+   }
+ 
+   // the element block hangs off row 0; release it before the row table
+   if (array2D[0] == NULL)
+     error ("free_mem2Dshort: trying to free unused memory",100);
+   else
+     free (array2D[0]);
+ 
+   free (array2D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 3D array that was allocated with get_mem3Dshort(): each
+  *    of the `frames` planes goes through free_mem2Dshort(), then the
+  *    plane-pointer table is freed.
+  *
+  * \param array3D
+  *    plane-pointer table; error() is called with code 100 when it is NULL
+  * \param frames
+  *    number of planes held in array3D
+  ************************************************************************
+  */
+ void free_mem3Dshort(short ***array3D, int frames)
+ {
+   int f = 0;
+ 
+   if (array3D == NULL)
+   {
+     error ("free_mem3Dshort: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (f < frames)
+     free_mem2Dshort(array3D[f++]);
+ 
+   free (array3D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 4D array that was allocated with get_mem4Dshort(): each
+  *    of the `idx` entries goes through free_mem3Dshort(), then the
+  *    top-level pointer table is freed.
+  *
+  * \param array4D
+  *    top-level pointer table; error() is called with code 100 when NULL
+  * \param idx
+  *    size of the first dimension (as passed to get_mem4Dshort())
+  * \param frames
+  *    size of the second dimension (as passed to get_mem4Dshort())
+  ************************************************************************
+  */
+ void free_mem4Dshort(short ****array4D, int idx, int frames )
+ {
+   int k = 0;
+ 
+   if (array4D == NULL)
+   {
+     error ("free_mem4Dshort: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (k < idx)
+     free_mem3Dshort(array4D[k++], frames);
+ 
+   free (array4D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 5D array that was allocated with get_mem5Dshort(): each
+  *    of the `refs` entries goes through free_mem4Dshort(), then the
+  *    top-level pointer table is freed.
+  *
+  * \param array5D
+  *    top-level pointer table; error() is called with code 100 when NULL
+  * \param refs
+  *    size of the first dimension
+  * \param blocktype
+  *    size of the second dimension
+  * \param height
+  *    size of the third dimension (the `rows` argument of get_mem5Dshort())
+  ************************************************************************
+  */
+ void free_mem5Dshort(short *****array5D, int refs, int blocktype, int height)
+ {
+   int k = 0;
+ 
+   if (array5D == NULL)
+   {
+     error ("free_mem5Dshort: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (k < refs)
+     free_mem4Dshort(array5D[k++], blocktype, height);
+ 
+   free (array5D);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release a 6D array that was allocated with get_mem6Dshort(): each
+  *    of the `list` entries goes through free_mem5Dshort(), then the
+  *    top-level pointer table is freed.
+  *
+  * \param array6D
+  *    top-level pointer table; error() is called with code 100 when NULL
+  * \param list
+  *    size of the first dimension
+  * \param refs
+  *    size of the second dimension
+  * \param blocktype
+  *    size of the third dimension
+  * \param height
+  *    size of the fourth dimension (the `rows` argument of get_mem6Dshort())
+  ************************************************************************
+  */
+ void free_mem6Dshort(short ******array6D, int list, int refs, int blocktype, int height)
+ {
+   int k = 0;
+ 
+   if (array6D == NULL)
+   {
+     error ("free_mem6Dshort: trying to free unused memory",100);
+     return;
+   }
+ 
+   while (k < list)
+     free_mem5Dshort(array6D[k++], refs, blocktype, height);
+ 
+   free (array6D);
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/memalloc.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/memalloc.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/memalloc.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,63 ----
+ 
+ /*!
+  ************************************************************************
+  * \file  memalloc.h
+  *
+  * \brief
+  *    Memory allocation and free helper functions
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  ************************************************************************
+  */
+ 
+ #ifndef _MEMALLOC_H_
+ #define _MEMALLOC_H_
+ 
+ // Allocators: every get_memND* function stores the new array through its
+ // first argument and returns a byte count for the element data.
+ // Parameter names below match the definitions in memalloc.c.
+ 
+ // byte arrays
+ int  get_mem2D(byte ***array2D, int rows, int columns);
+ int  get_mem3D(byte ****array3D, int frames, int rows, int columns);
+ 
+ // int arrays
+ int  get_mem2Dint(int ***array2D, int rows, int columns);
+ int  get_mem3Dint(int ****array3D, int frames, int rows, int columns);
+ int  get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns );
+ int  get_mem5Dint(int ******array5D, int refs, int blocktype, int rows, int columns, int component);
+ 
+ // int64 arrays
+ int  get_mem2Dint64(int64 ***array2D, int rows, int columns);
+ int  get_mem3Dint64(int64 ****array3D, int frames, int rows, int columns);
+ 
+ // short arrays
+ int  get_mem2Dshort(short ***array2D, int rows, int columns);
+ int  get_mem3Dshort(short ****array3D, int frames, int rows, int columns);
+ int  get_mem4Dshort(short *****array4D, int idx, int frames, int rows, int columns );
+ int  get_mem5Dshort(short ******array5D, int refs, int blocktype, int rows, int columns, int component);
+ int  get_mem6Dshort(short *******array6D, int list, int refs, int blocktype, int rows, int columns, int component);
+ 
+ // imgpel arrays
+ int get_mem2Dpel(imgpel ***array2D, int rows, int columns);
+ int get_mem3Dpel(imgpel ****array3D, int frames, int rows, int columns);
+ 
+ // Deallocators: the size arguments must repeat the outer dimensions that
+ // were given to the matching get_memND* call.
+ void free_mem2D(byte **array2D);
+ void free_mem3D(byte ***array3D, int frames);
+ 
+ void free_mem2Dint(int **array2D);
+ void free_mem3Dint(int ***array3D, int frames);
+ void free_mem4Dint(int ****array4D, int idx, int frames);
+ void free_mem5Dint(int *****array5D, int refs, int blocktype, int height);
+ 
+ void free_mem2Dint64(int64 **array2D);
+ void free_mem3Dint64(int64 ***array3D, int frames);
+ 
+ void free_mem2Dshort(short **array2D);
+ void free_mem3Dshort(short ***array3D, int frames);
+ void free_mem4Dshort(short ****array4D, int idx, int frames);
+ void free_mem5Dshort(short *****array5D, int refs, int blocktype, int height);
+ void free_mem6Dshort(short ******array6D, int list, int refs, int blocktype, int height);
+ 
+ void free_mem2Dpel(imgpel **array2D);
+ void free_mem3Dpel(imgpel ***array3D, int frames);
+ 
+ // Field row tables that alias the rows of an existing frame
+ int init_top_bot_planes(imgpel **imgFrame, int rows, int columns, imgpel ***imgTopField, imgpel ***imgBotField);
+ void free_top_bot_planes(imgpel **imgTopField, imgpel **imgBotField);
+ 
+ 
+ // Allocation failure reporting
+ void no_mem_exit(char *where);
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/minmax.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/minmax.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/minmax.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,19 ----
+ 
+ /*!
+  ************************************************************************
+  *  \file
+  *     minmax.h
+  *  \brief
+  *     defines min and max macros when WIN32 is not defined, or whenever
+  *     __GNUC__ is defined (see the #if below)
+  ************************************************************************
+  */
+ #ifndef _MINMAX_
+ #define _MINMAX_
+ 
+ // NOTE: each argument appears twice in the expansion, so an argument with
+ // side effects (e.g. i++) may be evaluated more than once.
+ #if !defined(WIN32) || defined(__GNUC__)
+ #define max(a, b) (((a) > (b)) ? (a) : (b))
+ #define min(a, b) (((a) < (b)) ? (a) : (b))
+ #endif
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/mode_decision.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mode_decision.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mode_decision.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1844 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file mode_decision.c
+  *
+  * \brief
+  *    Main macroblock mode decision functions and helpers
+  *
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <float.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ #include "rdopt_coding_state.h"
+ #include "mb_access.h"
+ #include "intrarefresh.h"
+ #include "image.h"
+ #include "transform8x8.h"
+ #include "fast_me.h"
+ #include "simplified_fast_me.h"
+ #include "ratectl.h"            
+ #include "mode_decision.h"
+ 
+ //==== MODULE PARAMETERS ====
+ // Three 16x16 scratch blocks, one each named for Y, U and V
+ imgpel temp_imgY[16][16]; // to temp store the Y data for 8x8 transform
+ imgpel temp_imgU[16][16];
+ imgpel temp_imgV[16][16];
+ 
+ // NOTE(review): b8_mode_table is declared with 6 elements but has only 5
+ // initializers, so element [5] is implicitly 0 - confirm this is intended.
+ const int  b8_mode_table[6]  = {0, 4, 5, 6, 7};         // DO NOT CHANGE ORDER !!!
+ const int  mb_mode_table[9]  = {0, 1, 2, 3, P8x8, I16MB, I4MB, I8MB, IPCM}; // DO NOT CHANGE ORDER !!!
+ // Residue Color Transform
+ // (this table lists I16MB four times and has no IPCM entry)
+ const int  mb_mode_table_RCT[11]  = {0, 1, 2, 3, P8x8, I16MB, I16MB, I16MB, I16MB, I4MB, I8MB};
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Update Rate Control Difference: for every position of an
+ *    MB_BLOCK_SIZE x MB_BLOCK_SIZE block, store imgY_org minus prediction
+ *    in diffy
+ *
+ * \param cpix_x
+ *    horizontal offset into imgY_org of the block's first column
+ * \param cpix_y
+ *    vertical offset into imgY_org of the block's first row
+ * \param prediction
+ *    predicted samples, indexed [row][column] within the block
+ *************************************************************************************
+ */
+ void rc_store_diff(int cpix_x, int cpix_y, imgpel prediction[16][16])
+ {
+   int col, row;
+ 
+   // column-outer / row-inner traversal, as in the original implementation
+   for (col = 0; col < MB_BLOCK_SIZE; col++)
+     for (row = 0; row < MB_BLOCK_SIZE; row++)
+       diffy[row][col] = imgY_org[cpix_y + row][cpix_x + col] - prediction[row][col];
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Update the rate control bookkeeping after a macroblock has been decided
+ *
+ * \param currMB
+ *    macroblock whose qp / delta_qp / prev_cbp fields are updated
+ * \param best_mode
+ *    mode chosen for the macroblock; I16MB counts as "not skipped" even when
+ *    the coded block pattern is zero
+ *
+ * \note
+ *    Nothing happens outside P slices.
+ *************************************************************************************
+ */
+ void update_rc(Macroblock *currMB, short best_mode)
+ {
+   if (img->type != P_SLICE)
+     return;
+ 
+   img->MADofMB[img->current_mb_nr] = calc_MAD();
+ 
+   // The remaining updates only apply when the basic unit is smaller than a frame.
+   if (!(input->basicunit < img->Frame_Total_Number_MB))
+     return;
+ 
+   img->TotalMADBasicUnit += img->MADofMB[img->current_mb_nr];
+ 
+   // delta_qp is present only for non-skipped macroblocks
+   if (currMB->cbp == 0 && best_mode != I16MB)
+   {
+     // Nothing coded: fall back to the previous QP and signal "no cbp".
+     currMB->delta_qp = 0;
+     currMB->qp       = currMB->prev_qp;
+     img->qp          = currMB->qp;
+     currMB->prev_cbp = 0;
+   }
+   else
+   {
+     currMB->prev_cbp = 1;
+   }
+ 
+   // When MBAFF is used, delta_qp is only present for
+   // the first non-skipped macroblock of each macroblock pair
+   if (!input->MbInterlace)
+     return;
+ 
+   if (currMB->mb_field)
+   {
+     DELTA_QP2 = currMB->delta_qp;
+     QP2       = currMB->qp;
+   }
+   else
+   {
+     DELTA_QP = currMB->delta_qp;
+     QP       = currMB->qp;
+   }
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Fast intra decision: compare a scaled rate against a boundary error
+ *
+ * \param intra_skip
+ *    set to 1 when the scaled rate does not exceed the average boundary error;
+ *    left untouched otherwise
+ * \param min_rate
+ *    rate of the best mode found so far (scaled by 1/384 before the comparison)
+ *
+ * \note
+ *    The boundary error sums absolute differences between the original samples
+ *    of the first row / first column of the macroblock and the reconstructed
+ *    samples just above / left of it, for luma and both chroma planes. It is
+ *    treated as 0 for macroblocks in the last row or column of the picture or
+ *    without an available left or upper neighbour.
+ *************************************************************************************
+ */
+ void fast_mode_intra_decision(short *intra_skip, double min_rate)
+ {
+   int n, uv;
+   int up_ok, left_ok;
+   long SBE = 0;
+   double AR, ABE = 0;
+   PixelPos up;       //!< pixel position p(0,-1)
+   PixelPos left[2];  //!< pixel positions p(-1, -1..0)
+ 
+   getNeighbour(img->current_mb_nr, -1, -1, 0, &left[0]);
+   getNeighbour(img->current_mb_nr, -1,  0, 0, &left[1]);
+   getNeighbour(img->current_mb_nr,  0, -1, 0, &up);
+ 
+   up_ok   = up.available;
+   left_ok = left[1].available;
+ 
+   AR = (1.0/384)*min_rate;
+ 
+   if ((img->mb_y != (int)img->FrameHeightInMbs-1) && (img->mb_x != (int)img->PicWidthInMbs-1) && left_ok && up_ok)
+   {
+     // luma: top row against the row above, left column against the column to the left
+     for (n = 0; n < MB_BLOCK_SIZE; n++)
+     {
+       SBE += abs(imgY_org[img->opix_y][img->opix_x+n] - enc_picture->imgY[img->pix_y-1][img->pix_x+n]);
+       SBE += abs(imgY_org[img->opix_y+n][img->opix_x] - enc_picture->imgY[img->pix_y+n][img->pix_x-1]);
+     }
+ 
+     // same measurement on both chroma planes, over 8 samples per edge
+     for (uv = 0; uv < 2; uv++)
+     {
+       for (n = 0; n < 8; n++)
+       {
+         SBE += abs(imgUV_org[uv][img->opix_c_y][img->opix_c_x+n] - enc_picture->imgUV[uv][img->pix_c_y-1][img->pix_c_x+n]);
+         SBE += abs(imgUV_org[uv][img->opix_c_y+n][img->opix_c_x] - enc_picture->imgUV[uv][img->pix_c_y+n][img->pix_c_x-1]);
+       }
+     }
+ 
+     ABE = 1.0/64 * SBE;
+   }
+ 
+   if (AR <= ABE)
+     *intra_skip = 1;
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Initialize the per-macroblock encoding parameters
+ *
+ * \param currMB
+ *    current macroblock (supplies list_offset and mb_field)
+ * \param enc_mb
+ *    parameter set to fill: list offsets, field flag, default references,
+ *    table of valid modes and the three Lagrange multipliers
+ * \param intra
+ *    non-zero when only intra modes may be used
+ * \param bslice
+ *    non-zero for B slices
+ *
+ * \note
+ *    Also clears img->bi_pred_me[] and resets chroma_vector_adjustment of the
+ *    reference pictures in the lists relevant to this macroblock.
+ *************************************************************************************
+ */
+ void init_enc_mb_params(Macroblock* currMB, RD_PARAMS *enc_mb, int intra, int bslice)
+ {
+   const int inter        = !intra;
+   const int allow_sub8x8 = (input->Transform8x8Mode != 2);   // 0 when only the 8x8 transform is used
+   int lambda_row;
+   int mode, list, ref;
+   int first_list, last_list;
+ 
+   //Setup list offset
+   enc_mb->list_offset[LIST_0] = LIST_0 + currMB->list_offset;
+   enc_mb->list_offset[LIST_1] = LIST_1 + currMB->list_offset;
+ 
+   enc_mb->curr_mb_field    = ((img->MbaffFrameFlag)&&(currMB->mb_field));
+   enc_mb->best_ref[LIST_0] = 0;
+   enc_mb->best_ref[LIST_1] = -1;
+ 
+   // Set valid modes: intra modes first ...
+   enc_mb->valid[I8MB]  = input->Transform8x8Mode;
+   enc_mb->valid[I4MB]  = allow_sub8x8;
+   enc_mb->valid[I16MB] = 1;
+   enc_mb->valid[IPCM]  = (input->symbol_mode != CABAC && input->EnableIPCM);
+ 
+   // ... then inter modes, which all require a non-intra macroblock
+   enc_mb->valid[0]     = inter;
+   enc_mb->valid[1]     = (inter && input->InterSearch16x16);
+   enc_mb->valid[2]     = (inter && input->InterSearch16x8);
+   enc_mb->valid[3]     = (inter && input->InterSearch8x16);
+   enc_mb->valid[4]     = (inter && input->InterSearch8x8);
+   enc_mb->valid[5]     = (inter && input->InterSearch8x4 && allow_sub8x8);
+   enc_mb->valid[6]     = (inter && input->InterSearch4x8 && allow_sub8x8);
+   enc_mb->valid[7]     = (inter && input->InterSearch4x4 && allow_sub8x8);
+   enc_mb->valid[P8x8]  = (enc_mb->valid[4] || enc_mb->valid[5] || enc_mb->valid[6] || enc_mb->valid[7]);
+   enc_mb->valid[12]    = (img->type == SI_SLICE);
+ 
+   //===== SET LAGRANGE PARAMETERS =====
+   // The tables are precomputed at slice level; referenced B slices use row 5
+   // instead of the row of the slice type.
+   if (bslice && img->nal_reference_idc)
+     lambda_row = 5;
+   else
+     lambda_row = img->type;
+ 
+   enc_mb->lambda_md = img->lambda_md[lambda_row][img->qp];
+   enc_mb->lambda_me = img->lambda_me[lambda_row][img->qp];
+   enc_mb->lambda_mf = img->lambda_mf[lambda_row][img->qp];
+ 
+   // Initialize bipredME decisions
+   for (mode = 0; mode < MAXMODE; mode++)
+     img->bi_pred_me[mode] = 0;
+ 
+   if (!img->MbaffFrameFlag)
+   {
+     // Non-MBAFF: adjust references whose structure differs from the current picture.
+     for (list = LIST_0; list < BI_PRED; list++)
+     {
+       for (ref = 0; ref < listXsize[list]; ref++)
+       {
+         int adjust = 0;
+ 
+         if (img->structure != listX[list][ref]->structure)
+         {
+           if (img->structure == TOP_FIELD)
+             adjust = -2;
+           if (img->structure == BOTTOM_FIELD)
+             adjust = 2;
+         }
+         listX[list][ref]->chroma_vector_adjustment = adjust;
+       }
+     }
+     return;
+   }
+ 
+   // MBAFF: only the lists selected by the macroblock's list offset are touched.
+   // Frame macroblocks always get 0; field macroblocks depend on the parity of
+   // the macroblock number and on the structure of the reference.
+   first_list = enc_mb->list_offset[LIST_0];
+   last_list  = enc_mb->list_offset[LIST_1];
+ 
+   for (list = first_list; list <= last_list; list++)
+   {
+     for (ref = 0; ref < listXsize[list]; ref++)
+     {
+       int adjust = 0;
+ 
+       if (enc_mb->curr_mb_field)
+       {
+         if (img->current_mb_nr % 2 == 0 && listX[list][ref]->structure == BOTTOM_FIELD)
+           adjust = -2;
+         if (img->current_mb_nr % 2 == 1 && listX[list][ref]->structure == TOP_FIELD)
+           adjust = 2;
+       }
+       listX[list][ref]->chroma_vector_adjustment = adjust;
+     }
+   }
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Compute the cost of predicting a partition from one prediction list
+ *    (LIST_0 / LIST_1) or from one of the bi-predictive variants
+ *
+ * \param list
+ *    prediction type; values below BI_PRED select a single list, BI_PRED the
+ *    combination of the best references found so far, larger values the
+ *    variants that always use reference 0 in both lists
+ * \param block
+ *    partition index inside the macroblock
+ * \param mode
+ *    partition mode being evaluated
+ * \param enc_mb
+ *    macroblock encoding parameters (list offsets and Lagrange multipliers)
+ * \param bmcost
+ *    best cost per prediction type; bmcost[list] is updated here. For single
+ *    lists the caller must preset it (it is only lowered, never raised)
+ * \param best_ref
+ *    best reference per list; written for single lists, read for BI_PRED
+ *************************************************************************************
+ */
+ void list_prediction_cost(int list, int block, int mode, RD_PARAMS enc_mb, int bmcost[5], char best_ref[2])
+ {
+   const int cur_list = list < BI_PRED ? enc_mb.list_offset[list] : enc_mb.list_offset[LIST_0];
+   short ref;
+ 
+   if (list > BI_PRED)
+   {
+     // Bi-predictive search variants: both references are fixed to index 0.
+     if (input->rdopt)
+       bmcost[list] = (REF_COST (enc_mb.lambda_mf, 0, cur_list)
+                    +  REF_COST (enc_mb.lambda_mf, 0, cur_list + LIST_1));
+     else
+       bmcost[list] = (int) (4 * enc_mb.lambda_me);
+ 
+     bmcost[list] += BPredPartitionCost(mode, block, 0, 0, enc_mb.lambda_mf, !(list&1));
+     return;
+   }
+ 
+   if (list == BI_PRED)
+   {
+     // Combine the best references determined earlier for each list.
+     const short ref_l0 = (short)best_ref[LIST_0];
+     const short ref_l1 = (short)best_ref[LIST_1];
+ 
+     if (input->rdopt)
+       bmcost[list] = (REF_COST (enc_mb.lambda_mf, ref_l0, cur_list)
+                    +  REF_COST (enc_mb.lambda_mf, ref_l1, cur_list + LIST_1));
+     else
+       bmcost[list] = (int) (2 * (enc_mb.lambda_me * (min(ref_l0, 1) + min(ref_l1, 1))));
+ 
+     bmcost[list] += BIDPartitionCost (mode, block, ref_l0, ref_l1, enc_mb.lambda_mf);
+     return;
+   }
+ 
+   //--- single list: keep the cheapest reference frame ---
+   for (ref = 0; ref < listXsize[cur_list]; ref++)
+   {
+     int mcost;
+ 
+     // With reference checking enabled, list-0 references other than the
+     // first must pass the reliability test before they are considered.
+     if (img->checkref && !list && ref != 0 && !CheckReliabilityOfRef (block, list, ref, mode))
+       continue;
+ 
+     if (input->rdopt)
+       mcost = REF_COST (enc_mb.lambda_mf, ref, cur_list);
+     else
+       mcost = (int) (2 * enc_mb.lambda_me * min(ref, 1));
+ 
+     mcost += motion_cost[mode][list][ref][block];
+ 
+     if (mcost < bmcost[list])
+     {
+       bmcost[list]   = mcost;
+       best_ref[list] = (char)ref;
+     }
+   }
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Weighted cost of signalling reference index \a ref in list \a list
+ *
+ * \return
+ *    WEIGHTED_COST of refbits[ref] with lambda_mf, or of 0 bits when the list
+ *    (after applying the macroblock's list offset) holds at most one entry
+ *************************************************************************************
+ */
+ int compute_ref_cost(RD_PARAMS enc_mb, int ref, int list)
+ {
+   // A list with a single entry leaves nothing to choose, hence zero bits.
+   if (listXsize[enc_mb.list_offset[list]] <= 1)
+     return WEIGHTED_COST(enc_mb.lambda_mf, 0);
+ 
+   return WEIGHTED_COST(enc_mb.lambda_mf, refbits[ref]);
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Pick the prediction direction with the lowest cost
+ *
+ * \param mode
+ *    partition mode; the two extra bi-predictive candidates (BI_PRED_L0 and
+ *    BI_PRED_L1) are only considered for mode 1 with BiPredMotionEstimation on
+ * \param bmcost
+ *    cost per prediction type, indexed by LIST_0, LIST_1, BI_PRED, BI_PRED_L0
+ *    and BI_PRED_L1
+ * \param best_ref
+ *    both entries are reset to 0 when one of the extra candidates wins
+ * \param best_pdir
+ *    out: 0 for LIST_0, 1 for LIST_1, 2 for any bi-predictive choice
+ * \param cost
+ *    in/out: the winning cost is added to it
+ * \param bi_pred_me
+ *    out (extended search only): 0, or 1 / 2 when BI_PRED_L0 / BI_PRED_L1 wins;
+ *    img->bi_pred_me[mode] receives the same value
+ *
+ * \note
+ *    Ties are resolved in favour of the candidate listed first in the order
+ *    LIST_0, LIST_1, BI_PRED, BI_PRED_L0, BI_PRED_L1.
+ *************************************************************************************
+ */
+ void determine_prediction_list(int mode, int bmcost[5], char best_ref[2], short *best_pdir, int *cost, short *bi_pred_me)
+ {
+   // Candidates in tie-break order; the plain search stops after the first three.
+   const int candidate[5]   = {LIST_0, LIST_1, BI_PRED, BI_PRED_L0, BI_PRED_L1};
+   const int extended       = (input->BiPredMotionEstimation && mode == 1);
+   const int num_candidates = extended ? 5 : 3;
+   int winner = 0;   // position inside candidate[]
+   int n;
+ 
+   if (extended)
+   {
+     img->bi_pred_me[mode] = 0;
+     *bi_pred_me = 0;
+   }
+ 
+   //--- get prediction direction ----
+   // A strict comparison keeps the earliest candidate among equal costs.
+   for (n = 1; n < num_candidates; n++)
+   {
+     if (bmcost[candidate[n]] < bmcost[candidate[winner]])
+       winner = n;
+   }
+ 
+   *best_pdir = (short)(winner < 2 ? winner : 2);
+   *cost     += bmcost[candidate[winner]];
+ 
+   if (winner >= 3)
+   {
+     // One of the extra bi-predictive candidates won: record which one and
+     // force both reference indices to 0.
+     *bi_pred_me           = (short)(winner - 2);
+     img->bi_pred_me[mode] = winner - 2;
+     best_ref[LIST_1] = 0;
+     best_ref[LIST_0] = 0;
+   }
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    RD decision process for one macroblock mode
+ *
+ * \param mode
+ *    macroblock mode to evaluate
+ * \param currMB
+ *    current macroblock; its luma_transform_size_8x8_flag is modified while
+ *    the transform sizes are tried
+ * \param enc_mb
+ *    macroblock encoding parameters (lambda_md and the valid-mode table are used)
+ * \param min_rdcost
+ *    forwarded to RDCost_for_macroblocks() (presumably the best RD cost so
+ *    far, updated by that function -- not visible here)
+ * \param min_rate
+ *    forwarded to RDCost_for_macroblocks() (presumably the matching rate)
+ * \param i16mode
+ *    forwarded to RDCost_for_macroblocks()
+ * \param bslice
+ *    non-zero for B slices
+ * \param inter_skip
+ *    set to 1 when, with rdopt == 2 and EarlySkipEnable, mode 0 was accepted
+ *    and produced no coded coefficients (cbp == 0)
+ *
+ * \note
+ *    Whenever RDCost_for_macroblocks() reports success the macroblock
+ *    parameters are stored and, with rate control enabled, the luma
+ *    prediction difference is recorded.
+ *************************************************************************************
+ */
+ void compute_mode_RD_cost(int mode, 
+                           Macroblock *currMB, 
+                           RD_PARAMS enc_mb, 
+                           double *min_rdcost, 
+                           double *min_rate, 
+                           int i16mode, 
+                           short bslice, 
+                           short *inter_skip)
+ {
+   //--- transform size ---
+   // With Transform8x8Mode == 2 (8x8 transform only) the flag is set for every
+   // mode that can carry an 8x8 transform; otherwise the test starts with 4x4.
+   currMB->luma_transform_size_8x8_flag = input->Transform8x8Mode==2
+     ?  (mode >= 1 && mode <= 3)
+     || (mode == 0 && bslice && active_sps->direct_8x8_inference_flag)
+     || ((mode == P8x8) && (enc_mb.valid[4]))
+     :  0;
+   
+   SetModesAndRefframeForBlocks (mode);
+   
+   // Encode with coefficients
+   img->NoResidueDirect = 0;  
+   if (currMB->c_ipred_mode == DC_PRED_8 || (IS_INTRA(currMB) ))
+   {
+     // The loop body runs once per transform size to test: once in general,
+     // twice when Transform8x8Mode == 1 and the mode supports the 8x8 transform.
+     while(1)
+     {
+       if (RDCost_for_macroblocks (enc_mb.lambda_md, mode, min_rdcost, min_rate, i16mode))
+       {
+         //Rate control
+         if (input->RCEnable)
+         {
+           if(mode == P8x8)
+             rc_store_diff(img->opix_x,img->opix_y,
+             currMB->luma_transform_size_8x8_flag == 1 ? tr8x8.mpr8x8 : tr4x4.mpr8x8);
+           else
+             rc_store_diff(img->opix_x, img->opix_y, pred);
+         }      
+         store_macroblock_parameters (mode);
+         
+         if(input->rdopt==2 && mode == 0 && input->EarlySkipEnable)
+         {
+           // check transform quantized coeff.
+           if(currMB->cbp == 0)
+             *inter_skip = 1;
+         }
+         
+       }
+       
+       // Go through transform modes. 
+       // Note that if currMB->cbp is 0 one could choose to skip 8x8 mode
+       // although this could be due to deadzoning decisions.
+       //if (input->Transform8x8Mode==1 && currMB->cbp!=0) 
+       if (input->Transform8x8Mode==1)
+       {
+         //=========== try mb_types 1,2,3 with 8x8 transform ===========
+         if ((mode >= 1 && mode <= 3) && currMB->luma_transform_size_8x8_flag == 0)
+         {
+           //try with 8x8 transform size
+           currMB->luma_transform_size_8x8_flag = 1;
+           continue;
+         }
+         //=========== try DIRECT-MODE with 8x8 transform ===========
+         else if (mode == 0 && bslice && active_sps->direct_8x8_inference_flag && currMB->luma_transform_size_8x8_flag == 0)
+         {
+           //try with 8x8 transform size
+           currMB->luma_transform_size_8x8_flag = 1;
+           continue;
+         }
+         //=========== try mb_type P8x8 for mode 4 with 4x4/8x8 transform ===========
+         else if ((mode == P8x8) && (enc_mb.valid[4]) && (currMB->luma_transform_size_8x8_flag == 0))
+         {
+           currMB->luma_transform_size_8x8_flag = 1; //check 8x8 partition for transform size 8x8
+           continue;
+         }
+         else
+         {
+           // Both sizes tested (or 8x8 not applicable): restore the flag and stop.
+           currMB->luma_transform_size_8x8_flag = 0;
+           break;
+         }
+       }
+       else
+         break;
+     }
+   }
+   
+   // Encode with no coefficients. Currently only for direct. This could be extended to all other modes as in example.
+   //if (mode < P8x8 && (*inter_skip == 0) && enc_mb.valid[mode] && currMB->cbp && (currMB->cbp&15) != 15 && !input->nobskip)
+   if ( bslice && mode == 0 && (*inter_skip == 0) && enc_mb.valid[mode] 
+     && currMB->cbp && (currMB->cbp&15) != 15 && !input->nobskip) 
+   {
+     img->NoResidueDirect = 1;
+     if (RDCost_for_macroblocks (enc_mb.lambda_md, mode, min_rdcost, min_rate, i16mode)) 
+     {
+       //Rate control
+       if (input->RCEnable)
+         rc_store_diff(img->opix_x,img->opix_y,pred);
+       
+       store_macroblock_parameters (mode);
+     }
+   }
+ }
+ 
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for an 8x8 sub-macroblock
+ *
+ * \param enc_mb
+ *    macroblock encoding parameters (valid modes, list offsets, lambdas)
+ * \param dataTr
+ *    per-transform-size result store: best mode, direction, references,
+ *    reconstruction / prediction samples and accumulated cost8x8
+ * \param currMB
+ *    current macroblock; luma_transform_size_8x8_flag is set when
+ *    \a transform8x8 is non-zero
+ * \param cofACtr
+ *    destination for the AC coefficients of the best mode of this block
+ * \param have_direct
+ *    counter incremented each time the direct-mode cost is evaluated
+ *    (non-RD-optimized path only)
+ * \param bslice
+ *    non-zero for B slices; direct mode (table index 0) is only tried then
+ * \param block
+ *    index (0..3) of the 8x8 block inside the macroblock
+ * \param cost_direct
+ *    in/out: accumulated 4x4-transform direct cost, saturating at INT_MAX
+ * \param cost
+ *    scratch/out: cost of the mode evaluated last
+ * \param cost8x8_direct
+ *    in/out: accumulated 8x8-transform direct cost (only with \a transform8x8)
+ * \param transform8x8
+ *    non-zero to evaluate the block with the 8x8 transform; this also
+ *    restricts the candidates to the first two entries of b8_mode_table
+ *************************************************************************************
+ */
+ void submacroblock_mode_decision(RD_PARAMS enc_mb, 
+                                  RD_8x8DATA *dataTr, 
+                                  Macroblock *currMB,
+                                  int ***cofACtr, 
+                                  int *have_direct, 
+                                  short bslice, 
+                                  int block, 
+                                  int *cost_direct,                                  
+                                  int *cost,
+                                  int *cost8x8_direct,
+                                  int transform8x8)  
+ {
+   int j0, i0, j1, i1;
+   int i,j, k;
+   int min_cost8x8, index;
+   double min_rdcost, rdcost = 0.0;
+   short best_pdir = 0;
+   char best_ref[2] = {0, -1};
+   int mode;
+   int64 curr_cbp_blk;
+   int direct4x4_tmp, direct8x8_tmp;
+   int bmcost[5] = {INT_MAX};
+   int cnt_nonz = 0;
+   short pdir;
+   int dummy;
+   short bi_pred_me;
+   int best_cnt_nonz = 0;
+   int maxindex =  (transform8x8) ? 2 : 5;
+   int pix_x, pix_y;
+   int block_x, block_y;
+   
+   int fadjust[16][16], fadjustCr[2][16][16];    
+   int (*fadjustTransform)[16][16] = transform8x8? img->fadjust8x8 : img->fadjust4x4;
+   int (*fadjustTransformCr)[2][16][16] = transform8x8? img->fadjust8x8Cr : img->fadjust4x4Cr;
+   int lumaAdjustIndex = transform8x8? 2 : 3;
+   int chromaAdjustIndex = transform8x8? 0 : 2;
+   
+   //--- set coordinates ---
+   // (i0, j0): sample offset of the 8x8 block inside the macroblock
+   // (i1, j1): the same offset in units of 4x4 blocks
+   j0 = ((block/2)<<3);    j1 = (j0>>2);
+   i0 = ((block%2)<<3);    i1 = (i0>>2);
+   
+ #ifdef BEST_NZ_COEFF
+   for(j = 0; j <= 1; j++)
+   {
+     for(i = 0; i <= 1; i++)
+       best_nz_coeff[i][j] = img->nz_coeff[img->current_mb_nr][i1 + i][j1 + j] = 0;
+   }
+ #endif
+   
+   if (transform8x8)
+     currMB->luma_transform_size_8x8_flag = 1; //switch to transform size 8x8
+   
+   //--- store coding state before coding ---
+   store_coding_state (cs_cm);
+ 
+   //=====  LOOP OVER POSSIBLE CODING MODES FOR 8x8 SUB-PARTITION  =====
+   for (min_cost8x8=INT_MAX, min_rdcost=1e30, index=(bslice?0:1); index<maxindex; index++)
+   {
+     mode = b8_mode_table[index]; 
+     *cost = 0;
+     if (enc_mb.valid[mode] && (transform8x8 == 0 || mode != 0 || (mode == 0 && active_sps->direct_8x8_inference_flag)))
+     {
+       curr_cbp_blk = 0;
+       
+       if (mode==0)
+       {
+         //--- Direct Mode ---                                      
+         if (!input->rdopt )
+         {
+           direct4x4_tmp=0; direct8x8_tmp=0;
+           direct4x4_tmp = Get_Direct_Cost8x8 ( block, &direct8x8_tmp);
+           
+           // INT_MAX marks "direct not possible" and is sticky across blocks.
+           if ((direct4x4_tmp==INT_MAX)||(*cost_direct==INT_MAX))
+           {
+             *cost_direct = INT_MAX;
+             if (transform8x8) 
+               *cost8x8_direct = INT_MAX;
+           }
+           else
+           {
+             *cost_direct += direct4x4_tmp;
+             if (transform8x8) 
+               *cost8x8_direct += direct8x8_tmp;
+           }                     
+           // Count the evaluation in the caller's counter. Parentheses are
+           // required: "*have_direct ++" would advance the local pointer and
+           // leave the counter untouched.
+           (*have_direct)++;
+           
+           if (transform8x8)
+           {
+             switch(input->Transform8x8Mode)
+             {
+             case 1: // Mixture of 8x8 & 4x4 transform
+               if((direct8x8_tmp < direct4x4_tmp) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7]))
+                 *cost = direct8x8_tmp;
+               else
+                 *cost = direct4x4_tmp;
+               break;
+             case 2: // 8x8 Transform only
+               *cost = direct8x8_tmp;
+               break;
+             default: // 4x4 Transform only
+               *cost = direct4x4_tmp;
+               break;
+             }            
+             // In 8x8-only mode the direct cost chosen above is overridden.
+             if (input->Transform8x8Mode==2) 
+               *cost = INT_MAX;                                       
+           }
+           else
+           {
+             *cost = direct4x4_tmp;
+           }
+         }
+ 
+         block_x = img->block_x+(block&1)*2;
+         block_y = img->block_y+(block&2);
+         best_ref[LIST_0] = direct_ref_idx[LIST_0][block_y][block_x];
+         best_ref[LIST_1] = direct_ref_idx[LIST_1][block_y][block_x];
+         best_pdir   = direct_pdir[block_y][block_x];
+       } // if (mode==0)
+       else
+       {        
+         //======= motion estimation for all reference frames ========
+         //-----------------------------------------------------------
+         PartitionMotionSearch (mode, block, enc_mb.lambda_mf);
+         
+         //--- get cost and reference frame for LIST 0 prediction ---
+         bmcost[LIST_0] = INT_MAX;
+         list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+         
+         //store LIST 0 reference index for every block
+         block_x = img->block_x+(block&1)*2;
+         block_y = img->block_y+(block&2);
+         for (j = block_y; j< block_y + 2; j++)
+         {
+           for (i = block_x; i < block_x + 2; i++)
+           {
+             enc_picture->ref_idx   [LIST_0][j][i] = best_ref[LIST_0];
+             enc_picture->ref_pic_id[LIST_0][j][i] = 
+               enc_picture->ref_pic_num[enc_mb.list_offset[LIST_0]][(short)best_ref[LIST_0]];
+           }
+         } 
+         
+         if (bslice)
+         {
+           //--- get cost and reference frame for LIST 1 prediction ---
+           bmcost[LIST_1] = INT_MAX;
+           list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+           
+           // Compute bipredictive cost between best list 0 and best list 1 references
+           list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+           
+           //--- get prediction direction ----          
+           determine_prediction_list(mode, bmcost, best_ref, &best_pdir, cost, &bi_pred_me);
+           
+           //store backward reference index for every block
+           for (j = block_y; j< block_y + 2; j++)
+           {
+             for (i = block_x; i < block_x + 2; i++)
+             {
+               enc_picture->ref_idx[LIST_0][j][i] = best_ref[LIST_0];
+               enc_picture->ref_idx[LIST_1][j][i] = best_ref[LIST_1];
+             }
+           }
+         } // if (bslice)
+         else
+         {
+           best_pdir = 0;
+           *cost     = bmcost[LIST_0];
+         }
+       } // if (mode!=0)
+       
+       if (input->rdopt)
+       {
+         //--- get and check rate-distortion cost ---
+         rdcost = RDCost_for_8x8blocks (&cnt_nonz, &curr_cbp_blk, enc_mb.lambda_md,
+           block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
+       }
+       else
+       {
+         if (*cost!=INT_MAX)
+           *cost += (REF_COST (enc_mb.lambda_mf, B8Mode2Value (mode, best_pdir),
+           enc_mb.list_offset[(best_pdir<1?LIST_0:LIST_1)]) - 1);
+       }
+       
+       //--- set variables if best mode has changed ---
+       if ( ( input->rdopt && rdcost < min_rdcost) 
+         || (!input->rdopt && *cost < min_cost8x8))
+       {
+         min_cost8x8                 = *cost;
+         min_rdcost                  = rdcost;
+         dataTr->part8x8mode [block] = mode;
+         dataTr->part8x8pdir [block] = best_pdir;
+         dataTr->part8x8fwref[block] = best_ref[LIST_0];
+         dataTr->part8x8bwref[block] = best_ref[LIST_1];
+         
+ #ifdef BEST_NZ_COEFF
+         for(j = 0; j <= 1; j++)
+         {
+           for(i = 0; i <= 1; i++)
+             best_nz_coeff[i][j]= cnt_nonz 
+             ? img->nz_coeff[img->current_mb_nr][i1 + i][j1 + j] : 0;
+         }
+ #endif
+         
+         //--- store number of nonzero coefficients ---
+         best_cnt_nonz  = cnt_nonz;
+         
+         if (input->rdopt)
+         {
+           //--- store block cbp ---
+           cbp_blk8x8    &= (~(0x33 << (((block>>1)<<3)+((block%2)<<1)))); // delete bits for block
+           cbp_blk8x8    |= curr_cbp_blk;
+           
+           //--- store coefficients ---
+           for (k=0; k< 4; k++)
+           {
+             for (j=0; j< 2; j++)
+               for (i=0; i<65; i++)  
+                 cofACtr[k][j][i] = img->cofAC[block][k][j][i]; // 18->65 for ABT
+           }   
+           //--- store reconstruction and prediction --- 
+           if(!img->residue_transform_flag)
+           {
+             for (j=j0; j<j0+8; j++)    
+             {
+               pix_y = img->pix_y + j;
+               for (i=i0; i<i0+8; i++)
+               {
+                 pix_x = img->pix_x + i;
+                 dataTr->rec_mbY8x8[j][i] = enc_picture->imgY[pix_y][pix_x];
+                 dataTr->mpr8x8[j][i] = img->mpr[j][i];
+               }     
+             }
+           }
+           else
+           {
+             for (j=j0; j<j0+8; j++)                
+               for (i=i0; i<i0+8; i++)
+               {
+                 dataTr->rec_resG_8x8  [j][i] = rec_resG  [j][i];
+                 dataTr->resTrans_R_8x8[j][i] = resTrans_R[j][i];
+                 dataTr->resTrans_B_8x8[j][i] = resTrans_B[j][i];
+                 dataTr->mprRGB_8x8 [0][j][i] = mprRGB [0][j][i];
+                 dataTr->mprRGB_8x8 [1][j][i] = mprRGB [1][j][i];
+                 dataTr->mprRGB_8x8 [2][j][i] = mprRGB [2][j][i];                  
+               }     
+           }
+         }                  
+         if (img->AdaptiveRounding)
+         {
+           // Keep the rounding adjustments of the best mode in local copies;
+           // they are written back once the mode loop has finished.
+           for (j=j0; j<j0+8; j++)
+             for (i=i0; i<i0+8; i++)                    
+             {
+               fadjust     [j][i] = fadjustTransform  [0]   [j][i];
+               fadjustCr[0][j][i] = fadjustTransformCr[0][0][j][i];
+               fadjustCr[1][j][i] = fadjustTransformCr[0][1][j][i];
+             }
+         }
+         //--- store best 8x8 coding state ---
+         if (block < 3)
+           store_coding_state (cs_b8);
+       } // if (rdcost <= min_rdcost)
+       
+       //--- re-set coding state as it was before coding with current mode was performed ---
+ 
+       reset_coding_state (cs_cm);
+     } // if ((enc_mb.valid[mode] && (transform8x8 == 0 || mode != 0 || (mode == 0 && active_sps->direct_8x8_inference_flag)))
+   } // for (min_rdcost=1e30, index=(bslice?0:1); index<maxindex; index++)
+   
+ #ifdef BEST_NZ_COEFF
+   for(j = 0; j <= 1; j++)
+   {
+     for(i = 0; i <= 1; i++)
+       img->nz_coeff[img->current_mb_nr][i1 + i][j1 + j] = best_nz_coeff[i][j];
+   }
+ #endif              
+   
+   if (!transform8x8)
+     dataTr->cost8x8 += min_cost8x8;
+   
+   if (!input->rdopt)
+   {
+     // Without RD optimization nothing has been coded yet: code the winning
+     // mode now and collect its coefficients and reconstruction.
+     if (transform8x8)
+       dataTr->cost8x8 += min_cost8x8;
+ 
+     mode = dataTr->part8x8mode[block];
+     pdir = dataTr->part8x8pdir[block];
+ 
+     curr_cbp_blk  = 0;
+     best_cnt_nonz = LumaResidualCoding8x8 (&dummy, &curr_cbp_blk, block, pdir,
+       (pdir==0||pdir==2?mode:0), (pdir==1||pdir==2?mode:0), dataTr->part8x8fwref[block], dataTr->part8x8bwref[block]);    
+     
+     cbp_blk8x8   &= (~(0x33 << (((block>>1)<<3)+((block%2)<<1)))); // delete bits for block
+     cbp_blk8x8   |= curr_cbp_blk;
+     
+     //--- store coefficients ---
+     for (k=0; k< 4; k++)
+     {
+       for (j=0; j< 2; j++)
+         memcpy(cofACtr[k][j],img->cofAC[block][k][j],65 * sizeof(int));
+     } 
+     
+     //--- store reconstruction and prediction ---
+     if(!img->residue_transform_flag) // Residue Color Transform
+     {
+       for (j=j0; j<j0+2* BLOCK_SIZE; j++)
+       {       
+         memcpy(&dataTr->rec_mbY8x8[j][i0], &enc_picture->imgY[img->pix_y + j][img->pix_x + i0], 2* BLOCK_SIZE * sizeof (imgpel));
+         memcpy(&dataTr->mpr8x8[j][i0], &img->mpr[j][i0], 2* BLOCK_SIZE * sizeof (imgpel));
+       }
+     }
+     else 
+     {   
+       for (j=j0; j<j0+8; j++)
+         for (i=i0; i<i0+8; i++)
+         { 
+           dataTr->rec_resG_8x8  [j][i] = rec_resG  [j][i];
+           dataTr->resTrans_R_8x8[j][i] = resTrans_R[j][i];
+           dataTr->resTrans_B_8x8[j][i] = resTrans_B[j][i];          
+           dataTr->mprRGB_8x8 [0][j][i] = mprRGB [0][j][i];
+           dataTr->mprRGB_8x8 [1][j][i] = mprRGB [1][j][i];
+           dataTr->mprRGB_8x8 [2][j][i] = mprRGB [2][j][i];
+         }
+     }              
+   }
+   
+   //----- set cbp and count of nonzero coefficients ---
+   if (best_cnt_nonz)
+   {
+     cbp8x8       |= (1 << block);
+     cnt_nonz_8x8 += best_cnt_nonz;
+   }
+   
+   if (!transform8x8)
+   {
+     if (block<3)
+     {
+       //===== re-set reconstructed block =====
+       j0   = 8*(block/2);
+       i0   = 8*(block%2);
+       // NOTE(review): each row copy starts at img->pix_x and at column 0 of
+       // rec_mbY8x8, i.e. the i0 offset is not applied here although the
+       // values were stored at column i0 above -- confirm this is intended
+       // for the odd-numbered block.
+       for (j=j0; j<j0 + 2 * BLOCK_SIZE; j++)
+       {
+         memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x], dataTr->rec_mbY8x8[j], 2 * BLOCK_SIZE * sizeof(imgpel));
+       }
+     } // if (block<3)    
+   }  
+   else
+   {  
+     //======= save motion data for 8x8 partition for transform size 8x8 ========
+     StoreNewMotionVectorsBlock8x8(0, block, dataTr->part8x8mode[block], dataTr->part8x8fwref[block], dataTr->part8x8bwref[block], dataTr->part8x8pdir[block], bslice);
+   }  
+   //===== set motion vectors and reference frames (prediction) =====
+   SetRefAndMotionVectors (block, dataTr->part8x8mode[block], dataTr->part8x8pdir[block], dataTr->part8x8fwref[block], dataTr->part8x8bwref[block]);
+   
+   //===== set the coding state after current block =====
+   //if (transform8x8 == 0 || block < 3)
+   if (block < 3)
+     reset_coding_state (cs_b8);
+   
+   if (img->AdaptiveRounding)
+   {
+     // Write the saved rounding adjustments of the best mode back into the
+     // per-transform tables.
+     for (j=j0; j<j0+2 * BLOCK_SIZE; j++)
+     {
+       memcpy(&fadjustTransform  [lumaAdjustIndex  ]   [j][i0], &fadjust     [j][i0], 2 * BLOCK_SIZE * sizeof(int));
+       memcpy(&fadjustTransformCr[chromaAdjustIndex][0][j][i0], &fadjustCr[0][j][i0], 2 * BLOCK_SIZE * sizeof(int));
+       memcpy(&fadjustTransformCr[chromaAdjustIndex][1][j][i0], &fadjustCr[1][j][i0], 2 * BLOCK_SIZE * sizeof(int));
+     }
+   }
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Mode Decision for a macroblock
+ *************************************************************************************
+ */
+ void encode_one_macroblock ()
+ {  
+   int max_index;
+   
+   int         rerun, block, index, mode, i, j, k, ctr16x16, dummy;
+   short       best_pdir;
+   RD_PARAMS   enc_mb;
+   double      min_rdcost, max_rdcost=1e30;
+   char        best_ref[2] = {0, -1};
+   int         bmcost[5] = {INT_MAX};
+   int         cost=0;
+   int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode;
+   int         intra1 = 0;
+   int         temp_cpb = 0;
+   int         best_transform_flag = 0;
+   int         cost8x8_direct = 0;  
+   short       islice      = (img->type==I_SLICE);
+   short       bslice      = (img->type==B_SLICE);
+   short       pslice      = (img->type==P_SLICE) || (img->type==SP_SLICE);
+   short       intra       = (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
+   
+   short       runs        = (input->RestrictRef==1 && input->rdopt==3 && (pslice  || (bslice && img->nal_reference_idc>0)) ? 2 : 1);
+   
+   int         pix_x, pix_y;
+   Macroblock* currMB      = &img->mb_data[img->current_mb_nr];
+   Macroblock* prevMB      = img->current_mb_nr ? &img->mb_data[img->current_mb_nr-1]:NULL ;
+   
+   char   **ipredmodes = img->ipredmode;
+   short   *allmvs = img->all_mv[0][0][0][0][0];
+   short   max_chroma_pred_mode;
+   int     ****i4p;  //for non-RD-opt. mode
+   
+ #ifdef BEST_NZ_COEFF
+   int best_nz_coeff[2][2];
+ #endif  
+   
+   int tmp_8x8_flag, tmp_no_mbpart;  
+   // Residue Color Transform
+   int residue_R, residue_G, residue_B, temp;
+   int cr_cbp = 0;  
+   // Fast Mode Decision
+   short inter_skip = 0, intra_skip = 0;
+   int cost16 = 0, mode16 = 0;
+   double min_rate = 0, RDCost16 = DBL_MAX;
+   
+   
+   if(input->FMEnable == 1)
+   {
+     decide_intrabk_SAD();
+   }
+   else if (input->FMEnable ==2)
+   {
+     simplified_decide_intrabk_SAD();
+   }
+   
+   intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra
+   
+   //===== Setup Macroblock encoding parameters =====
+   init_enc_mb_params(currMB, &enc_mb, intra, bslice);
+   
+   // Perform multiple encodings if rdopt with losses is enabled
+   for (rerun=0; rerun<runs; rerun++)
+   {
+     if (runs==2)
+       input->rdopt= (rerun==0) ? 1 : 3;
+     
+     // reset chroma intra predictor to default
+     currMB->c_ipred_mode = DC_PRED_8;
+ 
+     //=====   S T O R E   C O D I N G   S T A T E   =====
+     //---------------------------------------------------
+     store_coding_state (cs_cm);
+     
+     if (!intra)
+     {
+       //===== set direct motion vectors =====
+       best_mode = 1;
+       if (bslice)
+       {
+         Get_Direct_Motion_Vectors ();
+         if (input->rdopt == 2 && enc_mb.valid[0])
+         {
+           best_mode = 0;
+           currMB->c_ipred_mode=DC_PRED_8;
+           min_rdcost = max_rdcost;
+           compute_mode_RD_cost(0, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+         }
+       }
+       
+       //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====      
+       for (min_cost=INT_MAX, mode=1; mode<4; mode++)
+       {
+         bi_pred_me = 0;
+         img->bi_pred_me[mode]=0;
+         if (enc_mb.valid[mode] && !inter_skip)
+         {
+           for (cost=0, block=0; block<(mode==1?1:2); block++)
+           {
+             PartitionMotionSearch (mode, block, enc_mb.lambda_mf);
+             //--- set 4x4 block indices (for getting MV) ---
+             j = (block==1 && mode==2 ? 2 : 0);
+             i = (block==1 && mode==3 ? 2 : 0);
+             
+             //--- get cost and reference frame for List 0 prediction ---
+             bmcost[LIST_0] = INT_MAX;
+             list_prediction_cost(LIST_0, block, mode, enc_mb, bmcost, best_ref);
+             
+             if (bslice)
+             {
+               //--- get cost and reference frame for List 1 prediction ---
+               bmcost[LIST_1] = INT_MAX;
+               list_prediction_cost(LIST_1, block, mode, enc_mb, bmcost, best_ref);
+               
+               // Compute bipredictive cost between best list 0 and best list 1 references
+               list_prediction_cost(BI_PRED, block, mode, enc_mb, bmcost, best_ref);
+               
+               // Finally, if mode is 16x16, compute cost for bipredictive ME vectors
+               if (input->BiPredMotionEstimation && mode == 1)
+               {                
+                 list_prediction_cost(BI_PRED_L0, block, mode, enc_mb, bmcost, 0);
+                 list_prediction_cost(BI_PRED_L1, block, mode, enc_mb, bmcost, 0);
+               }
+               else
+               {
+                 bmcost[BI_PRED_L0] = INT_MAX;
+                 bmcost[BI_PRED_L1] = INT_MAX;
+               }
+               
+               // Determine prediction list based on mode cost
+               determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bi_pred_me);
+             }
+             else // if (bslice)
+             {
+               best_pdir  = 0;
+               cost      += bmcost[LIST_0];
+             }
+             
+             assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice);
+             
+             //----- set reference frame and direction parameters -----
+             if (mode==3)
+             {
+               best8x8fwref [3][block  ] = best8x8fwref [3][  block+2] = best_ref[LIST_0];
+               best8x8pdir  [3][block  ] = best8x8pdir  [3][  block+2] = best_pdir;
+               best8x8bwref [3][block  ] = best8x8bwref [3][  block+2] = best_ref[LIST_1];
+             }
+             else if (mode==2)
+             {
+               best8x8fwref [2][2*block] = best8x8fwref [2][2*block+1] = best_ref[LIST_0];
+               best8x8pdir  [2][2*block] = best8x8pdir  [2][2*block+1] = best_pdir;
+               best8x8bwref [2][2*block] = best8x8bwref [2][2*block+1] = best_ref[LIST_1];
+             }
+             else
+             {
+               best8x8fwref [1][0] = best8x8fwref [1][1] = best8x8fwref [1][2] = best8x8fwref [1][3] = best_ref[LIST_0];
+               best8x8pdir  [1][0] = best8x8pdir  [1][1] = best8x8pdir  [1][2] = best8x8pdir  [1][3] = best_pdir;
+               best8x8bwref [1][0] = best8x8bwref [1][1] = best8x8bwref [1][2] = best8x8bwref [1][3] = best_ref[LIST_1];
+             }
+             
+             //--- set reference frames and motion vectors ---
+             if (mode>1 && block==0)
+               SetRefAndMotionVectors (block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);            
+           } // for (block=0; block<(mode==1?1:2); block++)
+           
+           if(!input->rdopt)
+           {
+             currMB->luma_transform_size_8x8_flag = 0;
+             if (input->Transform8x8Mode) //for inter rd-off, set 8x8 to do 8x8 transform
+             {
+               SetModesAndRefframeForBlocks(mode);
+               currMB->luma_transform_size_8x8_flag = TransformDecision(-1, &cost);
+             }
+           }          
+           
+           if(input->rdopt == 2 && mode == 1)
+           {
+             if(pslice)
+               min_rdcost = max_rdcost;
+             
+             //=====   S T O R E   C O D I N G   S T A T E   =====
+             //---------------------------------------------------
+             //store_coding_state (cs_cm);
+ 
+             for (ctr16x16=0, k=0; k<1; k++)
+             {
+               i16mode = 0; 
+               
+               //--- for INTER16x16 check all prediction directions ---
+               if (bslice)
+               {
+                 best8x8pdir[1][0] = best8x8pdir[1][1] = best8x8pdir[1][2] = best8x8pdir[1][3] = ctr16x16;
+                 if (ctr16x16 < 2) k--;
+                 ctr16x16++;
+               }
+               
+               currMB->c_ipred_mode=DC_PRED_8;
+               compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+             } // for (ctr16x16=0, k=0; k<1; k++)
+             
+             if(pslice)
+             {
+               // Get SKIP motion vector and compare SKIP_MV with best motion vector of 16x16
+               FindSkipModeMotionVector ();
+               if(input->EarlySkipEnable)
+               {
+                 //===== check for SKIP mode =====
+                 if ( currMB->cbp==0 && enc_picture->ref_idx[LIST_0][img->block_y][img->block_x]==0 &&
+                   enc_picture->mv[LIST_0][img->block_y][img->block_x][0]==allmvs[0] &&
+                   enc_picture->mv[LIST_0][img->block_y][img->block_x][1]==allmvs[1]               )
+                 {
+                   inter_skip = 1;
+                   best_mode = 0;
+                 }
+               } // if(input->EarlySkipEnable)
+             }
+             
+             // store variables.
+             RDCost16 = min_rdcost;
+             mode16 = best_mode;
+             cost16 = cost;
+           } // if(input->rdopt == 2 && mode == 1)
+           
+           if ((!inter_skip) && (cost < min_cost))
+           {
+             best_mode = mode;
+             min_cost  = cost;
+             best_transform_flag = currMB->luma_transform_size_8x8_flag;
+           }
+         } // if (enc_mb.valid[mode])
+       } // for (mode=1; mode<4; mode++)
+       
+       if ((!inter_skip) && enc_mb.valid[P8x8])
+       {
+         tr8x8.cost8x8 = INT_MAX;
+         tr4x4.cost8x8 = INT_MAX;
+         //===== store coding state of macroblock =====
+         store_coding_state (cs_mb);
+         
+         currMB->all_blk_8x8 = -1;
+         
+         if (input->Transform8x8Mode)
+         {  
+           tr8x8.cost8x8 = 0;
+           //===========================================================
+           // Check 8x8 partition with transform size 8x8 
+           //===========================================================
+           //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+           for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+             submacroblock_mode_decision(enc_mb, &tr8x8, currMB, cofAC_8x8ts[block],
+             &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1);
+           
+           // following params could be added in RD_8x8DATA structure
+           cbp8_8x8ts      = cbp8x8;
+           cbp_blk8_8x8ts  = cbp_blk8x8;
+           cnt_nonz8_8x8ts = cnt_nonz_8x8;
+           currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size
+ 
+           //--- re-set coding state (as it was before 8x8 block coding) ---
+           //reset_coding_state (cs_mb);        
+         }// if (input->Transform8x8Mode)
+         
+ 
+         if (input->Transform8x8Mode != 2)  
+         {
+           tr4x4.cost8x8 = 0;
+           //=================================================================
+           // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
+           //=================================================================
+           //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
+           for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
+           {
+             submacroblock_mode_decision(enc_mb, &tr4x4, currMB, cofAC8x8[block],
+               &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0);
+             
+             best8x8mode       [block] = tr4x4.part8x8mode [block];
+             best8x8pdir [P8x8][block] = tr4x4.part8x8pdir [block];
+             best8x8fwref[P8x8][block] = tr4x4.part8x8fwref[block];
+             best8x8bwref[P8x8][block] = tr4x4.part8x8bwref[block];
+           }          
+           //--- re-set coding state (as it was before 8x8 block coding) ---
+           // reset_coding_state (cs_mb);  
+         }// if (input->Transform8x8Mode != 2)
+         
+         //--- re-set coding state (as it was before 8x8 block coding) ---
+         reset_coding_state (cs_mb);
+         
+         
+         // This is not enabled yet since mpr has reverse order.
+         if (input->RCEnable)
+           rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+         
+         //check cost for P8x8 for non-rdopt mode
+         if (!input->rdopt && (tr4x4.cost8x8 < min_cost || tr8x8.cost8x8 < min_cost))
+         {
+           best_mode = P8x8;
+           if (input->Transform8x8Mode)
+           {
+             if (tr8x8.cost8x8 < tr4x4.cost8x8)
+             {
+               min_cost = tr8x8.cost8x8;
+               currMB->luma_transform_size_8x8_flag=1;
+             }
+             else if(tr4x4.cost8x8 < tr8x8.cost8x8)
+             {
+               min_cost = tr4x4.cost8x8;
+               currMB->luma_transform_size_8x8_flag=0;
+             }
+             else
+             {
+               min_cost = tr8x8.cost8x8;
+               currMB->luma_transform_size_8x8_flag=1;
+               if (GetBestTransformP8x8() == 0)
+               {
+                 min_cost = tr4x4.cost8x8;
+                 currMB->luma_transform_size_8x8_flag=0;
+               }
+             }
+           }
+           else
+           {
+             min_cost = tr4x4.cost8x8;
+             currMB->luma_transform_size_8x8_flag=0;
+           }
+         }// if (!input->rdopt && (tr4x4.cost8x8 < min_cost || tr8x8.cost8x8 < min_cost))
+       }
+       else // if (enc_mb.valid[P8x8])
+       {
+         tr4x4.cost8x8 = INT_MAX;
+       }
+       
+       // Find a motion vector for the Skip mode
+       if(input->rdopt != 2 && pslice)
+         FindSkipModeMotionVector ();
+     }   
+     else // if (!intra)
+     {
+       min_cost = INT_MAX;
+     }
+     
+     //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
+     //-------------------------------------------------------------------------
+    if (input->rdopt)
+    {
+     if (!inter_skip)
+     {
+         int mb_available_up;
+         int mb_available_left;
+         int mb_available_up_left;
+         
+         if(input->rdopt == 2 && img->type!=I_SLICE)
+         {
+           min_rdcost = RDCost16;
+           best_mode  = mode16;
+         }
+         else
+           min_rdcost = max_rdcost;
+         
+         // In Fast High mode (with selective intra), inter modes are evaluated here and intra modes in a separate pass
+         max_index = (img->residue_transform_flag) ? 11 : ((input->rdopt == 2 && !intra && input->SelectiveIntraEnable ) ? 5 : 9);
+         
+         if (input->BiPredMotionEstimation)
+           img->bi_pred_me[1] =0;  
+         
+         if (img->yuv_format != YUV400 && max_index != 5)
+         {
+           // precompute all new chroma intra prediction modes
+           IntraChromaPrediction(&mb_available_up, &mb_available_left, &mb_available_up_left);
+           max_chroma_pred_mode = PLANE_8;
+         }
+         else
+           max_chroma_pred_mode = DC_PRED_8;
+         
+         for (currMB->c_ipred_mode=DC_PRED_8; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+         {
+           // bypass if c_ipred_mode is not allowed
+           if ( (img->yuv_format != YUV400) &&
+             (  ((!intra || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8) 
+             || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up) 
+             || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left) 
+             || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+             continue;        
+           
+           //===== GET BEST MACROBLOCK MODE =====
+           for (ctr16x16=0, index=0; index < max_index; index++)
+           {
+             mode = mb_mode_table[index];
+             
+             if (img->yuv_format != YUV400)
+             {           
+               if (input->rdopt == 2)
+               {
+                 i16mode = 0;              
+                 // RDcost of mode 1 in P-slice and mode 0, 1 in B-slice are already available
+                 if(((bslice && mode == 0) || (!islice && mode == 1)))
+                   continue;
+               }
+               else
+               {
+                 // Residue Color Transform
+                 if(img->residue_transform_flag)
+                 {
+                   mode = mb_mode_table_RCT[index];
+                   if( mode == I16MB) 
+                     i16mode = index -5;
+                   // bypass if i16mode is not allowed
+                   if (mode == I16MB &&
+                     (  (i16mode==VERT_PRED_16 && !mb_available_up) 
+                     || (i16mode==HOR_PRED_16  && !mb_available_left) 
+                     || (i16mode==PLANE_16    && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+                     continue;
+                 }
+                 else
+                 {
+                   mode = mb_mode_table[index];
+                   i16mode = 0; 
+                 }
+               }
+             }
+             //--- for INTER16x16 check all prediction directions ---
+             if (mode==1 && bslice)
+             {
+               best8x8pdir[1][0] = best8x8pdir[1][1] = best8x8pdir[1][2] = best8x8pdir[1][3] = ctr16x16;
+               
+               if ( (bslice) && (input->BiPredMotionEstimation) 
+                 && (ctr16x16 == 2 && img->bi_pred_me[mode] < 2 && mode == 1))
+                 ctr16x16--;
+               if (ctr16x16 < 2) 
+                 index--;
+               ctr16x16++;
+             }
+             
+             // Skip intra modes in inter slices if best inter mode is 
+             // a MB partition and cbp is 0.
+             if (input->SkipIntraInInterSlices && !intra && mode >= I16MB 
+               && best_mode <=3 && currMB->cbp == 0)
+               continue;
+             
+             if (enc_mb.valid[mode])
+               compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);
+             
+             if ((input->BiPredMotionEstimation) && (bslice) && ctr16x16 == 2 
+               && img->bi_pred_me[mode] < 2 && mode == 1 && best8x8pdir[1][0] == 2) 
+               img->bi_pred_me[mode] = img->bi_pred_me[mode] + 1;
+           }// for (ctr16x16=0, index=0; index<max_index; index++)
+         }// for (currMB->c_ipred_mode=DC_PRED_8; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+         
+         // Selective Intra Coding
+         if(img->type!=I_SLICE && input->rdopt == 2 && input->SelectiveIntraEnable && input->ProfileIDC<FREXT_HP)
+         {
+           fast_mode_intra_decision(&intra_skip, min_rate);
+           
+           if(!intra_skip)
+           {
+             // precompute all new chroma intra prediction modes
+             if (img->yuv_format != YUV400)
+             {
+               // precompute all new chroma intra prediction modes
+               IntraChromaPrediction(&mb_available_up, &mb_available_left, &mb_available_up_left);
+               max_chroma_pred_mode = PLANE_8;
+             }
+             else
+               max_chroma_pred_mode = DC_PRED_8;
+             
+             max_index = 9;
+             
+             for (currMB->c_ipred_mode=DC_PRED_8; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
+             {
+               
+               // bypass if c_ipred_mode is not allowed
+               if ( (img->yuv_format != YUV400) &&
+                 (  ((!intra || !input->IntraDisableInterOnly) && input->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8) 
+                 || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up) 
+                 || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left) 
+                 || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+                 continue;           
+               
+               //===== GET BEST MACROBLOCK MODE =====
+               for (index = 5; index < max_index; index++)
+               {
+                 mode = mb_mode_table[index];
+                 
+                 if (input->SkipIntraInInterSlices && !intra && mode >= I16MB 
+                   && best_mode <=3 && currMB->cbp == 0)
+                   continue;
+                 
+                 if (img->yuv_format != YUV400)
+                 {           
+                   if (input->rdopt == 2)
+                   {
+                     i16mode = 0;              
+                     // RDcost of mode 1 in P-slice and mode 0, 1 in B-slice are already available
+                     if(((bslice && mode == 0) || (!islice && mode == 1)))
+                       continue;
+                   }
+                   else
+                   {
+                     // Residue Color Transform
+                     if(img->residue_transform_flag)
+                     {
+                       mode = mb_mode_table_RCT[index];
+                       if( mode == I16MB) 
+                         i16mode = index -5;
+                       // bypass if i16mode is not allowed
+                       if (mode == I16MB &&
+                         (  (i16mode==VERT_PRED_16 && !mb_available_up) 
+                         || (i16mode==HOR_PRED_16  && !mb_available_left) 
+                         || (i16mode==PLANE_16    && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
+                         continue;
+                     }
+                     else
+                     {
+                       mode = mb_mode_table[index];
+                       i16mode = 0; 
+                     }
+                   }
+                 }
+                 
+                 if (enc_mb.valid[mode])
+                   compute_mode_RD_cost(mode, currMB, enc_mb, &min_rdcost, &min_rate, i16mode, bslice, &inter_skip);                            
+               } // for (index = 5; index < max_index; index++)
+             }
+           }
+         }          
+       }
+    }
+    else //rdopt off
+    {
+      tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;	//save 8x8_flag
+      tmp_no_mbpart = currMB->NoMbPartLessThan8x8Flag;		  //save no-part-less
+      
+      if (img->yuv_format != YUV400)
+        // precompute all chroma intra prediction modes
+        IntraChromaPrediction(NULL, NULL, NULL);
+      
+      if (enc_mb.valid[0] && bslice) // check DIRECT MODE
+      {
+        if(have_direct)
+        {
+          switch(input->Transform8x8Mode)
+          {
+          case 1: // Mixture of 8x8 & 4x4 transform
+            cost = ((cost8x8_direct < cost_direct) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7])) 
+              ? cost8x8_direct : cost_direct;
+            break;
+          case 2: // 8x8 Transform only
+            cost = cost8x8_direct;
+            break;
+          default: // 4x4 Transform only
+            cost = cost_direct;
+            break;
+          }
+        }
+        else
+        { //!have_direct
+          cost = Get_Direct_CostMB (enc_mb.lambda_mf);
+        }
+        if (cost!=INT_MAX)
+        {
+          cost -= (int)floor(16*enc_mb.lambda_me+0.4999);
+        }
+        
+        if (cost <= min_cost)
+        {
+          if(active_sps->direct_8x8_inference_flag && input->Transform8x8Mode)
+          {
+            if(input->Transform8x8Mode==2)
+              currMB->luma_transform_size_8x8_flag=1;
+            else
+            {
+              if(cost8x8_direct < cost_direct)
+                currMB->luma_transform_size_8x8_flag=1;
+              else
+                currMB->luma_transform_size_8x8_flag=0;
+            }
+          }
+          else
+            currMB->luma_transform_size_8x8_flag=0;
+          
+          //Rate control
+          if (input->RCEnable)
+            rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+          
+          min_cost  = cost;
+          best_mode = 0;
+        }
+        else
+        {
+          currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
+          currMB->NoMbPartLessThan8x8Flag = tmp_no_mbpart; // restore if not best
+        }        
+      }
+      
+      if (enc_mb.valid[I8MB]) // check INTRA8x8
+      {
+        currMB->luma_transform_size_8x8_flag = 1; // at this point cost will ALWAYS be less than min_cost 
+        
+        currMB->mb_type = I8MB;
+        temp_cpb = Mode_Decision_for_new_Intra8x8Macroblock (enc_mb.lambda_md, &cost);
+        
+        if (cost <= min_cost)
+        {
+          // Residue Color Transform
+          if(img->residue_transform_flag)
+          {            
+            for(i=0; i<2; i++) 
+            {
+              for(j=0; j<4; j++) 
+                for(k=0; k<4; k++)
+                  if(cbp_chroma_block[i][j][k]) cr_cbp = 2;
+            }            
+            cr_cbp = dct_chroma_DC(0, cr_cbp);
+            cr_cbp = dct_chroma_DC(1, cr_cbp);
+            
+            temp_cpb += (cr_cbp<<4);
+            for(j=0; j<MB_BLOCK_SIZE; j++) 
+            {
+              pix_y = img->pix_y + j;
+              for(i=0; i<MB_BLOCK_SIZE; i++) 
+              {
+                pix_x = img->pix_x + i;
+                temp_imgU[j][i] = enc_picture->imgUV[0][pix_y][pix_x];
+                temp_imgV[j][i] = enc_picture->imgUV[1][pix_y][pix_x];
+              }
+            }
+          }          
+          currMB->cbp = temp_cpb;
+          
+          //coeffs
+          if (input->Transform8x8Mode != 2)
+          {
+            i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
+          }
+          
+          for(j=0; j<MB_BLOCK_SIZE; j++) 
+          {
+            pix_y = img->pix_y + j;
+            for(i=0; i<MB_BLOCK_SIZE; i++) 
+            {
+              pix_x = img->pix_x + i;
+              temp_imgY[j][i] = enc_picture->imgY[pix_y][pix_x];
+            }
+          }
+          
+          //Rate control
+          if (input->RCEnable)
+            rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+          
+          min_cost  = cost;
+          best_mode = I8MB;
+          tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
+        } 
+        else
+          currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
+      }
+      
+      if (enc_mb.valid[I4MB]) // check INTRA4x4
+      {
+        currMB->luma_transform_size_8x8_flag = 0;
+        currMB->mb_type = I4MB;
+        temp_cpb = Mode_Decision_for_Intra4x4Macroblock (enc_mb.lambda_md, &cost);
+        
+        if (cost <= min_cost)
+        {
+          // Residue Color Transform
+          if(img->residue_transform_flag)
+          {
+            for(i=0; i<2; i++) 
+            { 
+              for(j=0; j<4; j++) 
+                for(k=0; k<4; k++) 
+                  if(cbp_chroma_block[i][j][k]) 
+                    cr_cbp = 2;
+            }
+            cr_cbp = dct_chroma_DC(0, cr_cbp);
+            cr_cbp = dct_chroma_DC(1, cr_cbp);
+            
+            temp_cpb += (cr_cbp<<4);
+          }
+          currMB->cbp = temp_cpb;
+          
+          //Rate control
+          if (input->RCEnable)
+            rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+          
+          min_cost  = cost;
+          best_mode = I4MB;
+          tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
+        } 
+        else
+        {
+          currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
+          //coeffs
+          i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
+        }
+      }
+      if (enc_mb.valid[I16MB]) // check INTRA16x16
+      {
+        currMB->luma_transform_size_8x8_flag = 0;
+        intrapred_luma_16x16 ();
+        cost = find_sad_16x16 (&i16mode);
+        
+        if (cost < min_cost)
+        {
+          //Rate control
+          // should this access opix or pix?
+          if (input->RCEnable)
+            rc_store_diff(img->opix_x,img->opix_y,img->mprr_2[i16mode]);
+          
+          // Residue Color Transform
+          if(img->residue_transform_flag)
+          {
+            for (j = 0; j < MB_BLOCK_SIZE; j++) 
+            {
+              pix_y = img->pix_y + j;
+              for (i = 0; i < MB_BLOCK_SIZE; i++) 
+              {
+                pix_x = img->pix_x + i;
+                residue_G = imgY_org    [pix_y][pix_x] - img->mprr_2   [i16mode]             [j][i];
+                residue_B = imgUV_org[0][pix_y][pix_x] - img->mprr_c[0][currMB->c_ipred_mode][j][i];
+                residue_R = imgUV_org[1][pix_y][pix_x] - img->mprr_c[1][currMB->c_ipred_mode][j][i];                
+                /* Forward Residue Transform */
+                resTrans_R[j][i] = residue_R - residue_B;
+                temp             = residue_B + (resTrans_R[j][i] >> 1);
+                resTrans_B[j][i] = residue_G-temp;
+                resTrans_G[j][i] = temp+(resTrans_B[j][i] >> 1);                
+                img->m7[j][i]    = resTrans_G[j][i];
+              }
+            }
+          }
+          
+          best_mode   = I16MB;
+          currMB->cbp = dct_luma_16x16 (i16mode);
+          
+          // Residue Color Transform
+          if(img->residue_transform_flag)
+          {
+            for (j = 0; j < MB_BLOCK_SIZE; j++) 
+            {
+              for (i=0; i < MB_BLOCK_SIZE; i++) 
+              {
+                rec_resG[j][i] = img->m7[j][i];
+                img->m7[j][i]  = resTrans_B[j][i];
+              }
+            }
+            cr_cbp = dct_chroma(0, 0);
+            
+            for (j=0; j < MB_BLOCK_SIZE; j++) 
+            {
+              for (i=0; i < MB_BLOCK_SIZE; i++) 
+              {
+                rec_resB[j][i] = img->m7[j][i];
+                img->m7[j][i]  = resTrans_R[j][i];
+              }
+            } 
+            cr_cbp = dct_chroma(1, cr_cbp);
+            
+            for (j=0; j < MB_BLOCK_SIZE; j++) 
+            {
+              for (i=0; i < MB_BLOCK_SIZE; i++) 
+                rec_resR[j][i] = img->m7[j][i];
+            } 
+            currMB->cbp += (cr_cbp<<4);
+            
+            /* Inverse Residue Transform */
+            for (j=0; j < MB_BLOCK_SIZE; j++) 
+            {
+              pix_y = img->pix_y + j;
+              for (i=0; i < MB_BLOCK_SIZE; i++) 
+              {
+                pix_x = img->pix_x + i;
+                temp      = rec_resG[j][i] - (rec_resB[j][i] >> 1);
+                residue_G = rec_resB[j][i]+temp;
+                residue_B = temp - (rec_resR[j][i]>>1);
+                residue_R = residue_B+rec_resR[j][i];                
+                enc_picture->imgY    [pix_y][pix_x] = 
+                  min(img->max_imgpel_value   ,max(0, residue_G + (int) img->mprr_2[i16mode][j][i]));
+                enc_picture->imgUV[0][pix_y][pix_x] = 
+                  min(img->max_imgpel_value_uv,max(0, residue_B + (int) img->mprr_c[0][currMB->c_ipred_mode][j][i]));
+                enc_picture->imgUV[1][pix_y][pix_x] = 
+                  min(img->max_imgpel_value_uv,max(0, residue_R + (int) img->mprr_c[1][currMB->c_ipred_mode][j][i]));
+              }
+            }
+          }
+        }
+        else
+        {
+          currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore
+          currMB->NoMbPartLessThan8x8Flag = tmp_no_mbpart;     // restore
+        }
+      }
+      }    
+      if (rerun==0)
+        intra1 = IS_INTRA(currMB);
+   } // for (rerun=0; rerun<runs; rerun++) 
+   
+   //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
+   //---------------------------------------------------------------------------  
+   if (input->rdopt)
+   {   
+     if ((cbp!=0 || best_mode==I16MB ))
+       currMB->prev_cbp = 1;    
+     else if (cbp==0 && !input->RCEnable)
+     {
+       currMB->delta_qp = 0;
+       currMB->qp = currMB->prev_qp;
+       img->qp = currMB->qp;
+       currMB->prev_cbp = 0;
+     }    
+     set_stored_macroblock_parameters ();
+   }
+   else
+   {
+     //===== set parameters for chosen mode =====
+     SetModesAndRefframeForBlocks (best_mode);
+     
+     if (best_mode==P8x8)
+     {
+       if (currMB->luma_transform_size_8x8_flag && (cbp8_8x8ts == 0) && input->Transform8x8Mode != 2)
+         currMB->luma_transform_size_8x8_flag = 0;
+       
+       SetCoeffAndReconstruction8x8 (currMB);
+       
+       memset(currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+       for (k=0, j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+         memset(&ipredmodes[j][img->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+     }
+     else
+     {
+       //===== set parameters for chosen mode =====
+       if (best_mode == I8MB)
+       {
+         memcpy(currMB->intra_pred_modes,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+         for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+           memcpy(&img->ipredmode[j][img->block_x],&img->ipredmode8x8[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+ 
+         //--- restore reconstruction for 8x8 transform ---
+         for(j=0; j<MB_BLOCK_SIZE; j++) 
+         {
+           memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x],temp_imgY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+         }
+         
+         // Residue Color Transform
+         if(img->residue_transform_flag)
+         {          
+           for(j=0; j<MB_BLOCK_SIZE; j++) 
+           {
+             pix_y = img->pix_c_y + j;
+             for(i=0; i<MB_BLOCK_SIZE; i++) 
+             {
+               pix_x = img->pix_c_x + i;
+               enc_picture->imgUV[0][pix_y][pix_x] = temp_imgU[j][i] ;
+               enc_picture->imgUV[1][pix_y][pix_x] = temp_imgV[j][i] ;
+             }
+           }                
+         }
+       }
+       
+       if ((best_mode!=I4MB)&&(best_mode != I8MB))
+       {
+         memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+         for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+           memset(&ipredmodes[j][img->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+ 
+         if (best_mode!=I16MB)
+         {
+           if((best_mode>=1) && (best_mode<=3))
+             currMB->luma_transform_size_8x8_flag = best_transform_flag;
+           LumaResidualCoding ();
+           
+           if((currMB->cbp==0)&&(best_mode==0))
+             currMB->luma_transform_size_8x8_flag = 0;
+           
+           //Rate control
+           if (input->RCEnable)
+             rc_store_diff(img->opix_x,img->opix_y,img->mpr);
+         }
+       }
+     }
+     //check luma cbp for transform size flag
+     if (((currMB->cbp&15) == 0) && !(IS_OLDINTRA(currMB) || currMB->mb_type == I8MB))
+       currMB->luma_transform_size_8x8_flag = 0;
+     
+     // precompute all chroma intra prediction modes
+     if (img->yuv_format != YUV400)
+       IntraChromaPrediction(NULL, NULL, NULL);
+ 
+     img->i16offset = 0;
+     dummy = 0;
+     
+     // Residue Color Transform
+     if ((!(img->residue_transform_flag && (best_mode==I4MB || best_mode==I16MB || best_mode==I8MB))) && img->yuv_format!=YUV400)
+       ChromaResidualCoding (&dummy);
+     if (best_mode==I16MB) {
+       img->i16offset = I16Offset  (currMB->cbp, i16mode);
+     }
+     SetMotionVectorsMB (currMB, bslice);
+     
+     //===== check for SKIP mode =====
+     if ((pslice) && best_mode==1 && currMB->cbp==0 &&
+       enc_picture->ref_idx[LIST_0][img->block_y][img->block_x]    == 0 &&
+       enc_picture->mv     [LIST_0][img->block_y][img->block_x][0] == allmvs[0] &&
+       enc_picture->mv     [LIST_0][img->block_y][img->block_x][1] == allmvs[1])
+     {
+       currMB->mb_type = currMB->b8mode[0] = currMB->b8mode[1] = currMB->b8mode[2] = currMB->b8mode[3] = 0;
+       currMB->luma_transform_size_8x8_flag = 0;
+     }
+     
+     if(img->MbaffFrameFlag)
+       set_mbaff_parameters();
+   }
+   
+   // Rate control
+   if(input->RCEnable)
+     update_rc(currMB, best_mode);
+   
+   rdopt->min_rdcost = input->rdopt ? min_rdcost : min_cost;
+   
+   if ( (img->MbaffFrameFlag)
+     && (img->current_mb_nr%2)
+     && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
+     && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))
+     && !(field_flag_inference() == enc_mb.curr_mb_field)) // top is skip
+   {    
+     rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
+   }
+   
+   //===== Decide if this MB will restrict the reference frames =====
+   if (input->RestrictRef)
+     update_refresh_map(intra, intra1, currMB);  
+   
+   if(input->FMEnable == 1)
+   {
+     skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+   else if(input->FMEnable == 2)
+   {
+     simplified_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/mode_decision.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mode_decision.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mode_decision.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,87 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    mode_decision.h
+  *
+  * \author
+  *    Alexis Michael Tourapis
+  *
+  * \date
+  *    21. February 2005
+  *
+  * \brief
+  *    Header file for rd-optimized mode decision
+  **************************************************************************
+  */
+ 
+ #ifndef _RD_OPT_H_   // NOTE(review): guard name does not match this file (mode_decision.h); confirm no other header uses the same macro
+ #define _RD_OPT_H_
+ 
+ #define KS_MV
+ // Globals shared with the mode-decision code. They are only declared (extern) here; the definitions are not part of this header.
+ extern CSptr cs_mb, cs_b8, cs_cm, cs_imb, cs_ib8, cs_ib4, cs_pc;
+ extern imgpel   mpr_8x8ts[16][16];
+ extern imgpel   rec_mbY[16][16], rec_mbU[16][16], rec_mbV[16][16];    // reconstruction values
+ extern RD_8x8DATA tr4x4, tr8x8;
+ 
+ extern const  int LEVELMVLIMIT[17][6];
+ extern int    ****cofAC_8x8ts;        // [8x8block][4x4block][level/run][scan_pos]
+ extern int    ****cofAC, ****cofAC8x8;        // [8x8block][4x4block][level/run][scan_pos]
+ extern int    QP2QUANT[40];
+ extern int    cbp_blk8x8;
+ extern int    cbp, cbp8x8, cnt_nonz_8x8;
+ extern int64  cbp_blk;
+ extern int64  cbp_blk8_8x8ts;
+ extern int    cbp8_8x8ts;
+ extern int    cnt_nonz8_8x8ts;
+ extern int    QP,QP2;
+ extern int    DELTA_QP,DELTA_QP2;
+ extern int    diffy[16][16];
+ 
+ // Residue Color Transform
+ extern int    cofAC4x4_chroma[2][2][18];
+ extern int    mprRGB_8x8[3][16][16], mprRGB_8x8ts[3][16][16];
+ extern char   b4_ipredmode[16], b4_intra_pred_modes[16];
+ 
+ extern short  bi_pred_me;
+ extern short  best_mode;
+ extern short  best8x8mode          [4]; // [block]
+ extern short  best8x8pdir [MAXMODE][4]; // [mode][block]
+ extern short  best8x8fwref[MAXMODE][4]; // [mode][block]
+ extern short  best8x8bwref[MAXMODE][4]; // [mode][block]
+ extern imgpel pred[16][16];
+ // Function declarations. An empty parameter list "()" declares a function without a prototype (before C23), so its arguments are not type-checked.
+ extern void   set_stored_macroblock_parameters ();
+ extern void   StoreMV8x8(int);
+ extern void   RestoreMV8x8(int);
+ extern void   store_macroblock_parameters (int);
+ extern void   SetModesAndRefframeForBlocks (int);
+ extern void   SetRefAndMotionVectors (int, int, int, int, int);
+ extern void   StoreNewMotionVectorsBlock8x8(int, int, int, int, int, int, int);
+ extern void   assign_enc_picture_params(int, int, int, int, int, int, int);
+ extern void   update_refresh_map(int intra, int intra1, Macroblock *currMB);
+ extern void   SetMotionVectorsMB (Macroblock*, int);
+ extern void   SetCoeffAndReconstruction8x8 (Macroblock*);
+ 
+ extern int    GetBestTransformP8x8();
+ extern int    I16Offset (int, int);
+ extern int    CheckReliabilityOfRef (int, int, int, int);
+ extern int    Mode_Decision_for_Intra4x4Macroblock (double, int*);
+ extern int    RDCost_for_macroblocks (double, int, double*, double*, int);
+ extern double RDCost_for_8x8blocks (int*, int64*, double, int, int, short, short, short);
+ 
+ extern const int  b8_mode_table[6];
+ extern const int  mb_mode_table[9];
+   // Residue Color Transform
+ extern const int  mb_mode_table_RCT[11];
+ // The functions below are declared without "extern"; for function declarations this makes no difference to linkage.
+ void rc_store_diff(int cpix_x, int cpix_y, imgpel prediction[16][16]);
+ void update_rc(Macroblock *currMB, short best_mode);
+ void submacroblock_mode_decision(RD_PARAMS, RD_8x8DATA *, Macroblock *,int ***, int *, short, int, int *, int *, int *, int);
+ void init_enc_mb_params(Macroblock* currMB, RD_PARAMS *enc_mb, int intra, int bslice);
+ void list_prediction_cost(int list, int block, int mode, RD_PARAMS enc_mb, int bmcost[5], char best_ref[2]);
+ void determine_prediction_list(int, int [5], char [2], short *, int *, short *);
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/mv-search.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mv-search.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mv-search.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,3873 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file mv-search.c
+  *
+  * \brief
+  *    Motion Vector Search, unified for B and P Pictures
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  *      - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *      - Rickard Sjoberg                 <rickard.sjoberg at era.ericsson.se>
+  *      - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  *      - Jani Lainema                    <jani.lainema at nokia.com>
+  *      - Detlev Marpe                    <marpe at hhi.de>
+  *      - Thomas Wedi                     <wedi at tnt.uni-hannover.de>
+  *      - Heiko Schwarz                   <hschwarz at hhi.de>
+  *      - Alexis Michael Tourapis         <alexismt at ieee.org>
+  *
+  *************************************************************************************
+ */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <limits.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ 
+ #include "image.h"
+ #include "mv-search.h"
+ #include "refbuf.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ #include "fast_me.h"
+ #include "simplified_fast_me.h"
+ 
+ #include "epzs.h"
+ 
+ #include <time.h>
+ #include <sys/timeb.h>
+ 
+ // These procedure pointers are used by motion_search() and one_eigthpel()
+ static pel_t *(*PelYline_11) (pel_t *, int, int, int, int);   // SetupFastFullPelSearch() points this at FastLine16Y_11 or UMVLine16Y_11
+ static pel_t *(*get_line) (pel_t**, int, int, int, int);
+ static pel_t *(*get_line_p1) (pel_t**, int, int, int, int);
+ static pel_t *(*get_line_p2) (pel_t**, int, int, int, int);
+ 
+ // Statistics, temporary
+ int     max_mvd;                // set in Init_Motion_Search_Module() from the computed max_mv_bits
+ short*  spiral_search_x;        // calloc'ed in Init_Motion_Search_Module() with max_search_points entries
+ short*  spiral_search_y;        // same size as spiral_search_x; used together as per-position (x, y) offsets
+ short*  spiral_hpel_search_x;
+ short*  spiral_hpel_search_y;
+ 
+ int*    mvbits;
+ int*    refbits;
+ unsigned int*  byte_abs;        // indexed with (reference pel - original pel) in SetupFastFullPelSearch(); presumably offset so negative indices are valid -- confirm
+ int**** motion_cost;
+ int     byte_abs_range;         // Init_Motion_Search_Module(): 64 * (larger of img->max_imgpel_value, img->max_imgpel_value_uv, plus 1)
+ // Forward declaration: the definition appears later in this file, after the optional _FAST_FULL_ME_ section that calls it.
+ void SetMotionVectorPredictor (short  pmv[2],
+                                char   **refPic,
+                                short  ***tmp_mv,
+                                short  ref_frame,
+                                int    list,
+                                int    block_x,
+                                int    block_y,
+                                int    blockshape_x,
+                                int    blockshape_y);
+ 
+ extern ColocatedParams *Co_located;
+ 
+ #ifdef _FAST_FULL_ME_
+ 
+ /*****
+  *****  static variables for fast integer motion estimation
+  *****
+  */
+ static int  **search_setup_done;  //!< flag if all block SAD's have been calculated yet
+ static int  **search_center_x;    //!< absolute search center for fast full motion search
+ static int  **search_center_y;    //!< absolute search center for fast full motion search
+ static int  **pos_00;             //!< position of (0,0) vector
+ static distpel  *****BlockSAD;        //!< SAD for all blocksize, ref. frames and motion vectors
+ static int  **max_search_range;
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    function creating arrays for fast integer motion estimation (BlockSAD and the [list][ref] search-state tables); exits through no_mem_exit() if an allocation fails
+  ***********************************************************************
+  */
+ void
+ InitializeFastFullIntegerSearch ()
+ {
+   int  i, j, k, list;
+   int  search_range = input->search_range;
+   int  max_pos      = (2*search_range+1) * (2*search_range+1);   // number of integer positions in the square search window
+   // BlockSAD is indexed [list][ref][blocktype][4x4 block][search position]; the top level holds the two reference lists
+   if ((BlockSAD = (distpel*****)malloc (2 * sizeof(distpel****))) == NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+   
+   for (list=0; list<2;list++)
+   {
+     if ((BlockSAD[list] = (distpel****)malloc ((img->max_num_references) * sizeof(distpel***))) == NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+     for (i = 0; i < img->max_num_references; i++)
+     {
+       if ((BlockSAD[list][i] = (distpel***)malloc (8 * sizeof(distpel**))) == NULL)
+         no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+       for (j = 1; j < 8; j++)   // slot 0 is never allocated; ClearFastFullIntegerSearch() also starts at 1
+       {
+         if ((BlockSAD[list][i][j] = (distpel**)malloc (16 * sizeof(distpel*))) == NULL)
+           no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+         for (k = 0; k < 16; k++)
+         {
+           if ((BlockSAD[list][i][j][k] = (distpel*)malloc (max_pos * sizeof(distpel))) == NULL)
+             no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
+         }        
+       }
+     }
+   }
+   // Top level of each [list][ref] table: two row POINTERS, so the size is 2*sizeof(int*); sizeof(int) under-allocates where pointers are wider than int
+   if ((search_setup_done = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: search_setup_done");
+   if ((search_center_x = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: search_center_x");
+   if ((search_center_y = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: search_center_y");
+   if ((pos_00 = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: pos_00");
+   if ((max_search_range = (int**)malloc (2*sizeof(int*)))==NULL)
+     no_mem_exit ("InitializeFastFullIntegerSearch: max_search_range");
+   // Second level: for each list, one int per reference frame
+   for (list=0; list<2; list++)
+   {
+     if ((search_setup_done[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: search_setup_done");
+     if ((search_center_x[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: search_center_x");
+     if ((search_center_y[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: search_center_y");
+     if ((pos_00[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: pos_00");
+     if ((max_search_range[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
+       no_mem_exit ("InitializeFastFullIntegerSearch: max_search_range");
+   }
+   
+   // assign max search ranges for reference frames
+   if (input->full_search == 2)   // same (full) range for every reference frame
+   {
+     for (list=0;list<2;list++)
+       for (i=0; i<img->max_num_references; i++)  
+         max_search_range[list][i] = search_range;
+   }
+   else                           // full range for reference 0 only, half range for all others
+   {
+     for (list=0;list<2;list++)
+     {
+       max_search_range[list][0] = search_range;
+       for (i=1; i< img->max_num_references; i++)  max_search_range[list][i] = search_range / 2;
+     }
+   }
+   
+ }
+ 
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    releases every array that InitializeFastFullIntegerSearch() created
+  ***********************************************************************
+  */
+ void
+ ClearFastFullIntegerSearch ()
+ {
+   int  lst, ref, btype, blk;
+   // BlockSAD: innermost arrays are freed first; block-type slot 0 is skipped because the allocation loop starts at 1 as well
+   for (lst = 0; lst < 2; ++lst)
+   {
+     distpel ****per_ref = BlockSAD[lst];
+     for (ref = 0; ref < img->max_num_references; ++ref)
+     {
+       distpel ***per_type = per_ref[ref];
+       for (btype = 1; btype < 8; ++btype)
+       {
+         distpel **per_blk = per_type[btype];
+         for (blk = 0; blk < 16; ++blk)
+           free (per_blk[blk]);
+         free (per_blk);
+       }
+       free (per_type);
+     }
+     free (per_ref);
+   }
+   free (BlockSAD);
+   // [list][ref] search-state tables: the per-list rows go first, then the two-entry row-pointer arrays
+   for (lst = 0; lst < 2; ++lst)
+   {
+     free (search_setup_done[lst]);
+     free (search_center_x[lst]);
+     free (search_center_y[lst]);
+     free (pos_00[lst]);
+     free (max_search_range[lst]);
+   }
+   free (search_setup_done);
+   free (search_center_x);
+   free (search_center_y);
+   free (pos_00);
+   free (max_search_range);
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    clears the search_setup_done flag of every (list, reference) pair
+  *    (has to be called in start_macroblock())
+  ***********************************************************************
+  */
+ void
+ ResetFastFullIntegerSearch ()
+ {
+   int lst, ref;
+   for (lst = 0; lst < 2; ++lst) {
+     for (ref = 0; ref < img->max_num_references; ++ref)
+       search_setup_done[lst][ref] = 0;   // SetupFastFullPelSearch() sets this back to 1 once it has run
+   }
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    calculation of SAD for larger blocks on the basis of 4x4 blocks: block type 7 is summed pairwise into types 6 and 5, type 6 into 4, type 4 into 3 and 2, type 3 into 1
+  ***********************************************************************
+  */
+ void
+ SetupLargerBlocks (int list, int refindex, int max_pos)
+ {
+ #define ADD_UP_BLOCKS()   _o=*_bo; _i=*_bi; _j=*_bj; for(pos=0;pos<max_pos;pos++) _o[pos] = _i[pos] + _j[pos];
+ #define INCREMENT(inc)    _bo+=inc; _bi+=inc; _bj+=inc;
+   // ADD_UP_BLOCKS: out[pos] = in1[pos] + in2[pos] for every search position; INCREMENT: advance all three block cursors by inc entries
+   distpel   pos, **_bo, **_bi, **_bj;   // NOTE(review): pos is a loop counter compared with int max_pos but has type distpel -- confirm distpel can hold max_pos
+   register distpel *_o,   *_i,   *_j;
+   // Block index n below is the index into the 16-entry per-type array; SetupFastFullPelSearch() fills type 7 with 4 entries per row of 4x4 blocks
+   //--- blocktype 6 ---   type6[n] = type7[n] + type7[n+4] for n = 0..3 and 8..11 (entry one block row further down)
+   _bo = BlockSAD[list][refindex][6];
+   _bi = BlockSAD[list][refindex][7];
+   _bj = _bi + 4;
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(5);   // jump from entry 3 to entry 8
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS(); INCREMENT(1);
+   ADD_UP_BLOCKS();
+   
+   //--- blocktype 5 ---   type5[n] = type7[n] + type7[n+1] for n = 0, 2, ..., 14 (next entry in the same row)
+   _bo = BlockSAD[list][refindex][5];
+   _bi = BlockSAD[list][refindex][7];
+   _bj = _bi + 1;
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS();
+   
+   //--- blocktype 4 ---   type4[n] = type6[n] + type6[n+1] for n = 0, 2, 8, 10
+   _bo = BlockSAD[list][refindex][4];
+   _bi = BlockSAD[list][refindex][6];
+   _bj = _bi + 1;
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS(); INCREMENT(6);   // jump from entry 2 to entry 8
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS();
+   
+   //--- blocktype 3 ---   type3[n] = type4[n] + type4[n+8] for n = 0, 2
+   _bo = BlockSAD[list][refindex][3];
+   _bi = BlockSAD[list][refindex][4];
+   _bj = _bi + 8;
+   ADD_UP_BLOCKS(); INCREMENT(2);
+   ADD_UP_BLOCKS();
+   
+   //--- blocktype 2 ---   type2[n] = type4[n] + type4[n+2] for n = 0, 8
+   _bo = BlockSAD[list][refindex][2];
+   _bi = BlockSAD[list][refindex][4];
+   _bj = _bi + 2;
+   ADD_UP_BLOCKS(); INCREMENT(8);
+   ADD_UP_BLOCKS();
+   
+   //--- blocktype 1 ---   type1[0] = type3[0] + type3[2] (the whole macroblock)
+   _bo = BlockSAD[list][refindex][1];
+   _bi = BlockSAD[list][refindex][3];
+   _bj = _bi + 2;
+   ADD_UP_BLOCKS();
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Setup the fast search for an macroblock: computes the SAD of all sixteen 4x4 blocks at every position of the search window of reference (list, ref), then derives the larger block types
+  ***********************************************************************
+  */
+ void SetupFastFullPelSearch (short ref, int list)  // <--  reference frame parameter, list0 or 1
+ {
+   short   pmv[2];
+   pel_t   orig_blocks[256], *orgptr=orig_blocks, *refptr;   // 16x16 original samples of the current macroblock, stored row by row
+   int     offset_x, offset_y, x, y, range_partly_outside, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
+   int     LineSadBlk0, LineSadBlk1, LineSadBlk2, LineSadBlk3;   // running SADs of the four 4x4 blocks in the current block row
+   int     max_width, max_height;
+   int     img_width, img_height;
+   
+   StorablePicture *ref_picture;
+   pel_t   *ref_pic;
+   
+   distpel**   block_sad = BlockSAD[list][ref][7];   // block type 7: one SAD array per 4x4 block
+   int     search_range  = max_search_range[list][ref];
+   int     max_pos       = (2*search_range+1) * (2*search_range+1);
+   
+   int     list_offset   = img->mb_data[img->current_mb_nr].list_offset; 
+   
+   int     apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));
+   
+   
+   ref_picture     = listX[list+list_offset][ref];
+   
+   //===== Use weighted Reference for ME ====
+   if (apply_weights && input->UseWeightedReferenceME)
+     ref_pic       = ref_picture->imgY_11_w;
+   else
+     ref_pic       = ref_picture->imgY_11;
+   
+   max_width     = ref_picture->size_x - 17;   // positions with 0 <= abs_x <= max_width (likewise for y) use FastLine16Y_11 below
+   max_height    = ref_picture->size_y - 17;
+   
+   img_width     = ref_picture->size_x;
+   img_height    = ref_picture->size_y;
+   
+   //===== get search center: predictor of 16x16 block =====
+   SetMotionVectorPredictor (pmv, enc_picture->ref_idx[list], enc_picture->mv[list], ref, list, 0, 0, 16, 16);
+   
+   search_center_x[list][ref] = pmv[0] / 4;   // predictor divided by 4 (presumably quarter-pel to integer-pel -- confirm)
+   search_center_y[list][ref] = pmv[1] / 4;
+   
+   if (!input->rdopt)
+   {
+     //--- correct center so that (0,0) vector is inside ---
+     search_center_x[list][ref] = max(-search_range, min(search_range, search_center_x[list][ref]));
+     search_center_y[list][ref] = max(-search_range, min(search_range, search_center_y[list][ref]));
+   }
+   search_center_x[list][ref] = Clip3(-2047 + search_range, 2047 - search_range, search_center_x[list][ref]);   // assuming Clip3(lo, hi, x) clamps x to [lo, hi]
+   search_center_y[list][ref] = Clip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, search_center_y[list][ref]);
+   
+   search_center_x[list][ref] += img->opix_x;   // from here on the centre is stored relative to the picture, not to the macroblock
+   search_center_y[list][ref] += img->opix_y;
+   
+   offset_x = search_center_x[list][ref];
+   offset_y = search_center_y[list][ref];
+   
+   //===== copy original block for fast access =====
+   for   (y = img->opix_y; y < img->opix_y+16; y++)
+     for (x = img->opix_x; x < img->opix_x+16; x++)
+       *orgptr++ = imgY_org [y][x];
+ 
+ 
+   //===== check if whole search range is inside image =====
+   if (offset_x >= search_range && offset_x <= max_width  - search_range &&
+       offset_y >= search_range && offset_y <= max_height - search_range   )
+   {
+     range_partly_outside = 0; PelYline_11 = FastLine16Y_11;   // line fetcher chosen once for the whole window
+   }
+   else
+   {
+     range_partly_outside = 1;   // line fetcher is chosen per position inside the loop below
+   }
+ 
+   //===== determine position of (0,0)-vector =====
+   if (!input->rdopt)
+   {
+     ref_x = img->opix_x - offset_x;
+     ref_y = img->opix_y - offset_y;
+ 
+     for (pos = 0; pos < max_pos; pos++)
+     {
+       if (ref_x == spiral_search_x[pos] &&
+           ref_y == spiral_search_y[pos])
+       {
+         pos_00[list][ref] = pos;   // NOTE(review): pos_00 is left unchanged if no spiral entry matches
+         break;
+       }
+     }
+   }
+ 
+   //===== loop over search range (spiral search): get blockwise SAD =====
+   for (pos = 0; pos < max_pos; pos++)
+   {
+     abs_y = offset_y + spiral_search_y[pos];
+     abs_x = offset_x + spiral_search_x[pos];
+ 
+     if (range_partly_outside)
+     {
+       if (abs_y >= 0 && abs_y <= max_height &&
+           abs_x >= 0 && abs_x <= max_width    )
+       {
+         PelYline_11 = FastLine16Y_11;
+       }
+       else
+       {
+         PelYline_11 = UMVLine16Y_11;
+       }
+     }
+ 
+     orgptr = orig_blocks;
+     bindex = 0;
+     for (blky = 0; blky < 4; blky++)   // four rows of 4x4 blocks
+     {
+       LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
+       for (y = 0; y < 4; y++)          // four pixel lines per block row
+       {
+         refptr = PelYline_11 (ref_pic, abs_y++, abs_x, img_height, img_width);   // abs_y advances one line per call; it is recomputed for each pos
+ 
+         LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];   // byte_abs is indexed with the signed pel difference
+         LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
+         LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
+       }
+       block_sad[bindex++][pos] = LineSadBlk0;   // entries are written in row-major order: 4 per block row, 16 in total
+       block_sad[bindex++][pos] = LineSadBlk1;
+       block_sad[bindex++][pos] = LineSadBlk2;
+       block_sad[bindex++][pos] = LineSadBlk3;
+     }
+   }
+ 
+ 
+   //===== combine SAD's for larger block types =====
+   SetupLargerBlocks (list, ref, max_pos);
+ 
+   //===== set flag marking that search setup have been done =====
+   search_setup_done[list][ref] = 1;
+ }
+ #endif // _FAST_FULL_ME_
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Set motion vector predictor: writes both components of the predicted vector to pmv[0] and pmv[1], derived from the neighbouring blocks a, b and c (c is replaced by d when c is unavailable)
+  ************************************************************************
+  */
+ void SetMotionVectorPredictor (short  pmv[2],
+                                char   **refPic,
+                                short  ***tmp_mv,
+                                short  ref_frame,
+                                int    list,
+                                int    block_x,
+                                int    block_y,
+                                int    blockshape_x,
+                                int    blockshape_y)
+ {
+   int mb_x                 = 4*block_x;
+   int mb_y                 = 4*block_y;
+   int mb_nr                = img->current_mb_nr;
+   
+   int mv_a, mv_b, mv_c, pred_vec=0;
+   int mvPredType, rFrameL, rFrameU, rFrameUR;
+   int hv;
+   
+   PixelPos block_a, block_b, block_c, block_d;
+   
+   int SAD_a=0, SAD_b=0, SAD_c=0, SAD_d=0;
+   int temp_pred_SAD[2];
+   // The SAD prediction below is only active for FMEnable == 1, ref_frame <= 0 and FME_blocktype == 1
+   int fastme_sp_enable=(input->FMEnable==1 && (ref_frame<=0) && (FME_blocktype==1));
+   if (fastme_sp_enable) 
+     pred_SAD_space=0;
+   // Neighbour lookups with offsets (-1,0), (0,-1), (blockshape_x,-1), (-1,-1): presumably left, above, above-right, above-left -- confirm against getLuma4x4Neighbour
+   getLuma4x4Neighbour(mb_nr, block_x, block_y,           -1,  0, &block_a);
+   getLuma4x4Neighbour(mb_nr, block_x, block_y,            0, -1, &block_b);
+   getLuma4x4Neighbour(mb_nr, block_x, block_y, blockshape_x, -1, &block_c);
+   getLuma4x4Neighbour(mb_nr, block_x, block_y,           -1, -1, &block_d);
+   
+   if (mb_y > 0)   // for blocks with mb_y > 0, block_c is forced unavailable in the position/shape cases below
+   {
+     if (mb_x < 8)  // first column of 8x8 blocks
+     {
+       if (mb_y==8)
+       {
+         if (blockshape_x == 16)      block_c.available  = 0;
+       }
+       else
+       {
+         if (mb_x+blockshape_x == 8)  block_c.available = 0;
+       }
+     }
+     else
+     {
+       if (mb_x+blockshape_x == 16)   block_c.available = 0;
+     }
+   }
+   
+   if (!block_c.available)
+   {
+     block_c=block_d;   // fall back to block d (which may itself be unavailable)
+   }
+   
+   mvPredType = MVPRED_MEDIAN;
+   
+   if (!img->MbaffFrameFlag)   // no MBAFF: neighbour reference indices are used as stored, -1 when the neighbour is unavailable
+   {
+     rFrameL    = block_a.available    ? refPic[block_a.pos_y][block_a.pos_x] : -1;
+     rFrameU    = block_b.available    ? refPic[block_b.pos_y][block_b.pos_x] : -1;
+     rFrameUR   = block_c.available    ? refPic[block_c.pos_y][block_c.pos_x] : -1;
+   }
+   else
+   {
+     if (img->mb_data[img->current_mb_nr].mb_field)   // current MB is field: index of a non-field neighbour is doubled
+     {
+       rFrameL    = block_a.available    ? 
+         img->mb_data[block_a.mb_addr].mb_field ? 
+         refPic[block_a.pos_y][block_a.pos_x]:
+         refPic[block_a.pos_y][block_a.pos_x] * 2: 
+         -1;
+       rFrameU    = block_b.available    ? 
+         img->mb_data[block_b.mb_addr].mb_field ? 
+         refPic[block_b.pos_y][block_b.pos_x]:
+         refPic[block_b.pos_y][block_b.pos_x] * 2: 
+         -1;
+       rFrameUR    = block_c.available    ? 
+         img->mb_data[block_c.mb_addr].mb_field ? 
+         refPic[block_c.pos_y][block_c.pos_x]:
+         refPic[block_c.pos_y][block_c.pos_x] * 2: 
+         -1;
+     }
+     else                                             // current MB is not field: index of a field neighbour is halved
+     {
+       rFrameL    = block_a.available    ? 
+         img->mb_data[block_a.mb_addr].mb_field ? 
+         refPic[block_a.pos_y][block_a.pos_x] >>1:
+         refPic[block_a.pos_y][block_a.pos_x] : 
+         -1;
+       rFrameU    = block_b.available    ? 
+         img->mb_data[block_b.mb_addr].mb_field ? 
+         refPic[block_b.pos_y][block_b.pos_x] >>1:
+         refPic[block_b.pos_y][block_b.pos_x] : 
+         -1;
+       rFrameUR    = block_c.available    ? 
+         img->mb_data[block_c.mb_addr].mb_field ? 
+         refPic[block_c.pos_y][block_c.pos_x] >>1:
+         refPic[block_c.pos_y][block_c.pos_x] : 
+         -1;
+     }
+   }
+   
+   /* Prediction if only one of the neighbors uses the reference frame
+    * we are checking
+    */
+   if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)       mvPredType = MVPRED_L;
+   else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)  mvPredType = MVPRED_U;
+   else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)  mvPredType = MVPRED_UR;
+   // Directional predictions 
+   if(blockshape_x == 8 && blockshape_y == 16)   // these overrides take precedence over the choice made just above
+   {
+     if(mb_x == 0)
+     {
+       if(rFrameL == ref_frame)
+         mvPredType = MVPRED_L;
+     }
+     else
+     {
+       if( rFrameUR == ref_frame)
+         mvPredType = MVPRED_UR;
+     }
+   }
+   else if(blockshape_x == 16 && blockshape_y == 8)
+   {
+     if(mb_y == 0)
+     {
+       if(rFrameU == ref_frame)
+         mvPredType = MVPRED_U;
+     }
+     else
+     {
+       if(rFrameL == ref_frame)
+         mvPredType = MVPRED_L;
+     }
+   }
+   
+   for (hv=0; hv < 2; hv++)   // hv selects the vector component; only component 1 is rescaled in the MBAFF case
+   {
+     if (!img->MbaffFrameFlag || hv==0)
+     {
+       mv_a = block_a.available  ? tmp_mv[block_a.pos_y][block_a.pos_x][hv] : 0;
+       mv_b = block_b.available  ? tmp_mv[block_b.pos_y][block_b.pos_x][hv] : 0;
+       mv_c = block_c.available  ? tmp_mv[block_c.pos_y][block_c.pos_x][hv] : 0;
+     }
+     else
+     {
+       if (img->mb_data[img->current_mb_nr].mb_field)   // current MB is field: component of a non-field neighbour is halved
+       {
+         mv_a = block_a.available  ? img->mb_data[block_a.mb_addr].mb_field
+           ? tmp_mv[block_a.pos_y][block_a.pos_x][hv]
+           : tmp_mv[block_a.pos_y][block_a.pos_x][hv] / 2
+           : 0;
+         mv_b = block_b.available  ? img->mb_data[block_b.mb_addr].mb_field
+           ? tmp_mv[block_b.pos_y][block_b.pos_x][hv]
+           : tmp_mv[block_b.pos_y][block_b.pos_x][hv] / 2
+           : 0;
+         mv_c = block_c.available  ? img->mb_data[block_c.mb_addr].mb_field
+           ? tmp_mv[block_c.pos_y][block_c.pos_x][hv]
+           : tmp_mv[block_c.pos_y][block_c.pos_x][hv] / 2
+           : 0;
+       }
+       else                                             // current MB is not field: component of a field neighbour is doubled
+       {
+         mv_a = block_a.available  ? img->mb_data[block_a.mb_addr].mb_field
+           ? tmp_mv[block_a.pos_y][block_a.pos_x][hv] * 2
+           : tmp_mv[block_a.pos_y][block_a.pos_x][hv]
+           : 0;
+         mv_b = block_b.available  ? img->mb_data[block_b.mb_addr].mb_field
+           ? tmp_mv[block_b.pos_y][block_b.pos_x][hv] * 2
+           : tmp_mv[block_b.pos_y][block_b.pos_x][hv]
+           : 0;
+         mv_c = block_c.available  ? img->mb_data[block_c.mb_addr].mb_field
+           ? tmp_mv[block_c.pos_y][block_c.pos_x][hv] * 2
+           : tmp_mv[block_c.pos_y][block_c.pos_x][hv]
+           : 0;
+       }
+     }
+     
+     if(fastme_sp_enable)   // neighbour costs come from the list-1 or list-0 table; they do not depend on hv
+     {
+       SAD_a = block_a.available ? ((list==1) ? (fastme_l1_cost[FME_blocktype][block_a.pos_y][block_a.pos_x]) : (fastme_l0_cost[FME_blocktype][block_a.pos_y][block_a.pos_x])) : 0;
+       SAD_b = block_b.available ? ((list==1) ? (fastme_l1_cost[FME_blocktype][block_b.pos_y][block_b.pos_x]) : (fastme_l0_cost[FME_blocktype][block_b.pos_y][block_b.pos_x])) : 0;
+       SAD_d = block_d.available ? ((list==1) ? (fastme_l1_cost[FME_blocktype][block_d.pos_y][block_d.pos_x]) : (fastme_l0_cost[FME_blocktype][block_d.pos_y][block_d.pos_x])) : 0;
+       SAD_c = block_c.available ? ((list==1) ? (fastme_l1_cost[FME_blocktype][block_c.pos_y][block_c.pos_x]) : (fastme_l0_cost[FME_blocktype][block_c.pos_y][block_c.pos_x])) : SAD_d;
+     }
+     
+     switch (mvPredType)
+     {
+     case MVPRED_MEDIAN:
+       if(!(block_b.available || block_c.available))
+       {
+         pred_vec = mv_a;
+         if(fastme_sp_enable) temp_pred_SAD[hv] = SAD_a;   // NOTE(review): always overwritten by the fastme_sp_enable block a few lines below
+       }
+       else
+       {
+         pred_vec = mv_a+mv_b+mv_c-min(mv_a,min(mv_b,mv_c))-max(mv_a,max(mv_b,mv_c));   // sum minus smallest minus largest = median of the three
+       }
+       if(fastme_sp_enable)
+       {
+         if (pred_vec == mv_a && SAD_a != 0) temp_pred_SAD[hv] = SAD_a;
+         else if (pred_vec == mv_b && SAD_b!=0) temp_pred_SAD[hv] = SAD_b;
+         else temp_pred_SAD[hv] = SAD_c;
+       }
+       break;
+     case MVPRED_L:
+       pred_vec = mv_a;
+       if(fastme_sp_enable) temp_pred_SAD[hv] = SAD_a;
+       break;
+     case MVPRED_U:
+       pred_vec = mv_b;
+       if(fastme_sp_enable) temp_pred_SAD[hv] = SAD_b;
+       break;
+     case MVPRED_UR:
+       pred_vec = mv_c;
+       if(fastme_sp_enable) temp_pred_SAD[hv] = SAD_c;
+       break;
+     default:
+       break;
+     }
+     
+     pmv[hv] = pred_vec;
+     
+   }
+   
+   if(fastme_sp_enable) pred_SAD_space = temp_pred_SAD[0]>temp_pred_SAD[1]?temp_pred_SAD[1]:temp_pred_SAD[0];   // smaller of the two per-component values
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Initialize the motion search
+ *
+ * \par
+ *    Allocates and fills the tables used by the search routines:
+ *    spiral_search_x/y and spiral_hpel_search_x/y (search patterns),
+ *    mvbits and refbits (bit-cost lookups), byte_abs (absolute value
+ *    lookup) and motion_cost. After allocation mvbits and byte_abs are
+ *    advanced into the middle of their buffers so that they can be
+ *    indexed with negative values; Clear_Motion_Search_Module() undoes
+ *    that offset before freeing. no_mem_exit() is called when one of
+ *    the calloc() requests fails.
+ ************************************************************************
+ */
+ void
+ Init_Motion_Search_Module ()
+ {
+   int bits, i, imin, imax, k, l;
+   
+   int search_range               = input->search_range;
+   // never fewer than 9 entries: the pattern loop below always writes ring l=1
+   int max_search_points          = max(9, (2*search_range+1)*(2*search_range+1));
+   int max_ref_bits               = 1 + 2 * (int)floor(log(max(16,img->max_num_references+1)) / log(2) + 1e-10);
+   int max_ref                    = (1<<((max_ref_bits>>1)+1))-1;
+   int number_of_subpel_positions = 4 * (2*search_range+3);
+   int max_mv_bits                = 3 + 2 * (int)ceil (log(number_of_subpel_positions+1) / log(2) + 1e-10);
+   max_mvd                        = (1<<( max_mv_bits >>1)   )-1;
+   byte_abs_range                 = (img->max_imgpel_value > img->max_imgpel_value_uv) ? (img->max_imgpel_value + 1) * 64 : (img->max_imgpel_value_uv + 1) * 64;
+   
+   
+   //=====   CREATE ARRAYS   =====
+   //-----------------------------
+   if ((spiral_search_x = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: spiral_search_x");
+   if ((spiral_search_y = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: spiral_search_y");
+   if ((spiral_hpel_search_x = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: spiral_hpel_search_x");
+   if ((spiral_hpel_search_y = (short*)calloc(max_search_points, sizeof(short))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: spiral_hpel_search_y");
+   if ((mvbits = (int*)calloc(2*max_mvd+1, sizeof(int))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: mvbits");
+   if ((refbits = (int*)calloc(max_ref, sizeof(int))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: refbits");
+   if ((byte_abs = (unsigned int*)calloc(byte_abs_range, sizeof(unsigned int))) == NULL)
+     no_mem_exit("Init_Motion_Search_Module: byte_abs");
+   
+   get_mem4Dint (&motion_cost, 8, 2, img->max_num_references, 4);
+   
+   //--- set array offsets (tables are indexed with signed values from here on) ---
+   mvbits   += max_mvd;
+   byte_abs += byte_abs_range/2;
+   
+   
+   //=====   INIT ARRAYS   =====
+   //---------------------------
+   //--- init array: motion vector bits ---
+   // every magnitude in [2^(n-1), 2^n) is assigned the same odd bit count 2n+1
+   mvbits[0] = 1;
+   for (bits=3; bits<=max_mv_bits; bits+=2)
+   {
+     imax = 1    << (bits >> 1);
+     imin = imax >> 1;
+     
+     for (i = imin; i < imax; i++)   mvbits[-i] = mvbits[i] = bits;
+   }
+   //--- init array: reference frame bits ---
+   refbits[0] = 1;
+   for (bits=3; bits<=max_ref_bits; bits+=2)
+   {
+     imax = (1   << ((bits >> 1) + 1)) - 1;
+     imin = imax >> 1;
+     
+     for (i = imin; i < imax; i++)   refbits[i] = bits;
+   }
+   //--- init array: absolute value ---
+   byte_abs[0] = 0;
+   for (i=1; i<byte_abs_range/2; i++)   byte_abs[i] = byte_abs[-i] = i;
+   // the lowest valid index has no positive counterpart in the table;
+   // fill it explicitly instead of leaving calloc's 0 there
+   byte_abs[-(byte_abs_range/2)] = byte_abs_range/2;
+   //--- init array: search pattern ---
+   // ring l holds all offsets with max(|x|,|y|) == l; the hpel tables hold
+   // the same offsets doubled. Multiplication is used instead of "<< 1"
+   // because left-shifting a negative value is undefined behaviour in C.
+   spiral_search_x[0] = spiral_search_y[0] = 0;
+   spiral_hpel_search_x[0] = spiral_hpel_search_y[0] = 0;
+   for (k=1, l=1; l<=max(1,search_range); l++)
+   {
+     //--- top and bottom edge of the ring (corners excluded) ---
+     for (i=-l+1; i< l; i++)
+     {
+       spiral_search_x[k] =  i;  spiral_search_y[k] = -l;
+       spiral_hpel_search_x[k] =  i*2;  spiral_hpel_search_y[k++] = -l*2;
+       spiral_search_x[k] =  i;  spiral_search_y[k] =  l;
+       spiral_hpel_search_x[k] =  i*2;  spiral_hpel_search_y[k++] =  l*2;
+     }
+     //--- left and right edge of the ring (corners included) ---
+     for (i=-l;   i<=l; i++)
+     {
+       spiral_search_x[k] = -l;  spiral_search_y[k] =  i;
+       spiral_hpel_search_x[k] = -l*2;  spiral_hpel_search_y[k++] = i*2;
+       spiral_search_x[k] =  l;  spiral_search_y[k] =  i;
+       spiral_hpel_search_x[k] =  l*2;  spiral_hpel_search_y[k++] = i*2;
+     }
+   }
+   
+ #ifdef _FAST_FULL_ME_
+   //--- the fast full search tables are only set up when FMEnable is 0 ---
+   if(!input->FMEnable)
+     InitializeFastFullIntegerSearch ();
+ #endif
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release the tables set up by Init_Motion_Search_Module()
+  *
+  * \par
+  *    mvbits and byte_abs were advanced into the middle of their buffers
+  *    at init time, so both are moved back to the start of the allocation
+  *    before they are handed to free().
+  ************************************************************************
+  */
+ void
+ Clear_Motion_Search_Module ()
+ {
+   //--- tables with an index offset: rewind, then release ---
+   byte_abs -= byte_abs_range/2;
+   mvbits   -= max_mvd;
+   free (byte_abs);
+   free (mvbits);
+   
+   //--- plain tables ---
+   free (refbits);
+   free (spiral_hpel_search_y);
+   free (spiral_hpel_search_x);
+   free (spiral_search_y);
+   free (spiral_search_x);
+   
+   //--- 4D cost array ---
+   free_mem4Dint (motion_cost, 8, 2);
+   
+ #ifdef _FAST_FULL_ME_
+   //--- mirrors the condition used when the fast full search was set up ---
+   if(input->FMEnable == 0)
+     ClearFastFullIntegerSearch ();
+ #endif
+ }
+ 
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Full pixel block motion search
+  *
+  * \par
+  *    Visits the first (2*search_range+1)^2 entries of the spiral search
+  *    pattern around the search center. For each candidate the motion
+  *    vector cost is computed first; the SAD against the reference plane
+  *    is accumulated row by row only while the running cost is still
+  *    below the best cost found so far.
+  *
+  * \return
+  *    the smallest cost found, or the incoming min_mcost when no
+  *    candidate was cheaper
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ FullPelBlockMotionSearch (pel_t**   orig_pic,     // <--  original pixel values for the AxB block
+                           short     ref,          // <--  reference frame (0... or -1 (backward))
+                           int       list,         // <--  current list
+                           int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+                           int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+                           int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+                           short     pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+                           short     pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+                           short*    mv_x,         // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    mv_y,         // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           int       search_range, // <--  1-d search range in pel units
+                           int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+                           int       lambda_factor)       // <--  lagrangian parameter for determining motion cost
+ {
+   int   pos, cand_x, cand_y, row, col, mcost;
+   
+   pel_t *orig_line, *ref_line, *ref_pic;
+   pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int);
+   
+   int   ref_list      = list + img->mb_data[img->current_mb_nr].list_offset;   // list index incl. the MB's list offset
+   int   img_width     = listX[ref_list][ref]->size_x;
+   int   img_height    = listX[ref_list][ref]->size_y;
+   
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   row_pels      = (blocksize_x >> 2) * 4;                   // samples compared per row (whole groups of 4)
+   int   max_pos       = (2*search_range+1)*(2*search_range+1);    // number of search positions
+   int   best_pos      = 0;                                        // pattern index of the cheapest candidate
+   int   pred_x        = (pic_pix_x << 2) + pred_mv_x;             // predicted position x (in sub-pel units)
+   int   pred_y        = (pic_pix_y << 2) + pred_mv_y;             // predicted position y (in sub-pel units)
+   int   center_x      = pic_pix_x + *mv_x;                        // center position x (in pel units)
+   int   center_y      = pic_pix_y + *mv_y;                        // center position y (in pel units)
+   int   check_for_00  = (blocktype==1 && !input->rdopt && img->type!=B_SLICE && ref==0);
+   int   p_type        = (img->type == P_SLICE || img->type == SP_SLICE);
+   int   apply_weights = ((p_type && active_pps->weighted_pred_flag) ||
+                          (img->type == B_SLICE && active_pps->weighted_bipred_idc));
+   
+   //===== choose the weighted or the plain integer-pel reference plane =====
+   if (apply_weights && input->UseWeightedReferenceME)
+     ref_pic = listX[ref_list][ref]->imgY_11_w;
+   else
+     ref_pic = listX[ref_list][ref]->imgY_11;
+   
+   //===== line access: UMVLineX unless center and range/block margins lie inside the picture =====
+   get_ref_line = UMVLineX;
+   if ((center_x > search_range) && (center_x < img_width -1-search_range-blocksize_x) &&
+       (center_y > search_range) && (center_y < img_height-1-search_range-blocksize_y))
+   {
+     get_ref_line = FastLineX;
+   }
+   
+   //===== walk the spiral pattern =====
+   for (pos = 0; pos < max_pos; pos++)
+   {
+     //--- candidate position (absolute, in pel units) ---
+     cand_x = center_x + spiral_search_x[pos];
+     cand_y = center_y + spiral_search_y[pos];
+     
+     //--- start with the cost of coding the motion vector ---
+     mcost = MV_COST (lambda_factor, 2, cand_x, cand_y, pred_x, pred_y);
+     if (check_for_00 && cand_x==pic_pix_x && cand_y==pic_pix_y)
+     {
+       //--- bonus for the zero vector ---
+       mcost -= WEIGHTED_COST (lambda_factor, 16);
+     }
+     
+     if (mcost < min_mcost)
+     {
+       //--- add the residual cost, stopping once the best cost is reached ---
+       for (row = 0; row < blocksize_y && mcost < min_mcost; row++)
+       {
+         ref_line  = get_ref_line (blocksize_x, ref_pic, cand_y+row, cand_x, img_height, img_width);
+         orig_line = orig_pic [row];
+         
+         for (col = 0; col < row_pels; col++)
+         {
+           mcost += byte_abs[ orig_line[col] - ref_line[col] ];
+         }
+       }
+       
+       //--- keep the candidate if it is strictly cheaper ---
+       if (mcost < min_mcost)
+       {
+         min_mcost = mcost;
+         best_pos  = pos;
+       }
+     }
+   }
+   
+   
+   //===== shift the motion vector by the winning offset (entry 0 is the center itself) =====
+   if (best_pos != 0)
+   {
+     *mv_x += spiral_search_x[best_pos];
+     *mv_y += spiral_search_y[best_pos];
+   }
+   return min_mcost;
+ }
+                           
+                           
+ #ifdef _FAST_FULL_ME_
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Fast Full pixel block motion search
+  *
+  * \par
+  *    Uses the SAD values stored in BlockSAD for every position of the
+  *    spiral pattern, so only the motion vector cost has to be added per
+  *    candidate. SetupFastFullPelSearch() is invoked first when
+  *    search_setup_done is not yet set for this list / reference.
+  *
+  * \return
+  *    the smallest cost found, or the incoming min_mcost when no
+  *    candidate was cheaper
+  ***********************************************************************
+  */
+ int                                                   //  ==> minimum motion cost after search
+ FastFullPelBlockMotionSearch (pel_t**   orig_pic,     // <--  not used
+                               short     ref,          // <--  reference frame (0... or -1 (backward))
+                               int       list,
+                               int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
+                               int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
+                               int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
+                               short     pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
+                               short     pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
+                               short*    mv_x,         //  --> motion vector (x) - in pel units
+                               short*    mv_y,         //  --> motion vector (y) - in pel units
+                               int       search_range, // <--  1-d search range in pel units
+                               int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
+                               int       lambda_factor)       // <--  lagrangian parameter for determining motion cost
+ {
+   int   pos, mcost, cand_x, cand_y, offset_x, offset_y;
+   
+   int   best_pos    = 0;                                        // pattern index of the cheapest candidate
+   int   max_pos     = (2*search_range+1)*(2*search_range+1);    // number of search positions
+   int   block_index = (pic_pix_y-img->opix_y)+((pic_pix_x-img->opix_x)>>2);   // block index into the SAD array
+   distpel*  sad_tab = BlockSAD[list][ref][blocktype][block_index];           // one SAD per pattern position
+   
+   //===== prepare the SAD tables / search center on first use =====
+   if (!search_setup_done[list][ref])
+   {
+     SetupFastFullPelSearch (ref, list);
+   }
+   
+   //===== search center relative to the opix origin =====
+   offset_x = search_center_x[list][ref] - img->opix_x;
+   offset_y = search_center_y[list][ref] - img->opix_y;
+   
+   //===== cost for (0,0)-vector: it is done before, because MVCost can be negative =====
+   if (!input->rdopt)
+   {
+     mcost = sad_tab[pos_00[list][ref]] + MV_COST (lambda_factor, 2, 0, 0, pred_mv_x, pred_mv_y);
+     
+     if (mcost < min_mcost)
+     {
+       best_pos  = pos_00[list][ref];
+       min_mcost = mcost;
+     }
+   }
+   
+   //===== scan every position of the pattern =====
+   for (pos = 0; pos < max_pos; pos++)
+   {
+     //--- skip positions whose SAD alone is not below the best cost ---
+     if (!(sad_tab[pos] < min_mcost))
+       continue;
+     
+     //--- SAD plus motion vector cost ---
+     cand_x = offset_x + spiral_search_x[pos];
+     cand_y = offset_y + spiral_search_y[pos];
+     mcost  = sad_tab[pos];
+     mcost += MV_COST (lambda_factor, 2, cand_x, cand_y, pred_mv_x, pred_mv_y);
+     
+     if (mcost < min_mcost)
+     {
+       best_pos  = pos;
+       min_mcost = mcost;
+     }
+   }
+   
+   //===== set best motion vector and return minimum motion cost =====
+   *mv_x = offset_x + spiral_search_x[best_pos];
+   *mv_y = offset_y + spiral_search_y[best_pos];
+   return min_mcost;
+ }
+ #endif
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Calculate SA(T)D
+  *
+  * \param diff
+  *    16 difference values (elements 0..15 are read)
+  * \param use_hadamard
+  *    non-zero: run the 16 values through the Hadamard butterflies below
+  *    and return the sum of absolute coefficients, halved with rounding;
+  *    zero: return the plain sum of absolute differences
+  *
+  * \par
+  *    Absolute values are taken from the byte_abs lookup table, so every
+  *    value looked up must lie inside the index range of that table.
+  ***********************************************************************
+  */
+ int
+ SATD (int* diff, int use_hadamard)
+ {
+   int k, satd = 0, m[16], d[16];
+   
+   if (use_hadamard)
+   {
+     /*===== hadamard transform =====*/
+     m[ 0] = diff[ 0] + diff[12];
+     m[ 1] = diff[ 1] + diff[13];
+     m[ 2] = diff[ 2] + diff[14];
+     m[ 3] = diff[ 3] + diff[15];
+     m[ 4] = diff[ 4] + diff[ 8];
+     m[ 5] = diff[ 5] + diff[ 9];
+     m[ 6] = diff[ 6] + diff[10];
+     m[ 7] = diff[ 7] + diff[11];
+     m[ 8] = diff[ 4] - diff[ 8];
+     m[ 9] = diff[ 5] - diff[ 9];
+     m[10] = diff[ 6] - diff[10];
+     m[11] = diff[ 7] - diff[11];
+     m[12] = diff[ 0] - diff[12];
+     m[13] = diff[ 1] - diff[13];
+     m[14] = diff[ 2] - diff[14];
+     m[15] = diff[ 3] - diff[15];
+     
+     d[ 0] = m[ 0] + m[ 4];
+     d[ 1] = m[ 1] + m[ 5];
+     d[ 2] = m[ 2] + m[ 6];
+     d[ 3] = m[ 3] + m[ 7];
+     d[ 4] = m[ 8] + m[12];
+     d[ 5] = m[ 9] + m[13];
+     d[ 6] = m[10] + m[14];
+     d[ 7] = m[11] + m[15];
+     d[ 8] = m[ 0] - m[ 4];
+     d[ 9] = m[ 1] - m[ 5];
+     d[10] = m[ 2] - m[ 6];
+     d[11] = m[ 3] - m[ 7];
+     d[12] = m[12] - m[ 8];
+     d[13] = m[13] - m[ 9];
+     d[14] = m[14] - m[10];
+     d[15] = m[15] - m[11];
+     
+     m[ 0] = d[ 0] + d[ 3];
+     m[ 1] = d[ 1] + d[ 2];
+     m[ 2] = d[ 1] - d[ 2];
+     m[ 3] = d[ 0] - d[ 3];
+     m[ 4] = d[ 4] + d[ 7];
+     m[ 5] = d[ 5] + d[ 6];
+     m[ 6] = d[ 5] - d[ 6];
+     m[ 7] = d[ 4] - d[ 7];
+     m[ 8] = d[ 8] + d[11];
+     m[ 9] = d[ 9] + d[10];
+     m[10] = d[ 9] - d[10];
+     m[11] = d[ 8] - d[11];
+     m[12] = d[12] + d[15];
+     m[13] = d[13] + d[14];
+     m[14] = d[13] - d[14];
+     m[15] = d[12] - d[15];
+     
+     d[ 0] = m[ 0] + m[ 1];
+     d[ 1] = m[ 0] - m[ 1];
+     d[ 2] = m[ 2] + m[ 3];
+     d[ 3] = m[ 3] - m[ 2];
+     d[ 4] = m[ 4] + m[ 5];
+     d[ 5] = m[ 4] - m[ 5];
+     d[ 6] = m[ 6] + m[ 7];
+     d[ 7] = m[ 7] - m[ 6];
+     d[ 8] = m[ 8] + m[ 9];
+     d[ 9] = m[ 8] - m[ 9];
+     d[10] = m[10] + m[11];
+     d[11] = m[11] - m[10];
+     d[12] = m[12] + m[13];
+     d[13] = m[12] - m[13];
+     d[14] = m[14] + m[15];
+     d[15] = m[15] - m[14];
+     
+     //===== sum up =====
+     // Absolute values come from the byte_abs table. The former
+     // FMEnable == 2 special case executed exactly the same statements
+     // as its else branch, so a single loop replaces both.
+     for (k=0; k<16; ++k)
+     {
+       satd += byte_abs [d [k]];
+     }
+     satd = ((satd+1)>>1);   // halve with rounding
+   }
+   else
+   {
+     /*===== sum up =====*/
+     for (k = 0; k < 16; k++)
+     {
+       satd += byte_abs [diff [k]];
+     }
+   }
+   
+   return satd;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Calculate SA(T)D for 8x8
+  *
+  * \param diff
+  *    64 difference values, read as 8 rows of 8 (row j starts at j*8)
+  * \param use_hadamard
+  *    non-zero: transform rows, then columns, with three add/subtract
+  *    butterfly stages each and return the sum of absolute coefficients
+  *    as (sum+2)>>2; zero: return the sum of byte_abs[diff[i]]
+  ***********************************************************************
+  */
+ int
+ SATD8X8 (int* diff, int use_hadamard)
+ {
+   int r, c, k, half, sad = 0;
+   int s1[8][8], s2[8][8], coef[8][8];
+   int *src;
+   
+   //--- no transform requested: plain sum of absolute differences ---
+   if (!use_hadamard)
+   {
+     for (k = 0; k < 64; k++)
+       sad += byte_abs [diff [k]];
+     return sad;
+   }
+   
+   //horizontal
+   for (r = 0; r < 8; r++)
+   {
+     src = diff + (r << 3);
+     
+     //--- stage 1: combine elements four apart ---
+     for (k = 0; k < 4; k++)
+     {
+       s1[r][k    ] = src[k] + src[k + 4];
+       s1[r][k + 4] = src[k] - src[k + 4];
+     }
+     //--- stage 2: combine elements two apart inside each half ---
+     for (half = 0; half < 8; half += 4)
+     {
+       for (k = half; k < half + 2; k++)
+       {
+         s2[r][k    ] = s1[r][k] + s1[r][k + 2];
+         s2[r][k + 2] = s1[r][k] - s1[r][k + 2];
+       }
+     }
+     //--- stage 3: combine neighbours ---
+     for (k = 0; k < 8; k += 2)
+     {
+       coef[r][k    ] = s2[r][k] + s2[r][k + 1];
+       coef[r][k + 1] = s2[r][k] - s2[r][k + 1];
+     }
+   }
+   
+   //vertical
+   for (c = 0; c < 8; c++)
+   {
+     //--- stage 1: combine elements four apart ---
+     for (k = 0; k < 4; k++)
+     {
+       s1[k    ][c] = coef[k][c] + coef[k + 4][c];
+       s1[k + 4][c] = coef[k][c] - coef[k + 4][c];
+     }
+     //--- stage 2: combine elements two apart inside each half ---
+     for (half = 0; half < 8; half += 4)
+     {
+       for (k = half; k < half + 2; k++)
+       {
+         s2[k    ][c] = s1[k][c] + s1[k + 2][c];
+         s2[k + 2][c] = s1[k][c] - s1[k + 2][c];
+       }
+     }
+     //--- stage 3: combine neighbours ---
+     for (k = 0; k < 8; k += 2)
+     {
+       coef[k    ][c] = s2[k][c] + s2[k + 1][c];
+       coef[k + 1][c] = s2[k][c] - s2[k + 1][c];
+     }
+   }
+   
+   //--- sum of absolute coefficients, scaled down by 4 with rounding ---
+   for (r = 0; r < 8; r++)
+     for (c = 0; c < 8; c++)
+       sad += (absm(coef[r][c]));
+   
+   return ((sad+2)>>2);
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sum the 8x8 SA(T)D over all 8x8 sub-blocks of a partition
+  *
+  * \param c_diff
+  *    differences stored as consecutive 8x8 sub-blocks of 64 values
+  * \param blocktype
+  *    1: four sub-blocks are evaluated, 2 and 3: two, 4: one
+  *
+  * \return
+  *    sum of SATD8X8() over the sub-blocks (input->hadamard is passed
+  *    through as the transform switch), or -1 for any other blocktype
+  ***********************************************************************
+  */
+ int
+ find_SATD (int c_diff[MB_PIXELS], int blocktype)
+ {
+   int num_8x8, blk, sad;
+   
+   //--- number of 8x8 sub-blocks covered by the block type ---
+   switch(blocktype)
+   {
+   case 1:
+     num_8x8 = 4;
+     break;
+   case 2:
+   case 3:
+     num_8x8 = 2;
+     break;
+   case 4:
+     num_8x8 = 1;
+     break;
+   default:
+     //--- block types without an 8x8 sub-block are not handled here ---
+     return -1;
+   }
+   
+   //--- sub-block number blk starts at offset blk*64 ---
+   sad = 0;
+   for (blk = 0; blk < num_8x8; blk++)
+     sad += SATD8X8 (&c_diff[blk << 6], input->hadamard);
+   
+   return sad;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sub pixel block motion search
+  *
+  * \par
+  *    Two refinement stages around the incoming vector:
+  *    1) half-pel: candidates are *mv + spiral_hpel_search_x/y[pos]
+  *       (spiral offsets doubled), pos in [min_pos2, max_pos2)
+  *    2) quarter-pel: candidates are *mv + spiral_search_x/y[pos],
+  *       pos in [qpelstart, search_pos4)
+  *    For every candidate the cost starts with MV_COST and the block is
+  *    then processed in 4x4 pieces. Either SATD() of each piece is added
+  *    immediately (with early abort), or - when test8x8transform is set -
+  *    the differences are gathered in c_diff and find_SATD() is applied
+  *    once per candidate.
+  *
+  * \par
+  *    NOTE(review): *mv_x / *mv_y are combined with quarter-sample
+  *    positions (pic4_pix_x/y) and quarter-sample offsets throughout this
+  *    function, so they are treated as quarter-pel values here.
+  *
+  * \return
+  *    the cost of the best quarter-pel candidate; when input->hadamard
+  *    is 2 the half-pel result is discarded (min_mcost reset to INT_MAX)
+  *    before the quarter-pel stage
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ SubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame (0... or -1 (backward))
+                          int       list,          // <--  reference picture list 
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          int       pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          int       pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - used as quarter-pel value in this function
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - used as quarter-pel value in this function
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int       lambda_factor  // <--  lagrangian parameter for determining motion cost
+                          )
+ {
+   int   j, i, k;
+   int   c_diff[MB_PIXELS];   // differences regrouped into consecutive 8x8 sub-blocks for find_SATD
+   int   diff[16], *d;        // differences of the current 4x4 piece, row by row
+   int   pos, best_pos, mcost, abort_search;
+   int   y0, y1, y2, y3;
+   int   x0;
+   int   ry0, ry4, ry8, ry12, rx0;
+   
+   int   cand_mv_x, cand_mv_y;
+   int   y_offset, ypels =(128 - 64 * (blocktype == 3));   // c_diff offset of rows 8..15: 128, or 64 for blocktype 3
+   
+   int   check_position0 = (!input->rdopt && img->type!=B_SLICE && ref==0 && blocktype==1 && *mv_x==0 && *mv_y==0 && input->hadamard);
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);   // block position incl. padding, times 4
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+   int   min_pos2        = (input->hadamard == 1 ? 0 : 1);     // center (pos 0) is only part of the half-pel stage when hadamard == 1
+   int   max_pos2        = (input->hadamard ? max(1,search_pos2) : search_pos2);
+   int   list_offset     = img->mb_data[img->current_mb_nr].list_offset; 
+   int   apply_weights   = ((active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+     (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));  
+   int   halfpelhadamard  = input->hadamard == 2 ? 0 : input->hadamard;   // hadamard == 2: no transform in the half-pel stage
+   int   qpelstart        = input->hadamard == 2 ? 0 : 1;                 // hadamard == 2: quarter-pel stage also evaluates pos 0
+   int   test8x8transform = input->Transform8x8Mode && blocktype <= 4 && halfpelhadamard;
+   int   cmv_x, cmv_y;
+   
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   pel_t **ref_pic = (apply_weights && input->UseWeightedReferenceME)? ref_picture->imgY_ups_w : ref_picture->imgY_ups;      
+   pel_t *ref_line;
+   pel_t *orig_line;  
+   int img_width  = ((ref_picture->size_x + 2*IMG_PAD_SIZE - 1)<<2);
+   int img_height = ((ref_picture->size_y + 2*IMG_PAD_SIZE - 1)<<2);
+   int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+   
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1)   )
+   {
+     get_line = FastLine4X;
+   }
+   else
+   {
+     get_line = UMVLine4X;    
+   }
+   
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = min_pos2; pos < max_pos2; pos++)
+   {
+     cand_mv_x = *mv_x + (spiral_hpel_search_x[pos]);    // quarter-pel units
+     cand_mv_y = *mv_y + (spiral_hpel_search_y[pos]);    // quarter-pel units
+     
+     //----- set motion vector cost -----
+     mcost = MV_COST (lambda_factor, 0, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     
+     if (check_position0 && pos==0)
+     {
+       mcost -= WEIGHTED_COST (lambda_factor, 16);
+     }
+     
+     if (mcost >= min_mcost) continue;
+     
+     cmv_x = cand_mv_x + pic4_pix_x;   // candidate position in the padded picture, times-4 scale
+     cmv_y = cand_mv_y + pic4_pix_y;
+     
+     //----- add up SATD -----
+     for (y0=0, abort_search=0; y0<blocksize_y && !abort_search; y0+=4)
+     {
+       y_offset = (y0>7)*ypels;
+       ry0  = (y0<<2) + cmv_y;
+       ry4  = ry0 + 4;   // +4 on the times-4 scale = next original row
+       ry8  = ry4 + 4;
+       ry12 = ry8 + 4;
+       y1 = y0 + 1;
+       y2 = y1 + 1;
+       y3 = y2 + 1;
+       
+       for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+       {
+         rx0 = (x0<<2) + cmv_x;
+         d   = diff;
+         
+         orig_line = &orig_pic [y0][x0];    
+         ref_line  = get_line (ref_pic, ry0, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);   // reference advances 4 entries per original sample
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y1][x0];    
+         ref_line  = get_line (ref_pic, ry4, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y2][x0];
+         ref_line  = get_line (ref_pic, ry8, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y3][x0];    
+         ref_line  = get_line (ref_pic, ry12, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         if (!test8x8transform)
+         {
+           if ((mcost += SATD (diff, halfpelhadamard)) > min_mcost)
+           {
+             abort_search = 1;   // also ends the outer y0 loop
+             break;
+           }
+         }
+         else
+         {
+           i = (x0&0x7) +  (x0>7) * 64 + y_offset;   // column inside the 8x8 sub-block + start of that sub-block in c_diff
+           for(k=0, j=y0; j<BLOCK_SIZE + y0; j++, k+=BLOCK_SIZE)
+             memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));   // one row of the 4x4 piece per copy
+         }
+       }
+     }
+     
+     
+     if(test8x8transform)
+       mcost += find_SATD (c_diff, blocktype);
+     
+     
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+   }
+   if (best_pos)
+   {
+     *mv_x += (spiral_hpel_search_x [best_pos]);
+     *mv_y += (spiral_hpel_search_y [best_pos]);
+   }
+   
+   if (input->hadamard == 2)
+     min_mcost = INT_MAX;   // half-pel stage ran without transform; restart the cost comparison for the quarter-pel stage
+   
+   test8x8transform = input->Transform8x8Mode && blocktype <= 4 && input->hadamard;
+   
+   /************************************
+    *****                          *****
+    *****  QUARTER-PEL REFINEMENT  *****
+    *****                          *****
+    ************************************/
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + *mv_x > 0) && (pic4_pix_x + *mv_x < max_pos_x4) &&
+     (pic4_pix_y + *mv_y > 0) && (pic4_pix_y + *mv_y < max_pos_y4)   )
+   {
+     get_line = FastLine4X;
+   }
+   else
+   {
+     get_line = UMVLine4X;    
+   }
+   
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = qpelstart; pos < search_pos4; pos++)
+   {
+     cand_mv_x = *mv_x + spiral_search_x[pos];    // quarter-pel units
+     cand_mv_y = *mv_y + spiral_search_y[pos];    // quarter-pel units
+     
+     //----- set motion vector cost -----
+     mcost = MV_COST (lambda_factor, 0, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     
+     if (mcost >= min_mcost) continue;
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+     
+     //----- add up SATD -----
+     for (y0=0, abort_search=0; y0<blocksize_y && !abort_search; y0+=4)
+     {
+       y_offset = (y0>7)*ypels;
+       ry0 = (y0<<2) + cmv_y;
+       ry4  = ry0 + 4;
+       ry8  = ry4 + 4;
+       ry12 = ry8 + 4;
+       y1 = y0 + 1;
+       y2 = y1 + 1;
+       y3 = y2 + 1;
+       
+       for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+       {
+         rx0  = (x0<<2) + cmv_x;
+         d    = diff;
+         
+         orig_line = &orig_pic [y0][x0];    
+         ref_line  = get_line (ref_pic, ry0, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y1][x0];    
+         ref_line  = get_line (ref_pic, ry4, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y2][x0];
+         ref_line  = get_line (ref_pic, ry8, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y3][x0];    
+         ref_line  = get_line (ref_pic, ry12, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d        = *orig_line   - *(ref_line += 4);
+         
+         if (!test8x8transform)
+         {
+           if ((mcost += SATD (diff, input->hadamard)) > min_mcost)
+           {
+             abort_search = 1;   // also ends the outer y0 loop
+             break;
+           }
+         }
+         else
+         {
+           i = (x0&0x7) + (x0>7) * 64 + y_offset;   // same c_diff layout as in the half-pel stage
+           for(k=0, j=y0; j<y0 + BLOCK_SIZE; j++, k+=BLOCK_SIZE)
+             memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+         }
+       }
+     }
+     
+     if(test8x8transform)
+       mcost += find_SATD (c_diff, blocktype);
+     
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+   }
+   if (best_pos)
+   {
+     *mv_x += spiral_search_x [best_pos];
+     *mv_y += spiral_search_y [best_pos];
+   }
+   
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Sub pixel block motion search enhanced
+  *
+  *    Refines the vector in *mv_x / *mv_y in two stages. Stage one walks
+  *    the offsets in spiral_hpel_search_x/y (half-pel refinement), stage
+  *    two walks the offsets in spiral_search_x/y (quarter-pel refinement)
+  *    around the result of stage one. Each candidate is charged
+  *    MV_COST() plus a distortion term computed per 4x4 sub-block:
+  *    either SATD() on the 4x4 residual, or, when test8x8transform is
+  *    set, find_SATD() on the residuals gathered in c_diff.
+  *
+  *    Early exits visible in the code:
+  *      - a candidate is dropped as soon as its running cost reaches
+  *        min_mcost;
+  *      - either stage stops scanning once min_mcost falls below
+  *        SubPelThreshold3 >> block_type_shift_factor[blocktype];
+  *      - the quarter-pel stage is skipped entirely when the vector and
+  *        its predictor are both zero and min_mcost is below
+  *        SubPelThreshold1 >> block_type_shift_factor[blocktype].
+  *
+  *    Side effect: assigns get_line, which is not declared inside this
+  *    function.
+  *
+  * \return
+  *    The smallest cost found (min_mcost); *mv_x and *mv_y are updated
+  *    when a position other than 0 wins a stage.
+  ***********************************************************************
+  */
+ int                                               //  ==> minimum motion cost after search
+ simplified_FastFullSubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame index into listX[list + list_offset]
+                          int       list,          // <--  reference picture list 
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          int       pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          int       pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in quarter-pel units
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in quarter-pel units
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int       lambda_factor  // <--  lagrangian parameter for determining motion cost
+                          )
+ {
+   int   j, i, k;
+   int   c_diff[MB_PIXELS];   // residuals collected for find_SATD when test8x8transform is set
+   int   diff[16], *d;        // residual of the current 4x4 sub-block and the write cursor into it
+   int   pos, best_pos, mcost, abort_search;
+   int   y0, y1, y2, y3;
+   int   x0;
+   int   ry0, ry4, ry8, ry12, rx0;
+   
+   int   cand_mv_x, cand_mv_y;
+   int   y_offset, ypels =(128 - 64 * (blocktype == 3));   // c_diff offset applied to rows with y0 > 7
+   
+   int   check_position0 = (!input->rdopt && img->type!=B_SLICE && ref==0 && blocktype==1 && *mv_x==0 && *mv_y==0 && input->hadamard);   // enables the WEIGHTED_COST reduction for half-pel position 0
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);   // block origin, padded, scaled by 4
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+   int   min_pos2        = (input->hadamard == 1 ? 0 : 1);     // position 0 is part of the half-pel scan only for hadamard == 1
+   int   max_pos2        = (input->hadamard ? max(1,search_pos2) : search_pos2);
+   int   list_offset     = img->mb_data[img->current_mb_nr].list_offset; 
+   int   apply_weights   = ((active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                            (active_pps->weighted_bipred_idc && (img->type == B_SLICE)));  
+   int   halfpelhadamard  = input->hadamard == 2 ? 0 : input->hadamard;   // hadamard == 2 passes 0 to SATD in the half-pel stage
+   int   qpelstart        = input->hadamard == 2 ? 0 : 1;                 // hadamard == 2 re-evaluates position 0 in the quarter-pel stage
+   int   test8x8transform = input->Transform8x8Mode && blocktype <= 4 && halfpelhadamard;
+   int   cmv_x, cmv_y;
+   
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   pel_t **ref_pic = (apply_weights && input->UseWeightedReferenceME)? ref_picture->imgY_ups_w : ref_picture->imgY_ups;      
+   pel_t *ref_line;
+   pel_t *orig_line;  
+   int img_width  = ((ref_picture->size_x + 2*IMG_PAD_SIZE - 1)<<2);
+   int img_height = ((ref_picture->size_y + 2*IMG_PAD_SIZE - 1)<<2);
+   int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+   
+   //===== choose the line getter: FastLine4X when the search centre passes the bounds test below, UMVLine4X otherwise =====
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1)   )
+   {
+     get_line = FastLine4X;
+   }
+   else
+   {
+     get_line = UMVLine4X;    
+   }
+   
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = min_pos2; pos < max_pos2; pos++)
+   {
+     cand_mv_x = *mv_x + (spiral_hpel_search_x[pos]);    // quarter-pel units
+     cand_mv_y = *mv_y + (spiral_hpel_search_y[pos]);    // quarter-pel units
+     
+     //----- set motion vector cost -----
+     mcost = MV_COST (lambda_factor, 0, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     
+     if (check_position0 && pos==0)
+     {
+       mcost -= WEIGHTED_COST (lambda_factor, 16);
+     }
+     
+     if (mcost >= min_mcost) continue;   // the vector cost alone already rules this candidate out
+     
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+     
+     //----- add up SATD -----
+     for (y0=0, abort_search=0; y0<blocksize_y && !abort_search; y0+=4)
+     {
+       y_offset = (y0>7)*ypels;
+       ry0  = (y0<<2) + cmv_y;
+       ry4  = ry0 + 4;
+       ry8  = ry4 + 4;
+       ry12 = ry8 + 4;
+       y1 = y0 + 1;
+       y2 = y1 + 1;
+       y3 = y2 + 1;
+       
+       for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+       {
+         rx0 = (x0<<2) + cmv_x;
+         d   = diff;
+         // fill diff[0..15] row by row: neighbouring samples are 4 entries apart in ref_pic, rows use ry0/ry4/ry8/ry12
+         orig_line = &orig_pic [y0][x0];    
+         ref_line  = get_line (ref_pic, ry0, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y1][x0];    
+         ref_line  = get_line (ref_pic, ry4, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y2][x0];
+         ref_line  = get_line (ref_pic, ry8, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y3][x0];    
+         ref_line  = get_line (ref_pic, ry12, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);   // d ends one past diff[15]; it is reset before its next use
+         // charge this 4x4 sub-block now (SATD) or stash its residual for find_SATD
+         if (!test8x8transform)
+         {
+           if ((mcost += SATD (diff, halfpelhadamard)) >= min_mcost)
+           {
+             abort_search = 1;   // also ends the outer y0 loop via its condition
+             break;
+           }
+         }
+         else
+         {
+           i = (x0&0x7) +  (x0>7) * 64 + y_offset;   // column inside an 8-wide tile, +64 for x0 > 7, + y_offset for y0 > 7
+           for(k=0, j=y0; j<BLOCK_SIZE + y0; j++, k+=BLOCK_SIZE)
+             memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));   // one residual row per copy, 8 ints per tile row
+         }
+       }
+     }
+     
+     if(test8x8transform)
+       mcost += find_SATD (c_diff, blocktype);
+     
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))   // good enough: stop scanning half-pel positions
+     {
+       break;
+     }
+   }
+ 
+   if (best_pos)
+   {
+     *mv_x += (spiral_hpel_search_x [best_pos]);
+     *mv_y += (spiral_hpel_search_y [best_pos]);
+   }
+   // zero vector, zero predictor and a cost below the scaled SubPelThreshold1: skip the quarter-pel stage
+   if ((*mv_x == 0) && (*mv_y == 0) && (pred_mv_x == 0 && pred_mv_y == 0) &&
+ 	   (min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) ) 
+   {
+       best_pos = 0;   // has no effect: best_pos is not read before the return
+       return min_mcost;
+   }
+   // hadamard == 2: drop the half-pel cost so every quarter-pel candidate (position 0 included) is measured afresh
+   if (input->hadamard == 2)
+     min_mcost = INT_MAX;
+   // from here on the distortion uses input->hadamard itself rather than halfpelhadamard
+   test8x8transform = input->Transform8x8Mode && blocktype <= 4 && input->hadamard;
+   
+   /************************************
+    *****                          *****
+    *****  QUARTER-PEL REFINEMENT  *****
+    *****                          *****
+    ************************************/
+   //===== choose the line getter again for the refined centre (bounds are one unit wider than in the half-pel stage) =====
+   if ((pic4_pix_x + *mv_x > 0) && (pic4_pix_x + *mv_x < max_pos_x4) &&
+     (pic4_pix_y + *mv_y > 0) && (pic4_pix_y + *mv_y < max_pos_y4)   )
+   {
+     get_line = FastLine4X;
+   }
+   else
+   {
+     get_line = UMVLine4X;    
+   }
+   
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = qpelstart; pos < search_pos4; pos++)
+   {
+     cand_mv_x = *mv_x + spiral_search_x[pos];    // quarter-pel units
+     cand_mv_y = *mv_y + spiral_search_y[pos];    // quarter-pel units
+     
+     //----- set motion vector cost -----
+     mcost = MV_COST (lambda_factor, 0, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     
+     if (mcost >= min_mcost) continue;
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+     
+     //----- add up SATD -----
+     for (y0=0, abort_search=0; y0<blocksize_y && !abort_search; y0+=4)
+     {
+       y_offset = (y0>7)*ypels;
+       ry0 = (y0<<2) + cmv_y;
+       ry4  = ry0 + 4;
+       ry8  = ry4 + 4;
+       ry12 = ry8 + 4;
+       y1 = y0 + 1;
+       y2 = y1 + 1;
+       y3 = y2 + 1;
+       
+       for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+       {
+         rx0  = (x0<<2) + cmv_x;
+         d    = diff;
+         // same 4x4 residual gathering as in the half-pel stage
+         orig_line = &orig_pic [y0][x0];    
+         ref_line  = get_line (ref_pic, ry0, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y1][x0];    
+         ref_line  = get_line (ref_pic, ry4, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y2][x0];
+         ref_line  = get_line (ref_pic, ry8, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line   - *(ref_line += 4);
+         
+         orig_line = &orig_pic [y3][x0];    
+         ref_line  = get_line (ref_pic, ry12, rx0, img_height, img_width);
+         *d++      = *orig_line++ - *(ref_line     );
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d++      = *orig_line++ - *(ref_line += 4);
+         *d        = *orig_line   - *(ref_line += 4);
+         
+         if (!test8x8transform)
+         {
+           if ((mcost += SATD (diff, input->hadamard)) >= min_mcost)
+           {
+             abort_search = 1;
+             break;
+           }
+         }
+         else
+         {
+           i = (x0&0x7) + (x0>7) * 64 + y_offset;
+           for(k=0, j=y0; j<y0 + BLOCK_SIZE; j++, k+=BLOCK_SIZE)
+             memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+         }
+       }
+     }
+     
+     if(test8x8transform)
+       mcost += find_SATD (c_diff, blocktype);
+     
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))   // good enough: stop scanning quarter-pel positions
+     {
+       break;
+     }
+   }
+ 
+   if (best_pos) 
+   {
+     *mv_x += spiral_search_x [best_pos];
+     *mv_y += spiral_search_y [best_pos];
+   }
+   
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Full pixel block motion search for a bi-predicted block
+  *
+  *    Scans (2*search_range+1)^2 positions, taken from spiral_search_x/y,
+  *    around the centre pic_pix + *mv. The second prediction stays fixed
+  *    at pic_pix + *s_mv for the whole search. Every candidate is charged
+  *    MV_COST() for both vectors plus the sum of byte_abs[] over the
+  *    difference between the original samples and the combined
+  *    prediction. With weighted bi-prediction enabled the combination is
+  *    the weighted, rounded, offset and clipped sum of both references;
+  *    otherwise it is (ref1 + ref2) >> 1.
+  *
+  * \return
+  *    The smallest cost found (min_mcost). *mv_x / *mv_y are advanced by
+  *    the winning spiral offset when it is not position 0; *s_mv_x and
+  *    *s_mv_y are only read.
+  ***********************************************************************
+  */
+ int                                                //  ==> minimum motion cost after search
+ FullPelBlockMotionBiPred (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                           short       ref,         // <--  reference frame index into listX[list + list_offset]
+                           int       list,          // <--  reference picture list
+                           int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                           int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                           int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                           short     pred_mv_x1,    // <--  motion vector predictor (x) in sub-pel units, paired with the fixed vector
+                           short     pred_mv_y1,    // <--  motion vector predictor (y) in sub-pel units, paired with the fixed vector
+                           short     pred_mv_x2,    // <--  motion vector predictor (x) in sub-pel units, paired with the searched vector
+                           short     pred_mv_y2,    // <--  motion vector predictor (y) in sub-pel units, paired with the searched vector
+                           short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                           short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                           short*    s_mv_x,        // <--  fixed vector (x) of the other prediction - in pel units, only read here
+                           short*    s_mv_y,        // <--  fixed vector (y) of the other prediction - in pel units, only read here
+                           int       search_range,  // <--  1-d search range in pel units
+                           int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                           int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ {
+   int   pos, cand_x, cand_y, y, x4, mcost;
+   
+   pel_t *orig_line, *ref2_line, *ref1_line;
+   pel_t *(*get_ref_line1)(int, pel_t*, int, int, int, int);   // line getter for the fixed position (ref1_pic)
+   pel_t *(*get_ref_line2)(int, pel_t*, int, int, int, int);   // line getter for the candidate position (ref2_pic)
+   
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset; 
+   pel_t *ref1_pic     = listX[list + list_offset          ][ref]->imgY_11;
+   pel_t *ref2_pic     = listX[list ==0? 1 + list_offset: list_offset][ 0 ]->imgY_11;     // entry 0 of the other list
+   int   img_width     = listX[list+list_offset            ][ref]->size_x;
+   int   img_height    = listX[list+list_offset            ][ref]->size_y;
+   
+   int   best_pos      = 0;                                        // position with minimum motion cost
+   int   max_pos       = (2*search_range+1)*(2*search_range+1);    // number of search positions
+   int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
+   int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
+   int   blocksize_x4  = blocksize_x >> 2;                         // horizontal block size in 4-pel units
+   int   pred_x1        = (pic_pix_x << 2) + pred_mv_x1;       // predicted position x (in sub-pel units)
+   int   pred_y1        = (pic_pix_y << 2) + pred_mv_y1;       // predicted position y (in sub-pel units)
+   int   pred_x2        = (pic_pix_x << 2) + pred_mv_x2;       // predicted position x (in sub-pel units)
+   int   pred_y2        = (pic_pix_y << 2) + pred_mv_y2;       // predicted position y (in sub-pel units)
+   short center_x      = pic_pix_x + *mv_x;                      // center position x (in pel units)
+   short center_y      = pic_pix_y + *mv_y;                      // center position y (in pel units)
+   short ref1_center_x = pic_pix_x + *s_mv_x;                      // mvx of second pred (in pel units)
+   short ref1_center_y = pic_pix_y + *s_mv_y;                      // mvy of second pred (in pel units)
+   
+   
+   int   bi_diff; 
+   short apply_weights   = (active_pps->weighted_bipred_idc>0);  
+   short weightSpic = (apply_weights ? (list == 0? wbp_weight[list_offset    ][ref][0  ][0]: wbp_weight[list_offset + 1][0  ][ref][0]) : 1<<luma_log_weight_denom);   // multiplies ref1 samples; unused when apply_weights is 0
+   short weightRpic = (apply_weights ? (list == 0? wbp_weight[list_offset + 1][ref][0  ][0]: wbp_weight[list_offset    ][0  ][ref][0]) : 1<<luma_log_weight_denom);   // multiplies ref2 samples; unused when apply_weights is 0
+   short offsetSpic = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref]     [0]:  wp_offset[list_offset + 1][0  ]     [0]) : 0);
+   short offsetRpic = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref]     [0]:  wp_offset[list_offset    ][0  ]     [0]) : 0);
+   short weightedpel,pixel1,pixel2;   // NOTE(review): pixel1/pixel2 hold weight*sample narrowed to short - confirm the product always fits
+   short offsetBi=(offsetRpic + offsetSpic + 1)>>1;   // rounded mean of the two offsets
+   //===== line getter for the searched position: FastLineX only if the whole search window stays inside the picture =====
+   if ((center_x > search_range) && (center_x < img_width -1-search_range-blocksize_x) &&
+     (center_y > search_range) && (center_y < img_height-1-search_range-blocksize_y)   )
+   {
+     get_ref_line2 = FastLineX;
+   }
+   else
+   {
+     get_ref_line2 = UMVLineX;
+   }
+   
+   //===== line getter for the fixed position (same bounds test applied to ref1_center) =====
+   if ((ref1_center_x > search_range) && (ref1_center_x < img_width -1-search_range-blocksize_x) &&
+       (ref1_center_y > search_range) && (ref1_center_y < img_height-1-search_range-blocksize_y)   )
+   {
+     get_ref_line1 = FastLineX;
+   }
+   else
+   {
+     get_ref_line1 = UMVLineX;
+   }
+   
+   //===== loop over all search positions =====
+   for (pos=0; pos<max_pos; pos++)
+   {
+     //--- set candidate position (absolute position in pel units) ---
+     cand_x = center_x + spiral_search_x[pos];
+     cand_y = center_y + spiral_search_y[pos];
+     
+     //--- initialize motion cost (cost for both motion vectors) and check ---
+     mcost =  MV_COST (lambda_factor, 2, ref1_center_x, ref1_center_y, pred_x1, pred_y1);
+     mcost += MV_COST (lambda_factor, 2,        cand_x,        cand_y, pred_x2, pred_y2);
+     
+     if (mcost >= min_mcost)   continue;
+     
+     //--- add residual cost to motion cost ---
+     if (apply_weights)
+     {
+       for (y=0; y<blocksize_y; y++)
+       {
+         ref2_line  = get_ref_line2 (blocksize_x, ref2_pic,        cand_y+y,        cand_x, img_height, img_width);
+         ref1_line  = get_ref_line1 (blocksize_x, ref1_pic, ref1_center_y+y, ref1_center_x, img_height, img_width);
+         orig_line = orig_pic [y];
+         // NOTE(review): both getters can be UMVLineX - confirm it does not hand back a shared scratch buffer, or ref2_line would be overwritten by the second call
+         for (x4=0; x4<blocksize_x4; x4++)   // four samples per iteration
+         { 
+           pixel1=weightSpic * (*ref1_line++);
+           pixel2=weightRpic * (*ref2_line++);
+           weightedpel =  Clip3 (0, img->max_imgpel_value ,((pixel1 + pixel2 +
+                                 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           bi_diff = (*orig_line++)  - weightedpel;
+           mcost += byte_abs[bi_diff];
+           
+           pixel1=weightSpic * (*ref1_line++);
+           pixel2=weightRpic * (*ref2_line++);
+           weightedpel =  Clip3 (0, img->max_imgpel_value ,((pixel1 + pixel2 +
+                                 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           bi_diff = (*orig_line++)  - weightedpel;
+           mcost += byte_abs[bi_diff];
+           
+           pixel1=weightSpic * (*ref1_line++);
+           pixel2=weightRpic * (*ref2_line++);
+           weightedpel =  Clip3 (0, img->max_imgpel_value ,((pixel1 + pixel2 +
+                                 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           bi_diff = (*orig_line++)  - weightedpel;
+           mcost += byte_abs[bi_diff];
+           
+           pixel1=weightSpic * (*ref1_line++);
+           pixel2=weightRpic * (*ref2_line++);
+           weightedpel =  Clip3 (0, img->max_imgpel_value ,((pixel1 + pixel2 +
+                                 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           bi_diff = (*orig_line++)  - weightedpel;
+           mcost += byte_abs[bi_diff];
+         }    
+         // give up on this candidate after any row that pushes it to min_mcost or beyond
+         if (mcost >= min_mcost)
+         {
+           break;
+         }        
+       }
+     }
+     else
+     {
+       for (y=0; y<blocksize_y; y++)
+       {
+         ref2_line = get_ref_line2 (blocksize_x, ref2_pic,        cand_y+y,        cand_x, img_height, img_width);
+         ref1_line = get_ref_line1 (blocksize_x, ref1_pic, ref1_center_y+y, ref1_center_x, img_height, img_width);
+         orig_line = orig_pic [y];
+         // unweighted case: the prediction is (ref1 + ref2) >> 1, with no rounding term
+         for (x4=0; x4<blocksize_x4; x4++)
+         {         
+           bi_diff = (*orig_line++) - (((*ref1_line++) + *ref2_line++)>>1) ;
+           mcost += byte_abs[bi_diff];
+           bi_diff = (*orig_line++) - (((*ref1_line++) + *ref2_line++)>>1) ;
+           mcost += byte_abs[bi_diff];
+           bi_diff = (*orig_line++) - (((*ref1_line++) + *ref2_line++)>>1) ;
+           mcost += byte_abs[bi_diff];
+           bi_diff = (*orig_line++) - (((*ref1_line++) + *ref2_line++)>>1) ;
+           mcost += byte_abs[bi_diff];
+         }    
+         
+         if (mcost >= min_mcost)
+         {
+           break;
+         }
+       }
+     }
+     
+     //--- check if motion cost is less than minimum cost ---
+     if (mcost < min_mcost)
+     {
+       best_pos  = pos;
+       min_mcost = mcost;
+     }
+   }
+   
+   
+   //===== set best motion vector and return minimum motion cost =====
+   if (best_pos)
+   {
+     *mv_x += spiral_search_x[best_pos];
+     *mv_y += spiral_search_y[best_pos];
+   }
+   return min_mcost;
+ }
+ 
+ /*!
+ ***********************************************************************
+ * \brief
+ *    Sub pixel block motion search for bi-prediction
+ ***********************************************************************
+ */
+ int                                               //  ==> minimum motion cost after search
+ SubPelBlockSearchBiPred (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
+                          short     ref,           // <--  reference frame (0... or -1 (backward))
+                          int       list,          // <--  reference picture list 
+                          int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                          int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                          int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                          short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                          short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                          short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                          short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                          short*    s_mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                          short*    s_mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                          int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
+                          int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
+                          int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                          int       lambda_factor   // <--  lagrangian parameter for determining motion cost
+                          )
+ {
+   int   j, i, k;
+   int   c_diff[MB_PIXELS];
+   int   diff[16], *d;  
+   int   pos, best_pos, mcost, abort_search;
+   int   y0, y1, y2, y3;
+   int   x0;
+   int   ry0, ry4, ry8, ry12, rx0;
+   int   sy0, sy4, sy8, sy12, sx0;
+   
+   int   cand_mv_x, cand_mv_y;
+   int   y_offset, ypels =(128 - 64 * (blocktype == 3));
+   
+   int   blocksize_x     = input->blc_size[blocktype][0];
+   int   blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
+   int   min_pos2        = (input->hadamard ? 0 : 1);
+   int   max_pos2        = (input->hadamard ? max(1,search_pos2) : search_pos2);
+   int   list_offset   = img->mb_data[img->current_mb_nr].list_offset; 
+   int   apply_weights =  (active_pps->weighted_bipred_idc );  
+   short weightSpic = (apply_weights ? (list == 0? wbp_weight[list_offset    ][ref][0  ][0]: wbp_weight[list_offset + 1][0  ][ref][0]) : 1);
+   short weightRpic = (apply_weights ? (list == 0? wbp_weight[list_offset + 1][ref][0  ][0]: wbp_weight[list_offset    ][0  ][ref][0]) : 1);
+   short offsetSpic = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref]     [0]:  wp_offset[list_offset + 1][0  ]     [0]) : 0);
+   short offsetRpic = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref]     [0]:  wp_offset[list_offset    ][0  ]     [0]) : 0);
+   short offsetBi=(offsetRpic + offsetSpic + 1)>>1;
+   pel_t weightedpel;
+   int   test8x8transform = input->Transform8x8Mode && blocktype <= 4 && input->hadamard;
+   int   cmv_x, cmv_y;
+   int   smv_x = *s_mv_x + pic4_pix_x;
+   int   smv_y = *s_mv_y + pic4_pix_y;
+   
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   pel_t **ref1_pic = ref_picture->imgY_ups;      
+   pel_t **ref2_pic = listX[list==0? 1 +list_offset: list_offset][0  ]->imgY_ups;  
+   pel_t *ref_line_p1,*ref_line_p2;
+   pel_t *orig_line;
+   int img_width  = ((ref_picture->size_x + 2*IMG_PAD_SIZE - 1)<<2);
+   int img_height = ((ref_picture->size_y + 2*IMG_PAD_SIZE - 1)<<2);
+   int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   
+   /*********************************
+    *****                       *****
+    *****  HALF-PEL REFINEMENT  *****
+    *****                       *****
+    *********************************/
+   
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + *mv_x > 2) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *mv_y > 2) && (pic4_pix_y + *mv_y < max_pos_y4 - 1))
+   {
+     get_line_p2 = FastLine4X;
+   } 
+   else
+   {
+     get_line_p2 = UMVLine4X;    
+   }
+   
+   if ((pic4_pix_x + *s_mv_x > 2) && (pic4_pix_x + *s_mv_x < max_pos_x4 - 1) &&
+     (pic4_pix_y + *s_mv_y > 2) && (pic4_pix_y + *s_mv_y < max_pos_y4 - 1))
+   {
+     get_line_p1 = FastLine4X;
+   }
+   else
+   {
+     get_line_p1 = UMVLine4X;    
+   }
+ 
+ 
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = min_pos2; pos < max_pos2; pos++)
+   {
+     cand_mv_x = *mv_x + (spiral_hpel_search_x[pos]);    // quarter-pel units
+     cand_mv_y = *mv_y + (spiral_hpel_search_y[pos]);    // quarter-pel units
+     
+     //----- set motion vector cost -----
+     mcost = MV_COST (lambda_factor, 0, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     
+     if (mcost >= min_mcost) continue;
+     
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+     
+     //----- add up SATD -----
+     for (y0=0, abort_search=0; y0<blocksize_y && !abort_search; y0+=BLOCK_SIZE)
+     {
+       y_offset = (y0>7)*ypels;
+       ry0 = (y0<<2) + cmv_y;
+       ry4  = ry0 + 4;
+       ry8  = ry4 + 4;
+       ry12 = ry8 + 4;      
+       sy0 = (y0<<2) + smv_y;
+       sy4  = sy0 + 4;
+       sy8  = sy4 + 4;
+       sy12 = sy8 + 4;
+       y1 = y0 + 1;
+       y2 = y1 + 1;
+       y3 = y2 + 1;
+       
+       if (apply_weights)
+       {
+         for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+         {
+           rx0 = (x0<<2) + cmv_x;            
+           sx0 = (x0<<2) + smv_x;
+           d   = diff;
+           
+           orig_line = &orig_pic [y0][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy0, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry0, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line - weightedpel;
+           
+           orig_line = &orig_pic [y1][x0];
+           ref_line_p1 = get_line_p1 (ref1_pic, sy4, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry4, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line - weightedpel;
+           
+           
+           orig_line = &orig_pic [y2][x0];
+           ref_line_p1 = get_line_p1 (ref1_pic, sy8, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry8, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line - weightedpel;
+           
+           orig_line = &orig_pic [y3][x0];
+           ref_line_p1 = get_line_p1 (ref1_pic, sy12, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry12, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d   = *orig_line - weightedpel;
+           
+           if (!test8x8transform)
+           {
+             if ((mcost += SATD (diff, input->hadamard)) > min_mcost)
+             {
+               abort_search = 1;
+               break;
+             }
+           }
+           else
+           {
+             i = (x0&0x7) + (x0>7) * 64 + y_offset;
+             for(k=0, j=y0; j<y0 + BLOCK_SIZE; j++, k+=BLOCK_SIZE)
+               memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+           }
+         }
+       }
+       else
+       {      
+         for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+         {         
+           rx0 = (x0<<2) + cmv_x;            
+           sx0 = (x0<<2) + smv_x;
+           d   = diff;
+           
+           orig_line = &orig_pic [y0][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy0, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry0, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           orig_line = &orig_pic [y1][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy4, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry4, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           orig_line = &orig_pic [y2][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy8, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry8, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           orig_line = &orig_pic [y3][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy12, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry12, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d   = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           if (!test8x8transform)
+           {
+             if ((mcost += SATD (diff, input->hadamard)) > min_mcost)
+             {
+               abort_search = 1;
+               break;
+             }
+           }
+           else
+           {
+             i = (x0&0x7) + (x0>7) * 64 + y_offset;
+             for(k=0, j=y0; j<BLOCK_SIZE + y0; j++, k+=BLOCK_SIZE)
+               memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+           }
+         }
+       }
+     }  
+     
+     if(test8x8transform)
+       mcost += find_SATD (c_diff, blocktype);
+     
+     
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }      
+   }
+   
+   if (best_pos)
+   {
+     *mv_x += (spiral_hpel_search_x [best_pos]);
+     *mv_y += (spiral_hpel_search_y [best_pos]);
+   }
+   
+   test8x8transform = input->Transform8x8Mode && blocktype <= 4 && input->hadamard;
+   
+   /************************************
+   *****                          *****
+   *****  QUARTER-PEL REFINEMENT  *****
+   *****                          *****
+   ************************************/
+   //===== set function for getting pixel values =====
+   if ((pic4_pix_x + *mv_x > 0) && (pic4_pix_x + *mv_x < max_pos_x4) &&
+     (pic4_pix_y + *mv_y > 0) && (pic4_pix_y + *mv_y < max_pos_y4))
+   {
+     get_line_p2 = FastLine4X;
+   }
+   else
+   {
+     get_line_p2 = UMVLine4X;    
+   }
+   
+   if ((pic4_pix_x + *s_mv_x > 0) && (pic4_pix_x + *s_mv_x < max_pos_x4) &&
+     (pic4_pix_y + *s_mv_y > 0) && (pic4_pix_y + *s_mv_y < max_pos_y4))
+   {
+     get_line_p1 = FastLine4X;
+   }
+   else
+   {
+     get_line_p1 = UMVLine4X;    
+   }
+   
+   
+   //===== loop over search positions =====
+   for (best_pos = 0, pos = 1; pos < search_pos4; pos++)
+   {
+     cand_mv_x = *mv_x + spiral_search_x[pos];    // quarter-pel units
+     cand_mv_y = *mv_y + spiral_search_y[pos];    // quarter-pel units
+     
+     //----- set motion vector cost -----
+     mcost = MV_COST (lambda_factor, 0, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     
+     if (mcost >= min_mcost) continue;
+     cmv_x = cand_mv_x + pic4_pix_x;
+     cmv_y = cand_mv_y + pic4_pix_y;
+     
+     //----- add up SATD -----
+     for (y0=0, abort_search=0; y0<blocksize_y && !abort_search; y0+=BLOCK_SIZE)
+     {
+       y_offset = (y0>7)*ypels;
+       ry0 = (y0<<2) + cmv_y;
+       ry4  = ry0 + 4;
+       ry8  = ry4 + 4;
+       ry12 = ry8 + 4;      
+       sy0 = (y0<<2) + smv_y;
+       sy4  = sy0 + 4;
+       sy8  = sy4 + 4;
+       sy12 = sy8 + 4;
+       y1 = y0 + 1;
+       y2 = y1 + 1;
+       y3 = y2 + 1;
+       
+       if (apply_weights)
+       {
+         
+         for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+         {
+           rx0 = (x0<<2) + cmv_x;            
+           sx0 = (x0<<2) + smv_x;
+           d   = diff;
+           
+           orig_line = &orig_pic [y0][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy0, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry0, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line - weightedpel;
+           
+           orig_line = &orig_pic [y1][x0];
+           ref_line_p1 = get_line_p1 (ref1_pic, sy4, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry4, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line - weightedpel;
+           
+           
+           orig_line = &orig_pic [y2][x0];
+           ref_line_p1 = get_line_p1 (ref1_pic, sy8, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry8, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line - weightedpel;
+           
+           orig_line = &orig_pic [y3][x0];
+           ref_line_p1 = get_line_p1 (ref1_pic, sy12, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry12, rx0,  img_height, img_width);
+           weightedpel = clip1a (((( weightSpic * (*(ref_line_p1    )) + weightRpic * (*(ref_line_p2     ))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;          
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d++ = *orig_line++ - weightedpel;
+           weightedpel = clip1a ((((weightSpic * (*(ref_line_p1 += 4)) + weightRpic * (*(ref_line_p2 += 4))) 
+             + 2 * wp_luma_round) >> (luma_log_weight_denom + 1)) + (offsetBi));
+           *d   = *orig_line - weightedpel;
+           
+           if (!test8x8transform)
+           {
+             if ((mcost += SATD (diff, input->hadamard)) > min_mcost)
+             {
+               abort_search = 1;
+               break;
+             }
+           }
+           else
+           {
+             i = (x0&0x7) + (x0>7) * 64 + y_offset;
+             for(k=0, j=y0; j<y0 + BLOCK_SIZE; j++, k+=BLOCK_SIZE)
+               memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+           }
+         }
+       }
+       else
+       {      
+         for (x0=0; x0<blocksize_x; x0+=BLOCK_SIZE)
+         {
+           rx0 = (x0<<2) + cmv_x;            
+           sx0 = (x0<<2) + smv_x;
+           d   = diff;
+           
+           orig_line = &orig_pic [y0][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy0, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry0, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           orig_line = &orig_pic [y1][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy4, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry4, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           orig_line = &orig_pic [y2][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy8, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry8, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           orig_line = &orig_pic [y3][x0];    
+           ref_line_p1 = get_line_p1 (ref1_pic, sy12, sx0,  img_height, img_width);
+           ref_line_p2 = get_line_p2 (ref2_pic, ry12, rx0,  img_height, img_width);
+           *d++ = *orig_line++  -  ((*(ref_line_p1     ) + *(ref_line_p2     ) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d++ = *orig_line++  -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           *d   = *orig_line    -  ((*(ref_line_p1 += 4) + *(ref_line_p2 += 4) + 1) >> 1);
+           
+           if (!test8x8transform)
+           {
+             if ((mcost += SATD (diff, input->hadamard)) > min_mcost)
+             {
+               abort_search = 1;
+               break;
+             }
+           }
+           else
+           {
+             i = (x0&0x7) + (x0>7) * 64 + y_offset;
+             for(k=0, j=y0; j<y0 + BLOCK_SIZE; j++, k+=BLOCK_SIZE)
+               memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+           }
+         }
+       }
+     }
+     if(test8x8transform)
+       mcost += find_SATD (c_diff, blocktype);
+     
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       best_pos  = pos;
+     }
+     
+   }
+   
+   if (best_pos)
+   {
+     *mv_x += spiral_search_x [best_pos];
+     *mv_y += spiral_search_y [best_pos];
+   }
+   
+   //===== return minimum motion cost =====
+   return min_mcost;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Motion cost of one partition for bi-predictive modes: the weighted
+  *    bits of the motion vector differences (LIST_0 and LIST_1) plus the
+  *    SATD of the luma residual left by LumaPrediction4x4Bi.
+  *
+  * \param blocktype
+  *    block type; indexes input->blc_size, input->part_size and the
+  *    motion vector arrays
+  * \param block8x8
+  *    second index into the partition origin tables
+  * \param fw_ref
+  *    reference index used with the LIST_0 motion vectors
+  * \param bw_ref
+  *    reference index used with the LIST_1 motion vectors
+  * \param lambda_factor
+  *    handed to WEIGHTED_COST together with the motion vector bits
+  * \param list
+  *    non-zero selects img->bipred_mv1, zero selects img->bipred_mv2;
+  *    also forwarded to LumaPrediction4x4Bi
+  *
+  * \return
+  *    accumulated cost (motion vector cost + residual SATD)
+  ***********************************************************************
+  */
+ int BPredPartitionCost (int   blocktype,
+                         int   block8x8,
+                         short fw_ref,
+                         short bw_ref,
+                         int   lambda_factor,
+                         int   list)
+ {
+   // partition origins, indexed [parttype][block8x8]; a value shifted left by 2 is a pixel offset
+   static const int  origin_x[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,2,0,2}};
+   static const int  origin_y[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,0,0,0}, {0,0,2,2}};
+ 
+   int   satd_in[64];                                  // input buffer for SATD / SATD8X8
+   int   pred_err[MB_BLOCK_SIZE][MB_BLOCK_SIZE];       // prediction error of the whole partition
+ 
+   int   parttype = (blocktype < 4) ? blocktype : 4;
+   int   sub_w    = min(input->blc_size[blocktype][0],8);
+   int   sub_h    = min(input->blc_size[blocktype][1],8);
+ 
+   // range covered by the partition and the stride of one block of this type
+   int   h_begin  = origin_x[parttype][block8x8];
+   int   v_begin  = origin_y[parttype][block8x8];
+   int   h_end    = h_begin + input->part_size[parttype][0];
+   int   v_end    = v_begin + input->part_size[parttype][1];
+   int   h_step   = input->part_size[blocktype][0];
+   int   v_step   = input->part_size[blocktype][1];
+ 
+   // residual is measured on 8x8 blocks instead of 4x4 blocks when this is set
+   int   use_satd8x8 = input->Transform8x8Mode && (blocktype <= 4);
+ 
+   int   pic_pix_x, pic_pix_y, block_x, block_y;
+   int   v, h, i, j, k, mcost;
+   int   mv_bits = 0;
+ 
+   short   ******bi_mv = list ? img->bipred_mv1 : img->bipred_mv2;
+   short   ******pmv   = img->pred_mv;
+ 
+   //----- cost of motion vector differences -----
+   for (v = v_begin; v < v_end; v += v_step)
+   {
+     for (h = h_begin; h < h_end; h += h_step)
+     {
+       short *fw_mv   = bi_mv[v][h][LIST_0][fw_ref][blocktype];
+       short *fw_pred = pmv  [v][h][LIST_0][fw_ref][blocktype];
+       short *bw_mv   = bi_mv[v][h][LIST_1][bw_ref][blocktype];
+       short *bw_pred = pmv  [v][h][LIST_1][bw_ref][blocktype];
+ 
+       mv_bits += mvbits[ fw_mv[0] - fw_pred[0] ];
+       mv_bits += mvbits[ fw_mv[1] - fw_pred[1] ];
+ 
+       mv_bits += mvbits[ bw_mv[0] - bw_pred[0] ];
+       mv_bits += mvbits[ bw_mv[1] - bw_pred[1] ];
+     }
+   }
+   mcost = WEIGHTED_COST (lambda_factor, mv_bits);
+ 
+   //----- cost of residual signal -----
+   for (v = v_begin; v < v_end; v++)
+   {
+     int err_row = (v - v_begin) << 2;                 // first row of this 4x4 block in pred_err
+ 
+     block_y   = v << 2;
+     pic_pix_y = img->opix_y + block_y;
+ 
+     for (h = h_begin; h < h_end; h++)
+     {
+       int err_col = (h - h_begin) << 2;               // first column of this 4x4 block in pred_err
+ 
+       block_x   = h << 2;
+       pic_pix_x = img->opix_x + block_x;
+ 
+       LumaPrediction4x4Bi (block_x, block_y, 2, blocktype, blocktype, fw_ref, bw_ref, list);
+ 
+       // original minus prediction, stored both for the 4x4 SATD and for the 8x8 pass below
+       k = 0;
+       for (j = 0; j < 4; j++)
+       {
+         for (i = 0; i < 4; i++)
+         {
+           int pel_err = imgY_org[pic_pix_y + j][pic_pix_x + i] - img->mpr[block_y + j][block_x + i];
+ 
+           pred_err[err_row + j][err_col + i] = pel_err;
+           satd_in[k++] = pel_err;
+         }
+       }
+ 
+       if (!use_satd8x8)
+       {
+         mcost += SATD (satd_in, input->hadamard);
+       }
+     }
+   }
+ 
+   if (use_satd8x8)
+   {
+     int part_w = input->blc_size[parttype][0];
+     int part_h = input->blc_size[parttype][1];
+     int y8, x8, row;
+ 
+     for (y8 = 0; y8 < part_h; y8 += sub_h)
+     {
+       for (x8 = 0; x8 < part_w; x8 += sub_w)
+       {
+         // gather eight rows of eight errors into one contiguous 8x8 block
+         for (row = 0; row < 8; row++)
+           memcpy(&satd_in[row << 3], &(pred_err[y8 + row][x8]), 8 * sizeof(int));
+ 
+         mcost += SATD8X8(satd_in, input->hadamard);
+       }
+     }
+   }
+ 
+   return mcost;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Block motion search
+  ***********************************************************************
+  */
+ int                                         //!< minimum motion cost after search
+ BlockMotionSearch (short     ref,           //!< reference idx
+                    int       list,          //!< reference pciture list
+                    int       mb_x,          //!< x-coordinate inside macroblock
+                    int       mb_y,          //!< y-coordinate inside macroblock
+                    int       blocktype,     //!< block type (1-16x16 ... 7-4x4)
+                    int       search_range,  //!< 1-d search range for integer-position search
+                    int       lambda_factor) //!< lagrangian parameter for determining motion cost
+ {
+   static pel_t   orig_val [256];
+   static pel_t  *orig_pic  [16] = {orig_val,     orig_val+ 16, orig_val+ 32, orig_val+ 48,
+                                    orig_val+ 64, orig_val+ 80, orig_val+ 96, orig_val+112,
+                                    orig_val+128, orig_val+144, orig_val+160, orig_val+176,
+                                    orig_val+192, orig_val+208, orig_val+224, orig_val+240};
+ 
+   short     pred_mv_x, pred_mv_y, mv_x, mv_y;
+   int       i, j;
+   
+   int       max_value = INT_MAX;
+   int       min_mcost = max_value;
+   
+   int       block_x   = (mb_x>>2);
+   int       block_y   = (mb_y>>2);
+   
+   int       bsx       = input->blc_size[blocktype][0];
+   int       bsy       = input->blc_size[blocktype][1];
+   
+   int       pic_pix_x = img->opix_x + mb_x;
+   int       pic_pix_y = img->opix_y + mb_y;
+   
+   
+ #ifdef WIN32
+   struct _timeb tstruct1;
+   struct _timeb tstruct2;
+ #else
+   struct timeb tstruct1;
+   struct timeb tstruct2;
+ #endif
+   
+   int me_tmp_time;
+   short*    pred_mv = img->pred_mv[block_y][block_x][list][ref][blocktype];
+   short****** all_mv    = img->all_mv;  
+   int list_offset = ((img->MbaffFrameFlag) && (img->mb_data[img->current_mb_nr].mb_field)) ? img->current_mb_nr % 2 ? 4 : 2 : 0;
+   int *prevSad = (input->FMEnable == 3)? EPZSDistortion[list + list_offset][blocktype - 1]: NULL;
+ 
+ #ifdef WIN32
+   _ftime( &tstruct1 );    // start time ms
+ #else
+   ftime(&tstruct1);
+ #endif
+   
+   //==================================
+   //=====   GET ORIGINAL BLOCK   =====
+   //==================================
+   for (j = 0; j < bsy; j++)
+     memcpy(orig_pic[j],&imgY_org[pic_pix_y+j][pic_pix_x], bsx *sizeof(imgpel));
+   
+   if(input->FMEnable == 1)
+   {
+     setup_FME(ref, list, block_y, block_x, blocktype, all_mv );
+   }
+   else if (input->FMEnable == 2)
+   {
+     simplified_setup_FME(ref, list, block_y, block_x, blocktype, all_mv );
+   }
+ 
+   //===========================================
+   //=====   GET MOTION VECTOR PREDICTOR   =====
+   //===========================================
+  
+   SetMotionVectorPredictor (pred_mv, enc_picture->ref_idx[list], enc_picture->mv[list], ref, list, block_x, block_y, bsx, bsy);
+   
+   pred_mv_x = pred_mv[0];
+   pred_mv_y = pred_mv[1];
+   
+   //==================================
+   //=====   INTEGER-PEL SEARCH   =====
+   //==================================
+   
+   if (input->FMEnable == 1)
+   {
+     mv_x = pred_mv_x / 4;
+     mv_y = pred_mv_y / 4;
+     
+     if (!input->rdopt)
+     {
+       //--- adjust search center so that the (0,0)-vector is inside ---
+       mv_x = max (-search_range, min (search_range, mv_x));
+       mv_y = max (-search_range, min (search_range, mv_y));
+     }
+     
+     mv_x = Clip3(-2047 + search_range, 2047 - search_range, mv_x);
+     mv_y = Clip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv_y);
+     
+     
+     min_mcost = FastIntegerPelBlockMotionSearch(orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+       pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range,
+       min_mcost, lambda_factor);
+     //FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3
+     for (i=0; i < (bsx>>2); i++)
+     {
+       for (j=0; j < (bsy>>2); j++)
+       {
+         if(list == 0) 
+         {
+           fastme_ref_cost[ref][blocktype][block_y+j][block_x+i] = min_mcost;
+           if (ref==0)
+             fastme_l0_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+         }
+         else
+         {
+           fastme_l1_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+         }
+       }
+     }
+   }
+   else if (input->FMEnable == 2)
+   {
+     mv_x = pred_mv_x / 4;
+     mv_y = pred_mv_y / 4;
+     
+     if (!input->rdopt)
+     {
+       //--- adjust search center so that the (0,0)-vector is inside ---
+       mv_x = max (-search_range, min (search_range, mv_x));
+       mv_y = max (-search_range, min (search_range, mv_y));
+     }
+     
+     mv_x = Clip3(-2047 + search_range, 2047 - search_range, mv_x);
+     mv_y = Clip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv_y);
+     
+     
+     min_mcost = simplified_FastIntegerPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+                                                  pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range,
+                                                  min_mcost, lambda_factor);
+     for (i=0; i < (bsx>>2); i++)
+     {
+       for (j=0; j < (bsy>>2); j++)
+       {
+         if(list == 0) 
+         {
+           simplified_fastme_l0_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+         }
+         else
+         {
+           simplified_fastme_l1_cost[blocktype][(img->pix_y>>2)+block_y+j][(img->pix_x>>2)+block_x+i] = min_mcost;
+         }
+       }
+     }
+   }
+   //--- perform motion search using EPZS schemes---
+   else if (input->FMEnable == 3)
+   {    
+     //--- set search center ---
+     mv_x = pred_mv_x / 4;
+     mv_y = pred_mv_y / 4;
+     //mv_x = (pred_mv_x + 2)>> 2;
+     //mv_y = (pred_mv_y + 2)>> 2;    
+     if (!input->rdopt)
+     {
+       //--- adjust search center so that the (0,0)-vector is inside ---
+       mv_x = max (-search_range, min (search_range, mv_x));
+       mv_y = max (-search_range, min (search_range, mv_y));
+     }
+     
+     
+     mv_x = Clip3(-2047 + search_range, 2047 - search_range, mv_x);
+     mv_y = Clip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv_y);
+     
+     min_mcost = EPZSPelBlockMotionSearch (orig_pic, ref, list, list_offset, 
+       enc_picture->ref_idx, enc_picture->mv,pic_pix_x, pic_pix_y, blocktype,
+       pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range, min_mcost, lambda_factor);
+     
+   }
+   else
+   {
+ #ifndef _FAST_FULL_ME_
+     
+     //--- set search center ---
+     mv_x = pred_mv_x / 4;
+     mv_y = pred_mv_y / 4;
+     if (!input->rdopt)
+     {
+       //--- adjust search center so that the (0,0)-vector is inside ---
+       mv_x = max (-search_range, min (search_range, mv_x));
+       mv_y = max (-search_range, min (search_range, mv_y));
+     }
+     
+     mv_x = Clip3(-2047 + search_range, 2047 - search_range, mv_x);
+     mv_y = Clip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1]  - search_range, mv_y);
+     
+     //--- perform motion search ---
+     min_mcost = FullPelBlockMotionSearch     (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+       pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range,
+       min_mcost, lambda_factor);
+     
+ #else
+     // comments:   - orig_pic is not used  -> be careful
+     //             - search center is automatically determined
+     min_mcost = FastFullPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+       pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range,
+       min_mcost, lambda_factor);
+     
+ #endif // #ifndef _FAST_FULL_ME_
+   }
+   //===== convert search center to quarter-pel units =====
+   mv_x <<= 2;
+   mv_y <<= 2;
+   
+   //==============================
+   //=====   SUB-PEL SEARCH   =====
+   //==============================
+   if (!input->DisableSubpelME)
+   {
+     if (input->FMEnable != 3 || (ref == 0 || img->structure != FRAME || (ref > 0 && min_mcost < 3.5 * prevSad[pic_pix_x >> 2])))
+     {
+       
+       if (input->hadamard == 1)
+       {
+         min_mcost = max_value;
+       }
+       
+       if (input->FMEnable == 1)
+       {
+         if(blocktype >3)
+         {
+           min_mcost =  FastSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+             pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, min_mcost, lambda_factor, (input->Transform8x8Mode && blocktype <= 4 && input->hadamard));
+         }
+         else
+         {
+           min_mcost =  SubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+             pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, min_mcost, lambda_factor);
+         }
+       }
+       else if (input->FMEnable == 2)
+       {
+         if(blocktype > 1)
+         {
+           min_mcost =  simplified_FastSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y,
+             blocktype, pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, min_mcost, lambda_factor, 
+             (input->Transform8x8Mode && blocktype <= 4 && input->hadamard));
+         }
+         else
+         {
+           min_mcost =  simplified_FastFullSubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, 
+             blocktype, pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, min_mcost, lambda_factor);
+         }
+       }
+       else
+       {      
+         min_mcost =  SubPelBlockMotionSearch (orig_pic, ref, list, pic_pix_x, pic_pix_y, blocktype,
+           pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, min_mcost, lambda_factor);            
+       }
+     }
+   }
+   
+   if (!input->rdopt)
+   {
+     // Get the skip mode cost
+     if (blocktype == 1 && (img->type == P_SLICE||img->type == SP_SLICE))
+     {
+       int cost;
+       
+       FindSkipModeMotionVector ();
+       
+       cost  = GetSkipCostMB (lambda_factor);
+       cost -= ((lambda_factor + 4096) >> 13);
+       
+       if (cost < min_mcost)
+       {
+         min_mcost = cost;
+         mv_x      = img->all_mv [0][0][0][0][0][0];
+         mv_y      = img->all_mv [0][0][0][0][0][1];
+       }
+     }
+   }
+   
+   //===============================================
+   //=====   SET MV'S AND RETURN MOTION COST   =====
+   //===============================================
+   
+   for (j=block_y; j < block_y + (bsy>>2); j++)    
+   {
+     for (i=block_x; i < block_x + (bsx>>2); i++)
+     {
+       all_mv[j][i][list][ref][blocktype][0] = mv_x;
+       all_mv[j][i][list][ref][blocktype][1] = mv_y;
+     }
+   }
+   
+   if (img->type==B_SLICE && input->BiPredMotionEstimation!=0 && (blocktype == 1) && (ref==0))
+   {
+     
+     short   ******bipred_mv = list ? img->bipred_mv1 : img->bipred_mv2;
+     int     min_mcostbi = max_value;
+     short   bimv_x, bimv_y, tempmv_x ,tempmv_y;
+     short   pred_mv_x1, pred_mv_y1;
+     short   pred_mv_x2 = 0, pred_mv_y2 = 0;
+     short   iterlist=list;
+     short   pred_mv_bi[2];
+     
+     SetMotionVectorPredictor (pred_mv_bi, enc_picture->ref_idx[list ^ 1], enc_picture->mv[(list == LIST_0? LIST_1: LIST_0)], 0, (list == LIST_0? LIST_1: LIST_0), block_x, block_y, bsx, bsy);
+ 
+     mv_x=(mv_x + 2)>>2;
+     mv_y=(mv_y + 2)>>2;
+      
+     for (i=0;i<=input->BiPredMERefinements;i++)
+     {
+       if (i%2)
+       {
+         pred_mv_x2=pred_mv[0];
+         pred_mv_y2=pred_mv[1]; 
+         pred_mv_x1=pred_mv_bi[0];
+         pred_mv_y1=pred_mv_bi[1]; 
+         tempmv_x=bimv_x;
+         tempmv_y=bimv_y;        
+         bimv_x=mv_x;
+         bimv_y=mv_y;
+         iterlist= list ^ 1;
+         
+       }
+       else
+       {
+         pred_mv_x1=pred_mv[0];
+         pred_mv_y1=pred_mv[1]; 
+         pred_mv_x2=pred_mv_bi[0];
+         pred_mv_y2=pred_mv_bi[1]; 
+         
+         if (i!=0)
+         {
+           tempmv_x=bimv_x;
+           tempmv_y=bimv_y;        
+           bimv_x=mv_x;
+           bimv_y=mv_y;
+         }
+         else
+         {
+           tempmv_x=mv_x;
+           tempmv_y=mv_y;        
+           bimv_x = (pred_mv_x2 + 2)>>2;
+           bimv_y = (pred_mv_y2 + 2)>>2;
+         }
+         
+         iterlist=list;
+       }
+       if (input->FMEnable == 3)        
+       {
+         min_mcostbi = EPZSBiPredBlockMotionSearch (orig_pic, ref, iterlist, list_offset, 
+           enc_picture->ref_idx, enc_picture->mv, 
+           pic_pix_x, pic_pix_y, blocktype, 
+           pred_mv_x1, pred_mv_y1, pred_mv_x2, pred_mv_y2, 
+           &bimv_x, &bimv_y, &tempmv_x, &tempmv_y, 
+           input->BiPredMESearchRange, min_mcostbi, lambda_factor);
+       }
+       else
+       {
+         min_mcostbi = FullPelBlockMotionBiPred (orig_pic, ref, iterlist, 
+           pic_pix_x, pic_pix_y, blocktype, 
+           pred_mv_x1, pred_mv_y1, pred_mv_x2, pred_mv_y2, 
+           &bimv_x, &bimv_y, &tempmv_x, &tempmv_y, 
+           input->BiPredMESearchRange>>i, min_mcostbi, lambda_factor);
+       }
+       mv_x=tempmv_x;
+       mv_y=tempmv_y;        
+     }
+     
+     mv_x=tempmv_x << 2;
+     mv_y=tempmv_y << 2;
+     bimv_x = bimv_x << 2;
+     bimv_y = bimv_y << 2;
+ 
+     if (input->BiPredMESubPel && !input->DisableSubpelME)
+     {
+       if (input->hadamard)
+       {
+         min_mcostbi = max_value;
+       }
+       
+       min_mcostbi =  SubPelBlockSearchBiPred (orig_pic, 0, iterlist, pic_pix_x, pic_pix_y, blocktype,
+         pred_mv_x2, pred_mv_y2, &bimv_x, &bimv_y, &mv_x, &mv_y, 9, 9,
+         min_mcostbi, lambda_factor);
+     }
+     
+     if (input->BiPredMESubPel==2 && !input->DisableSubpelME)
+     {
+       if (input->hadamard)
+       {
+         min_mcostbi = max_value;
+       }
+       
+       min_mcostbi =  SubPelBlockSearchBiPred (orig_pic, 0, (iterlist == LIST_0? LIST_1: LIST_0), pic_pix_x, pic_pix_y, blocktype,
+         pred_mv_x, pred_mv_y, &mv_x, &mv_y, &bimv_x, &bimv_y, 9, 9,
+         min_mcostbi, lambda_factor);      
+     }
+ 
+     for (j=block_y; j < block_y + (bsy>>2); j++)
+     {
+       for (i=block_x ; i < block_x + (bsx>>2); i++)      
+       {
+         bipred_mv[j][i][iterlist    ][0][blocktype][0] = mv_x;
+         bipred_mv[j][i][iterlist    ][0][blocktype][1] = mv_y;
+         bipred_mv[j][i][iterlist ^ 1][0][blocktype][0] = bimv_x;
+         bipred_mv[j][i][iterlist ^ 1][0][blocktype][1] = bimv_y;        
+       }
+     }
+   }
+   
+   
+ #ifdef WIN32
+   _ftime(&tstruct2);   // end time ms
+ #else
+   ftime(&tstruct2);    // end time ms
+ #endif
+   
+   me_tmp_time=(tstruct2.time*1000+tstruct2.millitm) - (tstruct1.time*1000+tstruct1.millitm); 
+   me_tot_time += me_tmp_time;
+   me_time += me_tmp_time;
+   
+   return min_mcost;
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Motion cost of one bi-predictive partition.
+  *
+  *    The result is the weighted bit cost of the LIST_0 and LIST_1
+  *    motion vector differences plus the distortion (SATD or SATD8X8)
+  *    of the bi-predicted luma residual.
+  *
+  * \param blocktype
+  *    partition mode; indexes input->part_size and input->blc_size
+  * \param block8x8
+  *    index (0..3) used to look up the partition origin
+  * \param fw_ref
+  *    reference index used for LIST_0
+  * \param bw_ref
+  *    reference index used for LIST_1
+  * \param lambda_factor
+  *    weight handed to WEIGHTED_COST for the motion vector bits
+  *
+  * \return
+  *    accumulated motion cost
+  ***********************************************************************
+  */
+ int BIDPartitionCost (int   blocktype,
+                       int   block8x8,
+                       short fw_ref,
+                       short bw_ref,
+                       int   lambda_factor)
+ {
+   // partition origin (in 4x4 block units) per partition type and 8x8 index
+   static int  bx0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,2,0,2}};
+   static int  by0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,0,0,0}, {0,0,2,2}};
+ 
+   short ******mv_tab  = img->all_mv;
+   short ******pmv_tab = img->pred_mv;
+ 
+   int   residual[64];                            // one 4x4 or 8x8 block, row by row
+   int   err_buf[MB_BLOCK_SIZE][MB_BLOCK_SIZE];   // prediction error of the whole partition
+   int   part    = (blocktype < 4) ? blocktype : 4;
+   int   org_x   = bx0[part][block8x8];
+   int   org_y   = by0[part][block8x8];
+   int   end_x   = org_x + input->part_size[part][0];
+   int   end_y   = org_y + input->part_size[part][1];
+   int   inc_x   = input->part_size[blocktype][0];
+   int   inc_y   = input->part_size[blocktype][1];
+   int   tile_w  = min(input->blc_size[blocktype][0],8);
+   int   tile_h  = min(input->blc_size[blocktype][1],8);
+   int   use_8x8 = input->Transform8x8Mode && (blocktype <= 4);
+   int   bits    = 0;
+   int   total;
+   int   row, col, dy, dx;
+ 
+   //----- cost for motion vector bits -----
+   for (row = org_y; row < end_y; row += inc_y)
+   {
+     for (col = org_x; col < end_x; col += inc_x)
+     {
+       short *fw_mv  = mv_tab [row][col][LIST_0][fw_ref][blocktype];
+       short *fw_pmv = pmv_tab[row][col][LIST_0][fw_ref][blocktype];
+       short *bw_mv  = mv_tab [row][col][LIST_1][bw_ref][blocktype];
+       short *bw_pmv = pmv_tab[row][col][LIST_1][bw_ref][blocktype];
+ 
+       bits += mvbits[ fw_mv[0] - fw_pmv[0] ];
+       bits += mvbits[ fw_mv[1] - fw_pmv[1] ];
+       bits += mvbits[ bw_mv[0] - bw_pmv[0] ];
+       bits += mvbits[ bw_mv[1] - bw_pmv[1] ];
+     }
+   }
+ 
+   total = WEIGHTED_COST (lambda_factor, bits);
+ 
+   //----- cost of residual signal -----
+   for (row = org_y; row < end_y; row++)
+   {
+     int pel_y = row << 2;
+     int src_y = img->opix_y + pel_y;
+     int buf_y = (row - org_y) << 2;
+ 
+     for (col = org_x; col < end_x; col++)
+     {
+       int pel_x = col << 2;
+       int src_x = img->opix_x + pel_x;
+       int buf_x = (col - org_x) << 2;
+       int n     = 0;
+ 
+       LumaPrediction4x4 (pel_x, pel_y, 2, blocktype, blocktype, fw_ref, bw_ref);
+ 
+       for (dy = 0; dy < 4; dy++)
+       {
+         for (dx = 0; dx < 4; dx++)
+         {
+           int e = imgY_org[src_y+dy][src_x+dx] - img->mpr[dy+pel_y][dx+pel_x];
+           err_buf[buf_y+dy][buf_x+dx] = e;
+           residual[n++] = e;
+         }
+       }
+ 
+       // 4x4 distortion is only counted when the 8x8 path below is not taken
+       if (!use_8x8)
+         total += SATD (residual, input->hadamard);
+     }
+   }
+ 
+   if (use_8x8)  // tchen 4-29-04
+   {
+     int buf_y, buf_x;
+ 
+     for (buf_y = 0; buf_y < input->blc_size[part][1]; buf_y += tile_h)
+     {
+       for (buf_x = 0; buf_x < input->blc_size[part][0]; buf_x += tile_w)
+       {
+         // gather eight rows of eight errors into one contiguous vector
+         for (dy = 0; dy < 8; dy++)
+           memcpy(residual + 8 * dy, err_buf[buf_y + dy] + buf_x, 8 * sizeof(int));
+ 
+         total += SATD8X8(residual, input->hadamard);
+       }
+     }
+   }
+   return total;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Get cost for skip mode for a macroblock
+  *
+  *    Each of the four 8x8 quadrants is predicted in 4x4 steps. The
+  *    distortion is measured with SATD8X8 per quadrant when rdopt is off
+  *    and the 8x8 transform is enabled, and with SATD per 4x4 block
+  *    otherwise.
+  *
+  * \param lambda_factor
+  *    not used by this function
+  *
+  * \return
+  *    accumulated distortion of the macroblock
+  ************************************************************************
+  */
+ int GetSkipCostMB (int lambda_factor)
+ {
+   int err8x8[8][8];      // residual of the current 8x8 quadrant
+   int flat8x8[64];       // the same residual as one contiguous row-major vector
+   int err4x4[16];        // residual of the current 4x4 block
+   int total = 0;
+   int quad, off_y, off_x, dy, dx;
+ 
+   for (quad = 0; quad < 4; quad++)
+   {
+     int base_y = (quad/2)<<3;
+     int base_x = (quad%2)<<3;
+ 
+     for (off_y = 0; off_y < 8; off_y += 4)
+     {
+       int pel_y = base_y + off_y;
+       int src_y = img->opix_y + pel_y;
+ 
+       for (off_x = 0; off_x < 8; off_x += 4)
+       {
+         int pel_x = base_x + off_x;
+         int src_x = img->opix_x + pel_x;
+         int n     = 0;
+ 
+         //===== prediction of 4x4 block =====
+         LumaPrediction4x4 (pel_x, pel_y, 0, 0, 0, 0, 0);
+ 
+         //===== get displaced frame difference ======
+         for (dy = 0; dy < 4; dy++)
+         {
+           for (dx = 0; dx < 4; dx++)
+           {
+             int e = imgY_org[src_y+dy][src_x+dx] - img->mpr[dy+pel_y][dx+pel_x];
+             err8x8[off_y+dy][off_x+dx] = e;
+             err4x4[n++] = e;
+           }
+         }
+ 
+         if (input->rdopt != 0 || !input->Transform8x8Mode)
+           total += SATD (err4x4, input->hadamard);
+       }
+     }
+ 
+     if (input->rdopt == 0 && input->Transform8x8Mode)
+     {
+       // the 8x8 array is contiguous, so one copy flattens all eight rows
+       memcpy(flat8x8, err8x8, sizeof(err8x8));
+       total += SATD8X8 (flat8x8, input->hadamard);
+     }
+   }
+ 
+   return total;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Find motion vector for the Skip mode
+  *
+  *    The vector is zero when the left or the upper neighbour is either
+  *    unavailable or has reference index 0 with a zero LIST_0 vector
+  *    (after field/frame adjustment); otherwise the 16x16 motion vector
+  *    predictor is used. The result is written to
+  *    img->all_mv[by][bx][0][0][0] for all sixteen 4x4 blocks.
+  ************************************************************************
+  */
+ void FindSkipModeMotionVector ()
+ {
+   short ******mv_tab = img->all_mv;
+   Macroblock *cur    = &img->mb_data[img->current_mb_nr];
+   PixelPos    left, above;
+   short       skip_mv[2];
+   int         left_is_zero  = 1;   // stays 1 when the neighbour is unavailable
+   int         above_is_zero = 1;
+   int         row, col;
+ 
+   getLuma4x4Neighbour(img->current_mb_nr,0,0,-1, 0,&left);
+   getLuma4x4Neighbour(img->current_mb_nr,0,0, 0,-1,&above);
+ 
+   if (left.available)
+   {
+     short *nb_mv    = enc_picture->mv[LIST_0][left.pos_y][left.pos_x];
+     int    nb_mv_y  = nb_mv[1];
+     int    nb_ref   = enc_picture->ref_idx[LIST_0][left.pos_y][left.pos_x];
+     int    nb_field = img->mb_data[left.mb_addr].mb_field;
+ 
+     if (cur->mb_field && !nb_field)
+     {
+       nb_mv_y /= 2;
+       nb_ref  *= 2;
+     }
+     if (!cur->mb_field && nb_field)
+     {
+       nb_mv_y *= 2;
+       nb_ref >>= 1;
+     }
+ 
+     left_is_zero = (nb_ref == 0 && nb_mv[0] == 0 && nb_mv_y == 0) ? 1 : 0;
+   }
+ 
+   if (above.available)
+   {
+     short *nb_mv    = enc_picture->mv[LIST_0][above.pos_y][above.pos_x];
+     int    nb_mv_y  = nb_mv[1];
+     int    nb_ref   = enc_picture->ref_idx[LIST_0][above.pos_y][above.pos_x];
+     int    nb_field = img->mb_data[above.mb_addr].mb_field;
+ 
+     if (cur->mb_field && !nb_field)
+     {
+       nb_mv_y /= 2;
+       nb_ref  *= 2;
+     }
+     if (!cur->mb_field && nb_field)
+     {
+       nb_mv_y *= 2;
+       nb_ref >>= 1;
+     }
+ 
+     above_is_zero = (nb_ref == 0 && nb_mv[0] == 0 && nb_mv_y == 0) ? 1 : 0;
+   }
+ 
+   if (above_is_zero || left_is_zero)
+   {
+     skip_mv[0] = 0;
+     skip_mv[1] = 0;
+   }
+   else
+   {
+     SetMotionVectorPredictor (skip_mv, enc_picture->ref_idx[LIST_0], enc_picture->mv[LIST_0], 0, LIST_0, 0, 0, 16, 16);
+   }
+ 
+   for (row = 0; row < 4; row++)
+   {
+     for (col = 0; col < 4; col++)
+     {
+       mv_tab [row][col][0][0][0][0] = skip_mv[0];
+       mv_tab [row][col][0][0][0][1] = skip_mv[1];
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Get cost for direct mode for an 8x8 block
+  *
+  * \param block
+  *    8x8 block index inside the macroblock
+  * \param cost8x8
+  *    in/out accumulator: the SATD8X8 distortion of this block is added
+  *    when rdopt is off and the 8x8 transform is enabled; set to INT_MAX
+  *    when direct mode is not allowed for one of the 4x4 blocks
+  *
+  * \return
+  *    sum of the 4x4 SATD values of the block, or INT_MAX when direct
+  *    mode is not allowed
+  ************************************************************************
+  */
+ int Get_Direct_Cost8x8 (int block, int *cost8x8)
+ {
+   int err8x8[8][8];      // residual of the whole 8x8 block
+   int flat8x8[64];       // the same residual as one contiguous row-major vector
+   int err4x4[16];        // residual of the current 4x4 block
+   int total  = 0;
+   int base_y = (block/2)<<3;
+   int base_x = (block%2)<<3;
+   int off_y, off_x, dy, dx;
+ 
+   for (off_y = 0; off_y < 8; off_y += 4)
+   {
+     int pel_y = base_y + off_y;
+     int src_y = img->opix_y + pel_y;
+ 
+     for (off_x = 0; off_x < 8; off_x += 4)
+     {
+       int pel_x = base_x + off_x;
+       int src_x = img->opix_x + pel_x;
+       int pdir  = direct_pdir[src_y>>2][src_x>>2];
+       int n     = 0;
+ 
+       if (pdir < 0)
+       {
+         *cost8x8 = INT_MAX;
+         return INT_MAX; //mode not allowed
+       }
+ 
+       //===== prediction of 4x4 block =====
+       LumaPrediction4x4 (pel_x, pel_y, pdir, 0, 0,
+         direct_ref_idx[LIST_0][src_y>>2][src_x>>2],
+         direct_ref_idx[LIST_1][src_y>>2][src_x>>2]);
+ 
+       //===== get displaced frame difference ======
+       for (dy = 0; dy < 4; dy++)
+       {
+         for (dx = 0; dx < 4; dx++)
+         {
+           int e = imgY_org[src_y+dy][src_x+dx] - img->mpr[dy+pel_y][dx+pel_x];
+           err8x8[off_y+dy][off_x+dx] = e;
+           err4x4[n++] = e;
+         }
+       }
+ 
+       total += SATD (err4x4, input->hadamard);
+     }
+   }
+ 
+   if (input->rdopt == 0 && input->Transform8x8Mode)
+   {
+     // the 8x8 array is contiguous, so one copy flattens all eight rows
+     memcpy(flat8x8, err8x8, sizeof(err8x8));
+     *cost8x8 += SATD8X8 (flat8x8, input->hadamard);
+   }
+ 
+   return total;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Get cost for direct mode for a macroblock
+  *
+  *    Sums the direct-mode cost of the four 8x8 blocks and then selects
+  *    between the 4x4-based and the 8x8-based cost according to
+  *    input->Transform8x8Mode.
+  *
+  * \param lambda_factor
+  *    not used by this function
+  *
+  * \return
+  *    direct-mode cost of the macroblock, or INT_MAX when direct mode is
+  *    not allowed for one of its blocks
+  ************************************************************************
+  */
+ int Get_Direct_CostMB (int lambda_factor)
+ {
+   int i;
+   int cost = 0;
+   int cost8x8 = 0;
+   
+   for (i=0; i<4; i++)
+   {
+     int blk_cost = Get_Direct_Cost8x8 (i, &cost8x8);
+ 
+     // Check the "not allowed" marker before accumulating: adding the
+     // INT_MAX return value to a positive running sum would overflow.
+     if (cost8x8 == INT_MAX) return INT_MAX;
+ 
+     cost += blk_cost;
+   }
+   
+   switch(input->Transform8x8Mode)
+   {
+   case 1: // Mixture of 8x8 & 4x4 transform
+     if((cost8x8 < cost)||
+       !(input->InterSearch8x4 &&
+       input->InterSearch4x8 &&
+       input->InterSearch4x4)
+       )
+     {
+       cost = cost8x8; //return 8x8 cost
+     }
+     break;
+   case 2: // 8x8 Transform only
+     cost = cost8x8;
+     break;
+   default: // 4x4 Transform only
+     break;
+   }
+   
+   return cost;
+   // T.Nishi(MEI ACC) 04-28-2004 end
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Motion search for a partition
+  *
+  *    Runs BlockMotionSearch for every sub-block of the partition, for
+  *    every reference of every list, accumulates the returned costs in
+  *    motion_cost[blocktype][list][ref][block8x8], and copies the found
+  *    vector and the reference index into enc_picture->mv and
+  *    enc_picture->ref_idx (for motion vector prediction).
+  *
+  * \param blocktype
+  *    partition mode; indexes input->part_size
+  * \param block8x8
+  *    index (0..3) used to look up the partition origin
+  * \param lambda_factor
+  *    passed through to BlockMotionSearch
+  ************************************************************************
+  */
+ void
+ PartitionMotionSearch (int    blocktype,
+                        int    block8x8,
+                        int    lambda_factor)
+ {
+   // partition origin (in 4x4 block units) per partition type and 8x8 index
+   static int  bx0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,2,0,2}};
+   static int  by0[5][4] = {{0,0,0,0}, {0,0,0,0}, {0,2,0,0}, {0,0,0,0}, {0,0,2,2}};
+ 
+   int   part        = (blocktype < 4) ? blocktype : 4;
+   int   org_x       = bx0[part][block8x8];
+   int   org_y       = by0[part][block8x8];
+   int   end_x       = org_x + input->part_size[part][0];
+   int   end_y       = org_y + input->part_size[part][1];
+   int   inc_x       = input->part_size[blocktype][0];
+   int   inc_y       = input->part_size[blocktype][1];
+   int   numlists    = (img->type == B_SLICE) ? 2 : 1;
+   int   list_offset = img->mb_data[img->current_mb_nr].list_offset;
+   int   list, row, col, dy, dx;
+   int   search_range;
+   short ref;
+ 
+   //===== LOOP OVER REFERENCE FRAMES =====
+   for (list = 0; list < numlists; list++)
+   {
+     for (ref = 0; ref < listXsize[list+list_offset]; ref++)
+     {
+       char  **ref_plane = enc_picture->ref_idx[list];
+       short ***mv_plane = enc_picture->mv[list];
+       int   *cost_slot  = &motion_cost[blocktype][list][ref][block8x8];
+ 
+       //----- set search range ---
+ #ifdef _FULL_SEARCH_RANGE_
+       if      (input->full_search == 2)
+         search_range = input->search_range;
+       else if (input->full_search == 1)
+         search_range = input->search_range /  (min(ref,1)+1);
+       else
+         search_range = input->search_range / ((min(ref,1)+1) * min(2,blocktype));
+ #else
+       search_range = input->search_range / ((min(ref,1)+1) * min(2,blocktype));
+ #endif
+ 
+       //----- init motion cost -----
+       *cost_slot = 0;
+ 
+       //===== LOOP OVER SUB MACRO BLOCK partitions
+       for (row = org_y; row < end_y; row += inc_y)
+       {
+         int pic_row = img->block_y + row;
+ 
+         for (col = org_x; col < end_x; col += inc_x)
+         {
+           short *found   = img->all_mv[row][col][list][ref][blocktype];
+           int    pic_col = img->block_x + col;
+           int    cost;
+ 
+           //--- motion search for block ---
+           cost = BlockMotionSearch (ref, list, col<<2, row<<2, blocktype, search_range, lambda_factor);
+           *cost_slot += cost;
+ 
+           //--- set motion vectors and reference frame (for motion vector prediction) ---
+           for (dy = 0; dy < inc_y; dy++)
+           {
+             char  *ref_row = &ref_plane[pic_row + dy][pic_col];
+             short **mv_row = &mv_plane [pic_row + dy][pic_col];
+ 
+             memset(ref_row, ref, inc_x * sizeof(char));
+ 
+             for (dx = 0; dx < inc_x; dx++)
+             {
+               mv_row[dx][0] = found[0];
+               mv_row[dx][1] = found[1];
+             }
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Calculate Direct Motion Vectors
+  *
+  *    For each of the sixteen 4x4 blocks of the current macroblock this
+  *    fills img->all_mv (LIST_0 and LIST_1, mode 0), direct_ref_idx and
+  *    direct_pdir. Spatial direct prediction is used when
+  *    img->direct_spatial_mv_pred_flag is set, temporal direct prediction
+  *    otherwise. A block whose vectors violate the level limits (or, in
+  *    temporal mode, whose co-located reference cannot be mapped) is
+  *    marked with -1 in direct_ref_idx and direct_pdir.
+  ************************************************************************
+  */
+ void Get_Direct_Motion_Vectors ()
+ {
+   
+   int   block_x, block_y, pic_block_x, pic_block_y, opic_block_x, opic_block_y;
+   short ****all_mvs;
+   int   mv_scale;
+   int refList; 
+   int ref_idx;   
+   
+   byte  **   moving_block;
+   short ****   co_located_mv;
+   char  ***    co_located_ref_idx;
+   int64 ***    co_located_ref_id;
+   char  **     ref_pic_l0 = enc_picture->ref_idx[LIST_0];
+   char  **     ref_pic_l1 = enc_picture->ref_idx[LIST_1];
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   
+   //----- select the co-located data set (frame, top or bottom) -----
+   if (currMB->list_offset)
+   {
+     if(img->current_mb_nr%2)
+     {
+       moving_block = Co_located->bottom_moving_block;
+       co_located_mv = Co_located->bottom_mv;
+       co_located_ref_idx = Co_located->bottom_ref_idx;
+       co_located_ref_id = Co_located->bottom_ref_pic_id;
+     }
+     else
+     {
+       moving_block = Co_located->top_moving_block;
+       co_located_mv = Co_located->top_mv;
+       co_located_ref_idx = Co_located->top_ref_idx;
+       co_located_ref_id = Co_located->top_ref_pic_id;
+     }
+   }
+   else
+   {
+     moving_block = Co_located->moving_block;
+     co_located_mv = Co_located->mv;
+     co_located_ref_idx = Co_located->ref_idx;
+     co_located_ref_id = Co_located->ref_pic_id;
+   }
+   
+   if (img->direct_spatial_mv_pred_flag)  //spatial direct mode copy from decoder
+   {
+     
+     short l0_refA, l0_refB, l0_refD, l0_refC;
+     short l1_refA, l1_refB, l1_refD, l1_refC; 
+     short l0_refX,l1_refX;
+     short pmvfw[2]={0,0},pmvbw[2]={0,0};
+     
+     PixelPos mb_a, mb_b, mb_d, mb_c;              
+     
+     getLuma4x4Neighbour(img->current_mb_nr, 0, 0, -1,  0,&mb_a);
+     getLuma4x4Neighbour(img->current_mb_nr, 0, 0,  0, -1,&mb_b);
+     getLuma4x4Neighbour(img->current_mb_nr, 0, 0, 16, -1,&mb_c);
+     getLuma4x4Neighbour(img->current_mb_nr, 0, 0, -1, -1,&mb_d);
+     
+     //----- reference indices of the neighbours (C falls back to D) -----
+     if (!img->MbaffFrameFlag)
+     {
+       l0_refA = mb_a.available ? ref_pic_l0[mb_a.pos_y][mb_a.pos_x] : -1;
+       l0_refB = mb_b.available ? ref_pic_l0[mb_b.pos_y][mb_b.pos_x] : -1;
+       l0_refD = mb_d.available ? ref_pic_l0[mb_d.pos_y][mb_d.pos_x] : -1;
+       l0_refC = mb_c.available ? ref_pic_l0[mb_c.pos_y][mb_c.pos_x] : l0_refD;      
+       
+       l1_refA = mb_a.available ? ref_pic_l1[mb_a.pos_y][mb_a.pos_x] : -1;
+       l1_refB = mb_b.available ? ref_pic_l1[mb_b.pos_y][mb_b.pos_x] : -1;
+       l1_refD = mb_d.available ? ref_pic_l1[mb_d.pos_y][mb_d.pos_x] : -1;
+       l1_refC = mb_c.available ? ref_pic_l1[mb_c.pos_y][mb_c.pos_x] : l1_refD;      
+     }
+     else
+     {
+       if (currMB->mb_field)
+       {
+         // current MB is a field MB: indices of non-field neighbours are doubled
+         l0_refA = mb_a.available 
+           ? (img->mb_data[mb_a.mb_addr].mb_field  || ref_pic_l0[mb_a.pos_y][mb_a.pos_x] < 0
+           ?  ref_pic_l0[mb_a.pos_y][mb_a.pos_x]
+           :  ref_pic_l0[mb_a.pos_y][mb_a.pos_x] * 2) : -1;
+         
+         l0_refB = mb_b.available 
+           ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l0[mb_b.pos_y][mb_b.pos_x] < 0
+           ?  ref_pic_l0[mb_b.pos_y][mb_b.pos_x] 
+           :  ref_pic_l0[mb_b.pos_y][mb_b.pos_x] * 2) : -1;
+         
+         l0_refD = mb_d.available 
+           ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l0[mb_d.pos_y][mb_d.pos_x] < 0
+           ?  ref_pic_l0[mb_d.pos_y][mb_d.pos_x] 
+           :  ref_pic_l0[mb_d.pos_y][mb_d.pos_x] * 2) : -1;
+         
+         l0_refC = mb_c.available 
+           ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l0[mb_c.pos_y][mb_c.pos_x] < 0
+           ?  ref_pic_l0[mb_c.pos_y][mb_c.pos_x] 
+           :  ref_pic_l0[mb_c.pos_y][mb_c.pos_x] * 2) : l0_refD;
+         
+         l1_refA = mb_a.available 
+           ? (img->mb_data[mb_a.mb_addr].mb_field || ref_pic_l1[mb_a.pos_y][mb_a.pos_x] < 0
+           ?  ref_pic_l1[mb_a.pos_y][mb_a.pos_x] 
+           :  ref_pic_l1[mb_a.pos_y][mb_a.pos_x] * 2) : -1;
+         
+         l1_refB = mb_b.available 
+           ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l1[mb_b.pos_y][mb_b.pos_x] < 0
+           ?  ref_pic_l1[mb_b.pos_y][mb_b.pos_x] 
+           :  ref_pic_l1[mb_b.pos_y][mb_b.pos_x] * 2) : -1;
+         
+         l1_refD = mb_d.available 
+           ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l1[mb_d.pos_y][mb_d.pos_x] < 0
+           ?  ref_pic_l1[mb_d.pos_y][mb_d.pos_x] 
+           :  ref_pic_l1[mb_d.pos_y][mb_d.pos_x] * 2) : -1;
+         
+         l1_refC = mb_c.available 
+           ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l1[mb_c.pos_y][mb_c.pos_x] < 0
+           ?  ref_pic_l1[mb_c.pos_y][mb_c.pos_x] 
+           :  ref_pic_l1[mb_c.pos_y][mb_c.pos_x] * 2) : l1_refD;
+       }
+       else
+       {
+         // current MB is not a field MB: indices of field neighbours are halved
+         l0_refA = mb_a.available 
+           ? (img->mb_data[mb_a.mb_addr].mb_field || ref_pic_l0[mb_a.pos_y][mb_a.pos_x]  < 0 
+           ?  ref_pic_l0[mb_a.pos_y][mb_a.pos_x] >> 1 
+           :  ref_pic_l0[mb_a.pos_y][mb_a.pos_x]) : -1;
+         
+         l0_refB = mb_b.available 
+           ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l0[mb_b.pos_y][mb_b.pos_x] < 0 
+           ?  ref_pic_l0[mb_b.pos_y][mb_b.pos_x] >> 1 
+           :  ref_pic_l0[mb_b.pos_y][mb_b.pos_x]) : -1;
+         
+         l0_refD = mb_d.available 
+           ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l0[mb_d.pos_y][mb_d.pos_x] < 0 
+           ?  ref_pic_l0[mb_d.pos_y][mb_d.pos_x] >> 1 
+           :  ref_pic_l0[mb_d.pos_y][mb_d.pos_x]) : -1;      
+         
+         l0_refC = mb_c.available 
+           ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l0[mb_c.pos_y][mb_c.pos_x] < 0 
+           ?  ref_pic_l0[mb_c.pos_y][mb_c.pos_x] >> 1 
+           :  ref_pic_l0[mb_c.pos_y][mb_c.pos_x]) : l0_refD;      
+         
+         l1_refA = mb_a.available 
+           ? (img->mb_data[mb_a.mb_addr].mb_field || ref_pic_l1[mb_a.pos_y][mb_a.pos_x] < 0 
+           ?  ref_pic_l1[mb_a.pos_y][mb_a.pos_x] >> 1 
+           :  ref_pic_l1[mb_a.pos_y][mb_a.pos_x]) : -1;
+         
+         l1_refB = mb_b.available 
+           ? (img->mb_data[mb_b.mb_addr].mb_field || ref_pic_l1[mb_b.pos_y][mb_b.pos_x] < 0 
+           ?  ref_pic_l1[mb_b.pos_y][mb_b.pos_x] >> 1 
+           :  ref_pic_l1[mb_b.pos_y][mb_b.pos_x]) : -1;
+         
+         l1_refD = mb_d.available 
+           ? (img->mb_data[mb_d.mb_addr].mb_field || ref_pic_l1[mb_d.pos_y][mb_d.pos_x] < 0 
+           ?  ref_pic_l1[mb_d.pos_y][mb_d.pos_x] >> 1 
+           :  ref_pic_l1[mb_d.pos_y][mb_d.pos_x]) : -1;
+         
+         l1_refC = mb_c.available 
+           ? (img->mb_data[mb_c.mb_addr].mb_field || ref_pic_l1[mb_c.pos_y][mb_c.pos_x] < 0 
+           ?  ref_pic_l1[mb_c.pos_y][mb_c.pos_x] >> 1
+           :  ref_pic_l1[mb_c.pos_y][mb_c.pos_x]) : l1_refD;
+       }
+     }
+     
+     // smallest non-negative index among A, B and C; negative if all are negative
+     l0_refX = (l0_refA >= 0 && l0_refB >= 0) ? min(l0_refA,l0_refB): max(l0_refA,l0_refB);
+     l0_refX = (l0_refX >= 0 && l0_refC >= 0) ? min(l0_refX,l0_refC): max(l0_refX,l0_refC);
+     
+     l1_refX = (l1_refA >= 0 && l1_refB >= 0) ? min(l1_refA,l1_refB): max(l1_refA,l1_refB);
+     l1_refX = (l1_refX >= 0 && l1_refC >= 0) ? min(l1_refX,l1_refC): max(l1_refX,l1_refC);        
+     
+     if (l0_refX >=0)
+       SetMotionVectorPredictor (pmvfw, enc_picture->ref_idx[LIST_0], enc_picture->mv[LIST_0], l0_refX, LIST_0, 0, 0, 16, 16);
+     
+     if (l1_refX >=0)
+       SetMotionVectorPredictor (pmvbw, enc_picture->ref_idx[LIST_1], enc_picture->mv[LIST_1], l1_refX, LIST_1, 0, 0, 16, 16);
+     
+     for (block_y=0; block_y<4; block_y++)
+     {
+       pic_block_y  = (img->pix_y  >> 2) + block_y;
+       opic_block_y = (img->opix_y >> 2) + block_y;
+       
+       for (block_x=0; block_x<4; block_x++)
+       {
+         pic_block_x  = (img->pix_x  >> 2) + block_x;
+         opic_block_x = (img->opix_x >> 2) + block_x;
+         
+         all_mvs = img->all_mv[block_y][block_x];
+         
+         //----- LIST_0 -----
+         if (l0_refX >=0)
+         {
+           if (!l0_refX  && !moving_block[opic_block_y][opic_block_x])
+           {
+             all_mvs[LIST_0][0][0][0] = 0;
+             all_mvs[LIST_0][0][0][1] = 0;            
+             direct_ref_idx[LIST_0][pic_block_y][pic_block_x]=0;       
+           }
+           else
+           {
+             all_mvs[LIST_0][l0_refX][0][0] = pmvfw[0];
+             all_mvs[LIST_0][l0_refX][0][1] = pmvfw[1];
+             direct_ref_idx[LIST_0][pic_block_y][pic_block_x]= (char)l0_refX;              
+           }
+         }
+         else
+         {
+           all_mvs[LIST_0][0][0][0] = 0;
+           all_mvs[LIST_0][0][0][1] = 0;
+           direct_ref_idx[LIST_0][pic_block_y][pic_block_x]=-1;          
+         }
+         
+         //----- LIST_1 -----
+         if (l1_refX >=0)
+         {
+           if(l1_refX==0 && !moving_block[opic_block_y][opic_block_x])
+           {                  
+             all_mvs[LIST_1][0][0][0] = 0;
+             all_mvs[LIST_1][0][0][1] = 0;
+             direct_ref_idx[LIST_1][pic_block_y][pic_block_x]= (char)l1_refX;     
+           }
+           else
+           {
+             all_mvs[LIST_1][l1_refX][0][0] = pmvbw[0];
+             all_mvs[LIST_1][l1_refX][0][1] = pmvbw[1];
+             direct_ref_idx[LIST_1][pic_block_y][pic_block_x]= (char)l1_refX;
+           }               
+         }
+         else
+         {      
+           direct_ref_idx[LIST_1][pic_block_y][pic_block_x]=-1;
+           
+           all_mvs[LIST_1][0][0][0] = 0;
+           all_mvs[LIST_1][0][0][1] = 0;
+         }
+         
+         // Test Level Limits if satisfied.
+         if (img->MbaffFrameFlag 
+           && (all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][0] < -8192 
+           ||  all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][0] >  8191 
+           ||  all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][1] < LEVELMVLIMIT[img->LevelIndex][4] 
+           ||  all_mvs[LIST_0][l0_refX < 0? 0 : l0_refX][0][1] > LEVELMVLIMIT[img->LevelIndex][5] 
+           ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][0] < -8192 
+           ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][0] > 8191 
+           ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][1] < LEVELMVLIMIT[img->LevelIndex][4] 
+           ||  all_mvs[LIST_1][l1_refX < 0? 0 : l1_refX][0][1] > LEVELMVLIMIT[img->LevelIndex][5])) 
+         { 
+           direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1; 
+           direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1; 
+           direct_pdir           [pic_block_y][pic_block_x] = -1; 
+         } 
+         else 
+         { 
+           // no usable neighbour reference in either list: use index 0 in both
+           if (l0_refX < 0 && l1_refX < 0)
+           {
+             direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = 
+               direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = 0;
+           }
+           if      (direct_ref_idx[LIST_1][pic_block_y][pic_block_x] == -1) 
+             direct_pdir[pic_block_y][pic_block_x] = 0;
+           else if (direct_ref_idx[LIST_0][pic_block_y][pic_block_x] == -1) 
+             direct_pdir[pic_block_y][pic_block_x] = 1;
+           else                                                           
+             direct_pdir[pic_block_y][pic_block_x] = 2;
+         }        
+       }
+     }
+   }
+   else
+   {
+     int64 *refpic = enc_picture->ref_pic_num[LIST_0 +currMB->list_offset];
+     
+     //temporal direct mode copy from decoder
+     for (block_y = 0; block_y < 4; block_y++)
+     {
+       pic_block_y  = (img->pix_y  >> 2) + block_y;
+       opic_block_y = (img->opix_y >> 2) + block_y;
+       
+       for (block_x = 0; block_x < 4; block_x++)
+       {
+         pic_block_x  = (img->pix_x>>2) + block_x;
+         opic_block_x = (img->opix_x>>2) + block_x;
+         all_mvs = img->all_mv[block_y][block_x];
+         
+         // use the co-located LIST_0 data unless its reference index is -1
+         refList = (co_located_ref_idx[LIST_0][opic_block_y][opic_block_x]== -1 ? LIST_1 : LIST_0);
+         ref_idx = co_located_ref_idx[refList][opic_block_y][opic_block_x];
+         
+         // next P is intra mode
+         if (ref_idx==-1)
+         {
+           all_mvs[LIST_0][0][0][0] = 0;
+           all_mvs[LIST_0][0][0][1] = 0;
+           all_mvs[LIST_1][0][0][0] = 0;
+           all_mvs[LIST_1][0][0][1] = 0;
+           direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = 0;
+           direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = 0;
+           direct_pdir[pic_block_y][pic_block_x] = 2;
+         }
+         // next P is skip or inter mode
+         else 
+         {
+           int mapped_idx=INVALIDINDEX;
+           int iref; 
+           
+           // find the LIST_0 entry that refers to the co-located block's reference picture
+           for (iref=0;iref<min(img->num_ref_idx_l0_active,listXsize[LIST_0+currMB->list_offset]);iref++)
+           {
+             if (refpic[iref]==co_located_ref_id[refList ][opic_block_y][opic_block_x])
+             {
+               mapped_idx=iref;
+               break;
+             }
+             else //! no match at this entry; the index stays invalid
+             {                        
+               mapped_idx=INVALIDINDEX;
+             }
+           }
+           
+           if (mapped_idx !=INVALIDINDEX)
+           {
+             mv_scale = img->mvscale[LIST_0+currMB->list_offset][mapped_idx];
+             
+             if (mv_scale==9999)
+             {
+               // forward: the co-located vector is copied unscaled. It is stored
+               // under mapped_idx because the limit test below reads that slot
+               // and direct_ref_idx[LIST_0] is set to mapped_idx.
+               all_mvs[LIST_0][mapped_idx][0][0] = co_located_mv[refList][opic_block_y][opic_block_x][0];
+               all_mvs[LIST_0][mapped_idx][0][1] = co_located_mv[refList][opic_block_y][opic_block_x][1];
+               // backward
+               all_mvs[LIST_1][0][0][0] = 0;
+               all_mvs[LIST_1][0][0][1] = 0;
+             }
+             else
+             {
+               // forward
+               all_mvs[LIST_0][mapped_idx][0][0] = (mv_scale * co_located_mv[refList][opic_block_y][opic_block_x][0] + 128) >> 8;
+               all_mvs[LIST_0][mapped_idx][0][1] = (mv_scale * co_located_mv[refList][opic_block_y][opic_block_x][1] + 128) >> 8;
+               // backward
+               all_mvs[LIST_1][         0][0][0] = ((mv_scale - 256)* co_located_mv[refList][opic_block_y][opic_block_x][0] + 128) >> 8;
+               all_mvs[LIST_1][         0][0][1] = ((mv_scale - 256)* co_located_mv[refList][opic_block_y][opic_block_x][1] + 128) >> 8;
+             }
+             
+             // Test Level Limits if satisfied.
+             if ( all_mvs[LIST_0][mapped_idx][0][0] < -8192 
+               || all_mvs[LIST_0][mapped_idx][0][0] >  8191 
+               || all_mvs[LIST_0][mapped_idx][0][1] < LEVELMVLIMIT[img->LevelIndex][4] 
+               || all_mvs[LIST_0][mapped_idx][0][1] > LEVELMVLIMIT[img->LevelIndex][5] 
+               || all_mvs[LIST_1][0][0][0] < -8192 
+               || all_mvs[LIST_1][0][0][0] > 8191 
+               || all_mvs[LIST_1][0][0][1] < LEVELMVLIMIT[img->LevelIndex][4] 
+               || all_mvs[LIST_1][0][0][1] > LEVELMVLIMIT[img->LevelIndex][5]) 
+             { 
+               direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1; 
+               direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1; 
+               direct_pdir[pic_block_y][pic_block_x] = -1; 
+             } 
+             else 
+             { 
+               direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = mapped_idx; 
+               direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = 0; 
+               direct_pdir[pic_block_y][pic_block_x] = 2; 
+             }
+           }
+           else
+           {
+             // the co-located reference is not in the active LIST_0: direct mode not allowed
+             direct_ref_idx[LIST_0][pic_block_y][pic_block_x] = -1;
+             direct_ref_idx[LIST_1][pic_block_y][pic_block_x] = -1;
+             direct_pdir[pic_block_y][pic_block_x] = -1;
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Give a the sign of b
+  *
+  * \param a
+  *    value whose magnitude (absm) is returned
+  * \param b
+  *    value that selects the sign; zero counts as non-negative
+  *
+  * \return
+  *    absm(a) if b >= 0, -absm(a) otherwise
+  ************************************************************************
+  */
+ int sign(int a,int b)
+ {
+   int magnitude = absm(a);
+ 
+   if (b < 0)
+     return -magnitude;
+ 
+   return magnitude;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/mv-search.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/mv-search.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/mv-search.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,76 ----
+ 
+ /*!
+  ************************************************************************
+  * \file mv-search.h
+  *
+  * \brief
+  *   array definition for motion search
+  *
+  * \author
+  *    Inge Lille-Langoy               <inge.lille-langoy at telenor.com>   \n
+  *    Copyright (C) 1999  Telenor Satellite Services, Norway
+  *
+  ************************************************************************
+  */
+ 
+ #ifndef _MV_SEARCH_H_
+ #define _MV_SEARCH_H_
+ 
+ //! Lookup table indexed by QP (0..39) giving the corresponding quantizer value.
+ //! Original note: convert from H.263 QP to H.264 quant given by: quant=pow(2,QP/6)
+ //! (the entries look like that expression rounded to the nearest integer).
+ //! NOTE(review): this is a non-static object definition inside a header; every
+ //! translation unit that includes the header gets its own definition - confirm
+ //! the header is only included where that is intended.
+ const int QP2QUANT[40]=
+ {
+    1, 1, 1, 1, 2, 2, 2, 2,
+    3, 3, 3, 4, 4, 4, 5, 6,
+    6, 7, 8, 9,10,11,13,14,
+   16,18,20,23,25,29,32,36,
+   40,45,51,57,64,72,81,91
+ };
+ 
+ // Vertical MV limits (integer / half-pel / quarter-pel).
+ // One row per level index (the table is indexed with img->LevelIndex); each row
+ // holds three (lower bound, upper bound) pairs:
+ //   [0],[1] integer-pel   [2],[3] half-pel   [4],[5] quarter-pel
+ // Original note: currently only the integer-pel restrictions are used, since
+ // the way the values are specified (i.e. mvlowbound = (levelmvlowbound + 1))
+ // and the way sub-pel ME is performed, sub-pel will always be within range.
+ // NOTE(review): like QP2QUANT this is a non-static definition in a header.
+  
+ const int LEVELMVLIMIT[17][6] =
+ {
+   {  -63,  63,  -128,  127,  -256,  255},
+   {  -63,  63,  -128,  127,  -256,  255},
+   { -127, 127,  -256,  255,  -512,  511},
+   { -127, 127,  -256,  255,  -512,  511},
+   { -127, 127,  -256,  255,  -512,  511},
+   { -127, 127,  -256,  255,  -512,  511},
+   { -255, 255,  -512,  511, -1024, 1023},
+   { -255, 255,  -512,  511, -1024, 1023},
+   { -255, 255,  -512,  511, -1024, 1023},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047},
+   { -511, 511, -1024, 1023, -2048, 2047}
+ 
+   /*
+   Alternative table kept for reference: identical except that the
+   integer-pel lower bounds are one smaller (-64 instead of -63, etc.).
+ 
+   {  -64,  63,  -128,  127,  -256,  255},
+   {  -64,  63,  -128,  127,  -256,  255},
+   { -128, 127,  -256,  255,  -512,  511},
+   { -128, 127,  -256,  255,  -512,  511},
+   { -128, 127,  -256,  255,  -512,  511},
+   { -128, 127,  -256,  255,  -512,  511},
+   { -256, 255,  -512,  511, -1024, 1023},
+   { -256, 255,  -512,  511, -1024, 1023},
+   { -256, 255,  -512,  511, -1024, 1023},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047},
+   { -512, 511, -1024, 1023, -2048, 2047}
+   */
+ };
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/nal.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/nal.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/nal.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,147 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    nal.c
+  * \brief
+  *    Handles the operations on converting String of Data Bits (SODB)
+  *    to Raw Byte Sequence Payload (RBSP), and then 
+  *    on to Encapsulated Byte Sequence Payload (EBSP).
+  *  \date 14 June 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Shankar Regunathan                  <shanre at microsoft.de>
+  *      - Stephan Wenger                      <stewe at cs.tu-berlin.de>
+  ***************************************************************************************
+  */
+ 
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ 
+ //! Scratch buffer holding a copy of the payload while RBSPtoEBSP() rewrites it
+ //! (allocated by AllocNalPayloadBuffer(), released by FreeNalPayloadBuffer())
+ static byte *NAL_Payload_buffer;
+ 
+  /*!
+  ************************************************************************
+  * \brief
+  *    Converts a String Of Data Bits (SODB) to a Raw Byte Sequence
+  *    Payload (RBSP) by appending a single 1 bit followed by zero bits
+  *    up to the next byte boundary
+  * \param currStream
+  *        Bitstream which contains data bits.
+  * \return None
+  * \note currStream is byte-aligned at the end of this function
+  *    (bits_to_go is reset to 8 and byte_buf to 0)
+  ************************************************************************
+ */
+ void SODBtoRBSP(Bitstream *currStream)
+ {
+   // append the terminating 1 bit to the partially filled byte
+   currStream->byte_buf = (currStream->byte_buf << 1) | 1;
+   --currStream->bits_to_go;
+ 
+   // left-align: the still unused bit positions become trailing zeros
+   currStream->byte_buf <<= currStream->bits_to_go;
+ 
+   // emit the completed byte and reset the bit accumulator
+   currStream->streamBuffer[currStream->byte_pos] = currStream->byte_buf;
+   currStream->byte_pos++;
+   currStream->bits_to_go = 8;
+   currStream->byte_buf   = 0;
+ }
+ 
+ 
+ /*!
+ ************************************************************************
+ *  \brief
+ *     Converts an RBSP payload to an EBSP payload in place: a 0x03 byte is
+ *     inserted whenever ZEROBYTES_SHORTSTARTCODE zero bytes are followed by
+ *     a byte whose upper six bits are all zero (i.e. a value 0x00..0x03).
+ *
+ *  \param streamBuffer
+ *       pointer to data bits; receives the EBSP
+ *  \param begin_bytepos
+ *            The byte position after start-code, after which stuffing to
+ *            prevent start-code emulation begins.
+ *  \param end_bytepos
+ *           Size of streamBuffer in bytes.
+ *  \param min_num_bytes
+ *           Minimum number of bytes in payload. Should be 0 for VLC entropy
+ *           coding mode. Determines number of stuffed words for CABAC mode.
+ *  \return
+ *           Size of streamBuffer after stuffing.
+ *  \note
+ *      NAL_Payload_buffer is used as temporary buffer to store data.
+ *      Each appended stuffing word (0x00 0x00 0x03) is accounted as 16 bits
+ *      in stats->bit_use_stuffingBits[img->type].
+ ************************************************************************
+ */
+ 
+ int RBSPtoEBSP(byte *streamBuffer, int begin_bytepos, int end_bytepos, int min_num_bytes)
+ {
+   int src;                      // read position in NAL_Payload_buffer
+   int dst      = begin_bytepos; // write position in streamBuffer
+   int zero_run = 0;             // consecutive 0x00 bytes seen so far
+   int stuffed;                  // counter for the stuffing loop
+ 
+   // work from an untouched copy, because streamBuffer is rewritten in place
+   memcpy(&NAL_Payload_buffer[begin_bytepos],&streamBuffer[begin_bytepos], (end_bytepos - begin_bytepos) * sizeof(unsigned char));
+ 
+   for (src = begin_bytepos; src < end_bytepos; src++)
+   {
+     const byte cur = NAL_Payload_buffer[src];
+ 
+     if (zero_run == ZEROBYTES_SHORTSTARTCODE && (cur & 0xFC) == 0)
+     {
+       // break the would-be start code with an emulation prevention byte
+       streamBuffer[dst++] = 0x03;
+       zero_run = 0;
+     }
+ 
+     streamBuffer[dst++] = cur;
+     zero_run = (cur == 0x00) ? zero_run + 1 : 0;
+   }
+ 
+   // pad up to min_num_bytes with three-byte stuffing words
+   for (stuffed = 0; stuffed < (min_num_bytes - end_bytepos); stuffed += 3)
+   {
+     streamBuffer[dst++] = 0x00; // CABAC zero word
+     streamBuffer[dst++] = 0x00;
+     streamBuffer[dst++] = 0x03;
+     stats->bit_use_stuffingBits[img->type] += 16;
+   }
+ 
+   return dst;
+ }
+ 
+  /*!
+  ************************************************************************
+  * \brief
+  *    Initializes NAL module (allocates NAL_Payload_buffer)
+  * \note
+  *    A previously allocated buffer is released first. An allocation
+  *    failure is reported through no_mem_exit(), so the check is not
+  *    compiled out when assertions are disabled (NDEBUG).
+  ************************************************************************
+ */
+ 
+ void AllocNalPayloadBuffer()
+ {
+   // AH 190202: There can be data expansion with low QP values. So, we make
+   // sure that the buffer does not overflow by reserving 5 bytes per pel of
+   // (img_width + auto_crop_right) x (img_height + auto_crop_bottom).
+   const int buffer_size = ((input->img_width+img->auto_crop_right) * (input->img_height+img->auto_crop_bottom) * 5);
+ 
+   FreeNalPayloadBuffer();
+ 
+   if ((NAL_Payload_buffer = (byte *) calloc(buffer_size, sizeof(byte))) == NULL)
+     no_mem_exit ("AllocNalPayloadBuffer: NAL_Payload_buffer");
+ }
+ 
+ 
+  /*!
+  ************************************************************************
+  * \brief
+  *   Shuts down the NAL module (frees NAL_Payload_buffer and resets the
+  *   pointer to NULL, so the function may be called repeatedly)
+  ************************************************************************
+ */
+ 
+ void FreeNalPayloadBuffer()
+ {
+   // free(NULL) is a no-op, so no guard is needed
+   free(NAL_Payload_buffer);
+   NAL_Payload_buffer = NULL;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/nalu.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalu.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/nalu.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,78 ----
+ 
+ /*!
+  ************************************************************************
+  * \file  nalu.c
+  *
+  * \brief
+  *    Common NALU support functions
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Stephan Wenger   <stewe at cs.tu-berlin.de>
+  ************************************************************************
+  */
+ 
+ #include <assert.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ #include "nalu.h"
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Converts an RBSP to a NALU: writes the one-byte NAL header, copies the
+  *    RBSP behind it and converts that copy to an EBSP in place
+  *
+  * \param rbsp
+  *    byte buffer with the rbsp
+  * \param nalu
+  *    nalu structure to be filled (must not be NULL)
+  * \param rbsp_size
+  *    size of the rbsp in bytes (must be below MAXRBSPSIZE)
+  * \param nal_unit_type
+  *    as in JVT doc (asserted to be in 1..10)
+  * \param nal_reference_idc
+  *    as in JVT doc (asserted to be in 0..3)
+  * \param min_num_bytes
+  *    minimum payload size handed on to RBSPtoEBSP(); 0 for VLC entropy
+  *    coding, determines the number of stuffed words for CABAC
+  * \param UseAnnexbLongStartcode
+  *    when 1 and when using AnnexB bytestreams, then use a long startcode prefix
+  *
+  * \return
+  *    length of the NALU in bytes (header byte plus EBSP)
+  *************************************************************************************
+  */
+ 
+ int RBSPtoNALU (unsigned char *rbsp, NALU_t *nalu, int rbsp_size, int nal_unit_type, int nal_reference_idc, 
+                 int min_num_bytes, int UseAnnexbLongStartcode)
+ {
+   int len;
+ 
+   assert (nalu != NULL);
+   assert (nal_reference_idc <=3 && nal_reference_idc >=0);
+   assert (nal_unit_type > 0 && nal_unit_type <= 10);
+   assert (rbsp_size < MAXRBSPSIZE);
+ 
+   // header fields
+   nalu->forbidden_bit     = 0;
+   nalu->nal_reference_idc = nal_reference_idc;
+   nalu->nal_unit_type     = nal_unit_type;
+ 
+   if (UseAnnexbLongStartcode)
+     nalu->startcodeprefix_len = 4;
+   else
+     nalu->startcodeprefix_len = 3;
+ 
+   // header byte: bit 7 forbidden_bit, bits 6..5 nal_reference_idc, low bits nal_unit_type
+   nalu->buf[0] = (nalu->forbidden_bit << 7) | (nalu->nal_reference_idc << 5) | nalu->nal_unit_type;
+ 
+   // the payload follows the header byte and is converted to an EBSP in place
+   memcpy (&nalu->buf[1], rbsp, rbsp_size);
+   len = 1 + RBSPtoEBSP (&nalu->buf[1], 0, rbsp_size, min_num_bytes);
+ 
+   nalu->len = len;
+   return len;
+ }
+ 
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/nalu.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalu.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/nalu.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,28 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    nalu.h
+  * \brief
+  *    RBSP to NALU conversion and NALU writer hook, encoder operations
+  *    This code reflects JVT version xxx
+  *  \date 25 November 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  ***************************************************************************************
+  */
+ 
+ 
+ #ifndef _NALU_H_
+ #define _NALU_H_
+ 
+ #include "nalucommon.h"
+ 
+ //! Wraps an RBSP into a NALU (header byte followed by the EBSP);
+ //! returns the length of the NALU in bytes
+ int RBSPtoNALU (unsigned char *rbsp, NALU_t *nalu, int rbsp_size, int nal_unit_type, int nal_reference_idc, 
+                 int min_num_bytes, int UseAnnexbLongStartcode);
+ 
+ //! NOTE(review): this is an object definition (not an extern declaration)
+ //! placed in a header - confirm that all includers are meant to share it.
+ int (*WriteNALU)(NALU_t *n);     //! Hides the write function in Annex B or RTP
+ 
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/nalucommon.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalucommon.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/nalucommon.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,72 ----
+ 
+ /*!
+  ************************************************************************
+  * \file  nalucommon.c
+  *
+  * \brief
+  *    Common NALU support functions
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Stephan Wenger   <stewe at cs.tu-berlin.de>
+  ************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ 
+ #include "global.h"
+ #include "nalu.h"
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Allocates a zero-initialized NALU together with its byte buffer
+  *
+  * \param buffersize
+  *     size of NALU buffer in bytes; also stored in max_size
+  *
+  * \return
+  *    pointer to a NALU (release it with FreeNALU())
+  *
+  * \note
+  *    no_mem_exit() is called if either allocation fails
+  *************************************************************************************
+  */
+  
+ 
+ NALU_t *AllocNALU(int buffersize)
+ {
+   NALU_t *n = (NALU_t*)calloc (1, sizeof (NALU_t));
+ 
+   if (n == NULL)
+     no_mem_exit ("AllocNALU: n");
+ 
+   n->max_size = buffersize;
+   n->buf      = (byte*)calloc (buffersize, sizeof (byte));
+ 
+   if (n->buf == NULL)
+     no_mem_exit ("AllocNALU: n->buf");
+ 
+   return n;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Frees a NALU and the buffer it owns
+  *
+  * \param n
+  *    NALU to be freed; NULL is accepted and ignored
+  *
+  *************************************************************************************
+  */
+ 
+ void FreeNALU(NALU_t *n)
+ {
+   if (n == NULL)
+     return;
+ 
+   free (n->buf);      // free(NULL) is harmless
+   n->buf = NULL;
+   free (n);
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/nalucommon.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/nalucommon.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/nalucommon.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,55 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    nalucommon.h
+  * \brief
+  *    NALU handling common to encoder and decoder
+  *  \date 25 November 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  ***************************************************************************************
+  */
+ 
+ 
+ #ifndef _NALUCOMMON_H_
+ #define _NALUCOMMON_H_
+ 
+ //! Upper limit for the RBSP size in bytes (RBSPtoNALU asserts rbsp_size < MAXRBSPSIZE)
+ #define MAXRBSPSIZE 64000
+ 
+ //! Values for NALU_t::nal_unit_type
+ #define NALU_TYPE_SLICE    1
+ #define NALU_TYPE_DPA      2
+ #define NALU_TYPE_DPB      3
+ #define NALU_TYPE_DPC      4
+ #define NALU_TYPE_IDR      5
+ #define NALU_TYPE_SEI      6
+ #define NALU_TYPE_SPS      7
+ #define NALU_TYPE_PPS      8
+ #define NALU_TYPE_AUD      9
+ #define NALU_TYPE_EOSEQ    10
+ #define NALU_TYPE_EOSTREAM 11
+ #define NALU_TYPE_FILL     12
+ 
+ //! Values for NALU_t::nal_reference_idc
+ //! NOTE(review): NALU_PRIRITY_LOW is misspelled ("PRIORITY"); the name is left
+ //! unchanged here because code outside this header may reference it.
+ #define NALU_PRIORITY_HIGHEST     3
+ #define NALU_PRIORITY_HIGH        2
+ #define NALU_PRIRITY_LOW          1
+ #define NALU_PRIORITY_DISPOSABLE  0
+ 
+ 
+ //! NAL unit: header fields plus the byte buffer holding header byte and EBSP
+ typedef struct 
+ {
+   int startcodeprefix_len;      //!< 4 for parameter sets and first slice in picture, 3 for everything else (suggested)
+   unsigned len;                 //!< Length of the NAL unit (Excluding the start code, which does not belong to the NALU)
+   unsigned max_size;            //!< Nal Unit Buffer size
+   int nal_unit_type;            //!< NALU_TYPE_xxxx
+   int nal_reference_idc;        //!< NALU_PRIORITY_xxxx
+   int forbidden_bit;            //!< should be always FALSE
+   byte *buf;                    //!< contains the first byte followed by the EBSP
+ } NALU_t;
+ 
+ 
+ //! Allocates a NALU whose buffer holds buffersize bytes.
+ //! The parameter is spelled out so that calls are checked against the
+ //! definition (an empty parameter list declares no prototype in C).
+ NALU_t *AllocNALU(int buffersize);
+ //! Frees a NALU and its buffer
+ void FreeNALU(NALU_t *n);
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/output.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/output.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/output.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,468 ----
+ 
+ /*!
+  ************************************************************************
+  * \file output.c
+  *
+  * \brief
+  *    Output an image and Trace support
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Karsten Suehring               <suehring at hhi.de>
+  ************************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <string.h>
+ 
+ #ifdef WIN32
+ #include <io.h>
+ #else
+ #include <unistd.h>
+ #endif
+ 
+ #include "global.h"
+ #include "image.h"
+ 
+ // forward declaration: write_picture() calls it before its definition below
+ void write_out_picture(StorablePicture *p, int p_out);
+ 
+ // Frame store used by direct_output() to hold a single field until its
+ // complementary field arrives (see init_out_buffer / uninit_out_buffer)
+ FrameStore* out_buffer;
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *      checks if the System is big- or little-endian by looking at the
+  *      first byte of a short that holds the value 1
+  * \return
+  *      0, little-endian (e.g. Intel architectures)
+  *      1, big-endian (e.g. SPARC, MIPS, PowerPC)
+  ************************************************************************
+  */
+ int testEndian()
+ {
+   short probe = 1;
+   byte *first = (byte*)&probe;
+ 
+   // the first byte is zero only when the low-order byte is stored last
+   return (*first == 0);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Convert image plane to temporary buffer for file writing
+  *
+  * \note
+  *    The cropped window (twidth x theight pels) is stored densely packed
+  *    starting at buf[0], using symbol_size_in_bytes bytes per pel, in
+  *    every code path. The caller writes exactly that many bytes from buf.
+  ************************************************************************
+  */
+ void img2buf ( imgpel** imgX,            //!< Pointer to image plane
+                unsigned char* buf,       //!< Buffer for file output
+                int size_x,               //!< horizontal size of picture
+                int size_y,               //!< vertical size of picture
+                int symbol_size_in_bytes, //!< number of bytes in file used to represent a pel
+                int crop_left,            //!< pixels to crop from left
+                int crop_right,           //!< pixels to crop from right
+                int crop_top,             //!< pixels to crop from top
+                int crop_bottom           //!< pixels to crop from bottom
+                )
+ {
+   int i,j;
+ 
+   int twidth  = size_x - crop_left - crop_right;
+   int theight = size_y - crop_top - crop_bottom;
+ 
+   int size = 0;
+ 
+   unsigned char  ui8;
+   unsigned short tmp16, ui16;
+   // 4-byte samples: exactly 4 bytes are copied out of ui32 below, so the
+   // variable must not be wider than that (unsigned long is 8 bytes on LP64;
+   // assumes unsigned int is 32 bits - TODO confirm for exotic targets)
+   unsigned int   tmp32, ui32;
+ 
+   if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes))
+   {
+     // imgpel == pixel_in_file == 1 byte -> simple copy, one row at a time.
+     // Only the source is offset by the crop; destination rows are packed
+     // from buf[0] exactly like in the generic paths below.
+     for(i=0;i<theight;i++)
+       memcpy(buf+(i*twidth),&(imgX[i+crop_top][crop_left]), twidth);
+     
+   }
+   else
+   {
+     // sizeof (imgpel) > sizeof(char)
+     if (testEndian())
+     {
+       // big endian: samples are byte-swapped before they are stored
+       switch (symbol_size_in_bytes)
+       {
+       case 1:
+         {
+           for(i=crop_top;i<size_y-crop_bottom;i++)
+             for(j=crop_left;j<size_x-crop_right;j++)
+             {
+               ui8 = (unsigned char) (imgX[i][j]);
+               buf[(j-crop_left+((i-crop_top)*(twidth)))] = ui8;
+             }
+           break;
+         }
+       case 2:
+         {
+           for(i=crop_top;i<size_y-crop_bottom;i++)
+             for(j=crop_left;j<size_x-crop_right;j++)
+             {
+               tmp16 = (unsigned short) (imgX[i][j]);
+               ui16  = (tmp16 >> 8) | ((tmp16&0xFF)<<8);
+               memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*2),&(ui16), 2);
+             }
+           break;
+         }
+       case 4:
+         {
+           for(i=crop_top;i<size_y-crop_bottom;i++)
+             for(j=crop_left;j<size_x-crop_right;j++)
+             {
+               tmp32 = (unsigned int) (imgX[i][j]);
+               ui32  = ((tmp32&0xFF00)<<8) | ((tmp32&0xFF)<<24) | ((tmp32&0xFF0000)>>8) | ((tmp32&0xFF000000)>>24);
+               memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*4),&(ui32), 4);
+             }
+           break;
+         }
+       default:
+         {
+            error ("writing only to formats of 8, 16 or 32 bit allowed on big endian architecture", 500);
+            break;
+         }
+       }
+ 
+     }
+     else
+     {
+       // little endian: the low-order bytes of each pel can be copied directly
+       if (sizeof (imgpel) < symbol_size_in_bytes)
+       {
+         // this should not happen. we should not have smaller imgpel than our source material.
+         size = sizeof (imgpel);
+         // clear buffer so that the bytes not covered by a pel are zero
+         memset (buf, 0, (twidth*theight*symbol_size_in_bytes));
+       }
+       else
+       {
+         size = symbol_size_in_bytes;
+       }
+ 
+       for(i=crop_top;i<size_y-crop_bottom;i++)
+         for(j=crop_left;j<size_x-crop_right;j++)
+         {
+           memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*symbol_size_in_bytes),&(imgX[i][j]), size);
+         }
+ 
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes out a storable picture without doing any output modifications
+  *    (thin wrapper around write_out_picture())
+  * \param p
+  *    Picture to be written
+  * \param p_out
+  *    Output file
+  * \param real_structure
+  *    real picture structure (currently not used by this function)
+  ************************************************************************
+  */
+ void write_picture(StorablePicture *p, int p_out, int real_structure)
+ {
+   write_out_picture(p, p_out);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes out a storable picture, plane by plane, applying the frame
+  *    cropping rectangle. Nothing is written for non-existing pictures.
+  *
+  *    Plane order: Y, U (imgUV[0]), V (imgUV[1]); when RGB output is
+  *    active imgUV[1] is written first, followed by Y and imgUV[0].
+  *    Chroma planes are skipped for YUV400.
+  * \param p
+  *    Picture to be written
+  * \param p_out
+  *    Output file
+  * \note
+  *    NOTE(review): the results of write() are not checked, so short or
+  *    failed writes go unnoticed.
+  ************************************************************************
+  */
+ void write_out_picture(StorablePicture *p, int p_out)
+ {
+   // scale factors applied to the cropping offsets for the luma plane,
+   // indexed by chroma_format_idc
+   int SubWidthC  [4]= { 1, 2, 2, 1};
+   int SubHeightC [4]= { 1, 2, 1, 1};
+ 
+   int crop_left, crop_right, crop_top, crop_bottom;
+   int symbol_size_in_bytes = img->pic_unit_size_on_disk/8;
+   Boolean rgb_output = (input->rgb_input_flag && input->yuv_format==3);
+   unsigned char *buf;
+ 
+   if (p->non_existing)
+     return;
+ 
+   // luma cropping, in luma samples
+   if (p->frame_cropping_flag)
+   {
+     crop_left   = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset;
+     crop_right  = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset;
+     crop_top    = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+     crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+   }
+   else
+   {
+     crop_left = crop_right = crop_top = crop_bottom = 0;
+   }
+ 
+   //printf ("write frame size: %dx%d\n", p->size_x-crop_left-crop_right,p->size_y-crop_top-crop_bottom );
+ 
+   // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version
+   // sized for the full (uncropped) luma plane, which is reused for every plane
+   buf = malloc (p->size_x*p->size_y*symbol_size_in_bytes);
+   if (NULL==buf)
+   {
+     no_mem_exit("write_out_picture: buf");
+   }
+ 
+   if(rgb_output)
+   {
+     // chroma cropping uses the unscaled offsets
+     // NOTE(review): unlike the luma case these are applied without testing
+     // frame_cropping_flag - confirm the offsets are zero when the flag is off
+     crop_left   = p->frame_cropping_rect_left_offset;
+     crop_right  = p->frame_cropping_rect_right_offset;
+     crop_top    = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+     crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+ 
+     // for RGB output the imgUV[1] plane goes out first
+     img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+     write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes);
+ 
+     // restore the luma cropping values for the Y plane below
+     if (p->frame_cropping_flag)
+     {
+       crop_left   = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset;
+       crop_right  = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset;
+       crop_top    = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+       crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+     }
+     else
+     {
+       crop_left = crop_right = crop_top = crop_bottom = 0;
+     }
+   }
+ 
+   // Y plane
+   img2buf (p->imgY, buf, p->size_x, p->size_y, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+   write(p_out, buf, (p->size_y-crop_bottom-crop_top)*(p->size_x-crop_right-crop_left)*symbol_size_in_bytes);
+ 
+   if (p->chroma_format_idc!=YUV400)
+   {
+     // chroma cropping uses the unscaled offsets (see note above)
+     crop_left   = p->frame_cropping_rect_left_offset;
+     crop_right  = p->frame_cropping_rect_right_offset;
+     crop_top    = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset;
+     crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset;
+ 
+     img2buf (p->imgUV[0], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+     write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)* symbol_size_in_bytes);
+ 
+     // imgUV[1] was already written up front in the RGB case
+     if (!rgb_output)
+     {
+       img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom);
+       write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes);
+     }
+   }
+ 
+   free(buf);
+     
+ //  fsync(p_out);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initialize output buffer for direct output
+  *    (out_buffer receives a new frame store from alloc_frame_store())
+  ************************************************************************
+  */
+ void init_out_buffer()
+ {
+   out_buffer = alloc_frame_store();
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Uninitialize output buffer for direct output
+  *    (releases out_buffer via free_frame_store() and resets it to NULL)
+  ************************************************************************
+  */
+ void uninit_out_buffer()
+ {
+   free_frame_store(out_buffer);
+   out_buffer=NULL;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initialize picture memory: every pel of the Y, U and V planes is set
+  *    to img->dc_pred_value
+  * \param p
+  *    Picture to be cleared
+  * \note
+  *    The pels are assigned one by one. memset() replicates a single byte
+  *    and therefore only yields the intended value when imgpel is one byte
+  *    wide and the value fits into a byte.
+  ************************************************************************
+  */
+ void clear_picture(StorablePicture *p)
+ {
+   int i, j;
+   const imgpel fill = (imgpel) img->dc_pred_value;
+ 
+   // luma plane
+   for(i=0;i<p->size_y;i++)
+     for(j=0;j<p->size_x;j++)
+       p->imgY[i][j] = fill;
+ 
+   // both chroma planes share the same dimensions
+   for(i=0;i<p->size_y_cr;i++)
+     for(j=0;j<p->size_x_cr;j++)
+       p->imgUV[0][i][j] = fill;
+ 
+   for(i=0;i<p->size_y_cr;i++)
+     for(j=0;j<p->size_x_cr;j++)
+       p->imgUV[1][i][j] = fill;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Write out not paired direct output fields. A second empty field is generated
+  *    and combined into the frame buffer.
+  * \param fs
+  *    FrameStore that contains a single field (is_used must be below 3)
+  * \param p_out
+  *    Output file
+  * \note
+  *    On return fs->is_used is 3, i.e. the frame store is marked as
+  *    holding both fields.
+  ************************************************************************
+  */
+ void write_unpaired_field(FrameStore* fs, int p_out)
+ {
+   StorablePicture *p;
+   assert (fs->is_used<3);
+   if(fs->is_used &1)
+   {
+     // we have a top field
+     // construct an empty bottom field
+     p = fs->top_field;
+     fs->bottom_field = alloc_storable_picture(BOTTOM_FIELD, p->size_x, p->size_y, p->size_x_cr, p->size_y_cr);
+     fs->bottom_field->chroma_format_idc = p->chroma_format_idc;
+     clear_picture(fs->bottom_field);
+     dpb_combine_field_yuv(fs);
+     write_picture (fs->frame, p_out, TOP_FIELD);
+   }
+ 
+   if(fs->is_used &2)
+   {
+     // we have a bottom field
+     // construct an empty top field
+     p = fs->bottom_field;
+     fs->top_field = alloc_storable_picture(TOP_FIELD, p->size_x, p->size_y, p->size_x_cr, p->size_y_cr);
+     fs->top_field->chroma_format_idc = p->chroma_format_idc;
+     // one fill is enough; a second identical clear_picture() call would
+     // only repeat the same work
+     clear_picture(fs->top_field);
+     // the generated top field inherits the cropping window of the bottom field
+     fs ->top_field->frame_cropping_flag = fs->bottom_field->frame_cropping_flag;
+     if(fs ->top_field->frame_cropping_flag) 
+     {
+       fs ->top_field->frame_cropping_rect_top_offset = fs->bottom_field->frame_cropping_rect_top_offset;
+       fs ->top_field->frame_cropping_rect_bottom_offset = fs->bottom_field->frame_cropping_rect_bottom_offset;
+       fs ->top_field->frame_cropping_rect_left_offset = fs->bottom_field->frame_cropping_rect_left_offset;
+       fs ->top_field->frame_cropping_rect_right_offset = fs->bottom_field->frame_cropping_rect_right_offset;
+     }
+     dpb_combine_field_yuv(fs);
+     write_picture (fs->frame, p_out, BOTTOM_FIELD);
+   }
+ 
+   fs->is_used=3;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Write out unpaired fields from output buffer, then release all
+  *    pictures held by the output buffer and mark it as empty.
+  * \param p_out
+  *    Output file
+  ************************************************************************
+  */
+ void flush_direct_output(int p_out)
+ {
+   FrameStore *pending;
+ 
+   write_unpaired_field(out_buffer, p_out);
+ 
+   // drop everything that is still attached to the output buffer
+   pending = out_buffer;
+ 
+   free_storable_picture(pending->frame);
+   pending->frame = NULL;
+ 
+   free_storable_picture(pending->top_field);
+   pending->top_field = NULL;
+ 
+   free_storable_picture(pending->bottom_field);
+   pending->bottom_field = NULL;
+ 
+   pending->is_used = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Write a frame (from FrameStore) and flag the frame store as output.
+  *    A frame store holding only one field is completed and written via
+  *    write_unpaired_field().
+  * \param fs
+  *    FrameStore containing the frame
+  * \param p_out
+  *    Output file
+  ************************************************************************
+  */
+ void write_stored_frame( FrameStore *fs,int p_out)
+ {
+   // make sure no direct output field is pending
+   flush_direct_output(p_out);
+ 
+   if (fs->is_used >= 3)
+     write_picture(fs->frame, p_out, FRAME);   // complete frame
+   else
+     write_unpaired_field(fs, p_out);          // single field only
+ 
+   fs->is_output = 1;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Directly output a picture without storing it in the DPB. Fields 
+  *    are buffered before they are written to the file.
+  * \param p
+  *    Picture for output; frames are freed here after writing, fields are
+  *    handed over to the output buffer
+  * \param p_out
+  *    Output file
+  ************************************************************************
+  */
+ void direct_output(StorablePicture *p, int p_out)
+ {
+   if (p->structure==FRAME)
+   {
+     // we have a frame (or complementary field pair)
+     // so output it directly
+     flush_direct_output(p_out);
+     write_picture (p, p_out, FRAME);
+     free_storable_picture(p);
+     return;
+   }
+ 
+   // a field: park it in the output buffer; if a field of the same parity
+   // is already waiting there, that one is flushed first
+   if (p->structure == TOP_FIELD)
+   {
+     if (out_buffer->is_used &1)
+       flush_direct_output(p_out);
+     out_buffer->top_field = p;
+     out_buffer->is_used |= 1;
+   }
+ 
+   if (p->structure == BOTTOM_FIELD)
+   {
+     if (out_buffer->is_used &2)
+       flush_direct_output(p_out);
+     out_buffer->bottom_field = p;
+     out_buffer->is_used |= 2;
+   }
+ 
+   // still waiting for the complementary field
+   if (out_buffer->is_used != 3)
+     return;
+ 
+   // we have both fields, so output them
+   dpb_combine_field_yuv(out_buffer);
+   write_picture (out_buffer->frame, p_out, FRAME);
+ 
+   free_storable_picture(out_buffer->frame);
+   out_buffer->frame = NULL;
+   free_storable_picture(out_buffer->top_field);
+   out_buffer->top_field = NULL;
+   free_storable_picture(out_buffer->bottom_field);
+   out_buffer->bottom_field = NULL;
+   out_buffer->is_used = 0;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/output.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/output.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/output.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,24 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    output.h
+  * \brief
+  *    Picture writing routine headers
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Karsten Suehring        <suehring at hhi.de>
+  ***************************************************************************************
+  */
+ 
+ #ifndef _OUTPUT_H_
+ #define _OUTPUT_H_
+ 
+ // Functions without parameters are declared with (void) so that the
+ // compiler checks calls against a real prototype.
+ int testEndian(void);
+ 
+ void write_stored_frame(FrameStore *fs, int p_out);
+ // Outputs a picture without storing it in the DPB; fields are buffered
+ // until they can be written as a frame.
+ void direct_output(StorablePicture *p, int p_out);
+ void init_out_buffer(void);
+ void uninit_out_buffer(void);
+ 
+ #endif //_OUTPUT_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/parset.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/parset.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/parset.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,908 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    parset.c
+  * \brief
+  *    Picture and Sequence Parameter set generation and handling
+  *  \date 25 November 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  *
+  **************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <string.h>
+  
+ #include "global.h"
+ 
+ #include "contributors.h"
+ #include "mbuffer.h"
+ #include "parset.h"
+ #include "vlc.h"
+ 
+ // Local helpers (declared with full prototypes so calls are type-checked)
+ static int IdentifyProfile(void);
+ static int IdentifyLevel(void);
+ static int GenerateVUISequenceParameters(Bitstream *bitstream);
+ 
+ extern ColocatedParams *Co_located;
+ 
+ // Picture parameter sets handled by this file, indexed by PPS id
+ pic_parameter_set_rbsp_t *PicParSet[MAXPPS];
+ 
+ // Zig-zag scan of a 4x4 block: entry j is the raster index
+ // (row*4 + column) of the j-th position in scan order
+ static const byte ZZ_SCAN[16]  =
+ {  0,  1,  4,  8,  5,  2,  3,  6,  9, 12, 13, 10,  7, 11, 14, 15
+ };
+ 
+ // Zig-zag scan of an 8x8 block: entry j is the raster index
+ // (row*8 + column) of the j-th position in scan order
+ static const byte ZZ_SCAN8[64] =
+ {  0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+ };
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Allocates and fills one sequence parameter set and either one or three
+  *    picture parameter sets (three when input->GenerateMultiplePPS is set),
+  *    then publishes them through the globals active_sps and active_pps
+  *    (active_pps is PicParSet[0]).
+  *
+  * \par
+  *    The chroma QP offsets handed to GeneratePictureParameterSet() are
+  *    input->cb_qp_index_offset / input->cr_qp_index_offset when
+  *    sps->profile_idc >= FREXT_HP, and input->chroma_qp_index_offset / 0
+  *    otherwise.
+  *
+  *************************************************************************************
+ */
+ void GenerateParameterSets ()
+ {
+   int idx;
+   int cb_offset, cr_offset;
+   seq_parameter_set_rbsp_t *sps = AllocSPS();
+ 
+   // start with an empty PPS table
+   for (idx = 0; idx < MAXPPS; idx++)
+     PicParSet[idx] = NULL;
+ 
+   GenerateSequenceParameterSet(sps, 0);
+ 
+   // chroma QP offsets depend only on the profile stored in the sps
+   if (sps->profile_idc >= FREXT_HP)
+   {
+     cb_offset = input->cb_qp_index_offset;
+     cr_offset = input->cr_qp_index_offset;
+   }
+   else
+   {
+     cb_offset = input->chroma_qp_index_offset;
+     cr_offset = 0;
+   }
+ 
+   if (!input->GenerateMultiplePPS)
+   {
+     // single PPS carrying the configured weighted prediction settings
+     PicParSet[0] = AllocPPS();
+     GeneratePictureParameterSet( PicParSet[0], sps, 0,
+                                  input->WeightedPrediction, input->WeightedBiprediction,
+                                  cb_offset, cr_offset);
+   }
+   else
+   {
+     // three PPSs with fixed (weighted_pred_flag, weighted_bipred_idc) pairs:
+     // (0,0), (1,1) and (1,2)
+     for (idx = 0; idx < 3; idx++)
+       PicParSet[idx] = AllocPPS();
+ 
+     GeneratePictureParameterSet( PicParSet[0], sps, 0, 0, 0, cb_offset, cr_offset);
+     GeneratePictureParameterSet( PicParSet[1], sps, 1, 1, 1, cb_offset, cr_offset);
+     GeneratePictureParameterSet( PicParSet[2], sps, 2, 1, 2, cb_offset, cr_offset);
+   }
+ 
+   active_sps = sps;
+   active_pps = PicParSet[0];
+ }
+ 
+ /*! 
+ *************************************************************************************
+ * \brief
+ *    Frees every picture parameter set held in PicParSet and the sequence
+ *    parameter set referenced by active_sps.
+ *
+ * \par
+ *    active_sps and active_pps are reset to NULL afterwards so that no
+ *    dangling pointers remain and a repeated call is harmless.
+ *
+ *************************************************************************************
+ */
+ void FreeParameterSets ()
+ {
+   int i;
+   for (i=0; i<MAXPPS; i++)
+   {
+     if ( NULL != PicParSet[i])
+     {
+       FreePPS(PicParSet[i]);
+       PicParSet[i] = NULL;
+     }
+   }
+   // GenerateParameterSets() points active_pps at PicParSet[0], which has
+   // just been released
+   active_pps = NULL;
+ 
+   if ( NULL != active_sps)
+   {
+     FreeSPS (active_sps);
+     active_sps = NULL;
+   }
+ }
+ 
+ /*! 
+ *************************************************************************************
+ * \brief
+ *    Builds a NALU of type NALU_TYPE_SPS from the active sequence parameter set
+ *
+ * \note
+ *    Reads the global active_sps, which has to be set up beforehand
+ *    (see GenerateParameterSets()).
+ *
+ * \return
+ *    A NALU obtained from AllocNALU() containing the Sequence ParameterSet,
+ *    with startcodeprefix_len set to 4
+ *
+ *************************************************************************************
+ */
+ 
+ NALU_t *GenerateSeq_parameter_set_NALU ()
+ {
+   NALU_t *nalu = AllocNALU(64000);
+   byte rbsp[MAXRBSPSIZE];
+   int rbsp_bytes;
+ 
+   // serialize the SPS into the scratch buffer, then wrap it into the NALU
+   rbsp_bytes = GenerateSeq_parameter_set_rbsp (active_sps, rbsp);
+   RBSPtoNALU (rbsp, nalu, rbsp_bytes, NALU_TYPE_SPS, NALU_PRIORITY_HIGHEST, 0, 1);
+ 
+   nalu->startcodeprefix_len = 4;
+   return nalu;
+ }
+ 
+ 
+ /*! 
+ *************************************************************************************
+ * \brief
+ *    Builds a NALU of type NALU_TYPE_PPS from the picture parameter set
+ *    stored in PicParSet[PPS_id]
+ *
+ * \param PPS_id
+ *    index into PicParSet; must be in [0, MAXPPS) and refer to a generated
+ *    picture parameter set (checked with assert)
+ *
+ * \return
+ *    A NALU obtained from AllocNALU() containing the Picture Parameter Set,
+ *    with startcodeprefix_len set to 4
+ *
+ *************************************************************************************
+ */
+ 
+ NALU_t *GeneratePic_parameter_set_NALU(int PPS_id)
+ {
+   NALU_t *n;
+   int RBSPlen = 0;
+   byte rbsp[MAXRBSPSIZE];
+ 
+   // an out-of-range id or an empty slot would be dereferenced below
+   assert (PPS_id >= 0 && PPS_id < MAXPPS);
+   assert (PicParSet[PPS_id] != NULL);
+ 
+   n = AllocNALU(64000);
+ 
+   RBSPlen = GeneratePic_parameter_set_rbsp (PicParSet[PPS_id], rbsp);
+   // the length returned by RBSPtoNALU is not needed here
+   RBSPtoNALU (rbsp, n, RBSPlen, NALU_TYPE_PPS, NALU_PRIORITY_HIGHEST, 0, 1);
+   n->startcodeprefix_len = 4;
+ 
+   return n;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    GenerateSequenceParameterSet: extracts info from global variables and
+  *    generates sequence parameter set structure
+  *
+  * \par
+  *    Function reads all kinds of values from several global variables,
+  *    including input-> and img-> and fills in the sps.  Many
+  *    values are currently hard-coded to defaults.
+  *
+  * \par
+  *    Side effects
+  *    Besides filling the sps, the function writes
+  *    img->lossless_qpprime_flag and img->residue_transform_flag and
+  *    assigns Co_located the result of alloc_colocated() for the coded
+  *    frame size.
+  *
+  * \note
+  *    SPS_id is currently ignored: seq_parameter_set_id is always set to 0.
+  *
+  ************************************************************************
+  */
+ 
+ void GenerateSequenceParameterSet( seq_parameter_set_rbsp_t *sps, //!< Sequence Parameter Set to be filled
+                                    int SPS_id                     //!< SPS ID (currently ignored)
+                                    )
+ {
+   unsigned i;
+   // chroma subsampling factors, indexed by chroma_format_idc
+   int SubWidthC  [4]= { 1, 2, 2, 1};
+   int SubHeightC [4]= { 1, 2, 1, 1};
+ 
+   // Profile is looked up once and reused for all decisions below
+   int profile_idc   = IdentifyProfile();
+   int frext_profile = ((profile_idc==FREXT_HP) || 
+                        (profile_idc==FREXT_Hi10P) ||
+                        (profile_idc==FREXT_Hi422) ||
+                        (profile_idc==FREXT_Hi444));
+ 
+   // *************************************************************************
+   // Sequence Parameter Set
+   // *************************************************************************
+   assert (sps != NULL);
+   // Profile and Level should be calculated using the info from the config
+   // file.  Calculation is hidden in IdentifyProfile() and IdentifyLevel()
+   sps->profile_idc = profile_idc;
+   sps->level_idc = IdentifyLevel();
+ 
+   // needs to be set according to profile
+   sps->constrained_set0_flag = 0;
+   sps->constrained_set1_flag = 0;
+   sps->constrained_set2_flag = 0;
+   sps->constrained_set3_flag = 0;
+ 
+   // Parameter Set ID hard coded to zero
+   sps->seq_parameter_set_id = 0;
+ 
+   // Fidelity Range Extensions stuff
+   sps->bit_depth_luma_minus8   = input->BitDepthLuma - 8;
+   sps->bit_depth_chroma_minus8 = input->BitDepthChroma - 8;
+   img->lossless_qpprime_flag = input->lossless_qpprime_y_zero_flag & (sps->profile_idc==FREXT_Hi444);
+   img->residue_transform_flag = input->residue_transform_flag;
+   
+   //! POC stuff:
+   //! The following values are hard-coded in init_poc().  Apparently,
+   //! the poc implementation covers only a subset of the poc functionality.
+   //! Here, the same subset is implemented.  Changes in the POC stuff have
+   //! also to be reflected here
+   sps->log2_max_frame_num_minus4 = log2_max_frame_num_minus4;
+   sps->log2_max_pic_order_cnt_lsb_minus4 = log2_max_pic_order_cnt_lsb_minus4;
+   
+   sps->pic_order_cnt_type = input->pic_order_cnt_type;
+   sps->num_ref_frames_in_pic_order_cnt_cycle = img->num_ref_frames_in_pic_order_cnt_cycle;
+   sps->delta_pic_order_always_zero_flag = img->delta_pic_order_always_zero_flag;
+   sps->offset_for_non_ref_pic = img->offset_for_non_ref_pic;
+   sps->offset_for_top_to_bottom_field = img->offset_for_top_to_bottom_field;
+ 
+   for (i=0; i<img->num_ref_frames_in_pic_order_cnt_cycle; i++)
+   {
+     sps->offset_for_ref_frame[i] = img->offset_for_ref_frame[i];
+   }
+   // End of POC stuff
+ 
+   // Number of Reference Frames
+   sps->num_ref_frames = input->num_ref_frames;
+ 
+   //required_frame_num_update_behaviour_flag hardcoded to zero
+   sps->gaps_in_frame_num_value_allowed_flag = FALSE;    // double check
+ 
+   sps->frame_mbs_only_flag = !(input->PicInterlace || input->MbInterlace);
+ 
+   // Picture size, finally a simple one :-)
+   // (a map unit covers two macroblock rows when field coding is possible)
+   sps->pic_width_in_mbs_minus1 = ((input->img_width+img->auto_crop_right)/16) -1;
+   sps->pic_height_in_map_units_minus1 = (((input->img_height+img->auto_crop_bottom)/16)/ (2 - sps->frame_mbs_only_flag)) - 1;
+ 
+   // a couple of flags, simple
+   sps->mb_adaptive_frame_field_flag = (FRAME_CODING != input->MbInterlace);
+   sps->direct_8x8_inference_flag = input->directInferenceFlag;
+   
+   // VUI parameters are signalled only for RGB input with yuv_format 3
+   sps->vui_parameters_present_flag = (input->rgb_input_flag && input->yuv_format==3);
+ 
+   sps->chroma_format_idc = input->yuv_format;
+ 
+   // This should be moved somewhere else.
+   {
+     int PicWidthInMbs, PicHeightInMapUnits, FrameHeightInMbs;
+     int width, height;
+     PicWidthInMbs = (sps->pic_width_in_mbs_minus1 +1);
+     PicHeightInMapUnits = (sps->pic_height_in_map_units_minus1 +1);
+     FrameHeightInMbs = ( 2 - sps->frame_mbs_only_flag ) * PicHeightInMapUnits;
+     
+     width = PicWidthInMbs * MB_BLOCK_SIZE;
+     height = FrameHeightInMbs * MB_BLOCK_SIZE;
+     
+     Co_located = alloc_colocated (width, height,sps->mb_adaptive_frame_field_flag);    
+     
+   }
+ 
+   // Fidelity Range Extensions stuff
+   if(frext_profile)
+   {
+ 
+     // bit 0 of the configured flags selects signalling in the SPS
+     sps->seq_scaling_matrix_present_flag = (input->ScalingMatrixPresentFlag&1);
+     for(i=0; i<8; i++)
+     {
+       if(i<6)
+         sps->seq_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&1);
+       else
+       {
+         // lists 6 and 7 are only signalled when the 8x8 transform is enabled
+         if(input->Transform8x8Mode)
+           sps->seq_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&1);
+         else
+           sps->seq_scaling_list_present_flag[i] = 0;
+       }
+     }
+   }
+   else
+   {
+     sps->seq_scaling_matrix_present_flag = 0;
+     for(i=0; i<8; i++)
+       sps->seq_scaling_list_present_flag[i] = 0;
+ 
+   }
+ 
+ 
+   if (img->auto_crop_right || img->auto_crop_bottom)
+   {
+     // cropping offsets are expressed in chroma sample units (and field
+     // rows when field coding is possible), so the crop must divide evenly
+     sps->frame_cropping_flag = TRUE;
+     sps->frame_cropping_rect_left_offset=0;
+     sps->frame_cropping_rect_top_offset=0;
+     sps->frame_cropping_rect_right_offset=  (img->auto_crop_right / SubWidthC[sps->chroma_format_idc]);
+     sps->frame_cropping_rect_bottom_offset= (img->auto_crop_bottom / (SubHeightC[sps->chroma_format_idc] * (2 - sps->frame_mbs_only_flag)));
+     if (img->auto_crop_right % SubWidthC[sps->chroma_format_idc])
+     {
+       error("automatic frame cropping (width) not possible",500);
+     }
+     if (img->auto_crop_bottom % (SubHeightC[sps->chroma_format_idc] * (2 - sps->frame_mbs_only_flag)))
+     {
+       error("automatic frame cropping (height) not possible",500);
+     }
+   }
+   else
+   {
+     sps->frame_cropping_flag = FALSE;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    GeneratePictureParameterSet: 
+  *    Generates a Picture Parameter Set structure
+  *
+  * \par
+  *    Regarding the QP
+  *    The previous software versions coded the absolute QP only in the 
+  *    slice header.  This is kept, and the offset in the PPS is coded 
+  *    even if we could save bits by intelligently using this field.
+  *
+  * \par
+  *    Side effects
+  *    For a non-FRExt profile input->Transform8x8Mode is reset to 0.
+  *    When more than one slice group is configured, pps->slice_group_id
+  *    is assigned a newly calloc'ed map with one entry per map unit.
+  *
+  ************************************************************************
+  */
+ 
+ void GeneratePictureParameterSet( pic_parameter_set_rbsp_t *pps, //!< Picture Parameter Set to be filled
+                                   seq_parameter_set_rbsp_t *sps, //!< used Sequence Parameter Set
+                                   int PPS_id,                    //!< PPS ID
+                                   int WeightedPrediction,        //!< value of weighted_pred_flag
+                                   int WeightedBiprediction,      //!< value of weighted_bipred_idc
+                                   int cb_qp_index_offset,        //!< value of cb_qp_index_offset
+                                   int cr_qp_index_offset         //!< value of cr_qp_index_offset
+                                   )
+ {
+   unsigned i;
+ 
+   // Profile is looked up once and reused for all decisions below
+   int profile_idc   = IdentifyProfile();
+   int frext_profile = ((profile_idc==FREXT_HP) || 
+                        (profile_idc==FREXT_Hi10P) ||
+                        (profile_idc==FREXT_Hi422) ||
+                        (profile_idc==FREXT_Hi444));
+ 
+   // *************************************************************************
+   // Picture Parameter Set 
+   // *************************************************************************
+ 
+   pps->seq_parameter_set_id = sps->seq_parameter_set_id;
+   pps->pic_parameter_set_id = PPS_id;
+   pps->entropy_coding_mode_flag = (input->symbol_mode==UVLC?0:1);
+ 
+   // Fidelity Range Extensions stuff
+   if(frext_profile)
+   {
+     pps->transform_8x8_mode_flag = input->Transform8x8Mode ? 1:0;
+     // bit 1 of the configured flags selects signalling in the PPS
+     pps->pic_scaling_matrix_present_flag = (input->ScalingMatrixPresentFlag&2)>>1;
+     for(i=0; i<8; i++)
+     {
+       if(i<6)
+         pps->pic_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&2)>>1;
+       else
+       {
+         // lists 6 and 7 are only signalled when the 8x8 transform is enabled
+         if(pps->transform_8x8_mode_flag)
+           pps->pic_scaling_list_present_flag[i] = (input->ScalingListPresentFlag[i]&2)>>1;
+         else
+           pps->pic_scaling_list_present_flag[i] = 0;
+       }
+     }
+   }
+   else
+   {
+     pps->pic_scaling_matrix_present_flag = 0;
+     for(i=0; i<8; i++)
+       pps->pic_scaling_list_present_flag[i] = 0;
+ 
+     // note: this also switches the 8x8 transform off in the encoder input
+     pps->transform_8x8_mode_flag = input->Transform8x8Mode = 0;
+   }
+ 
+   // JVT-Fxxx (by Stephan Wenger, make this flag unconditional
+   pps->pic_order_present_flag = img->pic_order_present_flag;
+ 
+ 
+   // Begin FMO stuff
+   pps->num_slice_groups_minus1 = input->num_slice_groups_minus1;
+ 
+ 
+   //! Following set the parameter for different slice group types
+   if (pps->num_slice_groups_minus1 > 0)
+   {
+     if ((pps->slice_group_id = calloc ((sps->pic_height_in_map_units_minus1+1)*(sps->pic_width_in_mbs_minus1+1), sizeof(byte))) == NULL)
+       no_mem_exit ("GeneratePictureParameterSet: slice_group_id");
+ 
+     switch (input->slice_group_map_type)
+     {
+     case 0:
+ 
+       pps->slice_group_map_type = 0;
+       for(i=0; i<=pps->num_slice_groups_minus1; i++)
+       {
+         pps->run_length_minus1[i]=input->run_length_minus1[i];
+       }
+ 
+       break;
+     case 1:
+       pps->slice_group_map_type = 1;
+       break;
+     case 2:
+       // i loops from 0 to num_slice_groups_minus1-1, because no info for background needed
+       pps->slice_group_map_type = 2;
+       for(i=0; i<pps->num_slice_groups_minus1; i++)
+       {
+         pps->top_left[i] = input->top_left[i];
+         pps->bottom_right[i] = input->bottom_right[i];      
+       }
+       break;
+     case 3:
+     case 4:
+     case 5:
+       pps->slice_group_map_type = input->slice_group_map_type;
+ 
+       pps->slice_group_change_direction_flag = input->slice_group_change_direction_flag;
+       pps->slice_group_change_rate_minus1 = input->slice_group_change_rate_minus1;
+       break;
+     case 6:
+       pps->slice_group_map_type = 6;   
+       pps->pic_size_in_map_units_minus1 = 
+         (((input->img_height+img->auto_crop_bottom)/MB_BLOCK_SIZE)/(2-sps->frame_mbs_only_flag))
+         *((input->img_width+img->auto_crop_right)/MB_BLOCK_SIZE) -1;
+ 
+       for (i=0;i<=pps->pic_size_in_map_units_minus1; i++)
+         pps->slice_group_id[i] = input->slice_group_id[i];
+ 
+       break;
+     default:
+       printf ("Parset.c: slice_group_map_type invalid, default\n");
+       assert (0==1);
+     }
+   }
+ // End FMO stuff
+ 
+   // field coding doubles the number of addressable reference pictures
+   pps->num_ref_idx_l0_active_minus1 = sps->frame_mbs_only_flag ? (sps->num_ref_frames-1) : (2 * sps->num_ref_frames - 1) ;   // set defaults
+   pps->num_ref_idx_l1_active_minus1 = sps->frame_mbs_only_flag ? (sps->num_ref_frames-1) : (2 * sps->num_ref_frames - 1) ;   // set defaults
+   
+   pps->weighted_pred_flag = WeightedPrediction;
+   pps->weighted_bipred_idc = WeightedBiprediction;
+ 
+   pps->pic_init_qp_minus26 = 0;         // hard coded to zero, QP lives in the slice header
+   pps->pic_init_qs_minus26 = 0;
+ 
+   pps->chroma_qp_index_offset = cb_qp_index_offset; 
+   if (frext_profile)
+   {
+     pps->cb_qp_index_offset     = cb_qp_index_offset;
+     pps->cr_qp_index_offset     = cr_qp_index_offset;
+   }
+   else
+     // without FRExt both chroma components share the single offset
+     pps->cb_qp_index_offset = pps->cr_qp_index_offset = pps->chroma_qp_index_offset;
+ 
+   pps->deblocking_filter_control_present_flag = input->LFSendParameters;
+   pps->constrained_intra_pred_flag = input->UseConstrainedIntraPred;
+   
+   pps->redundant_pic_cnt_present_flag = 0;
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Writes one scaling list to the bitstream as a series of delta_sl values
+  *    and fills in the scaling list that results from them
+  *
+  * \param scalingListinput
+  *    scaling list as given by the input file
+  * \param scalingList
+  *    receives the scaling list to be used
+  * \param sizeOfScalingList
+  *    number of entries; 16 selects ZZ_SCAN, any other value ZZ_SCAN8
+  * \param UseDefaultScalingMatrix
+  *    OR-ed with 1 when the input entry at index 0 is zero,
+  *    left unchanged otherwise
+  * \param bitstream
+  *    target bitstream for writing syntax
+  *
+  * \return
+  *    sum of the values returned by se_v() for the deltas written
+  *
+  *************************************************************************************
+  */
+ int Scaling_List(short *scalingListinput, short *scalingList, int sizeOfScalingList, short *UseDefaultScalingMatrix, Bitstream *bitstream)
+ {
+   const byte *scan = (sizeOfScalingList==16) ? ZZ_SCAN : ZZ_SCAN8;
+   int written = 0;
+   int prev = 8;
+   int next = 8;
+   int pos;
+ 
+   for (pos = 0; pos < sizeOfScalingList; pos++)
+   {
+     int idx = scan[pos];
+ 
+     // deltas are only sent until a zero entry has been signalled
+     if (next != 0)
+     {
+       int delta = scalingListinput[idx] - prev;
+ 
+       // fold the difference into the range [-128, 127]
+       if (delta > 127)
+         delta -= 256;
+       else if (delta < -128)
+         delta += 256;
+ 
+       written += se_v ("   : delta_sl   ", delta, bitstream);
+       next = scalingListinput[idx];
+       // a zero in the very first matrix position requests the default matrix
+       *UseDefaultScalingMatrix |= (idx==0 && next==0);
+     }
+ 
+     // after a zero, the remaining entries repeat the last value
+     scalingList[idx] = (next==0) ? prev : next;
+     prev = scalingList[idx];
+   }
+ 
+   return written;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Serializes a sequence parameter set into an RBSP byte buffer.
+  *
+  * \par
+  *    A temporary Bitstream is pointed at rbsp and the SPS fields are written
+  *    to it one after another with u_v/u_1/ue_v/se_v.  The FRExt fields
+  *    (chroma format, bit depths, scaling lists) are written only when
+  *    sps->profile_idc is FREXT_HP, FREXT_Hi10P, FREXT_Hi422 or FREXT_Hi444.
+  *    SODBtoRBSP() is called last to complete the buffer.
+  *
+  * \param sps
+  *    sequence parameter structure
+  * \param rbsp
+  *    buffer to be filled with the rbsp, must not be NULL; the caller in this
+  *    file passes a buffer of MAXRBSPSIZE bytes
+  *
+  * \return
+  *    size of the RBSP in bytes (bitstream->byte_pos after SODBtoRBSP()).
+  *    The running sum kept in len is not returned.
+  *
+  * \note
+  *    GenerateVUISequenceParameters() is called when
+  *    sps->vui_parameters_present_flag is set; according to the original
+  *    note that function does not write VUI data but exits -- TODO confirm
+  *    against its definition.
+  *************************************************************************************
+  */
+ int GenerateSeq_parameter_set_rbsp (seq_parameter_set_rbsp_t *sps, unsigned char *rbsp)
+ {
+   Bitstream *bitstream;
+   int len = 0, LenInBytes;
+   unsigned i;
+ 
+   assert (rbsp != NULL);
+ 
+   if ((bitstream=calloc(1, sizeof(Bitstream)))==NULL) no_mem_exit("SeqParameterSet:bitstream");
+ 
+   // write straight into the caller's rbsp buffer, starting on a fresh byte
+   bitstream->streamBuffer = rbsp;
+   bitstream->bits_to_go = 8;
+ 
+   len+=u_v  (8, "SPS: profile_idc",                             sps->profile_idc,                               bitstream);
+ 
+   len+=u_1  ("SPS: constrained_set0_flag",                      sps->constrained_set0_flag,    bitstream);
+   len+=u_1  ("SPS: constrained_set1_flag",                      sps->constrained_set1_flag,    bitstream);
+   len+=u_1  ("SPS: constrained_set2_flag",                      sps->constrained_set2_flag,    bitstream);
+   len+=u_1  ("SPS: constrained_set3_flag",                      sps->constrained_set3_flag,    bitstream);
+   len+=u_v  (4, "SPS: reserved_zero_4bits",                     0,                             bitstream);
+ 
+   len+=u_v  (8, "SPS: level_idc",                               sps->level_idc,                                 bitstream);
+ 
+   len+=ue_v ("SPS: seq_parameter_set_id",                    sps->seq_parameter_set_id,                      bitstream);
+ 
+   // Fidelity Range Extensions stuff (only present for the FRExt profiles)
+   if((sps->profile_idc==FREXT_HP) || 
+      (sps->profile_idc==FREXT_Hi10P) ||
+      (sps->profile_idc==FREXT_Hi422) ||
+      (sps->profile_idc==FREXT_Hi444))
+   {
+     len+=ue_v ("SPS: chroma_format_idc",                        sps->chroma_format_idc,                          bitstream);
+     if(img->yuv_format == 3)
+       len+=u_1  ("SPS: residue_transform_flag",                 img->residue_transform_flag,                     bitstream);
+     len+=ue_v ("SPS: bit_depth_luma_minus8",                    sps->bit_depth_luma_minus8,                      bitstream);
+     len+=ue_v ("SPS: bit_depth_chroma_minus8",                  sps->bit_depth_chroma_minus8,                    bitstream);
+     len+=u_1  ("SPS: lossless_qpprime_y_zero_flag",             img->lossless_qpprime_flag,                      bitstream);
+     //other chroma info to be added in the future
+ 
+     len+=u_1 ("SPS: seq_scaling_matrix_present_flag",           sps->seq_scaling_matrix_present_flag,            bitstream);
+ 
+     if(sps->seq_scaling_matrix_present_flag)
+     {
+       for(i=0; i<8; i++)
+       {
+         len+=u_1 ("SPS: seq_scaling_list_present_flag",         sps->seq_scaling_list_present_flag[i],           bitstream);
+         if(sps->seq_scaling_list_present_flag[i])
+         {
+           if(i<6)
+             len+=Scaling_List(ScalingList4x4input[i], ScalingList4x4[i], 16, &UseDefaultScalingMatrix4x4Flag[i], bitstream);
+           else
+             len+=Scaling_List(ScalingList8x8input[i-6], ScalingList8x8[i-6], 64, &UseDefaultScalingMatrix8x8Flag[i-6], bitstream);
+         }
+       }
+     }
+   }
+ 
+   len+=ue_v ("SPS: log2_max_frame_num_minus4",               sps->log2_max_frame_num_minus4,                 bitstream);
+   len+=ue_v ("SPS: pic_order_cnt_type",                      sps->pic_order_cnt_type,                        bitstream);
+ 
+   if (sps->pic_order_cnt_type == 0)
+     len+=ue_v ("SPS: log2_max_pic_order_cnt_lsb_minus4",     sps->log2_max_pic_order_cnt_lsb_minus4,         bitstream);
+   else if (sps->pic_order_cnt_type == 1)
+   {
+     len+=u_1  ("SPS: delta_pic_order_always_zero_flag",        sps->delta_pic_order_always_zero_flag,          bitstream);
+     len+=se_v ("SPS: offset_for_non_ref_pic",                  sps->offset_for_non_ref_pic,                    bitstream);
+     len+=se_v ("SPS: offset_for_top_to_bottom_field",          sps->offset_for_top_to_bottom_field,            bitstream);
+     len+=ue_v ("SPS: num_ref_frames_in_pic_order_cnt_cycle",   sps->num_ref_frames_in_pic_order_cnt_cycle,     bitstream);
+     for (i=0; i<sps->num_ref_frames_in_pic_order_cnt_cycle; i++)
+       len+=se_v ("SPS: offset_for_ref_frame",                  sps->offset_for_ref_frame[i],                      bitstream);
+   }
+   len+=ue_v ("SPS: num_ref_frames",                          sps->num_ref_frames,                            bitstream);
+   len+=u_1  ("SPS: gaps_in_frame_num_value_allowed_flag",    sps->gaps_in_frame_num_value_allowed_flag,      bitstream);
+   len+=ue_v ("SPS: pic_width_in_mbs_minus1",                 sps->pic_width_in_mbs_minus1,                   bitstream);
+   len+=ue_v ("SPS: pic_height_in_map_units_minus1",          sps->pic_height_in_map_units_minus1,            bitstream);
+   len+=u_1  ("SPS: frame_mbs_only_flag",                     sps->frame_mbs_only_flag,                       bitstream);
+   if (!sps->frame_mbs_only_flag)
+   {
+     len+=u_1  ("SPS: mb_adaptive_frame_field_flag",            sps->mb_adaptive_frame_field_flag,              bitstream);
+   }
+   len+=u_1  ("SPS: direct_8x8_inference_flag",               sps->direct_8x8_inference_flag,                 bitstream);
+ 
+   len+=u_1  ("SPS: frame_cropping_flag",                      sps->frame_cropping_flag,                       bitstream);
+   if (sps->frame_cropping_flag)
+   {
+     len+=ue_v ("SPS: frame_cropping_rect_left_offset",          sps->frame_cropping_rect_left_offset,           bitstream);
+     len+=ue_v ("SPS: frame_cropping_rect_right_offset",         sps->frame_cropping_rect_right_offset,          bitstream);
+     len+=ue_v ("SPS: frame_cropping_rect_top_offset",           sps->frame_cropping_rect_top_offset,            bitstream);
+     len+=ue_v ("SPS: frame_cropping_rect_bottom_offset",        sps->frame_cropping_rect_bottom_offset,         bitstream);
+   }
+ 
+   len+=u_1  ("SPS: vui_parameters_present_flag",             sps->vui_parameters_present_flag,               bitstream);
+ 
+   if (sps->vui_parameters_present_flag)
+     len+=GenerateVUISequenceParameters(bitstream);    // currently a dummy, asserting
+ 
+   SODBtoRBSP(bitstream);     // copies the last couple of bits into the byte buffer
+   
+   LenInBytes=bitstream->byte_pos;
+ 
+   free (bitstream);
+   
+   return LenInBytes;
+ }
+ 
+ 
+ /*! 
+  ***********************************************************************************************
+  * \brief
+  *    Serializes a picture parameter set into an RBSP byte buffer.
+  *
+  * \par
+  *    A temporary Bitstream is pointed at rbsp and the PPS fields are written
+  *    to it one after another with u_v/u_1/ue_v/se_v.  The slice group (FMO)
+  *    fields are written only when pps->num_slice_groups_minus1 > 0, and the
+  *    FRExt fields (8x8 transform flag, scaling lists, second chroma QP
+  *    offset) only when IdentifyProfile() returns FREXT_HP, FREXT_Hi10P,
+  *    FREXT_Hi422 or FREXT_Hi444.  SODBtoRBSP() is called last to complete
+  *    the buffer.
+  *
+  * \param pps
+  *    picture parameter structure; its pic_order_present_flag is overwritten
+  *    with img->pic_order_present_flag before writing
+  * \param rbsp
+  *    buffer to be filled with the rbsp, must not be NULL; the caller in this
+  *    file passes a buffer of MAXRBSPSIZE bytes
+  *
+  * \return
+  *    size of the RBSP in bytes (bitstream->byte_pos after SODBtoRBSP()).
+  *    The running sum kept in len is not returned, and no error value is
+  *    produced by this function itself.
+  *
+  * \note
+  *    For slice_group_map_type 6 each slice_group_id is written with 1, 2 or
+  *    3 bits depending on num_slice_groups_minus1 (>=1, >=2, >=4).
+  ************************************************************************************************
+  */
+  
+ int GeneratePic_parameter_set_rbsp (pic_parameter_set_rbsp_t *pps, unsigned char *rbsp)
+ {
+   Bitstream *bitstream;
+   int len = 0, LenInBytes;
+   unsigned i;
+   unsigned NumberBitsPerSliceGroupId;
+   int profile_idc;
+ 
+   assert (rbsp != NULL);
+ 
+   if ((bitstream=calloc(1, sizeof(Bitstream)))==NULL) no_mem_exit("PicParameterSet:bitstream");
+ 
+   // write straight into the caller's rbsp buffer, starting on a fresh byte
+   bitstream->streamBuffer = rbsp;
+   bitstream->bits_to_go = 8;
+ 
+   pps->pic_order_present_flag = img->pic_order_present_flag;
+ 
+   len+=ue_v ("PPS: pic_parameter_set_id",                    pps->pic_parameter_set_id,                      bitstream);
+   len+=ue_v ("PPS: seq_parameter_set_id",                    pps->seq_parameter_set_id,                      bitstream);
+   len+=u_1  ("PPS: entropy_coding_mode_flag",                pps->entropy_coding_mode_flag,                  bitstream);
+   len+=u_1  ("PPS: pic_order_present_flag",                  pps->pic_order_present_flag,                    bitstream);
+   len+=ue_v ("PPS: num_slice_groups_minus1",                 pps->num_slice_groups_minus1,                   bitstream);
+ 
+   // FMO stuff
+   if(pps->num_slice_groups_minus1 > 0 )
+   {
+     len+=ue_v ("PPS: slice_group_map_type",                 pps->slice_group_map_type,                   bitstream);
+     if (pps->slice_group_map_type == 0)
+       for (i=0; i<=pps->num_slice_groups_minus1; i++)
+         len+=ue_v ("PPS: run_length_minus1[i]",                           pps->run_length_minus1[i],                             bitstream);
+     else if (pps->slice_group_map_type==2)
+       for (i=0; i<pps->num_slice_groups_minus1; i++)
+       {
+ 
+         len+=ue_v ("PPS: top_left[i]",                          pps->top_left[i],                           bitstream);
+         len+=ue_v ("PPS: bottom_right[i]",                      pps->bottom_right[i],                       bitstream);
+       }
+     else if (pps->slice_group_map_type == 3 ||
+              pps->slice_group_map_type == 4 ||
+              pps->slice_group_map_type == 5) 
+     {
+       len+=u_1  ("PPS: slice_group_change_direction_flag",         pps->slice_group_change_direction_flag,         bitstream);
+       len+=ue_v ("PPS: slice_group_change_rate_minus1",            pps->slice_group_change_rate_minus1,            bitstream);
+     } 
+     else if (pps->slice_group_map_type == 6)
+     {
+       if (pps->num_slice_groups_minus1>=4)
+         NumberBitsPerSliceGroupId=3;
+       else if (pps->num_slice_groups_minus1>=2)
+         NumberBitsPerSliceGroupId=2;
+       else if (pps->num_slice_groups_minus1>=1)
+         NumberBitsPerSliceGroupId=1;
+       else
+         NumberBitsPerSliceGroupId=0;
+         
+       len+=ue_v ("PPS: pic_size_in_map_units_minus1",                       pps->pic_size_in_map_units_minus1,             bitstream);
+       for(i=0; i<=pps->pic_size_in_map_units_minus1; i++)
+         len+= u_v  (NumberBitsPerSliceGroupId, "PPS: >slice_group_id[i]",   pps->slice_group_id[i],                        bitstream);
+     }
+   }
+   // End of FMO stuff
+ 
+   len+=ue_v ("PPS: num_ref_idx_l0_active_minus1",             pps->num_ref_idx_l0_active_minus1,              bitstream);
+   len+=ue_v ("PPS: num_ref_idx_l1_active_minus1",             pps->num_ref_idx_l1_active_minus1,              bitstream);
+   len+=u_1  ("PPS: weighted_pred_flag",                       pps->weighted_pred_flag,                        bitstream);
+   len+=u_v  (2, "PPS: weighted_bipred_idc",                   pps->weighted_bipred_idc,                       bitstream);
+   len+=se_v ("PPS: pic_init_qp_minus26",                      pps->pic_init_qp_minus26,                       bitstream);
+   len+=se_v ("PPS: pic_init_qs_minus26",                      pps->pic_init_qs_minus26,                       bitstream);
+ 
+   profile_idc = IdentifyProfile();
+   if((profile_idc==FREXT_HP) || 
+      (profile_idc==FREXT_Hi10P) ||
+      (profile_idc==FREXT_Hi422) ||
+      (profile_idc==FREXT_Hi444))
+     len+=se_v ("PPS: chroma_qp_index_offset",                 pps->cb_qp_index_offset,                        bitstream);
+   else
+     len+=se_v ("PPS: chroma_qp_index_offset",                 pps->chroma_qp_index_offset,                    bitstream);
+ 
+   len+=u_1  ("PPS: deblocking_filter_control_present_flag",   pps->deblocking_filter_control_present_flag,    bitstream);
+   len+=u_1  ("PPS: constrained_intra_pred_flag",              pps->constrained_intra_pred_flag,               bitstream);
+   len+=u_1  ("PPS: redundant_pic_cnt_present_flag",           pps->redundant_pic_cnt_present_flag,            bitstream);
+ 
+   // Fidelity Range Extensions stuff (only present for the FRExt profiles)
+   if((profile_idc==FREXT_HP) || 
+      (profile_idc==FREXT_Hi10P) ||
+      (profile_idc==FREXT_Hi422) ||
+      (profile_idc==FREXT_Hi444))
+   {
+     len+=u_1  ("PPS: transform_8x8_mode_flag",                pps->transform_8x8_mode_flag,                   bitstream);
+     
+     len+=u_1  ("PPS: pic_scaling_matrix_present_flag",        pps->pic_scaling_matrix_present_flag,           bitstream);
+ 
+     if(pps->pic_scaling_matrix_present_flag)
+     {
+       for(i=0; i<(6+((unsigned)pps->transform_8x8_mode_flag<<1)); i++)
+       {
+         len+=u_1  ("PPS: pic_scaling_list_present_flag",      pps->pic_scaling_list_present_flag[i],          bitstream);
+ 
+         if(pps->pic_scaling_list_present_flag[i])
+         {
+           if(i<6)
+             len+=Scaling_List(ScalingList4x4input[i], ScalingList4x4[i], 16, &UseDefaultScalingMatrix4x4Flag[i], bitstream);
+           else
+             len+=Scaling_List(ScalingList8x8input[i-6], ScalingList8x8[i-6], 64, &UseDefaultScalingMatrix8x8Flag[i-6], bitstream);
+         }
+       }
+     }
+     len+=se_v ("PPS: second_chroma_qp_index_offset",          pps->cr_qp_index_offset,                        bitstream);
+   }
+ 
+   SODBtoRBSP(bitstream);     // copies the last couple of bits into the byte buffer
+   
+   LenInBytes=bitstream->byte_pos;
+ 
+   // Get rid of the helper structures
+   free (bitstream);
+ 
+   return LenInBytes;
+ }
+ 
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Returns the Profile
+  *
+  * \return
+  *    input->ProfileIDC, returned unchanged
+  *
+  * \note
+  *    Function is currently a dummy: nothing is derived here.  It should
+  *    "calculate" the profile from the config file parameters.  E.g.
+  *
+  *    Profile = Baseline;
+  *    if (CABAC Used || Interlace used) Profile=Main;
+  *    if (!Cabac Used) && (Bframes | SPframes) Profile = Streaming;
+  *
+  *************************************************************************************
+  */
+ int IdentifyProfile()
+ {
+   return input->ProfileIDC;
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Returns the Level
+  *
+  * \return
+  *    input->LevelIDC, returned unchanged
+  *
+  * \note
+  *    This function is currently a dummy: nothing is derived here.  It should
+  *    calculate the level out of the config file parameters (primarily the
+  *    picture size)
+  *************************************************************************************
+  */
+ int IdentifyLevel()
+ {
+   return input->LevelIDC;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Writes the VUI part of the sequence parameter set.  Only one
+  *    configuration is handled: signalling RGB input (input->rgb_input_flag
+  *    set and input->yuv_format == 3).  All values written are constants.
+  *
+  * \param bitstream
+  *    bitstream handed on to the u_1() / u_v() writers
+  *
+  * \return
+  *    sum of the values returned by the u_1() / u_v() calls.  For every other
+  *    configuration a message is printed and the program exits with status -1.
+  *************************************************************************************
+  */
+ static int GenerateVUISequenceParameters(Bitstream *bitstream)
+ {
+   int written = 0;
+ 
+   // anything but the RGB special case is not implemented
+   if (!input->rgb_input_flag || input->yuv_format != 3)
+   {
+     printf ("Sequence Parameter VUI not yet implemented, this should never happen, exit\n");
+     exit (-1);
+   }
+ 
+   // still pretty much a dummy VUI
+   printf ("test: writing Sequence Parameter VUI to signal RGB format\n");
+ 
+   written += u_1 ("VUI: aspect_ratio_info_present_flag", 0, bitstream);
+   written += u_1 ("VUI: overscan_info_present_flag", 0, bitstream);
+ 
+   // video signal type, including the colour description
+   written += u_1 ("VUI: video_signal_type_present_flag", 1, bitstream);
+   written += u_v (3, "VUI: video format", 2, bitstream);
+   written += u_1 ("VUI: video_full_range_flag", 1, bitstream);
+   written += u_1 ("VUI: color_description_present_flag", 1, bitstream);
+   written += u_v (8, "VUI: colour primaries", 2, bitstream);
+   written += u_v (8, "VUI: transfer characteristics", 2, bitstream);
+   written += u_v (8, "VUI: matrix coefficients", 0, bitstream);
+ 
+   // all remaining presence flags are written as 0
+   written += u_1 ("VUI: chroma_loc_info_present_flag", 0, bitstream);
+   written += u_1 ("VUI: timing_info_present_flag", 0, bitstream);
+   written += u_1 ("VUI: nal_hrd_parameters_present_flag", 0, bitstream);
+   written += u_1 ("VUI: vcl_hrd_parameters_present_flag", 0, bitstream);
+   written += u_1 ("VUI: pic_struc_present_flag", 0, bitstream);
+   written += u_1 ("VUI: bitstream_restriction_flag", 0, bitstream);
+ 
+   return written;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/parset.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/parset.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/parset.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,45 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    parset.h
+  * \brief
+  *    Picture and Sequence Parameter Sets, encoder operations
+  *    This code reflects JVT version xxx
+  *  \date 25 November 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  ***************************************************************************************
+  */
+ 
+ 
+ #ifndef _PARSET_H_
+ #define _PARSET_H_
+ 
+ #include "parsetcommon.h"
+ #include "nalu.h"
+ 
+ // Encoder-side parameter set interface.
+ void GenerateParameterSets ();
+ void FreeParameterSets ();
+ 
+ NALU_t *GenerateSeq_parameter_set_NALU ();
+ NALU_t *GeneratePic_parameter_set_NALU (int);
+ 
+ // The following are local helpers, but may come in handy in the future, hence public
+ void GenerateSequenceParameterSet(seq_parameter_set_rbsp_t *sps, int SPS_id);
+ void GeneratePictureParameterSet( pic_parameter_set_rbsp_t *pps, seq_parameter_set_rbsp_t *sps, int PPS_id, 
+                                  int WeightedPrediction, int WeightedBiprediction, 
+                                  int cb_qp_index_offset, int cr_qp_index_offset);
+ 
+ // Scaling_List() is called by the PPS writer with sizeOfScalingList 16 (4x4) or 64 (8x8).
+ int Scaling_List(short *scalingListinput, short *scalingList, int sizeOfScalingList, short *UseDefaultScalingMatrix, Bitstream *bitstream);
+ int GenerateSeq_parameter_set_rbsp (seq_parameter_set_rbsp_t *sps, unsigned char *buf);
+ // Returns the byte position of its bitstream after SODBtoRBSP(), i.e. a length in bytes.
+ int GeneratePic_parameter_set_rbsp (pic_parameter_set_rbsp_t *pps, unsigned char *buf);
+ // FreeSPS/FreePPS/AllocPPS/AllocSPS are defined in parsetcommon.c and are
+ // declared a second time in parsetcommon.h, which is included above.
+ void FreeSPS (seq_parameter_set_rbsp_t *sps);
+ void FreePPS (pic_parameter_set_rbsp_t *pps);
+ 
+ pic_parameter_set_rbsp_t *AllocPPS ();
+ seq_parameter_set_rbsp_t *AllocSPS ();
+ 
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,100 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    parsetcommon.c
+  * \brief
+  *    Picture and Sequence Parameter set generation and handling
+  *  \date 25 November 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  *
+  **************************************************************************************
+  */
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ #include "memalloc.h"
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Allocates zero-initialised memory for a pps
+  *
+  * \return
+  *    pointer to a pps; release it with FreePPS().
+  *    If the allocation fails no_mem_exit() is called (expected not to return).
+  *************************************************************************************
+  */
+ pic_parameter_set_rbsp_t *AllocPPS ()
+  {
+    pic_parameter_set_rbsp_t *p;
+ 
+    // calloc takes (number of elements, size of one element)
+    if ((p=calloc (1, sizeof (*p))) == NULL)
+      no_mem_exit ("AllocPPS: PPS");
+    // set explicitly: FreePPS() hands this pointer to free()
+    p->slice_group_id = NULL;
+    return p;
+  }
+ 
+  
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Allocates zero-initialised memory for an sps
+  *
+  * \return
+  *    pointer to an sps; release it with FreeSPS().
+  *    If the allocation fails no_mem_exit() is called (expected not to return).
+  *************************************************************************************
+  */
+ seq_parameter_set_rbsp_t *AllocSPS ()
+  {
+    seq_parameter_set_rbsp_t *p;
+ 
+    // calloc takes (number of elements, size of one element)
+    if ((p=calloc (1, sizeof (*p))) == NULL)
+      no_mem_exit ("AllocSPS: SPS");
+    return p;
+  }
+ 
+  
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Releases a pps together with its slice_group_id array
+  *
+  * \param pps
+  *     pps to be freed, must not be NULL (checked with assert)
+  *
+  * \return
+  *    none
+  *************************************************************************************
+  */
+  
+  void FreePPS (pic_parameter_set_rbsp_t *pps)
+  {
+    assert (pps != NULL);
+ 
+    // free() accepts a NULL pointer, so an unset slice_group_id needs no test
+    free (pps->slice_group_id);
+    free (pps);
+  }
+ 
+  
+  /*! 
+  *************************************************************************************
+  * \brief
+  *    Releases an sps
+  *
+  * \param sps
+  *     sps to be freed, must not be NULL (checked with assert)
+  *
+  * \return
+  *    none
+  *************************************************************************************
+  */
+  
+  void FreeSPS (seq_parameter_set_rbsp_t *sps)
+  {
+    assert (sps != NULL);
+ 
+    // the structure is a single allocation, nothing else to release
+    free (sps);
+  }


Index: llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/parsetcommon.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,194 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    parsetcommon.h
+  * \brief
+  *    Picture and Sequence Parameter Sets, structures common to encoder and decoder
+  *    This code reflects JVT version xxx
+  *  \date 25 November 2002
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *      - Stephan Wenger        <stewe at cs.tu-berlin.de>
+  ***************************************************************************************
+  */
+ 
+ 
+ 
+ // In the JVT syntax, frequently flags are used that indicate the presence of
+ // certain pieces of information in the NALU.  Here, these flags are also
+ // present.  In the encoder, those bits indicate that the values signalled to
+ // be present are meaningful and that this part of the syntax should be
+ // written to the NALU.  In the decoder, the flag indicates that information
+ // was received from the decoded NALU and should be used henceforth.
+ // The structure names were chosen as indicated in the JVT syntax
+ 
+ #ifndef _PARSETCOMMON_H_
+ #define _PARSETCOMMON_H_
+ 
+ // NOTE(review): presumably size limits in bytes for a parameter set RBSP / NALU -- confirm at the users
+ #define MAXIMUMPARSETRBSPSIZE   1500
+ #define MAXIMUMPARSETNALUSIZE   1500
+ 
+ // NOTE(review): presumably the number of SPS / PPS slots -- confirm at the users
+ #define MAXSPS  32
+ #define MAXPPS  256
+ 
+ //! Boolean Type (FALSE == 0, TRUE == 1)
+ typedef enum {
+   FALSE,
+   TRUE
+ } Boolean;
+ 
+ // Capacity of the per-CPB arrays in hrd_parameters_t
+ #define MAXIMUMVALUEOFcpb_cnt   32
+ //! HRD parameters; embedded twice in vui_seq_parameters_t (NAL and VCL).
+ //! The trailing comment of each member names its syntax descriptor.
+ typedef struct
+ {
+   unsigned  cpb_cnt;                                          // ue(v)
+   unsigned  bit_rate_scale;                                   // u(4)
+   unsigned  cpb_size_scale;                                   // u(4)
+     // the three arrays below hold one entry per CPB
+     unsigned  bit_rate_value [MAXIMUMVALUEOFcpb_cnt];         // ue(v)
+     unsigned  cpb_size_value[MAXIMUMVALUEOFcpb_cnt];          // ue(v)
+     unsigned  vbr_cbr_flag[MAXIMUMVALUEOFcpb_cnt];            // u(1)
+   unsigned  initial_cpb_removal_delay_length_minus1;          // u(5)
+   unsigned  cpb_removal_delay_length_minus1;                  // u(5)
+   unsigned  dpb_output_delay_length_minus1;                   // u(5)
+   unsigned  time_offset_length;                               // u(5)
+ } hrd_parameters_t;
+ 
+ 
+ //! VUI parameters of a sequence parameter set.
+ //! Indentation mirrors the syntax nesting: an indented member is only
+ //! meaningful when the less indented flag above it is set.
+ //! The trailing comment of each member names its syntax descriptor.
+ typedef struct
+ {
+   Boolean      aspect_ratio_info_present_flag;                   // u(1)
+     unsigned  aspect_ratio_idc;                               // u(8)
+       unsigned  sar_width;                                    // u(16)
+       unsigned  sar_height;                                   // u(16)
+   Boolean      overscan_info_present_flag;                       // u(1)
+     Boolean      overscan_appropriate_flag;                      // u(1)
+   Boolean      video_signal_type_present_flag;                   // u(1)
+     unsigned  video_format;                                   // u(3)
+     Boolean      video_full_range_flag;                          // u(1)
+     Boolean      colour_description_present_flag;                // u(1)
+       unsigned  colour_primaries;                             // u(8)
+       unsigned  transfer_characteristics;                     // u(8)
+       unsigned  matrix_coefficients;                          // u(8)
+   Boolean      chroma_location_info_present_flag;                // u(1)
+     unsigned  chroma_location_frame;                          // ue(v)
+     unsigned  chroma_location_field;                          // ue(v)
+   Boolean      timing_info_present_flag;                         // u(1)
+     unsigned  num_units_in_tick;                              // u(32)
+     unsigned  time_scale;                                     // u(32)
+     Boolean      fixed_frame_rate_flag;                          // u(1)
+   Boolean      nal_hrd_parameters_present_flag;                  // u(1)
+     hrd_parameters_t nal_hrd_parameters;                      // hrd_parameters_t
+   Boolean      vcl_hrd_parameters_present_flag;                  // u(1)
+     hrd_parameters_t vcl_hrd_parameters;                      // hrd_parameters_t
+   // if ((nal_hrd_parameters_present_flag || (vcl_hrd_parameters_present_flag))
+     Boolean      low_delay_hrd_flag;                             // u(1)
+   Boolean      bitstream_restriction_flag;                       // u(1)
+     Boolean      motion_vectors_over_pic_boundaries_flag;        // u(1)
+     unsigned  max_bytes_per_pic_denom;                        // ue(v)
+     unsigned  max_bits_per_mb_denom;                          // ue(v)
+     unsigned  log2_max_mv_length_vertical;                    // ue(v)
+     unsigned  log2_max_mv_length_horizontal;                  // ue(v)
+     unsigned  max_dec_frame_reordering;                       // ue(v)
+     unsigned  max_dec_frame_buffering;                        // ue(v)
+ } vui_seq_parameters_t;
+ 
+ 
+ // Capacity of the per-slice-group arrays in pic_parameter_set_rbsp_t
+ #define MAXnum_slice_groups_minus1  8
+ //! Picture parameter set.  Allocate with AllocPPS(), release with FreePPS().
+ //! The trailing comment of each member names its syntax descriptor.
+ typedef struct
+ {
+   Boolean   Valid;                  // indicates the parameter set is valid
+   unsigned  pic_parameter_set_id;                             // ue(v)
+   unsigned  seq_parameter_set_id;                             // ue(v)
+   Boolean   entropy_coding_mode_flag;                         // u(1)
+ 
+   Boolean   transform_8x8_mode_flag;                          // u(1)
+   Boolean   pic_scaling_matrix_present_flag;                  // u(1)
+   // the PPS writer treats entries 0..5 as 4x4 lists and 6..7 as 8x8 lists;
+   // 6..7 are only written when transform_8x8_mode_flag is set
+   int       pic_scaling_list_present_flag[8];                 // u(1)
+ 
+   // if( pic_order_cnt_type < 2 )  in the sequence parameter set
+   Boolean      pic_order_present_flag;                           // u(1)
+   unsigned  num_slice_groups_minus1;                          // ue(v)
+     unsigned  slice_group_map_type;                        // ue(v)
+     // if( slice_group_map_type = = 0 )
+       unsigned  run_length_minus1[MAXnum_slice_groups_minus1]; // ue(v)
+     // else if( slice_group_map_type = = 2 )
+       unsigned  top_left[MAXnum_slice_groups_minus1];         // ue(v)
+       unsigned  bottom_right[MAXnum_slice_groups_minus1];     // ue(v)
+     // else if( slice_group_map_type = = 3 || 4 || 5
+       Boolean   slice_group_change_direction_flag;            // u(1)
+       unsigned  slice_group_change_rate_minus1;               // ue(v)
+     // else if( slice_group_map_type = = 6 )
+       unsigned  pic_size_in_map_units_minus1;	                // ue(v)
+       // set to NULL by AllocPPS() and passed to free() by FreePPS(); the PPS
+       // writer reads entries 0..pic_size_in_map_units_minus1
+       byte      *slice_group_id;                              // complete MBAmap u(v)
+ 			
+   int       num_ref_idx_l0_active_minus1;                     // ue(v)
+   int       num_ref_idx_l1_active_minus1;                     // ue(v)
+   Boolean   weighted_pred_flag;                               // u(1)
+   unsigned  weighted_bipred_idc;                              // u(2)
+   int       pic_init_qp_minus26;                              // se(v)
+   int       pic_init_qs_minus26;                              // se(v)
+   // written as chroma_qp_index_offset for all profiles other than
+   // FREXT_HP / Hi10P / Hi422 / Hi444
+   int       chroma_qp_index_offset;                           // se(v)
+ 
+   // for the FREXT_* profiles the PPS writer emits cb_qp_index_offset as
+   // chroma_qp_index_offset and cr_qp_index_offset as second_chroma_qp_index_offset
+   int       cb_qp_index_offset;                               // se(v)
+   int       cr_qp_index_offset;                               // se(v)
+ 
+   Boolean   deblocking_filter_control_present_flag;           // u(1)
+   Boolean   constrained_intra_pred_flag;                      // u(1)
+   Boolean   redundant_pic_cnt_present_flag;                   // u(1)
+   Boolean   vui_pic_parameters_flag;                          // u(1)
+ } pic_parameter_set_rbsp_t;
+ 
+ 
+ // Capacity of offset_for_ref_frame[] in seq_parameter_set_rbsp_t
+ #define MAXnum_ref_frames_in_pic_order_cnt_cycle  256
+ //! Sequence parameter set.  Allocate with AllocSPS(), release with FreeSPS().
+ //! The trailing comment of each member names its syntax descriptor.
+ typedef struct
+ {
+   Boolean   Valid;                  // indicates the parameter set is valid
+ 
+   unsigned  profile_idc;                                      // u(8)
+   Boolean   constrained_set0_flag;                            // u(1)
+   Boolean   constrained_set1_flag;                            // u(1)
+   Boolean   constrained_set2_flag;                            // u(1)
+   Boolean   constrained_set3_flag;                            // u(1)
+   unsigned  level_idc;                                        // u(8)
+   unsigned  seq_parameter_set_id;                             // ue(v)
+   unsigned  chroma_format_idc;                                // ue(v)
+ 
+   Boolean   seq_scaling_matrix_present_flag;                  // u(1)
+   int       seq_scaling_list_present_flag[8];                 // u(1)
+ 
+   unsigned  bit_depth_luma_minus8;                            // ue(v)
+   unsigned  bit_depth_chroma_minus8;                          // ue(v)
+   unsigned  log2_max_frame_num_minus4;                        // ue(v)
+   unsigned pic_order_cnt_type;                                // NOTE(review): ue(v) in the H.264 SPS syntax -- confirm against the SPS writer
+   // if( pic_order_cnt_type == 0 ) 
+   unsigned log2_max_pic_order_cnt_lsb_minus4;                 // ue(v)
+   // else if( pic_order_cnt_type == 1 )
+     Boolean delta_pic_order_always_zero_flag;               // u(1)
+     int     offset_for_non_ref_pic;                         // se(v)
+     int     offset_for_top_to_bottom_field;                 // se(v)
+     unsigned  num_ref_frames_in_pic_order_cnt_cycle;          // ue(v)
+     // for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
+       int   offset_for_ref_frame[MAXnum_ref_frames_in_pic_order_cnt_cycle];   // se(v)
+   unsigned  num_ref_frames;                                   // ue(v)
+   Boolean   gaps_in_frame_num_value_allowed_flag;             // u(1)
+   unsigned  pic_width_in_mbs_minus1;                          // ue(v)
+   unsigned  pic_height_in_map_units_minus1;                   // ue(v)
+   Boolean   frame_mbs_only_flag;                              // u(1)
+   // if( !frame_mbs_only_flag ) 
+     Boolean   mb_adaptive_frame_field_flag;                   // u(1)
+   Boolean   direct_8x8_inference_flag;                        // u(1)
+   Boolean   frame_cropping_flag;                              // u(1)
+     // the four offsets are only meaningful when frame_cropping_flag is set
+     unsigned  frame_cropping_rect_left_offset;                // ue(v)
+     unsigned  frame_cropping_rect_right_offset;               // ue(v)
+     unsigned  frame_cropping_rect_top_offset;                 // ue(v)
+     unsigned  frame_cropping_rect_bottom_offset;              // ue(v)
+   Boolean   vui_parameters_present_flag;                      // u(1)
+     vui_seq_parameters_t vui_seq_parameters;                  // vui_seq_parameters_t
+ } seq_parameter_set_rbsp_t;
+ 
+ // Allocation helpers, implemented in parsetcommon.c.
+ // AllocPPS/AllocSPS return zero-initialised structures (calloc);
+ // FreePPS also frees pps->slice_group_id.
+ pic_parameter_set_rbsp_t *AllocPPS ();
+ seq_parameter_set_rbsp_t *AllocSPS ();
+ void FreePPS (pic_parameter_set_rbsp_t *pps);
+ void FreeSPS (seq_parameter_set_rbsp_t *sps);
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/q_matrix.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_matrix.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/q_matrix.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,633 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file q_matrix.c
+  *
+  * \brief
+  *    read q_matrix parameters from input file: q_matrix.cfg
+  *
+  *************************************************************************************
+  */
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "memalloc.h"
+ 
+ // Init_QMatrix() treats a NULL result as failure and frees a non-NULL result with free().
+ extern char *GetConfigFileContent (char *Filename, int error_type);
+ 
+ // Capacity of the token array used by ParseMatrix()
+ #define MAX_ITEMS_TO_PARSE  1000
+ 
+ // Quantisation / dequantisation coefficient tables, defined elsewhere
+ extern const int quant_coef[6][4][4];
+ extern const int dequant_coef[6][4][4];
+ 
+ extern const int quant_coef8[6][8][8];
+ extern const int dequant_coef8[6][8][8];
+ 
+ 
+ // Set to 1 by ParseMatrix() for every matrix that was found in the q-matrix file;
+ // the index is the position of the name in MatrixType4x4 / MatrixType8x8
+ int matrix4x4_check[6] = {0, 0, 0, 0, 0, 0};
+ int matrix8x8_check[2] = {0, 0};
+ 
+ // Parameter names accepted in the q-matrix file; the position in the table is
+ // the index returned by CheckParameterName()
+ static const char MatrixType4x4[6][20] =
+ {
+   "INTRA4X4_LUMA",
+   "INTRA4X4_CHROMAU",
+   "INTRA4X4_CHROMAV",
+   "INTER4X4_LUMA",
+   "INTER4X4_CHROMAU",
+   "INTER4X4_CHROMAV"
+ };
+ 
+ static const char MatrixType8x8[2][20] =
+ {
+   "INTRA8X8_LUMA",
+   "INTER8X8_LUMA",
+ };
+ 
+ // Level scale tables, allocated by allocate_QMatrix() and released by free_QMatrix()
+ int ****LevelScale4x4Luma;
+ int *****LevelScale4x4Chroma;
+ int ****LevelScale8x8Luma;
+ 
+ int ****InvLevelScale4x4Luma;
+ int *****InvLevelScale4x4Chroma;
+ int ****InvLevelScale8x8Luma;
+ 
+ // *input arrays: values read by ParseMatrix() and patched by PatchMatrix().
+ // Both sets are handed to Scaling_List() by the PPS writer.
+ short ScalingList4x4input[6][16];
+ short ScalingList8x8input[2][64];
+ short ScalingList4x4[6][16];
+ short ScalingList8x8[2][64];
+ 
+ // Reset to 0 by Init_QMatrix(); passed by address to Scaling_List()
+ short UseDefaultScalingMatrix4x4Flag[6];
+ short UseDefaultScalingMatrix8x8Flag[2];
+ 
+ // Fallback scaling lists copied by PatchMatrix() when a matrix is missing
+ // from the q-matrix file or holds an out-of-range value.
+ 
+ // 4x4 fallback for matrix indices 0..2 (the INTRA4X4_* entries)
+ static const short Quant_intra_default[16] =
+ {
+  6,13,20,28,
+ 13,20,28,32,
+ 20,28,32,37,
+ 28,32,37,42
+ };
+ 
+ // 4x4 fallback for matrix indices 3..5 (the INTER4X4_* entries)
+ static const short Quant_inter_default[16] =
+ {
+ 10,14,20,24,
+ 14,20,24,27,
+ 20,24,27,30,
+ 24,27,30,34
+ };
+ 
+ // 8x8 intra fallback
+ static const short Quant8_intra_default[64] =
+ {
+  6,10,13,16,18,23,25,27,
+ 10,11,16,18,23,25,27,29,
+ 13,16,18,23,25,27,29,31,
+ 16,18,23,25,27,29,31,33,
+ 18,23,25,27,29,31,33,36,
+ 23,25,27,29,31,33,36,38,
+ 25,27,29,31,33,36,38,40,
+ 27,29,31,33,36,38,40,42
+ };
+ 
+ // 8x8 inter fallback
+ static const short Quant8_inter_default[64] =
+ {
+  9,13,15,17,19,21,22,24,
+ 13,13,17,19,21,22,24,25,
+ 15,17,19,21,22,24,25,27,
+ 17,19,21,22,24,25,27,28,
+ 19,21,22,24,25,27,28,30,
+ 21,22,24,25,27,28,30,32,
+ 22,24,25,27,28,30,32,33,
+ 24,25,27,28,30,32,33,35
+ };
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Check the parameter name.
+  * \param s
+  *    parameter name string
+  * \param type
+  *    output: 0 if the name is a 4x4 matrix type, 1 if it is an 8x8
+  *    matrix type (also left at 1 when the name is not recognized)
+  * \return
+  *    the index of the name in MatrixType4x4 / MatrixType8x8 if the
+  *    string is a valid parameter name,                                 \n
+  *    -1 for error
+  ***********************************************************************
+  */
+ int CheckParameterName (char *s, int *type)
+ {
+   int i;
+ 
+   // The tables are arrays of arrays, so their rows can never be NULL;
+   // the table size is the only loop bound that is needed.
+   *type = 0;
+   for (i=0; i<6; i++)
+   {
+     if (0==strcmp (MatrixType4x4[i], s))
+       return i;
+   }
+ 
+   *type = 1;
+   for (i=0; i<2; i++)
+   {
+     if (0==strcmp (MatrixType8x8[i], s))
+       return i;
+   }
+ 
+   return -1;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Parse the Q matrix values read from cfg file.
+  *
+  *    The buffer is split in place into tokens (separators are replaced
+  *    by '\0').  The tokens are then read as groups of the form
+  *    NAME = v0 v1 ... with 16 values for a 4x4 and 64 values for an 8x8
+  *    matrix.  The values are stored in ScalingList4x4input /
+  *    ScalingList8x8input and the matrix is marked in matrix4x4_check /
+  *    matrix8x8_check.
+  * \param buf
+  *    buffer to be parsed (modified)
+  * \param bufsize
+  *    buffer size of buffer
+  * \note
+  *    Problems are reported through error(..., 300), which is expected
+  *    not to return; the explicit returns keep this function memory-safe
+  *    if it ever does.
+  ***********************************************************************
+  */
+ void ParseMatrix (char *buf, int bufsize)
+ {
+   char *items[MAX_ITEMS_TO_PARSE];
+   int MapIdx;
+   int item = 0;
+   int num_items;
+   int InString = 0, InItem = 0;
+   char *p = buf;
+   char *bufend = &buf[bufsize];
+   int IntContent;
+   int i, j, range, type, cnt;
+   short *ScalingList;
+ 
+   // Stage one: tokenize the buffer
+   while (p < bufend)
+   {
+     switch (*p)
+     {
+       case 13:                // carriage return
+         p++;
+         break;
+       case '#':                 // Found comment
+         *p = '\0';              // Replace '#' with '\0' in case of comment immediately following integer or string
+         while (p < bufend && *p != '\n')  // Skip till EOL or EOF, whichever comes first
+           p++;
+         InString = 0;
+         InItem = 0;
+         break;
+       case '\n':
+         InItem = 0;
+         InString = 0;
+         *p++='\0';
+         break;
+       case ' ':
+       case '\t':              // Skip whitespace, leave state unchanged
+         if (InString)
+           p++;
+         else
+         {                     // Terminate non-strings once whitespace is found
+           *p++ = '\0';
+           InItem = 0;
+         }
+         break;
+ 
+       case '"':               // Begin/End of String
+         *p++ = '\0';
+         if (!InString)
+         {
+           if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+           {
+             snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d items found.", MAX_ITEMS_TO_PARSE);
+             error (errortext, 300);
+             return;
+           }
+           items[item++] = p;
+           InItem = ~InItem;
+         }
+         else
+           InItem = 0;
+         InString = ~InString; // Toggle
+         break;
+ 
+       case ',':
+         p++;
+         InItem = 0;
+         break;
+ 
+       default:
+         if (!InItem)
+         {
+           if (item >= MAX_ITEMS_TO_PARSE)   // items[] is full
+           {
+             snprintf (errortext, ET_SIZE, " Parsing error in config file: more than %d items found.", MAX_ITEMS_TO_PARSE);
+             error (errortext, 300);
+             return;
+           }
+           items[item++] = p;
+           InItem = ~InItem;
+         }
+         p++;
+     }
+   }
+ 
+   num_items = item;           // number of valid entries in items[]
+   item--;
+ 
+   // Stage two: interpret the tokens, one matrix per iteration
+   for (i=0; i<item; i+=cnt)
+   {
+     cnt=0;
+     if (0 > (MapIdx = CheckParameterName (items[i+cnt], &type)))
+     {
+       snprintf (errortext, ET_SIZE, " Parsing error in config file: Parameter Name '%s' not recognized.", items[i+cnt]);
+       error (errortext, 300);
+       return;
+     }
+     cnt++;
+     if (strcmp ("=", items[i+cnt]))
+     {
+       snprintf (errortext, ET_SIZE, " Parsing error in config file: '=' expected as the second token in each item.");
+       error (errortext, 300);
+       return;
+     }
+     cnt++;
+ 
+     range = type ? 64 : 16;
+ 
+     // all values of the matrix must be among the collected tokens
+     if (i+cnt+range > num_items)
+     {
+       snprintf (errortext, ET_SIZE, " Parsing error: Expected %d values for Parameter of %s, found only %d.", range, items[i], num_items-(i+cnt));
+       error (errortext, 300);
+       return;
+     }
+ 
+     if (!type) //4x4 Matrix
+     {
+       ScalingList = ScalingList4x4input[MapIdx];
+       matrix4x4_check[MapIdx] = 1; //to indicate matrix found in cfg file
+     }
+     else //8x8 matrix
+     {
+       ScalingList = ScalingList8x8input[MapIdx];
+       matrix8x8_check[MapIdx] = 1; //to indicate matrix found in cfg file
+     }
+ 
+     for(j=0; j<range; j++)
+     {
+       if (1 != sscanf (items[i+cnt+j], "%d", &IntContent))
+       {
+         snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+cnt+j]);
+         error (errortext, 300);
+         return;
+       }
+ 
+       ScalingList[j] = (short)IntContent; //save value in matrix
+     }
+     cnt+=j;
+     printf (".");
+   }
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Returns 1 if all count entries of list are within 0..255,
+  *    0 otherwise.
+  ***********************************************************************
+  */
+ static int ScalingListInRange (const short *list, int count)
+ {
+   int cnt;
+ 
+   for(cnt=0; cnt<count; cnt++)
+   {
+     if(list[cnt]<0 || list[cnt]>255) // ScalingList[0]=0 to indicate use default matrix
+       return 0;
+   }
+   return 1;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Check Q Matrix values.  Every matrix enabled through
+  *    input->ScalingListPresentFlag that was not found in the cfg file,
+  *    or that holds a value outside 0..255, is replaced as a whole by
+  *    the matching default table (intra or inter, 4x4 or 8x8).
+  ***********************************************************************
+  */
+ void PatchMatrix(void)
+ {
+   short *ScalingList;
+   const short *Default;
+   int i;
+ 
+   for(i=0; i<6; i++)
+   {
+     if(input->ScalingListPresentFlag[i])
+     {
+       ScalingList=ScalingList4x4input[i];
+       // entries 0..2 are the intra matrices, 3..5 the inter matrices
+       Default = (i>2) ? Quant_inter_default : Quant_intra_default;
+ 
+       if(!matrix4x4_check[i]) //matrix not found, pad with default value
+       {
+         printf("\n%s matrix definition not found. Setting default values.", MatrixType4x4[i]);
+         memcpy(ScalingList, Default, sizeof(short)*16);
+       }
+       else if(!ScalingListInRange(ScalingList, 16)) //value of matrix exceed range
+       {
+         printf("\n%s value exceed range. (Value must be 1 to 255)\n", MatrixType4x4[i]);
+         printf("Setting default values for this matrix.");
+         memcpy(ScalingList, Default, sizeof(short)*16);
+       }
+     }
+ 
+     // the 8x8 matrices use ScalingListPresentFlag[6] and [7]
+     if((i<2) && input->ScalingListPresentFlag[i+6])
+     {
+       ScalingList=ScalingList8x8input[i];
+       // i is 0 (INTRA8X8_LUMA) or 1 (INTER8X8_LUMA) here; the former
+       // test against 7 never matched, so the inter matrix received
+       // the intra defaults
+       Default = (i==1) ? Quant8_inter_default : Quant8_intra_default;
+ 
+       if(!matrix8x8_check[i]) //matrix not found, pad with default value
+       {
+         printf("\n%s matrix definition not found. Setting default values.", MatrixType8x8[i]);
+         memcpy(ScalingList, Default, sizeof(short)*64);
+       }
+       else if(!ScalingListInRange(ScalingList, 64)) //value of matrix exceed range
+       {
+         printf("\n%s value exceed range. (Value must be 1 to 255)\n", MatrixType8x8[i]);
+         printf("Setting default values for this matrix.");
+         memcpy(ScalingList, Default, sizeof(short)*64);
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Allocate the level scale tables (LevelScale*) and their inverse
+  *    counterparts (InvLevelScale*) for 4x4 luma, 4x4 chroma and 8x8 luma
+  ***********************************************************************
+  */
+ void allocate_QMatrix ()
+ {
+   // level scale tables
+   get_mem4Dint (&LevelScale4x4Luma, 2, 6, 4, 4);
+   get_mem5Dint (&LevelScale4x4Chroma, 2, 2, 6, 4, 4);
+   get_mem4Dint (&LevelScale8x8Luma, 2, 6, 8, 8);
+ 
+   // inverse level scale tables, same dimensions
+   get_mem4Dint (&InvLevelScale4x4Luma, 2, 6, 4, 4);
+   get_mem5Dint (&InvLevelScale4x4Chroma, 2, 2, 6, 4, 4);
+   get_mem4Dint (&InvLevelScale8x8Luma, 2, 6, 8, 8);
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Release the tables obtained in allocate_QMatrix()
+  ***********************************************************************
+  */
+ void free_QMatrix ()
+ {
+   // level scale tables
+   free_mem4Dint (LevelScale4x4Luma, 2, 6);
+   free_mem5Dint (LevelScale4x4Chroma, 2, 2, 6);
+   free_mem4Dint (LevelScale8x8Luma, 2, 6);
+ 
+   // inverse level scale tables
+   free_mem4Dint (InvLevelScale4x4Luma, 2, 6);
+   free_mem5Dint (InvLevelScale4x4Chroma, 2, 2, 6);
+   free_mem4Dint (InvLevelScale8x8Luma, 2, 6);
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Initialise Q matrix values.
+  *
+  *    Always allocates the level scale tables.  If
+  *    input->ScalingMatrixPresentFlag is set, the file
+  *    input->QmatrixFile is read and parsed, missing or invalid
+  *    matrices are patched with defaults and the UseDefaultScalingMatrix
+  *    flags are cleared.
+  ***********************************************************************
+  */
+ void Init_QMatrix (void)
+ {
+   char *content;
+ 
+ 
+   allocate_QMatrix ();
+ 
+   if(input->ScalingMatrixPresentFlag)
+   {
+     printf ("Parsing QMatrix file %s ", input->QmatrixFile);
+     content = GetConfigFileContent(input->QmatrixFile, 0);
+     // content is a pointer: compare it against NULL, not against '\0'
+     if(content != NULL)
+       ParseMatrix(content, (int) strlen (content));
+     else
+       printf("\nError: %s\nProceeding with default values for all matrices.", errortext);
+ 
+     // also runs when the file could not be read, so that every enabled
+     // matrix ends up with default values
+     PatchMatrix();
+     printf("\n");
+ 
+     memset(UseDefaultScalingMatrix4x4Flag, 0, 6 * sizeof(short));
+     UseDefaultScalingMatrix8x8Flag[0]=UseDefaultScalingMatrix8x8Flag[1]=0;
+   
+     free(content);            // free(NULL) is a no-op
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    For calculating the quantisation values at frame level
+  *
+  * \par Input:
+  *    none
+  *
+  * \par Output:
+  *    none
+  ************************************************************************
+  */
+ void CalculateQuantParam(void)
+ {
+   int i, j, k, temp;
+   // present[n] != 0: 4x4 scaling list n is signalled. From the usage below the order is
+   // 0 = intra luma, 1/2 = the two intra chroma lists, 3 = inter luma, 4/5 = the two inter chroma lists.
+   int present[6];
+   int no_q_matrix=FALSE;
+ 
+   if(!active_sps->seq_scaling_matrix_present_flag && !active_pps->pic_scaling_matrix_present_flag) //set to no q-matrix
+     no_q_matrix=TRUE;
+   else
+   {
+     memset(present, 0, sizeof(int)*6);
+ 
+     // Start from the sequence-level flags ...
+     if(active_sps->seq_scaling_matrix_present_flag)
+       for(i=0; i<6; i++)
+         present[i] = active_sps->seq_scaling_list_present_flag[i];
+ 
+     // ... then merge the picture-level flags: for the two luma lists (0 and 3) the PPS flag is
+     // OR-ed into the SPS flag, for the chroma lists the PPS flag replaces it.
+     if(active_pps->pic_scaling_matrix_present_flag)
+       for(i=0; i<6; i++)
+       {
+         if((i==0) || (i==3))
+           present[i] |= active_pps->pic_scaling_list_present_flag[i];
+         else
+           present[i] = active_pps->pic_scaling_list_present_flag[i];
+       }
+   }
+ 
+   if(no_q_matrix==TRUE)
+   {
+     // No scaling matrix at all: forward scale is quant_coef, inverse scale is dequant_coef * 16
+     // (the same result the branch below would give for a list value of 16).
+     for(k=0; k<6; k++)
+       for(j=0; j<4; j++)
+         for(i=0; i<4; i++)
+         {
+           LevelScale4x4Luma[1][k][j][i]         = quant_coef[k][j][i];
+           InvLevelScale4x4Luma[1][k][j][i]      = dequant_coef[k][j][i]<<4;
+ 
+           LevelScale4x4Chroma[0][1][k][j][i]    = quant_coef[k][j][i];
+           InvLevelScale4x4Chroma[0][1][k][j][i] = dequant_coef[k][j][i]<<4;
+ 
+           LevelScale4x4Chroma[1][1][k][j][i]    = quant_coef[k][j][i];
+           InvLevelScale4x4Chroma[1][1][k][j][i] = dequant_coef[k][j][i]<<4;
+ 
+           // Inter
+           LevelScale4x4Luma[0][k][j][i]         = quant_coef[k][j][i];
+           InvLevelScale4x4Luma[0][k][j][i]      = dequant_coef[k][j][i]<<4;
+ 
+           LevelScale4x4Chroma[0][0][k][j][i]    = quant_coef[k][j][i];
+           InvLevelScale4x4Chroma[0][0][k][j][i] = dequant_coef[k][j][i]<<4;
+ 
+           LevelScale4x4Chroma[1][0][k][j][i]    = quant_coef[k][j][i];
+           InvLevelScale4x4Chroma[1][0][k][j][i] = dequant_coef[k][j][i]<<4;
+         }
+   }
+   else
+   {
+     // NOTE(review): every forward scale below divides by a scaling-list entry; a zero entry
+     // would divide by zero. Presumably the lists are validated before this point - confirm.
+     for(k=0; k<6; k++)
+       for(j=0; j<4; j++)
+         for(i=0; i<4; i++)
+         {
+           // Position of element [j][i] inside the flat 16-entry scaling list.
+           temp = (i<<2)+j;
+           // List 0 (intra luma): default table when not signalled or when the default flag is set.
+           if((!present[0]) || UseDefaultScalingMatrix4x4Flag[0])
+           {
+             LevelScale4x4Luma[1][k][j][i]         = (quant_coef[k][j][i]<<4)/Quant_intra_default[temp];
+             InvLevelScale4x4Luma[1][k][j][i]      = dequant_coef[k][j][i]*Quant_intra_default[temp];
+           }
+           else
+           {
+             LevelScale4x4Luma[1][k][j][i]         = (quant_coef[k][j][i]<<4)/ScalingList4x4[0][temp];
+             InvLevelScale4x4Luma[1][k][j][i]      = dequant_coef[k][j][i]*ScalingList4x4[0][temp];
+           }
+ 
+           // List 1 (first intra chroma): when not signalled, reuse the intra luma result just computed.
+           if(!present[1])
+           {
+             LevelScale4x4Chroma[0][1][k][j][i]    = LevelScale4x4Luma[1][k][j][i];
+             InvLevelScale4x4Chroma[0][1][k][j][i] = InvLevelScale4x4Luma[1][k][j][i];
+           }
+           else
+           {
+             LevelScale4x4Chroma[0][1][k][j][i]    = (quant_coef[k][j][i]<<4)/(UseDefaultScalingMatrix4x4Flag[1] ? Quant_intra_default[temp]:ScalingList4x4[1][temp]);
+             InvLevelScale4x4Chroma[0][1][k][j][i] = dequant_coef[k][j][i]*(UseDefaultScalingMatrix4x4Flag[1] ? Quant_intra_default[temp]:ScalingList4x4[1][temp]);
+           }
+ 
+           // List 2 (second intra chroma): when not signalled, reuse the first intra chroma result.
+           if(!present[2])
+           {
+             LevelScale4x4Chroma[1][1][k][j][i]    = LevelScale4x4Chroma[0][1][k][j][i];
+             InvLevelScale4x4Chroma[1][1][k][j][i] = InvLevelScale4x4Chroma[0][1][k][j][i];
+           }
+           else
+           {
+             LevelScale4x4Chroma[1][1][k][j][i]    = (quant_coef[k][j][i]<<4)/(UseDefaultScalingMatrix4x4Flag[2] ? Quant_intra_default[temp]:ScalingList4x4[2][temp]);
+             InvLevelScale4x4Chroma[1][1][k][j][i] = dequant_coef[k][j][i]*(UseDefaultScalingMatrix4x4Flag[2] ? Quant_intra_default[temp]:ScalingList4x4[2][temp]);
+           }
+ 
+           // List 3 (inter luma): same rule as list 0, but with the inter default table.
+           if((!present[3]) || UseDefaultScalingMatrix4x4Flag[3])
+           {
+             LevelScale4x4Luma[0][k][j][i]         = (quant_coef[k][j][i]<<4)/Quant_inter_default[temp];
+             InvLevelScale4x4Luma[0][k][j][i]      = dequant_coef[k][j][i]*Quant_inter_default[temp];
+           }
+           else
+           {
+             LevelScale4x4Luma[0][k][j][i]         = (quant_coef[k][j][i]<<4)/ScalingList4x4[3][temp];
+             InvLevelScale4x4Luma[0][k][j][i]      = dequant_coef[k][j][i]*ScalingList4x4[3][temp];
+           }
+ 
+           // List 4 (first inter chroma): when not signalled, reuse the inter luma result.
+           if(!present[4])
+           {
+             LevelScale4x4Chroma[0][0][k][j][i]    = LevelScale4x4Luma[0][k][j][i];
+             InvLevelScale4x4Chroma[0][0][k][j][i] = InvLevelScale4x4Luma[0][k][j][i];
+           }
+           else
+           {
+             LevelScale4x4Chroma[0][0][k][j][i]    = (quant_coef[k][j][i]<<4)/(UseDefaultScalingMatrix4x4Flag[4] ? Quant_inter_default[temp]:ScalingList4x4[4][temp]);
+             InvLevelScale4x4Chroma[0][0][k][j][i] = dequant_coef[k][j][i]*(UseDefaultScalingMatrix4x4Flag[4] ? Quant_inter_default[temp]:ScalingList4x4[4][temp]);
+           }
+ 
+           // List 5 (second inter chroma): when not signalled, reuse the first inter chroma result.
+           if(!present[5])
+           {
+             LevelScale4x4Chroma[1][0][k][j][i]    = LevelScale4x4Chroma[0][0][k][j][i];
+             InvLevelScale4x4Chroma[1][0][k][j][i] = InvLevelScale4x4Chroma[0][0][k][j][i];
+           }
+           else
+           {
+             LevelScale4x4Chroma[1][0][k][j][i]    = (quant_coef[k][j][i]<<4)/(UseDefaultScalingMatrix4x4Flag[5] ? Quant_inter_default[temp]:ScalingList4x4[5][temp]);
+             InvLevelScale4x4Chroma[1][0][k][j][i] = dequant_coef[k][j][i]*(UseDefaultScalingMatrix4x4Flag[5] ? Quant_inter_default[temp]:ScalingList4x4[5][temp]);
+           }
+         }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Calculate the quantisation and inverse quantisation parameters
+  *
+  ************************************************************************
+  */
+ void CalculateQuant8Param()
+ {
+   int i, j, k, temp;
+   // present[0] / present[1]: 8x8 intra / inter luma list is signalled
+   // (entries 6 and 7 of the SPS/PPS scaling-list flag arrays).
+   int present[2];
+   int no_q_matrix=FALSE;
+   
+   if(!active_sps->seq_scaling_matrix_present_flag && !active_pps->pic_scaling_matrix_present_flag) //set to default matrix
+     no_q_matrix=TRUE;
+   else
+   {
+     memset(present, 0, sizeof(int)*2);
+     
+     // Sequence-level flags first, then the picture-level flags are OR-ed in.
+     if(active_sps->seq_scaling_matrix_present_flag)
+       for(i=0; i<2; i++)
+         present[i] = active_sps->seq_scaling_list_present_flag[i+6];
+ 
+     if(active_pps->pic_scaling_matrix_present_flag)
+       for(i=0; i<2; i++)
+         present[i] |= active_pps->pic_scaling_list_present_flag[i+6];
+   }
+ 
+   if(no_q_matrix==TRUE)
+   {
+     // No scaling matrix: forward scale is quant_coef8, inverse scale is dequant_coef8 * 16,
+     // for both the intra ([1]) and inter ([0]) tables.
+     for(k=0; k<6; k++)
+       for(j=0; j<8; j++)
+         for(i=0; i<8; i++)
+         {
+           LevelScale8x8Luma[1][k][j][i]         = quant_coef8[k][j][i];
+           InvLevelScale8x8Luma[1][k][j][i]      = dequant_coef8[k][j][i]<<4;
+           
+           LevelScale8x8Luma[0][k][j][i]         = quant_coef8[k][j][i];
+           InvLevelScale8x8Luma[0][k][j][i]      = dequant_coef8[k][j][i]<<4;
+         }
+   }
+   else
+   {
+     // NOTE(review): the forward scales divide by a scaling-list entry; a zero entry would
+     // divide by zero. Presumably the lists are validated elsewhere - confirm.
+     for(k=0; k<6; k++)
+       for(j=0; j<8; j++)
+         for(i=0; i<8; i++)
+         {
+           // Position of element [j][i] inside the flat 64-entry scaling list.
+           temp = (i<<3)+j;
+           // Intra table: default list when not signalled or when the default flag is set.
+           if((!present[0]) || UseDefaultScalingMatrix8x8Flag[0])
+           {
+             LevelScale8x8Luma[1][k][j][i]    = (quant_coef8[k][j][i]<<4)/Quant8_intra_default[temp];
+             InvLevelScale8x8Luma[1][k][j][i] = dequant_coef8[k][j][i]*Quant8_intra_default[temp];
+           }
+           else
+           {
+             LevelScale8x8Luma[1][k][j][i]    = (quant_coef8[k][j][i]<<4)/ScalingList8x8[0][temp];
+             InvLevelScale8x8Luma[1][k][j][i] = dequant_coef8[k][j][i]*ScalingList8x8[0][temp];
+           }
+           
+           // Inter table: same rule with the inter default list.
+           if((!present[1]) || UseDefaultScalingMatrix8x8Flag[1])
+           {
+             LevelScale8x8Luma[0][k][j][i]    = (quant_coef8[k][j][i]<<4)/Quant8_inter_default[temp];
+             InvLevelScale8x8Luma[0][k][j][i] = dequant_coef8[k][j][i]*Quant8_inter_default[temp];
+           }
+           else
+           {
+             LevelScale8x8Luma[0][k][j][i]    = (quant_coef8[k][j][i]<<4)/ScalingList8x8[1][temp];
+             InvLevelScale8x8Luma[0][k][j][i] = dequant_coef8[k][j][i]*ScalingList8x8[1][temp];
+           }
+         }
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/q_matrix.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_matrix.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/q_matrix.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,40 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    q_matrix.h
+  *
+  * \brief
+  *    Headerfile for q_matrix array
+  *
+  * \date
+  *    07. Apr 2004
+  ***************************************************************************
+  */
+ 
+ #ifndef _Q_MATRIX_H_
+ #define _Q_MATRIX_H_
+ 
+ // Forward (quantisation) level-scale tables; allocated by Init_QMatrix, released by free_QMatrix.
+ extern int ****LevelScale4x4Luma;
+ extern int *****LevelScale4x4Chroma;
+ extern int ****LevelScale8x8Luma;
+ 
+ // Inverse (de-quantisation) level-scale tables, same layout as the forward tables.
+ extern int ****InvLevelScale4x4Luma;
+ extern int *****InvLevelScale4x4Chroma;
+ extern int ****InvLevelScale8x8Luma;
+ 
+ // Scaling lists: six 4x4 lists of 16 entries and two 8x8 lists of 64 entries.
+ // NOTE(review): the "input" variants presumably hold the values as read from the matrix file - confirm.
+ extern short ScalingList4x4input[6][16];
+ extern short ScalingList8x8input[2][64];
+ extern short ScalingList4x4[6][16];
+ extern short ScalingList8x8[2][64];
+ 
+ // Per-list flags: non-zero selects the built-in default matrix instead of the list above.
+ extern short UseDefaultScalingMatrix4x4Flag[6];
+ extern short UseDefaultScalingMatrix8x8Flag[2];
+ 
+ 
+ // Allocates the tables and, if configured, parses the matrix file.
+ void Init_QMatrix (void);
+ // Fill the 4x4 / 8x8 level-scale tables from the active SPS/PPS scaling-list flags.
+ void CalculateQuantParam();
+ void CalculateQuant8Param();
+ // Releases the tables allocated by Init_QMatrix.
+ void free_QMatrix();
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/q_offsets.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_offsets.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/q_offsets.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,550 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file q_offsets.c
+  *
+  * \brief
+  *    read Quantization Offset matrix parameters from input file: q_OffsetMatrix.cfg
+  *
+  *************************************************************************************
+  */
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "memalloc.h"
+ 
+ extern char *GetConfigFileContent (char *Filename, int error_type);
+ 
+ #define MAX_ITEMS_TO_PARSE  1000
+ 
+ // One "found in cfg file" flag per parameter name. ParseQOffsetMatrix indexes these arrays with
+ // the row number of the matched name, so they must have as many entries as OffsetType4x4
+ // (15 rows) and OffsetType8x8 (5 rows); the former sizes of 6 and 2 were written out of bounds.
+ int offset4x4_check[15] = { 0 };
+ int offset8x8_check[5] = { 0 };
+ 
+ // Parameter names accepted for 4x4 offset matrices. The row number is the index into
+ // OffsetList4x4input / OffsetList4x4, so the order must not change:
+ //   0..2  used for I slices (intra), 3..5 intra in other (non-B) slices, 6..8 intra in B slices,
+ //   9..11 inter in non-B slices, 12..14 inter in B slices (see CalculateOffsetParam).
+ // 24 bytes per row = longest name (23 characters) plus the terminating NUL.
+ static const char OffsetType4x4[15][24] = {
+   "INTRA4X4_LUMA_INTRA",
+   "INTRA4X4_CHROMAU_INTRA",
+   "INTRA4X4_CHROMAV_INTRA",
+   "INTRA4X4_LUMA_INTERP",
+   "INTRA4X4_CHROMAU_INTERP",
+   "INTRA4X4_CHROMAV_INTERP",
+   "INTRA4X4_LUMA_INTERB",
+   "INTRA4X4_CHROMAU_INTERB",
+   "INTRA4X4_CHROMAV_INTERB",
+   "INTER4X4_LUMA_INTERP",
+   "INTER4X4_CHROMAU_INTERP",
+   "INTER4X4_CHROMAV_INTERP",
+   "INTER4X4_LUMA_INTERB",
+   "INTER4X4_CHROMAU_INTERB",
+   "INTER4X4_CHROMAV_INTERB"
+ };
+ 
+ // Parameter names accepted for 8x8 offset matrices; the row number is the index into
+ // OffsetList8x8input / OffsetList8x8 (see CalculateOffset8Param for how rows map to slice types).
+ static const char OffsetType8x8[5][24] = {
+   "INTRA8X8_LUMA_INTRA",
+   "INTRA8X8_LUMA_INTERP",
+   "INTRA8X8_LUMA_INTERB",
+   "INTER8X8_LUMA_INTERP",
+   "INTER8X8_LUMA_INTERB"
+ };
+ 
+ 
+ // Per-frame offset tables, allocated by allocate_QOffsets with 13 entries in the k dimension
+ // and filled by CalculateOffsetParam / CalculateOffset8Param.
+ int ****LevelOffset4x4Luma;
+ int *****LevelOffset4x4Chroma;
+ int ****LevelOffset8x8Luma;
+ 
+ // Set per frame in CalculateOffsetParam from input->AdaptRndWFactor.
+ int AdaptRndWeight;
+ 
+ // "input" lists receive the values parsed from the cfg file; InitOffsetParam copies them
+ // (or the built-in defaults) into the working lists.
+ short OffsetList4x4input[15][16];
+ short OffsetList8x8input[5][64];
+ short OffsetList4x4[15][16];
+ short OffsetList8x8[5][64];
+ 
+ void InitOffsetParam ();
+ 
+ // Offsets are shifted left by (Q_BITS + k - OffsetBits) when the frame tables are built.
+ const int OffsetBits = 11;
+ 
+ // Built-in default offsets, used by InitOffsetParam when no offset matrix file is active.
+ // With OffsetBits = 11 the two values are 682/2048 (about 1/3) and 342/2048 (about 1/6).
+ 
+ // Default for 4x4 lists 0..2 (intra blocks in I slices).
+ static const short Offset_intra_default_intra[16] = {
+   682, 682, 682, 682,
+   682, 682, 682, 682,
+   682, 682, 682, 682,
+   682, 682, 682, 682
+ };
+ 
+ // NOTE(review): not referenced by any code visible in this part of the file.
+ static const short Offset_intra_default_chroma[16] = {
+   682, 682, 682, 682,
+   682, 682, 682, 682,
+   682, 682, 682, 682,
+   682, 682, 682, 682
+ };
+ 
+ 
+ // Default for 4x4 lists 3..8 (intra blocks in non-I slices).
+ static const short Offset_intra_default_inter[16] = {
+   342, 342, 342, 342,
+   342, 342, 342, 342,
+   342, 342, 342, 342,
+   342, 342, 342, 342,
+ };
+ 
+ // Default for 4x4 lists 9..14 (inter blocks).
+ static const short Offset_inter_default[16] = {
+   342, 342, 342, 342,
+   342, 342, 342, 342,
+   342, 342, 342, 342,
+   342, 342, 342, 342,
+ };
+ 
+ // Default for 8x8 list 0.
+ static const short Offset8_intra_default_intra[64] = {
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682,
+   682, 682, 682, 682, 682, 682, 682, 682
+ };
+ 
+ // Default for 8x8 lists 1 and 2.
+ static const short Offset8_intra_default_inter[64] = {
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342
+ };
+ 
+ // Default for 8x8 lists 3 and 4.
+ static const short Offset8_inter_default[64] = {
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342,
+   342, 342, 342, 342, 342, 342, 342, 342
+ };
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Allocate the quantisation offset arrays
+  *    (LevelOffset4x4Luma, LevelOffset4x4Chroma, LevelOffset8x8Luma)
+  ***********************************************************************
+  */
+ void allocate_QOffsets (void)
+ {
+   // 13 entries in the k dimension, matching the k < 13 loops in
+   // CalculateOffsetParam and CalculateOffset8Param.
+   get_mem4Dint(&LevelOffset4x4Luma,      2, 13, 4, 4);
+   get_mem5Dint(&LevelOffset4x4Chroma, 2, 2, 13, 4, 4);
+   get_mem4Dint(&LevelOffset8x8Luma,      2, 13, 8, 8);
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Free the quantisation offset arrays allocated by allocate_QOffsets
+  ***********************************************************************
+  */
+ void free_QOffsets (void)
+ {
+   // The leading dimensions must match the ones used at allocation time.
+   free_mem4Dint(LevelOffset4x4Luma,      2, 13);
+   free_mem5Dint(LevelOffset4x4Chroma, 2, 2, 13);
+   free_mem4Dint(LevelOffset8x8Luma,      2, 13);
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Check the parameter name.
+  * \param s
+  *    parameter name string
+  * \param type
+  *    4x4 or 8x8 offset matrix type
+  * \return
+  *    the index number if the string is a valid parameter name,         \n
+  *    -1 for error
+  ***********************************************************************
+  */
+ 
+ int CheckOffsetParameterName (char *s, int *type)
+ {
+   // Row counts are taken from the tables themselves so they cannot drift apart.
+   const int count4x4 = (int) (sizeof (OffsetType4x4) / sizeof (OffsetType4x4[0]));
+   const int count8x8 = (int) (sizeof (OffsetType8x8) / sizeof (OffsetType8x8[0]));
+   int i;
+ 
+   // 4x4 names first: *type is 0 whenever a 4x4 name matches.
+   // (The rows are arrays, never NULL, so only the bounds test is needed.)
+   *type = 0;
+   for (i = 0; i < count4x4; i++)
+   {
+     if (0 == strcmp (OffsetType4x4[i], s))
+       return i;
+   }
+ 
+   // Then the 8x8 names: *type is 1 for a match, and is left at 1 when nothing matches.
+   *type = 1;
+   for (i = 0; i < count8x8; i++)
+   {
+     if (0 == strcmp (OffsetType8x8[i], s))
+       return i;
+   }
+ 
+   return -1;
+ }
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Parse the Q Offset Matrix values read from cfg file.
+  * \param buf
+  *    buffer to be parsed
+  * \param bufsize
+  *    buffer size of buffer
+  ***********************************************************************
+  */
+ void ParseQOffsetMatrix (char *buf, int bufsize)
+ {
+   char *items[MAX_ITEMS_TO_PARSE];
+   int MapIdx;
+   int item = 0;
+   int InString = 0, InItem = 0;
+   char *p = buf;
+   char *bufend = &buf[bufsize];
+   int IntContent;
+   int i, j, range, type, cnt;
+   short *OffsetList;
+ 
+   // Stage one: split buf in place into NUL-terminated tokens and remember each token start
+   // in items[]. Every store into items[] is bounds-checked against MAX_ITEMS_TO_PARSE.
+   while (p < bufend)
+   {
+     switch (*p)
+     {
+       case 13:
+         p++;
+         break;
+       case '#':                 // Found comment
+         *p = '\0';              // Replace '#' with '\0' in case of comment immediately following integer or string
+         while (p < bufend && *p != '\n')  // Skip till EOL or EOF; test the bound before reading *p
+           p++;
+         InString = 0;
+         InItem = 0;
+         break;
+       case '\n':
+         InItem = 0;
+         InString = 0;
+         *p++ = '\0';
+         break;
+       case ' ':
+       case '\t':              // Skip whitespace, leave state unchanged
+         if (InString)
+           p++;
+         else
+         {                     // Terminate non-strings once whitespace is found
+           *p++ = '\0';
+           InItem = 0;
+         }
+         break;
+ 
+       case '"':               // Begin/End of String
+         *p++ = '\0';
+         if (!InString)
+         {
+           if (item >= MAX_ITEMS_TO_PARSE)
+           {
+             snprintf (errortext, ET_SIZE,
+               " Parsing error in config file: more than %d items found.",
+               MAX_ITEMS_TO_PARSE);
+             error (errortext, 300);
+             return;
+           }
+           items[item++] = p;
+           InItem = ~InItem;
+         }
+         else
+           InItem = 0;
+         InString = ~InString; // Toggle
+         break;
+ 
+       case ',':
+         p++;
+         InItem = 0;
+         break;
+ 
+       default:
+         if (!InItem)
+         {
+           if (item >= MAX_ITEMS_TO_PARSE)
+           {
+             snprintf (errortext, ET_SIZE,
+               " Parsing error in config file: more than %d items found.",
+               MAX_ITEMS_TO_PARSE);
+             error (errortext, 300);
+             return;
+           }
+           items[item++] = p;
+           InItem = ~InItem;
+         }
+         p++;
+     }
+   }
+ 
+   // From here on, item is the index of the last valid token (or -1 if there is none).
+   item--;
+ 
+   // Stage two: each entry is  NAME = v0 v1 ... v(range-1)  with range 16 (4x4) or 64 (8x8).
+   for (i = 0; i < item; i += cnt)
+   {
+     cnt = 0;
+     if (0 > (MapIdx = CheckOffsetParameterName (items[i + cnt], &type)))
+     {
+       snprintf (errortext, ET_SIZE,
+         " Parsing error in config file: Parameter Name '%s' not recognized.",
+         items[i + cnt]);
+       error (errortext, 300);
+       return;                 // never index the lists with a negative MapIdx
+     }
+     cnt++;
+     // i < item guarantees that items[i + 1] is a parsed token.
+     if (strcmp ("=", items[i + cnt]))
+     {
+       snprintf (errortext, ET_SIZE,
+         " Parsing error in config file: '=' expected as the second token in each item.");
+       error (errortext, 300);
+       return;
+     }
+     cnt++;
+ 
+     if (!type) //4x4 Matrix
+     {
+       range = 16;
+       OffsetList = OffsetList4x4input[MapIdx];
+       // Guarded so the flag array is never written past its end, whatever its size.
+       if (MapIdx < (int) (sizeof (offset4x4_check) / sizeof (offset4x4_check[0])))
+         offset4x4_check[MapIdx] = 1; //to indicate matrix found in cfg file
+     }
+     else //8x8 matrix
+     {
+       range = 64;
+       OffsetList = OffsetList8x8input[MapIdx];
+       if (MapIdx < (int) (sizeof (offset8x8_check) / sizeof (offset8x8_check[0])))
+         offset8x8_check[MapIdx] = 1; //to indicate matrix found in cfg file
+     }
+ 
+     // A truncated file must not make us read token pointers that were never stored.
+     if (i + cnt + range - 1 > item)
+     {
+       snprintf (errortext, ET_SIZE,
+         " Parsing error: Expected %d numerical values for Parameter of %s.",
+         range, items[i]);
+       error (errortext, 300);
+       return;
+     }
+ 
+     for (j = 0; j < range; j++)
+     {
+       if (1 != sscanf (items[i + cnt + j], "%d", &IntContent))
+       {
+         snprintf (errortext, ET_SIZE,
+           " Parsing error: Expected numerical value for Parameter of %s, found '%s'.",
+           items[i], items[i + cnt + j]);
+         error (errortext, 300);
+         return;
+       }
+ 
+       OffsetList[j] = (short) IntContent; //save value in matrix
+     }
+     cnt += j;
+     printf (".");
+   }
+ }
+ 
+ 
+ /*!
+  ***********************************************************************
+  * \brief
+  *    Initialise Q offset matrix values.
+  ***********************************************************************
+  */
+ void Init_QOffsetMatrix (void)
+ {
+   char *content;
+ 
+   // The offset tables are always allocated, with or without an offset matrix file.
+   allocate_QOffsets ();
+ 
+   if (input->OffsetMatrixPresentFlag)
+   {
+     printf ("Parsing Quantization Offset Matrix file %s ",
+       input->QOffsetMatrixFile);
+     content = GetConfigFileContent (input->QOffsetMatrixFile, 0);
+     // content is a pointer: compare it against NULL, not against the character '\0'.
+     if (content != NULL)
+       ParseQOffsetMatrix (content, strlen (content));
+     else
+     {
+       printf
+         ("\nError: %s\nProceeding with default values for all matrices.",
+         errortext);
+       // Clearing the flag makes InitOffsetParam fall back to the built-in defaults.
+       input->OffsetMatrixPresentFlag = 0;
+     }   
+ 
+     printf ("\n");
+ 
+     // free(NULL) is a no-op, so this is safe on the error path as well.
+     free (content);
+   }
+   //! Now set up all offset params. This process could be reused if we wish to re-init offsets
+   InitOffsetParam ();
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Intit quantization offset params 
+  *
+  * \par Input:
+  *    none
+  *
+  * \par Output:
+  *    none
+  ************************************************************************
+  */
+ void InitOffsetParam ()
+ {
+   int list, pos;
+ 
+   if (!input->OffsetMatrixPresentFlag)
+   {
+     // No offset matrix file in use: load the built-in defaults.
+     for (pos = 0; pos < 16; pos++)
+     {
+       for (list = 0; list < 15; list++)
+       {
+         if (list < 3)
+           OffsetList4x4[list][pos] = Offset_intra_default_intra[pos];
+         else if (list < 9)
+           OffsetList4x4[list][pos] = Offset_intra_default_inter[pos];
+         else
+           OffsetList4x4[list][pos] = Offset_inter_default[pos];
+       }
+     }
+ 
+     for (pos = 0; pos < 64; pos++)
+     {
+       OffsetList8x8[0][pos] = Offset8_intra_default_intra[pos];
+       OffsetList8x8[1][pos] = Offset8_intra_default_inter[pos];
+       OffsetList8x8[2][pos] = Offset8_intra_default_inter[pos];
+       OffsetList8x8[3][pos] = Offset8_inter_default[pos];
+       OffsetList8x8[4][pos] = Offset8_inter_default[pos];
+     }
+     return;
+   }
+ 
+   // Offset matrix file in use: take every entry of every list from the parsed input lists.
+   for (list = 0; list < 15; list++)
+     for (pos = 0; pos < 16; pos++)
+       OffsetList4x4[list][pos] = OffsetList4x4input[list][pos];
+ 
+   for (list = 0; list < 5; list++)
+     for (pos = 0; pos < 64; pos++)
+       OffsetList8x8[list][pos] = OffsetList8x8input[list][pos];
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Calculation of the quantization offset params at the frame level
+  *
+  * \par Input:
+  *    none
+  *
+  * \par Output:
+  *    none
+  ************************************************************************
+  */
+ void CalculateOffsetParam ()
+ {
+   int row, col, rem, pos, shift;
+   int intra_list, inter_list;
+   // SI slices are treated as I slices and SP slices as P slices.
+   int img_type = (img->type == SI_SLICE ? I_SLICE : (img->type == SP_SLICE ? P_SLICE : img->type));
+ 
+   AdaptRndWeight = input->AdaptRndWFactor[img->nal_reference_idc!=0][img_type];
+ 
+   // First of the three consecutive lists (luma, chroma, chroma) used for intra blocks ...
+   if (img_type == I_SLICE)
+     intra_list = 0;
+   else if (img_type == B_SLICE)
+     intra_list = 6;
+   else
+     intra_list = 3;
+ 
+   // ... and for inter blocks.
+   inter_list = (img_type == B_SLICE) ? 12 : 9;
+ 
+   for (rem = 0; rem < 13; rem++)
+   {
+     shift = Q_BITS + rem - OffsetBits;
+     for (row = 0; row < 4; row++)
+     {
+       for (col = 0; col < 4; col++)
+       {
+         // Position of element [row][col] inside the flat 16-entry offset list.
+         pos = (col << 2) + row;
+ 
+         LevelOffset4x4Luma[1][rem][row][col]      = (int) OffsetList4x4[intra_list][pos] << shift;
+         LevelOffset4x4Chroma[0][1][rem][row][col] = (int) OffsetList4x4[intra_list + 1][pos] << shift;
+         LevelOffset4x4Chroma[1][1][rem][row][col] = (int) OffsetList4x4[intra_list + 2][pos] << shift;
+ 
+         LevelOffset4x4Luma[0][rem][row][col]      = (int) OffsetList4x4[inter_list][pos] << shift;
+         LevelOffset4x4Chroma[0][0][rem][row][col] = (int) OffsetList4x4[inter_list + 1][pos] << shift;
+         LevelOffset4x4Chroma[1][0][rem][row][col] = (int) OffsetList4x4[inter_list + 2][pos] << shift;
+       }
+     }
+   }
+ }
+  
+  /*!
+  ************************************************************************
+  * \brief
+  *    Calculate the quantisation offset parameters
+  *
+  ************************************************************************
+  */
+ void CalculateOffset8Param ()
+ {
+   int row, col, rem, pos, shift;
+   int intra_list, inter_list;
+ 
+   // NOTE(review): unlike CalculateOffsetParam, the raw img->type is used here, so SI/SP
+   // slices are not mapped to I/P first - confirm that this is intended.
+   if (img->type == I_SLICE)
+     intra_list = 0;
+   else if (img->type == B_SLICE)
+     intra_list = 2;
+   else
+     intra_list = 1;
+ 
+   inter_list = (img->type == B_SLICE) ? 4 : 3;
+ 
+   for (rem = 0; rem < 13; rem++)
+   {
+     shift = Q_BITS_8 + rem - OffsetBits;
+     for (row = 0; row < 8; row++)
+     {
+       for (col = 0; col < 8; col++)
+       {
+         // Position of element [row][col] inside the flat 64-entry offset list.
+         pos = (col << 3) + row;
+ 
+         LevelOffset8x8Luma[1][rem][row][col] = (int) OffsetList8x8[intra_list][pos] << shift;
+         LevelOffset8x8Luma[0][rem][row][col] = (int) OffsetList8x8[inter_list][pos] << shift;
+       }
+     }
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/q_offsets.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/q_offsets.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/q_offsets.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,28 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    q_offsets.h
+  *
+  * \brief
+  *    Headerfile for q_offsets array
+  *
+  * \date
+  *    18. Nov 2004
+  ***************************************************************************
+  */
+ 
+ #ifndef _Q_OFFSETS_H_
+ #define _Q_OFFSETS_H_
+ 
+ // Per-frame quantisation offset tables; allocated by Init_QOffsetMatrix, released by free_QOffsets.
+ extern int ****LevelOffset4x4Luma;
+ extern int *****LevelOffset4x4Chroma;
+ extern int ****LevelOffset8x8Luma;
+ 
+ // Updated by CalculateOffsetParam from input->AdaptRndWFactor.
+ extern int AdaptRndWeight;
+ 
+ // Allocates the tables, optionally parses the offset matrix file, then initialises the offset lists.
+ void Init_QOffsetMatrix ();
+ // Fill the 4x4 / 8x8 offset tables for the current slice type.
+ void CalculateOffsetParam();
+ void CalculateOffset8Param();
+ void free_QOffsets ();
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/ratectl.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/ratectl.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/ratectl.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1803 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file ratectl.c
+  *
+  * \brief
+  *    Rate Control algorithm
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *     - Siwei Ma <swma at jdl.ac.cn>
+  *     - Zhengguo LI<ezgli at lit.a-star.edu.sg>
+  *
+  * \date
+  *   16 Jan. 2003
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ 
+ #include "global.h"
+ #include "ratectl.h"
+ 
+ const double THETA=1.3636;
+ const int Switch=0;
+ 
+ int Iprev_bits=0;
+ int Pprev_bits=0;
+ 
+ 
+ /* rate control variables */
+ // Xp, Xb: reset to 0 in rc_init_seq.
+ int Xp, Xb;
+ // R: remaining number of bits in the GOP (reset in rc_init_seq, topped up in rc_init_GOP).
+ static int R,T_field;
+ // Np, Nb: set from the np / nb arguments of rc_init_GOP.
+ static int Np, Nb, bits_topfield;
+ long T,T1;
+ //HRD consideration
+ // LowerBound and UpperBound1 are initialised per GOP in rc_init_GOP.
+ long UpperBound1, UpperBound2, LowerBound;
+ // Set to 0.8 * BufferSize in rc_init_seq.
+ double InitialDelayOffset;
+ const double OMEGA=0.9;
+ 
+ double Wp,Wb; 
+ // Set to np in rc_init_GOP.
+ int TotalPFrame;
+ int DuantQp; 
+ // Largest variation of quantization parameters; set to 2 in rc_init_seq.
+ int PDuantQp;
+ FILE *BitRate;
+ double DeltaP;
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Dynamically allocate memory needed for rate control
+  *
+  *************************************************************************************
+ */
+ void rc_alloc()
+ {
+   // One MAD value per macroblock of the frame.
+   img->MADofMB = calloc (img->FrameSizeInMbs, sizeof (double));
+   if (!img->MADofMB)
+     no_mem_exit("rc_alloc: img->MADofMB");
+ 
+   // The four basic-unit arrays each hold FrameSizeInMbs / basicunit zero-initialised doubles.
+   BUPFMAD = calloc ((img->FrameSizeInMbs/input->basicunit), sizeof (double));
+   if (!BUPFMAD)
+     no_mem_exit("rc_alloc: img->BUPFMAD");
+ 
+   BUCFMAD = calloc ((img->FrameSizeInMbs/input->basicunit), sizeof (double));
+   if (!BUCFMAD)
+     no_mem_exit("rc_alloc: img->BUCFMAD");
+ 
+   FCBUCFMAD = calloc ((img->FrameSizeInMbs/input->basicunit), sizeof (double));
+   if (!FCBUCFMAD)
+     no_mem_exit("rc_alloc: img->FCBUCFMAD");
+ 
+   FCBUPFMAD = calloc ((img->FrameSizeInMbs/input->basicunit), sizeof (double));
+   if (!FCBUPFMAD)
+     no_mem_exit("rc_alloc: img->FCBUPFMAD");
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Free memory needed for rate control
+  *
+  *************************************************************************************
+ */
+ void rc_free()
+ {
+   // free(NULL) is a no-op, so no guards are needed; every pointer ends up NULL.
+   free (img->MADofMB);
+   img->MADofMB = NULL;
+ 
+   free (BUPFMAD);
+   BUPFMAD = NULL;
+ 
+   free (BUCFMAD);
+   BUCFMAD = NULL;
+ 
+   free (FCBUCFMAD);
+   FCBUCFMAD = NULL;
+ 
+   free (FCBUPFMAD);
+   FCBUPFMAD = NULL;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Initialize rate control parameters
+  *
+  *************************************************************************************
+ */
+ void rc_init_seq()
+ {
+   double low, mid, high, bpp;   // bits-per-pixel thresholds and the measured value
+   int idx;
+ 
+   Xp=0;
+   Xb=0;
+ 
+   bit_rate=input->bit_rate;
+   frame_rate = (img->framerate *(float)(input->successive_Bframe + 1)) / (float) (input->jumpd + 1);
+   PreviousBit_Rate=bit_rate;
+ 
+   /* total number of MBs in a frame; the basic unit can be at most one frame */
+   img->Frame_Total_Number_MB=img->height*img->width/256;
+   if(input->basicunit>img->Frame_Total_Number_MB)
+     input->basicunit=img->Frame_Total_Number_MB;
+   if(input->basicunit<img->Frame_Total_Number_MB)
+     TotalNumberofBasicUnit=img->Frame_Total_Number_MB/input->basicunit;
+ 
+   MINVALUE=4.0;
+ 
+   /* parameters of the fluid flow traffic model */
+   BufferSize=bit_rate*2.56;
+   CurrentBufferFullness=0;
+   GOPTargetBufferLevel=CurrentBufferFullness;
+   /* HRD consideration */
+   InitialDelayOffset=BufferSize*0.8;
+ 
+   /* previous window sizes and coded-frame counters */
+   m_windowSize=0;
+   MADm_windowSize=0;
+   img->NumberofCodedBFrame=0;
+   img->NumberofCodedPFrame=0;
+   img->NumberofGOP=0;
+ 
+   /* remaining # of bits in GOP */
+   R = 0;
+ 
+   /* control parameters depend on whether B frames are coded */
+   GAMMAP = (input->successive_Bframe>0) ? 0.25 : 0.5;
+   BETAP  = (input->successive_Bframe>0) ? 0.9 : 0.5;
+ 
+   /* quadratic rate-distortion model */
+   PPreHeader=0;
+   Pm_X1=bit_rate*1.0;
+   Pm_X2=0.0;
+ 
+   /* linear prediction model for P picture */
+   PMADPictureC1=1.0;
+   PMADPictureC2=0.0;
+ 
+   for(idx=0; idx<20; idx++)
+   {
+     Pm_rgQp[idx]=0;
+     Pm_rgRp[idx]=0.0;
+   }
+   /* PPictureMAD has one entry more than the two model arrays above */
+   for(idx=0; idx<=20; idx++)
+     PPictureMAD[idx]=0.0;
+ 
+   //Define the largest variation of quantization parameters
+   PDuantQp=2;
+ 
+   /* basic unit layer rate control */
+   PAveHeaderBits1=0;
+   PAveHeaderBits3=0;
+   DDquant = (TotalNumberofBasicUnit>=9) ? 1 : 2;
+ 
+   MBPerRow=img->width/16;
+ 
+   /* adaptive field/frame coding */
+   img->FieldControl=0;
+ 
+   RC_MAX_QUANT = 51;  // clipping
+   RC_MIN_QUANT = 0;   // clipping
+ 
+   /* compute the initial QP from the bits per pixel; thresholds depend on the picture width */
+   bpp = 1.0*bit_rate /(frame_rate*img->width*img->height);
+   switch (img->width)
+   {
+     case 176:
+       low  = 0.1;
+       mid  = 0.3;
+       high = 0.6;
+       break;
+     case 352:
+       low  = 0.2;
+       mid  = 0.6;
+       high = 1.2;
+       break;
+     default:
+       low  = 0.6;
+       mid  = 1.4;
+       high = 2.4;
+       break;
+   }
+ 
+   /* a non-zero SeinitialQP was chosen by the user and is left untouched */
+   if (input->SeinitialQP!=0)
+     return;
+ 
+   if(bpp<=low)
+     input->SeinitialQP = 35;
+   else if(bpp<=mid)
+     input->SeinitialQP = 25;
+   else if(bpp<=high)
+     input->SeinitialQP = 20;
+   else
+     input->SeinitialQP = 10;
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Initialize one GOP
+  *
+  *    Re-seeds the HRD bounds from the bits left over in R, adds the bit budget
+  *    of the new GOP to R, records the picture counts and selects the initial
+  *    QP (MyInitialQp) of the GOP. Finally the per-GOP P/B statistics are reset.
+  *
+  * \param np
+  *    number of P pictures in the GOP (stored in Np and TotalPFrame)
+  * \param nb
+  *    number of B pictures in the GOP (stored in Nb)
+  *
+  *************************************************************************************
+ */
+ void rc_init_GOP(int np, int nb)
+ {
+   int AllocatedBits;
+   int GOPDquant;
+ 
+   /*initialize the lower bound and the upper bound for the target bits of each frame, HRD consideration.
+     R still holds the balance of the previous GOP at this point*/
+   LowerBound=(long)(R+bit_rate/frame_rate);
+   UpperBound1=(long)(R+InitialDelayOffset);
+ 
+   /*compute the total number of bits for the current GOP: (1 + np + nb) pictures, rounded to nearest*/
+   AllocatedBits = (int) floor((1 + np + nb) * bit_rate / frame_rate + 0.5);
+   R +=AllocatedBits;
+   Np  = np;
+   Nb  = nb;
+ 
+   GOPOverdue=FALSE;
+   
+   /*field coding*/
+   img->IFLAG=1;
+ 
+   /*Compute InitialQp for each GOP*/
+   TotalPFrame=np;
+   img->NumberofGOP++;
+   if(img->NumberofGOP==1)
+   {
+     /*first GOP: start from the sequence-level initial QP*/
+     MyInitialQp=input->SeinitialQP;
+     PreviousQp2=MyInitialQp-1; //recent change -0;
+     QPLastGOP=MyInitialQp;
+   }
+   else
+   {
+     /*adaptive field/frame coding: account for the QP of the last coded P picture,
+       taken from the frame or the field buffer depending on img->FieldFrame*/
+     if((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))
+     {
+       if (img->FieldFrame == 1)
+       {
+         img->TotalQpforPPicture += FrameQPBuffer;
+         QPLastPFrame = FrameQPBuffer;
+       }
+       else
+       {
+         img->TotalQpforPPicture += FieldQPBuffer;
+         QPLastPFrame = FieldQPBuffer;
+       }
+     }
+ 
+     /*compute the average QP of P frames in the previous GOP.
+       NOTE(review): the quotient is not finite when img->NumberofPPicture is 0 - confirm
+       that a GOP without P pictures cannot reach this branch*/
+     PAverageQp=(int)(1.0*img->TotalQpforPPicture/img->NumberofPPicture+0.5);
+ 
+     /*lower the average by (GOP length)/15 rounded to nearest, at most 2*/
+     GOPDquant=(int)(0.5+1.0*(np+nb+1)/15);
+     if(GOPDquant>2)
+       GOPDquant=2;
+ 
+     PAverageQp-=GOPDquant;
+ 
+     if (PAverageQp > (QPLastPFrame - 2))
+       PAverageQp--;
+ 
+     /*stay within +/-2 of the previous GOP's initial QP, then within the legal QP range*/
+     PAverageQp = MAX(QPLastGOP-2,  PAverageQp);
+     PAverageQp = MIN(QPLastGOP+2, PAverageQp);
+     PAverageQp = MIN(RC_MAX_QUANT, PAverageQp);
+     PAverageQp = MAX(RC_MIN_QUANT, PAverageQp);
+ 
+     MyInitialQp=PAverageQp;
+     QPLastGOP = MyInitialQp;
+     Pm_Qp=PAverageQp;
+     PAveFrameQP=PAverageQp;
+     PreviousQp1=PreviousQp2;
+     PreviousQp2=MyInitialQp-1;  
+   }
+ 
+   /*restart the per-GOP statistics*/
+   img->TotalQpforPPicture=0;
+   img->NumberofPPicture=0;
+   NumberofBFrames=0; 
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Initialize one picture
+  *
+  *    Refreshes the basic-unit count (MbInterlace only), applies the predefined
+  *    bandwidth curve when input->channel_type is 1, updates the target buffer
+  *    level and the averaged complexity weights AWp/AWb, derives the target bit
+  *    count T for P pictures and resets the per-picture bit counters.
+  *
+  * \param fieldpic
+  *    when non-zero (or when topfield is non-zero) the target computation and
+  *    the counter reset are performed; exact meaning is defined by the callers,
+  *    which are not visible here - TODO confirm
+  * \param topfield
+  *    non-zero for the top field; with field control at basic unit layer it
+  *    selects the top-field branch, zero selects the bottom-field branch
+  * \param targetcomputation
+  *    non-zero to recompute the target buffer level and the target bits T
+  *
+  *************************************************************************************
+ */
+ void rc_init_pict(int fieldpic,int topfield,int targetcomputation)
+ {
+   int i;
+ 
+   /* compute the total number of basic units in a frame */
+   if(input->MbInterlace)
+     TotalNumberofBasicUnit=img->Frame_Total_Number_MB/img->BasicUnit;
+   img->NumberofCodedMacroBlocks=0;
+ 
+   /* Normally, the bandwidth for the VBR case is estimated by 
+      a congestion control algorithm. A bandwidth curve can be predefined if we only want to 
+      test the proposed algorithm */
+   if(input->channel_type==1)
+   {
+     if(img->NumberofCodedPFrame==58)
+       bit_rate *=1.5; /* hard-coded test curve: +50% once 58 P frames are coded */
+     else if(img->NumberofCodedPFrame==59)
+       PreviousBit_Rate=bit_rate; /* one P frame later the remembered rate catches up */
+   }
+ 
+   /* predefine a target buffer level for each frame */
+   if((fieldpic||topfield)&&targetcomputation)
+   {
+     switch (img->type)
+     {
+       case P_SLICE:
+       /* Since the available bandwidth may vary at any time, the total number of 
+          bits is updated picture by picture*/
+         if(PreviousBit_Rate!=bit_rate)
+           R +=(int) floor((bit_rate-PreviousBit_Rate)*(Np+Nb)/frame_rate+0.5);
+               
+         /* predefine the  target buffer level for each picture.
+            frame layer rate control */
+         if(img->BasicUnit==img->Frame_Total_Number_MB)
+         {
+           if(img->NumberofPPicture==1)
+           {
+             TargetBufferLevel=CurrentBufferFullness;
+             DeltaP=(CurrentBufferFullness-GOPTargetBufferLevel)/(TotalPFrame-1); /* NOTE(review): divisor is 0 when TotalPFrame==1 */
+             TargetBufferLevel -=DeltaP;
+           }
+           else if(img->NumberofPPicture>1)
+             TargetBufferLevel -=DeltaP;
+         }
+         /* basic unit layer rate control */
+         else
+         {
+           if(img->NumberofCodedPFrame>0)
+           {
+             /* adaptive frame/field coding: the current-picture MAD of every basic unit
+                becomes the previous-picture MAD (field arrays when FieldControl is 1) */
+             if(((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))\
+               &&(img->FieldControl==1))
+             {
+               for(i=0;i<TotalNumberofBasicUnit;i++)
+                 FCBUPFMAD[i]=FCBUCFMAD[i];
+             }
+             else
+             {
+               for(i=0;i<TotalNumberofBasicUnit;i++)
+                 BUPFMAD[i]=BUCFMAD[i];
+             }     
+           }
+ 
+           if(img->NumberofGOP==1)
+           {
+             if(img->NumberofPPicture==1)
+             {
+               TargetBufferLevel=CurrentBufferFullness;
+               DeltaP=(CurrentBufferFullness-GOPTargetBufferLevel)/(TotalPFrame-1); /* NOTE(review): divisor is 0 when TotalPFrame==1 */
+               TargetBufferLevel -=DeltaP;
+             }
+             else if(img->NumberofPPicture>1)
+               TargetBufferLevel -=DeltaP;
+           }
+           else if(img->NumberofGOP>1)
+           {
+             if(img->NumberofPPicture==0)
+             {
+               TargetBufferLevel=CurrentBufferFullness;
+               DeltaP=(CurrentBufferFullness-GOPTargetBufferLevel)/TotalPFrame; /* NOTE(review): divisor is 0 when TotalPFrame==0 */
+               TargetBufferLevel -=DeltaP;
+             }
+             else if(img->NumberofPPicture>0)
+               TargetBufferLevel -=DeltaP;
+           }
+         }
+ 
+         if(img->NumberofCodedPFrame==1)
+           AWp=Wp;
+         if((img->NumberofCodedPFrame<8)&&(img->NumberofCodedPFrame>1))
+             AWp=Wp*(img->NumberofCodedPFrame-1)/img->NumberofCodedPFrame+\
+               AWp/img->NumberofCodedPFrame;
+           else if(img->NumberofCodedPFrame>1) /* pairs with the "<8" test just above, i.e. 8 or more coded P frames */
+             AWp=Wp/8+7*AWp/8;
+           
+         // compute the average complexity of B frames
+         if(input->successive_Bframe>0)
+         {
+           // compute the target buffer level
+           TargetBufferLevel +=(AWp*(input->successive_Bframe+1)*bit_rate\
+             /(frame_rate*(AWp+AWb*input->successive_Bframe))-bit_rate/frame_rate);
+         }
+         
+         break;
+ 
+          case B_SLICE:
+          /* update the total number of bits if the bandwidth is changed*/
+            if(PreviousBit_Rate!=bit_rate)
+              R +=(int) floor((bit_rate-PreviousBit_Rate)*(Np+Nb)/frame_rate+0.5);
+             if((img->NumberofCodedPFrame==1)&&(img->NumberofCodedBFrame==1))
+           {
+             AWp=Wp;
+             AWb=Wb;
+           }
+           else if(img->NumberofCodedBFrame>1)
+           {
+             //compute the average weight
+             if(img->NumberofCodedBFrame<8)
+               AWb=Wb*(img->NumberofCodedBFrame-1)/img->NumberofCodedBFrame+\
+                 AWb/img->NumberofCodedBFrame;
+             else
+               AWb=Wb/8+7*AWb/8;
+           }
+ 
+             break;
+     } /* no default case: other slice types leave the buffer level and the weights untouched */
+      /* Compute the target bit for each frame */
+     if(img->type==P_SLICE)
+     {
+       /* frame layer rate control */
+       if(img->BasicUnit==img->Frame_Total_Number_MB)
+       {
+         if(img->NumberofCodedPFrame>0)
+         {
+           T = (long) floor(Wp*R/(Np*Wp+Nb*Wb) + 0.5);
+                 
+           T1 = (long) floor(bit_rate/frame_rate-GAMMAP*(CurrentBufferFullness-TargetBufferLevel)+0.5);
+           T1=MAX(0,T1);
+           T = (long)(floor(BETAP*T+(1.0-BETAP)*T1+0.5)); /* blend of the two estimates, weighted by BETAP */
+         }
+        }
+       /* basic unit layer rate control */
+       else
+       {
+         if((img->NumberofGOP==1)&&(img->NumberofCodedPFrame>0))
+         {
+           T = (int) floor(Wp*R/(Np*Wp+Nb*Wb) + 0.5);
+           T1 = (int) floor(bit_rate/frame_rate-GAMMAP*(CurrentBufferFullness-TargetBufferLevel)+0.5);
+           T1=MAX(0,T1);
+           T = (int)(floor(BETAP*T+(1.0-BETAP)*T1+0.5));
+         }
+         else if(img->NumberofGOP>1)
+         {
+           T = (long) floor(Wp*R/(Np*Wp+Nb*Wb) + 0.5);
+           T1 = (long) floor(bit_rate/frame_rate-GAMMAP*(CurrentBufferFullness-TargetBufferLevel)+0.5);
+           T1 = MAX(0,T1);
+           T = (long)(floor(BETAP*T+(1.0-BETAP)*T1+0.5));
+         }
+       }
+ 
+       /* reserve some bits for smoothing */
+ 
+       T=(long)((1.0-0.0*input->successive_Bframe)*T); /* the multiplier evaluates to 1.0 as written */
+       /* HRD consideration: keep T between LowerBound and UpperBound2 */
+       T = MAX(T, (long) LowerBound);
+         T = MIN(T, (long) UpperBound2);
+ 
+       if((topfield)||(fieldpic&&((input->PicInterlace==ADAPTIVE_CODING)\
+         ||(input->MbInterlace))))
+         T_field=T;
+     }
+   }
+ 
+   if(fieldpic||topfield)
+   {
+     /* frame layer rate control */
+     img->NumberofHeaderBits=0;
+     img->NumberofTextureBits=0;
+ 
+     /* basic unit layer rate control */
+     if(img->BasicUnit<img->Frame_Total_Number_MB)
+     {
+       TotalFrameQP=0;
+       img->NumberofBasicUnitHeaderBits=0;
+       img->NumberofBasicUnitTextureBits=0;
+       img->TotalMADBasicUnit=0;
+       if(img->FieldControl==0)
+         NumberofBasicUnit=TotalNumberofBasicUnit;
+       else
+         NumberofBasicUnit=TotalNumberofBasicUnit/2;
+     }
+   }
+     
+   if((img->type==P_SLICE)&&(img->BasicUnit<img->Frame_Total_Number_MB)\
+     &&(img->FieldControl==1))
+   {
+   /* top field at basic unit layer rate control */
+     if(topfield)
+     {
+       bits_topfield=0;
+       T=(long)(T_field*0.6); /* top field is given 60% of T_field */
+     }
+   /* bottom field at basic unit layer rate control */
+     else
+     {
+       T=T_field-bits_topfield; /* what is left of T_field after the bits recorded in bits_topfield */
+       img->NumberofBasicUnitHeaderBits=0;
+       img->NumberofBasicUnitTextureBits=0;
+       img->TotalMADBasicUnit=0;
+       NumberofBasicUnit=TotalNumberofBasicUnit/2;
+     }
+   }
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Mean absolute difference of the current macroblock: the absolute values
+  *    of the 16x16 entries of diffy are accumulated and divided by 256.
+  *
+  * \return
+  *    the mean absolute difference as a double
+  *
+  *************************************************************************************
+ */
+ double calc_MAD()
+ {
+   int row, col;
+   int abs_sum = 0;
+ 
+   for (row = 0; row < 16; row++)
+   {
+     for (col = 0; col < 16; col++)
+     {
+       abs_sum += abs(diffy[row][col]);
+     }
+   }
+ 
+   /* the 1.0 factor promotes the integer sum so the division is done in floating point */
+   return abs_sum*1.0/256;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Book-keeping after a frame or field has been encoded: charges the spent
+  *    bits to the GOP budget R and to the buffer fullness, and shifts the
+  *    HRD bounds accordingly.
+  *
+  * \param nbits
+  *    number of bits produced for the picture
+  *
+  *************************************************************************************
+ */
+ void rc_update_pict(int nbits)
+ {
+   /* per-picture channel share minus the bits actually spent, truncated to long;
+      the same amount moves both HRD bounds */
+   const long bound_shift = (long)(bit_rate/frame_rate-nbits);
+ 
+   /* remaining # of bits in GOP */
+   R -= nbits;
+   CurrentBufferFullness += nbits - bit_rate/frame_rate;
+ 
+   LowerBound  += bound_shift;
+   UpperBound1 += bound_shift;
+   /* the second upper bound is the first one scaled by OMEGA */
+   UpperBound2 = (long)(OMEGA*UpperBound1);
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Update the complexity weights (Wp, Wb) and the picture counters once a
+  *    frame has been encoded.
+  *
+  * \param nbits
+  *    number of bits used for frame
+  *
+  *************************************************************************************
+ */
+ void rc_update_pict_frame(int nbits)
+ {
+   /* complexity measure: bits times QP, rounded to nearest */
+   int complexity = 0;
+   /* true for frame coding, or for field coding once img->IFLAG has been cleared */
+   const int picture_counts = (img->FieldControl==0)
+     ||((img->IFLAG==0)&&(img->FieldControl==1));
+ 
+   if(img->BasicUnit!=img->Frame_Total_Number_MB)
+   {
+     /* basic unit layer rate control */
+     if(img->type==P_SLICE)
+     {
+       if(picture_counts)
+       {
+         /* average QP over the basic units of the picture */
+         int mean_qp = TotalFrameQP/TotalNumberofBasicUnit;
+         complexity = (int)floor(nbits*mean_qp+0.5);
+       }
+     }
+     else if(img->type==B_SLICE)
+     {
+       complexity = (int) floor(nbits*m_Qc+ 0.5);
+     }
+   }
+   else
+   {
+     /* frame layer rate control */
+     complexity = (int) floor(nbits*m_Qc+ 0.5);
+   }
+ 
+   if(img->type==P_SLICE)
+   {
+     if(picture_counts)
+     {
+       Xp = complexity;
+       Np--;
+       Wp = Xp;
+       Pm_Hp = img->NumberofHeaderBits;
+       img->NumberofCodedPFrame++;
+       img->NumberofPPicture++;
+     }
+     else if((img->IFLAG!=0)&&(img->FieldControl==1))
+     {
+       /* field coding: only clear the flag, nothing is counted for this picture */
+       img->IFLAG = 0;
+     }
+   }
+   else if(img->type==B_SLICE)
+   {
+     Xb = complexity;
+     Nb--;
+     Wb = Xb/THETA;
+ 
+     img->NumberofCodedBFrame++;
+     NumberofBFrames++;
+   }
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    Record the number of coded bits of the top field in bits_topfield.
+  *
+  * \param nbits
+  *    bits spent on the top field
+  *
+  *************************************************************************************
+ */
+ void setbitscount(int nbits)
+ {
+   /* stored as-is; no accumulation with an earlier value */
+   bits_topfield = nbits;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    compute a  quantization parameter for each frame
+  *
+  *************************************************************************************
+ */
+ int updateQuantizationParameter(int topfield)
+ {
+   double dtmp;
+   int m_Bits;
+   int BFrameNumber;
+   int StepSize;
+   int PAverageQP;
+   int SumofBasicUnit;
+   int i;
+   
+   /* frame layer rate control */
+   if(img->BasicUnit==img->Frame_Total_Number_MB)
+   {
+     /* fixed quantization parameter is used to coded I frame, the first P frame and the first B frame
+     the quantization parameter is adjusted according the available channel bandwidth and 
+     the type of vide */  
+     /*top field*/
+     if((topfield)||(img->FieldControl==0))
+     {
+       if(img->type==I_SLICE)
+       {
+         m_Qc=MyInitialQp;
+         return m_Qc;
+       }
+       else if(img->type==B_SLICE)
+       {
+         if(input->successive_Bframe==1)
+         {
+             if((input->PicInterlace==ADAPTIVE_CODING)\
+               ||(input->MbInterlace))
+             {
+               if(img->FieldControl==0)
+               {                   
+                 /*previous choice is frame coding*/
+                 if(img->FieldFrame==1)
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FrameQPBuffer;
+                 }           
+                 /*previous choice is field coding*/
+                 else
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FieldQPBuffer;
+                 }
+               }
+             }
+ /*
+           if(PreviousQp1==PreviousQp2)
+             m_Qc=PreviousQp1+2;
+           else
+             m_Qc=(PreviousQp1+PreviousQp2)/2+1;
+             */
+           m_Qc = max(max(min(PreviousQp1,PreviousQp2) + 2, max(PreviousQp1,PreviousQp2)), PreviousQp2 + 1);
+ 
+           m_Qc = MIN(m_Qc, RC_MAX_QUANT); // clipping
+           m_Qc = MAX(RC_MIN_QUANT, m_Qc);//clipping
+         }
+         else
+         {
+           BFrameNumber=(NumberofBFrames+1)%input->successive_Bframe;
+           if(BFrameNumber==0)
+             BFrameNumber=input->successive_Bframe;
+           
+           /*adaptive field/frame coding*/
+           if(BFrameNumber==1)
+           {
+             if((input->PicInterlace==ADAPTIVE_CODING)\
+               ||(input->MbInterlace))
+             {
+               if(img->FieldControl==0)
+               {
+                 /*previous choice is frame coding*/
+                 if(img->FieldFrame==1)
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FrameQPBuffer;
+                 }
+                 /*previous choice is field coding*/
+                 else
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FieldQPBuffer;
+                 }
+               }
+             }
+           }
+           
+           if((PreviousQp2-PreviousQp1)<=(-2*input->successive_Bframe-3))
+             StepSize=-3;
+           else  if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe-2))
+             StepSize=-2;
+           else if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe-1))
+             StepSize=-1;
+           else if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe))
+             StepSize=0;
+           else if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe+1))
+             StepSize=1;
+           else
+             StepSize=2;
+           
+           m_Qc=PreviousQp1+StepSize;
+           m_Qc +=MIN(2*(BFrameNumber-1),MAX(-2*(BFrameNumber-1), \
+             (BFrameNumber-1)*(PreviousQp2-PreviousQp1)/(input->successive_Bframe-1)));
+           m_Qc = MIN(m_Qc, RC_MAX_QUANT); // clipping
+           m_Qc = MAX(RC_MIN_QUANT, m_Qc);//clipping
+         }
+         return m_Qc;
+       }
+       else if((img->type==P_SLICE)&&(img->NumberofPPicture==0))
+       {
+         m_Qc=MyInitialQp;
+         
+         if(img->FieldControl==0)
+         {
+           if(active_sps->frame_mbs_only_flag)
+           {
+             img->TotalQpforPPicture +=m_Qc;
+             PreviousQp1=PreviousQp2;
+             PreviousQp2=m_Qc;
+             Pm_Qp=m_Qc;
+           }
+           /*adaptive field/frame coding*/
+           else
+             FrameQPBuffer=m_Qc;
+         }
+         
+         return m_Qc;  
+       }
+       else
+       {
+         /*adaptive field/frame coding*/
+         if(((input->PicInterlace==ADAPTIVE_CODING)\
+           ||(input->MbInterlace))\
+           &&(img->FieldControl==0))
+         {
+           /*previous choice is frame coding*/
+           if(img->FieldFrame==1)
+           {
+             img->TotalQpforPPicture +=FrameQPBuffer;
+             Pm_Qp=FrameQPBuffer;
+           }
+           /*previous choice is field coding*/
+           else
+           {
+             img->TotalQpforPPicture +=FieldQPBuffer;
+             Pm_Qp=FieldQPBuffer;
+           }
+         }
+         
+         m_X1=Pm_X1;
+         m_X2=Pm_X2;
+         m_Hp=PPreHeader;
+         m_Qp=Pm_Qp;
+         DuantQp=PDuantQp;
+         MADPictureC1=PMADPictureC1;
+         MADPictureC2=PMADPictureC2;
+         PreviousPictureMAD=PPictureMAD[0];
+         
+         /* predict the MAD of current picture*/
+         CurrentFrameMAD=MADPictureC1*PreviousPictureMAD+MADPictureC2;
+         
+         /*compute the number of bits for the texture*/      
+         
+         if(T<0)
+         {
+           m_Qc=m_Qp+DuantQp;
+           m_Qc = MIN(m_Qc, RC_MAX_QUANT); // clipping
+         }
+         else
+         {
+           m_Bits =T-m_Hp;
+           m_Bits = MAX(m_Bits, (int)(bit_rate/(MINVALUE*frame_rate)));
+           dtmp = CurrentFrameMAD * m_X1 * CurrentFrameMAD * m_X1 \
+             + 4 * m_X2 * CurrentFrameMAD * m_Bits;
+           if ((m_X2 == 0.0) || (dtmp < 0) || ((sqrt (dtmp) - m_X1 * CurrentFrameMAD) <= 0.0)) // fall back 1st order mode
+             m_Qstep = (float) (m_X1 * CurrentFrameMAD / (double) m_Bits);
+           else // 2nd order mode
+             m_Qstep = (float) ((2 * m_X2 * CurrentFrameMAD) / (sqrt (dtmp) - m_X1 * CurrentFrameMAD));
+           
+           m_Qc=Qstep2QP(m_Qstep);
+           
+           m_Qc = MIN(m_Qp+DuantQp,  m_Qc);  // control variation
+           m_Qc = MIN(m_Qc, RC_MAX_QUANT); // clipping
+           m_Qc = MAX(m_Qp-DuantQp, m_Qc); // control variation
+           m_Qc = MAX(RC_MIN_QUANT, m_Qc);
+         }
+         
+         if(img->FieldControl==0)
+         {
+           /*frame coding*/
+           if(active_sps->frame_mbs_only_flag)
+           {
+             img->TotalQpforPPicture +=m_Qc;
+             PreviousQp1=PreviousQp2;
+             PreviousQp2=m_Qc;
+             Pm_Qp=m_Qc;
+           }
+           /*adaptive field/frame coding*/
+           else
+             FrameQPBuffer=m_Qc;
+         }
+         
+         return m_Qc;
+       }
+    }
+    /*bottom field*/
+    else
+    {
+      if((img->type==P_SLICE)&&(img->IFLAG==0))
+      {
+        /*field coding*/
+        if(input->PicInterlace==FIELD_CODING)
+        {
+          img->TotalQpforPPicture +=m_Qc;
+          PreviousQp1=PreviousQp2+1; 
+          PreviousQp2=m_Qc;//+0 Recent change 13/1/2003
+          Pm_Qp=m_Qc;
+        }
+        /*adaptive field/frame coding*/
+        else
+          FieldQPBuffer=m_Qc;     
+      }
+      return m_Qc;
+    }
+   }
+   /*basic unit layer rate control*/
+   else
+   {
+     /*top filed of I frame*/
+     if(img->type==I_SLICE)
+     {
+       m_Qc=MyInitialQp;
+       return m_Qc;
+     }
+     /*bottom field of I frame*/
+     else if((img->type==P_SLICE)&&(img->IFLAG==1)&&(img->FieldControl==1))
+     {
+       m_Qc=MyInitialQp;
+       return m_Qc;
+     }
+     else if(img->type==B_SLICE)
+     {
+       /*top filed of B frame*/
+       if((topfield)||(img->FieldControl==0))
+       {
+         if(input->successive_Bframe==1)
+         {
+          /*adaptive field/frame coding*/
+           if((input->PicInterlace==ADAPTIVE_CODING)\
+               ||(input->MbInterlace))
+             {
+               if(img->FieldControl==0)
+               {             
+                 /*previous choice is frame coding*/
+                 if(img->FieldFrame==1)
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FrameQPBuffer;
+                 }
+                 /*previous choice is field coding*/
+                 else
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FieldQPBuffer;
+                 }
+               }
+             }
+ 
+           if(PreviousQp1==PreviousQp2)
+             m_Qc=PreviousQp1+2;
+           else
+             m_Qc=(PreviousQp1+PreviousQp2)/2+1;
+           m_Qc = MIN(m_Qc, RC_MAX_QUANT); // clipping
+           m_Qc = MAX(RC_MIN_QUANT, m_Qc);//clipping
+         }
+         else
+         {
+           BFrameNumber=(NumberofBFrames+1)%input->successive_Bframe;
+           if(BFrameNumber==0)
+             BFrameNumber=input->successive_Bframe;
+           
+           /*adaptive field/frame coding*/
+           if(BFrameNumber==1)
+           {
+             if((input->PicInterlace==ADAPTIVE_CODING)\
+               ||(input->MbInterlace))
+             {
+               if(img->FieldControl==0)
+               {
+                 /*previous choice is frame coding*/
+                 if(img->FieldFrame==1)
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FrameQPBuffer;
+                 }
+                 /*previous choice is field coding*/
+                 else
+                 {
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=FieldQPBuffer;
+                 }
+               } 
+             }
+           }
+           
+           if((PreviousQp2-PreviousQp1)<=(-2*input->successive_Bframe-3))
+             StepSize=-3;
+           else  if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe-2))
+             StepSize=-2;
+           else if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe-1))
+             StepSize=-1;
+           else if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe))
+             StepSize=0;//0
+           else if((PreviousQp2-PreviousQp1)==(-2*input->successive_Bframe+1))
+             StepSize=1;//1
+           else
+             StepSize=2;//2
+           m_Qc=PreviousQp1+StepSize;
+           m_Qc +=MIN(2*(BFrameNumber-1),MAX(-2*(BFrameNumber-1), \
+             (BFrameNumber-1)*(PreviousQp2-PreviousQp1)/(input->successive_Bframe-1)));
+           m_Qc = MIN(m_Qc, RC_MAX_QUANT); // clipping
+           m_Qc = MAX(RC_MIN_QUANT, m_Qc);//clipping
+         }
+         return m_Qc;
+       }
+       /*bottom field of B frame*/
+       else
+         return m_Qc;
+     }
+     else if(img->type==P_SLICE)
+     {
+       if((img->NumberofGOP==1)&&(img->NumberofPPicture==0))
+       {
+         if((img->FieldControl==0)||((img->FieldControl==1)\
+           &&(img->IFLAG==0)))
+         {
+           /*top field of the first P frame*/
+           m_Qc=MyInitialQp;
+           img->NumberofBasicUnitHeaderBits=0;
+           img->NumberofBasicUnitTextureBits=0;
+           NumberofBasicUnit--;
+           /*bottom field of the first P frame*/
+           if((!topfield)&&(NumberofBasicUnit==0))
+           {
+             /*frame coding or field coding*/
+             if((active_sps->frame_mbs_only_flag)||(input->PicInterlace==FIELD_CODING))
+             {
+               img->TotalQpforPPicture +=m_Qc;
+               PreviousQp1=PreviousQp2;
+               PreviousQp2=m_Qc;
+               PAveFrameQP=m_Qc;
+               PAveHeaderBits3=PAveHeaderBits2;
+             }
+             /*adaptive frame/field coding*/
+             else if((input->PicInterlace==ADAPTIVE_CODING)\
+               ||(input->MbInterlace))
+             {
+               if(img->FieldControl==0)
+               {
+                 FrameQPBuffer=m_Qc;
+                 FrameAveHeaderBits=PAveHeaderBits2;
+               }
+               else
+               {
+                 FieldQPBuffer=m_Qc;
+                 FieldAveHeaderBits=PAveHeaderBits2;
+               }
+             }
+           }
+           Pm_Qp=m_Qc;
+           TotalFrameQP +=m_Qc;
+           return m_Qc;
+         }
+       }
+       else
+       {
+         m_X1=Pm_X1;
+         m_X2=Pm_X2;
+         m_Hp=PPreHeader;
+         m_Qp=Pm_Qp;
+         DuantQp=PDuantQp;
+         MADPictureC1=PMADPictureC1;
+         MADPictureC2=PMADPictureC2;
+ 
+         if(img->FieldControl==0)
+           SumofBasicUnit=TotalNumberofBasicUnit;
+         else
+           SumofBasicUnit=TotalNumberofBasicUnit/2;
+ 
+         /*the average QP of the previous frame is used to coded the first basic unit of the current frame or field*/
+         if(NumberofBasicUnit==SumofBasicUnit)
+         {
+ 
+           /*adaptive field/frame coding*/
+           if(((input->PicInterlace==ADAPTIVE_CODING)\
+             ||(input->MbInterlace))\
+             &&(img->FieldControl==0))
+           {
+             /*previous choice is frame coding*/
+             if(img->FieldFrame==1)
+             {
+               if(img->NumberofPPicture>0)
+                 img->TotalQpforPPicture +=FrameQPBuffer;
+               PAveFrameQP=FrameQPBuffer;
+               PAveHeaderBits3=FrameAveHeaderBits;
+             }       
+             /*previous choice is field coding*/
+             else
+             {
+               if(img->NumberofPPicture>0)
+                 img->TotalQpforPPicture +=FieldQPBuffer;
+               PAveFrameQP=FieldQPBuffer;
+               PAveHeaderBits3=FieldAveHeaderBits;
+             }
+           }
+ 
+           if(T<=0)
+           {
+             m_Qc=PAveFrameQP+2;
+             if(m_Qc>RC_MAX_QUANT)
+               m_Qc=RC_MAX_QUANT;
+             if(topfield||(img->FieldControl==0))
+               GOPOverdue=TRUE;
+           }
+           else
+           {
+             m_Qc=PAveFrameQP; 
+           }
+           TotalFrameQP +=m_Qc;
+           NumberofBasicUnit--;
+           Pm_Qp=PAveFrameQP;
+           return m_Qc;
+         }else
+         {
+           /*compute the number of remaining bits*/
+           TotalBasicUnitBits=img->NumberofBasicUnitHeaderBits+img->NumberofBasicUnitTextureBits;
+           T -=TotalBasicUnitBits;
+           img->NumberofBasicUnitHeaderBits=0;
+           img->NumberofBasicUnitTextureBits=0;
+           if(T<0)
+           {
+             if(GOPOverdue==TRUE)
+               m_Qc=m_Qp+2;
+             else 
+               m_Qc=m_Qp+DDquant;//2 
+             m_Qc = MIN(m_Qc, RC_MAX_QUANT);  // clipping
+             if(input->basicunit>=MBPerRow)
+               m_Qc = MIN(m_Qc, PAveFrameQP+6); 
+             else
+               m_Qc = MIN(m_Qc, PAveFrameQP+3);
+             
+             TotalFrameQP +=m_Qc;
+             NumberofBasicUnit--;
+             if(NumberofBasicUnit==0)
+             {
+               if((!topfield)||(img->FieldControl==0))
+               {
+                 /*frame coding or field coding*/
+                 if((active_sps->frame_mbs_only_flag)||(input->PicInterlace==FIELD_CODING))
+                 {
+                   PAverageQP=(int)(1.0*TotalFrameQP/TotalNumberofBasicUnit+0.5);
+                   if (img->NumberofPPicture == (input->intra_period - 2))
+                     QPLastPFrame = PAverageQP;
+                   
+                   img->TotalQpforPPicture +=PAverageQP;
+                   if(GOPOverdue==TRUE)
+                   {
+                     PreviousQp1=PreviousQp2+1;
+                     PreviousQp2=PAverageQP;                   
+                   }
+                   else
+                   {
+                     if((img->NumberofPPicture==0)&&(img->NumberofGOP>1))
+                     {
+                       PreviousQp1=PreviousQp2;
+                       PreviousQp2=PAverageQP;
+                     }
+                     else if(img->NumberofPPicture>0)
+                     {
+                       PreviousQp1=PreviousQp2+1;
+                       PreviousQp2=PAverageQP;
+                     }
+                   }
+                   PAveFrameQP=PAverageQP;
+                   PAveHeaderBits3=PAveHeaderBits2;
+                 }
+                 /*adaptive field/frame coding*/
+                 else if((input->PicInterlace==ADAPTIVE_CODING)\
+                   ||(input->MbInterlace))
+                 {
+                   if(img->FieldControl==0)
+                   {
+                     PAverageQP=(int)(1.0*TotalFrameQP/TotalNumberofBasicUnit+0.5);
+                     FrameQPBuffer=PAverageQP;
+                     FrameAveHeaderBits=PAveHeaderBits2;
+                   }
+                   else
+                   {
+                     PAverageQP=(int)(1.0*TotalFrameQP/TotalNumberofBasicUnit+0.5);
+                     FieldQPBuffer=PAverageQP;
+                     FieldAveHeaderBits=PAveHeaderBits2;
+                   }
+                 }
+               }
+             }
+             if(GOPOverdue==TRUE)
+               Pm_Qp=PAveFrameQP;
+             else
+               Pm_Qp=m_Qc;
+             return m_Qc;
+           }
+           else
+           {
+             /*predict the MAD of current picture*/
+             if(((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))\
+               &&(img->FieldControl==1))
+             {
+               CurrentFrameMAD=MADPictureC1*FCBUPFMAD[TotalNumberofBasicUnit-NumberofBasicUnit]+MADPictureC2;
+               TotalBUMAD=0;
+               for(i=TotalNumberofBasicUnit-1; i>=(TotalNumberofBasicUnit-NumberofBasicUnit);i--)
+               {
+                 CurrentBUMAD=MADPictureC1*FCBUPFMAD[i]+MADPictureC2;
+                 TotalBUMAD +=CurrentBUMAD*CurrentBUMAD;
+               }
+             }
+             else
+             {
+               CurrentFrameMAD=MADPictureC1*BUPFMAD[TotalNumberofBasicUnit-NumberofBasicUnit]+MADPictureC2;
+               TotalBUMAD=0;
+               for(i=TotalNumberofBasicUnit-1; i>=(TotalNumberofBasicUnit-NumberofBasicUnit);i--)
+               {
+                 CurrentBUMAD=MADPictureC1*BUPFMAD[i]+MADPictureC2;
+                 TotalBUMAD +=CurrentBUMAD*CurrentBUMAD;
+               }
+             }
+             
+             /*compute the total number of bits for the current basic unit*/
+             m_Bits =(int)(T*CurrentFrameMAD*CurrentFrameMAD/TotalBUMAD);
+             /*compute the number of texture bits*/
+             m_Bits -=PAveHeaderBits2;
+             
+             m_Bits=MAX(m_Bits,(int)(bit_rate/(MINVALUE*frame_rate*TotalNumberofBasicUnit)));
+             
+             dtmp = CurrentFrameMAD * m_X1 * CurrentFrameMAD * m_X1 \
+               + 4 * m_X2 * CurrentFrameMAD * m_Bits;
+             if ((m_X2 == 0.0) || (dtmp < 0) || ((sqrt (dtmp) - m_X1 * CurrentFrameMAD) <= 0.0))  // fall back 1st order mode
+               m_Qstep = (float)(m_X1 * CurrentFrameMAD / (double) m_Bits);
+             else // 2nd order mode
+               m_Qstep = (float) ((2 * m_X2 * CurrentFrameMAD) / (sqrt (dtmp) - m_X1 * CurrentFrameMAD));
+             
+             m_Qc=Qstep2QP(m_Qstep);
+             m_Qc = MIN(m_Qp+DDquant,  m_Qc); // control variation
+             
+             if(input->basicunit>=MBPerRow)
+               m_Qc = MIN(PAveFrameQP+6, m_Qc);
+             else
+               m_Qc = MIN(PAveFrameQP+3, m_Qc);
+             
+             m_Qc = MIN(m_Qc, RC_MAX_QUANT);  // clipping
+             m_Qc = MAX(m_Qp-DDquant, m_Qc);  // control variation 
+             if(input->basicunit>=MBPerRow)
+               m_Qc = MAX(PAveFrameQP-6, m_Qc);
+             else
+               m_Qc = MAX(PAveFrameQP-3, m_Qc);
+             
+             m_Qc = MAX(RC_MIN_QUANT, m_Qc);
+             TotalFrameQP +=m_Qc;
+             Pm_Qp=m_Qc;
+             NumberofBasicUnit--;
+             if((NumberofBasicUnit==0)&&(img->type==P_SLICE))
+             {
+               if((!topfield)||(img->FieldControl==0))
+               {
+                 /*frame coding or field coding*/
+                 if((active_sps->frame_mbs_only_flag)||(input->PicInterlace==FIELD_CODING))
+                 {
+                   PAverageQP=(int)(1.0*TotalFrameQP/TotalNumberofBasicUnit+0.5);
+                   if (img->NumberofPPicture == (input->intra_period - 2))
+                     QPLastPFrame = PAverageQP;
+ 
+                   img->TotalQpforPPicture +=PAverageQP;
+                   PreviousQp1=PreviousQp2;
+                   PreviousQp2=PAverageQP; 
+                   PAveFrameQP=PAverageQP;
+                   PAveHeaderBits3=PAveHeaderBits2;
+                 }
+                 else if((input->PicInterlace==ADAPTIVE_CODING)\
+                   ||(input->MbInterlace))
+                 {
+                   if(img->FieldControl==0)
+                   {
+                     PAverageQP=(int)(1.0*TotalFrameQP/TotalNumberofBasicUnit+0.5);
+                     FrameQPBuffer=PAverageQP;
+                     FrameAveHeaderBits=PAveHeaderBits2;
+                   }
+                   else
+                   {
+                     PAverageQP=(int)(1.0*TotalFrameQP/TotalNumberofBasicUnit+0.5);
+                     FieldQPBuffer=PAverageQP;
+                     FieldAveHeaderBits=PAveHeaderBits2;
+                   }
+                 }
+               }
+             }
+             return m_Qc;
+           }
+         }
+       }
+     } 
+   }
+   return m_Qc;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    update the parameters of quadratic R-D model
+  *
+  * \note
+  *    Does nothing unless img->type is P_SLICE. For a P slice it
+  *      - computes CurrentFrameMAD (whole frame or current basic unit),
+  *      - shifts the 20-entry Pm_rgQp / Pm_rgRp history by one and mirrors it
+  *        into m_rgQp / m_rgRp,
+  *      - stores the newest sample (Qstep of m_Qc, texture bits / MAD) at index 0,
+  *      - chooses a window size and runs RCModelEstimator() twice, once on all
+  *        samples and once after rejecting outliers,
+  *      - finally calls updateMADModel() or, for the very first sample, seeds
+  *        PPictureMAD[0].
+  *
+  *************************************************************************************
+ */
+ void updateRCModel ()
+ {
+ 
+   int n_windowSize;
+   int i;
+   double error[20], std = 0.0, threshold; // per-sample model error, sum of squared errors, rejection threshold
+   int m_Nc; // number of coded P frames (frame layer) or coded basic units (basic unit layer)
+   Boolean MADModelFlag = FALSE;
+ 
+   if(img->type==P_SLICE)
+   {
+     /*frame layer rate control*/
+     if(img->BasicUnit==img->Frame_Total_Number_MB)
+     {
+       CurrentFrameMAD=ComputeFrameMAD();
+       m_Nc=img->NumberofCodedPFrame;
+     }
+     /*basic unit layer rate control*/
+     else
+     {
+       /*compute the MAD of the current basic unit*/
+       if((input->MbInterlace)&&(img->FieldControl==0))
+         CurrentFrameMAD=img->TotalMADBasicUnit/img->BasicUnit/2;
+       else
+         CurrentFrameMAD=img->TotalMADBasicUnit/img->BasicUnit;
+       
+       
+       img->TotalMADBasicUnit=0; // restart the accumulator for the next basic unit
+               
+       /* compute the average number of header bits*/
+       
+       CodedBasicUnit=TotalNumberofBasicUnit-NumberofBasicUnit; // NumberofBasicUnit is counted down in updateQuantizationParameter()
+       if(CodedBasicUnit>0)
+       {
+         PAveHeaderBits1=(int)(1.0*(PAveHeaderBits1*(CodedBasicUnit-1)+\
+           +img->NumberofBasicUnitHeaderBits)/CodedBasicUnit+0.5); // running mean, rounded; the second '+' is a unary plus
+         if(PAveHeaderBits3==0)
+           PAveHeaderBits2=PAveHeaderBits1;
+         else
+           PAveHeaderBits2=(int)(1.0*(PAveHeaderBits1*CodedBasicUnit+\
+           +PAveHeaderBits3*NumberofBasicUnit)/TotalNumberofBasicUnit+0.5); // weighted by coded / remaining basic units
+       }
+       /*update the record of MADs for reference*/
+       if(((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))\
+         &&(img->FieldControl==1))
+         FCBUCFMAD[TotalNumberofBasicUnit-1-NumberofBasicUnit]=CurrentFrameMAD;
+       else
+         BUCFMAD[TotalNumberofBasicUnit-1-NumberofBasicUnit]=CurrentFrameMAD;
+       
+       if(NumberofBasicUnit!=0)
+         m_Nc=img->NumberofCodedPFrame*TotalNumberofBasicUnit+CodedBasicUnit;
+       else
+         m_Nc=(img->NumberofCodedPFrame-1)*TotalNumberofBasicUnit+CodedBasicUnit;
+       
+     }
+     
+     if(m_Nc>1)
+       MADModelFlag=TRUE; // more than one sample: the MAD model is updated at the end
+     
+     PPreHeader=img->NumberofHeaderBits;
+     for (i = 19; i > 0; i--) 
+     {// update the history
+       Pm_rgQp[i] = Pm_rgQp[i - 1];
+       m_rgQp[i]=Pm_rgQp[i];
+       Pm_rgRp[i] = Pm_rgRp[i - 1];
+       m_rgRp[i]=Pm_rgRp[i];
+     }
+     Pm_rgQp[0] = QP2Qstep(m_Qc); //*1.0/CurrentFrameMAD;
+     /*frame layer rate control*/
+     if(img->BasicUnit==img->Frame_Total_Number_MB)
+       Pm_rgRp[0] = img->NumberofTextureBits*1.0/CurrentFrameMAD; // NOTE(review): no guard against CurrentFrameMAD == 0
+     /*basic unit layer rate control*/
+     else
+       Pm_rgRp[0]=img->NumberofBasicUnitTextureBits*1.0/CurrentFrameMAD; // NOTE(review): no guard against CurrentFrameMAD == 0
+     
+     m_rgQp[0]=Pm_rgQp[0];
+     m_rgRp[0]=Pm_rgRp[0];
+     m_X1=Pm_X1;
+     m_X2=Pm_X2;
+     
+     
+     /*compute the size of window*/
+     n_windowSize = (CurrentFrameMAD>PreviousFrameMAD)?(int)(PreviousFrameMAD/CurrentFrameMAD*20)\
+                    :(int)(CurrentFrameMAD/PreviousFrameMAD*20);
+     n_windowSize=MAX(n_windowSize, 1);
+     n_windowSize=MIN(n_windowSize,m_Nc);
+     n_windowSize=MIN(n_windowSize,m_windowSize+1); // grow by at most one sample per call
+     n_windowSize=MIN(n_windowSize,20); // never exceed the history / error[] length
+     
+     /*update the previous window size*/
+     m_windowSize=n_windowSize;
+     
+     for (i = 0; i < 20; i++) 
+     {
+       m_rgRejected[i] = FALSE;
+     }
+     
+     // initial RD model estimator
+     RCModelEstimator (n_windowSize);
+     
+     n_windowSize = m_windowSize; // no-op: m_windowSize was set from n_windowSize above
+     // remove outlier 
+     
+     for (i = 0; i < (int) n_windowSize; i++) 
+     {
+       error[i] = m_X1 / m_rgQp[i] + m_X2 / (m_rgQp[i] * m_rgQp[i]) - m_rgRp[i]; // model prediction minus observed rate
+       std += error[i] * error[i]; 
+     }
+     threshold = (n_windowSize == 2) ? 0 : sqrt (std / n_windowSize); // RMS error; with exactly two samples any non-zero error is rejected
+     for (i = 0; i < (int) n_windowSize; i++) 
+     {
+       if (fabs(error[i]) > threshold)
+         m_rgRejected[i] = TRUE;
+     }
+     // always include the last data point
+     m_rgRejected[0] = FALSE;
+     
+     // second RD model estimator
+     RCModelEstimator (n_windowSize);
+     
+     if(MADModelFlag)
+       updateMADModel();
+     else if(img->type==P_SLICE) // always true here: the enclosing if already tested img->type==P_SLICE
+       PPictureMAD[0]=CurrentFrameMAD;
+   } 
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Estimate the coefficients m_X1 / m_X2 of the quadratic R-D model
+  *
+  * \param n_windowSize
+  *    number of leading entries of m_rgQp / m_rgRp / m_rgRejected to use
+  *
+  * \note
+  *    The first-order estimate is the mean of Q*R over the retained samples.
+  *    When the retained samples do not all share one Q, the 2x2 normal
+  *    equations of  Q*R = X1 + X2/Q  are solved instead. For P slices the
+  *    result is also stored in Pm_X1 / Pm_X2.
+  *
+  *************************************************************************************
+ */
+ void RCModelEstimator (int n_windowSize)
+ {
+   int     k;
+   int     keptCount = n_windowSize;   // window size minus rejected samples
+   double  refQ = 0;                   // Q of the last retained sample in the window
+   double  s00 = 0.0, s01 = 0.0, s10 = 0.0, s11 = 0.0;   // matrix of the normal equations
+   double  r0 = 0.0, r1 = 0.0;                            // right-hand side
+   double  det;
+   Boolean secondOrder = FALSE;
+ 
+   // count the samples that survive rejection
+   for (k = 0; k < n_windowSize; k++)
+     keptCount -= m_rgRejected[k] ? 1 : 0;
+ 
+   // reference Q: the retained sample with the highest index, if any
+   for (k = n_windowSize - 1; k >= 0; k--)
+   {
+     if (!m_rgRejected[k])
+     {
+       refQ = m_rgQp[k];
+       break;
+     }
+   }
+ 
+   // first-order model; also detect whether a second-order fit is possible
+   m_X2 = 0.0;
+   m_X1 = 0.0;
+   for (k = 0; k < n_windowSize; k++)
+   {
+     if (m_rgRejected[k])
+       continue;
+     if (m_rgQp[k] != refQ)
+       secondOrder = TRUE;
+     m_X1 += (m_rgQp[k] * m_rgRp[k]) / keptCount;
+   }
+ 
+   // second-order model: accumulate and solve A * X = B
+   if (secondOrder && (keptCount >= 1))
+   {
+     for (k = 0; k < n_windowSize; k++)
+     {
+       if (m_rgRejected[k])
+         continue;
+       s00 += 1.0;
+       s01 += 1.0 / m_rgQp[k];
+       s11 += 1.0 / (m_rgQp[k] * m_rgQp[k]);
+       r0  += m_rgQp[k] * m_rgRp[k];
+       r1  += m_rgRp[k];
+     }
+     s10 = s01;   // the matrix is symmetric
+ 
+     det = s00*s11-s01*s10;
+     if (fabs(det) > 0.000001)
+     {
+       m_X1 = (r0*s11-r1*s01)/det;
+       m_X2 = (r1*s00-r0*s10)/det;
+     }
+     else
+     { // (near-)singular system: keep only the first-order term
+       m_X1 = r0/s00;
+       m_X2 = 0.0;
+     }
+   }
+ 
+   // remember the coefficients obtained for P slices
+   if (img->type == P_SLICE)
+   {
+     Pm_X1 = m_X1;
+     Pm_X2 = m_X2;
+   }
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Average the per-macroblock MAD values of the frame
+  *
+  * \return
+  *    sum of img->MADofMB[0 .. Frame_Total_Number_MB-1] divided by
+  *    img->Frame_Total_Number_MB
+  *
+  *************************************************************************************
+ */
+ double ComputeFrameMAD()
+ {
+   double sum = 0.0;
+   int    mb  = 0;
+ 
+   while (mb < img->Frame_Total_Number_MB)
+   {
+     sum += img->MADofMB[mb];
+     mb++;
+   }
+ 
+   return sum / img->Frame_Total_Number_MB;
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    update the parameters of linear prediction model
+  *
+  * \note
+  *    Does nothing while img->NumberofCodedPFrame is 0. Otherwise it shifts the
+  *    20-entry PPictureMAD / PictureMAD / ReferenceMAD histories by one, stores
+  *    CurrentFrameMAD and its reference MAD at index 0, chooses a window size
+  *    and runs MADModelEstimator() twice, once on all samples and once after
+  *    rejecting outliers of  PictureMAD = C1*ReferenceMAD + C2.
+  *    For P slices PreviousFrameMAD is set to CurrentFrameMAD.
+  *
+  *************************************************************************************
+ */
+ void updateMADModel ()
+ {
+   
+   int n_windowSize;
+   int i;
+   double error[20], std = 0.0, threshold; // per-sample model error, sum of squared errors, rejection threshold
+   int m_Nc; // number of coded P frames (frame layer) or coded basic units (basic unit layer)
+   
+   if(img->NumberofCodedPFrame>0)
+   {
+     //assert (img->type!=P_SLICE);
+     
+     /*frame layer rate control*/
+     if(img->BasicUnit==img->Frame_Total_Number_MB)
+       m_Nc=img->NumberofCodedPFrame;
+     /*basic unit layer rate control*/
+     else
+       m_Nc=img->NumberofCodedPFrame*TotalNumberofBasicUnit+CodedBasicUnit;
+     
+     for (i = 19; i > 0; i--) 
+     {// update the history
+       PPictureMAD[i] = PPictureMAD[i - 1];
+       PictureMAD[i]=PPictureMAD[i];
+       ReferenceMAD[i]= ReferenceMAD[i-1];
+     }
+     PPictureMAD[0] = CurrentFrameMAD;
+     PictureMAD[0]=PPictureMAD[0];
+     if(img->BasicUnit==img->Frame_Total_Number_MB)
+       ReferenceMAD[0]=PictureMAD[1]; // frame layer: the reference is the previously stored picture MAD
+     else
+     {
+       if(((input->PicInterlace==ADAPTIVE_CODING)||(input->MbInterlace))\
+         &&(img->FieldControl==1))
+         ReferenceMAD[0]=FCBUPFMAD[TotalNumberofBasicUnit-1-NumberofBasicUnit];
+       else
+         ReferenceMAD[0]=BUPFMAD[TotalNumberofBasicUnit-1-NumberofBasicUnit];
+     }
+     MADPictureC1=PMADPictureC1;
+     MADPictureC2=PMADPictureC2;
+     
+     
+     /*compute the size of window*/
+     
+     n_windowSize = (CurrentFrameMAD>PreviousFrameMAD)?(int)(PreviousFrameMAD/CurrentFrameMAD*20)\
+       :(int)(CurrentFrameMAD/PreviousFrameMAD*20);
+     n_windowSize=MIN(n_windowSize,(m_Nc-1));
+     n_windowSize=MAX(n_windowSize, 1);
+     n_windowSize=MIN(n_windowSize,MADm_windowSize+1); // grow by at most one sample per call
+     n_windowSize=MIN(20,n_windowSize); // never exceed the history / error[] length
+     /*update the previous window size*/
+     MADm_windowSize=n_windowSize;
+     
+     for (i = 0; i < 20; i++) 
+     {
+       PictureRejected[i] = FALSE;
+     }
+     //update the MAD for the previous frame
+     if(img->type==P_SLICE)
+       PreviousFrameMAD=CurrentFrameMAD;
+     
+     // initial MAD model estimator
+     MADModelEstimator (n_windowSize);
+     
+     // remove outlier 
+     
+     for (i = 0; i < (int) n_windowSize; i++) 
+     {
+       error[i] = MADPictureC1*ReferenceMAD[i]+MADPictureC2-PictureMAD[i]; // model prediction minus observed MAD
+       std += error[i] * error[i]; 
+     }
+     threshold = (n_windowSize == 2) ? 0 : sqrt (std / n_windowSize); // RMS error; with exactly two samples any non-zero error is rejected
+     for (i = 0; i < (int) n_windowSize; i++) 
+     {
+       if (fabs(error[i]) > threshold)
+         PictureRejected[i] = TRUE;
+     }
+     // always include the last data point
+     PictureRejected[0] = FALSE;
+     
+     // second MAD model estimator
+     MADModelEstimator (n_windowSize);
+   }
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Estimate the coefficients MADPictureC1 / MADPictureC2 of the linear
+  *    MAD prediction model
+  *
+  * \param n_windowSize
+  *    number of leading entries of PictureMAD / ReferenceMAD / PictureRejected
+  *    to use
+  *
+  * \note
+  *    The default estimate is the mean ratio PictureMAD/ReferenceMAD with a
+  *    zero offset. When the retained PictureMAD values are not all equal, the
+  *    2x2 normal equations of  PictureMAD = C1*ReferenceMAD + C2  are solved
+  *    instead. For P slices the result is also stored in PMADPictureC1 /
+  *    PMADPictureC2.
+  *
+  *************************************************************************************
+ */
+ void MADModelEstimator (int n_windowSize)
+ {
+   int     k;
+   int     keptCount = n_windowSize;   // window size minus rejected samples
+   double  refMAD = 0;                 // PictureMAD of the last retained sample in the window
+   double  s00 = 0.0, s01 = 0.0, s10 = 0.0, s11 = 0.0;   // matrix of the normal equations
+   double  r0 = 0.0, r1 = 0.0;                            // right-hand side
+   double  det;
+   Boolean fitOffset = FALSE;
+ 
+   // count the samples that survive rejection
+   for (k = 0; k < n_windowSize; k++)
+     keptCount -= PictureRejected[k] ? 1 : 0;
+ 
+   // reference value: the retained sample with the highest index, if any
+   for (k = n_windowSize - 1; k >= 0; k--)
+   {
+     if (!PictureRejected[k])
+     {
+       refMAD = PictureMAD[k];
+       break;
+     }
+   }
+ 
+   // slope-only model; also detect whether slope and offset can both be fitted
+   MADPictureC2 = 0.0;
+   MADPictureC1 = 0.0;
+   for (k = 0; k < n_windowSize; k++)
+   {
+     if (PictureRejected[k])
+       continue;
+     if (PictureMAD[k] != refMAD)
+       fitOffset = TRUE;
+     MADPictureC1 += PictureMAD[k] / (ReferenceMAD[k]*keptCount);
+   }
+ 
+   // slope and offset: accumulate and solve A * X = B
+   if (fitOffset && (keptCount >= 1))
+   {
+     for (k = 0; k < n_windowSize; k++)
+     {
+       if (PictureRejected[k])
+         continue;
+       s00 += 1.0;
+       s01 += ReferenceMAD[k];
+       s11 += ReferenceMAD[k]*ReferenceMAD[k];
+       r0  += PictureMAD[k];
+       r1  += PictureMAD[k]*ReferenceMAD[k];
+     }
+     s10 = s01;   // the matrix is symmetric
+ 
+     det = s00*s11-s01*s10;
+     if (fabs(det) > 0.000001)
+     {
+       MADPictureC2 = (r0*s11-r1*s01)/det;
+       MADPictureC1 = (r1*s00-r0*s10)/det;
+     }
+     else
+     { // (near-)singular system: ratio of the sums, no offset
+       MADPictureC1 = r0/s01;
+       MADPictureC2 = 0.0;
+     }
+   }
+ 
+   // remember the coefficients obtained for P slices
+   if (img->type == P_SLICE)
+   {
+     PMADPictureC1 = MADPictureC1;
+     PMADPictureC2 = MADPictureC2;
+   }
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    map QP to Qstep
+  *
+  * \param QP
+  *    quantization parameter; may be negative
+  *
+  * \return
+  *    QP2QSTEP[QP mod 6] * 2^floor(QP/6), i.e. the step size doubles for every
+  *    increase of QP by 6
+  *
+  * \note
+  *    For negative QP the C operators / and % truncate toward zero, which would
+  *    give a negative table index. The quotient/remainder pair is therefore
+  *    converted to floor division first; results for QP >= 0 are unchanged.
+  *
+  *************************************************************************************
+ */
+ double QP2Qstep( int QP )
+ {
+   static const double QP2QSTEP[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 };
+   int    q_per = QP / 6;
+   int    q_rem = QP % 6;
+   int    i; 
+   double Qstep;
+   
+   if (q_rem < 0)
+   { // floor division: keep the table index inside 0..5
+     q_rem += 6;
+     q_per -= 1;
+   }
+   
+   Qstep = QP2QSTEP[q_rem];
+   for( i=0; i<q_per; i++)
+     Qstep *= 2;
+   for( i=0; i>q_per; i--)   // negative period: halve instead of double
+     Qstep /= 2;
+   
+   return Qstep;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Map a quantizer step size to the nearest QP
+  *
+  * \param Qstep
+  *    quantizer step size
+  *
+  * \return
+  *    0 if Qstep is below QP2Qstep(0), 51 if it is above QP2Qstep(51),
+  *    otherwise 6 * (number of halvings needed to reach the base range) plus
+  *    the index of the closest base step size
+  *
+  *************************************************************************************
+ */
+ int Qstep2QP( double Qstep )
+ {
+   static const double base[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 };
+   int octave = 0;
+   int step;
+   
+   //  assert( Qstep >= QP2Qstep(0) && Qstep <= QP2Qstep(51) );
+   // out-of-range step sizes are clamped to the end points of the QP range
+   if (Qstep < QP2Qstep(0))
+     return 0;
+   if (Qstep > QP2Qstep(51))
+     return 51;
+   
+   // halve until the value is no larger than the step size of QP 5
+   for (; Qstep > QP2Qstep(5); Qstep /= 2)
+     octave++;
+   
+   // first base entry whose midpoint to the next entry is not exceeded;
+   // falls through to the last entry (5) when no midpoint matches
+   for (step = 0; step < 5; step++)
+   {
+     if (Qstep <= (base[step]+base[step+1])/2)
+       break;
+   }
+   
+   return (octave * 6 + step);
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/ratectl.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/ratectl.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/ratectl.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,146 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    ratectl.h
+  *
+  * \author
+  *    Zhengguo LI
+  *
+  * \date
+  *    14 Jan 2003
+  *
+  * \brief
+  *    Header file for rate control: MIN/MAX helper macros, the rate control
+  *    state variables and the prototypes of the rate control functions.
+  *
+  * \note
+  *    The state variables below are written without "extern", so each source
+  *    file that includes this header contains its own tentative definition.
+  *    NOTE(review): linking then depends on the toolchain merging these
+  *    (common symbols) - confirm before building with -fno-common.
+  **************************************************************************
+  */
+ 
+ #ifndef _RATE_CTL_H_
+ #define _RATE_CTL_H_
+ 
+ 
+ #define MIN(a,b)  (((a)<(b)) ? (a) : (b))//LIZG 28/10/2002; each argument is evaluated twice
+ #define MAX(a,b)  (((a)<(b)) ? (b) : (a))//LIZG 28/10/2002; each argument is evaluated twice
+ 
+ double bit_rate; 
+ double frame_rate;
+ double GAMMAP;//LIZG, JVT019r1
+ double BETAP;//LIZG, JVT019r1
+ 
+ int RC_MAX_QUANT;//LIZG 28/10/2002; upper clip for m_Qc in updateQuantizationParameter()
+ int RC_MIN_QUANT;//LIZG 28/10/2002; lower clip for m_Qc in updateQuantizationParameter()
+ 
+ double BufferSize; //LIZG 25/10/2002
+ double GOPTargetBufferLevel;
+ double CurrentBufferFullness; //LIZG 25/10/2002
+ double TargetBufferLevel;//LIZG 25/10/2002
+ double PreviousBit_Rate;//LIZG  25/10/2002
+ double AWp;
+ double AWb;
+ int MyInitialQp;
+ int PAverageQp;
+ 
+ /*LIZG JVT50V2 distortion prediction model*/
+ /*coefficients of the prediction model*/
+ double PreviousPictureMAD;
+ double MADPictureC1; // slope of PictureMAD = C1*ReferenceMAD + C2 (see updateMADModel)
+ double MADPictureC2; // offset of the same model
+ double PMADPictureC1; // copy of MADPictureC1 saved for P slices by MADModelEstimator()
+ double PMADPictureC2; // copy of MADPictureC2 saved for P slices by MADModelEstimator()
+ /* LIZG JVT50V2 picture layer MAD */
+ Boolean PictureRejected[21]; // updateMADModel() uses indices 0..19
+ double PPictureMAD[21]; // updateMADModel() uses indices 0..19
+ double PictureMAD[21]; // updateMADModel() uses indices 0..19
+ double ReferenceMAD[21]; // updateMADModel() uses indices 0..19
+ 
+ /*quadratic rate-distortion model*/
+ Boolean   m_rgRejected[21]; // updateRCModel() uses indices 0..19
+ double  m_rgQp[21]; // holds Qstep values (QP2Qstep(m_Qc)), not QP indices
+ double m_rgRp[21]; // texture bits divided by MAD
+ double m_X1; // first-order model coefficient
+ double m_X2; // second-order model coefficient
+ int m_Qc;
+ double m_Qstep;
+ int m_Qp;
+ int Pm_Qp;
+ int PreAveMBHeader;
+ int CurAveMBHeader;
+ int PPreHeader;
+ int PreviousQp1;
+ int PreviousQp2;
+ int NumberofBFrames;
+ /*basic unit layer rate control*/
+ int TotalFrameQP;
+ int NumberofBasicUnit; // counted down per basic unit in updateQuantizationParameter()
+ int PAveHeaderBits1;
+ int PAveHeaderBits2;
+ int PAveHeaderBits3;
+ int PAveFrameQP;
+ int TotalNumberofBasicUnit;
+ int CodedBasicUnit; // TotalNumberofBasicUnit - NumberofBasicUnit, set in updateRCModel()
+ double MINVALUE;
+ double CurrentFrameMAD;
+ double CurrentBUMAD;
+ double TotalBUMAD;
+ double PreviousFrameMAD;
+ int m_Hp;
+ int m_windowSize; // window size used by the previous updateRCModel() call
+ int MADm_windowSize; // window size used by the previous updateMADModel() call
+ int DDquant;
+ int MBPerRow;
+ double AverageMADPreviousFrame;
+ int TotalBasicUnitBits;
+ int QPLastPFrame;
+ int QPLastGOP;
+ //int MADn_windowSize;
+ //int n_windowSize;
+ 
+ double Pm_rgQp[20]; // Qstep history written by updateRCModel(), newest at index 0
+ double Pm_rgRp[20]; // rate history written by updateRCModel(), newest at index 0
+ double Pm_X1; // copy of m_X1 saved for P slices by RCModelEstimator()
+ double Pm_X2; // copy of m_X2 saved for P slices by RCModelEstimator()
+ int Pm_Hp;
+ /* adaptive field/frame coding*/
+ int FieldQPBuffer;
+ int FrameQPBuffer;
+ int FrameAveHeaderBits;
+ int FieldAveHeaderBits;
+ double *BUPFMAD;
+ double *BUCFMAD;
+ double *FCBUCFMAD;
+ double *FCBUPFMAD;
+ 
+ Boolean GOPOverdue;
+ 
+ 
+ //compute macroblock activity for rate control
+ int diffy[16][16];
+ int diffyy[16][16];
+ int diffy8[16][16];//for P8X8 mode 
+ 
+ extern int Iprev_bits;
+ extern int Pprev_bits;
+ 
+ void rc_alloc();
+ void rc_free();
+ 
+ void rc_init_seq();
+ void rc_init_GOP(int np, int nb);
+ void rc_update_pict_frame(int nbits);
+ void rc_init_pict(int fieldpic,int topfield, int targetcomputation);
+ void rc_update_pict(int nbits);
+ void setbitscount(int nbits);
+ 
+ int updateQuantizationParameter(int topfield);
+ void updateRCModel ();
+ void updateMADModel ();
+ Boolean skipThisFrame ();
+ void RCModelEstimator (int n_windowSize);
+ void MADModelEstimator (int n_windowSize);
+ double calc_MAD();
+ double ComputeFrameMAD();
+ int Qstep2QP( double Qstep );
+ double QP2Qstep( int QP );
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/rdopt.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdopt.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/rdopt.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,3568 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file rdopt.c
+  *
+  * \brief
+  *    Rate-Distortion optimized mode decision
+  *
+  * \author
+  *    - Heiko Schwarz              <hschwarz at hhi.de>
+  *    - Valeri George              <george at hhi.de>
+  *    - Lowell Winger              <lwinger at lsil.com>
+  *    - Alexis Michael Tourapis    <alexismt at ieee.org>
+  * \date
+  *    12. April 2001
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <assert.h>
+ #include <limits.h>
+ #include <memory.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ 
+ #include "rdopt_coding_state.h"
+ #include "memalloc.h"
+ #include "mb_access.h"
+ #include "elements.h"
+ #include "intrarefresh.h"
+ #include "image.h"
+ #include "transform8x8.h"
+ #include "cabac.h"   
+ #include "vlc.h"
+ #include "fast_me.h"
+ #include "ratectl.h"            // head file for rate control
+ #include "mode_decision.h"
+ #define KS_MV
+ 
+ //Rate control
+ 
+ int QP,QP2;
+ int DELTA_QP,DELTA_QP2;
+ 
+ imgpel pred[16][16];
+ 
+ #define FASTMODE 1
+ //#define RESET_STATE
+ 
+ extern const int LEVELMVLIMIT[17][6];
+ extern int   QP2QUANT[40];
+ 
+ extern short OffsetList4x4[15][16];
+ extern short OffsetList8x8[5][64];
+ extern const int OffsetBits;
+ 
+ imgpel   rec_mbY[16][16], rec_mbU[16][16], rec_mbV[16][16];    // reconstruction values
+ 
+ RD_8x8DATA tr4x4, tr8x8;
+ 
+ int   bestInterFAdjust4x4[16][16], bestIntraFAdjust4x4[16][16];
+ int   bestInterFAdjust8x8[16][16], bestIntraFAdjust8x8[16][16];
+ int   bestInterFAdjust4x4Cr[2][16][16], bestIntraFAdjust4x4Cr[2][16][16];
+ int   fadjust8x8[16][16], fadjust4x4[16][16], fadjust4x4Cr[2][16][16], fadjust8x8Cr[2][16][16];    
+ 
+ int   ****cofAC=NULL, ****cofAC8x8=NULL;        // [8x8block][4x4block][level/run][scan_pos]; allocated in init_rdopt(), freed in clear_rdopt()
+ int   ***cofDC=NULL;                       // [yuv][level/run][scan_pos]; allocated in init_rdopt(), freed in clear_rdopt()
+ int   **cofAC4x4=NULL, ****cofAC4x4intern=NULL; // [level/run][scan_pos]; cofAC4x4 is set to cofAC4x4intern[0][0] in init_rdopt()
+ int   cbp, cbp8x8, cnt_nonz_8x8;
+ int64 cbp_blk;
+ int   cbp_blk8x8;
+ char  frefframe[4][4], brefframe[4][4];
+ int   b8mode[4], b8pdir[4];
+ short best8x8mode [4];                // [block]
+ short best8x8pdir  [MAXMODE][4];       // [mode][block]
+ short best8x8fwref [MAXMODE][4];       // [mode][block]
+ short best8x8bwref [MAXMODE][4];       // [mode][block]
+ 
+ 
+ CSptr cs_mb=NULL, cs_b8=NULL, cs_cm=NULL, cs_imb=NULL, cs_ib8=NULL, cs_ib4=NULL, cs_pc=NULL; // coding states: created in init_rdopt(), deleted in clear_rdopt()
+ int   best_c_imode;
+ int   best_i16offset;
+ short best_mode;
+ short  bi_pred_me;
+ 
+ //mixed transform sizes definitions
+ int   luma_transform_size_8x8_flag;
+ 
+ short all_mv8x8[2][2][4][4][2];       //[8x8_data/temp_data][LIST][block_x][block_y][MVx/MVy]
+ short pred_mv8x8[2][2][4][4][2];
+ 
+ int   ****cofAC_8x8ts = NULL;        // [8x8block][4x4block][level/run][scan_pos]; allocated only when input->Transform8x8Mode is set
+ 
+ int64    cbp_blk8_8x8ts;
+ int      cbp8_8x8ts;
+ int      cost8_8x8ts;
+ int      cnt_nonz8_8x8ts;
+ 
+ 
+ void StoreMV8x8(int dir);
+ void RestoreMV8x8(int dir);
+ // end of mixed transform sizes definitions
+ 
+ //Adaptive Rounding update function
+ void update_offset_params(int mode, int luma_transform_size_8x8_flag);
+ 
+ // Residue Color Transform
+ int   cofAC4x4_chroma[2][2][18];
+ int   rec_resG_8x8[16][16], resTrans_R_8x8[16][16], resTrans_B_8x8[16][16];
+ int   rec_resG_8x8ts[16][16], resTrans_R_8x8ts[16][16], resTrans_B_8x8ts[16][16];
+ int   mprRGB_8x8[3][16][16], mprRGB_8x8ts[3][16][16];
+ char  b4_ipredmode[16], b4_intra_pred_modes[16];
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release the coefficient buffers and coding states that init_rdopt()
+  *    set up for RD-optimized mode decision
+  ************************************************************************
+  */
+ void clear_rdopt ()
+ {
+   // coding states in the order in which they are deleted
+   CSptr *states[7] = { &cs_mb, &cs_b8, &cs_cm, &cs_imb, &cs_ib8, &cs_ib4, &cs_pc };
+   int    k;
+ 
+   // coefficient buffers
+   free_mem_DCcoeff (cofDC);
+   free_mem_ACcoeff (cofAC);
+   free_mem_ACcoeff (cofAC8x8);
+   free_mem_ACcoeff (cofAC4x4intern);
+ 
+   // this buffer only exists when the 8x8 transform is enabled
+   if (input->Transform8x8Mode)
+     free_mem_ACcoeff (cofAC_8x8ts);
+ 
+   // structures for saving the coding state
+   for (k = 0; k < 7; k++)
+     delete_coding_state (*states[k]);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate the coefficient buffers and coding states used by
+  *    RD-optimized mode decision
+  ************************************************************************
+  */
+ void init_rdopt ()
+ {
+   // coding states in the order in which they are created
+   CSptr *states[7] = { &cs_mb, &cs_b8, &cs_cm, &cs_imb, &cs_ib8, &cs_ib4, &cs_pc };
+   int    k;
+ 
+   rdopt = NULL;
+ 
+   // coefficient buffers
+   get_mem_DCcoeff (&cofDC);
+   get_mem_ACcoeff (&cofAC);
+   get_mem_ACcoeff (&cofAC8x8);
+   get_mem_ACcoeff (&cofAC4x4intern);
+   cofAC4x4 = cofAC4x4intern[0][0];   // alias of the first [level/run][scan_pos] plane
+ 
+   // extra buffer, only needed when the 8x8 transform is enabled
+   if (input->Transform8x8Mode)
+     get_mem_ACcoeff (&cofAC_8x8ts);
+ 
+   // structures for saving the coding state
+   for (k = 0; k < 7; k++)
+     *states[k] = create_coding_state ();
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Refresh pixel_map after a picture has been coded.
+  *
+  * \note
+  *    For an I slice every entry is set to 1. Otherwise the picture is walked
+  *    in 8x8 areas: an area flagged in refresh_map is reset to 1, any other
+  *    area is incremented by one, limited to input->num_ref_frames+1.
+  *
+  *************************************************************************************
+  */
+ void UpdatePixelMap()
+ {
+   int area_x, area_y;   // 8x8 area coordinates
+   int x, y;             // pixel coordinates
+ 
+   if (img->type==I_SLICE)
+   {
+     for (y = 0; y < img->height; y++)
+     {
+       for (x = 0; x < img->width; x++)
+         pixel_map[y][x] = 1;
+     }
+     return;
+   }
+ 
+   for (area_y = 0; area_y < (img->height >> 3); area_y++)
+   {
+     for (area_x = 0; area_x < (img->width >> 3); area_x++)
+     {
+       const int y_end = area_y*8 + 8;
+       const int x_end = area_x*8 + 8;
+ 
+       if (refresh_map[area_y][area_x])
+       { // intra-refreshed area: start counting again
+         for (y = area_y*8; y < y_end; y++)
+           for (x = area_x*8; x < x_end; x++)
+             pixel_map[y][x] = 1;
+       }
+       else
+       { // one picture older, saturating at num_ref_frames+1
+         for (y = area_y*8; y < y_end; y++)
+           for (x = area_x*8; x < x_end; x++)
+             pixel_map[y][x] = min(pixel_map[y][x] + 1, input->num_ref_frames+1);
+       }
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Checks if a given reference frame is reliable for the current
+  *    macroblock, given the motion vectors that the motion search has
+  *    returned.
+  *
+  * \param block
+  *    partition index; together with mode it selects which 4x4 blocks of the
+  *    macroblock are examined (see by0/by1/bx0/bx1 below)
+  * \param list_idx
+  *    list index used to address img->all_mv
+  * \param ref
+  *    reference index used to address img->all_mv; pixel_map entries are
+  *    compared against ref+1
+  * \param mode
+  *    mode index used to address img->all_mv and to derive the examined area
+  *
+  * \return
+  *    If the return value is 1, the reference frame is reliable. If it
+  *    is 0, then it is not reliable.
+  *
+  * \note
+  *    A specific area in each reference frame is assumed to be unreliable
+  *    if the same area has been intra-refreshed in a subsequent frame.
+  *    The information about intra-refreshed areas is kept in the pixel_map.
+  *
+  * \note
+  *    Every pixel_map coordinate is clamped to the picture area before use.
+  *    The function returns 0 as soon as one examined entry is below ref+1.
+  *
+  *************************************************************************************
+  */
+ int CheckReliabilityOfRef (int block, int list_idx, int ref, int mode)
+ {
+   int y,x, block_y, block_x, dy, dx, y_pos, x_pos, yy, xx, pres_x, pres_y;
+   int maxold_x  = img->width-1;   // largest valid column of pixel_map
+   int maxold_y  = img->height-1;  // largest valid row of pixel_map
+   int ref_frame = ref+1;
+   
+   int by0 = (mode>=4?2*(block >> 1):mode==2?2*block:0);   // first 4x4 row examined
+   int by1 = by0 + (mode>=4||mode==2?2:4);                 // one past the last 4x4 row
+   int bx0 = (mode>=4?2*(block & 0x01):mode==3?2*block:0); // first 4x4 column examined
+   int bx1 = bx0 + (mode>=4||mode==3?2:4);                 // one past the last 4x4 column
+   
+   for (block_y=by0; block_y<by1; block_y++)
+   {
+     for (block_x=bx0; block_x<bx1; block_x++)
+     {
+       y_pos  = img->all_mv[block_y][block_x][list_idx][ref][mode][1];
+       y_pos += (img->block_y + block_y) * BLOCK_SIZE * 4;
+       x_pos  = img->all_mv[block_y][block_x][list_idx][ref][mode][0];
+       x_pos += (img->block_x + block_x) * BLOCK_SIZE * 4;
+       
+       /* Here we specify which pixels of the reference frame influence
+          the reference values and check their reliability. This is
+          based on the function Get_Reference_Pixel */
+ 
+       dy = y_pos & 3; // fractional part of the vertical position (low two bits)
+       dx = x_pos & 3; // fractional part of the horizontal position (low two bits)
+ 
+       y_pos = (y_pos-dy) >> 2; // integer pixel row
+       x_pos = (x_pos-dx) >> 2; // integer pixel column
+       
+       if (dy==0 && dx==0) //full-pel
+       {
+         for (y=y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+           for (x=x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+             if (pixel_map[max(0,min(maxold_y,y))][max(0,min(maxold_x,x))] < ref_frame)
+               return 0;
+       }
+       else  /* other positions */
+       {
+         if (dy == 0) // horizontal offset only: columns x-2 .. x+3 of the same row
+         {
+           for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+           {
+             pres_y = max(0,min(maxold_y,y));
+             for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+             {
+               for(xx = -2 ; xx < 4 ; xx++) {
+                 pres_x = max(0, min( maxold_x, x + xx));
+                 if (pixel_map[pres_y][pres_x] < ref_frame)
+                   return 0;
+               }
+             }
+           }
+         }        
+         else if (dx == 0) // vertical offset only: rows y-2 .. y+3 of the same column
+         {
+           for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+             for (x=x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+             {
+               pres_x = max(0,min(maxold_x,x));
+               for(yy=-2;yy<4;yy++) {
+                 pres_y = max(0,min(maxold_y, yy + y));
+                 if (pixel_map[pres_y][pres_x] < ref_frame)
+                   return 0;
+               }
+             }
+         }
+         else if (dx == 2) // dy != 0 here: full 6x6 neighbourhood
+         {
+           for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+             for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+             {
+               for(yy=-2;yy<4;yy++) {
+                 pres_y = max(0,min(maxold_y, yy + y));
+                 for(xx=-2;xx<4;xx++) {
+                   pres_x = max(0,min(maxold_x, xx + x));
+                   if (pixel_map[pres_y][pres_x] < ref_frame)
+                     return 0;
+                 }
+               }
+             }
+         }
+         else if (dy == 2) // dx is 1 or 3 here: full 6x6 neighbourhood
+         {
+           for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+             for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+             {
+               for(xx=-2;xx<4;xx++) {
+                 pres_x = max(0,min(maxold_x, xx + x));
+                 for(yy=-2;yy<4;yy++) {
+                   pres_y = max(0,min(maxold_y, yy + y));
+                   if (pixel_map[pres_y][pres_x] < ref_frame)
+                     return 0;
+                 }
+               }
+             }
+         }
+         else // dx and dy are both 1 or 3: one row and one column are examined
+         {
+           for (y = y_pos ; y < y_pos + BLOCK_SIZE ; y++)
+           {
+             for (x = x_pos ; x < x_pos + BLOCK_SIZE ; x++)
+             {
+               pres_y = dy == 1 ? y : y + 1; // row y for dy==1, row y+1 for dy==3
+               pres_y = max(0,min(maxold_y,pres_y));
+               
+               for(xx=-2;xx<4;xx++) 
+               {
+                 pres_x = max(0,min(maxold_x,xx + x));
+                 if (pixel_map[pres_y][pres_x] < ref_frame)
+                   return 0;
+               }
+               
+               pres_x = dx == 1 ? x : x + 1; // column x for dx==1, column x+1 for dx==3
+               pres_x = max(0,min(maxold_x,pres_x));
+               
+               for(yy=-2;yy<4;yy++) 
+               {
+                 pres_y = max(0,min(maxold_y, yy + y));
+                 if (pixel_map[pres_y][pres_x] < ref_frame)
+                   return 0;
+               }
+             }
+           }
+         }        
+       }
+     }
+   }
+   return 1; // no examined pixel_map entry was below ref+1
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    R-D cost of one 4x4 intra luma block for a single prediction mode
+  *
+  * \param nonzero
+  *    receives the value returned by dct_luma() for this block
+  * \param b8
+  *    index of the 8x8 block inside the macroblock
+  * \param b4
+  *    index of the 4x4 block inside that 8x8 block
+  * \param ipmode
+  *    intra prediction mode that is being evaluated
+  * \param lambda
+  *    Lagrange multiplier applied to the rate
+  * \param min_rdcost
+  *    not read by this function
+  * \param mostProbableMode
+  *    predicted mode; determines the value written for the mode syntax element
+  *
+  * \return
+  *    the rate alone when img->residue_transform_flag is set, otherwise
+  *    distortion + lambda * rate
+  *************************************************************************************
+  */
+ double RDCost_for_4x4IntraBlocks (int*    nonzero,
+                                   int     b8,
+                                   int     b4,
+                                   int    ipmode,
+                                   double  lambda,
+                                   double  min_rdcost,
+                                   int mostProbableMode)
+ {
+   // position of the 4x4 block inside the macroblock and inside the picture
+   const int       off_x      = 8*(b8 & 0x01) + 4*(b4 & 0x01);
+   const int       off_y      = 8*(b8 >> 1)   + 4*(b4 >> 1);
+   const int       col0       = img->pix_x  + off_x;
+   const int       row0       = img->pix_y  + off_y;
+   const int       orow0      = img->opix_y + off_y;
+   imgpel        **recY       = enc_picture->imgY;
+ 
+   Slice          *slice      = img->currentSlice;
+   Macroblock     *mb         = &img->mb_data[img->current_mb_nr];
+   SyntaxElement  *se         = &img->MB_SyntaxElements[mb->currSEnr];
+   const int      *se2part    = assignSE2partition[input->partition_mode];
+   DataPartition  *part;
+ 
+   int64           ssd        = 0;
+   int             coeff_cost = 0;
+   int             bits, r, c;
+ 
+   //===== DCT, Q, IQ, IDCT and reconstruction of the block =====
+   *nonzero = dct_luma (off_x, off_y, &coeff_cost, 1);
+ 
+   //===== SSD between original and reconstruction (skipped for residue transform) =====
+   if (!img->residue_transform_flag)
+   {
+     for (r = 0; r < 4; r++)
+     {
+       for (c = 0; c < 4; c++)
+       {
+         ssd += img->quad [imgY_org[orow0+r][col0+c] - recY[row0+r][col0+c]];
+       }
+     }
+   }
+ 
+   //===== rate of the intra prediction mode =====
+   // -1 signals "same as predicted mode"; modes above the prediction are shifted down by one
+   if (mostProbableMode == ipmode)
+     se->value1 = -1;
+   else if (ipmode < mostProbableMode)
+     se->value1 = ipmode;
+   else
+     se->value1 = ipmode - 1;
+ 
+   se->context = 4*b8 + b4;
+   se->type    = SE_INTRAPREDMODE;
+ 
+   part = &(slice->partArr[se2part[SE_INTRAPREDMODE]]);
+   if (input->symbol_mode == UVLC)
+   {
+     writeSyntaxElement_Intra4x4PredictionMode (se, part);
+   }
+   else
+   {
+     se->writing = writeIntraPredMode_CABAC;
+     part->writeSyntaxElement (se, part);
+   }
+ 
+   bits = se->len;
+   mb->currSEnr++;   // the coefficient writers below start at the next syntax element
+ 
+   //===== rate of the luma coefficients =====
+   if (input->symbol_mode == UVLC)
+     bits += writeCoeff4x4_CAVLC (LUMA, b8, b4, 0);
+   else
+     bits += writeLumaCoeff4x4_CABAC (b8, b4, 1);
+ 
+   // no coding-state reset here; callers handle it
+   if (img->residue_transform_flag)
+     return (double)bits;
+ 
+   return (double)ssd + lambda*(double)bits;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Rate of one 4x4 chroma block (Residue Color Transform)
+  *
+  *    Runs dct_chroma4x4() on the block, records its return value in
+  *    cbp_chroma_block_temp, writes the coefficients to measure their rate and
+  *    finally calls reset_coding_state (cs_cm).
+  *
+  * \param b8
+  *    block index; values above 7 select the second chroma plane (uv = 1)
+  * \param b4
+  *    index of the 4x4 block inside the 8x8 block
+  * \param chroma
+  *    passed through unchanged to dct_chroma4x4()
+  *
+  * \return
+  *    accumulated rate of the written coefficients
+  *************************************************************************************
+  */
+ int RDCost_for_4x4Blocks_Chroma (int     b8,
+                                  int     b4,
+                                  int  chroma)
+ {
+   int     rate=0;
+ 
+   Slice          *currSlice    =  img->currentSlice;
+   Macroblock     *currMB       = &img->mb_data[img->current_mb_nr];
+   SyntaxElement  *currSE       = &img->MB_SyntaxElements[currMB->currSEnr];
+   const int      *partMap      = assignSE2partition[input->partition_mode];
+   int uv     = (b8 > 7) ? 1 : 0;
+   int b8_rel = b8 - 4*(uv+1);   // b8 with the per-plane offset (4 or 8) removed
+ 
+   //===== perform DCT, Q, IQ, IDCT, Reconstruction =====
+   cbp_chroma_block_temp[uv][2*(b8_rel & 0x01)+(b4 & 0x01)][2*(b8_rel >> 1)+(b4 >> 1)] = dct_chroma4x4 (chroma, b8, b4);
+ 
+   //===== RATE for CHROMA COEFFICIENTS =====
+   if (input->symbol_mode == UVLC)
+   {
+     rate  = writeCoeff4x4_CAVLC (CHROMA_AC, b8, b4, ((2*(b8 & 0x01)+ (b4 & 0x01))<<4) | (2*(b8 >> 1)+(b4 >> 1)));
+   }
+   else
+   {
+     int*            ACLevel   = img->cofAC[b8][b4][0];
+     int*            ACRun     = img->cofAC[b8][b4][1];
+     int*            bitCount  = currMB->bitcounter;
+     DataPartition*  dataPart;
+     int             level = 1;
+     int             k;
+ 
+     img->subblock_y = b4 >> 1;
+     img->subblock_x = b4 & 0x01;
+ 
+     // write (level, run) pairs up to and including the first zero level
+     for (k=0; k < 17 && level != 0; k++)
+     {
+       level = currSE->value1 = ACLevel[k]; // level
+       currSE->value2         = ACRun  [k]; // run
+ 
+       // this branch is only reached when symbol_mode is not UVLC
+       currSE->writing     = writeRunLevel_CABAC;
+ 
+       currSE->context     = CHROMA_AC;
+       currSE->type        = SE_CHR_AC_INTRA;
+ 
+       img->is_intra_block =  IS_INTRA(currMB);
+       img->is_v_block     = uv;
+ 
+       // choose the appropriate data partition
+       dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+       dataPart->writeSyntaxElement (currSE, dataPart);
+       bitCount[BITS_COEFF_UV_MB] += currSE->len;
+       rate                       += currSE->len;
+ 
+       // proceed to next SE
+       currSE++;
+       currMB->currSEnr++;
+     }
+   }
+   reset_coding_state (cs_cm);
+ 
+   return rate;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Mode Decision for an 4x4 Intra block
+  *
+  *    Tries every available 4x4 intra prediction mode, keeps the cheapest one,
+  *    stores it in img->ipredmode and in the macroblock's intra_pred_modes, and
+  *    leaves the matching prediction, coefficients and reconstruction in place.
+  *    Without input->rdopt the modes are ranked by SATD plus a mode penalty;
+  *    with input->rdopt they are ranked by rate-distortion cost.
+  *
+  * \param b8
+  *    index of the 8x8 block inside the macroblock
+  * \param b4
+  *    index of the 4x4 block inside that 8x8 block
+  * \param lambda
+  *    Lagrange multiplier
+  * \param min_cost
+  *    output: lowest SATD-based cost found. It is only updated when
+  *    input->rdopt is zero; otherwise it is left at INT_MAX.
+  *
+  * \return
+  *    the dct_luma() result ("nonzero" flag) belonging to the selected mode
+  *************************************************************************************
+  */
+ int Mode_Decision_for_4x4IntraBlocks (int  b8,  int  b4,  double  lambda,  int*  min_cost)
+ {
+   int     ipmode, best_ipmode = 0, i, j, k, x, y, cost;
+   int     dummy = 0;   // handed to dct_luma(); zeroed as in RDCost_for_4x4IntraBlocks
+   int     c_nz, nonzero = 0, diff[16];
+   imgpel  rec4x4[4][4];
+   double  rdcost;
+   int     block_x     = 8*(b8 & 0x01)+4*(b4 & 0x01);
+   int     block_y     = 8*(b8 >> 1)+4*(b4 >> 1);
+   int     block_x4    = block_x >> 2;
+   int     block_y4    = block_y >> 2;
+   int     pic_pix_x   = img->pix_x+block_x;
+   int     pic_pix_y   = img->pix_y+block_y;
+   int     pic_opix_x   = img->opix_x+block_x;
+   int     pic_opix_y   = img->opix_y+block_y;
+   int     pic_block_x = pic_pix_x >> 2;
+   int     pic_block_y = pic_pix_y >> 2;
+   double  min_rdcost  = 1e30;
+ 
+   int left_available, up_available, all_available;
+ 
+   char   upMode;
+   char   leftMode;
+   int     mostProbableMode;
+ 
+   PixelPos left_block;
+   PixelPos top_block;
+ 
+ #ifdef BEST_NZ_COEFF
+   int best_nz_coeff = 0;
+ #endif
+ 
+   // Residue Color Transform
+   int residue_R, residue_G, residue_B;
+   int rate, temp;
+   int64 distortion;
+   int c_ipmode = img->mb_data[img->current_mb_nr].c_ipred_mode;
+   imgpel rec4x4_c[2][4][4];
+ 
+   getLuma4x4Neighbour(img->current_mb_nr, block_x4, block_y4, -1,  0, &left_block);
+   getLuma4x4Neighbour(img->current_mb_nr, block_x4, block_y4,  0, -1, &top_block);
+ 
+   // constrained intra pred
+   if (input->UseConstrainedIntraPred)
+   {
+     left_block.available = left_block.available ? img->intra_block[left_block.mb_addr] : 0;
+     top_block.available  = top_block.available  ? img->intra_block[top_block.mb_addr]  : 0;
+   }
+ 
+   upMode            =  top_block.available ? img->ipredmode[top_block.pos_y ][top_block.pos_x ] : -1;
+   leftMode          = left_block.available ? img->ipredmode[left_block.pos_y][left_block.pos_x] : -1;
+ 
+   // DC prediction if a neighbour is missing, otherwise the smaller neighbouring mode
+   mostProbableMode  = (upMode < 0 || leftMode < 0) ? DC_PRED : upMode < leftMode ? upMode : leftMode;
+ 
+   *min_cost = INT_MAX;
+ 
+   //===== INTRA PREDICTION FOR 4x4 BLOCK =====
+   intrapred_luma (pic_pix_x, pic_pix_y, &left_available, &up_available, &all_available);
+ 
+   //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
+   for (ipmode=0; ipmode<NO_INTRA_PMODE; ipmode++)
+   {
+     int available_mode =  (ipmode==DC_PRED) ||
+       ((ipmode==VERT_PRED||ipmode==VERT_LEFT_PRED||ipmode==DIAG_DOWN_LEFT_PRED) && up_available ) ||
+       ((ipmode==HOR_PRED||ipmode==HOR_UP_PRED) && left_available ) ||(all_available);
+ 
+     // optional mode restrictions from the configuration
+     if (input->IntraDisableInterOnly==0 || img->type != I_SLICE)
+     {
+       if (input->Intra4x4ParDisable && (ipmode==VERT_PRED||ipmode==HOR_PRED))
+         continue;
+ 
+       if (input->Intra4x4DiagDisable && (ipmode==DIAG_DOWN_LEFT_PRED||ipmode==DIAG_DOWN_RIGHT_PRED))
+         continue;
+ 
+       if (input->Intra4x4DirDisable && ipmode>=VERT_RIGHT_PRED)
+         continue;
+     }
+ 
+     if( available_mode)
+     {
+       if (!input->rdopt)
+       {
+         //===== SATD based decision =====
+         for (k=j=0; j<4; j++)
+         {
+           for (i=0; i<4; i++, k++)
+           {
+             diff[k] = imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr[ipmode][j][i];
+           }
+         }
+         // the predicted mode is free, every other mode pays floor(4*lambda)
+         cost  = (ipmode == mostProbableMode) ? 0 : (int)floor(4 * lambda );
+         cost += SATD (diff, input->hadamard);
+         if (cost < *min_cost)
+         {
+           best_ipmode = ipmode;
+           *min_cost   = cost;
+         }
+       }
+       else
+       {
+         // Residue Color Transform
+         if(!img->residue_transform_flag)
+         {
+           // get prediction and prediction error
+           for (j=0; j<4; j++)
+           {
+             memcpy(&img->mpr[block_y+j][block_x], img->mprr[ipmode][j], BLOCK_SIZE * sizeof(imgpel));
+             for (i=0; i<4; i++)
+             {
+               img->m7[j][i] = (int) (imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr[ipmode][j][i]);
+             }
+           }
+ 
+           //===== store the coding state =====
+           //store_coding_state (cs_cm);
+           // get and check rate-distortion cost
+           if ((rdcost = RDCost_for_4x4IntraBlocks (&c_nz, b8, b4, ipmode, lambda, min_rdcost, mostProbableMode)) < min_rdcost)
+           {
+             //--- set coefficients ---
+             memcpy(cofAC4x4[0],img->cofAC[b8][b4][0], 18 * sizeof(int));
+             memcpy(cofAC4x4[1],img->cofAC[b8][b4][1], 18 * sizeof(int));
+ 
+             //--- set reconstruction ---
+             for (y=0; y<4; y++)
+             {
+               memcpy(rec4x4[y],&enc_picture->imgY[pic_pix_y+y][pic_pix_x], BLOCK_SIZE * sizeof(imgpel));
+             }
+             //--- flag if dct-coefficients must be coded ---
+             nonzero = c_nz;
+ 
+             //--- set best mode update minimum cost ---
+             min_rdcost    = rdcost;
+             best_ipmode   = ipmode;
+ #ifdef BEST_NZ_COEFF
+             best_nz_coeff = img->nz_coeff [img->current_mb_nr][block_x4][block_y4];
+ #endif
+             //store_coding_state (cs_ib4);
+             if (img->AdaptiveRounding)
+             {
+               for (j=0; j<4; j++)
+                 memcpy(&fadjust4x4[block_y+j][block_x],&img->fadjust4x4[1][block_y+j][block_x], BLOCK_SIZE * sizeof(int));
+             }
+           }
+ 
+ #ifndef RESET_STATE
+           reset_coding_state (cs_cm);
+ #endif
+         }
+         else
+         {
+           for (j=0; j<4; j++)
+           {
+             for (i=0; i<4; i++)
+             {
+               residue_B = imgUV_org[0][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[0][c_ipmode][block_y+j][block_x+i];
+               residue_G = imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr[ipmode][j][i];
+               residue_R = imgUV_org[1][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[1][c_ipmode][block_y+j][block_x+i];
+ 
+               /* Forward Residue Transform */
+               resTrans_R[j][i] = residue_R-residue_B;
+               temp = residue_B+(resTrans_R[j][i]>>1);
+               resTrans_B[j][i] = residue_G-temp;
+               resTrans_G[j][i] = temp+(resTrans_B[j][i]>>1);
+             }
+           }
+ 
+           for (j=0; j<4; j++)
+           {
+             for (i=0; i<4; i++)
+             {
+               img->m7[j][i]  = resTrans_G[j][i];
+             }
+           }
+ 
+           // with residue_transform_flag set, RDCost_for_4x4IntraBlocks returns the rate only
+           store_coding_state (cs_cm);
+           rate = (int) RDCost_for_4x4IntraBlocks (&c_nz, b8, b4, ipmode, lambda, min_rdcost, mostProbableMode);
+           reset_coding_state (cs_cm);
+ 
+           for (j=0; j<4; j++)
+           {
+             for (i=0; i<4; i++)
+             {
+               rec_resG[j][i] = img->m7[j][i];
+               img->m7[j][i]  = resTrans_B[j][i];
+             }
+           }
+           //store_coding_state (cs_cm);
+           rate += RDCost_for_4x4Blocks_Chroma (b8+4, b4, 0);
+           for (j=0; j<4; j++)
+           {
+             for (i=0; i<4; i++)
+             {
+               rec_resB[j][i] = img->m7[j][i];
+               img->m7[j][i]  = resTrans_R[j][i];
+             }
+           }
+           rate += RDCost_for_4x4Blocks_Chroma (b8+8, b4, 1);
+ 
+           reset_coding_state (cs_cm);
+           for (j=0; j<4; j++)
+           {
+             for (i=0; i<4; i++)
+             {
+               rec_resR[j][i] = img->m7[j][i];
+             }
+           }
+ 
+           for (j=0; j<4; j++)
+           {
+             for (i=0; i<4; i++)
+             {
+               /* Inverse Residue Transform */
+               temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+               residue_G = rec_resB[j][i]+temp;
+               residue_B = temp - (rec_resR[j][i]>>1);
+               residue_R = residue_B+rec_resR[j][i];
+               enc_picture->imgUV[0][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+(int)img->mprr_c[0][c_ipmode][block_y+j][block_x+i]));
+               enc_picture->imgY[pic_pix_y+j][pic_pix_x+i]     = min(img->max_imgpel_value,max(0,residue_G+(int)img->mprr[ipmode][j][i]));
+               enc_picture->imgUV[1][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+(int)img->mprr_c[1][c_ipmode][block_y+j][block_x+i]));
+             }
+           }
+           //===== get distortion (SSD) of 4x4 block =====
+           // NOTE(review): the original pictures are indexed with pic_pix_y here,
+           // while every other access in this function uses pic_opix_y - confirm
+           // that this is intended.
+           distortion = 0;
+           for (y=0; y<4; y++)
+           {
+             for (x=pic_pix_x; x<pic_pix_x+4; x++)
+             {
+               distortion += img->quad[imgY_org    [pic_pix_y+y][x] - enc_picture->imgY    [pic_pix_y+y][x]];
+               distortion += img->quad[imgUV_org[0][pic_pix_y+y][x] - enc_picture->imgUV[0][pic_pix_y+y][x]];
+               distortion += img->quad[imgUV_org[1][pic_pix_y+y][x] - enc_picture->imgUV[1][pic_pix_y+y][x]];
+             }
+           }
+           rdcost = (double)distortion + lambda*(double)rate;
+ 
+           if (rdcost < min_rdcost)
+           {
+             //--- set coefficients ---
+             for (j=0; j<2; j++)
+             {
+               for (i=0; i<18;i++)
+                 cofAC4x4[j][i]=img->cofAC[b8][b4][j][i];
+               for (i=0; i<18;i++)
+                 cofAC4x4_chroma[0][j][i]=img->cofAC[b8+4][b4][j][i];
+               for (i=0; i<18;i++)
+                 cofAC4x4_chroma[1][j][i]=img->cofAC[b8+8][b4][j][i];
+             }
+ 
+             for (i=0; i<2; i++)
+             { //uv
+               dc_level        [i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = dc_level_temp        [i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)];
+               cbp_chroma_block[i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = cbp_chroma_block_temp[i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)];
+               //--- set reconstruction ---
+               for (y=0; y<BLOCK_SIZE; y++)
+                 memcpy(rec4x4_c[i][y],&enc_picture->imgUV[i][pic_pix_y+y][pic_pix_x], BLOCK_SIZE * sizeof(imgpel));
+             }
+ 
+             //--- set reconstruction ---
+             for (y=0; y<BLOCK_SIZE; y++)
+             {
+               memcpy(rec4x4[y],&enc_picture->imgY[pic_pix_y+y][pic_pix_x], BLOCK_SIZE * sizeof(imgpel));
+             }
+ 
+             //--- flag if dct-coefficients must be coded ---
+             nonzero = c_nz;
+ 
+             //--- set best mode update minimum cost ---
+             min_rdcost  = rdcost;
+             best_ipmode = ipmode;
+ #ifdef BEST_NZ_COEFF
+             best_nz_coeff = img->nz_coeff [img->current_mb_nr][block_x4][block_y4];
+ #endif
+           }
+         }
+       }
+     }
+   }
+ 
+ #ifdef BEST_NZ_COEFF
+   img->nz_coeff [img->current_mb_nr][block_x4][block_y4] = best_nz_coeff;
+ #endif
+   //===== set intra mode prediction =====
+   img->ipredmode[pic_block_y][pic_block_x] = best_ipmode;
+   // -1 means "use the predicted mode"; modes above the prediction are stored reduced by one
+   img->mb_data[img->current_mb_nr].intra_pred_modes[4*b8+b4] = mostProbableMode == best_ipmode ? -1 : best_ipmode < mostProbableMode ? best_ipmode : best_ipmode-1;
+ 
+   if (!input->rdopt)
+   {
+     // no R-D search was run: transform and reconstruct the selected mode now
+     // Residue Color Transform
+     if(!img->residue_transform_flag)
+     {
+       // get prediction and prediction error
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           img->mpr[block_y+j][block_x+i]  = img->mprr[best_ipmode][j][i];
+           img->m7[j][i]                   = imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr[best_ipmode][j][i];
+         }
+       }
+       nonzero = dct_luma (block_x, block_y, &dummy, 1);
+     }
+     else
+     {
+       int y_pos = 2*(b8 & 0x01)+(b4 & 0x01);
+       int x_pos = 2*(b8 >> 1)+(b4 >> 1);
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           residue_B = imgUV_org[0][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[0][c_ipmode][block_y+j][block_x+i];
+           residue_G = imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr[best_ipmode][j][i];
+           residue_R = imgUV_org[1][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[1][c_ipmode][block_y+j][block_x+i];
+ 
+           /* Forward Residue Transform */
+           resTrans_R[j][i] = residue_R-residue_B;
+           temp = residue_B+(resTrans_R[j][i]>>1);
+           resTrans_B[j][i] = residue_G-temp;
+           resTrans_G[j][i] = temp+(resTrans_B[j][i]>>1);
+         }
+       }
+ 
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           img->m7[j][i]  = resTrans_G[j][i];
+         }
+       }
+       nonzero = dct_luma (block_x, block_y, &dummy, 1);
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           rec_resG[j][i] = img->m7[j][i];
+           img->m7[j][i]  = resTrans_B[j][i];
+         }
+       }
+       cbp_chroma_block[0][y_pos][x_pos] = dct_chroma4x4 (0, b8+4, b4);
+       dc_level        [0][y_pos][x_pos] = dc_level_temp[0][y_pos][x_pos];
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           rec_resB[j][i] = img->m7[j][i];
+           img->m7[j][i]  = resTrans_R[j][i];
+         }
+       }
+       cbp_chroma_block[1][y_pos][x_pos] = dct_chroma4x4 (1, b8+8, b4);
+       dc_level        [1][y_pos][x_pos] = dc_level_temp[1][y_pos][x_pos];
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           rec_resR[j][i] = img->m7[j][i];
+         }
+       }
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           /* Inverse Residue Transform */
+           temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+           residue_G = rec_resB[j][i]+temp;
+           residue_B = temp - (rec_resR[j][i]>>1);
+           residue_R = residue_B+rec_resR[j][i];
+           enc_picture->imgUV[0][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+(int)img->mprr_c[0][c_ipmode][block_y+j][block_x+i]));
+           enc_picture->imgY[pic_pix_y+j][pic_pix_x+i]     = min(img->max_imgpel_value,max(0,residue_G+(int)img->mprr[best_ipmode][j][i]));
+           enc_picture->imgUV[1][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+(int)img->mprr_c[1][c_ipmode][block_y+j][block_x+i]));
+         }
+       }
+     }
+   }
+   else
+   {
+     // the R-D loop left the state of the last tested mode behind: put back the winner
+     //===== restore coefficients =====
+     for (j=0; j<2; j++)
+     {
+       memcpy (img->cofAC[b8][b4][j],cofAC4x4[j], 18 * sizeof(int));
+     }
+     // Residue Color Transform
+     if(img->residue_transform_flag)
+     {
+       for (j=0; j<2; j++)
+       {
+         memcpy (img->cofAC[b8+4][b4][j],cofAC4x4_chroma[0][j], 18 * sizeof(int));
+         memcpy (img->cofAC[b8+8][b4][j],cofAC4x4_chroma[1][j], 18 * sizeof(int));
+       }
+     }
+ 
+     //===== restore reconstruction and prediction (needed if single coeffs are removed) =====
+     for (y=0; y<BLOCK_SIZE; y++)
+     {
+       memcpy (&enc_picture->imgY[pic_pix_y+y][pic_pix_x],rec4x4[y],    BLOCK_SIZE * sizeof(imgpel));
+       memcpy (&img->mpr[block_y+y][block_x],img->mprr[best_ipmode][y], BLOCK_SIZE * sizeof(imgpel));
+     }
+ 
+     if (img->AdaptiveRounding)
+     {
+       for (j=0; j<BLOCK_SIZE; j++)
+         memcpy (&img->fadjust4x4[1][block_y+j][block_x],&fadjust4x4[block_y+j][block_x], BLOCK_SIZE * sizeof(int));
+     }
+ 
+     // Residue Color Transform
+     if(img->residue_transform_flag)
+     {
+       for (i=0; i<2; i++)
+       { //uv
+         //--- set reconstruction ---
+         for (y=0; y<4; y++)
+           memcpy(&enc_picture->imgUV[i][pic_pix_y+y][pic_pix_x],rec4x4_c[i][y], BLOCK_SIZE * sizeof(imgpel));
+       }
+     }
+   }
+   return nonzero;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Mode Decision for an 8x8 Intra block
+  *
+  *    Runs the 4x4 intra mode decision on the four 4x4 blocks of one 8x8 block.
+  *
+  * \param b8
+  *    index of the 8x8 block inside the macroblock
+  * \param lambda
+  *    Lagrange multiplier
+  * \param cost
+  *    output: floor(6*lambda + 0.4999) plus the four 4x4 costs
+  *
+  * \return
+  *    1 if any of the four 4x4 decisions returned non-zero, otherwise 0
+  *************************************************************************************
+  */
+ int Mode_Decision_for_8x8IntraBlocks(int b8,double lambda,int *cost)
+ {
+   int  sub_blk;
+   int  sub_cost;
+   int  any_coeff = 0;
+ 
+   // start from the fixed lambda-dependent offset
+   *cost = (int)floor(6.0 * lambda + 0.4999);
+ 
+   // NOTE(review): Mode_Decision_for_4x4IntraBlocks leaves its cost output at
+   // INT_MAX when input->rdopt is set, so the sum below can exceed the int
+   // range in that configuration - confirm callers ignore *cost there.
+   for (sub_blk = 0; sub_blk != 4; ++sub_blk)
+   {
+     any_coeff |= (Mode_Decision_for_4x4IntraBlocks (b8, sub_blk, lambda, &sub_cost) != 0);
+     *cost += sub_cost;
+   }
+ #ifdef RESET_STATE
+   reset_coding_state (cs_cm);
+ #endif
+ 
+   return any_coeff;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    4x4 Intra mode decision for an macroblock
+  *
+  * \param lambda
+  *    Lagrange multiplier, forwarded to the 8x8 decisions
+  * \param cost
+  *    output: sum of the costs reported for the four 8x8 blocks
+  *
+  * \return
+  *    bit mask in which bit n is set when the decision for 8x8 block n
+  *    returned non-zero
+  *************************************************************************************
+  */
+ int Mode_Decision_for_Intra4x4Macroblock (double lambda,  int* cost)
+ {
+   int  blk8      = 0;
+   int  blk_cost;
+   int  coded_mask = 0;
+ 
+   *cost = 0;
+   while (blk8 < 4)
+   {
+     if (Mode_Decision_for_8x8IntraBlocks (blk8, lambda, &blk_cost) != 0)
+       coded_mask |= (1<<blk8);
+ 
+     *cost += blk_cost;
+     blk8++;
+   }
+ 
+   return coded_mask;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    R-D Cost for an 8x8 Partition
+  *
+  *    Codes the luma residual of one 8x8 block (plus the two colour residuals
+  *    when img->residue_transform_flag is set), measures the distortion against
+  *    the original picture and adds up the rate of the 8x8 mode, the motion
+  *    information, the coded block pattern bit (CABAC only) and the luma
+  *    coefficients.
+  *
+  * \param cnt_nonz
+  *    output: number of nonzero coefficients
+  * \param cbp_blk
+  *    output: cbp blk
+  * \param lambda
+  *    Lagrange multiplier
+  * \param block
+  *    8x8 block number
+  * \param mode
+  *    partitioning mode
+  * \param pdir
+  *    prediction direction
+  * \param ref
+  *    reference frame
+  * \param bwd_ref
+  *    reference used for the second list (passed as bwd_ref to the writers)
+  *
+  * \return
+  *    distortion + lambda * rate, or 1e20 when direct mode is requested in a
+  *    B slice but direct_pdir marks it as not allowed for this block
+  *************************************************************************************
+  */
+ double RDCost_for_8x8blocks (int*    cnt_nonz,   // --> number of nonzero coefficients
+                              int64*  cbp_blk,    // --> cbp blk
+                              double  lambda,     // <-- lagrange multiplier
+                              int     block,      // <-- 8x8 block number
+                              int     mode,       // <-- partitioning mode
+                              short   pdir,       // <-- prediction direction
+                              short   ref,        // <-- reference frame
+                              short   bwd_ref)    // <-- abp type
+ {
+   int  i, j, k;
+   int  rate=0;
+   int64 distortion=0;
+   int  dummy = 0, mrate;   // dummy is passed by value to ue_linfo(), so it must hold a defined value
+   int  fw_mode, bw_mode;
+   int  cbp     = 0;
+   int  pax     = 8*(block & 0x01);
+   int  pay     = 8*(block >> 1);
+   int  i0      = pax >> 2;
+   int  j0      = pay >> 2;
+   int  bframe  = (img->type==B_SLICE);
+   int  direct  = (bframe && mode==0);
+   int  b8value = B8Mode2Value (mode, pdir);
+ 
+   Macroblock    *currMB    = &img->mb_data[img->current_mb_nr];
+   SyntaxElement *currSE    = &img->MB_SyntaxElements[currMB->currSEnr];
+   Slice         *currSlice = img->currentSlice;
+   DataPartition *dataPart;
+   const int     *partMap   = assignSE2partition[input->partition_mode];
+ 
+   EncodingEnvironmentPtr eep_dp;
+ 
+   // Residue Color Transform
+   int residue_R, residue_G, residue_B, temp, b4;
+   int b4_x, b4_y;
+ 
+   //=====
+   //=====  GET COEFFICIENTS, RECONSTRUCTIONS, CBP
+   //=====
+   currMB->bi_pred_me=0;
+ 
+   if (direct)
+   {
+     if (direct_pdir[img->block_y+j0][img->block_x+i0]<0) // mode not allowed
+       return (1e20);
+     else
+       *cnt_nonz = LumaResidualCoding8x8 (&cbp, cbp_blk, block, direct_pdir[img->block_y+j0][img->block_x+i0], 0, 0,
+                   (short)max(0,direct_ref_idx[LIST_0][img->block_y+j0][img->block_x+i0]), direct_ref_idx[LIST_1][img->block_y+j0][img->block_x+i0]);
+   }
+   else
+   {
+     // pdir 0 and 2 use the forward mode, pdir 1 and 2 the backward mode
+     fw_mode   = (pdir==0||pdir==2 ? mode : 0);
+     bw_mode   = (pdir==1||pdir==2 ? mode : 0);
+     *cnt_nonz = LumaResidualCoding8x8 (&cbp, cbp_blk, block, pdir, fw_mode, bw_mode, ref, bwd_ref);
+   }
+ 
+   // Residue Color Transform
+   if(img->residue_transform_flag)
+   {
+     // code the B and R residuals of each 4x4 block and collect their rate
+     for(b4 = 0; b4 < 4; b4++)
+     {
+       b4_x = pax+(b4 & 0x01)*4;
+       b4_y = pay+(b4 >> 1  )*4;
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+           img->m7[j][i] = resTrans_B[j+b4_y][i+b4_x];
+       }
+       rate += RDCost_for_4x4Blocks_Chroma (block+4, b4, 0);
+ 
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           rec_resB[j+b4_y][i+b4_x] = img->m7[j][i];
+           img->m7[j][i]  = resTrans_R[j+b4_y][i+b4_x];
+         }
+       }
+       rate += RDCost_for_4x4Blocks_Chroma (block+8, b4, 1);
+ 
+       for (j=0; j<4; j++)
+       {
+         for (i=0; i<4; i++)
+         {
+           rec_resR[j+b4_y][i+b4_x] = img->m7[j][i];
+         }
+       }
+     }
+     reset_coding_state (cs_cm);
+     /* Inverse Residue Transform */
+     for (j=pay; j<pay+8; j++)
+       for (i=pax; i<pax+8; i++)
+       {
+         /* YCoCg-R */
+         temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+         residue_G = rec_resB[j][i]+temp;
+         residue_B = temp - (rec_resR[j][i]>>1);
+         residue_R = residue_B+rec_resR[j][i];
+ 
+         enc_picture->imgUV[0][img->pix_y+j][img->pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+mprRGB[1][j][i]));
+         enc_picture->imgY[img->pix_y+j][img->pix_x+i]     = min(img->max_imgpel_value,max(0,residue_G+mprRGB[0][j][i]));
+         enc_picture->imgUV[1][img->pix_y+j][img->pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+mprRGB[2][j][i]));
+       }
+   }
+ 
+   //===== get residue =====
+   if (input->rdopt==3 && img->type!=B_SLICE)
+   {
+     // We need the reconstructed prediction residue for the simulated decoders.
+     compute_residue_b8block (block, -1);
+   }
+ 
+   //=====
+   //=====   GET DISTORTION
+   //=====
+   if (input->rdopt==3 && img->type!=B_SLICE)
+   {
+     // average the SSD over all simulated decoders
+     for (k=0; k<input->NoOfDecoders ;k++)
+     {
+       decode_one_b8block (k, P8x8, block, mode, ref);
+       for (j=img->opix_y+pay; j<img->opix_y+pay+8; j++)
+         for (i=img->opix_x+pax; i<img->opix_x+pax+8; i++)
+         {
+           distortion += img->quad[imgY_org[j][i] - decs->decY[k][j][i]];
+         }
+     }
+     distortion /= input->NoOfDecoders;
+   }
+   else
+   {
+     for (j=pay; j<pay+8; j++)
+       for (i=img->pix_x+pax; i<img->pix_x+pax+8; i++)
+       {
+         distortion += img->quad [imgY_org[img->opix_y+j][i] - enc_picture->imgY[img->pix_y+j][i]];
+         // Residue Color Transform
+         if(img->residue_transform_flag)
+         {
+           distortion += img->quad [imgUV_org[0][img->opix_y+j][i] - enc_picture->imgUV[0][img->pix_y+j][i]];
+           distortion += img->quad [imgUV_org[1][img->opix_y+j][i] - enc_picture->imgUV[1][img->pix_y+j][i]];
+         }
+       }
+   }
+ 
+   //=====
+   //=====   GET RATE
+   //=====
+   //----- block 8x8 mode -----
+   if (input->symbol_mode == UVLC)
+   {
+     ue_linfo (b8value, dummy, &mrate, &dummy);
+     rate += mrate;
+   }
+   else
+   {
+     currSE->value1  = b8value;
+     currSE->writing = writeB8_typeInfo_CABAC;
+     currSE->type    = SE_MBTYPE;
+     dataPart = &(currSlice->partArr[partMap[currSE->type]]);
+     dataPart->writeSyntaxElement (currSE, dataPart);
+     rate += currSE->len;
+     currSE++;
+     currMB->currSEnr++;
+   }
+ 
+   //----- motion information -----
+   if (!direct)
+   {
+     if ((img->num_ref_idx_l0_active > 1 ) && (pdir==0 || pdir==2))
+       rate  += writeReferenceFrame (mode, i0, j0, 1, ref);
+     if(img->num_ref_idx_l1_active > 1 && img->type== B_SLICE)
+     {
+       if (pdir==1 || pdir==2)
+       {
+         rate  += writeReferenceFrame (mode, i0, j0, 0, bwd_ref);
+       }
+     }
+ 
+     if (pdir==0 || pdir==2)
+     {
+       rate  += writeMotionVector8x8 (i0, j0, i0+2, j0+2, ref,LIST_0, mode);
+     }
+     if (pdir==1 || pdir==2)
+     {
+       rate  += writeMotionVector8x8 (i0, j0, i0+2, j0+2, bwd_ref, LIST_1, mode);
+     }
+   }
+ 
+   //----- coded block pattern (for CABAC only) -----
+   if (input->symbol_mode == CABAC)
+   {
+     // rate = difference of the arithmetic coder's bit count before and after the write
+     dataPart = &(currSlice->partArr[partMap[SE_CBP_INTER]]);
+     eep_dp   = &(dataPart->ee_cabac);
+     mrate    = arienco_bits_written (eep_dp);
+     writeCBP_BIT_CABAC (block, ((*cnt_nonz>0)?1:0), cbp8x8, currMB, 1, eep_dp);
+     mrate    = arienco_bits_written (eep_dp) - mrate;
+     rate    += mrate;
+   }
+ 
+   //----- luminance coefficients -----
+   if (*cnt_nonz)
+   {
+     rate += writeLumaCoeff8x8 (block, mode, currMB->luma_transform_size_8x8_flag);
+   }
+ 
+   return (double)distortion + lambda * (double)rate;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Gets mode offset for intra16x16 mode
+  *
+  * \param cbp
+  *    coded block pattern; bits 0-3 and bits 4-5 are examined
+  * \param i16mode
+  *    intra 16x16 prediction mode, added unchanged
+  *
+  * \return
+  *    base (13 if any of cbp bits 0-3 is set, else 1) + i16mode
+  *    + cbp bits 4-5 shifted right by two (0, 4, 8 or 12)
+  *************************************************************************************
+  */
+ int I16Offset (int cbp, int i16mode)
+ {
+   int base       = 1;
+   int upper_bits = (cbp & 0x30) >> 2;
+ 
+   if (cbp & 15)
+     base = 13;
+ 
+   return base + i16mode + upper_bits;
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Sets modes and reference frames for a macroblock
+  *
+  * \param mode
+  *    Macroblock mode to apply. Handled values are 0, 1, 2, 3, P8x8, I4MB, I16MB,
+  *    I8MB and IPCM; any other value prints an error and terminates with exit(1).
+  *
+  * \par Side effects
+  *    - Current macroblock (img->mb_data[img->current_mb_nr]): writes mb_type,
+  *      bi_pred_me, b8mode[0..3] and b8pdir[0..3]. I8MB sets and IPCM clears
+  *      luma_transform_size_8x8_flag; the other modes leave it untouched.
+  *    - enc_picture->ref_idx and enc_picture->ref_pic_id: written for the 4x4
+  *      group of entries starting at (img->block_y, img->block_x). LIST_1 is
+  *      only written when img->type is B_SLICE.
+  *
+  * \note
+  *    IS_FW and IS_BW are helper macros local to this function (they are #undef'd
+  *    before it returns). They read mode, k and bframe from the enclosing scope.
+  * \note
+  *    The local l is computed in the loops but never read.
+  * \note
+  *    NOTE(review): IPCM is not part of the "mode==0 || I4MB || I16MB || I8MB"
+  *    test, so it takes the else branch and indexes best8x8pdir[mode] and
+  *    best8x8fwref[mode] / best8x8bwref[mode] -- confirm this is intended.
+  *************************************************************************************
+  */
+ void SetModesAndRefframeForBlocks (int mode)
+ {
+   int i,j,k,l;
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   int  bframe  = (img->type==B_SLICE);
+   int  block_x, block_y;
+   int  cur_ref[2];  
+   
+   //--- macroblock type ---
+   currMB->mb_type = mode;    
+   currMB->bi_pred_me= (mode == 1 ? img->bi_pred_me[mode] : 0);  // only mode 1 can carry a bi_pred_me value
+   
+   //--- block 8x8 mode and prediction direction ---
+   switch (mode)
+   {
+   case 0:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i] = 0;
+       currMB->b8pdir[i] = (bframe ? direct_pdir[img->block_y + (i >> 1)*2][img->block_x + (i & 0x01)*2] : 0);
+     }
+     break;
+   case 1:
+   case 2:
+   case 3:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i] = mode;
+       currMB->b8pdir[i] = best8x8pdir[mode][i];
+     }
+     break;
+   case P8x8:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i]   = best8x8mode[i];
+       currMB->b8pdir[i]   = best8x8pdir[mode][i];
+     }
+     break;
+   case I4MB:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i] = IBLOCK; 
+       currMB->b8pdir[i] = -1;
+     }
+     break;
+   case I16MB:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i] =  0;
+       currMB->b8pdir[i] = -1;
+     }
+     break;
+   case I8MB:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i] = I8MB;
+       currMB->b8pdir[i] = -1;
+     }
+     //switch to 8x8 transform
+     currMB->luma_transform_size_8x8_flag = 1;
+     break;
+   case IPCM:
+     for(i=0;i<4;i++)
+     {
+       currMB->b8mode[i] = IPCM;
+       currMB->b8pdir[i] = -1;
+     }
+     currMB->luma_transform_size_8x8_flag = 0;
+     break;
+   default:
+     printf ("Unsupported mode in SetModesAndRefframeForBlocks!\n");
+     exit (1);
+   }
+   
+ #define IS_FW ((best8x8pdir[mode][k]==0 || best8x8pdir[mode][k]==2) && (mode!=P8x8 || best8x8mode[k]!=0 || !bframe))
+ #define IS_BW ((best8x8pdir[mode][k]==1 || best8x8pdir[mode][k]==2) && (mode!=P8x8 || best8x8mode[k]!=0))
+   //--- reference frame arrays ---
+   if (mode==0 || mode==I4MB || mode==I16MB || mode==I8MB)
+   {
+     if (bframe)
+     {
+       if (!mode)
+       {
+         for (j = img->block_y; j < img->block_y + 4; j++)
+         {
+           memcpy(&enc_picture->ref_idx[LIST_0][j][img->block_x],&direct_ref_idx[LIST_0][j][img->block_x], 4 * sizeof(char));
+           memcpy(&enc_picture->ref_idx[LIST_1][j][img->block_x],&direct_ref_idx[LIST_1][j][img->block_x], 4 * sizeof(char));
+         }
+       }
+       else
+       {
+         for (j = img->block_y; j < img->block_y + 4; j++)
+         {
+           memset(&enc_picture->ref_idx[LIST_0][j][img->block_x],-1, 4 * sizeof(char));
+           memset(&enc_picture->ref_idx[LIST_1][j][img->block_x],-1, 4 * sizeof(char));
+         }
+       }
+     }
+     else
+     {
+       if (!mode)
+       {
+         for (j = img->block_y; j < img->block_y + 4; j++)
+           memset(&enc_picture->ref_idx[LIST_0][j][img->block_x],0, 4 * sizeof(char));
+       }
+       else
+       {
+         for (j = img->block_y; j < img->block_y + 4; j++)
+           memset(&enc_picture->ref_idx[LIST_0][j][img->block_x],-1, 4 * sizeof(char));
+       }
+     }
+   }
+   else
+   {
+     if (bframe)
+     {
+       for (j=0;j<4;j++)
+       {
+         block_y = img->block_y + j;
+         for (i=0;i<4;i++)
+         {
+           block_x = img->block_x + i;
+           k = 2*(j >> 1) + (i >> 1);  // 0..3: which 2x2 group of 4x4 entries (i,j) belongs to
+           l = 2*(j & 0x01) + (i & 0x01);  // 0..3: position inside that group (not read afterwards)
+           
+           if(mode == P8x8 && best8x8mode[k]==0)
+           {
+             enc_picture->ref_idx[LIST_0][block_y][block_x] = direct_ref_idx[LIST_0][block_y][block_x];
+             enc_picture->ref_idx[LIST_1][block_y][block_x] = direct_ref_idx[LIST_1][block_y][block_x];
+           }
+           else if (mode ==1 && currMB->bi_pred_me && IS_FW && IS_BW)
+           {
+             enc_picture->ref_idx[LIST_0][block_y][block_x] = 0;
+             enc_picture->ref_idx[LIST_1][block_y][block_x] = 0;
+           }
+           else
+           {
+             enc_picture->ref_idx[LIST_0][block_y][block_x] = (IS_FW ? best8x8fwref[mode][k] : -1);
+             enc_picture->ref_idx[LIST_1][block_y][block_x] = (IS_BW ? best8x8bwref[mode][k] : -1);
+           }
+         }
+       }
+     }
+     else
+     {
+       for (j=0;j<4;j++)
+       {
+         block_y = img->block_y + j;
+         for (i=0;i<4;i++)
+         {
+           block_x = img->block_x + i;
+           k = 2*(j >> 1) + (i >> 1);
+           l = 2*(j & 0x01) + (i & 0x01);
+           enc_picture->ref_idx[LIST_0][block_y][block_x] = (IS_FW ? best8x8fwref[mode][k] : -1);
+         }
+       }
+     }
+   }
+   
+   if (bframe)
+   {
+ 
+     for (j = img->block_y; j < img->block_y + 4; j++)
+       for (i = img->block_x; i < img->block_x + 4;i++)
+       {
+         cur_ref[LIST_0] = (int) enc_picture->ref_idx[LIST_0][j][i];
+         cur_ref[LIST_1] = (int) enc_picture->ref_idx[LIST_1][j][i];
+         
+         enc_picture->ref_pic_id [LIST_0][j][i] = (cur_ref[LIST_0]>=0 
+           ? enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][cur_ref[LIST_0]]
+           : -1);  // a negative ref_idx maps to ref_pic_id -1
+         enc_picture->ref_pic_id [LIST_1][j][i] = (cur_ref[LIST_1]>=0 
+           ? enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][cur_ref[LIST_1]]
+           : -1);
+       }
+   }
+   else
+   {  
+     for (j = img->block_y; j < img->block_y + 4; j++)
+       for (i = img->block_x; i < img->block_x + 4;i++)
+       {
+         cur_ref[LIST_0] = (int) enc_picture->ref_idx[LIST_0][j][i];
+         enc_picture->ref_pic_id [LIST_0][j][i] = (cur_ref[LIST_0]>=0 
+           ? enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][cur_ref[LIST_0]]
+           : -1);
+       }
+   }
+   
+ #undef IS_FW
+ #undef IS_BW
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Intra 16x16 mode decision
+  *
+  * \param currMB
+  *    Macroblock whose cbp field is written: it receives the return value of
+  *    dct_luma_16x16() and, when img->residue_transform_flag is set, additionally
+  *    (cr_cbp<<4) from the two dct_chroma() calls.
+  * \param i16mode
+  *    Pointer to the intra 16x16 prediction mode. It is handed to find_sad_16x16()
+  *    only when img->residue_transform_flag is 0; with the flag set that call is
+  *    skipped and *i16mode is used as supplied by the caller.
+  *
+  * \par Side effects
+  *    Calls intrapred_luma_16x16(). With img->residue_transform_flag set it also
+  *    overwrites resTrans_R/G/B, img->m7, rec_resR/G/B and writes the clipped
+  *    reconstruction into enc_picture->imgY and enc_picture->imgUV[0..1].
+  *
+  * \note
+  *    img->m7 is reused as the input/output buffer of each transform call, so the
+  *    order of the copy loops around dct_luma_16x16() / dct_chroma() matters.
+  *************************************************************************************
+  */
+ void
+ Intra16x16_Mode_Decision (Macroblock* currMB, int* i16mode)
+ {
+   // Residue Color Transform
+   int residue_R, residue_G, residue_B;
+   int c_ipmode = img->mb_data[img->current_mb_nr].c_ipred_mode;
+   int i, j, temp;
+   int pic_pix_x   = img->pix_x;
+   int pic_pix_y   = img->pix_y;
+   pel_t   **imgY_orig  = imgY_org;
+   pel_t   ***imgUV_orig  = imgUV_org;
+   int cr_cbp;
+   
+   intrapred_luma_16x16 ();   /* make intra pred for all 4 new modes */
+   
+   if(!img->residue_transform_flag)
+     find_sad_16x16 (i16mode);   /* get best new intra mode */
+   
+   // Residue Color Transform
+   if(img->residue_transform_flag)
+   {
+     for (j=0; j < MB_BLOCK_SIZE; j++)
+       for (i=0; i < MB_BLOCK_SIZE; i++)
+       {
+         residue_B = imgUV_orig[0][pic_pix_y+j][pic_pix_x+i] - img->mprr_c[0][c_ipmode][j][i];
+         residue_G = imgY_orig[pic_pix_y+j][pic_pix_x+i] - img->mprr_2[*i16mode][j][i];
+         residue_R = imgUV_orig[1][pic_pix_y+j][pic_pix_x+i] - img->mprr_c[1][c_ipmode][j][i];
+         
+         /* Forward Residue Transform */
+         resTrans_R[j][i] = residue_R-residue_B;
+         temp = residue_B+(resTrans_R[j][i]>>1);
+         resTrans_B[j][i] = residue_G-temp;
+         resTrans_G[j][i] = temp+(resTrans_B[j][i]>>1);
+         
+         img->m7[j][i]  = resTrans_G[j][i];  // transformed G residue is what dct_luma_16x16 sees in m7
+       }
+   }
+   
+   currMB->cbp = dct_luma_16x16 (*i16mode);
+   
+   // Residue Color Transform
+   if(img->residue_transform_flag)
+   {
+     for (j=0; j < MB_BLOCK_SIZE; j++)
+     {
+       for (i=0; i < MB_BLOCK_SIZE; i++)
+       {
+         rec_resG[j][i] = img->m7[j][i];  // save m7 as left by dct_luma_16x16
+         img->m7[j][i]  = resTrans_B[j][i];  // load input for dct_chroma(0, ...)
+       }
+     }
+     cr_cbp = dct_chroma(0, 0);
+     
+     for (j=0; j < MB_BLOCK_SIZE; j++)
+     {
+       for (i=0; i < MB_BLOCK_SIZE; i++)
+       {
+         rec_resB[j][i] = img->m7[j][i];
+         img->m7[j][i]  = resTrans_R[j][i];  // load input for dct_chroma(1, ...)
+       }
+     }
+     cr_cbp = dct_chroma(1, cr_cbp);
+     
+     for (j = 0; j < MB_BLOCK_SIZE; j++)
+     {
+       for (i = 0; i < MB_BLOCK_SIZE; i++)
+         
+         rec_resR[j][i] = img->m7[j][i];
+     }
+     
+     currMB->cbp += (cr_cbp<<4);
+     
+     /* Inverse Residue Transform */
+     for (j = 0; j < MB_BLOCK_SIZE; j++)
+     {
+       for (i = 0; i < MB_BLOCK_SIZE; i++)
+       {
+         temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+         residue_G = rec_resB[j][i]+temp;
+         residue_B = temp - (rec_resR[j][i]>>1);
+         residue_R = residue_B+rec_resR[j][i];
+         
+         enc_picture->imgUV[0][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+(int)img->mprr_c[0][c_ipmode][j][i]));
+         enc_picture->imgY[pic_pix_y+j][pic_pix_x+i]     = min(img->max_imgpel_value,max(0,residue_G+(int)img->mprr_2[*i16mode][j][i]));
+         enc_picture->imgUV[1][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+(int)img->mprr_c[1][c_ipmode][j][i]));
+       }
+     }
+   }
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Sets Coefficients and reconstruction for an 8x8 block
+  *
+  * \param currMB
+  *    Macroblock to update. Its luma_transform_size_8x8_flag selects which set of
+  *    previously stored data is restored: the tr8x8 / *_8x8ts set when the flag is
+  *    non-zero, the tr4x4 / cofAC8x8 / cbp8x8 set otherwise.
+  *
+  * \par Side effects
+  *    - Copies stored coefficients into img->cofAC.
+  *    - Sets currMB->cbp and currMB->cbp_blk (both 0 when the non-zero count is at
+  *      or below its threshold and the lossless condition does not apply).
+  *    - Writes either enc_picture->imgY (residue_transform_flag clear) or
+  *      rec_resG, mprRGB, resTrans_R and resTrans_B (residue_transform_flag set).
+  *    - 8x8-transform path only: also rewrites currMB->b8mode / b8pdir,
+  *      enc_picture->ref_idx and ref_pic_id, and calls StoreMV8x8(1) followed by
+  *      RestoreMV8x8(0).
+  *
+  * \note
+  *    The local l is computed in the reference-index loops but never read.
+  *************************************************************************************
+  */
+ void SetCoeffAndReconstruction8x8 (Macroblock* currMB)
+ {
+   int block, k, j, i;
+   int cur_ref[2];
+ 
+   //============= MIXED TRANSFORM SIZES FOR 8x8 PARTITION ==============
+   //--------------------------------------------------------------------
+   int l;
+   int bframe = img->type==B_SLICE; 
+ 
+   if (currMB->luma_transform_size_8x8_flag)
+   {
+    
+     //============= set mode and ref. frames ==============
+     for(i = 0;i<4;i++)
+     {
+       currMB->b8mode[i]   = tr8x8.part8x8mode[i];
+       currMB->b8pdir[i]   = tr8x8.part8x8pdir[i];
+     }
+     
+     if (bframe)
+     {
+       for (j = 0;j<4;j++)
+         for (i = 0;i<4;i++)
+         {
+           k = 2*(j >> 1)+(i >> 1);  // 0..3: which 2x2 group of 4x4 entries (i,j) belongs to
+           l = 2*(j & 0x01)+(i & 0x01);
+           enc_picture->ref_idx[LIST_0][img->block_y+j][img->block_x+i] = tr8x8.part8x8fwref[k];
+           enc_picture->ref_idx[LIST_1][img->block_y+j][img->block_x+i] = tr8x8.part8x8bwref[k];
+         }
+     }
+     else
+     {
+       for (j = 0;j<4;j++)
+         for (i = 0;i<4;i++)
+         {
+           k = 2*(j >> 1)+(i >> 1);
+           l = 2*(j & 0x01)+(i & 0x01);
+           enc_picture->ref_idx[LIST_0][img->block_y+j][img->block_x+i] = tr8x8.part8x8fwref[k];
+         }
+     }
+ 
+ 
+     for (j = img->block_y;j<img->block_y + BLOCK_MULTIPLE;j++)
+     {
+       for (i = img->block_x;i<img->block_x + BLOCK_MULTIPLE;i++)
+       {
+         cur_ref[LIST_0] = (int) enc_picture->ref_idx[LIST_0][j][i];
+         enc_picture->ref_pic_id [LIST_0][j][i] =(cur_ref[LIST_0]>=0 
+         ? enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][cur_ref[LIST_0]]
+         : -1);  // a negative ref_idx maps to ref_pic_id -1
+       }
+     }
+    
+     if (bframe)
+     {
+       for (j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+       {
+         for (i = img->block_x;i<img->block_x + BLOCK_MULTIPLE;i++)
+         {
+           cur_ref[LIST_1] = (int) enc_picture->ref_idx[LIST_1][j][i];
+           enc_picture->ref_pic_id [LIST_1][j][i] = (cur_ref[LIST_1]>=0 
+             ? enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][cur_ref[LIST_1]]
+             : -1);
+         }
+         
+       }
+     }
+     
+     //====== set the mv's for 8x8 partition with transform size 8x8 ======
+     //save the mv data for 4x4 transform
+     StoreMV8x8(1);
+     //set new mv data for 8x8 transform
+     RestoreMV8x8(0);
+     
+     //============= get pre-calculated data ==============
+     //restore coefficients from 8x8 transform
+     
+     for (block = 0; block<4; block++)
+     {
+       for (k = 0; k<4; k++)
+         for (j = 0; j<2; j++)
+           memcpy (img->cofAC[block][k][j],cofAC_8x8ts[block][k][j], 65 * sizeof(int));            
+     }     
+     //restore reconstruction 
+     if (cnt_nonz8_8x8ts <= _LUMA_8x8_COEFF_COST_ && 
+       ((img->qp + img->bitdepth_luma_qp_scale)!=0 || img->lossless_qpprime_flag==0))
+     {
+       currMB->cbp     = 0;
+       currMB->cbp_blk = 0;
+       if(!img->residue_transform_flag) // Residue Color Transform
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memcpy(&enc_picture->imgY[img->pix_y+j][img->pix_x], tr8x8.mpr8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));
+       }
+       else
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memset(rec_resG[j], 0, MB_BLOCK_SIZE * sizeof(int));
+       }                  
+     }
+     else
+     {
+       currMB->cbp     = cbp8_8x8ts;
+       currMB->cbp_blk = cbp_blk8_8x8ts;
+       if(!img->residue_transform_flag)                // Residue Color Transform
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memcpy (&enc_picture->imgY[img->pix_y+j][img->pix_x],tr8x8.rec_mbY8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));            
+       }
+       else
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memcpy (rec_resG[j], tr8x8.rec_resG_8x8[j], MB_BLOCK_SIZE * sizeof(int));
+       }
+     }
+     
+     if(img->residue_transform_flag)                // Residue Color Transform
+     {
+       // Residue Color Transform
+       for (j = 0; j < MB_BLOCK_SIZE; j++)
+         for (i = 0; i < MB_BLOCK_SIZE; i++)
+         {
+           mprRGB[0][j][i]  = tr8x8.mprRGB_8x8[0][j][i];
+           mprRGB[1][j][i]  = tr8x8.mprRGB_8x8[1][j][i];
+           mprRGB[2][j][i]  = tr8x8.mprRGB_8x8[2][j][i];
+           resTrans_R[j][i] = tr8x8.resTrans_R_8x8[j][i];
+           resTrans_B[j][i] = tr8x8.resTrans_B_8x8[j][i];
+         }
+     }
+   }
+   else
+   {
+     //============= get pre-calculated data ==============
+     //---------------------------------------------------
+     //--- restore coefficients ---
+     for (block = 0; block<4+img->num_blk8x8_uv; block++)
+     {
+       for (k = 0; k<4; k++)
+         for (j = 0; j<2; j++)
+           memcpy (img->cofAC[block][k][j],cofAC8x8[block][k][j], 65 * sizeof(int));            
+     }
+     
+     if (cnt_nonz_8x8<=5 && img->type!=SP_SLICE &&
+       ((img->qp + img->bitdepth_luma_qp_scale)!=0 || img->lossless_qpprime_flag==0))
+     {
+       currMB->cbp     = 0;
+       currMB->cbp_blk = 0;
+       if(!img->residue_transform_flag) // Residue Color Transform
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memcpy (&enc_picture->imgY[img->pix_y+j][img->pix_x],tr4x4.mpr8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));            
+       }
+       else
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memset(rec_resG[j], 0, MB_BLOCK_SIZE * sizeof(int));
+       }
+     }
+     else
+     {
+       currMB->cbp     = cbp8x8;
+       currMB->cbp_blk = cbp_blk8x8;
+       if(!img->residue_transform_flag)           // Residue Color Transform
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memcpy (&enc_picture->imgY[img->pix_y+j][img->pix_x],tr4x4.rec_mbY8x8[j], MB_BLOCK_SIZE * sizeof(imgpel));            
+       }
+       else
+       {
+         for (j = 0; j < MB_BLOCK_SIZE; j++)
+           memcpy(rec_resG[j], tr4x4.rec_resG_8x8[j], MB_BLOCK_SIZE * sizeof(int));
+       }
+     }
+     
+     if(img->residue_transform_flag)           // Residue Color Transform
+     {
+       // Residue Color Transform
+       for (j = 0; j < MB_BLOCK_SIZE; j++)
+       {
+         memcpy(mprRGB[0][j], tr4x4.mprRGB_8x8[0][j], MB_BLOCK_SIZE * sizeof(int));
+         memcpy(mprRGB[1][j], tr4x4.mprRGB_8x8[1][j], MB_BLOCK_SIZE * sizeof(int));
+         memcpy(mprRGB[2][j], tr4x4.mprRGB_8x8[2][j], MB_BLOCK_SIZE * sizeof(int));
+         memcpy(resTrans_R[j], tr4x4.resTrans_R_8x8[j], MB_BLOCK_SIZE * sizeof(int));
+         memcpy(resTrans_B[j], tr4x4.resTrans_B_8x8[j], MB_BLOCK_SIZE * sizeof(int));
+       }
+     }
+   }
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Sets motion vectors for a macroblock
+  *
+  * \param currMB
+  *    Macroblock whose b8mode[], b8pdir[], bi_pred_me and mb_type are read to
+  *    decide which vectors to copy.
+  * \param bframe
+  *    Non-zero selects the branch that also fills the LIST_1 vectors; zero fills
+  *    LIST_0 only.
+  *
+  * \par Side effects
+  *    Writes enc_picture->mv for the 4x4 group of entries starting at
+  *    (img->block_y, img->block_x). A prediction direction of -1 stores zero
+  *    vectors. In the bframe branch a direction outside -1..2 calls
+  *    error("invalid direction mode", 255). When img->MbaffFrameFlag is set,
+  *    all_mv and pred_mv are additionally copied into rdopt->all_mv /
+  *    rdopt->pred_mv.
+  *
+  * \note
+  *    The locals bx and l are computed but never read.
+  * \note
+  *    NOTE(review): once the bi-predictive case rebinds the local all_mv to
+  *    img->bipred_mv1 / img->bipred_mv2, it stays rebound for the remaining loop
+  *    iterations and for the final copy into rdopt->all_mv -- confirm intended.
+  *************************************************************************************
+  */
+ void SetMotionVectorsMB (Macroblock* currMB, int bframe)
+ {
+   int i, j, k, l, m, mode8, pdir8, ref, by, bx, bxr;
+   short ******all_mv  = img->all_mv;
+   short ******pred_mv = img->pred_mv;
+   int  bw_ref;
+   int jdiv, jmod;
+  
+   for (j = 0; j<4; j++)
+   {
+     jmod = j & 0x01;
+     jdiv = j >>   1;
+     by    = img->block_y+j;
+     for (i = 0; i<4; i++)
+     {
+       mode8 = currMB->b8mode[k=2*jdiv+(i>>1)];  // k (0..3) is assigned here and reused for b8pdir below
+       l     = 2*jmod + (i & 0x01);
+       
+       bxr   = img->block_x+i;
+       bx    = img->block_x+i+4;
+       
+       pdir8 = currMB->b8pdir[k];
+       ref    = enc_picture->ref_idx[LIST_0][by][bxr];
+       bw_ref = enc_picture->ref_idx[LIST_1][by][bxr];
+       
+       if (currMB->bi_pred_me && (pdir8 == 2) && currMB->mb_type==1)
+       {
+         all_mv  = currMB->bi_pred_me == 1 ? img->bipred_mv1 : img->bipred_mv2;
+         ref = 0;
+         bw_ref = 0;
+       }
+       
+       if (!bframe)
+       {
+         if (pdir8>=0) //(mode8!=IBLOCK)&&(mode8!=I16MB))  // && ref != -1)
+         {
+           enc_picture->mv[LIST_0][by][bxr][0] = all_mv [j][i][LIST_0][ ref][mode8][0];
+           enc_picture->mv[LIST_0][by][bxr][1] = all_mv [j][i][LIST_0][ ref][mode8][1];
+         }
+         else
+         {
+           enc_picture->mv[LIST_0][by][bxr][0] = 0;
+           enc_picture->mv[LIST_0][by][bxr][1] = 0;
+         }
+       }
+       else
+       {
+         if (pdir8==-1) // intra
+         {
+           enc_picture->mv[LIST_0][by][bxr][0] = 0;
+           enc_picture->mv[LIST_0][by][bxr][1] = 0;
+           enc_picture->mv[LIST_1][by][bxr][0] = 0;
+           enc_picture->mv[LIST_1][by][bxr][1] = 0;
+         }
+         else if (pdir8==0) // list 0
+         {
+           enc_picture->mv[LIST_0][by][bxr][0] = all_mv [j][i][LIST_0][ ref][mode8][0];
+           enc_picture->mv[LIST_0][by][bxr][1] = all_mv [j][i][LIST_0][ ref][mode8][1];
+           enc_picture->mv[LIST_1][by][bxr][0] = 0;
+           enc_picture->mv[LIST_1][by][bxr][1] = 0;
+         }
+         else if (pdir8==1) // list 1
+         {
+           enc_picture->mv[LIST_0][by][bxr][0] = 0;
+           enc_picture->mv[LIST_0][by][bxr][1] = 0;          
+           enc_picture->mv[LIST_1][by][bxr][0] = all_mv [j][i][LIST_1][bw_ref][mode8][0];
+           enc_picture->mv[LIST_1][by][bxr][1] = all_mv [j][i][LIST_1][bw_ref][mode8][1];
+         }
+         else if (pdir8==2) // bipredictive
+         {
+           enc_picture->mv[LIST_0][by][bxr][0] = all_mv [j][i][LIST_0][ ref][mode8][0];
+           enc_picture->mv[LIST_0][by][bxr][1] = all_mv [j][i][LIST_0][ ref][mode8][1];                   
+           enc_picture->mv[LIST_1][by][bxr][0] = all_mv [j][i][LIST_1][bw_ref][mode8][0];
+           enc_picture->mv[LIST_1][by][bxr][1] = all_mv [j][i][LIST_1][bw_ref][mode8][1];
+         }
+         else
+         {
+           error("invalid direction mode", 255);
+         }
+       }
+     }
+   }
+   
+   // copy all the motion vectors into rdopt structure
+   // Can simplify this by copying the MV's of the best mode (TBD)
+   if(img->MbaffFrameFlag)
+   {
+     for(i = 0;i<4;i++)
+     {
+       for(j = 0;j<4;j++)
+       {
+         for (k = 0;k<2;k++)
+         {
+           for(l = 0;l<img->max_num_references;l++)
+           {
+             for(m = 0;m<9;m++)
+             {
+               rdopt->all_mv [j][i][k][l][m][0]  = all_mv [j][i][k][l][m][0];
+               rdopt->pred_mv[j][i][k][l][m][0]  = pred_mv[j][i][k][l][m][0];
+               
+               rdopt->all_mv [j][i][k][l][m][1]  = all_mv [j][i][k][l][m][1];
+               rdopt->pred_mv[j][i][k][l][m][1]  = pred_mv[j][i][k][l][m][1];
+             }
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    R-D Cost for a macroblock
+  *
+  * \param lambda
+  *    Lagrange multiplier applied to the rate term.
+  * \param mode
+  *    Macroblock mode to evaluate (0-COPY/DIRECT, 1-16x16, 2-16x8, 3-8x16, P8x8,
+  *    I4MB, I16MB, I8MB, IPCM).
+  * \param min_rdcost
+  *    In/out: best cost so far. Overwritten with this mode's cost when the mode
+  *    is accepted.
+  * \param min_rate
+  *    Out: set to lambda * coeff_rate when the mode is accepted.
+  * \param i16mode
+  *    Intra 16x16 prediction mode; only used when mode is I16MB.
+  *
+  * \return
+  *    1 if the mode was accepted and *min_rdcost / *min_rate were updated,
+  *    0 if the mode was rejected (invalid direct mode, skip vector out of range,
+  *    cost not better than *min_rdcost, lossless constraint violated, or skip not
+  *    allowed by the MBAFF field inference).
+  *
+  * \note
+  *    The function has side effects even when it returns 0: it calls
+  *    SetModesAndRefframeForBlocks() and the residual coding routines, which
+  *    rewrite the current macroblock and enc_picture.
+  *************************************************************************************
+  */
+ int RDCost_for_macroblocks (double   lambda,       // <-- lagrange multiplier
+                             int      mode,         // <-- modus (0-COPY/DIRECT, 1-16x16, 2-16x8, 3-8x16, 4-8x8(+), 5-Intra4x4, 6-Intra16x16)
+                             double*  min_rdcost,   // <-> minimum rate-distortion cost
+                             double*  min_rate,     // --> bitrate of mode which has minimum rate-distortion cost. 
+                             int i16mode )
+ {
+   int         i, j, k; //, k, ****ip4;
+   int         j1, j2;
+   int         rate = 0, coeff_rate = 0;
+   int64       distortion = 0;
+   double      rdcost;
+   Macroblock  *currMB   = &img->mb_data[img->current_mb_nr];
+   Macroblock  *prevMB   = img->current_mb_nr ? &img->mb_data[img->current_mb_nr-1] : NULL;
+   int         bframe    = (img->type==B_SLICE);
+   int         tmp_cc;
+   int         use_of_cc =  (img->type!=I_SLICE &&  input->symbol_mode!=CABAC);
+   int         cc_rate, dummy;
+   int         cr_cbp = 0, uv;
+   
+   //=====
+   //=====  SET REFERENCE FRAMES AND BLOCK MODES
+   //=====
+   SetModesAndRefframeForBlocks (mode);
+   
+   //=====
+   //=====  GET COEFFICIENTS, RECONSTRUCTIONS, CBP
+   //=====
+   if (bframe && mode==0)
+   {
+     // Direct mode is unusable if any 4x4 block has no valid direct prediction direction.
+     int block_x=img->pix_x>>2;
+     int block_y=img->pix_y>>2;    
+     for (j = block_y;j< block_y + 4;j++)
+       for (i = block_x;i<block_x + 4;i++)
+         if (direct_pdir[j][i] < 0)
+           return 0;
+   }
+   
+   // Test MV limits for Skip Mode. This could be necessary for MBAFF case Frame MBs. 
+   if ((img->MbaffFrameFlag) && (!currMB->mb_field) && (img->type==P_SLICE) && (mode==0) )
+   {
+     // The vertical component is checked against a [lower, upper] pair. The original
+     // code used column 4 for both bounds, which degenerates to "mv != limit".
+     // Column 5 is taken as the matching upper bound -- assumes LEVELMVLIMIT has six
+     // columns laid out as (min, max) pairs; TODO confirm against its definition.
+     if ( img->all_mv[0][0][0][0][0][0] < - 8192 
+       || img->all_mv[0][0][0][0][0][0] > 8191 
+       || img->all_mv[0][0][0][0][0][1] < LEVELMVLIMIT[img->LevelIndex][4] 
+       || img->all_mv[0][0][0][0][0][1] > LEVELMVLIMIT[img->LevelIndex][5])
+       return 0;
+   }
+   
+   if (img->AdaptiveRounding)
+   {
+     for (j = 0;j < MB_BLOCK_SIZE;j++)
+     {
+       memset(img->fadjust4x4[0][j], 0, MB_BLOCK_SIZE * sizeof(int));
+       memset(img->fadjust8x8[0][j], 0, MB_BLOCK_SIZE * sizeof(int));
+       memset(img->fadjust4x4Cr[0][0][j], 0, MB_BLOCK_SIZE * sizeof(int));
+       memset(img->fadjust4x4Cr[0][1][j], 0, MB_BLOCK_SIZE * sizeof(int));
+     }
+   }
+   
+   if (mode<P8x8)
+   {
+     LumaResidualCoding ();
+     if(mode==0 && currMB->cbp==0 && currMB->luma_transform_size_8x8_flag==1) //for B_skip, luma_transform_size_8x8_flag=0 only
+       return 0;
+   }
+   else if (mode==P8x8)
+   {
+     SetCoeffAndReconstruction8x8 (currMB);
+   }
+   else if (mode==I4MB)
+   {
+     currMB->cbp = Mode_Decision_for_Intra4x4Macroblock (lambda, &dummy);
+     
+     // Residue Color Transform
+     if(img->residue_transform_flag)
+     {
+       for(i = 0; i<2; i++)
+       {
+         for(j = 0; j<4; j++)
+           for(k = 0; k<4; k++)
+             if(cbp_chroma_block[i][j][k])
+               cr_cbp = 2;
+       }
+       for(uv=0; uv<2; uv++)
+         cr_cbp = dct_chroma_DC(uv, cr_cbp);
+       
+       currMB->cbp += (cr_cbp<<4);
+     }
+   }
+   else if (mode==I16MB)
+   {
+     Intra16x16_Mode_Decision  (currMB, &i16mode);
+   }
+   else if(mode==I8MB)
+   {
+     currMB->cbp = Mode_Decision_for_new_Intra8x8Macroblock(lambda, &dummy);
+     
+     // Residue Color Transform
+     if(img->residue_transform_flag)
+     {
+       for(i = 0; i<2; i++)
+       {
+         for(j = 0; j<4; j++)
+           for(k = 0; k<4; k++)
+             if(cbp_chroma_block[i][j][k])
+               cr_cbp = 2;
+       }     
+       for(uv = 0; uv<2; uv++)
+         cr_cbp = dct_chroma_DC(uv, cr_cbp);
+       
+       currMB->cbp += (cr_cbp<<4);
+     }
+   }
+   else if(mode==IPCM)
+   {
+     // IPCM: the reconstruction is a straight copy of the original samples.
+     for (j = 0; j < MB_BLOCK_SIZE; j++)
+     {
+       j1 = j + img->opix_y;
+       j2 = j + img->pix_y;
+       for (i=img->opix_x; i<img->opix_x+MB_BLOCK_SIZE; i++)        
+         enc_picture->imgY[j2][i] = imgY_org[j1][i];
+     }
+     if (img->yuv_format != YUV400)
+     {
+       // CHROMA
+       for (j = 0; j<img->mb_cr_size_y; j++)
+       {
+         j1 = j + img->opix_c_y;
+         j2 = j + img->pix_c_y;
+         for (i=img->opix_c_x; i<img->opix_c_x+img->mb_cr_size_x; i++)
+         {
+           enc_picture->imgUV[0][j2][i] = imgUV_org[0][j1][i];
+           enc_picture->imgUV[1][j2][i] = imgUV_org[1][j1][i];
+         }
+       }
+     }  
+   }
+ 
+   if (input->rdopt==3 && img->type!=B_SLICE)
+   {
+     // We need the reconstructed prediction residue for the simulated decoders.
+     compute_residue_mb (mode==I16MB?i16mode:-1);
+   }
+   
+   //Rate control
+   if (input->RCEnable)
+   {
+     if (mode == I16MB)
+       memcpy(pred,img->mprr_2[i16mode],MB_PIXELS * sizeof(imgpel));
+     else
+       memcpy(pred,img->mpr,MB_PIXELS * sizeof(imgpel));
+   }
+ 
+   img->i16offset = 0;
+   dummy = 0;
+   if ((!(img->residue_transform_flag && (mode==I4MB || mode==I16MB || mode==I8MB))) && img->yuv_format!=YUV400)
+     ChromaResidualCoding (&dummy);
+   if (mode==I16MB)     
+     img->i16offset = I16Offset  (currMB->cbp, i16mode);
+   
+   //=====
+   //=====   GET DISTORTION
+   //=====
+   // LUMA
+   if (input->rdopt==3 && img->type!=B_SLICE)
+   {
+     // Average the luma distortion over all simulated decoders.
+     for (k = 0; k<input->NoOfDecoders ;k++)
+     {
+       decode_one_mb (k, currMB);
+       for (j = 0; j<MB_BLOCK_SIZE; j++)
+       {
+         for (i=img->opix_x; i<img->opix_x+MB_BLOCK_SIZE; i++)
+           distortion += img->quad [imgY_org[img->opix_y+j][i] - decs->decY[k][img->opix_y+j][i]];
+       }
+     }
+     distortion /= input->NoOfDecoders;
+   }
+   else
+   {
+     for (j = 0; j < MB_BLOCK_SIZE; j++)
+     {
+       j1 = j + img->opix_y;
+       j2 = j + img->pix_y;
+       for (i=img->opix_x; i<img->opix_x+MB_BLOCK_SIZE; i++)        
+         distortion += img->quad [imgY_org[j1][i] - enc_picture->imgY[j2][i]];
+     }
+   }
+   
+   if (img->yuv_format != YUV400)
+   {
+     // CHROMA
+     for (j = 0; j<img->mb_cr_size_y; j++)
+     {
+       j1 = j + img->opix_c_y;
+       j2 = j + img->pix_c_y;
+       for (i=img->opix_c_x; i<img->opix_c_x+img->mb_cr_size_x; i++)
+       {
+         distortion += img->quad [imgUV_org[0][j1][i] - enc_picture->imgUV[0][j2][i]];
+         distortion += img->quad [imgUV_org[1][j1][i] - enc_picture->imgUV[1][j2][i]];
+       }
+     }
+   }  
+   
+   //=====   S T O R E   C O D I N G   S T A T E   =====
+   //---------------------------------------------------
+   store_coding_state (cs_cm);
+   
+   //=====
+   //=====   GET RATE
+   //=====
+   //----- macroblock header -----
+   if (use_of_cc)
+   {
+     if (currMB->mb_type!=0 || (bframe && currMB->cbp!=0))
+     {
+       // cod counter and macroblock mode are written ==> do not consider code counter
+       tmp_cc = img->cod_counter;
+       rate   = writeMBLayer (1, &coeff_rate);
+       ue_linfo (tmp_cc, dummy, &cc_rate, &dummy);
+       rate  -= cc_rate;
+       img->cod_counter = tmp_cc;
+     }
+     else
+     {
+       // cod counter is just increased  ==> get additional rate
+       ue_linfo (img->cod_counter+1, dummy, &rate,    &dummy);
+       ue_linfo (img->cod_counter,   dummy, &cc_rate, &dummy);
+       rate -= cc_rate;
+     }
+   }
+   else
+   {
+     rate = writeMBLayer (1, &coeff_rate);
+   }
+   
+   //=====   R E S T O R E   C O D I N G   S T A T E   =====
+   //-------------------------------------------------------
+   reset_coding_state (cs_cm);
+ 
+   // The rate term is floored at 0.5 so a zero-bit mode still carries a small cost.
+   rdcost = (double)distortion + lambda * max(0.5,(double)rate);
+ 
+   if (rdcost >= *min_rdcost ||
+     ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1 && distortion!=0))
+   {
+ #if FASTMODE
+     // Reordering RDCost comparison order of mode 0 and mode 1 in P_SLICE
+     // if RDcost of mode 0 and mode 1 is same, we choose best_mode is 0
+     // This might not always be good since mode 0 is more biased towards rate than quality.
+     if((img->type!=P_SLICE || mode != 0 || rdcost != *min_rdcost) || input->ProfileIDC>=FREXT_HP)
+ #endif
+       return 0;
+   }
+   
+   
+   if ((img->MbaffFrameFlag) && (mode ? 0: ((img->type == B_SLICE) ? !currMB->cbp:1)))  // AFF and current is skip
+   {
+     // An odd current_mb_nr is non-zero, so prevMB is never NULL inside this branch.
+     if (img->current_mb_nr & 0x01) //bottom
+     {
+       if (prevMB->mb_type ? 0:((img->type == B_SLICE) ? !prevMB->cbp:1)) //top is skip
+       {
+         if (!(field_flag_inference() == currMB->mb_field)) //skip only allowed when correct inference
+           return 0;
+       }
+     }
+   }
+ 
+   //=====   U P D A T E   M I N I M U M   C O S T   =====
+   //-----------------------------------------------------
+   *min_rdcost = rdcost;
+   *min_rate = lambda * (double)coeff_rate;
+   return 1;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Store adaptive rounding parameters
+  *
+  * \param mode
+  *    Macroblock mode whose rounding adjustments are to be saved. P8x8 and the
+  *    other non-intra modes write the bestInterFAdjust* buffers; I4MB, I16MB and
+  *    I8MB write the bestIntraFAdjust* buffers.
+  * \param currMB
+  *    Current macroblock; its luma_transform_size_8x8_flag selects between the
+  *    4x4 and 8x8 adjustment tables for the inter modes.
+  *
+  * \par Side effects
+  *    Copies entries of img->fadjust4x4 / fadjust8x8 / fadjust4x4Cr /
+  *    fadjust8x8Cr into the bestInterFAdjust* or bestIntraFAdjust* buffers. The
+  *    chroma buffers are only written for i < 8 and j < 8.
+  *************************************************************************************
+  */
+ void store_adaptive_rounding_parameters (int mode, Macroblock *currMB)
+ {
+   int i,j;
+ 
+   for (j = 0; j < MB_BLOCK_SIZE; j++)
+   {
+     for (i = 0; i < MB_BLOCK_SIZE; i++)
+     {      
+       if (mode == P8x8)
+       {
+         if (currMB->luma_transform_size_8x8_flag)
+         {
+           bestInterFAdjust8x8[j][i]=img->fadjust8x8[2][j][i]; 
+           if ((i >> 3) ==0 && (j >> 3) == 0)
+           {
+             bestInterFAdjust4x4Cr[0][j][i]=img->fadjust8x8Cr[0][0][j][i]; 
+             bestInterFAdjust4x4Cr[1][j][i]=img->fadjust8x8Cr[0][1][j][i]; 
+           }
+         }
+         else
+         {
+           bestInterFAdjust4x4[j][i]=img->fadjust4x4[3][j][i]; 
+           
+           if ((i >> 3) ==0 && (j >> 3) == 0)
+           {
+             bestInterFAdjust4x4Cr[0][j][i]=img->fadjust4x4Cr[2][0][j][i]; 
+             bestInterFAdjust4x4Cr[1][j][i]=img->fadjust4x4Cr[2][1][j][i]; 
+           }
+         }
+       }
+       else if ((mode != I4MB)&&(mode != I16MB)&&(mode != I8MB))
+       {
+         if (currMB->luma_transform_size_8x8_flag)
+           bestInterFAdjust8x8[j][i]=img->fadjust8x8[0][j][i]; 
+         else
+           bestInterFAdjust4x4[j][i]=img->fadjust4x4[0][j][i]; 
+         
+         if ((i >> 3) ==0 && (j >> 3) == 0)
+         {
+           bestInterFAdjust4x4Cr[0][j][i]=img->fadjust4x4Cr[0][0][j][i]; 
+           bestInterFAdjust4x4Cr[1][j][i]=img->fadjust4x4Cr[0][1][j][i]; 
+         }        
+       }
+       else if (mode != I8MB)
+       {
+         // Here mode is I4MB or I16MB: table 1 for I4MB, table 2 for I16MB.
+         // The comparison must be parenthesized; "1 + mode == I16MB" parses as
+         // "(1 + mode) == I16MB" and would yield index 0 or 1 instead.
+         bestIntraFAdjust4x4[j][i]=img->fadjust4x4[1 + (mode == I16MB)][j][i]; 
+         
+         if ((i >> 3) ==0 && (j >> 3) == 0)
+         {
+           bestIntraFAdjust4x4Cr[0][j][i]=img->fadjust4x4Cr[1][0][j][i]; 
+           bestIntraFAdjust4x4Cr[1][j][i]=img->fadjust4x4Cr[1][1][j][i]; 
+         } 
+       }
+       else
+       {
+         bestIntraFAdjust8x8[j][i]=img->fadjust8x8[1][j][i]; 
+         
+         if ((i >> 3) ==0 && (j >> 3) == 0)
+         {
+           // Same chroma source as the I4MB/I16MB branch: table 1, planes 0 and 1.
+           // The original read [0][1] for plane 0, a transposed index pair.
+           bestIntraFAdjust4x4Cr[0][j][i]=img->fadjust4x4Cr[1][0][j][i]; 
+           bestIntraFAdjust4x4Cr[1][j][i]=img->fadjust4x4Cr[1][1][j][i]; 
+         } 
+       }
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Store macroblock parameters
+  *
+  * \param mode
+  *    Macroblock mode being recorded; stored in best_mode.
+  *
+  * \par Side effects
+  *    Saves the state of the current macroblock (img->mb_data[img->current_mb_nr])
+  *    into file-level variables:
+  *    - best_mode, best_c_imode, best_i16offset, bi_pred_me, b8mode, b8pdir
+  *    - intra prediction modes (b4_intra_pred_modes, b8_intra_pred_modes8x8,
+  *      b4_ipredmode, b8_ipredmode8x8)
+  *    - reconstructed samples rec_mbY and, unless yuv_format is YUV400,
+  *      rec_mbU / rec_mbV
+  *    - adaptive rounding data when img->AdaptiveRounding is set
+  *    - decs->decY_best when input->rdopt is 3 and the slice is not a B slice
+  *    - cbp, cbp_blk, luma_transform_size_8x8_flag, frefframe and, for B slices,
+  *      brefframe
+  *
+  * \note
+  *    Coefficients are not copied: the cofAC / cofDC pointers are exchanged with
+  *    img->cofAC / img->cofDC, so afterwards img->cofAC and img->cofDC point at
+  *    the buffers that were previously the stored ones.
+  *************************************************************************************
+  */
+ void store_macroblock_parameters (int mode)
+ {
+   int  i, j, k, ****i4p, ***i3p;
+   Macroblock *currMB  = &img->mb_data[img->current_mb_nr];
+   int        bframe   = (img->type==B_SLICE);
+   
+   //--- store best mode ---
+   best_mode = mode;
+   best_c_imode = currMB->c_ipred_mode;
+   best_i16offset = img->i16offset;
+   
+   // If condition is not really necessary.
+   bi_pred_me = (mode == 1) ? currMB->bi_pred_me : 0;  
+   
+   memcpy(b8mode, currMB->b8mode, BLOCK_MULTIPLE * sizeof(int));
+   memcpy(b8pdir, currMB->b8pdir, BLOCK_MULTIPLE * sizeof(int));
+ 
+   // Residue Color Transform
+   //for (k = 0, j=img->block_y; j<img->block_y+BLOCK_MULTIPLE; j++, k+=BLOCK_MULTIPLE)
+   memcpy(b4_intra_pred_modes,currMB->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+   memcpy(b8_intra_pred_modes8x8,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+   for (j = 0 ; j<BLOCK_MULTIPLE; j++)
+   {
+     memcpy(&b4_ipredmode[j * BLOCK_MULTIPLE],&img->ipredmode[img->block_y + j][img->block_x],BLOCK_MULTIPLE * sizeof(char));
+     memcpy(b8_ipredmode8x8[j],&img->ipredmode8x8[img->block_y + j][img->block_x],BLOCK_MULTIPLE * sizeof(char));
+   }  
+   //--- reconstructed blocks ----
+   for (j = 0; j < MB_BLOCK_SIZE; j++)
+     memcpy(rec_mbY[j],&enc_picture->imgY[img->pix_y+j][img->pix_x], MB_BLOCK_SIZE * sizeof(imgpel));
+ 
+   if (img->AdaptiveRounding)
+     store_adaptive_rounding_parameters (mode, currMB);
+ 
+   if (img->yuv_format != YUV400)
+   {
+     for (j = 0; j<img->mb_cr_size_y; j++)
+     {
+       memcpy(rec_mbU[j],&enc_picture->imgUV[0][img->pix_c_y+j][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+       memcpy(rec_mbV[j],&enc_picture->imgUV[1][img->pix_c_y+j][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+     }
+   }
+   
+   
+   //--- store results of decoders ---
+   if (input->rdopt==3 && img->type!=B_SLICE)
+   {
+     for (k = 0; k<input->NoOfDecoders; k++)
+     {
+       for (j=img->pix_y; j<img->pix_y+16; j++)
+         for (i=img->pix_x; i<img->pix_x+16; i++)
+         {
+           // Keep the decoded values of each MB for updating the ref frames
+           decs->decY_best[k][j][i] = decs->decY[k][j][i];
+         }
+     }
+   }
+   
+   //--- coeff, cbp, kac ---
+   if (mode || bframe)
+   {
+     i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;  // pointer swap, no coefficient data is copied
+     i3p=cofDC; cofDC=img->cofDC; img->cofDC=i3p;
+     cbp     = currMB->cbp;
+     cbp_blk = currMB->cbp_blk;
+   }
+   else
+   {
+     cbp_blk = cbp = 0;  // mode 0 outside a B slice: nothing coded, buffers left unswapped
+   }
+   
+   //--- store transform size ---
+   luma_transform_size_8x8_flag = currMB->luma_transform_size_8x8_flag;
+   
+   
+   for (j = 0; j<4; j++)
+     memcpy(frefframe[j],&enc_picture->ref_idx[LIST_0][img->block_y+j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+ 
+   if (bframe)
+   {
+     for (j = 0; j<4; j++)
+       memcpy(brefframe[j],&enc_picture->ref_idx[LIST_1][img->block_y+j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+     }  
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Set stored macroblock parameters
+  *
+  *    Writes the values held in the file-level "best" variables (best_mode, cbp,
+  *    cbp_blk, b8mode, b8pdir, rec_mbY/U/V, frefframe, brefframe, ...) back into
+  *    the current macroblock, img and enc_picture:
+  *      - reconstructed luma/chroma samples are copied into enc_picture
+  *      - the cofAC/cofDC buffer pointers are exchanged with those of img
+  *      - cbp, cbp_blk, mb_type, b8mode, b8pdir, bi_pred_me and the transform
+  *        size flag of the current macroblock are set
+  *      - ref_idx, ref_pic_id and mv of enc_picture are rewritten for each of the
+  *        4x4 blocks of the macroblock (LIST_0, and LIST_1 in B slices)
+  *      - intra prediction modes are restored
+  *      - SetMotionVectorsMB() is called last
+  *    When img->MbaffFrameFlag is set, the same values are mirrored into rdopt.
+  *
+  *    The statement order matters: mb_type is written before the IS_INTRA /
+  *    IS_OLDINTRA tests that read currMB, and RestoreMV8x8() runs before
+  *    img->all_mv is read.
+  *************************************************************************************
+  */
+ void set_stored_macroblock_parameters ()
+ {
+   int  i, j, k, ****i4p, ***i3p;
+   Macroblock  *currMB  = &img->mb_data[img->current_mb_nr];
+   int         mode     = best_mode;
+   int         bframe   = (img->type==B_SLICE);
+   char    **ipredmodes = img->ipredmode;
+   
+   imgpel        **imgY  = enc_picture->imgY;
+   imgpel       ***imgUV = enc_picture->imgUV;
+   int        block_x, block_y;  
+   short   *cur_mv;
+   
+   //===== reconstruction values: stored luma rows go back into the picture =====
+   for (j = 0; j < MB_BLOCK_SIZE; j++)
+   {
+     memcpy(&imgY[img->pix_y+j][img->pix_x],rec_mbY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+     if(img->MbaffFrameFlag)
+       memcpy(rdopt->rec_mbY[j],rec_mbY[j], MB_BLOCK_SIZE * sizeof(imgpel));
+   } 
+   
+   if (img->AdaptiveRounding)
+   {
+     update_offset_params(mode,luma_transform_size_8x8_flag);
+   }
+   
+   if (img->yuv_format != YUV400) // chroma planes are skipped for YUV400
+   {
+     for (j = 0; j<img->mb_cr_size_y; j++)
+     {
+       memcpy(&imgUV[0][img->pix_c_y+j][img->pix_c_x],rec_mbU[j], img->mb_cr_size_x * sizeof(imgpel));
+       memcpy(&imgUV[1][img->pix_c_y+j][img->pix_c_x],rec_mbV[j], img->mb_cr_size_x * sizeof(imgpel));
+       if(img->MbaffFrameFlag)
+       {
+         memcpy(rdopt->rec_mbU[j],rec_mbU[j], img->mb_cr_size_x * sizeof(imgpel));
+         memcpy(rdopt->rec_mbV[j],rec_mbV[j], img->mb_cr_size_x * sizeof(imgpel));
+       }
+     }
+   }
+   
+   //===== coefficients and cbp (buffer pointers are exchanged, not copied) =====
+   i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
+   i3p=cofDC; cofDC=img->cofDC; img->cofDC=i3p;
+   currMB->cbp      = cbp;
+   currMB->cbp_blk = cbp_blk;
+   //==== macroblock type (must be set before the IS_INTRA / IS_OLDINTRA tests below) ====
+   currMB->mb_type = mode;
+   
+   if(img->MbaffFrameFlag)
+   {
+     rdopt->mode = mode;
+     rdopt->i16offset = img->i16offset;
+     rdopt->cbp = cbp;
+     rdopt->cbp_blk = cbp_blk;
+     rdopt->mb_type  = mode;
+     
+     rdopt->prev_qp=currMB->prev_qp;
+     rdopt->prev_delta_qp=currMB->prev_delta_qp;
+     rdopt->delta_qp = currMB->delta_qp;
+     rdopt->qp=currMB->qp;
+     rdopt->prev_cbp=currMB->prev_cbp;
+     
+     for(i = 0;i<4+img->num_blk8x8_uv;i++)
+     {
+       for(j = 0;j<4;j++)
+         for(k = 0;k<2;k++)
+           memcpy(rdopt->cofAC[i][j][k], img->cofAC[i][j][k], 65 * sizeof(int));
+     }     
+     for(i = 0;i<3;i++)
+       for(k = 0;k<2;k++)
+         memcpy(rdopt->cofDC[i][k], img->cofDC[i][k], 18 * sizeof(int));
+   }
+   
+ 
+   memcpy(currMB->b8mode,b8mode, BLOCK_MULTIPLE * sizeof(int));
+   memcpy(currMB->b8pdir,b8pdir, BLOCK_MULTIPLE * sizeof(int));
+   if(img->MbaffFrameFlag)
+   {
+     memcpy(rdopt->b8mode,b8mode, BLOCK_MULTIPLE * sizeof(int));
+     memcpy(rdopt->b8pdir,b8pdir, BLOCK_MULTIPLE * sizeof(int));    
+   }
+     
+   currMB->bi_pred_me = currMB->mb_type == 1 ? bi_pred_me : 0;  // only kept for mb_type 1
+   
+   
+   // if P8x8 mode and transform size 4x4 was chosen, restore motion vector data for this transform size
+   if (mode == P8x8 && !luma_transform_size_8x8_flag && input->Transform8x8Mode)
+     RestoreMV8x8(1);
+   
+   //==== transform size flag: cleared when no luma cbp bit is set and the MB is neither old-intra nor I8MB ====
+   if (((currMB->cbp & 15) == 0) && !(IS_OLDINTRA(currMB) || currMB->mb_type == I8MB))
+     currMB->luma_transform_size_8x8_flag = 0;
+   else
+     currMB->luma_transform_size_8x8_flag = luma_transform_size_8x8_flag;
+   
+   rdopt->luma_transform_size_8x8_flag  = currMB->luma_transform_size_8x8_flag;
+   
+   if (input->rdopt==3 && img->type!=B_SLICE)
+   {
+     //! save the MB Mode of every macroblock
+     decs->dec_mb_mode[img->mb_x][img->mb_y] = mode;
+   }
+   
+   //==== reference frames (LIST_0 for every 4x4 block of the macroblock) =====
+   for (j = 0; j < 4; j++)
+   {
+     block_y = img->block_y + j;
+     for (i = 0; i < 4; i++)
+     {
+       block_x = img->block_x + i;
+       k = 2*(j >> 1)+(i >> 1); // index of the 8x8 block that contains 4x4 block (i,j)
+       
+       // backward prediction or intra
+       if ((currMB->b8pdir[k] == 1) || IS_INTRA(currMB))
+       {
+         enc_picture->ref_idx    [LIST_0][block_y][block_x]    = -1;
+         enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = -1;          
+         enc_picture->mv         [LIST_0][block_y][block_x][0] = 0;
+         enc_picture->mv         [LIST_0][block_y][block_x][1] = 0;
+         if(img->MbaffFrameFlag)
+           rdopt->refar[LIST_0][j][i] = -1;
+       }
+       else
+       {
+         if (currMB->bi_pred_me && (currMB->b8pdir[k] == 2) && currMB->mb_type==1)
+         {
+           // bi_pred_me == 1 selects bipred_mv1, any other non-zero value bipred_mv2; reference index is 0
+           cur_mv = currMB->bi_pred_me == 1 
+             ? img->bipred_mv1[j][i][LIST_0][0][currMB->b8mode[k]] 
+             : img->bipred_mv2[j][i][LIST_0][0][currMB->b8mode[k]];
+           
+           enc_picture->ref_idx    [LIST_0][block_y][block_x] = 0;                         
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x] = enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][0];
+           enc_picture->mv         [LIST_0][block_y][block_x][0] = cur_mv[0];
+           enc_picture->mv         [LIST_0][block_y][block_x][1] = cur_mv[1];
+           if(img->MbaffFrameFlag)
+             rdopt->refar[LIST_0][j][i] = 0;        
+         }
+         else
+         {
+           // regular case: stored forward reference and the matching entry of img->all_mv
+           cur_mv = img->all_mv[j][i][LIST_0][(short)frefframe[j][i]][currMB->b8mode[k]];
+           
+           enc_picture->ref_idx    [LIST_0][block_y][block_x]    = frefframe[j][i];
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = enc_picture->ref_pic_num[LIST_0 + currMB->list_offset][(short)frefframe[j][i]];
+           enc_picture->mv         [LIST_0][block_y][block_x][0] = cur_mv[0];
+           enc_picture->mv         [LIST_0][block_y][block_x][1] = cur_mv[1];
+           if(img->MbaffFrameFlag)
+             rdopt->refar[LIST_0][j][i] = frefframe[j][i];
+         }
+       }
+       
+       // forward prediction or intra (NOTE(review): the same LIST_1 reset is repeated in the B-slice loop below)
+       if ((currMB->b8pdir[k] == 0) || IS_INTRA(currMB))
+       {
+         enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+         enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+         enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+         enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+         if(img->MbaffFrameFlag)
+           rdopt->refar[LIST_1][j][i] = -1;
+       }
+     }
+   }
+   
+   if (bframe) // LIST_1 is only filled in for B slices
+   {
+     for (j=0; j<4; j++)
+     {
+       block_y = img->block_y + j;
+       for (i=0; i<4; i++)
+       {          
+         block_x = img->block_x + i;
+         k = 2*(j >> 1)+(i >> 1);
+ 
+         // forward
+         if (IS_INTRA(currMB)||(currMB->b8pdir[k] == 0))
+         {
+           enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+           enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+           enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+           enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+           if(img->MbaffFrameFlag)
+             rdopt->refar[LIST_1][j][i] = -1;
+         }
+         else
+         {
+           if (currMB->bi_pred_me && (currMB->b8pdir[k] == 2) && currMB->mb_type==1)
+           {
+             cur_mv = currMB->bi_pred_me == 1 
+               ? img->bipred_mv1[j][i][LIST_1][0][currMB->b8mode[k]] 
+               : img->bipred_mv2[j][i][LIST_1][0][currMB->b8mode[k]];
+             
+             enc_picture->ref_idx    [LIST_1][block_y][block_x] = 0; 
+             enc_picture->ref_pic_id [LIST_1][block_y][block_x] = enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][0];
+             enc_picture->mv         [LIST_1][block_y][block_x][0] = cur_mv[0];
+             enc_picture->mv         [LIST_1][block_y][block_x][1] = cur_mv[1];        
+             if(img->MbaffFrameFlag)
+               rdopt->refar[LIST_1][j][i] = 0;        
+           }
+           else
+           {
+             cur_mv = img->all_mv[j][i][LIST_1][(short)brefframe[j][i]][currMB->b8mode[k]];
+             
+             enc_picture->ref_idx    [LIST_1][block_y][block_x] = brefframe[j][i];
+             enc_picture->ref_pic_id [LIST_1][block_y][block_x] = enc_picture->ref_pic_num[LIST_1 + currMB->list_offset][(short)brefframe[j][i]];
+             enc_picture->mv         [LIST_1][block_y][block_x][0] = cur_mv[0];
+             enc_picture->mv         [LIST_1][block_y][block_x][1] = cur_mv[1];
+             if(img->MbaffFrameFlag)
+               rdopt->refar[LIST_1][j][i] = brefframe[j][i];
+           }
+         }
+       }
+     }
+   }
+   
+   //==== intra prediction modes ====
+   currMB->c_ipred_mode = best_c_imode;
+   img->i16offset = best_i16offset;
+   
+   if(currMB->mb_type == I8MB) // NOTE(review): both copies in this branch are overwritten by the second I8MB block further down
+   {
+     memcpy(currMB->intra_pred_modes,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
+     for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+       memcpy(&img->ipredmode[j][img->block_x],&img->ipredmode8x8[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+   }
+   else if (mode!=I4MB && mode!=I8MB) // neither I4MB nor I8MB: all prediction modes are set to DC_PRED
+   {
+     memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
+     for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
+       memset(&img->ipredmode[j][img->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
+   }
+   // Residue Color Transform
+   else if (mode == I4MB)
+   {
+     memcpy(currMB->intra_pred_modes,b4_intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+     for(j = 0; j < BLOCK_MULTIPLE; j++)
+       memcpy(&img->ipredmode[img->block_y + j][img->block_x],&b4_ipredmode[BLOCK_MULTIPLE * j], BLOCK_MULTIPLE * sizeof(char));
+   }
+   
+   if(currMB->mb_type == I8MB) // restores the stored 8x8 intra prediction modes
+   {
+     memcpy(currMB->intra_pred_modes,b8_intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));            
+     for(j = 0; j < BLOCK_MULTIPLE; j++)
+       memcpy(&img->ipredmode[img->block_y + j][img->block_x],&b8_ipredmode8x8[j], BLOCK_MULTIPLE * sizeof(char));
+   }
+   
+   
+   if(img->MbaffFrameFlag)
+   {
+     rdopt->c_ipred_mode = currMB->c_ipred_mode;
+     rdopt->i16offset = img->i16offset;  
+     memcpy(rdopt->intra_pred_modes,currMB->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+     for(j = img->block_y; j < img->block_y +BLOCK_MULTIPLE; j++)
+       memcpy(&rdopt->ipredmode[j][img->block_x],&ipredmodes[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+   }
+   
+   //==== motion vectors =====
+   SetMotionVectorsMB (currMB, bframe);
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Set reference frames and motion vectors
+  *
+  *    Fills enc_picture->ref_idx, ref_pic_id and mv for the partition that starts
+  *    at 8x8 block "block" of the current macroblock. The partition extent is
+  *    taken from input->part_size[pmode], where pmode is mode for modes 1..3
+  *    and 4 otherwise.
+  *
+  * \param block
+  *    8x8 block index; the start offsets inside the macroblock are
+  *    j0 = (block >> 1) << 1 and i0 = (block & 1) << 1 (in 4x4 block units)
+  * \param mode
+  *    mode index used to select the entry of img->all_mv / bipred_mv1 / bipred_mv2;
+  *    in B slices mode 0 takes pdir, fwref and bwref per 4x4 block from
+  *    direct_pdir / direct_ref_idx instead of from the arguments
+  * \param pdir
+  *    prediction direction: < 0 clears both lists, 0 uses LIST_0, 1 uses LIST_1,
+  *    2 uses both lists (the LIST_1 handling applies to B slices only)
+  * \param fwref
+  *    LIST_0 reference index
+  * \param bwref
+  *    LIST_1 reference index (only read in B slices)
+  *************************************************************************************
+  */
+ void SetRefAndMotionVectors (int block, int mode, int pdir, int fwref, int bwref)
+ {
+   int     i, j=0;
+   int     bslice  = (img->type==B_SLICE);
+   int     pmode   = (mode==1||mode==2||mode==3?mode:4);
+   int     j0      = ((block >> 1)<<1);
+   int     i0      = ((block & 0x01)<<1);
+   int     j1      = j0 + (input->part_size[pmode][1]);
+   int     i1      = i0 + (input->part_size[pmode][0]);
+   int     block_x, block_y;
+   short   *cur_mv;
+   Macroblock  *currMB  = &img->mb_data[img->current_mb_nr];
+   
+   if (pdir<0) // no prediction direction: mark both lists as unused and zero the vectors
+   {
+     for (j = img->block_y + j0; j < img->block_y + j1; j++)
+     {
+       for (i=img->block_x + i0; i<img->block_x +i1; i++)
+       {
+         enc_picture->ref_pic_id[LIST_0][j][i] = -1;
+         enc_picture->ref_pic_id[LIST_1][j][i] = -1;
+       }
+       // NOTE(review): the memsets below assume the entries of one row are contiguous in memory - confirm against the allocation
+       memset(&enc_picture->ref_idx[LIST_0][j][img->block_x + i0], -1, (input->part_size[pmode][0]) * sizeof(char));
+       memset(&enc_picture->ref_idx[LIST_1][j][img->block_x + i0], -1, (input->part_size[pmode][0]) * sizeof(char));
+       memset(enc_picture->mv[LIST_0][j][img->block_x + i0], 0, 2*(input->part_size[pmode][0]) * sizeof(short));
+       memset(enc_picture->mv[LIST_1][j][img->block_x + i0], 0, 2*(input->part_size[pmode][0]) * sizeof(short));
+     }
+     return;
+   }
+   
+   if (!bslice) // not a B slice: only LIST_0 is written
+   {
+     for (j=j0; j<j1; j++)
+     {
+       block_y = img->block_y + j;
+       memset(&enc_picture->ref_idx   [LIST_0][block_y][img->block_x + i0], fwref, (input->part_size[pmode][0]) * sizeof(char));
+       for (i=i0; i<i1; i++)
+       {
+         block_x = img->block_x + i;        
+         cur_mv = img->all_mv[j][i][LIST_0][fwref][mode];                
+         enc_picture->mv        [LIST_0][block_y][block_x][0] = cur_mv[0];
+         enc_picture->mv        [LIST_0][block_y][block_x][1] = cur_mv[1];
+         enc_picture->ref_pic_id[LIST_0][block_y][block_x] = enc_picture->ref_pic_num[LIST_0+currMB->list_offset][fwref];
+       }
+     }
+     return;
+   }
+   else
+   {
+     for (j=j0; j<j1; j++)
+     {
+       block_y = img->block_y + j;
+       for (i=i0; i<i1; i++)
+       {
+         block_x = img->block_x + i;
+         if (mode==0) // mode 0: direction and references come from the direct tables, per 4x4 block
+         {
+           pdir  = direct_pdir[block_y][block_x];
+           fwref = direct_ref_idx[LIST_0][block_y][block_x];
+           bwref = direct_ref_idx[LIST_1][block_y][block_x];
+         }
+         
+         if ((pdir==0 || pdir==2)) // LIST_0 is used
+         {
+           if (currMB->bi_pred_me && (pdir == 2) && mode == 1)
+           {
+             // bi_pred_me == 1 selects bipred_mv1, any other non-zero value bipred_mv2; reference index is 0
+             cur_mv = currMB->bi_pred_me == 1 
+               ? img->bipred_mv1[j][i][LIST_0][0][mode]
+               : img->bipred_mv2[j][i][LIST_0][0][mode];
+             
+             enc_picture->mv        [LIST_0][block_y][block_x][0] = cur_mv[0];
+             enc_picture->mv        [LIST_0][block_y][block_x][1] = cur_mv[1];
+             enc_picture->ref_idx   [LIST_0][block_y][block_x]    = 0;            
+             enc_picture->ref_pic_id[LIST_0][block_y][block_x]    = enc_picture->ref_pic_num[LIST_0+currMB->list_offset][0];
+           }
+           else
+           {
+             cur_mv = img->all_mv[j][i][LIST_0][fwref][mode];
+             
+             enc_picture->mv        [LIST_0][block_y][block_x][0] = cur_mv[0];
+             enc_picture->mv        [LIST_0][block_y][block_x][1] = cur_mv[1];
+             enc_picture->ref_idx   [LIST_0][block_y][block_x] = fwref;
+             enc_picture->ref_pic_id[LIST_0][block_y][block_x] = 
+             enc_picture->ref_pic_num[LIST_0+currMB->list_offset][(short)enc_picture->ref_idx[LIST_0][block_y][block_x]];
+           }
+         }
+         else // LIST_0 is not used: clear it
+         {
+           enc_picture->mv        [LIST_0][block_y][block_x][0] = 0;
+           enc_picture->mv        [LIST_0][block_y][block_x][1] = 0;
+           enc_picture->ref_idx   [LIST_0][block_y][block_x]    = -1;
+           enc_picture->ref_pic_id[LIST_0][block_y][block_x]    = -1;
+         }
+         
+         if ((pdir==1 || pdir==2)) // LIST_1 is used
+         {
+           if (currMB->bi_pred_me && (pdir == 2) && mode == 1)
+           {
+             cur_mv = currMB->bi_pred_me == 1 
+               ? img->bipred_mv1[j][i][LIST_1][0][mode]
+               : img->bipred_mv2[j][i][LIST_1][0][mode];
+             
+             enc_picture->mv        [LIST_1][block_y][block_x][0] = cur_mv[0];
+             enc_picture->mv        [LIST_1][block_y][block_x][1] = cur_mv[1];
+             enc_picture->ref_idx   [LIST_1][block_y][block_x]    = 0;            
+             enc_picture->ref_pic_id[LIST_1][block_y][block_x]    = enc_picture->ref_pic_num[LIST_1+currMB->list_offset][0];
+           }
+           else
+           {
+             cur_mv = img->all_mv[j][i][LIST_1][bwref][mode];
+             
+             enc_picture->mv        [LIST_1][block_y][block_x][0] = cur_mv[0];
+             enc_picture->mv        [LIST_1][block_y][block_x][1] = cur_mv[1];
+             enc_picture->ref_idx   [LIST_1][block_y][block_x] = bwref;
+             enc_picture->ref_pic_id[LIST_1][block_y][block_x] = 
+             enc_picture->ref_pic_num[LIST_1+currMB->list_offset][(short)enc_picture->ref_idx[LIST_1][block_y][block_x]];
+           }
+         }
+         else // LIST_1 is not used: clear it
+         {
+           enc_picture->mv        [LIST_1][block_y][block_x][0] = 0;
+           enc_picture->mv        [LIST_1][block_y][block_x][1] = 0;
+           enc_picture->ref_idx   [LIST_1][block_y][block_x]    = -1;
+           enc_picture->ref_pic_id[LIST_1][block_y][block_x]    = -1;
+         }
+       }
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Skip macroblock field inference: derive the field flag of the current
+  *    macroblock from its available neighbours
+  * \return
+  *    mb_field of neighbour A if it is available, otherwise mb_field of
+  *    neighbour B if it is available, otherwise 0
+  *************************************************************************************
+  */
+ int field_flag_inference()
+ {
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+ 
+   // neighbour A takes precedence whenever it is available
+   if (currMB->mbAvailA)
+     return img->mb_data[currMB->mbAddrA].mb_field;
+ 
+   // otherwise check the top macroblock pair (neighbour B)
+   if (currMB->mbAvailB)
+     return img->mb_data[currMB->mbAddrB].mb_field;
+ 
+   // no neighbour available
+   return 0;
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Store motion vectors for 8x8 partition
+  *
+  *    Copies the motion vectors and predictors of the 2x2 group of 4x4 blocks that
+  *    forms 8x8 block "block8x8" from img->all_mv / img->pred_mv into
+  *    all_mv8x8[dir] / pred_mv8x8[dir].
+  *
+  * \param dir       first index into all_mv8x8 / pred_mv8x8
+  * \param block8x8  8x8 block index (0..3)
+  * \param mode      mode index into all_mv / pred_mv (B slices; otherwise 4 is used)
+  * \param ref       LIST_0 reference index
+  * \param bw_ref    LIST_1 reference index (B slices only)
+  * \param pdir8     prediction direction: non-B slices copy LIST_0 when >= 0;
+  *                  B slices accept 0 (LIST_0), 1 (LIST_1), 2 (both) and report
+  *                  an error for any other value
+  * \param bframe    non-zero for a B slice
+  *************************************************************************************
+  */
+ 
+ void StoreMVBlock8x8(int dir, int block8x8, int mode, int ref, int bw_ref, int pdir8, int bframe)
+ {
+   short ******all_mv  = img->all_mv;
+   short ******pred_mv = img->pred_mv;
+   int   x_first = (block8x8 & 0x01) << 1;
+   int   y_first = (block8x8 >> 1) << 1;
+   int   x_end   = x_first + 2;
+   int   y_end   = y_first + 2;
+   int   use_l0, use_l1, mv_mode;
+   int   x, y;
+   short *src_mv, *src_pmv;
+ 
+   if (bframe)
+   {
+     if (pdir8 < 0 || pdir8 > 2)
+     {
+       error("invalid direction mode", 255);
+       return;
+     }
+     use_l0  = (pdir8 != 1);   // forward or bidir
+     use_l1  = (pdir8 != 0);   // backward or bidir
+     mv_mode = mode;
+   }
+   else
+   {
+     // outside B slices only LIST_0 exists and the vectors are taken from mode 4
+     use_l0  = (pdir8 >= 0);
+     use_l1  = 0;
+     mv_mode = 4;
+   }
+ 
+   for (y = y_first; y < y_end; y++)
+   {
+     for (x = x_first; x < x_end; x++)
+     {
+       if (use_l0)
+       {
+         src_mv  = all_mv [y][x][LIST_0][ref][mv_mode];
+         src_pmv = pred_mv[y][x][LIST_0][ref][mv_mode];
+         all_mv8x8 [dir][LIST_0][y][x][0] = src_mv[0];
+         all_mv8x8 [dir][LIST_0][y][x][1] = src_mv[1];
+         pred_mv8x8[dir][LIST_0][y][x][0] = src_pmv[0];
+         pred_mv8x8[dir][LIST_0][y][x][1] = src_pmv[1];
+       }
+       if (use_l1)
+       {
+         src_mv  = all_mv [y][x][LIST_1][bw_ref][mv_mode];
+         src_pmv = pred_mv[y][x][LIST_1][bw_ref][mv_mode];
+         all_mv8x8 [dir][LIST_1][y][x][0] = src_mv[0];
+         all_mv8x8 [dir][LIST_1][y][x][1] = src_mv[1];
+         pred_mv8x8[dir][LIST_1][y][x][0] = src_pmv[0];
+         pred_mv8x8[dir][LIST_1][y][x][1] = src_pmv[1];
+       }
+     }
+   }
+ }
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Store motion vectors of 8x8 partitions of one macroblock
+  *
+  *    Calls StoreMVBlock8x8() for each of the four 8x8 blocks, using the mode,
+  *    reference indices and prediction direction recorded in tr8x8.
+  *
+  * \param dir  passed through to StoreMVBlock8x8()
+  *************************************************************************************
+  */
+ void StoreMV8x8(int dir)
+ {
+   int is_bslice = (img->type == B_SLICE);
+   int blk       = 0;
+ 
+   while (blk < 4)
+   {
+     StoreMVBlock8x8(dir, blk,
+                     tr8x8.part8x8mode[blk],
+                     tr8x8.part8x8fwref[blk],
+                     tr8x8.part8x8bwref[blk],
+                     tr8x8.part8x8pdir[blk],
+                     is_bslice);
+     blk++;
+   }
+ }
+ 
+ /*!
+ *************************************************************************************
+ * \brief
+ *    Restore motion vectors for 8x8 partition
+ *
+ *    Inverse of StoreMVBlock8x8(): copies the motion vectors and predictors of
+ *    8x8 block "block8x8" from all_mv8x8[dir] / pred_mv8x8[dir] back into
+ *    img->all_mv / img->pred_mv. Mode, reference indices and prediction
+ *    direction of the block are read from "tr".
+ *
+ * \param dir       first index into all_mv8x8 / pred_mv8x8
+ * \param block8x8  8x8 block index (0..3)
+ * \param tr        8x8 RD data holding part8x8pdir/mode/fwref/bwref
+ * \param bframe    non-zero for a B slice
+ *************************************************************************************
+ */
+ void RestoreMVBlock8x8(int dir, int block8x8, RD_8x8DATA tr, int bframe)
+ {
+   short ******all_mv  = img->all_mv;
+   short ******pred_mv = img->pred_mv;
+   short pred_dir = tr.part8x8pdir [block8x8];
+   short blk_mode = tr.part8x8mode [block8x8];
+   short fw_ref   = tr.part8x8fwref[block8x8];
+   short bk_ref   = tr.part8x8bwref[block8x8];
+   int   x_first  = (block8x8 & 0x01) << 1;
+   int   y_first  = (block8x8 >> 1) << 1;
+   int   x_end    = x_first + 2;
+   int   y_end    = y_first + 2;
+   int   use_l0, use_l1, mv_mode;
+   int   x, y;
+   short *dst_mv, *dst_pmv;
+ 
+   if (bframe)
+   {
+     if (pred_dir < 0 || pred_dir > 2)
+     {
+       error("invalid direction mode", 255);
+       return;
+     }
+     use_l0  = (pred_dir != 1);   // forward or bidir
+     use_l1  = (pred_dir != 0);   // backward or bidir
+     mv_mode = blk_mode;
+   }
+   else
+   {
+     // outside B slices only LIST_0 exists and the vectors belong to mode 4
+     use_l0  = (pred_dir >= 0);
+     use_l1  = 0;
+     mv_mode = 4;
+   }
+ 
+   for (y = y_first; y < y_end; y++)
+   {
+     for (x = x_first; x < x_end; x++)
+     {
+       if (use_l0)
+       {
+         dst_mv  = all_mv [y][x][LIST_0][fw_ref][mv_mode];
+         dst_pmv = pred_mv[y][x][LIST_0][fw_ref][mv_mode];
+         dst_mv [0] = all_mv8x8 [dir][LIST_0][y][x][0];
+         dst_mv [1] = all_mv8x8 [dir][LIST_0][y][x][1];
+         dst_pmv[0] = pred_mv8x8[dir][LIST_0][y][x][0];
+         dst_pmv[1] = pred_mv8x8[dir][LIST_0][y][x][1];
+       }
+       if (use_l1)
+       {
+         dst_mv  = all_mv [y][x][LIST_1][bk_ref][mv_mode];
+         dst_pmv = pred_mv[y][x][LIST_1][bk_ref][mv_mode];
+         dst_mv [0] = all_mv8x8 [dir][LIST_1][y][x][0];
+         dst_mv [1] = all_mv8x8 [dir][LIST_1][y][x][1];
+         dst_pmv[0] = pred_mv8x8[dir][LIST_1][y][x][0];
+         dst_pmv[1] = pred_mv8x8[dir][LIST_1][y][x][1];
+       }
+     }
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Restore motion vectors of 8x8 partitions of one macroblock
+  *
+  *    Calls RestoreMVBlock8x8() with tr8x8 for each of the four 8x8 blocks.
+  *
+  * \param dir  passed through to RestoreMVBlock8x8()
+  *************************************************************************************
+  */
+ void RestoreMV8x8(int dir)
+ {
+   int is_bslice = (img->type == B_SLICE);
+   int blk       = 0;
+ 
+   while (blk < 4)
+   {
+     RestoreMVBlock8x8(dir, blk, tr8x8, is_bslice);
+     blk++;
+   }
+ }
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Store motion vectors and predictors for 8x8 partition
+  *
+  *    Copies the vectors of the 2x2 group of 4x4 blocks that forms 8x8 block
+  *    "block8x8" from img->all_mv / img->pred_mv into all_mv8x8[dir] /
+  *    pred_mv8x8[dir]. Lists that are not used by the partition get their
+  *    all_mv8x8 entries zeroed; pred_mv8x8 is left untouched for those lists.
+  *
+  * \param dir       first index into all_mv8x8 / pred_mv8x8
+  * \param block8x8  8x8 block index (0..3)
+  * \param mode      mode index into all_mv / pred_mv (B slices; otherwise 4 is used)
+  * \param fw_ref    LIST_0 reference index
+  * \param bw_ref    LIST_1 reference index (B slices only)
+  * \param pdir8     prediction direction: < 0 zeroes both lists, 0 = LIST_0,
+  *                  1 = LIST_1, 2 = both lists
+  * \param bframe    non-zero for a B slice
+  *************************************************************************************
+  */
+ 
+ void StoreNewMotionVectorsBlock8x8(int dir, int block8x8, int mode, int fw_ref, int bw_ref, int pdir8, int bframe)
+ {
+   int i, j, i0, j0, ii, jj;
+   short ******all_mv  = img->all_mv;
+   short ******pred_mv = img->pred_mv;
+   
+   
+   i0 = (block8x8 & 0x01) << 1;
+   j0 = (block8x8 >> 1) << 1;
+   ii = i0+2;
+   jj = j0+2;
+   
+   if (pdir8<0)
+   {
+     // No prediction direction: clear the stored vectors of BOTH lists.
+     // (The second memset used to target LIST_0 again, leaving LIST_1 stale.)
+     // Each memset covers two 4x4 blocks with two components each; this
+     // presumably relies on all_mv8x8 being contiguous along the last two
+     // dimensions, as the other memsets in this function do.
+     for (j=j0; j<jj; j++)
+     {
+       memset(&all_mv8x8[dir][LIST_0][j][i0], 0, 2 * 2 * sizeof(short));
+       memset(&all_mv8x8[dir][LIST_1][j][i0], 0, 2 * 2 * sizeof(short));
+     }
+     return;
+   }
+   
+   if (!bframe)
+   {
+     // outside B slices only LIST_0 exists and the vectors are taken from mode 4
+     for (j=j0; j<jj; j++)
+     {
+       for (i=i0; i<ii; i++)
+       {
+         all_mv8x8 [dir][LIST_0][j][i][0] = all_mv [j][i][LIST_0][fw_ref][4][0];
+         all_mv8x8 [dir][LIST_0][j][i][1] = all_mv [j][i][LIST_0][fw_ref][4][1];
+         pred_mv8x8[dir][LIST_0][j][i][0] = pred_mv[j][i][LIST_0][fw_ref][4][0];
+         pred_mv8x8[dir][LIST_0][j][i][1] = pred_mv[j][i][LIST_0][fw_ref][4][1];
+       }
+     }
+     return;
+   }
+   else
+   {
+     // LIST_0: copy when the partition predicts from it, otherwise zero it
+     if ((pdir8==0 || pdir8==2))
+     {
+       for (j=j0; j<jj; j++)
+         for (i=i0; i<ii; i++)
+         {
+           all_mv8x8 [dir][LIST_0][j][i][0] = all_mv [j][i][LIST_0][fw_ref][mode][0];
+           all_mv8x8 [dir][LIST_0][j][i][1] = all_mv [j][i][LIST_0][fw_ref][mode][1];
+           pred_mv8x8[dir][LIST_0][j][i][0] = pred_mv[j][i][LIST_0][fw_ref][mode][0];
+           pred_mv8x8[dir][LIST_0][j][i][1] = pred_mv[j][i][LIST_0][fw_ref][mode][1];
+         }
+     }
+     else
+     {
+       for (j=j0; j<jj; j++)
+         memset(&all_mv8x8[dir][LIST_0][j][i0], 0, 2 * 2 * sizeof(short));
+     }
+     
+     // LIST_1: copy when the partition predicts from it, otherwise zero it
+     if ((pdir8==1 || pdir8==2))
+     {
+       for (j=j0; j<jj; j++)
+         for (i=i0; i<ii; i++)
+         {
+           all_mv8x8 [dir][LIST_1][j][i][0] = all_mv [j][i][LIST_1][bw_ref][mode][0];
+           all_mv8x8 [dir][LIST_1][j][i][1] = all_mv [j][i][LIST_1][bw_ref][mode][1];
+           pred_mv8x8[dir][LIST_1][j][i][0] = pred_mv[j][i][LIST_1][bw_ref][mode][0];
+           pred_mv8x8[dir][LIST_1][j][i][1] = pred_mv[j][i][LIST_1][bw_ref][mode][1];
+         }
+     }
+     else
+     {
+       for (j=j0; j<jj; j++)
+         memset(&all_mv8x8[dir][LIST_1][j][i0], 0, 2 * 2 * sizeof(short));
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Makes the decision if 8x8 transform will be used (for RD-off)
+  *
+  *    For each 8x8 block of the macroblock the residual against the
+  *    tr4x4 and tr8x8 predictions is formed; the 4x4 cost accumulates
+  *    SATD() over the four 4x4 sub-blocks, the 8x8 cost accumulates
+  *    SATD8X8() over the 64 residual values.
+  * \return
+  *    1 if input->Transform8x8Mode is 2, or if the accumulated 8x8 cost is
+  *    strictly smaller than the accumulated 4x4 cost; 0 otherwise
+  ************************************************************************
+  */
+ int GetBestTransformP8x8()
+ {
+   int satd_4x4 = 0, satd_8x8 = 0;
+   int resid4x4[64];
+   int resid8x8[64];
+   int blk8, blk4, row, col;
+ 
+   // mode 2: always allow 8x8 transform
+   if (input->Transform8x8Mode == 2)
+     return 1;
+ 
+   for (blk8 = 0; blk8 < 4; blk8++)
+   {
+     // top-left sample of this 8x8 block inside the macroblock
+     int y8 = (blk8 >>   1) << 3;
+     int x8 = (blk8 & 0x01) << 3;
+ 
+     //===== the four 4x4 blocks, row by row =====
+     for (blk4 = 0; blk4 < 4; blk4++)
+     {
+       int y4    = y8 + ((blk4 >>   1) << 2);
+       int x4    = x8 + ((blk4 & 0x01) << 2);
+       int pic_y = img->opix_y + y4;
+       int pic_x = img->opix_x + x4;
+       int base  = blk4 << 4;   // 16 residuals per 4x4 block, stored back to back
+ 
+       //===== displaced frame difference for both transform sizes =====
+       for (row = 0; row < 4; row++)
+       {
+         for (col = 0; col < 4; col++)
+         {
+           int idx = base + (row << 2) + col;
+ 
+           resid4x4[idx] = imgY_org[pic_y+row][pic_x+col] - tr4x4.mpr8x8[row+y4][col+x4];
+           resid8x8[idx] = imgY_org[pic_y+row][pic_x+col] - tr8x8.mpr8x8[row+y4][col+x4];
+         }
+       }
+ 
+       satd_4x4 += SATD (&resid4x4[base], input->hadamard);
+     }
+ 
+     satd_8x8 += SATD8X8 (resid8x8, input->hadamard);
+   }
+ 
+   return (satd_8x8 < satd_4x4);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Sets MBAFF RD parameters
+ *
+ *    Copies the state of the current macroblock (reconstructed samples,
+ *    mode, cbp, coefficients, 8x8 modes/directions, reference indices and
+ *    intra prediction modes) into rdopt.
+ ************************************************************************
+ */
+ void set_mbaff_parameters()
+ {
+   Macroblock *currMB        = &img->mb_data[img->current_mb_nr];
+   char      **pic_ipredmode = img->ipredmode;
+   int         bframe        = (img->type==B_SLICE);
+   int         mode          = best_mode;
+   int         blk, sub, half, row;
+ 
+   //===== reconstructed luma =====
+   for (row = 0; row < MB_BLOCK_SIZE; row++)
+   {
+     memcpy(rdopt->rec_mbY[row], &enc_picture->imgY[img->pix_y + row][img->pix_x], MB_BLOCK_SIZE * sizeof(imgpel));
+   }
+ 
+   //===== reconstructed chroma (nothing to copy for YUV400) =====
+   if (img->yuv_format != YUV400)
+   {
+     for (row = 0; row < img->mb_cr_size_y; row++)
+     {
+       memcpy(rdopt->rec_mbU[row], &enc_picture->imgUV[0][img->pix_c_y + row][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+       memcpy(rdopt->rec_mbV[row], &enc_picture->imgUV[1][img->pix_c_y + row][img->pix_c_x], img->mb_cr_size_x * sizeof(imgpel));
+     }
+   }
+ 
+   //===== cbp, macroblock type and transform size =====
+   rdopt->i16offset = img->i16offset;
+   rdopt->cbp       = currMB->cbp;
+   rdopt->cbp_blk   = currMB->cbp_blk;
+   rdopt->mb_type   = currMB->mb_type;
+   rdopt->luma_transform_size_8x8_flag = currMB->luma_transform_size_8x8_flag;
+ 
+   //===== mode: a stored mb_type of 0 forces the stored mode to 0 as well =====
+   if (rdopt->mb_type == 0)
+     mode = 0;
+   rdopt->mode = mode;
+ 
+   //===== AC coefficients: 4 luma blocks plus the chroma 8x8 blocks =====
+   for (blk = 0; blk < 4+img->num_blk8x8_uv; blk++)
+   {
+     for (sub = 0; sub < 4; sub++)
+     {
+       for (half = 0; half < 2; half++)
+         memcpy(rdopt->cofAC[blk][sub][half], img->cofAC[blk][sub][half], 65 * sizeof(int));
+     }
+   }
+ 
+   //===== DC coefficients =====
+   for (blk = 0; blk < 3; blk++)
+   {
+     for (half = 0; half < 2; half++)
+       memcpy(rdopt->cofDC[blk][half], img->cofDC[blk][half], 18 * sizeof(int));
+   }
+ 
+   //===== 8x8 modes and prediction directions =====
+   memcpy(rdopt->b8mode, currMB->b8mode, BLOCK_MULTIPLE * sizeof(int));
+   memcpy(rdopt->b8pdir, currMB->b8pdir, BLOCK_MULTIPLE * sizeof(int));
+ 
+   //===== reference frames: LIST_0 always, LIST_1 and bi_pred_me only in B slices =====
+   for (row = 0; row < BLOCK_MULTIPLE; row++)
+   {
+     memcpy(rdopt->refar[LIST_0][row], &enc_picture->ref_idx[LIST_0][img->block_y + row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+     if (bframe)
+       memcpy(rdopt->refar[LIST_1][row], &enc_picture->ref_idx[LIST_1][img->block_y + row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+   }
+   if (bframe)
+     rdopt->bi_pred_me = currMB->bi_pred_me;
+ 
+   //===== intra prediction modes =====
+   memcpy(rdopt->intra_pred_modes, currMB->intra_pred_modes, MB_BLOCK_PARTITIONS * sizeof(char));
+   for (row = img->block_y; row < img->block_y + 4; row++)
+   {
+     memcpy(&rdopt->ipredmode[row][img->block_x], &pic_ipredmode[row][img->block_x], BLOCK_MULTIPLE * sizeof(char));
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    store coding state (for rd-optimized mode decision), used for 8x8 transformation
+ *
+ * \note
+ *    Parameterless convenience wrapper: forwards to store_coding_state()
+ *    with the coding state object cs_cm.
+ ************************************************************************
+ */
+ void store_coding_state_cs_cm()
+ {
+   store_coding_state(cs_cm);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    restore coding state (for rd-optimized mode decision), used for 8x8 transformation
+ *
+ * \note
+ *    Parameterless convenience wrapper: forwards to reset_coding_state()
+ *    with the coding state object cs_cm.
+ ************************************************************************
+ */
+ void reset_coding_state_cs_cm()
+ {
+   reset_coding_state(cs_cm);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    update rounding offsets based on JVT-N011
+ *
+ * \param mode
+ *    macroblock mode; I4MB, I16MB and I8MB select the intra offset lists
+ *    and intra adjustments, every other value selects the inter ones
+ * \param luma_transform_size_8x8_flag
+ *    only consulted for inter modes: non-zero updates the 8x8 luma list,
+ *    zero updates the 4x4 luma list (for intra modes I8MB alone selects
+ *    the 8x8 list)
+ *
+ * \note
+ *    For every sample position of the macroblock the matching offset list
+ *    entry is incremented by the corresponding best*FAdjust* value and then
+ *    passed through Clip3(0, 1 << (OffsetBits - 1), ...).  The two chroma
+ *    lists are only updated when input->AdaptRndChroma is set, and only
+ *    for the positions with both coordinates below 8.
+ ************************************************************************
+ */
+ void update_offset_params(int mode, int luma_transform_size_8x8_flag)
+ {
+   const int offsetRange = 1 << (OffsetBits - 1);
+   const int is_intra    = (mode == I4MB) || (mode == I16MB) || (mode == I8MB);
+   const int adapt_cr    = (input->AdaptRndChroma != 0);
+   int use_8x8;     // luma entry lives in OffsetList8x8 (else OffsetList4x4)
+   int luma_list;   // row of the luma offset list that gets updated
+   int cr_list;     // row of the first chroma list, the second one is cr_list + 1
+   int row, col, pos;
+ 
+   //===== pick the offset lists once, they do not depend on the position =====
+   if (!is_intra)
+   {
+     const int b_slice = (img->type == B_SLICE);
+ 
+     use_8x8 = (luma_transform_size_8x8_flag != 0);
+     if (use_8x8)
+       luma_list = b_slice ? 4 : 3;
+     else
+       luma_list = b_slice ? 12 : 9;
+     cr_list = b_slice ? 13 : 10;
+   }
+   else
+   {
+     use_8x8 = (mode == I8MB);
+     if (img->type == I_SLICE)
+     {
+       luma_list = 0;
+       cr_list   = 1;
+     }
+     else if (img->type == B_SLICE)
+     {
+       luma_list = use_8x8 ? 2 : 6;
+       cr_list   = 7;
+     }
+     else
+     {
+       luma_list = use_8x8 ? 1 : 3;
+       cr_list   = 4;
+     }
+   }
+ 
+   //===== accumulate and clip =====
+   for (row = 0; row < MB_BLOCK_SIZE; row++)
+   {
+     for (col = 0; col < MB_BLOCK_SIZE; col++)
+     {
+       if (use_8x8)
+       {
+         pos = ((row & 0x07) << 3) + (col & 0x07);
+         OffsetList8x8[luma_list][pos] += (is_intra ? bestIntraFAdjust8x8[row][col]
+                                                    : bestInterFAdjust8x8[row][col]);
+         OffsetList8x8[luma_list][pos] = Clip3(0, offsetRange, OffsetList8x8[luma_list][pos]);
+       }
+       else
+       {
+         pos = ((row & 0x03) << 2) + (col & 0x03);
+         OffsetList4x4[luma_list][pos] += (is_intra ? bestIntraFAdjust4x4[row][col]
+                                                    : bestInterFAdjust4x4[row][col]);
+         OffsetList4x4[luma_list][pos] = Clip3(0, offsetRange, OffsetList4x4[luma_list][pos]);
+       }
+ 
+       // chroma adjustments are only read for the top-left 8x8 positions
+       if (adapt_cr && row < 8 && col < 8)
+       {
+         pos = ((row & 0x03) << 2) + (col & 0x03);
+         OffsetList4x4[cr_list][pos] += (is_intra ? bestIntraFAdjust4x4Cr[0][row][col]
+                                                  : bestInterFAdjust4x4Cr[0][row][col]);
+         OffsetList4x4[cr_list][pos] = Clip3(0, offsetRange, OffsetList4x4[cr_list][pos]);
+         OffsetList4x4[cr_list + 1][pos] += (is_intra ? bestIntraFAdjust4x4Cr[1][row][col]
+                                                      : bestInterFAdjust4x4Cr[1][row][col]);
+         OffsetList4x4[cr_list + 1][pos] = Clip3(0, offsetRange, OffsetList4x4[cr_list + 1][pos]);
+       }
+     }
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    write the selected reference indices, reference picture ids and
+ *    motion vectors of one partition of the current macroblock into
+ *    enc_picture
+ *
+ * \param mode
+ *    partition mode: 1 fills a BLOCK_MULTIPLE x BLOCK_MULTIPLE region,
+ *    2 fills two block rows (block*2 + {0,1}) over BLOCK_MULTIPLE columns,
+ *    any other value fills BLOCK_MULTIPLE rows over two block columns
+ *    (block*2 + {0,1})
+ * \param best_pdir
+ *    prediction direction: 1 resets the LIST_0 entries (ref_idx and
+ *    ref_pic_id to -1, mv to 0); 0 resets the LIST_1 entries in the same
+ *    way (only when bframe is set); otherwise the list is filled
+ * \param block
+ *    index of the partition inside the macroblock
+ * \param list_offset
+ *    offset added to LIST_0 / LIST_1 when indexing enc_picture->ref_pic_num
+ * \param best_fw_ref
+ *    reference index stored for LIST_0
+ * \param best_bw_ref
+ *    reference index stored for LIST_1; if it is negative only ref_idx is
+ *    written and neither ref_pic_num nor all_mv is read for LIST_1
+ * \param bframe
+ *    non-zero if the LIST_1 entries have to be written as well
+ *
+ * \note
+ *    For mode 1 with img->bi_pred_me[mode] set, reference index 0 is stored
+ *    and the vectors are taken from img->bipred_mv1 (bi_pred_me == 1) or
+ *    img->bipred_mv2 instead of img->all_mv.
+ ************************************************************************
+ */
+ void assign_enc_picture_params(int mode, int best_pdir, int block, int list_offset, int best_fw_ref, int best_bw_ref, int bframe)
+ {
+   int i,j;
+   int block_x, block_y;
+   short *cur_mv;
+   
+   if (mode==1)
+   {
+     //===== LIST_0 =====
+     if (best_pdir==1)
+     {
+       for (j=img->block_y+(block&2); j<img->block_y+(block&2) + BLOCK_MULTIPLE; j++)
+       {
+         block_x = img->block_x+(block&1)*2;
+        
+         memset(&enc_picture->ref_idx[LIST_0][j][block_x], -1 ,     BLOCK_MULTIPLE * sizeof(char));
+         memset(enc_picture->mv      [LIST_0][j][block_x],  0 , 2 * BLOCK_MULTIPLE * sizeof(short));
+         for (i=block_x; i<block_x + BLOCK_MULTIPLE; i++)
+         {
+           enc_picture->ref_pic_id [LIST_0][j][i]    = -1;
+         }
+       }
+     }
+     else if (img->bi_pred_me[mode])
+     {
+       for (j=0; j<BLOCK_MULTIPLE; j++)
+       {
+         block_y = img->block_y+(block&2)+j;
+         block_x = img->block_x+(block&1)*2;
+         memset(&enc_picture->ref_idx[LIST_0][block_y][block_x], 0, BLOCK_MULTIPLE * sizeof(char));
+         for (i=0; i<BLOCK_MULTIPLE; i++)
+         {            
+           cur_mv = img->bi_pred_me[mode] == 1 
+             ? img->bipred_mv1[i][j][LIST_0][0][mode] 
+             : img->bipred_mv2[i][j][LIST_0][0][mode];
+           
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x + i]    = enc_picture->ref_pic_num[LIST_0 + list_offset][0];  
+           enc_picture->mv         [LIST_0][block_y][block_x + i][0] = cur_mv[0];
+           enc_picture->mv         [LIST_0][block_y][block_x + i][1] = cur_mv[1];
+         }
+       }
+     }
+     else 
+     {
+       for (j=0; j<BLOCK_MULTIPLE; j++)
+       {
+         block_y = img->block_y+(block&2)+j;
+         block_x = img->block_x+(block&1)*2;
+         memset(&enc_picture->ref_idx[LIST_0][block_y][block_x], best_fw_ref , BLOCK_MULTIPLE * sizeof(char));
+         for (i=0; i<BLOCK_MULTIPLE; i++)
+         {                                
+           cur_mv = img->all_mv[j][i][LIST_0][best_fw_ref][mode];
+ 
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x + i]    = enc_picture->ref_pic_num[LIST_0 + list_offset][best_fw_ref];  
+           enc_picture->mv         [LIST_0][block_y][block_x + i][0] = cur_mv[0];
+           enc_picture->mv         [LIST_0][block_y][block_x + i][1] = cur_mv[1];
+         }          
+       }
+     }
+     
+     //===== LIST_1 =====
+     if (bframe)
+     {
+       if (best_pdir==0)
+       {
+         for (j=img->block_y+(block&2); j<img->block_y+(block&2) + BLOCK_MULTIPLE; j++)
+         {
+           block_x = img->block_x+(block&1)*2;
+           memset(&enc_picture->ref_idx[LIST_1][j][block_x], -1 , BLOCK_MULTIPLE * sizeof(char));
+           memset(enc_picture->mv[LIST_1][j][block_x], 0 , 2 * BLOCK_MULTIPLE * sizeof(short));
+           for (i=block_x; i<block_x + BLOCK_MULTIPLE; i++)
+           {
+             enc_picture->ref_pic_id [LIST_1][j][i] = -1;
+           }
+         }
+       }
+       else
+       {
+         if (img->bi_pred_me[mode])
+         {
+           for (j=0; j<BLOCK_MULTIPLE; j++)
+           {
+             block_y = img->block_y+(block&2)+j;
+             block_x = img->block_x+(block&1)*2;
+             memset(&enc_picture->ref_idx[LIST_1][block_y][block_x], 0, BLOCK_MULTIPLE * sizeof(char));
+             for (i=0; i<BLOCK_MULTIPLE; i++)
+             {                     
+               cur_mv = img->bi_pred_me[mode] == 1 
+                 ? img->bipred_mv1[i][j][LIST_1][0][mode] 
+                 : img->bipred_mv2[i][j][LIST_1][0][mode];
+               
+               enc_picture->ref_pic_id [LIST_1][block_y][block_x + i] = 
+                 enc_picture->ref_pic_num[LIST_1 + list_offset][0];
+               enc_picture->mv         [LIST_1][block_y][block_x + i][0] = cur_mv[0];
+               enc_picture->mv         [LIST_1][block_y][block_x + i][1] = cur_mv[1];
+             }
+           }
+         }
+         else 
+         {
+           for (j=0; j<BLOCK_MULTIPLE; j++)
+           {
+             block_y = img->block_y+(block&2)+j;
+             block_x = img->block_x+(block&1)*2;
+             memset(&enc_picture->ref_idx[LIST_1][block_y][block_x], best_bw_ref, BLOCK_MULTIPLE * sizeof(char));
+             for (i=0; i<BLOCK_MULTIPLE; i++)
+             {                     
+               // ref_pic_num and all_mv are only indexed with a valid
+               // (non-negative) reference, exactly as in the mode 2 and
+               // mode 3 branches below; a negative index would read
+               // outside the arrays.
+               if(best_bw_ref>=0)
+               {
+                 cur_mv = img->all_mv[j][i][LIST_1][best_bw_ref][mode];
+ 
+                 enc_picture->ref_pic_id [LIST_1][block_y][block_x + i] = 
+                   enc_picture->ref_pic_num[LIST_1 + list_offset][best_bw_ref];
+                 enc_picture->mv[LIST_1][block_y][block_x + i][0] = cur_mv[0];
+                 enc_picture->mv[LIST_1][block_y][block_x + i][1] = cur_mv[1];
+               }
+             }            
+           }
+         }
+       }
+     }
+   }
+   else if (mode==2)
+   {
+     // two block rows per partition, all block columns of the macroblock
+     for (j=0; j<2; j++)
+     {
+       block_y = img->block_y + block * 2 + j;
+       for (i=0; i<BLOCK_MULTIPLE; i++)
+       {
+         block_x = img->block_x + i;
+         if (best_pdir==1)
+         {
+           enc_picture->ref_idx    [LIST_0][block_y][block_x]    = -1;
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = -1;
+           enc_picture->mv         [LIST_0][block_y][block_x][0] = 0;
+           enc_picture->mv         [LIST_0][block_y][block_x][1] = 0;
+         }
+         else
+         {                     
+           cur_mv = img->all_mv[j+block*2][i][LIST_0][best_fw_ref][mode];
+ 
+           enc_picture->ref_idx    [LIST_0][block_y][block_x]    = best_fw_ref;
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = 
+             enc_picture->ref_pic_num[LIST_0 + list_offset][best_fw_ref];
+           enc_picture->mv         [LIST_0][block_y][block_x][0] = cur_mv[0];
+           enc_picture->mv         [LIST_0][block_y][block_x][1] = cur_mv[1];
+         }
+         
+         if (bframe)
+         {
+           if (best_pdir==0)
+           {
+             enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+             enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+             enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+             enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+           }
+           else
+           {
+             enc_picture->ref_idx[LIST_1][block_y][block_x] = best_bw_ref;
+             if(best_bw_ref>=0)
+             {
+               cur_mv = img->all_mv[j+ block*2][i][LIST_1][best_bw_ref][mode];
+ 
+               enc_picture->ref_pic_id [LIST_1][block_y][block_x] = 
+                 enc_picture->ref_pic_num[LIST_1 + list_offset][best_bw_ref];
+               enc_picture->mv[LIST_1][block_y][block_x][0] = cur_mv[0];
+               enc_picture->mv[LIST_1][block_y][block_x][1] = cur_mv[1];
+             }                       
+           }
+         }
+       }
+     }
+   }
+   else
+   {
+     // all block rows of the macroblock, two block columns per partition
+     for (j=0; j<BLOCK_MULTIPLE; j++)
+     {
+       block_y = img->block_y+j;
+       for (i=0; i<2; i++)
+       {
+         block_x = img->block_x + block*2 + i;
+         if (best_pdir==1)
+         {
+           enc_picture->ref_idx    [LIST_0][block_y][block_x]    = -1;
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x]    = -1;
+           enc_picture->mv         [LIST_0][block_y][block_x][0] = 0;
+           enc_picture->mv         [LIST_0][block_y][block_x][1] = 0;
+         }
+         else
+         {
+           cur_mv = img->all_mv[j][block*2+i][LIST_0][best_fw_ref][mode];
+ 
+           enc_picture->ref_idx    [LIST_0][block_y][block_x] = best_fw_ref;
+           enc_picture->ref_pic_id [LIST_0][block_y][block_x] = 
+             enc_picture->ref_pic_num[LIST_0 + list_offset][best_fw_ref];          
+           enc_picture->mv[LIST_0][block_y][block_x][0] = cur_mv[0];
+           enc_picture->mv[LIST_0][block_y][block_x][1] = cur_mv[1];
+         }
+         
+         if (bframe)
+         {
+           if (best_pdir==0)
+           {
+             enc_picture->ref_idx    [LIST_1][block_y][block_x]    = -1;
+             enc_picture->ref_pic_id [LIST_1][block_y][block_x]    = -1;
+             enc_picture->mv         [LIST_1][block_y][block_x][0] = 0;
+             enc_picture->mv         [LIST_1][block_y][block_x][1] = 0;
+           }
+           else
+           {
+             enc_picture->ref_idx[LIST_1][block_y][block_x] = best_bw_ref;
+             if(best_bw_ref>=0)
+             {
+               cur_mv = img->all_mv[j][block*2+i][LIST_1][best_bw_ref][mode];
+               enc_picture->ref_pic_id [LIST_1][block_y][block_x] = 
+                 enc_picture->ref_pic_num[LIST_1 + list_offset][best_bw_ref];
+               
+               enc_picture->mv[LIST_1][block_y][block_x][0] = cur_mv[0];
+               enc_picture->mv[LIST_1][block_y][block_x][1] = cur_mv[1];
+             }
+           }
+         }
+       }
+     }
+   }
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    mark the four refresh_map cells of the current macroblock
+ *
+ * \param intra
+ *    value used when input->RestrictRef == 1 and input->rdopt < 2
+ * \param intra1
+ *    must be 0 for the cells to be set when input->RestrictRef == 1 and
+ *    input->rdopt == 3
+ * \param currMB
+ *    current macroblock; its mb_type is compared with I16MB and I4MB
+ *
+ * \note
+ *    Nothing is written for any other RestrictRef / rdopt combination.
+ ************************************************************************
+ */
+ void update_refresh_map(int intra, int intra1, Macroblock *currMB)
+ {
+   int flag;
+   int row, col;
+ 
+   if (input->RestrictRef==1)
+   {
+     // Modified for Fast Mode Decision. Inchoon Choi, SungKyunKwan Univ.
+     if (input->rdopt<2)
+       flag = (intra ? 1 : 0);
+     else if (input->rdopt==3)
+       flag = (intra1==0 && (currMB->mb_type==I16MB || currMB->mb_type==I4MB) ? 1 : 0);
+     else
+       return;
+   }
+   else if (input->RestrictRef==2)
+   {
+     flag = (currMB->mb_type==I16MB || currMB->mb_type==I4MB ? 1 : 0);
+   }
+   else
+   {
+     return;
+   }
+ 
+   // each macroblock owns a 2x2 group of cells in the map
+   row = 2*img->mb_y;
+   col = 2*img->mb_x;
+   refresh_map[row  ][col  ] = flag;
+   refresh_map[row  ][col+1] = flag;
+   refresh_map[row+1][col  ] = flag;
+   refresh_map[row+1][col+1] = flag;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,203 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file rdopt_coding_state.c
+  *
+  * \brief
+  *    Storing/restoring coding state for
+  *    Rate-Distortion optimized mode decision
+  *
+  * \author
+  *    Heiko Schwarz
+  *
+  * \date
+  *    17. April 2001
+  **************************************************************************/
+ 
+ #include <stdlib.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ 
+ #include "rdopt_coding_state.h"
+ #include "cabac.h"
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    delete structure for storing coding state
+  *
+  * \param cs
+  *    coding state to release; a NULL pointer is accepted and ignored
+  ************************************************************************
+  */
+ void
+ delete_coding_state (CSptr cs)
+ {
+   if (cs == NULL)
+     return;
+ 
+   //=== structures of data partition array ===
+   // free() ignores NULL pointers, so no separate checks are needed
+   free (cs->encenv);
+   free (cs->bitstream);
+ 
+   //=== contexts for binary arithmetic coding ===
+   delete_contexts_MotionInfo  (cs->mot_ctx);
+   delete_contexts_TextureInfo (cs->tex_ctx);
+ 
+   //=== coding state structure ===
+   free (cs);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    create structure for storing coding state
+  *
+  * \return
+  *    newly allocated coding state; the encoding environments and the
+  *    CABAC context structures are only allocated when
+  *    input->symbol_mode is CABAC and are NULL otherwise
+  *
+  * \note
+  *    no_mem_exit() is called when an allocation fails.
+  ************************************************************************
+  */
+ CSptr
+ create_coding_state ()
+ {
+   CSptr state = (CSptr) calloc (1, sizeof(CSobj));
+ 
+   //=== coding state structure ===
+   if (state == NULL)
+     no_mem_exit("init_coding_state: cs");
+ 
+   //=== important variables of data partition array ===
+   state->no_part = (input->partition_mode == 0) ? 1 : 3;
+ 
+   state->encenv = NULL;
+   if (input->symbol_mode == CABAC)
+   {
+     state->encenv = (EncodingEnvironment*) calloc (state->no_part, sizeof(EncodingEnvironment));
+     if (state->encenv == NULL)
+       no_mem_exit("init_coding_state: cs->encenv");
+   }
+ 
+   state->bitstream = (Bitstream*) calloc (state->no_part, sizeof(Bitstream));
+   if (state->bitstream == NULL)
+     no_mem_exit("init_coding_state: cs->bitstream");
+ 
+   //=== context for binary arithmetic coding ===
+   state->symbol_mode = input->symbol_mode;
+   state->mot_ctx     = NULL;
+   state->tex_ctx     = NULL;
+   if (state->symbol_mode == CABAC)
+   {
+     state->mot_ctx = create_contexts_MotionInfo ();
+     state->tex_ctx = create_contexts_TextureInfo();
+   }
+ 
+   return state;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    store coding state (for rd-optimized mode decision)
+  *
+  * \param cs
+  *    destination; receives copies of the bitstreams of the current slice
+  *    (plus encoding environments and contexts when cs->symbol_mode is
+  *    CABAC) and of the syntax element number, bit counters, mvd and
+  *    cbp_bits of the current macroblock
+  *
+  * \note
+  *    Does nothing when input->rdopt is 0.  Only the first partition is
+  *    copied when img->currentPicture->idr_flag is set.
+  ************************************************************************
+  */
+ void
+ store_coding_state (CSptr cs)
+ {
+   int         part;
+   int         num_parts = img->currentPicture->idr_flag? 1:cs->no_part;
+   Slice      *slice     = img->currentSlice;
+   Macroblock *mb        = &(img->mb_data [img->current_mb_nr]);
+   int         cabac;
+ 
+   if (!input->rdopt)
+     return;
+ 
+   cabac = (cs->symbol_mode==CABAC);
+ 
+   //=== important variables of data partition array ===
+   //only one partition for IDR img
+   for (part = 0; part < num_parts; part++)
+   {
+     if (cabac)
+       memcpy (&(cs->encenv[part]), &(slice->partArr[part].ee_cabac), sizeof(EncodingEnvironment));
+     memcpy (&(cs->bitstream[part]), slice->partArr[part].bitstream, sizeof(Bitstream));
+   }
+ 
+   //=== contexts for binary arithmetic coding ===
+   if (cabac)
+   {
+     memcpy (cs->mot_ctx, slice->mot_ctx, sizeof(MotionInfoContexts));
+     memcpy (cs->tex_ctx, slice->tex_ctx, sizeof(TextureInfoContexts));
+   }
+ 
+   //=== syntax element number and bitcounters ===
+   cs->currSEnr = mb->currSEnr;
+   memcpy (cs->bitcounter, mb->bitcounter, MAX_BITCOUNTER_MB * sizeof(int));
+ 
+   //=== elements of current macroblock ===
+   memcpy (cs->mvd, mb->mvd, BLOCK_CONTEXT * sizeof(int));
+   cs->cbp_bits = mb->cbp_bits;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    restore coding state (for rd-optimized mode decision)
+  *
+  * \param cs
+  *    source; its bitstreams (plus encoding environments and contexts when
+  *    cs->symbol_mode is CABAC) are copied back into the current slice,
+  *    and its syntax element number, bit counters, mvd and cbp_bits are
+  *    copied back into the current macroblock
+  *
+  * \note
+  *    Does nothing when input->rdopt is 0.  Only the first partition is
+  *    copied when img->currentPicture->idr_flag is set.
+  ************************************************************************
+  */
+ void
+ reset_coding_state (CSptr cs)
+ {
+   int         part;
+   int         num_parts = img->currentPicture->idr_flag? 1:cs->no_part;
+   Slice      *slice     = img->currentSlice;
+   Macroblock *mb        = &(img->mb_data [img->current_mb_nr]);
+   int         cabac;
+ 
+   if (!input->rdopt)
+     return;
+ 
+   cabac = (cs->symbol_mode==CABAC);
+ 
+   //=== important variables of data partition array ===
+   //only one partition for IDR img
+   for (part = 0; part < num_parts; part++)
+   {
+     //--- parameters of encoding environments ---
+     if (cabac)
+       memcpy (&(slice->partArr[part].ee_cabac), &(cs->encenv   [part]), sizeof(EncodingEnvironment));
+     memcpy (slice->partArr[part].bitstream, &(cs->bitstream[part]), sizeof(Bitstream));
+   }
+ 
+   //=== contexts for binary arithmetic coding ===
+   if (cabac)
+   {
+     memcpy (slice->mot_ctx, cs->mot_ctx, sizeof(MotionInfoContexts));
+     memcpy (slice->tex_ctx, cs->tex_ctx, sizeof(TextureInfoContexts));
+   }
+ 
+   //=== syntax element number and bitcounters ===
+   mb->currSEnr = cs->currSEnr;
+   memcpy (mb->bitcounter, cs->bitcounter, MAX_BITCOUNTER_MB * sizeof(int));
+ 
+   //=== elements of current macroblock ===
+   memcpy (mb->mvd, cs->mvd, BLOCK_CONTEXT * sizeof(int));
+   mb->cbp_bits = cs->cbp_bits;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/rdopt_coding_state.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,53 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file
+  *    rdopt_coding_state.h
+  *
+  * \author
+  *    Heiko Schwarz
+  *
+  * \date
+  *    17. April 2001
+  *
+  * \brief
+  *    Headerfile for storing/restoring coding state
+  *    (for rd-optimized mode decision)
+  **************************************************************************
+  */
+ 
+ #ifndef _RD_OPT_CS_H_
+ #define _RD_OPT_CS_H_
+ 
+ // Snapshot of the encoder state that store_coding_state() saves and
+ // reset_coding_state() restores.
+ typedef struct {
+ 
+   // important variables of data partition array
+   // (encenv and bitstream are arrays with no_part elements)
+   int                   no_part;
+   EncodingEnvironment  *encenv;
+   Bitstream            *bitstream;
+ 
+   // contexts for binary arithmetic coding
+   int                   symbol_mode;
+   MotionInfoContexts   *mot_ctx;
+   TextureInfoContexts  *tex_ctx;
+ 
+   // syntax element number and bitcounters
+   int                   currSEnr;
+   int                   bitcounter[MAX_BITCOUNTER_MB];
+ 
+   // elements of current macroblock
+   int                   mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2];
+   int64                 cbp_bits;
+ } CSobj;
+ // Pointer alias used by the whole coding state interface.
+ typedef CSobj* CSptr;
+ 
+ 
+ // Lifetime management of a coding state object.
+ void  delete_coding_state  (CSptr);  //!< delete structure
+ CSptr create_coding_state  ();       //!< create structure
+ 
+ // Save / restore of the current slice and macroblock state.
+ void  store_coding_state   (CSptr);  //!< store parameters
+ void  reset_coding_state   (CSptr);  //!< restore parameters
+ 
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/rdpicdecision.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rdpicdecision.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/rdpicdecision.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,64 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file rdpicdecision.c
+ *
+ * \brief
+ *    Perform RD optimal decisions between multiple coded versions of the same picture
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Alexis Michael Tourapis         <alexismt at ieee.org>
+ *************************************************************************************
+ */
+ 
+ #include "global.h"
+ #include <math.h>
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    RD decision between possible encoding cases
+  *
+  * \return
+  *    0 if version 1 is kept, 1 if version 2 is chosen.  Each cost is
+  *    bits * lambda_picture + snrY; version 1 is kept when its cost is
+  *    lower, or when the costs are equal and snrY_version2 is not
+  *    smaller than snrY_version1.
+  ************************************************************************
+  */
+ int rd_pic_decision(double snrY_version1, double snrY_version2, int bits_version1, int bits_version2, double lambda_picture)
+ {
+   const double cost1 = (double) bits_version1 * lambda_picture + snrY_version1;
+   const double cost2 = (double) bits_version2 * lambda_picture + snrY_version2;
+   const int keep_version1 = (cost2 > cost1)
+                          || (cost2 == cost1 && snrY_version2 >= snrY_version1);
+ 
+   //printf("%d %d %.2f %.2f %.2f %.2f \n",bits_version1,bits_version2,snrY_version1,snrY_version2,cost1,cost2);
+   return keep_version1 ? 0 : 1;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Picture Coding Decision
+  *
+  * \param picture1
+  *    first coded version of the picture
+  * \param picture2
+  *    second coded version of the picture
+  * \param qp
+  *    quantization parameter used to derive the Lagrangian multiplier
+  *
+  * \return
+  *    result of rd_pic_decision() on the summed Y/U/V distortion and the
+  *    bits_per_picture of both versions
+  *
+  * \note
+  *    The multiplier is 0.68 * 2^((qp - SHIFT_QP) / 3) and is doubled for
+  *    B and SP slices when input->successive_Bframe is set.
+  ************************************************************************
+  */
+ int picture_coding_decision (Picture *picture1, Picture *picture2, int qp)
+ {
+   double lambda_picture = 0.68 * pow (2, (qp - SHIFT_QP) / 3.0);
+   double dist1, dist2;
+ 
+   if (input->successive_Bframe && (img->type == B_SLICE || img->type == SP_SLICE))
+     lambda_picture *= 2;
+ 
+   dist1 = picture1->distortion_y + picture1->distortion_u + picture1->distortion_v;
+   dist2 = picture2->distortion_y + picture2->distortion_u + picture2->distortion_v;
+ 
+   return rd_pic_decision(dist1, dist2,
+                          picture1->bits_per_picture, picture2->bits_per_picture,
+                          lambda_picture);
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/refbuf.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/refbuf.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/refbuf.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,167 ----
+ 
+ /*!
+  ************************************************************************
+  * \file refbuf.c
+  *
+  * \brief
+  *    Declarations of the reference frame buffer types and functions
+  ************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <memory.h>
+ #include "global.h"
+ 
+ #include "refbuf.h"
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reference buffer write routine for a buffer addressed as an array
+  *    of rows: stores val at Pic[y][x]
+  ************************************************************************
+  */
+ void PutPel_14 (pel_t **Pic, int y, int x, pel_t val)
+ {
+   pel_t *row = Pic [y];
+ 
+   row [x] = val;
+ }
+ 
+ // Reference buffer write routine for a linear buffer: stores val at
+ // position pel_pos.
+ void PutPel_11 (pel_t *Pic, int pel_pos, pel_t val)
+ {
+   *(Pic + pel_pos) = val;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \note
+  *    The following functions that return line are NOT reentrant: the
+  *    returned pointer refers to the static buffer below, which is
+  *    overwritten by the next call.  Use a buffer provided by the caller
+  *    to change that (but it costs a memcpy()).
+  ************************************************************************
+  */
+ static pel_t line[16];
+ static pel_t line4[64];
+ 
+ // Returns a pointer to sample (x, y) of a linear buffer with the given
+ // row width; no boundary handling is done (height is unused).
+ pel_t *FastLine16Y_11 (pel_t *Pic, int y, int x, int height, int width)
+ {
+   int offset = y*width+x;
+ 
+   return Pic + offset;
+ }
+ 
+ 
+ // Returns 16 samples of row y starting at column x.  y is clamped to
+ // [0, height-1].  When the 16 samples lie completely inside the row a
+ // pointer into Pic is returned; otherwise the samples are assembled in
+ // the static buffer line, replicating the left or right edge sample for
+ // the columns outside the picture.
+ pel_t *UMVLine16Y_11 (pel_t *Pic, int y, int x, int height, int width)
+ {
+   int k;
+   pel_t *row = Pic + max(0,min(height-1,y)) * width;
+ 
+   if (x >= 0 && x <= width-16)      // No edge
+     return row + x;
+ 
+   if (x < 0)                        // Left edge
+   {
+     const int pad = min(0,x+16) - x;
+ 
+     for (k = 0; k < pad; k++)
+       line[k] = row [0];            // Replicate left edge pixel
+ 
+     for (k = -x; k < 16; k++)       // Copy non-edge pixels
+       line[k] = row [k+x];
+   }
+   else                              // Right edge
+   {
+     for (k = 0; k < width-x; k++)
+       line[k] = row [x+k];          // Copy non-edge pixels
+ 
+     for (k = max(width,x) - x; k < 16; k++)
+       line[k] = row [width-1];      // Replicate right edge pixel
+   }
+ 
+   return line;
+ }
+ 
+ 
+ pel_t *FastLineX (int dummy, pel_t* Pic, int y, int x, int height, int width)
+ {
+   // Direct address of sample (y, x); dummy and height are unused
+   return &Pic [y*width + x];
+ }
+ 
+ 
+ pel_t *UMVLineX (int size, pel_t* Pic, int y, int x, int height, int width)
+ {
+   // Start of the row selected by y, with y clamped into [0, height-1]
+   pel_t *row_start = Pic + max(0,min(height-1,y)) * width;
+   int col;
+ 
+   // The whole run of size samples is inside the row: return it in place
+   if (x >= 0 && x <= width-size)
+     return row_start + x;
+ 
+   if (x < 0)
+   {
+     // Left edge: replicate the first sample for columns left of 0
+     const int pad_end = min(0,x+size);
+ 
+     for (col = x; col < pad_end; col++)
+       line[col-x] = row_start [0];
+ 
+     // Copy the in-picture part behind the padding (nothing if x+size <= 0)
+     memcpy(&line[min(-x,15)],row_start,max(x+size,0) * sizeof(pel_t));
+   }
+   else
+   {
+     // Right edge: copy the in-picture part (nothing if x >= width) ...
+     memcpy(line,&row_start[x], max((width - x),0) * sizeof(pel_t));
+ 
+     // ... and replicate the last sample for the remaining columns
+     for (col = max(width,x); col < x+size; col++)
+       line[col-x] = row_start [width-1];
+   }
+ 
+   // Result lives in the shared static 16-entry buffer (not reentrant)
+   return line;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reference buffer, 1/4 pel
+  ************************************************************************
+  */
+ pel_t UMVPelY_14 (pel_t **Pic, int y, int x, int height4, int width4)
+ {
+   int row = y;
+   int col = x;
+ 
+   // Out-of-range rows keep only their two low bits, relative to the
+   // top (0) or bottom (height4) border
+   if (y < 0)
+     row = y & 3;
+   else if (y > height4)
+     row = height4 + (y & 3);
+ 
+   // Same mapping for the column against 0 and width4
+   if (x < 0)
+     col = x & 3;
+   else if (x > width4)
+     col = width4 + (x & 3);
+ 
+   return Pic [row][col];
+ }
+ 
+ pel_t FastPelY_14 (pel_t **Pic, int y, int x, int height, int width)
+ {
+   // Unchecked read of sample (y, x); height and width are unused
+   pel_t *row = Pic[y];
+ 
+   return row[x];
+ }
+ 
+ 
+ 
+ pel_t *FastLine4X (pel_t **Pic, int y, int x, int height, int width)
+ {
+   // Unchecked address of sample (y, x); height and width are unused
+   return Pic[y] + x;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reference buffer, 1/4 pel
+  ************************************************************************
+  */
+ pel_t *UMVLine4X (pel_t **Pic, int y, int x, int height4, int width4)
+ {
+   pel_t *src_row;
+   int k, col;
+   int row = y;
+ 
+   // Map an out-of-range row to the border, keeping its two low bits
+   if (y < 0)
+     row = y & 3;
+   else if (y > height4)
+     row = height4 + (y & 3);
+ 
+   src_row = Pic [row];
+ 
+   // Only every 4th entry of line4 (0, 4, ..., 60) is written
+   for (k = 0; k < 64; k += 4)
+   {
+     col = x + k;
+     if (col < 0)
+       col = col & 3;
+     else if (col > width4)
+       col = width4 + (col & 3);
+ 
+     line4[k] = src_row [col];
+   }
+ 
+   // Result lives in the shared static buffer (not reentrant)
+   return line4;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/refbuf.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/refbuf.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/refbuf.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,28 ----
+ 
+ /*!
+  ************************************************************************
+  * \file refbuf.h
+  *
+  * \brief
+  *    Declarations of the reference frame buffer types and functions
+  ************************************************************************
+  */
+ #ifndef _REBUF_H_
+ #define _REBUF_H_
+ 
+ // Single-sample reads from a 2-D (row pointer) buffer.  The UMV variant
+ // remaps out-of-range coordinates to the border; the Fast variant indexes
+ // Pic[y][x] directly.
+ pel_t UMVPelY_14 (pel_t **Pic, int y, int x, int height, int width);
+ pel_t FastPelY_14 (pel_t **Pic, int y, int x, int height, int width);
+ 
+ // NOTE(review): refbuf.c defines FastPelY_14 but no FastPelY14 (without the
+ // underscore) -- this prototype looks stale; confirm before relying on it.
+ pel_t FastPelY14 (pel_t **Pic, int y, int x, int height, int width);
+ // Returns a pointer to a static buffer; not reentrant.
+ pel_t *UMVLine4X (pel_t **Pic, int y, int x, int height4, int width4);
+ 
+ pel_t *FastLine4X (pel_t **Pic, int y, int x, int height, int width);
+ 
+ // 16-sample line access on a linear buffer.  UMVLine16Y_11 may return a
+ // pointer to a static buffer when the line crosses the left/right edge.
+ // NOTE(review): refbuf.c also defines FastLineX() and UMVLineX(), which are
+ // not declared in this header.
+ pel_t *FastLine16Y_11 (pel_t *Pic, int y, int x, int height, int width);
+ pel_t *UMVLine16Y_11 (pel_t *Pic, int y, int x, int height, int width);
+ 
+ // Single-sample writes (2-D and linear addressing)
+ void PutPel_14 (pel_t **Pic, int y, int x, pel_t val);
+ void PutPel_11 (pel_t *Pic, int pel_pos, pel_t val);
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/rtp.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rtp.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/rtp.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,613 ----
+ 
+ /*!
+  *****************************************************************************
+  *
+  * \file rtp.c
+  *
+  * \brief
+  *    Functions to handle RTP headers and packets per RFC1889 and RTP NAL spec
+  *    Functions support little endian systems only (Intel, not Motorola/Sparc)
+  *
+  * \date
+  *    30 September 2001
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *****************************************************************************/
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ 
+ #include "rtp.h"
+ 
+ // A little trick to avoid those horrible #if TRACE all over the source code
+ #if TRACE
+ #define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE)
+ #else
+ #define SYMTRACESTRING(s) // to nothing
+ #endif
+ 
+ 
+ int CurrentRTPTimestamp = 0;      //! The RTP timestamp of the current packet,
+                                   //! incremented with all P and I frames
+ int CurrentRTPSequenceNumber = 0; //! The RTP sequence number of the current packet
+                                   //! incremented by one for each sent packet
+ 
+ FILE *f;
+ /*!
+  *****************************************************************************
+  *
+  * \brief 
+  *    ComposeRTPpacket composes the complete RTP packet using the various
+  *    structure members of the RTPpacket_t structure
+  *
+  * \return
+  *    0 in case of success
+  *    negative error code in case of failure
+  *
+  * \par Parameters
+  *    Caller is responsible to allocate enough memory for the generated packet
+  *    in parameter->packet. Typically a malloc of 12+paylen bytes is sufficient
+  *
+  * \par Side effects
+  *    none
+  *
+  * \note
+  *    Function contains assert() tests for debug purposes (consistency checks
+  *    for RTP header fields
+  *
+  * \date
+  *    30 September 2001
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *****************************************************************************/
+ 
+ 
+ int ComposeRTPPacket (RTPpacket_t *p)
+ {
+   byte *out;
+ 
+   // Debug-only consistency checks on the header fields and buffers
+   assert (p->v == 2);
+   assert (p->p == 0);
+   assert (p->x == 0);
+   assert (p->cc == 0);    // mixer designers need to change this one
+   assert (p->m == 0 || p->m == 1);
+   assert (p->pt < 128);
+   assert (p->seq < 65536);
+   assert (p->payload != NULL);
+   assert (p->paylen < 65536 - 40);  // 2**16 -40 for IP/UDP/RTP header
+   assert (p->packet != NULL);
+ 
+   out = p->packet;
+ 
+   // 12-byte header.  Fields are packed starting at the least significant
+   // bit of each byte and seq is stored low byte first.
+   // NOTE(review): this is not the bit/byte order of the RFC 3550 wire
+   // format; readers of the output file have to use the same layout.
+   out[0] = (   (p->v)
+             |  (p->p << 2)
+             |  (p->x << 3)
+             |  (p->cc << 4) );
+   out[1] = (   (p->m)
+             |  (p->pt << 1) );
+   out[2] = p->seq & 0xff;
+   out[3] = (p->seq >> 8) & 0xff;
+ 
+   // timestamp and ssrc are copied in host byte order
+   memcpy (out + 4, &p->timestamp, 4);
+   memcpy (out + 8, &p->ssrc, 4);
+ 
+   // Payload follows the header immediately
+   memcpy (out + 12, p->payload, p->paylen);
+   p->packlen = p->paylen+12;
+ 
+   return 0;
+ }
+ 
+ 
+ 
+ /*!
+  *****************************************************************************
+  *
+  * \brief 
+  *    WriteRTPPacket writes the supplied RTP packet to the output file
+  *
+  * \return
+  *    0 in case of success
+  *    <0 in case of write failure (typically fatal)
+  *
+  * \param p
+  *    the RTP packet to be written (after ComposeRTPPacket() )
+  * \param f
+  *    output file
+  *
+  * \date
+  *    October 23, 2001
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *****************************************************************************/
+ 
+ int WriteRTPPacket (RTPpacket_t *p, FILE *f)
+ {
+   int intime = -1;   // written as a constant 4-byte field after the length
+ 
+   assert (f != NULL);
+   assert (p != NULL);
+ 
+   // Record layout: 4-byte packet length, 4-byte intime, packet bytes.
+   // Writing stops at the first fwrite() that does not report one item.
+   if (fwrite (&p->packlen, 4, 1, f) != 1 ||
+       fwrite (&intime, 4, 1, f) != 1 ||
+       fwrite (p->packet, p->packlen, 1, f) != 1)
+   {
+     return -1;
+   }
+ 
+   return 0;
+ }
+ 
+ 
+ 
+ 
+ 
+ /*!
+  *****************************************************************************
+  *
+  * \brief 
+  *    int WriteRTPNALU writes a NALU to the RTP file
+  *
+  * \return
+  *    Length of the NALU in bits (n->len * 8)
+  *
+  * \par Side effects
+  *    Packet written, RTPSequenceNumber and RTPTimestamp updated
+  *   
+  * \date
+  *    December 13, 2002
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  *****************************************************************************/
+ 
+ 
+ int WriteRTPNALU (NALU_t *n)
+ {
+   RTPpacket_t *p;
+ 
+   assert (f != NULL);
+   assert (n != NULL);
+   assert (n->len < 65000);
+ 
+   // Rebuild the first NALU byte from the separate header fields
+   n->buf[0] =
+     n->forbidden_bit << 7      |
+     n->nal_reference_idc << 5  |
+     n->nal_unit_type;
+ 
+   // Set RTP structure elements and malloc() memory for the buffers
+   if ((p = malloc (sizeof *p)) == NULL)
+     no_mem_exit ("RTPWriteNALU-1");
+   if ((p->packet = malloc (MAXRTPPACKETSIZE)) == NULL)
+     no_mem_exit ("RTPWriteNALU-2");
+   if ((p->payload = malloc (MAXRTPPACKETSIZE)) == NULL)
+     no_mem_exit ("RTPWriteNALU-3");
+ 
+   p->v=2;
+   p->p=0;
+   p->x=0;
+   p->cc=0;
+   p->m=(n->startcodeprefix_len==4)&1;     // a long startcode of Annex B sets marker bit of RTP
+                                           // Not exactly according to the RTP payload spec, but
+                                           // good enough for now (hopefully).
+                                           //! For error resilience work, we need the correct
+                                           //! marker bit.  Introduce a nalu->marker and set it in
+                                           //! terminate_slice()?
+   p->pt=H26LPAYLOADTYPE;
+   p->seq=CurrentRTPSequenceNumber++;      // one sequence number per packet written
+   p->timestamp=CurrentRTPTimestamp;
+   p->ssrc=H26LSSRC;
+   p->paylen = n->len;
+   memcpy (p->payload, n->buf, n->len);
+ 
+   // Generate complete RTP packet
+   if (ComposeRTPPacket (p) < 0)
+   {
+     printf ("Cannot compose RTP packet, exit\n");
+     exit (-1);
+   }
+   if (WriteRTPPacket (p, f) < 0)
+   {
+     // packlen is unsigned int, so it has to be printed with %u
+     printf ("Cannot write %u bytes of RTP packet to outfile, exit\n", p->packlen);
+     exit (-1);
+   }
+   free (p->packet);
+   free (p->payload);
+   free (p);
+ 
+   // Note: the value returned is the NALU length in bits, not bytes
+   return (n->len * 8);
+ }
+ 
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    RTPUpdateTimestamp: patches the RTP timestamp depending on the TR
+  *
+  * \param 
+  *    tr: TR of the following NALUs
+  *
+  * \return
+  *    none.  
+  *
+  ********************************************************************************************
+ */
+ 
+ 
+ void RTPUpdateTimestamp (int tr)
+ {
+   static int prev_tr = -1;   // -1 marks "not called yet"
+   int step;
+ 
+   if (prev_tr != -1)
+   {
+     /*! The following code assumes a wrap around of TR at 256, and
+         needs to be changed as soon as this is no more true.
+ 
+         B frames make tr go backwards on purpose, which has to be told apart
+         from a natural wrap-around.  Heuristic: a step back of at most 10
+         ticks is taken as a B frame, anything further back as a wrap-around.
+     */
+     step = tr - prev_tr;
+ 
+     if (step < -10)        // wrap-around
+       step += 256;
+ 
+     CurrentRTPTimestamp += step * RTP_TR_TIMESTAMP_MULT;
+     prev_tr = tr;
+   }
+   else
+   {
+     // First invocation: start at timestamp 0 and ignore the tr passed in
+     CurrentRTPTimestamp = 0;  //! This is a violation of the security req. of
+                               //! RTP (random timestamp), but easier to debug
+     prev_tr = 0;
+   }
+ }
+ 
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    Opens the output file for the RTP packet stream
+  *
+  * \param Filename
+  *    The filename of the file to be opened
+  *
+  * \return
+  *    none.  Function terminates the program in case of an error
+  *
+  ********************************************************************************************
+ */
+ 
+ void OpenRTPFile (char *Filename)
+ {
+   // Open (create/truncate) the binary output file and keep it in global f
+   f = fopen (Filename, "wb");
+   if (f != NULL)
+     return;
+ 
+   // Open failed: report and terminate
+   printf ("Fatal: cannot open bitstream file '%s', exit (-1)\n", Filename);
+   exit (-1);
+ }
+ 
+ 
+ /*!
+  ********************************************************************************************
+  * \brief 
+  *    Closes the output file for the RTP packet stream
+  *
+  * \return
+  *    none.  The result of fclose() is not checked
+  *
+  ********************************************************************************************
+ */
+ 
+ void CloseRTPFile ()
+ {
+   // Close the global stream f (assigned in OpenRTPFile).  The return value
+   // of fclose() is ignored and f itself is left unchanged afterwards.
+   fclose(f);
+ }
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ #if 0
+ /*!
+  *****************************************************************************
+  *
+  * \brief 
+  *    int aggregationRTPWriteBits (int marker) write the Slice header for the RTP NAL      
+  *
+  * \return
+  *    Number of bytes written to output file
+  *
+  * \param marker
+  *    marker bit,
+  *
+  * \par Side effects
+  *    Packet written, RTPSequenceNumber and RTPTimestamp updated
+  *   
+  * \date
+  *    September 10, 2002
+  *
+  * \author
+  *    Dong Tian   tian at cs.tut.fi
+  *****************************************************************************/
+ 
+ // NOTE: this function is inside an "#if 0" region and is not compiled.
+ // It builds one aggregation (compound) RTP packet: the PacketType byte, an
+ // optional SEI sub-packet, then the slice data sub-packet, and writes it
+ // to out.  Returns p->packlen; exits the program on compose/write failure.
+ int aggregationRTPWriteBits (int Marker, int PacketType, int subPacketType, void * bitstream, 
+                     int BitStreamLenInByte, FILE *out)
+ {
+   RTPpacket_t *p;
+   int offset;
+ 
+ //  printf( "writing aggregation packet...\n");
+   assert (out != NULL);
+   assert (BitStreamLenInByte < 65000);
+   assert (bitstream != NULL);
+   assert ((PacketType&0xf) == 4);
+ 
+   // Set RTP structure elements and alloca() memory for the buffers
+   // (the alloca() results are used without a NULL check)
+   p = (RTPpacket_t *) alloca (sizeof (RTPpacket_t));
+   p->packet=alloca (MAXRTPPACKETSIZE);
+   p->payload=alloca (MAXRTPPACKETSIZE);
+   p->v=2;
+   p->p=0;
+   p->x=0;
+   p->cc=0;
+   p->m=Marker&1;
+   p->pt=H26LPAYLOADTYPE;
+   p->seq=CurrentRTPSequenceNumber++;
+   p->timestamp=CurrentRTPTimestamp;
+   p->ssrc=H26LSSRC;
+ 
+   offset = 0;
+   p->payload[offset++] = PacketType; // This is the first byte of the compound packet
+ 
+   // FIRST, write the sei message to aggregation packet, if it is available
+   if ( HaveAggregationSEI() )
+   {
+     p->payload[offset++] = sei_message[AGGREGATION_SEI].subPacketType; // this is the first byte of the first subpacket
+     // 2-byte sub-packet length, stored through a short* cast in host byte order
+     *(short*)&(p->payload[offset]) = sei_message[AGGREGATION_SEI].payloadSize;
+     offset += 2;
+     memcpy (&p->payload[offset], sei_message[AGGREGATION_SEI].data, sei_message[AGGREGATION_SEI].payloadSize);
+     offset += sei_message[AGGREGATION_SEI].payloadSize;
+ 
+     clear_sei_message(AGGREGATION_SEI);
+   }
+ 
+   // SECOND, write other payload to the aggregation packet
+   // to do ...
+ 
+   // LAST, write the slice data to the aggregation packet
+   p->payload[offset++] = subPacketType;  // this is the first byte of the second subpacket
+   // 2-byte sub-packet length, stored through a short* cast in host byte order
+   *(short*)&(p->payload[offset]) = BitStreamLenInByte;
+   offset += 2;
+   memcpy (&p->payload[offset], bitstream, BitStreamLenInByte);
+   offset += BitStreamLenInByte;
+ 
+   p->paylen = offset;  // 1 +3 +seiPayload.payloadSize +3 +BitStreamLenInByte
+ 
+   // Now the payload is ready, we can ...
+   // Generate complete RTP packet
+   if (ComposeRTPPacket (p) < 0)
+   {
+     printf ("Cannot compose RTP packet, exit\n");
+     exit (-1);
+   }
+   if (WriteRTPPacket (p, out) < 0)
+   {
+     printf ("Cannot write %d bytes of RTP packet to outfile, exit\n", p->packlen);
+     exit (-1);
+   }
+   return (p->packlen);
+ 
+ }
+ 
+ 
+ /*!
+  *****************************************************************************
+  * \isAggregationPacket
+  * \brief 
+  *    Determine if current packet is normal packet or compound packet (aggregation
+  *    packet)
+  *
+  * \return
+  *    return TRUE, if it is compound packet.
+  *    return FALSE, otherwise.
+  *   
+  * \date
+  *    September 10, 2002
+  *
+  * \author
+  *    Dong Tian   tian at cs.tut.fi
+  *****************************************************************************/
+ // NOTE: inside an "#if 0" region, not compiled.
+ // Returns TRUE exactly when HaveAggregationSEI() reports a pending
+ // aggregation SEI message, FALSE otherwise.
+ Boolean isAggregationPacket()
+ {
+   if (HaveAggregationSEI())
+   {
+     return TRUE;
+   }
+   // Until Sept 2002, the JM will produce aggregation packet only for some SEI messages
+ 
+   return FALSE;
+ }
+ 
+ /*!
+  *****************************************************************************
+  * \PrepareAggregationSEIMessage
+  * \brief 
+  *    Prepare the aggregation sei message.
+  *    
+  * \date
+  *    September 10, 2002
+  *
+  * \author
+  *    Dong Tian   tian at cs.tut.fi
+  *****************************************************************************/
+ // NOTE: inside an "#if 0" region, not compiled.
+ // For every SEI kind whose seiHas... flag is set, finalizes that payload
+ // and appends it to sei_message[AGGREGATION_SEI] via write_sei_message().
+ // If at least one payload was appended, the aggregation message is
+ // finalized at the end.
+ void PrepareAggregationSEIMessage()
+ {
+   Boolean has_aggregation_sei_message = FALSE;
+   // prepare the sei message here
+   // write the spare picture sei payload to the aggregation sei message
+   if (seiHasSparePicture && img->type != B_SLICE)
+   {
+     FinalizeSpareMBMap();
+     assert(seiSparePicturePayload.data->byte_pos == seiSparePicturePayload.payloadSize);
+     write_sei_message(AGGREGATION_SEI, seiSparePicturePayload.data->streamBuffer, seiSparePicturePayload.payloadSize, SEI_SPARE_PICTURE);
+     has_aggregation_sei_message = TRUE;
+   }
+   // write the sub sequence information sei payload to the aggregation sei message
+   if (seiHasSubseqInfo)
+   {
+     FinalizeSubseqInfo(img->layer);
+     write_sei_message(AGGREGATION_SEI, seiSubseqInfo[img->layer].data->streamBuffer, seiSubseqInfo[img->layer].payloadSize, SEI_SUBSEQ_INFORMATION);
+     ClearSubseqInfoPayload(img->layer);
+     has_aggregation_sei_message = TRUE;
+   }
+   // write the sub sequence layer information sei payload to the aggregation sei message
+   // (only for img->number == 0)
+   if (seiHasSubseqLayerInfo && img->number == 0)
+   {
+     FinalizeSubseqLayerInfo();
+     write_sei_message(AGGREGATION_SEI, seiSubseqLayerInfo.data, seiSubseqLayerInfo.payloadSize, SEI_SUBSEQ_LAYER_CHARACTERISTICS);
+     seiHasSubseqLayerInfo = FALSE;
+     has_aggregation_sei_message = TRUE;
+   }
+   // write the sub sequence characteristics payload to the aggregation sei message
+   if (seiHasSubseqChar)
+   {
+     FinalizeSubseqChar();
+     write_sei_message(AGGREGATION_SEI, seiSubseqChar.data->streamBuffer, seiSubseqChar.payloadSize, SEI_SUBSEQ_CHARACTERISTICS);
+     ClearSubseqCharPayload();
+     has_aggregation_sei_message = TRUE;
+   }
+   // write the pan scan rectangle info sei payload to the aggregation sei message
+   if (seiHasPanScanRectInfo)
+   {
+     FinalizePanScanRectInfo();
+     write_sei_message(AGGREGATION_SEI, seiPanScanRectInfo.data->streamBuffer, seiPanScanRectInfo.payloadSize, SEI_PANSCAN_RECT);
+     ClearPanScanRectInfoPayload();
+     has_aggregation_sei_message = TRUE;
+   }
+   // write the arbitrary (unregistered) info sei payload to the aggregation sei message
+   if (seiHasUser_data_unregistered_info)
+   {
+     FinalizeUser_data_unregistered();
+     write_sei_message(AGGREGATION_SEI, seiUser_data_unregistered.data->streamBuffer, seiUser_data_unregistered.payloadSize, SEI_USER_DATA_UNREGISTERED);
+     ClearUser_data_unregistered();
+     has_aggregation_sei_message = TRUE;
+   }
+   // write the registered (ITU-T T.35) user data sei payload to the aggregation sei message
+   if (seiHasUser_data_registered_itu_t_t35_info)
+   {
+     FinalizeUser_data_registered_itu_t_t35();
+     write_sei_message(AGGREGATION_SEI, seiUser_data_registered_itu_t_t35.data->streamBuffer, seiUser_data_registered_itu_t_t35.payloadSize, SEI_USER_DATA_REGISTERED_ITU_T_T35);
+     ClearUser_data_registered_itu_t_t35();
+     has_aggregation_sei_message = TRUE;
+   }
+   //write RandomAccess info sei payload to the aggregation sei message
+   if (seiHasRandomAccess_info)
+   {
+     FinalizeRandomAccess();
+     write_sei_message(AGGREGATION_SEI, seiRandomAccess.data->streamBuffer, seiRandomAccess.payloadSize, SEI_RANDOM_ACCESS_POINT);
+     ClearRandomAccess();
+     has_aggregation_sei_message = TRUE;
+   }
+   // more aggregation sei payload is written here...
+ 
+   // JVT-D099 write the scene information SEI payload
+   // (unlike the cases above, no Clear...() call follows here)
+   if (seiHasSceneInformation)
+   {
+     FinalizeSceneInformation();
+     write_sei_message(AGGREGATION_SEI, seiSceneInformation.data->streamBuffer, seiSceneInformation.payloadSize, SEI_SCENE_INFORMATION);
+     has_aggregation_sei_message = TRUE;
+   }
+   // End JVT-D099
+ 
+   // after all the sei payload is written
+   if (has_aggregation_sei_message)
+     finalize_sei_message(AGGREGATION_SEI);
+ }
+ 
+ /*!
+  *****************************************************************************
+  * \begin_sub_sequence_rtp
+  * \brief 
+  *    do some initialization for sub-sequence under rtp
+  *    
+  * \date
+  *    September 10, 2002
+  *
+  * \author
+  *    Dong Tian   tian at cs.tut.fi
+  *****************************************************************************/
+ 
+ // NOTE: inside an "#if 0" region, not compiled.
+ // Does nothing unless the output mode is PAR_OF_RTP and
+ // input->NumFramesInELSubSeq is non-zero.  Otherwise initializes the
+ // sub-sequence info for layer 0 and/or layer 1 depending on IMG_NUMBER.
+ void begin_sub_sequence_rtp()
+ {
+   if ( input->of_mode != PAR_OF_RTP || input->NumFramesInELSubSeq == 0 ) 
+     return;
+ 
+   // begin to encode the base layer subseq
+   if ( IMG_NUMBER == 0 )
+   {
+ //    printf("begin to encode the base layer subseq\n");
+     InitSubseqInfo(0);
+     if (1)   // always taken
+       UpdateSubseqChar();
+   }
+   // begin to encode the enhanced layer subseq
+   if ( IMG_NUMBER % (input->NumFramesInELSubSeq+1) == 1 )
+   {
+ //    printf("begin to encode the enhanced layer subseq\n");
+     InitSubseqInfo(1);  // init the sub-sequence in the enhanced layer
+ //    add_dependent_subseq(1);
+     if (1)   // always taken
+       UpdateSubseqChar();
+   }
+ }
+ 
+ /*!
+  *****************************************************************************
+  * \end_sub_sequence_rtp
+  * \brief 
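+  *    close the sub-sequence info of the base layer and/or the enhanced
+  *    layer when the respective sub-sequence ends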
+  *    
+  * \date
+  *    September 10, 2002
+  *
+  * \author
+  *    Dong Tian   tian at cs.tut.fi
+  *****************************************************************************/
+ // NOTE: inside an "#if 0" region, not compiled.
+ // Calls CloseSubseqInfo(0) on the last frame (img->number ==
+ // input->no_frames-1) and CloseSubseqInfo(1) when IMG_NUMBER hits the end
+ // of an enhanced layer sub-sequence (condition differs with/without B frames).
+ void end_sub_sequence_rtp()
+ {
+   // end of the base layer:
+   if ( img->number == input->no_frames-1 )
+   {
+ //    printf("end of encoding the base layer subseq\n");
+     CloseSubseqInfo(0);
+ //    updateSubSequenceBox(0);
+   }
+   // end of the enhanced layer:
+   if ( ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==0) && (input->successive_Bframe != 0) && (IMG_NUMBER>0)) || // there are B frames
+     ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==input->NumFramesInELSubSeq) && (input->successive_Bframe==0))   // there are no B frames
+     )
+   {
+ //    printf("end of encoding the enhanced layer subseq\n");
+     CloseSubseqInfo(1);
+ //    add_dependent_subseq(1);
+ //    updateSubSequenceBox(1);
+   }
+ }
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/rtp.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/rtp.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/rtp.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,72 ----
+ 
+ /*!
+  ***************************************************************************
+  *
+  * \file rtp.h
+  *
+  * \brief
+  *    Definition of structures and functions to handle RTP headers.  For a
+  *    description of RTP see RFC1889 on http://www.ietf.org
+  *
+  * \date
+  *    30 September 2001
+  *
+  * \author
+  *    Stephan Wenger   stewe at cs.tu-berlin.de
+  **************************************************************************/
+ 
+ #ifndef _RTP_H_
+ #define _RTP_H_
+ 
+ #include "nalu.h"
+ 
+ #define MAXRTPPAYLOADLEN  (65536 - 40)    //!< Maximum payload size of an RTP packet
+ #define MAXRTPPACKETSIZE  (65536 - 28)    //!< Maximum size of an RTP packet incl. header
+ #define H26LPAYLOADTYPE 105               //!< RTP payload type fixed here for simplicity
+ #define H26LSSRC 0x12345678               //!< SSRC, chosen to simplify debugging
+ #define RTP_TR_TIMESTAMP_MULT 1000        //!< should be something like 27 MHz / 29.97 Hz
+ 
+ //! One RTP packet: the individual header fields, the payload, and the
+ //! serialized packet buffer.
+ typedef struct 
+ {
+   unsigned int v;          //!< Version, 2 bits, MUST be 0x2
+   unsigned int p;          //!< Padding bit, Padding MUST NOT be used
+   unsigned int x;          //!< Extension, MUST be zero
+   unsigned int cc;         /*!< CSRC count, normally 0 in the absence 
+                                 of RTP mixers */
+   unsigned int m;          //!< Marker bit
+   unsigned int pt;         //!< 7 bits, Payload Type, dynamically established
+   unsigned int seq;        /*!< RTP sequence number, incremented by one for 
+                                 each sent packet */
+   unsigned int timestamp;  //!< timestamp, 27 MHz for H.264
+   unsigned int ssrc;       //!< Synchronization Source, chosen randomly
+   byte *       payload;    //!< the payload including payload headers
+   unsigned int paylen;     //!< length of payload in bytes
+   byte *       packet;     //!< complete packet including header and payload
+   unsigned int packlen;    //!< length of packet, typically paylen+12
+ } RTPpacket_t;
+ 
+ #if 0
+ int  ComposeRTPPacket (RTPpacket_t *p);
+ int  DecomposeRTPpacket (RTPpacket_t *p);
+ int  WriteRTPPacket (RTPpacket_t *p, FILE *f);
+ void DumpRTPHeader (RTPpacket_t *p);
+ void RTPUpdateTimestamp (int tr);
+ int  RTPWriteBits (int Marker, int PacketType, void * bitstream, 
+                    int BitStreamLenInByte, FILE *out);
+ 
+ Boolean isAggregationPacket();
+ int aggregationRTPWriteBits (int Marker, int PacketType, int subPacketType, void * bitstream, int BitStreamLenInByte, FILE *out);
+ 
+ void begin_sub_sequence_rtp();
+ void end_sub_sequence_rtp();
+ #endif
+ 
+ void RTPUpdateTimestamp (int tr);
+ void OpenRTPFile (char *Filename);
+ void CloseRTPFile ();
+ int WriteRTPNALU (NALU_t *n);
+ 
+ 
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/sei.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/sei.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/sei.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1644 ----
+ 
+ /*!
+  ************************************************************************
+  *  \file
+  *     sei.c
+  *  \brief
+  *     implementation of SEI related functions
+  *  \author(s)
+  *      - Dong Tian                             <tian at cs.tut.fi>
+  *
+  ************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <memory.h>
+ 
+ #include "global.h"
+ 
+ #include "memalloc.h"
+ #include "rtp.h"
+ #include "mbuffer.h"
+ #include "sei.h"
+ #include "vlc.h"
+ 
+ Boolean seiHasTemporal_reference=FALSE;
+ Boolean seiHasClock_timestamp=FALSE;
+ Boolean seiHasPanscan_rect=FALSE;
+ Boolean seiHasBuffering_period=FALSE;
+ Boolean seiHasHrd_picture=FALSE;
+ Boolean seiHasFiller_payload=FALSE;
+ Boolean seiHasUser_data_registered_itu_t_t35=FALSE;
+ Boolean seiHasUser_data_unregistered=FALSE;
+ Boolean seiHasRandom_access_point=FALSE;
+ Boolean seiHasRef_pic_buffer_management_repetition=FALSE;
+ Boolean seiHasSpare_picture=FALSE;
+ 
+ Boolean seiHasSceneInformation=FALSE;
+ 
+ Boolean seiHasSubseq_information=FALSE;
+ Boolean seiHasSubseq_layer_characteristics=FALSE;
+ Boolean seiHasSubseq_characteristics=FALSE;
+ 
+ /*
+  ************************************************************************
+  *  \basic functions on supplemental enhancement information
+  *  \brief
+  *     The implementations are based on FCD
+  ************************************************************************
+  */
+ 
+ //! sei_message[0]: this struct is to store the sei message packetized independently 
+ //! sei_message[1]: this struct is to store the sei message packetized together with slice data
+ sei_struct sei_message[2];
+ 
+ void InitSEIMessages()
+ {
+   int idx = 0;
+ 
+   // Allocate and reset the two SEI message buffers
+   while (idx < 2)
+   {
+     sei_message[idx].data = malloc(MAXRTPPAYLOADLEN);
+     if (NULL == sei_message[idx].data)
+       no_mem_exit("InitSEIMessages: sei_message[i].data");
+     sei_message[idx].subPacketType = SEI_PACKET_TYPE;
+     clear_sei_message(idx);
+     idx++;
+   }
+ 
+   // Initialize the individual SEI payload generators
+   seiSparePicturePayload.data = NULL;
+   InitSparePicture();
+   InitSubseqChar();
+   // sub-sequence layer info only when NumFramesInELSubSeq is non-zero
+   if (input->NumFramesInELSubSeq)
+     InitSubseqLayerInfo();
+   InitSceneInformation();                 // JVT-D099
+   InitPanScanRectInfo();                  // pan scan rectangle
+   InitUser_data_unregistered();           // user data, unregistered
+   InitUser_data_registered_itu_t_t35();   // user data, registered (ITU-T T.35)
+   InitRandomAccess();                     // random access
+ }
+ 
+ void CloseSEIMessages()
+ {
+   // Number of entries actually present in sei_message[].  The loop below
+   // used to run to MAX_LAYER_NUMBER, which is unrelated to the array size
+   // and would index out of bounds if that constant ever exceeded it.
+   const int num_messages = (int) (sizeof(sei_message) / sizeof(sei_message[0]));
+   int i;
+ 
+   // Release the individual SEI payload generators
+   if (input->NumFramesInELSubSeq != 0)
+     CloseSubseqLayerInfo();
+ 
+   CloseSubseqChar();
+   CloseSparePicture();
+   CloseSceneInformation(); // JVT-D099
+   //Shankar Regunathan Oct 2002
+   ClosePanScanRectInfo();
+   CloseUser_data_unregistered();
+   CloseUser_data_registered_itu_t_t35();
+   CloseRandomAccess();
+ 
+   // Release the message buffers allocated in InitSEIMessages()
+   for (i=0; i<num_messages; i++)
+   {
+     free( sei_message[i].data );   // free(NULL) is a no-op
+     sei_message[i].data = NULL;
+   }
+ }
+ 
+ Boolean HaveAggregationSEI()
+ {
+   Boolean have_sei = FALSE;
+ 
+   // TRUE as soon as any of the conditions below holds (checked in order)
+   if ((sei_message[AGGREGATION_SEI].available && img->type != B_SLICE) ||
+       seiHasSubseqInfo ||
+       (seiHasSubseqLayerInfo && img->number == 0) ||
+       seiHasSubseqChar ||
+       seiHasSceneInformation ||
+       seiHasPanScanRectInfo ||
+       seiHasUser_data_unregistered_info ||
+       seiHasUser_data_registered_itu_t_t35_info ||
+       seiHasRecoveryPoint_info)
+   {
+     have_sei = TRUE;
+   }
+ 
+   return have_sei;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *     write one sei payload to the sei message
+  *  \param id
+  *    0, if this is the normal packet\n
+  *    1, if this is an aggregation packet
+  *  \param payload
+  *    a pointer to the sei payload. Note that the bitstream
+  *    should already be byte aligned.
+  *  \param payload_size
+  *    the size of the sei payload
+  *  \param payload_type
+  *    the type of the sei payload
+  *  \par Output
+  *    the content of the sei message (sei_message[id]) is updated.
+  ************************************************************************
+  */
+ void write_sei_message(int id, byte* payload, int payload_size, int payload_type)
+ {
+   int offset, type, size;
+   assert(payload_type >= 0 && payload_type < SEI_MAX_ELEMENTS);
+ 
+   type = payload_type;
+   size = payload_size;
+   // Append behind whatever is already stored in this message
+   offset = sei_message[id].payloadSize;
+ 
+   // payloadType: one 0xFF byte per full 255, followed by a last byte.
+   // The last byte must be below 0xFF, otherwise a reader takes it for one
+   // more 0xFF prefix byte -- so a remainder of exactly 255 still needs a
+   // prefix byte and a trailing 0 (hence ">=", not ">").
+   while ( type >= 255 )
+   {
+     sei_message[id].data[offset++] = 0xFF;
+     type = type - 255;
+   }
+   sei_message[id].data[offset++] = type;
+ 
+   // payloadSize: same coding as payloadType
+   while ( size >= 255 )
+   {
+     sei_message[id].data[offset++] = 0xFF;
+     size = size - 255;
+   }
+   sei_message[id].data[offset++] = size;
+ 
+   // The data buffer holds MAXRTPPAYLOADLEN bytes (see InitSEIMessages)
+   assert(offset + payload_size <= MAXRTPPAYLOADLEN);
+   memcpy(sei_message[id].data + offset, payload, payload_size);
+   offset += payload_size;
+ 
+   sei_message[id].payloadSize = offset;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *     write rbsp_trailing_bits to the sei message
+  *  \param id
+  *    0, if this is the normal packet \n
+  *    1, if this is a aggregation packet
+  *  \par Output
+  *    the content of the sei message is updated and ready for packetisation
+  ************************************************************************
+  */
+ void finalize_sei_message(int id)
+ {
+   sei_struct *msg = &sei_message[id];
+ 
+   // Append the trailing-bits byte (stop bit followed by seven zero bits)
+   msg->data[msg->payloadSize] = 0x80;
+   msg->payloadSize += 1;
+ 
+   // Message is complete and may be packetized
+   msg->available = TRUE;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *     reset one sei message buffer
+  *  \param id
+  *    0, if this is the normal packet \n
+  *    1, if this is an aggregation packet
+  *  \par Output
+  *    sei_message[id]: the first MAXRTPPAYLOADLEN data bytes are zeroed,
+  *    payloadSize is 0 and the available flag is FALSE
+  ************************************************************************
+  */
+ void clear_sei_message(int id)
+ {
+   sei_message[id].available   = FALSE;
+   sei_message[id].payloadSize = 0;
+   memset( sei_message[id].data, 0, MAXRTPPAYLOADLEN );
+ }
+ 
+ // Shift one bit into dest->byte_buf. When the eighth bit has been collected
+ // the byte is stored in dest->streamBuffer and the accumulator is reset.
+ static void AppendOneBit2Buf( Bitstream* dest, int bit )
+ {
+   dest->byte_buf <<= 1;
+   if (bit)
+     dest->byte_buf |= 1;
+   dest->bits_to_go--;
+   if (dest->bits_to_go == 0)
+   {
+     dest->bits_to_go = 8;
+     dest->streamBuffer[dest->byte_pos++] = dest->byte_buf;
+     dest->byte_buf = 0;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *     append all bits held by one bitstream buffer to another one
+  *  \param dest
+  *    pointer to the bitstream buffer that receives the bits
+  *  \param source
+  *    pointer to the bitstream buffer the bits are read from
+  *  \par Output
+  *    dest: byte_buf, bits_to_go, byte_pos and streamBuffer are updated;
+  *    source is only read.
+  ************************************************************************
+  */
+ void AppendTmpbits2Buf( Bitstream* dest, Bitstream* source )
+ {
+   int byte_idx, shift;
+   int tail_bits;
+ 
+   // complete bytes of the source stream buffer, most significant bit first
+   for (byte_idx = 0; byte_idx < source->byte_pos; byte_idx++)
+   {
+     for (shift = 7; shift >= 0; shift--)
+       AppendOneBit2Buf( dest, (source->streamBuffer[byte_idx] >> shift) & 1 );
+   }
+ 
+   // the (8 - source->bits_to_go) pending bits sit in the low end of
+   // source->byte_buf; copy them starting with the oldest (highest) one
+   tail_bits = 8 - source->bits_to_go;
+   for (shift = tail_bits - 1; shift >= 0; shift--)
+     AppendOneBit2Buf( dest, (source->byte_buf >> shift) & 1 );
+ }
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on spare pictures
+  *  \brief
+  *     implementation of Spare Pictures related functions based on 
+  *      JVT-D100
+  *  \author
+  *      Dong Tian                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ 
+ // global variables for spare pictures
+ 
+ // Tian Dong (Sept 2002)
+ // In the current implementation (Sept 2002) the spare picture info is
+ // packetized together with the immediately following frame. Thus one
+ // set of global variables is enough to hold the info.
+ 
+ // presence flag for the spare picture info; starts out FALSE
+ Boolean seiHasSparePicture = FALSE;
+ // the spare picture payload and its bookkeeping (data bitstream,
+ // num_spare_pics, target_frame_num, payloadSize)
+ spare_picture_struct seiSparePicturePayload;
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      (Re)create the global spare picture payload: a payload that is
+  *      still allocated is closed first, then a zeroed bitstream buffer of
+  *      MAXRTPPAYLOADLEN bytes is allocated and all counters are reset.
+  *      Allocation failures are reported through no_mem_exit().
+  ************************************************************************
+  */
+ void InitSparePicture()
+ {
+   Bitstream *bs;
+ 
+   // drop a payload left over from an earlier call
+   if ( seiSparePicturePayload.data != NULL )
+     CloseSparePicture();
+ 
+   bs = seiSparePicturePayload.data = malloc( sizeof(Bitstream) );
+   if ( bs == NULL )
+     no_mem_exit("InitSparePicture: seiSparePicturePayload.data");
+ 
+   bs->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if ( bs->streamBuffer == NULL )
+     no_mem_exit("InitSparePicture: seiSparePicturePayload.data->streamBuffer");
+   memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN);
+ 
+   // empty bitstream: nothing written yet, a full byte is free
+   bs->byte_buf   = 0;
+   bs->byte_pos   = 0;
+   bs->bits_to_go = 8;
+ 
+   seiSparePicturePayload.target_frame_num = 0;
+   seiSparePicturePayload.num_spare_pics   = 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Release the global spare picture payload and reset its counters.
+  *      Safe to call when no payload is allocated
+  *      (seiSparePicturePayload.data == NULL).
+  ************************************************************************
+  */
+ void CloseSparePicture()
+ {
+   // Test the container before touching its members: the previous code read
+   // data->streamBuffer first and only afterwards checked data for NULL.
+   if (seiSparePicturePayload.data != NULL)
+   {
+     free(seiSparePicturePayload.data->streamBuffer);   // free(NULL) is a no-op
+     seiSparePicturePayload.data->streamBuffer = NULL;
+     free(seiSparePicturePayload.data);
+     seiSparePicturePayload.data = NULL;
+   }
+   seiSparePicturePayload.num_spare_pics = 0;
+   seiSparePicturePayload.target_frame_num = 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *     Calculate the spare picture info, save the result in map_sp
+  *      then compose the spare picture information.
+  *  \note
+  *      The whole implementation below is commented out, so calling this
+  *      function currently does nothing. The disabled code is kept for
+  *      reference only.
+  *  \par Output
+  *      (when enabled) the spare picture payload is available in
+  *      *seiSparePicturePayload*: the syntax elements in the loop (see FCD),
+  *      excluding the two elements at the beginning.
+  ************************************************************************
+  */
+ void CalculateSparePicture()
+ {
+   // Disabled implementation. In outline: for up to two candidate spare
+   // pictures it builds a per-macroblock map from the summed absolute
+   // difference against picbuf_short[0], picks a ref_area_indicator and
+   // appends the result through ComposeSparePictureMessage().
+   /*
+   int i, j, tmp, i0, j0, m;
+   byte **map_sp;
+   int delta_spare_frame_num;
+   Bitstream *tmpBitstream;
+ 
+   int num_of_mb=(img->height/16) * (img->width/16);
+   int threshold1 = 16*16*input->SPDetectionThreshold;
+   int threshold2 = num_of_mb * input->SPPercentageThreshold / 100;
+   int ref_area_indicator;
+   int CandidateSpareFrameNum, SpareFrameNum;
+   int possible_spare_pic_num;
+ 
+   // define it for debug purpose
+   #define WRITE_MAP_IMAGE
+ 
+ #ifdef WRITE_MAP_IMAGE
+   byte **y;
+   int k;
+   FILE* fp;
+   static int first = 1;
+   char map_file_name[255]="map.yuv";
+ #endif
+ 
+   // basic check
+   if (fb->picbuf_short[0]->used==0 || fb->picbuf_short[1]->used==0)
+   { 
+ #ifdef WRITE_MAP_IMAGE
+     fp = fopen( map_file_name, "wb" );
+     assert( fp != NULL );
+     // write the map image
+     for (i=0; i < img->height; i++)
+       for (j=0; j < img->width; j++)
+         fputc(0, fp);
+ 
+     for (k=0; k < 2; k++)
+       for (i=0; i < img->height/2; i++)
+         for (j=0; j < img->width/2; j++)
+           fputc(128, fp);
+     fclose( fp );
+ #endif
+     seiHasSparePicture = FALSE;
+     return;
+   }
+   seiHasSparePicture = TRUE;
+ 
+   // set the global bitstream memory. 
+   InitSparePicture();
+   seiSparePicturePayload.target_frame_num = img->number % MAX_FN;
+   // init the local bitstream memory.
+   tmpBitstream = malloc(sizeof(Bitstream));
+   if ( tmpBitstream == NULL ) no_mem_exit("CalculateSparePicture: tmpBitstream");
+   tmpBitstream->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if ( tmpBitstream->streamBuffer == NULL ) no_mem_exit("CalculateSparePicture: tmpBitstream->streamBuffer");
+   memset( tmpBitstream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+ 
+ #ifdef WRITE_MAP_IMAGE
+   if ( first )
+   {
+     fp = fopen( map_file_name, "wb" );
+     first = 0;
+   }
+   else
+     fp = fopen( map_file_name, "ab" );
+   get_mem2D(&y, img->height, img->width);
+ #endif
+   get_mem2D(&map_sp, img->height/16, img->width/16);
+ 
+   if (fb->picbuf_short[2]->used!=0) possible_spare_pic_num = 2;
+   else possible_spare_pic_num = 1;
+   // loop over the spare pictures
+   for (m=0; m<possible_spare_pic_num; m++)
+   {
+     // clear the temporal bitstream buffer
+     tmpBitstream->bits_to_go  = 8;
+     tmpBitstream->byte_pos    = 0;
+     tmpBitstream->byte_buf    = 0;
+     memset( tmpBitstream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+ 
+     // set delta_spare_frame_num
+     // the order of the following lines cannot be changed.
+     if (m==0)
+       CandidateSpareFrameNum = seiSparePicturePayload.target_frame_num - 1; // TargetFrameNum - 1;
+     else
+       CandidateSpareFrameNum = SpareFrameNum - 1;
+     if ( CandidateSpareFrameNum < 0 ) CandidateSpareFrameNum = MAX_FN - 1;
+     SpareFrameNum = fb->picbuf_short[m+1]->frame_num_256;
+     delta_spare_frame_num = CandidateSpareFrameNum - SpareFrameNum;
+     assert( delta_spare_frame_num == 0 );
+ 
+     // calculate the spare macroblock map of one spare picture
+     // the results are stored into map_sp[][]
+     for (i=0; i < img->height/16; i++)
+       for (j=0; j < img->width/16; j++)
+       {
+         tmp = 0;
+         for (i0=0; i0<16; i0++)
+           for (j0=0; j0<16; j0++)
+             tmp+=abs(fb->picbuf_short[m+1]->Refbuf11[(i*16+i0)*img->width+j*16+j0]-
+                        fb->picbuf_short[0]->Refbuf11[(i*16+i0)*img->width+j*16+j0]);
+         tmp = (tmp<=threshold1? 255 : 0);
+         map_sp[i][j] = (tmp==0? 1 : 0);
+ #ifdef WRITE_MAP_IMAGE
+ //        if (m==0)
+         {
+         for (i0=0; i0<16; i0++)
+           for (j0=0; j0<16; j0++)
+             y[i*16+i0][j*16+j0]=tmp;
+         }
+ #endif
+       }
+ 
+     // based on map_sp[][], compose the spare picture information
+     // and write the spare picture information to a temp bitstream
+     tmp = 0;
+     for (i=0; i < img->height/16; i++)
+       for (j=0; j < img->width/16; j++)
+         if (map_sp[i][j]==0) tmp++;
+     if ( tmp > threshold2 )
+       ref_area_indicator = 0;
+     else if ( !CompressSpareMBMap(map_sp, tmpBitstream) )
+       ref_area_indicator = 1;
+     else
+       ref_area_indicator = 2;
+ 
+ //    printf( "ref_area_indicator = %d\n", ref_area_indicator );
+ 
+ #ifdef WRITE_MAP_IMAGE
+     // write the map to a file
+ //    if (m==0)
+     {
+       // write the map image
+       for (i=0; i < img->height; i++)
+         for (j=0; j < img->width; j++)
+         {
+           if ( ref_area_indicator == 0 ) fputc(255, fp);
+           else fputc(y[i][j], fp);
+         }
+ 
+       for (k=0; k < 2; k++)
+         for (i=0; i < img->height/2; i++)
+           for (j=0; j < img->width/2; j++)
+             fputc(128, fp);
+     }
+ #endif
+ 
+     // Finnally, write the current spare picture information to 
+     // the global variable: seiSparePicturePayload
+     ComposeSparePictureMessage(delta_spare_frame_num, ref_area_indicator, tmpBitstream);
+     seiSparePicturePayload.num_spare_pics++;
+   }  // END for (m=0; m<2; m++)
+ 
+   free_mem2D( map_sp );
+   free( tmpBitstream->streamBuffer );
+   free( tmpBitstream );
+ 
+ #ifdef WRITE_MAP_IMAGE
+   free_mem2D( y );
+   fclose( fp );
+ #undef WRITE_MAP_IMAGE
+ #endif
+   */
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      append the information of one spare picture to the global spare
+  *      picture payload.
+  *  \param delta_spare_frame_num
+  *      see FCD; written first, as a UVLC code
+  *  \param ref_area_indicator
+  *      Indicate how to represent the spare mb map; written second, as a
+  *      UVLC code
+  *  \param tmpBitstream
+  *      bitstream whose bits are appended after the two elements above
+  *  \par Output
+  *      seiSparePicturePayload.data: extended by the two syntax elements
+  *        and the bits of tmpBitstream.
+  ************************************************************************
+  */
+ void ComposeSparePictureMessage(int delta_spare_frame_num, int ref_area_indicator, Bitstream *tmpBitstream)
+ {
+   Bitstream *payload = seiSparePicturePayload.data;
+   SyntaxElement se;
+ 
+   se.mapping = ue_linfo;
+   se.type    = SE_HEADER;
+ 
+   se.value1 = delta_spare_frame_num;
+   writeSyntaxElement2Buf_UVLC(&se, payload);
+ 
+   se.value1 = ref_area_indicator;
+   writeSyntaxElement2Buf_UVLC(&se, payload);
+ 
+   // the (possibly compressed) mb map follows the two header elements
+   AppendTmpbits2Buf( payload, tmpBitstream );
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      test if the compressed spare mb map will occupy less mem and
+  *      fill the payload buffer.
+  *
+  *      The map is first run-length coded into the bitstream while walking
+  *      it in a spiral that starts at the centre macroblock. If the coded
+  *      size (in bits) is not smaller than the number of macroblocks, the
+  *      bitstream is rewound and the raw map is written instead, one bit
+  *      per macroblock in row order.
+  *  \param map_sp
+  *      in which the spare picture information are stored; indexed as
+  *      map_sp[row][column] in macroblock units.
+  *  \param bitstream
+  *      pointer to a buffer to save the payload
+  *  \return
+  *      TRUE: If it is compressed version, \n
+  *             FALSE: If it is not compressed.
+  ************************************************************************
+  */
+ Boolean CompressSpareMBMap(unsigned char **map_sp, Bitstream *bitstream)
+ {
+   int j, k;
+   int noc, bit0, bit1, bitc;
+   SyntaxElement sym;
+   // x/y: current macroblock; left/right/top/bottom: bounds of the area the
+   // spiral has covered so far; directx/directy: current walking direction
+   int x, y, left, right, bottom, top, directx, directy;
+ 
+   // this is the size of the uncompressed mb map:
+   int size_uncompressed = (img->height/16) * (img->width/16);
+   int size_compressed   = 0;
+   Boolean ret;
+ 
+   // initialization
+   sym.type = SE_HEADER;
+   sym.mapping = ue_linfo;
+   noc = 0;        // length of the current run of entries equal to bitc
+   bit0 = 0;
+   bit1 = 1;       // not used below
+   bitc = bit0;    // the value whose runs are counted; never changed below
+ 
+   // compress the map, the result goes to the temporal bitstream buffer
+   // start at the centre macroblock, walking downwards (y increasing)
+   x = ( img->width/16 - 1 ) / 2;
+   y = ( img->height/16 - 1 ) / 2;
+   left = right = x;
+   top = bottom = y;
+   directx = 0;
+   directy = 1;
+   // j and k only count iterations (one per macroblock); the position that
+   // is actually examined is (x, y)
+   for (j=0; j<img->height/16; j++)
+     for (k=0; k<img->width/16; k++)
+     {
+       // check current mb
+       if ( map_sp[y][x] == bitc ) noc++;
+       else
+       {
+         // an entry different from bitc ends the run: emit its length
+         sym.value1 = noc;
+         size_compressed += writeSyntaxElement2Buf_UVLC(&sym, bitstream);    // the return value indicate the num of bits written
+         noc=0;
+       }
+       // go to the next mb:
+       if ( directx == -1 && directy == 0 )
+       {
+         // walking left
+         if (x > left) x--;
+         else if (x == 0)
+         {
+           // left picture border: continue on the row below the covered
+           // area, walking right
+           y = bottom + 1;
+           bottom++;
+           directx = 1;
+           directy = 0;
+         }
+         else if (x == left)
+         {
+           // left edge of the covered area: widen it and turn downwards
+           x--;
+           left--;
+           directx = 0;
+           directy = 1;
+         }
+       }
+       else if ( directx == 1 && directy == 0 )
+       {
+         // walking right
+         if (x < right) x++;
+         else if (x == img->width/16 - 1)
+         {
+           // right picture border: continue on the row above the covered
+           // area, walking left
+           y = top - 1;
+           top--;
+           directx = -1;
+           directy = 0;
+         }
+         else if (x == right)
+         {
+           // right edge of the covered area: widen it and turn upwards
+           x++;
+           right++;
+           directx = 0;
+           directy = -1;
+         }
+       }
+       else if ( directx == 0 && directy == -1 )
+       {
+         // walking up
+         if ( y > top) y--;
+         else if (y == 0)
+         {
+           // top picture border: continue on the column left of the
+           // covered area, walking down
+           x = left - 1;
+           left--;
+           directx = 0;
+           directy = 1;
+         }
+         else if (y == top)
+         {
+           // top edge of the covered area: grow it and turn left
+           y--;
+           top--;
+           directx = -1;
+           directy = 0;
+         }
+       }
+       else if ( directx == 0 && directy == 1 )
+       {
+         // walking down
+         if (y < bottom) y++;
+         else if (y == img->height/16 - 1)
+         {
+           // bottom picture border: continue on the column right of the
+           // covered area, walking up
+           x = right+1;
+           right++;
+           directx = 0;
+           directy = -1;
+         }
+         else if (y == bottom)
+         {
+           // bottom edge of the covered area: grow it and turn right
+           y++;
+           bottom++;
+           directx = 1;
+           directy = 0;
+         }
+       }
+     }
+   // a run that is still open at the end of the walk is emitted as well
+   if (noc!=0)
+   {
+     sym.value1 = noc;
+     size_compressed += writeSyntaxElement2Buf_UVLC(&sym, bitstream);
+   }
+ 
+   // compressed size in bits against one bit per macroblock
+   ret = (size_compressed<size_uncompressed? TRUE : FALSE);
+   if ( !ret ) // overwrite the streambuffer with the original mb map
+   {
+     // write the mb map to payload bit by bit
+     // rewind the bitstream; bytes written above are overwritten, not cleared
+     bitstream->byte_buf = 0;
+     bitstream->bits_to_go = 8;
+     bitstream->byte_pos = 0;
+     for (j=0; j<img->height/16; j++)
+     {
+       for (k=0; k<img->width/16; k++)
+       {
+         bitstream->byte_buf <<= 1;
+         if (map_sp[j][k]) bitstream->byte_buf |= 1;
+         bitstream->bits_to_go--;
+         if (bitstream->bits_to_go==0)
+         {
+           bitstream->bits_to_go = 8;
+           bitstream->streamBuffer[bitstream->byte_pos++]=bitstream->byte_buf;
+           bitstream->byte_buf = 0;
+         }
+       }
+     }
+     // a final partial byte stays in byte_buf / bits_to_go and is not
+     // flushed to streamBuffer here
+   }
+ 
+   return ret;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Finalize the spare picture SEI payload.
+  *        A new bitstream is built that starts with delta_frame_num and
+  *        num_spare_pics_minus1, continues with the bits composed so far
+  *        and is padded to a byte boundary. It should be called before
+  *        the current picture is packetized.
+  *  \par Input
+  *      seiSparePicturePayload.data: points to the payload starting from
+  *        delta_spare_frame_num. (See FCD)
+  *  \par Output
+  *      seiSparePicturePayload.data points to the new, byte aligned
+  *        bitstream: spare_picture( PayloadSize ) (See FCD); the previous
+  *        bitstream is freed. seiSparePicturePayload.payloadSize is set to
+  *        the number of bytes written.
+  ************************************************************************
+  */
+ void FinalizeSpareMBMap()
+ {
+   Bitstream *body = seiSparePicturePayload.data;   // payload composed so far
+   Bitstream *full;                                 // payload incl. leading elements
+   SyntaxElement se;
+   int frame_num_delta;
+ 
+   se.mapping = ue_linfo;
+   se.type    = SE_HEADER;
+ 
+   // fresh, zeroed bitstream for the complete payload
+   full = malloc(sizeof(Bitstream));
+   if ( full == NULL )
+     no_mem_exit("FinalizeSpareMBMap: dest");
+   full->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if ( full->streamBuffer == NULL )
+     no_mem_exit("FinalizeSpareMBMap: dest->streamBuffer");
+   memset( full->streamBuffer, 0, MAXRTPPAYLOADLEN);
+   full->byte_buf   = 0;
+   full->byte_pos   = 0;
+   full->bits_to_go = 8;
+ 
+   // delta_frame_num: distance from the target frame, wrapped into [0, MAX_FN)
+   frame_num_delta = (img->number % MAX_FN) - seiSparePicturePayload.target_frame_num;
+   if ( frame_num_delta < 0 )
+     frame_num_delta += MAX_FN;
+   se.value1 = frame_num_delta;
+   writeSyntaxElement2Buf_UVLC(&se, full);
+ 
+   // num_spare_pics_minus1
+   se.value1 = seiSparePicturePayload.num_spare_pics - 1;
+   writeSyntaxElement2Buf_UVLC(&se, full);
+ 
+   // everything composed earlier follows the two leading elements
+   AppendTmpbits2Buf( full, body );
+ 
+   // byte-align the payload: stuffing is a single 1 bit followed by 0 bits
+   if ( full->bits_to_go != 8 )
+   {
+     full->byte_buf = (full->byte_buf << 1) | 1;
+     full->bits_to_go--;
+     if ( full->bits_to_go != 0 )
+       full->byte_buf <<= full->bits_to_go;
+     full->streamBuffer[full->byte_pos++] = full->byte_buf;
+     full->bits_to_go = 8;
+     full->byte_buf = 0;
+   }
+   seiSparePicturePayload.payloadSize = full->byte_pos;
+ 
+   // publish the finished payload and release the old bitstream
+   seiSparePicturePayload.data = full;
+   free( body->streamBuffer );
+   free( body );
+ }
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on subseq information sei messages
+  *  \brief
+  *      JVT-D098
+  *  \author
+  *      Dong Tian                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ 
+ // presence flag for sub-sequence information; starts out FALSE
+ Boolean seiHasSubseqInfo = FALSE;
+ // one sub-sequence information record per layer, indexed by layer number
+ subseq_information_struct seiSubseqInfo[MAX_LAYER_NUMBER];
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      init the sub-sequence info of one layer: the record gets the next
+  *      free sub-sequence id and a newly allocated, zeroed payload
+  *      bitstream of MAXRTPPAYLOADLEN bytes
+  *  \param currLayer
+  *      index into seiSubseqInfo[] of the layer to initialise
+  ************************************************************************
+  */
+ void InitSubseqInfo(int currLayer)
+ {
+   // running sub-sequence id, shared by all layers
+   static unsigned short id = 0;
+   subseq_information_struct *info = &seiSubseqInfo[currLayer];
+   Bitstream *bs;
+ 
+   seiHasSubseqInfo = TRUE;
+ 
+   info->subseq_layer_num  = currLayer;
+   info->subseq_id         = id++;
+   info->last_picture_flag = 0;
+   info->stored_frame_cnt  = -1;
+   info->payloadSize       = 0;
+ 
+   info->data = malloc( sizeof(Bitstream) );
+   if ( info->data == NULL )
+     no_mem_exit("InitSubseqInfo: seiSubseqInfo[currLayer].data");
+   bs = info->data;
+ 
+   bs->streamBuffer = malloc( MAXRTPPAYLOADLEN );
+   if ( bs->streamBuffer == NULL )
+     no_mem_exit("InitSubseqInfo: seiSubseqInfo[currLayer].data->streamBuffer");
+   memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN );
+ 
+   // empty bitstream
+   bs->byte_buf   = 0;
+   bs->byte_pos   = 0;
+   bs->bits_to_go = 8;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      update the sub-sequence info of one layer for the current picture:
+  *      advance the stored frame counter and recompute last_picture_flag
+  *  \param currLayer
+  *      index into seiSubseqInfo[]; last_picture_flag is only recomputed
+  *      for layers 0 and 1
+  ************************************************************************
+  */
+ void UpdateSubseqInfo(int currLayer)
+ {
+   subseq_information_struct *info = &seiSubseqInfo[currLayer];
+ 
+   // only pictures that are not B slices advance the counter (wraps at MAX_FN)
+   if (img->type != B_SLICE)
+   {
+     info->stored_frame_cnt++;
+     info->stored_frame_cnt %= MAX_FN;
+   }
+ 
+   switch (currLayer)
+   {
+   case 0:
+     // layer 0 ends with the last frame of the sequence
+     info->last_picture_flag = ( img->number == input->no_frames-1 ) ? 1 : 0;
+     break;
+ 
+   case 1:
+     if ( ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==0) && (input->successive_Bframe != 0) && (IMG_NUMBER>0)) || // there are B frames
+          ((IMG_NUMBER%(input->NumFramesInELSubSeq+1)==input->NumFramesInELSubSeq) && (input->successive_Bframe==0))  // there are no B frames
+        )
+       info->last_picture_flag = 1;
+     else
+       info->last_picture_flag = 0;
+     break;
+ 
+   default:
+     // other layers keep their flag
+     break;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Finalize the sub-sequence info of one layer: write
+  *      subseq_layer_num, subseq_id, last_picture_flag (1 bit) and
+  *      stored_frame_cnt to the layer's payload bitstream and pad it to a
+  *      byte boundary
+  *  \param currLayer
+  *      index into seiSubseqInfo[]
+  *  \par Output
+  *      seiSubseqInfo[currLayer].payloadSize is the number of bytes written
+  ************************************************************************
+  */
+ void FinalizeSubseqInfo(int currLayer)
+ {
+   subseq_information_struct *info = &seiSubseqInfo[currLayer];
+   Bitstream *dest = info->data;
+   SyntaxElement se;
+ 
+   se.mapping = ue_linfo;
+   se.type    = SE_HEADER;
+ 
+   se.value1 = info->subseq_layer_num;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+ 
+   se.value1 = info->subseq_id;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+ 
+   // last_picture_flag is a single fixed-length bit
+   se.len        = 1;
+   se.bitpattern = info->last_picture_flag;
+   writeSyntaxElement2Buf_Fixed(&se, dest);
+ 
+   se.value1 = info->stored_frame_cnt;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+ 
+   // byte-align the payload: stuffing is a single 1 bit followed by 0 bits
+   if ( dest->bits_to_go != 8 )
+   {
+     dest->byte_buf = (dest->byte_buf << 1) | 1;
+     dest->bits_to_go--;
+     if ( dest->bits_to_go != 0 )
+       dest->byte_buf <<= dest->bits_to_go;
+     dest->streamBuffer[dest->byte_pos++] = dest->byte_buf;
+     dest->bits_to_go = 8;
+     dest->byte_buf = 0;
+   }
+   info->payloadSize = dest->byte_pos;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Clear the payload buffer of one layer's sub-sequence info; the
+  *      buffer itself stays allocated
+  *  \param currLayer
+  *      index into seiSubseqInfo[]
+  ************************************************************************
+  */
+ void ClearSubseqInfoPayload(int currLayer)
+ {
+   Bitstream *bs = seiSubseqInfo[currLayer].data;
+ 
+   memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN );
+   bs->byte_buf   = 0;
+   bs->byte_pos   = 0;
+   bs->bits_to_go = 8;
+ 
+   seiSubseqInfo[currLayer].payloadSize = 0;
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Release the payload bitstream of one layer's sub-sequence info and
+  *      reset its counters. Safe to call for a layer whose bitstream is
+  *      not allocated, and safe to call more than once.
+  *  \param currLayer
+  *      index into seiSubseqInfo[]
+  ************************************************************************
+  */
+ void CloseSubseqInfo(int currLayer)
+ {
+   seiSubseqInfo[currLayer].stored_frame_cnt = -1;
+   seiSubseqInfo[currLayer].payloadSize = 0;
+ 
+   // same pattern as CloseSubseqChar() / CloseSceneInformation(): guard the
+   // dereference and clear the pointer so that no dangling pointer is left
+   if ( seiSubseqInfo[currLayer].data != NULL )
+   {
+     free( seiSubseqInfo[currLayer].data->streamBuffer );
+     free( seiSubseqInfo[currLayer].data );
+   }
+   seiSubseqInfo[currLayer].data = NULL;
+ }
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on subseq layer characteristic sei messages
+  *  \brief
+  *      JVT-D098
+  *  \author
+  *      Dong Tian                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ 
+ // presence flag for sub-sequence layer characteristics; starts out FALSE
+ Boolean seiHasSubseqLayerInfo = FALSE;
+ // per-layer bit rate / frame rate plus the payload buffer built from them
+ subseq_layer_information_struct seiSubseqLayerInfo;
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Init the global variables for sub-sequence layer characteristics:
+  *      the bit rate and frame rate of every layer are zeroed and
+  *      layer_number is counted up once per layer (MAX_LAYER_NUMBER times)
+  ************************************************************************
+  */
+ void InitSubseqLayerInfo()
+ {
+   int layer = 0;
+ 
+   seiHasSubseqLayerInfo = TRUE;
+   seiSubseqLayerInfo.layer_number = 0;
+ 
+   while (layer < MAX_LAYER_NUMBER)
+   {
+     seiSubseqLayerInfo.frame_rate[layer] = 0;
+     seiSubseqLayerInfo.bit_rate[layer]   = 0;
+     seiSubseqLayerInfo.layer_number++;
+     layer++;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Counterpart of InitSubseqLayerInfo(). Currently does nothing.
+  ************************************************************************
+  */
+ void CloseSubseqLayerInfo()
+ {
+ }
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *      Write the data to buffer, which is byte aligned. For every layer
+  *      the bit rate and the frame rate are stored as two consecutive
+  *      unsigned short values in host byte order.
+  *  \par Output
+  *      seiSubseqLayerInfo.data holds the values,
+  *      seiSubseqLayerInfo.payloadSize is 4 * layer_number
+  ************************************************************************
+  */
+ void FinalizeSubseqLayerInfo()
+ {
+   int i, pos;
+   unsigned short value;
+   pos = 0;
+   seiSubseqLayerInfo.payloadSize = 0;
+   for (i=0; i<seiSubseqLayerInfo.layer_number; i++)
+   {
+     // memcpy instead of storing through a cast (unsigned short*): the cast
+     // breaks the aliasing rules and may produce a misaligned store, while
+     // memcpy writes exactly the same bytes without either problem.
+     value = seiSubseqLayerInfo.bit_rate[i];
+     memcpy(&(seiSubseqLayerInfo.data[pos]), &value, sizeof(value));
+     pos += 2;
+     value = seiSubseqLayerInfo.frame_rate[i];
+     memcpy(&(seiSubseqLayerInfo.data[pos]), &value, sizeof(value));
+     pos += 2;
+     seiSubseqLayerInfo.payloadSize += 4;
+   }
+ }
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on subseq characteristic sei messages
+  *  \brief
+  *      JVT-D098
+  *  \author
+  *      Dong Tian                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ 
+ // presence flag for sub-sequence characteristics; starts out FALSE
+ Boolean seiHasSubseqChar = FALSE;
+ // the sub-sequence characteristics and their payload bitstream
+ subseq_char_information_struct seiSubseqChar;
+ 
+ // Allocate the payload bitstream of the sub-sequence characteristics
+ // (MAXRTPPAYLOADLEN bytes, cleared through ClearSubseqCharPayload()) and
+ // bind the record to the current layer (img->layer) and that layer's
+ // sub-sequence id. Allocation failures go through no_mem_exit().
+ void InitSubseqChar()
+ {
+   Bitstream *bs;
+ 
+   bs = seiSubseqChar.data = malloc( sizeof(Bitstream) );
+   if( bs == NULL )
+     no_mem_exit("InitSubseqChar: seiSubseqChar.data");
+   bs->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if( bs->streamBuffer == NULL )
+     no_mem_exit("InitSubseqChar: seiSubseqChar.data->streamBuffer");
+   ClearSubseqCharPayload();
+ 
+   seiSubseqChar.subseq_layer_num       = img->layer;
+   seiSubseqChar.subseq_id              = seiSubseqInfo[img->layer].subseq_id;
+   // no optional fields and no referenced sub-sequences to begin with
+   seiSubseqChar.duration_flag          = 0;
+   seiSubseqChar.average_rate_flag      = 0;
+   seiSubseqChar.num_referenced_subseqs = 0;
+ }
+ 
+ // Empty the payload bitstream of the sub-sequence characteristics (the
+ // buffer stays allocated) and mark the message as not present.
+ void ClearSubseqCharPayload()
+ {
+   Bitstream *bs = seiSubseqChar.data;
+ 
+   memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN);
+   bs->byte_buf   = 0;
+   bs->byte_pos   = 0;
+   bs->bits_to_go = 8;
+ 
+   seiSubseqChar.payloadSize = 0;
+   seiHasSubseqChar = FALSE;
+ }
+ 
+ // Refresh the sub-sequence characteristics for the current layer and mark
+ // the message as present. Apart from the layer number and sub-sequence id
+ // all values are fixed constants.
+ void UpdateSubseqChar()
+ {
+   // identity of the sub-sequence
+   seiSubseqChar.subseq_layer_num = img->layer;
+   seiSubseqChar.subseq_id        = seiSubseqInfo[img->layer].subseq_id;
+ 
+   // both optional parts are switched off; the rates are set regardless
+   seiSubseqChar.duration_flag      = 0;
+   seiSubseqChar.average_rate_flag  = 0;
+   seiSubseqChar.average_bit_rate   = 100;
+   seiSubseqChar.average_frame_rate = 30;
+ 
+   // the count is 0, yet the first two reference slots are filled in
+   seiSubseqChar.num_referenced_subseqs  = 0;
+   seiSubseqChar.ref_subseq_layer_num[0] = 1;
+   seiSubseqChar.ref_subseq_id[0]        = 2;
+   seiSubseqChar.ref_subseq_layer_num[1] = 3;
+   seiSubseqChar.ref_subseq_id[1]        = 4;
+ 
+   seiHasSubseqChar = TRUE;
+ }
+ 
+ // Serialise the sub-sequence characteristics into their payload bitstream
+ // and pad it to a byte boundary; seiSubseqChar.payloadSize receives the
+ // number of bytes written.
+ void FinalizeSubseqChar()
+ {
+   Bitstream *dest = seiSubseqChar.data;
+   SyntaxElement se;
+   int ref;
+ 
+   se.mapping = ue_linfo;
+   se.type    = SE_HEADER;
+ 
+   se.value1 = seiSubseqChar.subseq_layer_num;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+   se.value1 = seiSubseqChar.subseq_id;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+ 
+   // duration_flag (1 bit), followed by the 32 bit duration when set
+   se.len        = 1;
+   se.bitpattern = seiSubseqChar.duration_flag;
+   writeSyntaxElement2Buf_Fixed(&se, dest);
+   if ( seiSubseqChar.duration_flag )
+   {
+     se.len        = 32;
+     se.bitpattern = seiSubseqChar.subseq_duration;
+     writeSyntaxElement2Buf_Fixed(&se, dest);
+   }
+ 
+   // average_rate_flag (1 bit), followed by two 16 bit rates when set
+   se.len        = 1;
+   se.bitpattern = seiSubseqChar.average_rate_flag;
+   writeSyntaxElement2Buf_Fixed(&se, dest);
+   if ( seiSubseqChar.average_rate_flag )
+   {
+     se.len        = 16;
+     se.bitpattern = seiSubseqChar.average_bit_rate;
+     writeSyntaxElement2Buf_Fixed(&se, dest);
+     se.len        = 16;
+     se.bitpattern = seiSubseqChar.average_frame_rate;
+     writeSyntaxElement2Buf_Fixed(&se, dest);
+   }
+ 
+   // number of referenced sub-sequences, then one (layer, id) pair for each
+   se.value1 = seiSubseqChar.num_referenced_subseqs;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+   for (ref = 0; ref < seiSubseqChar.num_referenced_subseqs; ref++)
+   {
+     se.value1 = seiSubseqChar.ref_subseq_layer_num[ref];
+     writeSyntaxElement2Buf_UVLC(&se, dest);
+     se.value1 = seiSubseqChar.ref_subseq_id[ref];
+     writeSyntaxElement2Buf_UVLC(&se, dest);
+   }
+ 
+   // byte-align the payload: stuffing is a single 1 bit followed by 0 bits
+   if ( dest->bits_to_go != 8 )
+   {
+     dest->byte_buf = (dest->byte_buf << 1) | 1;
+     dest->bits_to_go--;
+     if ( dest->bits_to_go != 0 )
+       dest->byte_buf <<= dest->bits_to_go;
+     dest->streamBuffer[dest->byte_pos++] = dest->byte_buf;
+     dest->bits_to_go = 8;
+     dest->byte_buf = 0;
+   }
+   seiSubseqChar.payloadSize = dest->byte_pos;
+ }
+ 
+ // Release the payload bitstream of the sub-sequence characteristics.
+ // Does nothing when no bitstream is allocated.
+ void CloseSubseqChar()
+ {
+   if (seiSubseqChar.data == NULL)
+     return;
+ 
+   free(seiSubseqChar.data->streamBuffer);
+   free(seiSubseqChar.data);
+   seiSubseqChar.data = NULL;
+ }
+ 
+ 
+ // JVT-D099
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on scene information SEI message
+  *  \brief
+  *      JVT-D099
+  *  \author
+  *      Ye-Kui Wang                 <wyk at ieee.org>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ 
+ // scene ids, transition type and the payload bitstream of the scene
+ // information SEI message
+ scene_information_struct seiSceneInformation;
+ 
+ // Mark the scene information as present, reset its fields (scene 0,
+ // transition type 0, second scene id -1) and allocate a zeroed payload
+ // bitstream of MAXRTPPAYLOADLEN bytes. Allocation failures go through
+ // no_mem_exit().
+ void InitSceneInformation()
+ {
+   Bitstream *bs;
+ 
+   seiHasSceneInformation = TRUE;
+ 
+   seiSceneInformation.scene_id              = 0;
+   seiSceneInformation.scene_transition_type = 0;
+   seiSceneInformation.second_scene_id       = -1;
+ 
+   bs = seiSceneInformation.data = malloc( sizeof(Bitstream) );
+   if( bs == NULL )
+     no_mem_exit("InitSceneInformation: seiSceneInformation.data");
+   bs->streamBuffer = malloc( MAXRTPPAYLOADLEN );
+   if( bs->streamBuffer == NULL )
+     no_mem_exit("InitSceneInformation: seiSceneInformation.data->streamBuffer");
+   memset( bs->streamBuffer, 0, MAXRTPPAYLOADLEN );
+ 
+   // empty bitstream
+   bs->byte_buf   = 0;
+   bs->byte_pos   = 0;
+   bs->bits_to_go = 8;
+ }
+ 
+ // Release the payload bitstream of the scene information.
+ // Does nothing when no bitstream is allocated.
+ void CloseSceneInformation()
+ {
+   if (seiSceneInformation.data == NULL)
+     return;
+ 
+   free(seiSceneInformation.data->streamBuffer);
+   free(seiSceneInformation.data);
+   seiSceneInformation.data = NULL;
+ }
+ 
+ // Serialise the scene information into its payload bitstream and pad it
+ // to a byte boundary; seiSceneInformation.payloadSize receives the number
+ // of bytes written.
+ void FinalizeSceneInformation()
+ {
+   Bitstream *dest = seiSceneInformation.data;
+   SyntaxElement se;
+ 
+   se.mapping = ue_linfo;
+   se.type    = SE_HEADER;
+ 
+   // scene_id: 8 bit fixed length
+   se.len        = 8;
+   se.bitpattern = seiSceneInformation.scene_id;
+   writeSyntaxElement2Buf_Fixed(&se, dest);
+ 
+   // scene_transition_type: UVLC
+   se.value1 = seiSceneInformation.scene_transition_type;
+   writeSyntaxElement2Buf_UVLC(&se, dest);
+ 
+   // the second scene id is only written for transition types above 3
+   if(seiSceneInformation.scene_transition_type > 3)
+   {
+     se.len        = 8;
+     se.bitpattern = seiSceneInformation.second_scene_id;
+     writeSyntaxElement2Buf_Fixed(&se, dest);
+   }
+ 
+   // byte-align the payload: stuffing is a single 1 bit followed by 0 bits
+   if ( dest->bits_to_go != 8 )
+   {
+     dest->byte_buf = (dest->byte_buf << 1) | 1;
+     dest->bits_to_go--;
+     if ( dest->bits_to_go != 0 )
+       dest->byte_buf <<= dest->bits_to_go;
+     dest->streamBuffer[dest->byte_pos++] = dest->byte_buf;
+     dest->bits_to_go = 8;
+     dest->byte_buf = 0;
+   }
+   seiSceneInformation.payloadSize = dest->byte_pos;
+ }
+ 
+ // Store new scene information.
+ // HasSceneInformation: TRUE to include a scene information SEI into the
+ //      next slice/DP, FALSE otherwise.
+ // sceneID:        asserted to be below 256
+ // sceneTransType: asserted to be at most 6
+ // secondSceneID:  only checked (below 256) and stored when sceneTransType
+ //      is above 3
+ void UpdateSceneInformation(Boolean HasSceneInformation, int sceneID, int sceneTransType, int secondSceneID)
+ {
+   seiHasSceneInformation = HasSceneInformation;
+ 
+   assert (sceneID < 256);
+   seiSceneInformation.scene_id = sceneID;
+ 
+   assert (sceneTransType <= 6 );
+   seiSceneInformation.scene_transition_type = sceneTransType;
+ 
+   // transition types up to 3 carry no second scene
+   if(sceneTransType <= 3)
+     return;
+ 
+   assert (secondSceneID < 256);
+   seiSceneInformation.second_scene_id = secondSceneID;
+ }
+ // End JVT-D099
+ 
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on Pan Scan messages
+  *  \brief
+  *      Based on FCD
+  *  \author
+  *      Shankar Regunathan                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ 
+ // Flag for the pan-scan rectangle SEI. Starts out FALSE; set to TRUE by
+ // ClearPanScanRectInfoPayload() and UpdatePanScanRectInfo().
+ Boolean seiHasPanScanRectInfo = FALSE;
+ // Field values and output bitstream of the pan-scan rectangle SEI message.
+ panscanrect_information_struct seiPanScanRectInfo;
+ 
+ /*
+  * Allocate the pan-scan rectangle SEI bitstream and its MAXRTPPAYLOADLEN-byte
+  * payload buffer, reset the payload state through
+  * ClearPanScanRectInfoPayload() and zero the four rectangle offsets.
+  * A failed allocation is reported through no_mem_exit().
+  */
+ void InitPanScanRectInfo()
+ {
+   panscanrect_information_struct *info = &seiPanScanRectInfo;
+ 
+   info->data = malloc( sizeof(Bitstream) );
+   if( info->data == NULL ) no_mem_exit("InitPanScanRectInfo: seiPanScanRectInfo.data");
+   info->data->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if( info->data->streamBuffer == NULL ) no_mem_exit("InitPanScanRectInfo: seiPanScanRectInfo.data->streamBuffer");
+   ClearPanScanRectInfoPayload();
+ 
+   info->pan_scan_rect_bottom_offset = 0;
+   info->pan_scan_rect_top_offset = 0;
+   info->pan_scan_rect_right_offset = 0;
+   info->pan_scan_rect_left_offset = 0;
+ }
+ 
+ 
+ /*
+  * Reset the pan-scan rectangle payload: zero the whole stream buffer, rewind
+  * the bit writer (8 bits free, byte position 0, empty byte buffer) and set
+  * the payload size to 0.
+  * NOTE(review): this reset leaves seiHasPanScanRectInfo set to TRUE - confirm
+  * that is intended.
+  */
+ void ClearPanScanRectInfoPayload()
+ {
+   Bitstream *stream = seiPanScanRectInfo.data;
+ 
+   stream->byte_buf    = 0;
+   stream->byte_pos    = 0;
+   stream->bits_to_go  = 8;
+   memset( stream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+   seiPanScanRectInfo.payloadSize = 0;
+ 
+   seiHasPanScanRectInfo = TRUE;
+ }
+ 
+ /*
+  * Fill the pan-scan rectangle SEI with fixed values (id 3, offsets
+  * left 10, right 40, top 20, bottom 32) and set seiHasPanScanRectInfo.
+  */
+ void UpdatePanScanRectInfo()
+ {
+   panscanrect_information_struct *info = &seiPanScanRectInfo;
+ 
+   info->pan_scan_rect_id            = 3;
+   info->pan_scan_rect_left_offset   = 10;
+   info->pan_scan_rect_top_offset    = 20;
+   info->pan_scan_rect_right_offset  = 40;
+   info->pan_scan_rect_bottom_offset = 32;
+ 
+   seiHasPanScanRectInfo = TRUE;
+ }
+ 
+ /*
+  * Serialize the pan-scan rectangle SEI into seiPanScanRectInfo.data and
+  * store the resulting size (in bytes) in seiPanScanRectInfo.payloadSize.
+  *
+  * All five fields are written as UVLC codes, in the order id, left, right,
+  * top, bottom offset. The payload is then padded to a byte boundary.
+  */
+ void FinalizePanScanRectInfo()
+ {
+   panscanrect_information_struct *info = &seiPanScanRectInfo;
+   Bitstream *dest = info->data;
+   SyntaxElement sym;
+ 
+   sym.type = SE_HEADER;
+   sym.mapping = ue_linfo;
+ 
+   sym.value1 = info->pan_scan_rect_id;
+   writeSyntaxElement2Buf_UVLC(&sym, dest);
+ 
+   sym.value1 = info->pan_scan_rect_left_offset;
+   writeSyntaxElement2Buf_UVLC(&sym, dest);
+ 
+   sym.value1 = info->pan_scan_rect_right_offset;
+   writeSyntaxElement2Buf_UVLC(&sym, dest);
+ 
+   sym.value1 = info->pan_scan_rect_top_offset;
+   writeSyntaxElement2Buf_UVLC(&sym, dest);
+ 
+   sym.value1 = info->pan_scan_rect_bottom_offset;
+   writeSyntaxElement2Buf_UVLC(&sym, dest);
+ 
+ // #define PRINT_PAN_SCAN_RECT
+ #ifdef PRINT_PAN_SCAN_RECT
+   printf("Pan Scan Id %d Left %d Right %d Top %d Bottom %d \n", seiPanScanRectInfo.pan_scan_rect_id, seiPanScanRectInfo.pan_scan_rect_left_offset, seiPanScanRectInfo.pan_scan_rect_right_offset, seiPanScanRectInfo.pan_scan_rect_top_offset, seiPanScanRectInfo.pan_scan_rect_bottom_offset);
+ #endif
+ #ifdef PRINT_PAN_SCAN_RECT
+ #undef PRINT_PAN_SCAN_RECT
+ #endif
+   // byte-align the payload: append one 1 bit followed by 0 bits
+   if ( dest->bits_to_go != 8 )
+   {
+     dest->byte_buf = (dest->byte_buf << 1) | 1;
+     dest->bits_to_go--;
+     if ( dest->bits_to_go != 0 )
+       dest->byte_buf <<= dest->bits_to_go;
+     dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+     dest->byte_pos++;
+     dest->bits_to_go = 8;
+     dest->byte_buf = 0;
+   }
+   info->payloadSize = dest->byte_pos;
+ }
+ 
+ 
+ 
+ /*
+  * Release the pan-scan rectangle SEI bitstream (payload buffer, then the
+  * Bitstream object) and reset the global handle to NULL.
+  */
+ void ClosePanScanRectInfo()
+ {
+   Bitstream *stream = seiPanScanRectInfo.data;
+ 
+   if (stream != NULL)
+   {
+     free(stream->streamBuffer);
+     free(stream);
+   }
+   seiPanScanRectInfo.data = NULL;
+ }
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on arbitrary (unregistered) data
+  *  \brief
+  *      Based on FCD
+  *  \author
+  *      Shankar Regunathan                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ // Flag for the unregistered user-data SEI; set to TRUE by
+ // ClearUser_data_unregistered().
+ Boolean seiHasUser_data_unregistered_info;
+ // Raw user bytes and output bitstream of the unregistered user-data SEI.
+ user_data_unregistered_information_struct seiUser_data_unregistered;
+ /*
+  * Allocate the unregistered user-data SEI storage: the Bitstream object, its
+  * MAXRTPPAYLOADLEN-byte payload buffer and a MAXRTPPAYLOADLEN-byte array for
+  * the raw user bytes. Everything is then reset through
+  * ClearUser_data_unregistered(). A failed allocation is reported through
+  * no_mem_exit().
+  */
+ void InitUser_data_unregistered()
+ {
+   user_data_unregistered_information_struct *info = &seiUser_data_unregistered;
+ 
+   info->data = malloc( sizeof(Bitstream) );
+   if( info->data == NULL ) no_mem_exit("InitUser_data_unregistered: seiUser_data_unregistered.data");
+ 
+   info->data->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if( info->data->streamBuffer == NULL ) no_mem_exit("InitUser_data_unregistered: seiUser_data_unregistered.data->streamBuffer");
+ 
+   info->byte = malloc(MAXRTPPAYLOADLEN);
+   if( info->byte == NULL ) no_mem_exit("InitUser_data_unregistered: seiUser_data_unregistered.byte");
+ 
+   ClearUser_data_unregistered();
+ }
+ 
+ 
+ /*
+  * Reset the unregistered user-data SEI: zero the payload buffer and the raw
+  * byte array, rewind the bit writer, set payloadSize and total_byte to 0,
+  * and set seiHasUser_data_unregistered_info to TRUE.
+  */
+ void ClearUser_data_unregistered()
+ {
+   user_data_unregistered_information_struct *info = &seiUser_data_unregistered;
+   Bitstream *stream = info->data;
+ 
+   // output bitstream
+   stream->byte_buf    = 0;
+   stream->byte_pos    = 0;
+   stream->bits_to_go  = 8;
+   memset( stream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+   info->payloadSize   = 0;
+ 
+   // raw user bytes
+   info->total_byte = 0;
+   memset( info->byte, 0, MAXRTPPAYLOADLEN);
+ 
+   seiHasUser_data_unregistered_info = TRUE;
+ }
+ 
+ /*
+  * Fill the unregistered user-data byte array with a fixed 7-byte pattern:
+  * byte[i] = i * 4, clamped to the range 0..255. total_byte is set to 7.
+  */
+ void UpdateUser_data_unregistered()
+ {
+   const int total_byte = 7;
+   char *payload = seiUser_data_unregistered.byte;
+   int i;
+ 
+   for(i = 0; i < total_byte; i++)
+   {
+     int temp_data = i * 4;
+     payload[i] = max(0, min(temp_data, 255));
+   }
+   seiUser_data_unregistered.total_byte = total_byte;
+ }
+ 
+ /*
+  * Serialize the unregistered user-data SEI into
+  * seiUser_data_unregistered.data: each of the total_byte raw bytes is
+  * written as an 8-bit fixed-length element. The payload is then padded to
+  * a byte boundary and its size in bytes is stored in payloadSize.
+  */
+ void FinalizeUser_data_unregistered()
+ {
+   user_data_unregistered_information_struct *info = &seiUser_data_unregistered;
+   Bitstream *dest = info->data;
+   SyntaxElement sym;
+   int i;
+ 
+   sym.type = SE_HEADER;
+   sym.mapping = ue_linfo;
+ 
+ // #define PRINT_USER_DATA_UNREGISTERED_INFO
+   for( i = 0; i < info->total_byte; i++)
+   {
+     sym.bitpattern = info->byte[i];
+     sym.len = 8; // b (8)
+     writeSyntaxElement2Buf_Fixed(&sym, dest);
+ #ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+     printf("Unreg data payload_byte = %d\n", seiUser_data_unregistered.byte[i]);
+ #endif
+   }
+ #ifdef PRINT_USER_DATA_UNREGISTERED_INFO
+ #undef PRINT_USER_DATA_UNREGISTERED_INFO
+ #endif
+   // byte-align the payload: append one 1 bit followed by 0 bits
+   if ( dest->bits_to_go != 8 )
+   {
+     dest->byte_buf = (dest->byte_buf << 1) | 1;
+     dest->bits_to_go--;
+     if ( dest->bits_to_go != 0 )
+       dest->byte_buf <<= dest->bits_to_go;
+     dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+     dest->byte_pos++;
+     dest->bits_to_go = 8;
+     dest->byte_buf = 0;
+   }
+   info->payloadSize = dest->byte_pos;
+ }
+ 
+ /*
+  * Release everything allocated by InitUser_data_unregistered(): the payload
+  * buffer, the Bitstream object and the raw user-byte array. Both global
+  * pointers are reset to NULL afterwards, so calling this function again
+  * is harmless.
+  */
+ void CloseUser_data_unregistered()
+ {
+   if (seiUser_data_unregistered.data)
+   {
+     free(seiUser_data_unregistered.data->streamBuffer);
+     free(seiUser_data_unregistered.data);
+   }
+   seiUser_data_unregistered.data = NULL;
+ 
+   // free(NULL) is a no-op, so no guard is needed. Clear the pointer so that
+   // a repeated Close cannot free the same block twice.
+   free(seiUser_data_unregistered.byte);
+   seiUser_data_unregistered.byte = NULL;
+ }
+ 
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on registered ITU_T_T35 user data
+  *  \brief
+  *      Based on FCD
+  *  \author
+  *      Shankar Regunathan                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ // Flag for the registered (ITU-T T.35) user-data SEI; set to TRUE by
+ // ClearUser_data_registered_itu_t_t35().
+ Boolean seiHasUser_data_registered_itu_t_t35_info;
+ // Country code, raw user bytes and output bitstream of the registered
+ // (ITU-T T.35) user-data SEI.
+ user_data_registered_itu_t_t35_information_struct seiUser_data_registered_itu_t_t35;
+ /*
+  * Allocate the registered (ITU-T T.35) user-data SEI storage: the Bitstream
+  * object, its MAXRTPPAYLOADLEN-byte payload buffer and a
+  * MAXRTPPAYLOADLEN-byte array for the raw user bytes. Everything is then
+  * reset through ClearUser_data_registered_itu_t_t35().
+  * Every allocation is checked; a failure is reported through no_mem_exit().
+  */
+ void InitUser_data_registered_itu_t_t35()
+ {
+ 
+   seiUser_data_registered_itu_t_t35.data = malloc( sizeof(Bitstream) );
+   if( seiUser_data_registered_itu_t_t35.data == NULL ) no_mem_exit("InitUser_data_registered_itu_t_t35: seiUser_data_registered_itu_t_t35.data");
+   seiUser_data_registered_itu_t_t35.data->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if( seiUser_data_registered_itu_t_t35.data->streamBuffer == NULL ) no_mem_exit("InitUser_data_registered_itu_t_t35: seiUser_data_registered_itu_t_t35.data->streamBuffer");
+   seiUser_data_registered_itu_t_t35.byte = malloc(MAXRTPPAYLOADLEN);
+   // check the pointer that was just allocated (.byte), not .data again
+   if( seiUser_data_registered_itu_t_t35.byte == NULL ) no_mem_exit("InitUser_data_registered_itu_t_t35: seiUser_data_registered_itu_t_t35.byte");
+   ClearUser_data_registered_itu_t_t35();
+ 
+ }
+ 
+ 
+ /*
+  * Reset the registered (ITU-T T.35) user-data SEI: zero the payload buffer
+  * and the raw byte array, rewind the bit writer, clear payloadSize,
+  * total_byte and both country-code fields, and set
+  * seiHasUser_data_registered_itu_t_t35_info to TRUE.
+  */
+ void ClearUser_data_registered_itu_t_t35()
+ {
+   user_data_registered_itu_t_t35_information_struct *info = &seiUser_data_registered_itu_t_t35;
+   Bitstream *stream = info->data;
+ 
+   // output bitstream
+   stream->byte_buf    = 0;
+   stream->byte_pos    = 0;
+   stream->bits_to_go  = 8;
+   memset( stream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+   info->payloadSize   = 0;
+ 
+   // message fields
+   info->itu_t_t35_country_code_extension_byte = 0;
+   info->itu_t_t35_country_code = 0;
+   info->total_byte = 0;
+   memset( info->byte, 0, MAXRTPPAYLOADLEN);
+ 
+   seiHasUser_data_registered_itu_t_t35_info = TRUE;
+ }
+ 
+ /*
+  * Fill the registered (ITU-T T.35) user-data SEI with fixed values.
+  *
+  * A country code below 0xFF is stored directly. Otherwise the country code
+  * field is set to 0xFF and the extension byte to (country_code - 0xFF).
+  * The byte array receives a 7-byte pattern, byte[i] = i * 3 clamped to
+  * 0..255, and total_byte is set to 7.
+  */
+ void UpdateUser_data_registered_itu_t_t35()
+ {
+   user_data_registered_itu_t_t35_information_struct *info = &seiUser_data_registered_itu_t_t35;
+   const int total_byte = 7;
+   // the original code labels 82 as the country code for India
+   // TODO confirm against the ITU-T T.35 code table
+   int country_code = 82;
+   int i;
+ 
+   if(country_code >= 0xFF)
+   {
+     info->itu_t_t35_country_code = 0xFF;
+     info->itu_t_t35_country_code_extension_byte = country_code - 0xFF;
+   }
+   else
+   {
+     info->itu_t_t35_country_code = country_code;
+   }
+ 
+   for(i = 0; i < total_byte; i++)
+   {
+     int temp_data = i * 3;
+     info->byte[i] = max(0, min(temp_data, 255));
+   }
+   info->total_byte = total_byte;
+ }
+ 
+ /*
+  * Serialize the registered (ITU-T T.35) user-data SEI into
+  * seiUser_data_registered_itu_t_t35.data and store the resulting size
+  * (in bytes) in payloadSize.
+  *
+  * Written in order, each as an 8-bit fixed-length element: the country
+  * code, the extension byte (only when the country code equals 0xFF), then
+  * the total_byte raw user bytes. The payload is then padded to a byte
+  * boundary.
+  */
+ void FinalizeUser_data_registered_itu_t_t35()
+ {
+   user_data_registered_itu_t_t35_information_struct *info = &seiUser_data_registered_itu_t_t35;
+   Bitstream *dest = info->data;
+   SyntaxElement sym;
+   int i;
+ 
+   sym.type = SE_HEADER;
+   sym.mapping = ue_linfo;
+ 
+   sym.bitpattern = info->itu_t_t35_country_code;
+   sym.len = 8;
+   writeSyntaxElement2Buf_Fixed(&sym, dest);
+ 
+ // #define PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+ #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+   printf(" ITU_T_T35_COUNTRTY_CODE %d \n", seiUser_data_registered_itu_t_t35.itu_t_t35_country_code);
+ #endif
+ 
+   if(info->itu_t_t35_country_code == 0xFF)
+   {
+     sym.bitpattern = info->itu_t_t35_country_code_extension_byte;
+     sym.len = 8;
+     writeSyntaxElement2Buf_Fixed(&sym, dest);
+ #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+     printf(" ITU_T_T35_COUNTRTY_CODE_EXTENSION_BYTE %d \n", seiUser_data_registered_itu_t_t35.itu_t_t35_country_code_extension_byte);
+ #endif
+   }
+ 
+   for( i = 0; i < info->total_byte; i++)
+   {
+     sym.bitpattern = info->byte[i];
+     sym.len = 8; // b (8)
+     writeSyntaxElement2Buf_Fixed(&sym, dest);
+ #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+     printf("itu_t_t35 payload_byte = %d\n", seiUser_data_registered_itu_t_t35.byte[i]);
+ #endif
+   }
+ #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+ #undef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO
+ #endif
+   // byte-align the payload: append one 1 bit followed by 0 bits
+   if ( dest->bits_to_go != 8 )
+   {
+     dest->byte_buf = (dest->byte_buf << 1) | 1;
+     dest->bits_to_go--;
+     if ( dest->bits_to_go != 0 )
+       dest->byte_buf <<= dest->bits_to_go;
+     dest->streamBuffer[dest->byte_pos] = dest->byte_buf;
+     dest->byte_pos++;
+     dest->bits_to_go = 8;
+     dest->byte_buf = 0;
+   }
+   info->payloadSize = dest->byte_pos;
+ }
+ 
+ /*
+  * Release everything allocated by InitUser_data_registered_itu_t_t35(): the
+  * payload buffer, the Bitstream object and the raw user-byte array. Both
+  * global pointers are reset to NULL afterwards, so calling this function
+  * again is harmless.
+  */
+ void CloseUser_data_registered_itu_t_t35()
+ {
+   if (seiUser_data_registered_itu_t_t35.data)
+   {
+     free(seiUser_data_registered_itu_t_t35.data->streamBuffer);
+     free(seiUser_data_registered_itu_t_t35.data);
+   }
+   seiUser_data_registered_itu_t_t35.data = NULL;
+ 
+   // free(NULL) is a no-op, so no guard is needed. Clear the pointer so that
+   // a repeated Close cannot free the same block twice.
+   free(seiUser_data_registered_itu_t_t35.byte);
+   seiUser_data_registered_itu_t_t35.byte = NULL;
+ }
+ 
+ /*
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  *  \functions on random access message
+  *  \brief
+  *      Based on FCD
+  *  \author
+  *      Shankar Regunathan                 <tian at cs.tut.fi>
+  **++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  */
+ // Flag for the recovery point SEI. ClearRandomAccess() sets it to FALSE;
+ // UpdateRandomAccess() sets it to TRUE for I slices and FALSE otherwise.
+ Boolean seiHasRecoveryPoint_info;
+ // Field values and output bitstream of the recovery point SEI message.
+ recovery_point_information_struct seiRecoveryPoint;
+ /*
+  * Allocate the recovery point SEI bitstream and its MAXRTPPAYLOADLEN-byte
+  * payload buffer, then reset the message through ClearRandomAccess().
+  * A failed allocation is reported through no_mem_exit().
+  */
+ void InitRandomAccess()
+ {
+   recovery_point_information_struct *info = &seiRecoveryPoint;
+ 
+   info->data = malloc( sizeof(Bitstream) );
+   if( info->data == NULL ) no_mem_exit("InitRandomAccess: seiRandomAccess.data");
+ 
+   info->data->streamBuffer = malloc(MAXRTPPAYLOADLEN);
+   if( info->data->streamBuffer == NULL ) no_mem_exit("InitRandomAccess: seiRandomAccess.data->streamBuffer");
+ 
+   ClearRandomAccess();
+ }
+ 
+ 
+ /*
+  * Reset the recovery point SEI: zero the payload buffer, rewind the bit
+  * writer, clear payloadSize, recovery_frame_cnt, broken_link_flag and
+  * exact_match_flag, and set seiHasRecoveryPoint_info to FALSE.
+  * changing_slice_group_idc is not touched here.
+  */
+ void ClearRandomAccess()
+ {
+   recovery_point_information_struct *info = &seiRecoveryPoint;
+   Bitstream *stream = info->data;
+ 
+   // output bitstream
+   stream->byte_buf    = 0;
+   stream->byte_pos    = 0;
+   stream->bits_to_go  = 8;
+   memset( stream->streamBuffer, 0, MAXRTPPAYLOADLEN);
+   info->payloadSize   = 0;
+ 
+   // message fields
+   info->exact_match_flag = 0;
+   info->broken_link_flag = 0;
+   info->recovery_frame_cnt = 0;
+ 
+   seiHasRecoveryPoint_info = FALSE;
+ }
+ 
+ /*
+  * Decide whether a recovery point SEI accompanies the current picture.
+  * For an I slice (img->type == I_SLICE) the fields are set to
+  * recovery_frame_cnt 0, exact_match_flag 1, broken_link_flag 0 and the flag
+  * seiHasRecoveryPoint_info is raised. For any other slice type only the
+  * flag is cleared.
+  */
+ void UpdateRandomAccess()
+ {
+   if(img->type != I_SLICE)
+   {
+     seiHasRecoveryPoint_info = FALSE;
+     return;
+   }
+ 
+   seiRecoveryPoint.recovery_frame_cnt = 0;
+   seiRecoveryPoint.exact_match_flag = 1;
+   seiRecoveryPoint.broken_link_flag = 0;
+   seiHasRecoveryPoint_info = TRUE;
+ }
+ 
+ /*
+  * Serialize the recovery point SEI into seiRecoveryPoint.data and store the
+  * resulting size (in bytes) in seiRecoveryPoint.payloadSize.
+  *
+  * Written in order: recovery_frame_cnt (ue_v), exact_match_flag (1 bit),
+  * broken_link_flag (1 bit), changing_slice_group_idc (2 bits). The payload
+  * is then padded to a byte boundary.
+  */
+ void FinalizeRandomAccess()
+ {
+   recovery_point_information_struct *info = &seiRecoveryPoint;
+   Bitstream *bitstream = info->data;
+ 
+   ue_v(   "SEI: recovery_frame_cnt",       info->recovery_frame_cnt,       bitstream);
+   u_1 (   "SEI: exact_match_flag",         info->exact_match_flag,         bitstream);
+   u_1 (   "SEI: broken_link_flag",         info->broken_link_flag,         bitstream);
+   u_v (2, "SEI: changing_slice_group_idc", info->changing_slice_group_idc, bitstream);
+ 
+ 
+ // #define PRINT_RECOVERY_POINT
+ #ifdef PRINT_RECOVERY_POINT
+   printf(" recovery_frame_cnt %d \n",       seiRecoveryPoint.recovery_frame_cnt);
+   printf(" exact_match_flag %d \n",         seiRecoveryPoint.exact_match_flag);
+   printf(" broken_link_flag %d \n",         seiRecoveryPoint.broken_link_flag);
+   printf(" changing_slice_group_idc %d \n", seiRecoveryPoint.changing_slice_group_idc);
+   printf(" %d %d \n", bitstream->byte_pos, bitstream->bits_to_go);
+ 
+ #undef PRINT_RECOVERY_POINT
+ #endif
+   // byte-align the payload: append one 1 bit followed by 0 bits
+   if ( bitstream->bits_to_go != 8 )
+   {
+     bitstream->byte_buf = (bitstream->byte_buf << 1) | 1;
+     bitstream->bits_to_go--;
+     if ( bitstream->bits_to_go != 0 )
+       bitstream->byte_buf <<= bitstream->bits_to_go;
+     bitstream->streamBuffer[bitstream->byte_pos] = bitstream->byte_buf;
+     bitstream->byte_pos++;
+     bitstream->bits_to_go = 8;
+     bitstream->byte_buf = 0;
+   }
+   info->payloadSize = bitstream->byte_pos;
+ }
+ 
+ /*
+  * Release the recovery point SEI bitstream (payload buffer, then the
+  * Bitstream object) and reset the global handle to NULL.
+  */
+ void CloseRandomAccess()
+ {
+   Bitstream *stream = seiRecoveryPoint.data;
+ 
+   if (stream != NULL)
+   {
+     free(stream->streamBuffer);
+     free(stream);
+   }
+   seiRecoveryPoint.data = NULL;
+ }


Index: llvm-test/MultiSource/Applications/JM/lencod/sei.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/sei.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/sei.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,267 ----
+ 
+ /*!
+  ************************************************************************
+  *  \file
+  *     sei.h
+  *  \brief
+  *     definitions for Supplemental Enhancement Information
+  *  \author(s)
+  *      - Dong Tian                             <tian at cs.tut.fi>
+  *      - TBD
+  *
+  * ************************************************************************
+  */
+ 
+ #ifndef SEI_H
+ #define SEI_H
+ 
+ #define MAX_LAYER_NUMBER 2
+ #define MAX_DEPENDENT_SUBSEQ 5
+ 
+ 
+ //! definition of SEI payload type
+ //! Enumerators are numbered consecutively, starting at SEI_BUFFERING_PERIOD = 0.
+ typedef enum {
+   SEI_BUFFERING_PERIOD = 0,
+   SEI_PIC_TIMING,
+   SEI_PAN_SCAN_RECT,
+   SEI_FILLER_PAYLOAD,
+   SEI_USER_DATA_REGISTERED_ITU_T_T35,
+   SEI_USER_DATA_UNREGISTERED,
+   SEI_RECOVERY_POINT,
+   SEI_DEC_REF_PIC_MARKING_REPETITION,
+   SEI_SPARE_PIC,
+   SEI_SCENE_INFO,
+   SEI_SUB_SEQ_INFO,
+   SEI_SUB_SEQ_LAYER_CHARACTERISTICS,
+   SEI_SUB_SEQ_CHARACTERISTICS,
+   SEI_FULL_FRAME_FREEZE,
+   SEI_FULL_FRAME_FREEZE_RELEASE,
+   SEI_FULL_FRAME_SNAPSHOT,
+   SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START,
+   SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END,
+   SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET,
+   SEI_FILM_GRAIN_CHARACTERISTICS,
+   SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE,
+   SEI_STEREO_VIDEO_INFO,
+ 
+   SEI_MAX_ELEMENTS  //!< number of SEI payload types listed above (not itself a payload type)
+ } SEI_type;
+ 
+ #define MAX_FN 256
+ 
+ #define AGGREGATION_PACKET_TYPE 4
+ #define SEI_PACKET_TYPE 5  // Tian Dong: See VCEG-N72, it need updates
+ 
+ #define NORMAL_SEI 0
+ #define AGGREGATION_SEI 1
+ 
+ //! SEI structure
+ //! Container for one assembled SEI message.
+ //! NOTE(review): field meanings below are inferred from the names - confirm in sei.c.
+ typedef struct
+ {
+   // presumably TRUE when the container currently holds a message
+   Boolean available;
+   // presumably the number of valid bytes in data
+   int payloadSize;
+   unsigned char subPacketType;
+   byte* data;
+ } sei_struct;
+ 
+ //! sei_message[0]: this struct is to store the sei message packetized independently
+ //! sei_message[1]: this struct is to store the sei message packetized together with slice data
+ extern sei_struct sei_message[2];
+ 
+ // Generic SEI message handling. The id parameter presumably indexes
+ // sei_message[] - confirm in sei.c.
+ void InitSEIMessages();
+ void CloseSEIMessages();
+ Boolean HaveAggregationSEI();
+ void write_sei_message(int id, byte* payload, int payload_size, int payload_type);
+ void finalize_sei_message(int id);
+ void clear_sei_message(int id);
+ void AppendTmpbits2Buf( Bitstream* dest, Bitstream* source );
+ 
+ void PrepareAggregationSEIMessage();
+ 
+ 
+ //! Spare Picture
+ //! State of the spare picture SEI message: two header values plus the
+ //! bitstream the payload is assembled in.
+ typedef struct
+ {
+   int target_frame_num;
+   int num_spare_pics;
+   // presumably the payload length in bytes, as in the other SEI structs - confirm in sei.c
+   int payloadSize;
+   Bitstream* data;
+ } spare_picture_struct;
+ 
+ extern Boolean seiHasSparePicture;
+ //extern Boolean sei_has_sp;
+ extern spare_picture_struct seiSparePicturePayload;
+ 
+ // Spare picture SEI life cycle and helpers (implemented in sei.c).
+ void InitSparePicture();
+ void CloseSparePicture();
+ void CalculateSparePicture();
+ void ComposeSparePictureMessage(int delta_spare_frame_num, int ref_area_indicator, Bitstream *tmpBitstream);
+ Boolean CompressSpareMBMap(unsigned char **map_sp, Bitstream *bitstream);
+ void FinalizeSpareMBMap();
+ 
+ //! Subseq Information
+ //! State of the sub-sequence information SEI message; one instance exists
+ //! per layer (see seiSubseqInfo[MAX_LAYER_NUMBER] below).
+ typedef struct
+ {
+   int subseq_layer_num;
+   int subseq_id;
+   unsigned int last_picture_flag;
+   unsigned int stored_frame_cnt;
+ 
+   // presumably the payload length in bytes, as in the other SEI structs - confirm in sei.c
+   int payloadSize;
+   Bitstream* data;
+ } subseq_information_struct;
+ 
+ extern Boolean seiHasSubseqInfo;
+ extern subseq_information_struct seiSubseqInfo[MAX_LAYER_NUMBER];
+ 
+ // Life cycle of the sub-sequence information SEI. currLayer presumably
+ // indexes seiSubseqInfo[] - confirm in sei.c.
+ void InitSubseqInfo(int currLayer);
+ void UpdateSubseqInfo(int currLayer);
+ void FinalizeSubseqInfo(int currLayer);
+ void ClearSubseqInfoPayload(int currLayer);
+ void CloseSubseqInfo(int currLayer);
+ 
+ //! Subseq Layer Information
+ //! State of the sub-sequence layer characteristics SEI message. Unlike the
+ //! other SEI structs, the payload lives in a fixed array, not a Bitstream.
+ typedef struct
+ {
+   unsigned short bit_rate[MAX_LAYER_NUMBER];
+   unsigned short frame_rate[MAX_LAYER_NUMBER];
+   // 4 bytes per layer - presumably the 16-bit bit_rate and frame_rate of
+   // each layer; confirm in sei.c
+   byte data[4*MAX_LAYER_NUMBER];
+   int layer_number;
+   int payloadSize;
+ } subseq_layer_information_struct;
+ 
+ extern Boolean seiHasSubseqLayerInfo;
+ extern subseq_layer_information_struct seiSubseqLayerInfo;
+ 
+ // Life cycle of the sub-sequence layer characteristics SEI (implemented in sei.c).
+ void InitSubseqLayerInfo();
+ void CloseSubseqLayerInfo();
+ void FinalizeSubseqLayerInfo();
+ 
+ //! Subseq Characteristics
+ //! State of the sub-sequence characteristics SEI message. The two ref_*
+ //! arrays hold up to MAX_DEPENDENT_SUBSEQ entries.
+ typedef struct
+ {
+   int subseq_layer_num;
+   int subseq_id;
+   int duration_flag;
+   unsigned int subseq_duration;
+   unsigned int average_rate_flag;
+   unsigned int average_bit_rate;
+   unsigned int average_frame_rate;
+   // presumably the number of valid entries in the two arrays below - confirm in sei.c
+   int num_referenced_subseqs;
+   int ref_subseq_layer_num[MAX_DEPENDENT_SUBSEQ];
+   int ref_subseq_id[MAX_DEPENDENT_SUBSEQ];
+ 
+   Bitstream* data;
+   int payloadSize;
+ } subseq_char_information_struct;
+ 
+ extern Boolean seiHasSubseqChar;
+ extern subseq_char_information_struct seiSubseqChar;
+ 
+ // Life cycle of the sub-sequence characteristics SEI (implemented in sei.c).
+ void InitSubseqChar();
+ void ClearSubseqCharPayload();
+ void UpdateSubseqChar();
+ void FinalizeSubseqChar();
+ void CloseSubseqChar();
+ 
+ 
+ //! Scene Information
+ //! State of the scene information SEI message.
+ typedef struct
+ {
+   // written as 8 bits; UpdateSceneInformation() asserts the value is < 256
+   int scene_id;
+   // written as a UVLC code; UpdateSceneInformation() asserts the value is <= 6
+   int scene_transition_type;
+   // written as 8 bits, and only when scene_transition_type > 3
+   int second_scene_id;
+ 
+   // bitstream the payload is assembled in
+   Bitstream* data;
+   // payload length in bytes, set by FinalizeSceneInformation()
+   int payloadSize;
+ } scene_information_struct;
+ 
+ extern Boolean seiHasSceneInformation;
+ extern scene_information_struct seiSceneInformation;
+ 
+ // Life cycle of the scene information SEI (implemented in sei.c).
+ void InitSceneInformation();
+ void CloseSceneInformation();
+ void UpdateSceneInformation(Boolean HasSceneInformation, int sceneID, int sceneTransType, int secondSceneID);
+ void FinalizeSceneInformation();
+ 
+ //! PanScanRect Information
+ //! State of the pan-scan rectangle SEI message. FinalizePanScanRectInfo()
+ //! writes the five fields as UVLC codes in declaration order.
+ typedef struct
+ {
+   int pan_scan_rect_id; 
+   int pan_scan_rect_left_offset;
+   int pan_scan_rect_right_offset;
+   int pan_scan_rect_top_offset;
+   int pan_scan_rect_bottom_offset;
+ 
+   // bitstream the payload is assembled in
+   Bitstream *data;
+   // payload length in bytes, set by FinalizePanScanRectInfo()
+   int payloadSize;
+ } panscanrect_information_struct;
+ 
+ extern Boolean seiHasPanScanRectInfo;
+ extern panscanrect_information_struct seiPanScanRectInfo;
+ 
+ // Life cycle of the pan-scan rectangle SEI (implemented in sei.c).
+ void InitPanScanRectInfo();
+ void ClearPanScanRectInfoPayload();
+ void UpdatePanScanRectInfo();
+ void FinalizePanScanRectInfo();
+ void ClosePanScanRectInfo();
+ 
+ //! User_data_unregistered Information
+ //! State of the unregistered user-data SEI message.
+ typedef struct
+ {
+   // raw user bytes; InitUser_data_unregistered() allocates MAXRTPPAYLOADLEN bytes
+   char *byte;
+   // number of entries of byte[] written by FinalizeUser_data_unregistered()
+   int total_byte;
+   // bitstream the payload is assembled in
+   Bitstream *data;
+   // payload length in bytes, set by FinalizeUser_data_unregistered()
+   int payloadSize;
+ } user_data_unregistered_information_struct;
+ 
+ extern Boolean seiHasUser_data_unregistered_info;
+ extern user_data_unregistered_information_struct seiUser_data_unregistered;
+ 
+ // Life cycle of the unregistered user-data SEI (implemented in sei.c).
+ void InitUser_data_unregistered();
+ void ClearUser_data_unregistered();
+ void UpdateUser_data_unregistered();
+ void FinalizeUser_data_unregistered();
+ void CloseUser_data_unregistered();
+ 
+ //! User_data_registered_itu_t_t35 Information
+ //! State of the registered (ITU-T T.35) user-data SEI message.
+ typedef struct
+ {
+   // raw user bytes; InitUser_data_registered_itu_t_t35() allocates MAXRTPPAYLOADLEN bytes
+   char *byte;
+   // number of entries of byte[] written by FinalizeUser_data_registered_itu_t_t35()
+   int total_byte;
+   // written as 8 bits; the value 0xFF signals that the extension byte follows
+   int itu_t_t35_country_code;
+   // written as 8 bits, and only when itu_t_t35_country_code == 0xFF
+   int itu_t_t35_country_code_extension_byte;
+   // bitstream the payload is assembled in
+   Bitstream *data;
+   // payload length in bytes, set by FinalizeUser_data_registered_itu_t_t35()
+   int payloadSize;
+ } user_data_registered_itu_t_t35_information_struct;
+ 
+ extern Boolean seiHasUser_data_registered_itu_t_t35_info;
+ extern user_data_registered_itu_t_t35_information_struct seiUser_data_registered_itu_t_t35;
+ 
+ // Life cycle of the registered user-data SEI (implemented in sei.c).
+ void InitUser_data_registered_itu_t_t35();
+ void ClearUser_data_registered_itu_t_t35();
+ void UpdateUser_data_registered_itu_t_t35();
+ void FinalizeUser_data_registered_itu_t_t35();
+ void CloseUser_data_registered_itu_t_t35();
+ 
+ //! Recovery Point Information
+ //! State of the recovery point SEI message. FinalizeRandomAccess() writes
+ //! the four fields in declaration order.
+ typedef struct
+ {
+   // written with ue_v()
+   unsigned int  recovery_frame_cnt;
+   // written as 1 bit
+   unsigned char exact_match_flag;
+   // written as 1 bit
+   unsigned char broken_link_flag;
+   // written as 2 bits; not reset by ClearRandomAccess()
+   unsigned char changing_slice_group_idc;
+ 
+   // bitstream the payload is assembled in
+   Bitstream *data;
+   // payload length in bytes, set by FinalizeRandomAccess()
+   int payloadSize;
+ } recovery_point_information_struct;
+ 
+ extern Boolean seiHasRecoveryPoint_info;
+ extern recovery_point_information_struct seiRecoveryPoint;
+ 
+ // Life cycle of the recovery point SEI (implemented in sei.c).
+ void InitRandomAccess();
+ void ClearRandomAccess();
+ void UpdateRandomAccess();
+ void FinalizeRandomAccess();
+ void CloseRandomAccess();
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/simplified_fast_me.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/simplified_fast_me.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/simplified_fast_me.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,825 ----
+ 
+ /*!
+  *************************************************************************************
+  *
+  * \file simplified_fast_me.c
+  *
+  * \brief
+  *   Fast integer pixel and sub pixel motion estimation
+  *   Improved and simplified from the original UMHexagonS algorithms
+  *   See JVT-P021 for details
+  *
+  * \author 
+  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen                      <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu                     <fenax at video.mdc.tsinghua.edu.cn>  
+  *    - Wenfang Fu                      <fwf at video.mdc.tsinghua.edu.cn>
+  *
+  *    - Xiaoquan Yi                     <xyi at engr.scu.edu>
+  *    - Jun Zhang                       <jzhang2 at engr.scu.edu>
+  *
+  * \date
+  *    16. June 2005
+  *************************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "global.h"
+ #include "memalloc.h"
+ #include "simplified_fast_me.h"
+ #include "refbuf.h"
+ 
+ // Lookup tables defined in another translation unit. byte_abs is indexed
+ // below with a signed pixel difference, so it presumably points into the
+ // middle of an absolute-value table - confirm at its definition.
+ extern  unsigned int *byte_abs;
+ extern           int *mvbits;
+ 
+ // Search-pattern offsets. Entry k of an _X table pairs with entry k of the
+ // matching _Y table to give one (dx, dy) offset: 4 points for the diamond,
+ // 6 for the hexagon and 16 for the big hexagon.
+ static const short Diamond_X[4]      = {-1, 1, 0, 0};
+ static const short Diamond_Y[4]      = { 0, 0,-1, 1};
+ static const short Hexagon_X[6]      = {-2, 2,-1, 1,-1, 1};
+ static const short Hexagon_Y[6]      = { 0, 0,-2, 2, 2,-2};
+ static const short Big_Hexagon_X[16] = {-4, 4, 0, 0,-4, 4,-4, 4,-4, 4,-4, 4,-2, 2,-2, 2};
+ static const short Big_Hexagon_Y[16] = { 0, 0,-4, 4,-1, 1, 1,-1,-2, 2, 2,-2,-3, 3, 3,-3};
+ 
+ // Row accessor for the reference picture, used by
+ // simplified_add_up_SAD_quarter_pel(). It is not assigned in this part of the file.
+ static pel_t *(*get_line) (pel_t**, int, int, int, int);
+ 
+ // Macro for motion estimation cost computation per match
+ // Evaluates one candidate position (cand_x, cand_y). If the candidate lies
+ // within search_range of (center_x, center_y) on both axes, mcost is set to
+ // the motion vector cost and then extended by the partial SAD (which stops
+ // early once mcost reaches min_mcost). When the result is below min_mcost,
+ // best_x, best_y and min_mcost are updated.
+ // The macro expands to a bare if-block and uses the caller's local variables
+ // by name (cand_x, cand_y, center_x, center_y, search_range, lambda_factor,
+ // mvshift, pred_x, pred_y, ref_pic, orig_pic, get_ref_line, height,
+ // blocksize_y, blocksize_x, blocksize_x4, mcost, min_mcost, best_x, best_y).
+ #define SEARCH_ONE_PIXEL_HELPER                                                         \
+ if(absm(cand_x - center_x) <= search_range && absm(cand_y - center_y) <= search_range)  \
+ {                                                                                       \
+   mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);             \
+   mcost = simplified_partial_SAD_calculate(ref_pic, orig_pic, get_ref_line, height,     \
+              blocksize_y, blocksize_x, blocksize_x4, mcost, min_mcost, cand_x, cand_y); \
+   if (mcost < min_mcost)                                                                \
+   {                                                                                     \
+     best_x    = cand_x;                                                                 \
+     best_y    = cand_y;                                                                 \
+     min_mcost = mcost;                                                                  \
+   }                                                                                     \
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Load the threshold values used by the fast motion estimation.
+  *    The values can be tuned to trade rate-distortion performance
+  *    against FME speed.
+  ************************************************************************
+  */
+ void simplified_init_FME()
+ {
+   // sub-pel thresholds
+   SubPelThreshold3                 =  400;
+   SubPelThreshold1                 = 1000;
+ 
+   // integer-pel thresholds
+   ConvergeThreshold                = 1000;
+   SymmetricalCrossSearchThreshold2 = 7000;
+   SymmetricalCrossSearchThreshold1 =  800;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocate the buffers used by the fast motion estimation
+  * \return
+  *    sum of the values returned by the get_mem3Dint()/get_mem2D() calls;
+  *    the simplified_flag_intra array is not included in that sum
+  ************************************************************************
+  */
+ int simplified_get_mem_FME()
+ {
+   const int rows4 = img->height/4;
+   const int cols4 = img->width/4;
+   int total;
+ 
+   // one flag per 16 pixels of picture width, plus one
+   simplified_flag_intra = calloc((img->width>>4)+1, sizeof(byte));
+   if (simplified_flag_intra == NULL)
+     no_mem_exit("simplified_get_mem_FME: simplified_flag_intra");
+ 
+   total  = get_mem3Dint(&simplified_fastme_l0_cost, 9, rows4, cols4);
+   total += get_mem3Dint(&simplified_fastme_l1_cost, 9, rows4, cols4);
+   total += get_mem2D(&simplified_SearchState, 7, 7);
+ 
+   return total;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Release the buffers obtained by simplified_get_mem_FME()
+  ************************************************************************
+  */
+ void simplified_free_mem_FME()
+ {
+   // flag array allocated with calloc
+   free (simplified_flag_intra);
+ 
+   // arrays allocated through the memalloc helpers
+   free_mem2D(simplified_SearchState);
+   free_mem3Dint(simplified_fastme_l1_cost, 9);
+   free_mem3Dint(simplified_fastme_l0_cost, 9);
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Partial SAD calculation for fast motion estimation.
+  *    Adds the absolute differences between the original block and the
+  *    reference block at (cand_x, cand_y) to mcost, one row at a time,
+  *    and stops after the first row that brings mcost up to min_mcost.
+  * \param mcost
+  *    starting cost that the SAD is added to
+  * \param blocksize_x4
+  *    number of 4-pixel groups processed per row
+  * \return
+  *    accumulated cost; it is >= min_mcost when the loop stopped early
+  ************************************************************************
+  */
+ int simplified_partial_SAD_calculate(pel_t*   ref_pic,
+                pel_t**  orig_pic,
+                pel_t* (*get_ref_line)(int, pel_t*, int, int, int, int),
+                int      height,
+                int      blocksize_y,
+                int      blocksize_x,
+                int      blocksize_x4,
+                int      mcost,
+                int      min_mcost,
+                int      cand_x,
+                int      cand_y)
+ {
+   unsigned short    row, quad;
+   pel_t *src, *ref;
+ 
+   for (row = 0; row < blocksize_y; row++)
+   {
+     ref = get_ref_line (blocksize_x, ref_pic, cand_y+row, cand_x, height, img->width);
+     src = orig_pic[row];
+ 
+     // four pixels per iteration
+     for (quad = 0; quad < blocksize_x4; quad++, src += 4, ref += 4)
+     {
+       mcost += byte_abs[ src[0] - ref[0] ];
+       mcost += byte_abs[ src[1] - ref[1] ];
+       mcost += byte_abs[ src[2] - ref[2] ];
+       mcost += byte_abs[ src[3] - ref[3] ];
+     }
+ 
+     // this candidate can no longer beat the current best
+     if (mcost >= min_mcost)
+       return mcost;
+   }
+   return mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Add up SAD for sub pixel for fast motion estimation
+  *
+  *    Walks the block in 4x4 sub-blocks, builds the 16 differences between
+  *    the original pixels and the reference samples addressed by
+  *    (cand_mv_x, cand_mv_y), and adds their cost to Mvmcost.
+  *    Without ABT each 4x4 sub-block is costed immediately with SATD() and
+  *    the search stops as soon as the cost reaches min_mcost.
+  *    With ABT the differences are collected in c_diff and costed once at
+  *    the end with find_SATD(); there is no early termination in that case.
+  * \return
+  *    the accumulated cost
+  ************************************************************************
+  */
+ int simplified_add_up_SAD_quarter_pel(int   pic_pix_x,
+                     int                     pic_pix_y,
+                     int                     blocksize_x,
+                     int                     blocksize_y,
+                     int                     cand_mv_x,
+                     int                     cand_mv_y,
+                     StorablePicture        *ref_picture,
+                     pel_t**                 orig_pic,
+                     int                     Mvmcost,
+                     int                     min_mcost,
+                     int                     useABT,
+                     int                     blocktype)
+ {
+   int j, i, k;  
+   int diff[16], *d; 
+   int mcost = Mvmcost;
+   int c_diff[MB_PIXELS];
+   // ypels: c_diff offset applied to rows 8 and above; 64 for blocktype 3, 128 otherwise
+   int y_offset, ypels =(128 - ((blocktype == 3)<<6));
+   int ry0, ry4, ry8, ry12;
+   int y0, y1, y2, y3;
+   int x0, x1, x2, x3;
+   int abort_search, rx0; 
+   // limits handed to get_line, scaled by 4 like the reference coordinates below
+   int img_width  = (ref_picture->size_x + (IMG_PAD_SIZE<<1) - 1)<<2;
+   int img_height = (ref_picture->size_y + (IMG_PAD_SIZE<<1) - 1)<<2;
+ 
+   //===== Use weighted Reference for ME ====
+   pel_t **ref_pic;      
+   pel_t *ref_line;
+   pel_t *orig_line;
+   int    apply_weights = ( (active_pps->weighted_pred_flag && 
+                (img->type == P_SLICE || img->type == SP_SLICE)) ||
+           (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) );  
+   
+   // pick the weighted upsampled plane only when weighting applies and is enabled for ME
+   if (apply_weights && input->UseWeightedReferenceME)
+   {
+     ref_pic = ref_picture->imgY_ups_w;
+   }
+   else
+   {
+     ref_pic = ref_picture->imgY_ups;
+   }
+ 
+   // step through the block four rows at a time
+   for (y0 = 0, abort_search = 0; y0 < blocksize_y && !abort_search; y0 += 4)
+   {
+     y_offset = (y0>7)*ypels;
+     // reference rows: block row scaled by 4 plus the candidate MV,
+     // consecutive block rows are 4 reference rows apart
+     ry0  = (y0<<2) + cand_mv_y;
+     ry4  = ry0 + 4;
+     ry8  = ry4 + 4;
+     ry12 = ry8 + 4;
+     y1   = y0  + 1;
+     y2   = y1  + 1;
+     y3   = y2  + 1;
+ 
+     // and four columns at a time
+     for (x0 = 0; x0 < blocksize_x; x0 += 4)
+     {
+       rx0 = (x0<<2) + cand_mv_x;
+       x1  = x0 + 1;
+       x2  = x1 + 1;
+       x3  = x2 + 1;
+       d   = diff;
+ 
+       // row 0 of the 4x4 sub-block; neighbouring pixels are 4 reference samples apart
+       orig_line = orig_pic [y0];    
+       ref_line  = get_line (ref_pic, ry0, rx0, img_height, img_width);
+       *d++      = orig_line[x0] - *(ref_line     );
+       *d++      = orig_line[x1] - *(ref_line + 4 );
+       *d++      = orig_line[x2] - *(ref_line + 8 );
+       *d++      = orig_line[x3] - *(ref_line + 12);
+ 
+       // row 1
+       orig_line = orig_pic [y1];    
+       ref_line  = get_line (ref_pic, ry4, rx0, img_height, img_width);
+       *d++      = orig_line[x0] - *(ref_line     );
+       *d++      = orig_line[x1] - *(ref_line + 4 );
+       *d++      = orig_line[x2] - *(ref_line + 8 );
+       *d++      = orig_line[x3] - *(ref_line + 12);
+ 
+       // row 2 (same sample positions, reached by advancing ref_line in steps of 4)
+       orig_line = orig_pic [y2];
+       ref_line  = get_line (ref_pic, ry8, rx0, img_height, img_width);
+       *d++      = orig_line[x0] - *(ref_line     );
+       *d++      = orig_line[x1] - *(ref_line += 4);
+       *d++      = orig_line[x2] - *(ref_line += 4);
+       *d++      = orig_line[x3] - *(ref_line += 4);
+ 
+       // row 3
+       orig_line = orig_pic [y3];    
+       ref_line  = get_line (ref_pic, ry12, rx0, img_height, img_width);
+       *d++      = orig_line[x0] - *(ref_line     );
+       *d++      = orig_line[x1] - *(ref_line += 4);
+       *d++      = orig_line[x2] - *(ref_line += 4);
+       *d        = orig_line[x3] - *(ref_line += 4);
+ 
+       if (!useABT)
+       {
+         // cost this 4x4 sub-block now and stop once the candidate cannot win
+         if ((mcost += SATD (diff, input->hadamard)) >= min_mcost) {
+           abort_search = 1;
+           break;
+         }
+       }
+       else
+       { // copy diff to curr_diff for ABT SATD calculation
+         // c_diff is filled in 64-entry groups of 8 rows x 8 columns:
+         // columns 8 and above add 64, rows 8 and above add y_offset
+         i = (x0&0x7) +  (x0>7) * 64 + y_offset;
+         for(k=0, j=y0; j<BLOCK_SIZE + y0; j++, k+=BLOCK_SIZE)
+         {
+           memcpy(&(c_diff[i + ((j&0x7)<<3)]), &diff[k], BLOCK_SIZE*sizeof(int));
+         }
+       }
+     }
+   }
+ 
+   // Adaptive Block Transform
+   if(useABT)
+   {
+     mcost += find_SATD (c_diff, blocktype);
+   }
+ 
+   return mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fast integer pixel block motion estimation
+  *
+  *    The search starts at the position given by (*mv_x, *mv_y) and then
+  *    tries, in this order: the zero displacement, a small diamond, a
+  *    symmetrical cross / hexagon / multi big hexagon stage (only when the
+  *    current cost is above the cross-search thresholds), the up-layer
+  *    predictor (block types > 1), an extended hexagon refinement and a
+  *    final small diamond refinement. Two early exits are taken when
+  *    min_mcost drops below ConvergeThreshold (scaled by block type).
+  *
+  *    Every candidate is evaluated through SEARCH_ONE_PIXEL_HELPER, a
+  *    macro defined outside this function; it presumably tests
+  *    (cand_x, cand_y) and updates min_mcost / best_x / best_y - confirm
+  *    against the macro definition.
+  *
+  * \return
+  *    minimum motion cost after search; *mv_x / *mv_y receive the best
+  *    position relative to (pic_pix_x, pic_pix_y)
+  ************************************************************************
+  */
+ int                                     //  ==> minimum motion cost after search
+ simplified_FastIntegerPelBlockMotionSearch (
+                pel_t   **orig_pic,      // <--  original pixel values, handed to the SAD calculation
+                short     ref,           // <--  reference frame (0... or -1 (backward)); used as index into listX[][]
+                int       list,          // <--  reference picture list
+                int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                int       search_range,  // <--  1-d search range in pel units                         
+                int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                int       lambda_factor) // <--  lagrangian parameter for determining motion cost
+ {
+   short mvshift       = 2;                    // pel -> sub-pel shift used for the MV cost
+   int   blocksize_y   = input->blc_size[blocktype][1];
+   int   blocksize_x   = input->blc_size[blocktype][0];
+   int   blocksize_x4  = blocksize_x >> 2;
+   int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;
+   int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;
+   int   center_x      = pic_pix_x + *mv_x;    // absolute position of the search center
+   int   center_y      = pic_pix_y + *mv_y;
+   int   best_x        = 0, best_y = 0;
+   int   search_step, iYMinNow, iXMinNow;
+   int   cand_x, cand_y, mcost;
+ 
+   unsigned short        i, m; 
+   pel_t                *ref_pic;
+   pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int);
+ 
+   // list offset: 0 unless the current MB is a field MB in an MBAFF frame,
+   // in which case it is 4 for odd and 2 for even macroblock numbers
+   short list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
+                           img->current_mb_nr%2 ? 4 : 2 : 0;
+   // field MBs in MBAFF frames search in a picture of half the height (rounded up)
+   int   height        = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
+                          (img->height+1)>>1 : img->height;
+    
+   //===== Use weighted Reference for ME ====
+   int  apply_weights = ( (active_pps->weighted_pred_flag  && 
+                          (img->type == P_SLICE || img->type == SP_SLICE)) ||
+                          (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) );  
+ 
+   if (apply_weights && input->UseWeightedReferenceME)
+   {
+     ref_pic       = listX[list+list_offset][ref]->imgY_11_w;
+   }
+   else
+   {
+     ref_pic       = listX[list+list_offset][ref]->imgY_11;
+   }
+ 
+   //===== set function for getting reference picture lines =====
+   // FastLineX is only chosen when the search center keeps a margin of
+   // search_range (plus the block size) to every picture border
+   if ((center_x > search_range) && (center_x < img->width-1-search_range-blocksize_x) &&
+       (center_y > search_range) && (center_y < height-1-search_range-blocksize_y))
+   {
+     get_ref_line = FastLineX;
+   }
+   else
+   {
+     get_ref_line = UMVLineX;
+   }
+  
+   // Check the center median predictor
+   cand_x = center_x ;
+   cand_y = center_y ;
+   mcost  = MV_COST    (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
+   mcost  = simplified_partial_SAD_calculate (ref_pic, orig_pic, get_ref_line,
+                        height, blocksize_y, blocksize_x,
+                        blocksize_x4, mcost, min_mcost, cand_x, cand_y);
+ 
+   if (mcost < min_mcost)
+   {
+     min_mcost = mcost;
+     best_x    = cand_x;
+     best_y    = cand_y;
+   }
+ 
+   // NOTE(review): if the center did not beat min_mcost, best_x/best_y are
+   // still 0 here and the diamond below is centered on (0,0) - confirm intended
+   iXMinNow = best_x;
+   iYMinNow = best_y;
+   // also try the zero displacement when the predictor is not zero
+   if ((0 != pred_mv_x) || (0 != pred_mv_y))
+   {
+     cand_x = pic_pix_x;
+     cand_y = pic_pix_y;
+     SEARCH_ONE_PIXEL_HELPER
+   } 
+ 
+   // If the min_mcost is small enough, do a local search then terminate
+   // This is good for stationary or quasi-stationary areas
+   if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
+   {
+     for (m = 0; m < 4; m++)
+     {   
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     *mv_x = best_x - pic_pix_x;
+     *mv_y = best_y - pic_pix_y; 
+     return min_mcost;
+   }
+ 
+   // Small local search
+   for (m = 0; m < 4; m++)
+   {   
+     cand_x = iXMinNow + Diamond_X[m];
+     cand_y = iYMinNow + Diamond_Y[m];
+     SEARCH_ONE_PIXEL_HELPER
+   }
+ 
+   // First_step: Symmetrical-cross search
+   // If distortion is large, use large shapes. Otherwise, compact shapes are faster
+   if ( (blocktype == 1 && 
+         min_mcost > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
+        (min_mcost > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])) )
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+ 
+     // odd offsets 1, 3, 5, ... to the right, left, below and above the center
+     for(i = 1; i <= search_range/2; i++)
+     {
+       search_step = (i<<1) - 1;
+       cand_x = iXMinNow + search_step;
+       cand_y = iYMinNow;
+       SEARCH_ONE_PIXEL_HELPER
+ 
+       cand_x = iXMinNow - search_step;
+       SEARCH_ONE_PIXEL_HELPER
+ 
+       cand_x = iXMinNow;
+       cand_y = iYMinNow + search_step;
+       SEARCH_ONE_PIXEL_HELPER
+ 
+       cand_y = iYMinNow - search_step;
+       SEARCH_ONE_PIXEL_HELPER
+     }
+ 
+     // Hexagon Search
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_X[m];
+       cand_y = iYMinNow + Hexagon_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     // Multi Big Hexagon Search
+     // the 16-point pattern is scaled by i = 1 .. search_range/4
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for(i = 1; i <= search_range/4; i++)
+     {
+       for (m = 0; m < 16; m++)
+       {
+         cand_x = iXMinNow + Big_Hexagon_X[m]*i;
+         cand_y = iYMinNow + Big_Hexagon_Y[m]*i;
+         SEARCH_ONE_PIXEL_HELPER
+       }
+     }
+   }
+ 
+   // Search up_layer predictor for non 16x16 blocks
+   // (the up-layer MV is divided by 4 to get a full-pel displacement)
+   if (blocktype > 1)
+   {
+     cand_x = pic_pix_x + (simplified_pred_MV_uplayer_X/4);
+     cand_y = pic_pix_y + (simplified_pred_MV_uplayer_Y/4);
+     SEARCH_ONE_PIXEL_HELPER
+   }
+ 
+   // search center differs from the zero displacement: test the zero
+   // displacement as well and refine around the current best position
+   if(center_x != pic_pix_x || center_y != pic_pix_y)
+   {
+     cand_x = pic_pix_x;
+     cand_y = pic_pix_y;
+     SEARCH_ONE_PIXEL_HELPER
+ 
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     // Local diamond search
+     for (m = 0; m < 4; m++)
+     {   
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+   }
+ 
+   // If the minimum cost is small enough, do a local search
+   // and finish the search here
+   if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {   
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     *mv_x = best_x - pic_pix_x;
+     *mv_y = best_y - pic_pix_y; 
+     return min_mcost;
+   }
+ 
+   //second_step:  Extended Hexagon-based Search
+   // re-center on the best position until it stops moving (at most search_range rounds)
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 6; m++)
+     {
+       cand_x = iXMinNow + Hexagon_X[m];
+       cand_y = iYMinNow + Hexagon_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+     // The minimum cost point happens in the center
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ 
+   //third_step: Small diamond search
+   // same re-centering scheme with the 4-point diamond
+   for(i = 0; i < search_range; i++)
+   {
+     iXMinNow = best_x;
+     iYMinNow = best_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_x = iXMinNow + Diamond_X[m];
+       cand_y = iYMinNow + Diamond_Y[m];
+       SEARCH_ONE_PIXEL_HELPER
+     }
+ 
+     // The minimum cost point happens in the center
+     if (best_x == iXMinNow && best_y == iYMinNow)
+     {
+       break;
+     }
+   }
+ 
+   // convert the best absolute position back into a displacement
+   *mv_x = best_x - pic_pix_x;
+   *mv_y = best_y - pic_pix_y; 
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fast sub pixel block motion estimation 
+  *
+  *    Refines the motion vector passed in *mv_x / *mv_y on the quarter-pel
+  *    grid. Candidates are restricted to a window of +/-3 quarter-pel
+  *    positions around the incoming vector; simplified_SearchState marks
+  *    the positions that were already evaluated. The search consists of
+  *    the center (only costed when input->hadamard is set), the fractional
+  *    part of the MV predictor and up to three rounds of a small diamond.
+  *
+  * \return
+  *    minimum motion cost after search; *mv_x / *mv_y receive the best
+  *    vector found
+  *
+  * \note
+  *    search_pos2 and search_pos4 are not used by this implementation.
+  ************************************************************************
+  */
+ int                                     //  ==> minimum motion cost after search
+ simplified_FastSubPelBlockMotionSearch (
+                pel_t** orig_pic,        // <--  original pixel values for the AxB block
+                short     ref,           // <--  reference frame (0... or -1 (backward))
+                int       list,          // <--  reference picture list
+                int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                short*    mv_x,          // <--> in: search center (x) / out: MV (x) - in sub-pel (1/4 pel) units
+                short*    mv_y,          // <--> in: search center (y) / out: MV (y) - in sub-pel (1/4 pel) units
+                int       search_pos2,   // <--  search positions for    half-pel search  (unused here)
+                int       search_pos4,   // <--  search positions for quarter-pel search  (unused here)
+                int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                int       lambda_factor, // <--  lagrangian parameter for determining motion cost
+                int       useABT)
+ {
+   int   mcost;
+   int   cand_mv_x, cand_mv_y;
+   
+   // list offset: 0 unless the current MB is a field MB in an MBAFF frame,
+   // in which case it is 4 for odd and 2 for even macroblock numbers
+   int   list_offset     = ((img->MbaffFrameFlag) &&
+                            (img->mb_data[img->current_mb_nr].mb_field)) ?
+                             img->current_mb_nr%2 ? 4 : 2 : 0;
+   StorablePicture *ref_picture = listX[list+list_offset][ref];
+   
+   short mv_shift        = 0;              // MVs are already in sub-pel units
+   short blocksize_x     = input->blc_size[blocktype][0];
+   short blocksize_y     = input->blc_size[blocktype][1];
+   int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<<2);
+   int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<<2);
+   // kept as int: (size + padding) * 4 does not fit into a short for
+   // pictures wider/higher than roughly 8K samples
+   int   max_pos_x4      = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
+   int   max_pos_y4      = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
+   
+   int   iXMinNow, iYMinNow;
+   short dynamic_search_range, i, m;
+   int   currmv_x = 0, currmv_y = 0;
+   int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
+   int   mv_cost;
+   int   pred_frac_up_mv_x, pred_frac_up_mv_y;
+ 
+   //===== set function for getting quarter-pel reference lines =====
+   if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
+       (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1))
+   {
+     get_line = FastLine4X;
+   }
+   else
+   {
+     get_line = UMVLine4X;    
+   }
+   
+   dynamic_search_range = 3;
+   // fractional offsets of the MV predictor and of the up-layer predictor
+   // relative to the search center; the C remainder keeps them in -3..3
+   pred_frac_mv_x = (pred_mv_x - *mv_x) % 4;
+   pred_frac_mv_y = (pred_mv_y - *mv_y) % 4; 
+   
+   pred_frac_up_mv_x = (simplified_pred_MV_uplayer_X - *mv_x) % 4;
+   pred_frac_up_mv_y = (simplified_pred_MV_uplayer_Y - *mv_y) % 4;
+ 
+   // clear the visited map of (2*range+1)^2 entries
+   // NOTE(review): assumes the rows of simplified_SearchState are stored
+   // contiguously starting at row 0 - confirm against the allocation
+   memset(simplified_SearchState[0], 0, 
+          (2*dynamic_search_range+1)*(2*dynamic_search_range+1));
+ 
+   // the search center itself counts as visited
+   simplified_SearchState[dynamic_search_range][dynamic_search_range] = 1;
+   if(input->hadamard)
+   {
+     cand_mv_x = *mv_x;    
+     cand_mv_y = *mv_y;    
+     mv_cost   = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
+     mcost     = simplified_add_up_SAD_quarter_pel(pic_pix_x, pic_pix_y, blocksize_x, blocksize_y,
+                                 cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y,
+                                 ref_picture, orig_pic, mv_cost, min_mcost, useABT, blocktype);
+     // NOTE(review): if the center does not beat min_mcost, currmv_x/currmv_y
+     // keep their initial value 0 - confirm that callers pass a huge min_mcost
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x  = cand_mv_x;
+       currmv_y  = cand_mv_y; 
+     }
+   }
+   else
+   {
+     currmv_x = *mv_x;
+     currmv_y = *mv_y; 
+   }
+ 
+   // If the min_mcost is small enough and other statistics are positive,
+   // better to stop the search now
+   if ( ((*mv_x) == 0) && ((*mv_y) == 0) && 
+        (pred_frac_mv_x == 0 && pred_frac_up_mv_x == 0) &&
+        (pred_frac_mv_y == 0 && pred_frac_up_mv_y == 0) &&
+        (min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) )
+   {
+     *mv_x = currmv_x;
+     *mv_y = currmv_y;
+     return min_mcost;
+   }
+ 
+   // try the position indicated by the fractional part of the MV predictor
+   if(pred_frac_mv_x || pred_frac_mv_y)
+   {
+     cand_mv_x = *mv_x + pred_frac_mv_x;    
+     cand_mv_y = *mv_y + pred_frac_mv_y;    
+     mv_cost   = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);    
+     mcost     = simplified_add_up_SAD_quarter_pel(pic_pix_x, pic_pix_y, blocksize_x, blocksize_y,
+                             cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y,
+                             ref_picture, orig_pic,mv_cost, min_mcost, useABT, blocktype);
+     simplified_SearchState[cand_mv_y -*mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range] = 1;
+     if (mcost < min_mcost)
+     {
+       min_mcost = mcost;
+       currmv_x  = cand_mv_x;
+       currmv_y  = cand_mv_y; 
+     }
+   }
+ 
+   // Multiple small diamond search
+   for(i = 0; i < dynamic_search_range; i++)
+   {
+     // stays 1 unless one of the four neighbours improves the cost
+     abort_search = 1;
+ 
+     iXMinNow = currmv_x;
+     iYMinNow = currmv_y;
+     for (m = 0; m < 4; m++)
+     {
+       cand_mv_x = iXMinNow + Diamond_X[m];    
+       cand_mv_y = iYMinNow + Diamond_Y[m]; 
+ 
+       // only positions inside the window around the search center are valid
+       if(absm(cand_mv_x - *mv_x) <= dynamic_search_range && absm(cand_mv_y - *mv_y) <= dynamic_search_range)
+       {
+         // skip positions that were evaluated before
+         if(!simplified_SearchState[cand_mv_y - *mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range])
+         {
+           mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);    
+           mcost   = simplified_add_up_SAD_quarter_pel(pic_pix_x, pic_pix_y, blocksize_x, blocksize_y,
+                                   cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y,
+                                   ref_picture, orig_pic, mv_cost, min_mcost, useABT, blocktype);
+           simplified_SearchState[cand_mv_y - *mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range] = 1;
+ 
+           if (mcost < min_mcost)
+           {
+             min_mcost    = mcost;
+             currmv_x     = cand_mv_x;
+             currmv_y     = cand_mv_y; 
+             abort_search = 0; 
+           }
+           // good enough: stop immediately
+           if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
+           {
+             *mv_x = currmv_x;
+             *mv_y = currmv_y;
+             return min_mcost;
+           }
+         }
+       }
+     }
+     // If the minimum cost point is in the center, break out the loop
+     if (abort_search)
+     {
+       break;
+     }
+   }
+   
+   *mv_x = currmv_x;
+   *mv_y = currmv_y;
+   return min_mcost;
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Derive simplified_flag_intra_SAD for the current macroblock from
+  *    the per-column flags stored in simplified_flag_intra
+  *    (used for fast motion estimation).
+  *
+  *    Nothing is changed while coding an I slice.
+  ************************************************************************
+  */
+ void simplified_decide_intrabk_SAD()
+ {
+   int mb_col;
+ 
+   // I slices leave the flag untouched
+   if (img->type == I_SLICE)
+   {
+     return;
+   }
+ 
+   // macroblock column of the current position (16 luma samples per column)
+   mb_col = (img->pix_x)>>4;
+ 
+   if (img->pix_x == 0)
+   {
+     // first column: cleared at the picture origin, otherwise the flag
+     // currently stored for this column
+     simplified_flag_intra_SAD = (img->pix_y == 0) ? 0 : simplified_flag_intra[mb_col];
+   }
+   else if (img->pix_y == 0)
+   {
+     // first row: only the column to the left is consulted
+     simplified_flag_intra_SAD = simplified_flag_intra[mb_col-1];
+   }
+   else
+   {
+     // elsewhere: set as soon as this column or one of its two
+     // neighbouring columns is flagged
+     simplified_flag_intra_SAD = (simplified_flag_intra[mb_col]   ||
+                                  simplified_flag_intra[mb_col-1] ||
+                                  simplified_flag_intra[mb_col+1]);
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Record whether the current macroblock was coded with mode 9 or 10
+  *    and, if so (outside I slices), reset the stored fast-ME costs
+  *    (used for fast motion estimation).
+  *
+  * \param best_mode
+  *    mode chosen for the macroblock; 9 and 10 are treated as intra here
+  *    (presumably the intra 4x4 / 16x16 modes - confirm against defines.h)
+  * \param ref_max
+  *    not used
+  ************************************************************************
+  */
+ void simplified_skip_intrabk_SAD(int best_mode, int ref_max)
+ {
+   const int intra_mode = (best_mode == 9 || best_mode == 10);
+   short type, row, col;
+ 
+   // remember the decision for this macroblock column
+   if (img->number > 0)
+   {
+     simplified_flag_intra[(img->pix_x)>>4] = intra_mode ? 1 : 0;
+   }
+ 
+   // the cost tables are only cleared for intra MBs in non-I slices
+   if (img->type == I_SLICE || !intra_mode)
+   {
+     return;
+   }
+ 
+   // zero the 9 x 4 x 4 entries of both cost tables
+   for (type = 0; type < 9; type++)
+   {
+     for (row = 0; row < 4; row++)
+     {
+       for (col = 0; col < 4; col++)
+       {
+         simplified_fastme_l0_cost[type][row][col] = 0;
+         simplified_fastme_l1_cost[type][row][col] = 0;
+       }
+     }
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Set up the up-layer MV predictor and the up-layer SAD prediction
+  *    for the given block (used for fast motion estimation).
+  *
+  *    The "up layer" is the next larger block type:
+  *    types 2,3 -> 1, type 4 -> 2, types 5,6 -> 4, types above 6 -> 5.
+  *    For block types <= 1 nothing is modified.
+  *
+  * \param ref        reference index into all_mv
+  * \param list       reference list; 1 selects the l1 cost table, any
+  *                   other value the l0 cost table
+  * \param block_y    block row, used as index and added to img->pix_y>>2
+  * \param block_x    block column, used as index and added to img->pix_x>>2
+  * \param blocktype  block type of the block to be searched
+  * \param all_mv     motion vectors, indexed
+  *                   [block_y][block_x][list][ref][blocktype][component]
+  ************************************************************************
+  */
+ void simplified_setup_FME(short ref,
+                           int list,
+                           int block_y,
+                           int block_x,
+                           int blocktype,
+                           short ******all_mv)
+ {
+   int     uplayer;
+   int  ***uplayer_cost;
+   short  *uplayer_mv;
+ 
+   // 16x16 (and anything below) has no up layer
+   if (blocktype <= 1)
+   {
+     return;
+   }
+ 
+   // block type whose results serve as prediction
+   if (blocktype > 6)
+   {
+     uplayer = 5;
+   }
+   else if (blocktype > 4)
+   {
+     uplayer = 4;
+   }
+   else if (blocktype == 4)
+   {
+     uplayer = 2;
+   }
+   else
+   {
+     uplayer = 1;
+   }
+ 
+   // up-layer motion vector
+   uplayer_mv = all_mv[block_y][block_x][list][ref][uplayer];
+   simplified_pred_MV_uplayer_X = uplayer_mv[0];
+   simplified_pred_MV_uplayer_Y = uplayer_mv[1];
+ 
+   // half of the up-layer cost stored at the position of this block
+   uplayer_cost = (list==1) ? simplified_fastme_l1_cost : simplified_fastme_l0_cost;
+   simplified_pred_SAD_uplayer  = uplayer_cost[uplayer][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x];
+   simplified_pred_SAD_uplayer /= 2;
+ 
+   // no SAD prediction when the intra flag is set
+   if (simplified_flag_intra_SAD)
+   {
+     simplified_pred_SAD_uplayer = 0;
+   }
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/simplified_fast_me.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/simplified_fast_me.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/simplified_fast_me.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,90 ----
+ 
+ /*!
+  *************************************************************************************
+  *
+  * \file simplified_fast_me.h
+  *
+  * \brief
+  *   Fast integer pixel and sub pixel motion estimation
+  *   Improved and simplified from the original UMHexagonS algorithms
+  *   See JVT-P021 for details
+  *
+  * \author 
+  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
+  *    - Zhibo Chen                      <chenzhibo at tsinghua.org.cn>
+  *    - JianFeng Xu                     <fenax at video.mdc.tsinghua.edu.cn>  
+  *    - Wenfang Fu                      <fwf at video.mdc.tsinghua.edu.cn>
+  *
+  *    - Xiaoquan Yi                     <xyi at engr.scu.edu>
+  *    - Jun Zhang                       <jzhang2 at engr.scu.edu>
+  *
+  * \date
+  *    16. June 2005
+  *************************************************************************************
+  */
+ 
+ #ifndef _SIMPLIFIED_FAST_ME_H_
+ #define _SIMPLIFIED_FAST_ME_H_
+ 
+ #include "mbuffer.h"
+ 
+ // Right shift applied to the thresholds below, indexed by block type (0..7)
+ static const short block_type_shift_factor[8] = {0, 0, 1, 1, 2, 3, 3, 1}; // last one relaxed to 1 instead 4
+ 
+ // NOTE(review): the variables below are defined (not declared extern) in
+ // this header, so every translation unit that includes it carries its own
+ // tentative definition - consider extern declarations plus one definition.
+ 
+ // Cost thresholds; each is compared against min_mcost after being shifted
+ // right by block_type_shift_factor[blocktype]
+ unsigned short  SymmetricalCrossSearchThreshold1;   // cross/hexagon stage, 16x16 blocks (blocktype 1)
+ unsigned short  SymmetricalCrossSearchThreshold2;   // cross/hexagon stage, any block type
+ unsigned short  ConvergeThreshold;                  // early termination of the integer-pel search
+ unsigned short  SubPelThreshold1;                   // early termination of the sub-pel search before refinement
+ unsigned short  SubPelThreshold3;                   // early termination inside the sub-pel diamond search
+ 
+ byte  **simplified_SearchState;          //state for fractional pel search
+ int  ***simplified_fastme_l0_cost;       //store SAD information needed for forward median and uplayer prediction
+ int  ***simplified_fastme_l1_cost;       //store SAD information needed for backward median and uplayer prediction
+ byte   *simplified_flag_intra;           // intra flag per macroblock column, indexed by pix_x>>4
+ int     simplified_flag_intra_SAD;       // non-zero: up-layer SAD prediction is forced to 0
+ 
+ int     simplified_pred_SAD_uplayer;     // Up layer SAD prediction
+ short   simplified_pred_MV_uplayer_X;    // Up layer MV predictor X-component
+ short   simplified_pred_MV_uplayer_Y;    // Up layer MV predictor Y-component
+ 
+ // NOTE(review): the first three functions are not defined in the visible
+ // part of the sources; their purpose is presumed from the names - verify.
+ void    simplified_init_FME();
+ int     simplified_get_mem_FME();
+ void    simplified_free_mem_FME();
+ void    simplified_decide_intrabk_SAD();                                // derive simplified_flag_intra_SAD for the current MB
+ void    simplified_skip_intrabk_SAD(int, int);                          // (best_mode, ref_max): update intra flag, reset costs
+ void    simplified_setup_FME(short, int, int, int, int, short ******);  // (ref, list, block_y, block_x, blocktype, all_mv)
+ 
+ // Integer-pel search: starts at the position given by *mv_x / *mv_y and
+ // writes the best full-pel displacement back to them.
+ int                                     //  ==> minimum motion cost after search
+ simplified_FastIntegerPelBlockMotionSearch (
+                pel_t   **orig_pic,      // <--  original pixel values, handed to the SAD calculation
+                short     ref,           // <--  reference frame (0... or -1 (backward))
+                int       list,          // <--  reference picture list
+                int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
+                short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
+                int       search_range,  // <--  1-d search range in pel units                         
+                int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                int       lambda_factor);// <--  lagrangian parameter for determining motion cost
+ 
+ // Sub-pel search: refines *mv_x / *mv_y on the quarter-pel grid.
+ int                                     //  ==> minimum motion cost after search
+ simplified_FastSubPelBlockMotionSearch (
+                pel_t** orig_pic,        // <--  original pixel values for the AxB block
+                short     ref,           // <--  reference frame (0... or -1 (backward))
+                int       list,          // <--  reference picture list
+                int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
+                int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
+                int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
+                short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
+                short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
+                short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in sub-pel (1/4 pel) units
+                short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in sub-pel (1/4 pel) units
+                int       search_pos2,   // <--  search positions for    half-pel search  (not used by the implementation)
+                int       search_pos4,   // <--  search positions for quarter-pel search  (not used by the implementation)
+                int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
+                int       lambda_factor, // <--  lagrangian parameter for determining motion cost
+                int       useABT);       // <--  non-zero: cost is computed with the ABT SATD path
+ 
+ #endif


Index: llvm-test/MultiSource/Applications/JM/lencod/slice.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/slice.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/slice.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1134 ----
+ 
+ /*!
+  **************************************************************************************
+  * \file
+  *    slice.c
+  * \brief
+  *    generate the slice header, setup the bit buffer for slices,
+  *    and generates the slice NALU(s)
+ 
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *      - Thomas Stockhammer            <stockhammer at ei.tum.de>
+  *      - Detlev Marpe                  <marpe at hhi.de>
+  *      - Stephan Wenger                <stewe at cs.tu-berlin.de>
+  *      - Alexis Michael Tourapis       <alexismt at ieee.org>
+  ***************************************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <math.h>
+ #include <float.h>
+ 
+ #include "global.h"
+ #include "header.h"
+ #include "rtp.h"
+ #include "fmo.h"
+ #include "vlc.h"
+ #include "image.h"
+ #include "cabac.h"
+ #include "elements.h"
+ #include "epzs.h"
+ 
+ // Local declarations
+ // Prototypes of helpers private to this file (static linkage)
+ static Slice *malloc_slice();
+ static void  free_slice(Slice *slice);
+ static void  init_slice(int start_mb_addr);
+ static void set_ref_pic_num();
+ // Globals defined in other translation units
+ extern ColocatedParams *Co_located;
+ extern StorablePicture **listX[6];     // reference picture lists
+ // Functions declared here directly instead of via a header
+ // NOTE(review): presumably defined in other source files - a shared header prototype would be safer
+ void poc_ref_pic_reorder(StorablePicture **list, unsigned num_ref_idx_lX_active, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx, int weighted_prediction, int list_no);
+ void SetLagrangianMultipliers();
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Reset the reference picture list reordering flags of the current
+  *    slice (img->currentSlice) for list 0 and list 1.
+  ************************************************************************
+  */
+ void init_ref_pic_list_reordering()
+ {
+   Slice *slice = img->currentSlice;
+ 
+   // neither list signals a reordering by default
+   slice->ref_pic_list_reordering_flag_l1 = 0;
+   slice->ref_pic_list_reordering_flag_l0 = 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  *  \brief
+  *     Writes the slice header (partition 0) and the partition headers
+  *     (remaining partitions) of the current slice and prepares the
+  *     entropy coder of every partition.
+  *
+  *     One partition is used for IDR pictures or when
+  *     input->partition_mode is PAR_DP_1, three partitions otherwise.
+  *     With CABAC the bitstream is byte aligned after the header and the
+  *     alignment bits are counted as header bits.
+  *
+  *  \return number of bits used for the slice (and partition) header(s)
+  ************************************************************************
+ */
+ int start_slice()
+ {
+   Slice     *slice = img->currentSlice;
+   Bitstream *stream;
+   EncodingEnvironmentPtr cabac_env;
+   int        num_partitions;
+   int        bits = 0;
+   int        part;
+ 
+   // IDR pictures are never data partitioned
+   if (img->currentPicture->idr_flag || input->partition_mode == PAR_DP_1)
+   {
+     num_partitions = 1;
+   }
+   else
+   {
+     num_partitions = 3;
+   }
+ 
+   RTPUpdateTimestamp (img->tr);
+ 
+   for (part = 0; part < num_partitions; part++)
+   {
+     stream = (slice->partArr[part]).bitstream;
+     stream->write_flag = 0;
+ 
+     // first partition carries the slice header, the others a partition header
+     bits += (part == 0) ? SliceHeader (0) : Partition_BC_Header(part);
+ 
+     if (input->symbol_mode != CABAC)
+     {
+       // Initialize CA-VLC
+       CAVLC_init();
+       continue;
+     }
+ 
+     //! Initialize CABAC
+     cabac_env = &((slice->partArr[part]).ee_cabac);
+     // bits needed to reach the next byte boundary belong to the header
+     if (stream->bits_to_go != 8)
+     {
+       bits += stream->bits_to_go;
+     }
+     writeVlcByteAlign(stream);
+     arienco_start_encoding(cabac_env, stream->streamBuffer, &(stream->byte_pos));
+     cabac_new_slice();
+   }
+ 
+   if (input->symbol_mode == CABAC)
+   {
+     init_contexts();
+   }
+ 
+   return bits;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    This function terminates a slice (but doesn't write it out), 
+  *    the old terminate_slice (0)
+  *
+  *    For every data partition of the current slice the bitstream is
+  *    finalized (SODBtoRBSP for UVLC, arienco_done_encoding for CABAC)
+  *    and then run through RBSPtoEBSP; the growth in byte_pos caused by
+  *    that step is accumulated in stats->em_prev_bits.
+  * \param lastslice
+  *    non-zero if this is the last slice of the picture; only then (and
+  *    only for the last partition in CABAC mode) is the minimum picture
+  *    size evaluated and a stuffing byte count passed to RBSPtoEBSP
+  * \return
+  *    always 0 in the current implementation
+  *
+  ************************************************************************
+  */
+ int terminate_slice(int lastslice)
+ {
+   // chroma block dimensions, indexed by active_sps->chroma_format_idc
+   static const int MbWidthC  [4]= { 0, 8, 8,  16};
+   static const int MbHeightC [4]= { 0, 8, 16, 16};
+ 
+   Bitstream *currStream;
+   Slice *currSlice = img->currentSlice;
+   EncodingEnvironmentPtr eep;
+   int i;
+   int byte_pos_before_startcode_emu_prevention;
+   int min_num_bytes=0;
+   int stuffing_bytes=0;
+   int RawMbBits;
+ 
+   if (input->symbol_mode == CABAC)
+     write_terminating_bit (1);      // only once, not for all partitions
+   
+   for (i=0; i<currSlice->max_part_nr; i++)
+   {
+     currStream = (currSlice->partArr[i]).bitstream;
+     if (input->symbol_mode == UVLC)
+     {
+       SODBtoRBSP(currStream);
+       byte_pos_before_startcode_emu_prevention = currStream->byte_pos;
+       currStream->byte_pos = RBSPtoEBSP(currStream->streamBuffer, 0 , currStream->byte_pos, 0);
+       *(stats->em_prev_bits) += (currStream->byte_pos - byte_pos_before_startcode_emu_prevention) * 8;
+     }
+     else     // CABAC
+     {
+       eep = &((currSlice->partArr[i]).ee_cabac);
+       // terminate the arithmetic code
+       arienco_done_encoding(eep);
+       currStream->bits_to_go = eep->Ebits_to_go;
+       currStream->byte_buf = 0;
+       // picture byte count is taken before emulation prevention is applied
+       img->bytes_in_picture += currStream->byte_pos;
+ 
+       byte_pos_before_startcode_emu_prevention= currStream->byte_pos;
+       if (lastslice && i==((currSlice->max_part_nr-1)))
+       {
+         RawMbBits = 256 * img->bitdepth_luma + 2 * MbWidthC[active_sps->chroma_format_idc] * MbHeightC[active_sps->chroma_format_idc] * img->bitdepth_chroma;
+         min_num_bytes = ((96 * get_pic_bin_count()) - (RawMbBits * (int)img->PicSizeInMbs *3) + 1023) / 1024;
+         if (min_num_bytes>img->bytes_in_picture)
+         {
+           stuffing_bytes = min_num_bytes - img->bytes_in_picture;
+           printf ("CABAC stuffing words = %6d\n", stuffing_bytes/3);
+         }
+       }
+ 
+       // stuffing_bytes is 0 unless the branch above raised it
+       currStream->byte_pos = RBSPtoEBSP(currStream->streamBuffer, 0, currStream->byte_pos, currStream->byte_pos + stuffing_bytes);
+       *(stats->em_prev_bits) += (currStream->byte_pos - byte_pos_before_startcode_emu_prevention) * 8;
+     }           // CABAC
+   }           // partition loop
+   if( input->symbol_mode == CABAC )
+   {
+     store_contexts();
+   }
+   return 0;   
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Encodes one slice: initializes the slice (init_slice), writes its
+ *    header (start_slice), codes macroblocks until end_of_slice is set,
+ *    and finally calls terminate_slice().
+ * \param SliceGroupId
+ *    slice group whose first macroblock, as returned by
+ *    FmoGetFirstMacroblockInSlice(), starts this slice
+ * \param pic
+ *    not referenced in this function body
+ * \param TotalCodedMBs
+ *    added to the number of MBs coded here and compared against
+ *    img->PicSizeInMbs to tell terminate_slice() whether this is the
+ *    last slice
+ * \par
+ *   returns the number of coded MBs in the slice
+ ************************************************************************
+ */
+ int encode_one_slice (int SliceGroupId, Picture *pic, int TotalCodedMBs)
+ {
+   Boolean end_of_slice = FALSE;
+   Boolean recode_macroblock;
+   int len;
+   int NumberOfCodedMBs = 0;
+   int CurrentMbAddr;
+   double FrameRDCost = DBL_MAX, FieldRDCost = DBL_MAX;
+   
+   img->cod_counter = 0;
+ 
+   CurrentMbAddr = FmoGetFirstMacroblockInSlice (SliceGroupId);
+ // printf ("\n\nEncode_one_slice: PictureID %d SliceGroupId %d  SliceID %d  FirstMB %d \n", img->tr, SliceGroupId, img->current_slice_nr, CurrentMbInScanOrder);
+ 
+   init_slice (CurrentMbAddr);
+   Bytes_After_Header = img->currentSlice->partArr[0].bitstream->byte_pos;
+ 
+   SetLagrangianMultipliers();
+ 
+   if (input->symbol_mode==CABAC)
+   {
+     SetCtxModelNumber ();
+   }
+   
+   img->checkref = (input->rdopt && input->RestrictRef && (img->type==P_SLICE || img->type==SP_SLICE));
+ 
+ /*
+   // Tian Dong: June 7, 2002 JVT-B042
+   // When the pictures are put into different layers and subseq, not all the reference frames
+   // in multi-frame buffer are valid for prediction. The actual number of the valid reference
+   // frames, fb->num_short_used, will be given by start_slice(sym).
+   // Save the fb->short_used.
+   if (input->NumFramesInELSubSeq)
+     {
+       short_used = fb->short_used;
+     }
+ */
+ 
+   len = start_slice ();   // header length, accumulated into the bit statistics below
+ 
+   // Rate control
+   img->NumberofHeaderBits +=len;
+ 
+   // basic unit layer rate control
+   if(img->BasicUnit<img->Frame_Total_Number_MB)
+     img->NumberofBasicUnitHeaderBits +=len;
+ 
+ //  printf("short size, used, num-used: (%d,%d,%d)\n", fb->short_size, fb->short_used, fb->num_short_used);
+ 
+ /*
+   // Tian Dong: June 7, 2002 JVT-B042
+   if (input->NumFramesInELSubSeq)
+     {
+       fb->short_used = fb->num_short_used;
+     }
+ */
+   // Update statistics
+   stats->bit_slice += len;
+   stats->bit_use_header[img->type] += len;
+ // printf ("\n\n");
+ 
+   while (end_of_slice == FALSE) // loop over macroblocks
+   {
+     if (img->AdaptiveRounding && input->AdaptRndPeriod && (img->current_mb_nr % input->AdaptRndPeriod == 0))
+     {
+       CalculateOffsetParam();
+       
+       if(input->Transform8x8Mode)
+       {
+         CalculateOffset8Param();
+       }
+     }
+ 
+     //sw paff
+     if (!img->MbaffFrameFlag)
+     {
+       recode_macroblock = FALSE;
+       rdopt = &rddata_top_frame_mb;   // store data in top frame MB 
+       
+       start_macroblock (CurrentMbAddr, FALSE);
+       encode_one_macroblock ();
+ 
+       write_one_macroblock (1);
+       
+       terminate_macroblock (&end_of_slice, &recode_macroblock);
+       
+ //       printf ("encode_one_slice: mb %d,  slice %d,   bitbuf bytepos %d EOS %d\n", 
+ //       img->current_mb_nr, img->current_slice_nr, 
+ //       img->currentSlice->partArr[0].bitstream->byte_pos, end_of_slice);
+       
+       if (recode_macroblock == FALSE)       // The final processing of the macroblock has been done
+       {
+         CurrentMbAddr = FmoGetNextMBNr (CurrentMbAddr);
+         if (CurrentMbAddr == -1)   // end of slice
+         {
+ //          printf ("FMO End of Slice Group detected, current MBs %d, force end of slice\n", NumberOfCodedMBs+1);
+           end_of_slice = TRUE;
+         }
+         NumberOfCodedMBs++;       // only here we are sure that the coded MB is actually included in the slice
+         proceed2nextMacroblock (CurrentMbAddr);
+       }
+       else
+       {
+         //!Go back to the previous MB to recode it
+         img->current_mb_nr = FmoGetPreviousMBNr(img->current_mb_nr);
+         if(img->current_mb_nr == -1 )   // The first MB of the slice group  is too big,
+                                         // which means it's impossible to encode picture using current slice bits restriction
+         {
+           snprintf (errortext, ET_SIZE, "Error encoding first MB with spcified parameter, bits of current MB may be too big");
+           error (errortext, 300);
+         }
+       }
+     }
+     else                      // TBD -- Addition of FMO
+     {
+       
+ //! This following ugly code breaks slices, at least for a slice mode that accumulates a certain
+ //! number of bits into one slice.  
+ //! The suggested algorithm is as follows:
+ //!
+ //! SaveState (Bitstream, stats,  etc. etc.);
+ //! BitsForThisMBPairInFrameMode = CodeMB (Upper, FRAME_MODE) + CodeMB (Lower, FRAME_MODE);
+ //! DistortionForThisMBPairInFrameMode = CalculateDistortion(Upper) + CalculateDistortion (Lower);
+ //! RestoreState();
+ //! BitsForThisMBPairInFieldMode = CodeMB (Upper, FIELD_MODE) + CodeMB (Lower, FIELD_MODE);
+ //! DistortionForThisMBPairInFieldMode = CalculateDistortion(Upper) + CalculateDistortion (Lower);
+ //! FrameFieldMode = Decision (...)
+ //! RestoreState()
+ //! if (FrameFieldMode == FRAME) {
+ //!   CodeMB (Upper, FRAME); CodeMB (Lower, FRAME);
+ //! } else {
+ //!   CodeMB (Upper FIELD); CodeMB (Lower, FIELD);
+ //! }
+ //!
+ //! Open questions/issues:
+ //!   1. CABAC/CA-VLC state:  It seems that the CABAC/CA_VLC states are changed during the
+ //!      dummy encoding processes (for the R-D based selection), but that they are never
+ //!      reset, once the selection is made.  I believe that this breaks the MB-adaptive
+ //!      frame/field coding.  The necessary code for the state saves is readily available
+ //!      in macroblock.c, start_macroblock() and terminate_macroblock() (this code needs
+ //!      to be double checked that it works with CA-VLC as well)
+ //!   2. would it be an option to allocate Bitstreams with zero data in them (or copy the
+ //!      already generated bitstream) for the "test coding"?  
+       
+       if (input->MbInterlace == ADAPTIVE_CODING)
+       {
+         //================ code MB pair as frame MB ================
+         //----------------------------------------------------------
+         recode_macroblock = FALSE;
+         
+         
+         img->field_mode = 0;  // MB coded as frame
+         img->top_field = 0;   // Set top field to 0
+         
+         //Rate control
+         img->write_macroblock = 0;
+         img->bot_MB = 0;   
+         
+         start_macroblock (CurrentMbAddr, FALSE);
+         
+         rdopt = &rddata_top_frame_mb; // store data in top frame MB 
+         encode_one_macroblock ();     // code the MB as frame
+         FrameRDCost = rdopt->min_rdcost;
+         //***   Top MB coded as frame MB ***//
+ 
+         //Rate control
+         img->bot_MB = 1; //for Rate control
+         
+         // go to the bottom MB in the MB pair
+         img->field_mode = 0;  // MB coded as frame  //GB
+         
+         start_macroblock (CurrentMbAddr+1, FALSE);
+         rdopt = &rddata_bot_frame_mb; // store data in bottom frame MB
+         encode_one_macroblock ();     // code the MB as frame
+         FrameRDCost += rdopt->min_rdcost;
+ 
+         //***   Bottom MB coded as frame MB ***//
+       }
+ 
+       if ((input->MbInterlace == ADAPTIVE_CODING) || (input->MbInterlace == FIELD_CODING))
+       {
+         //Rate control
+         img->bot_MB = 0; 
+         
+         //=========== start coding the MB pair as a field MB pair =============
+         //---------------------------------------------------------------------
+         img->field_mode = 1;  // MB coded as field
+         img->top_field = 1;   // Set top field to 1
+         img->buf_cycle <<= 1;               // reference counts are doubled (l0: 2n+1) for the field trial;
+         input->num_ref_frames <<= 1;        // they are halved again further below
+         img->num_ref_idx_l0_active <<= 1;
+         img->num_ref_idx_l0_active += 1;
+         start_macroblock (CurrentMbAddr, TRUE);
+         
+ 
+         rdopt = &rddata_top_field_mb; // store data in top field MB 
+ //        TopFieldIsSkipped = 0;        // set the top field MB skipped flag to 0
+         encode_one_macroblock ();     // code the MB as field
+         FieldRDCost = rdopt->min_rdcost;
+         //***   Top MB coded as field MB ***//
+         //Rate control
+         img->bot_MB = 1;//for Rate control
+ 
+         img->top_field = 0;   // Set top field to 0
+         start_macroblock (CurrentMbAddr+1, TRUE);
+         rdopt = &rddata_bot_field_mb; // store data in bottom field MB 
+         encode_one_macroblock ();     // code the MB as field
+         FieldRDCost += rdopt->min_rdcost;
+         //***   Bottom MB coded as field MB ***//
+       }
+ 
+       //Rate control
+       img->write_macroblock_frame = 0;  //Rate control
+ 
+       //=========== decide between frame/field MB pair ============
+       //-----------------------------------------------------------
+       if ((input->MbInterlace == ADAPTIVE_CODING) && (FrameRDCost < FieldRDCost))
+       {
+         img->field_mode = 0;
+         img->buf_cycle >>= 1;               // undo the doubling done for the field trial
+         input->num_ref_frames >>= 1;
+         MBPairIsField = 0;
+         img->num_ref_idx_l0_active -= 1;
+         img->num_ref_idx_l0_active >>= 1;
+         
+         //Rate control
+         img->write_macroblock_frame = 1;  //for Rate control
+       }
+       else
+       {
+         img->field_mode = 1;
+         MBPairIsField = 1;
+       }
+       // NOTE(review): the else branch above is also taken when MbInterlace is neither ADAPTIVE_CODING nor FIELD_CODING; the counts were not doubled in that case but are still halved below -- confirm that this combination cannot occur with MbaffFrameFlag set
+       //Rate control
+       img->write_macroblock = 1;//Rate control 
+       
+       if (MBPairIsField)
+         img->top_field = 1;
+       else
+         img->top_field = 0;
+       
+       //Rate control
+       img->bot_MB = 0;// for Rate control
+ 
+       // go back to the Top MB in the MB pair
+       start_macroblock (CurrentMbAddr, img->field_mode);
+       
+       rdopt =  img->field_mode ? &rddata_top_field_mb : &rddata_top_frame_mb;
+       copy_rdopt_data (0);  // copy the MB data for Top MB from the temp buffers
+       write_one_macroblock (1);     // write the Top MB data to the bitstream
+       NumberOfCodedMBs++;   // only here we are sure that the coded MB is actually included in the slice
+       terminate_macroblock (&end_of_slice, &recode_macroblock);     // done coding the Top MB 
+       proceed2nextMacroblock (CurrentMbAddr);        // Go to next macroblock
+       
+       //Rate control
+       img->bot_MB = 1;//for Rate control
+       // go to the Bottom MB in the MB pair
+       img->top_field = 0;
+       start_macroblock (CurrentMbAddr+1, img->field_mode);
+ 
+       rdopt = img->field_mode ? &rddata_bot_field_mb : &rddata_bot_frame_mb;
+       copy_rdopt_data (1);  // copy the MB data for Bottom MB from the temp buffers
+       
+       write_one_macroblock (0);     // write the Bottom MB data to the bitstream
+       NumberOfCodedMBs++;   // only here we are sure that the coded MB is actually included in the slice
+       terminate_macroblock (&end_of_slice, &recode_macroblock);     // done coding the Bottom MB 
+       proceed2nextMacroblock (CurrentMbAddr);        // Go to next macroblock
+       
+       if (MBPairIsField)    // if MB Pair was coded as field, set the buffer size variables back to frame mode
+       {
+         img->buf_cycle >>= 1;
+         input->num_ref_frames >>= 1;
+         img->num_ref_idx_l0_active -= 1;
+         img->num_ref_idx_l0_active >>= 1;
+       }
+       img->field_mode = img->top_field = 0; // reset to frame mode
+       
+       
+       // go to next MB pair, not next MB
+       CurrentMbAddr = FmoGetNextMBNr (CurrentMbAddr);
+       CurrentMbAddr = FmoGetNextMBNr (CurrentMbAddr);
+       // NOTE(review): the non-MBAFF path treats a -1 result from FmoGetNextMBNr as end of slice group; here the result is passed on unchecked -- confirm the Fmo* helpers accept -1
+       if (CurrentMbAddr == FmoGetLastCodedMBOfSliceGroup (FmoMB2SliceGroup (CurrentMbAddr)))
+         end_of_slice = TRUE;        // just in case it doesn't get set in terminate_macroblock
+     }
+   }  
+ /*
+   // Tian Dong: June 7, 2002 JVT-B042
+   // Restore the short_used
+   if (input->NumFramesInELSubSeq)
+     {
+       fb->short_used = short_used;
+     }
+ */
+   terminate_slice ( (NumberOfCodedMBs+TotalCodedMBs >= (int)img->PicSizeInMbs) );   // argument is the "last slice" flag
+   return NumberOfCodedMBs;
+ }
+ 
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Initializes the parameters for a new slice and
+  *     allocates the memory for the coded slice in the Picture structure
+  *  \param start_mb_addr
+  *      address of the first macroblock of the slice; stored in
+  *      img->current_mb_nr and in the new slice's start_mb_nr
+  *  \par Side effects:
+  *      increments Picture->no_slices, allocates memory for the
+  *      slice (malloc_slice), sets img->currentSlice, resets the bitstream
+  *      state of every partition, builds the reference picture lists
+  *      (init_lists) and updates listXsize[] and
+  *      img->num_ref_idx_l0_active / img->num_ref_idx_l1_active
+  ************************************************************************
+  */
+ static void init_slice (int start_mb_addr)
+ {
+   int i;
+   Picture *currPic = img->currentPicture;
+   DataPartition *dataPart;
+   Bitstream *currStream;
+   Slice *currSlice;
+ 
+   img->current_mb_nr = start_mb_addr;
+ 
+   // Allocate new Slice in the current Picture, and set img->currentSlice
+   assert (currPic != NULL);
+   currPic->no_slices++;
+   
+   if (currPic->no_slices >= MAXSLICEPERPICTURE)
+     error ("Too many slices per picture, increase MAXSLICEPERPICTURE in global.h.", -1);
+ 
+   currPic->slices[currPic->no_slices-1] = malloc_slice();
+   currSlice = currPic->slices[currPic->no_slices-1];
+ 
+   img->currentSlice = currSlice;
+ 
+   currSlice->picture_id = img->tr % 256;
+   currSlice->qp = img->qp;
+   currSlice->start_mb_nr = start_mb_addr;
+   currSlice->slice_too_big = dummy_slice_too_big;
+ 
+   // select the entropy-coding writer and reset the bitstream state of each partition
+   for (i = 0; i < currSlice->max_part_nr; i++)
+   {
+     dataPart = &(currSlice->partArr[i]);
+     if (input->symbol_mode == UVLC)
+       dataPart->writeSyntaxElement = writeSyntaxElement_UVLC;
+     else
+       dataPart->writeSyntaxElement = writeSyntaxElement_CABAC;
+     
+     currStream = dataPart->bitstream;
+     currStream->bits_to_go = 8;
+     currStream->byte_pos = 0;
+     currStream->byte_buf = 0;
+   }
+ 
+   img->num_ref_idx_l0_active = active_pps->num_ref_idx_l0_active_minus1 + 1; 
+   img->num_ref_idx_l1_active = active_pps->num_ref_idx_l1_active_minus1 + 1;
+ 
+   // generate reference picture lists
+   init_lists(img->type, img->structure);
+ 
+   // assign list 0 size from list size (this overwrites the PPS-derived values set above)
+   img->num_ref_idx_l0_active = listXsize[0];
+   img->num_ref_idx_l1_active = listXsize[1];
+   
+   // code now also considers fields. Issue whether we should account this within the appropriate input params directly
+   // ((img->structure !=0) + 1) doubles the configured limit whenever img->structure is non-zero
+   if ((img->type == P_SLICE || img->type == SP_SLICE) && input->P_List0_refs)
+   {
+     img->num_ref_idx_l0_active = min(img->num_ref_idx_l0_active, input->P_List0_refs * ((img->structure !=0) + 1));
+     listXsize[0] = min(listXsize[0], input->P_List0_refs * ((img->structure !=0) + 1));  
+   }
+   if (img->type == B_SLICE )
+   {
+     if (input->B_List0_refs)
+     {
+       img->num_ref_idx_l0_active = min(img->num_ref_idx_l0_active, input->B_List0_refs * ((img->structure !=0) + 1));
+       listXsize[0] = min(listXsize[0], input->B_List0_refs * ((img->structure !=0) + 1));  
+     }
+     if (input->B_List1_refs)
+     {
+       
+       img->num_ref_idx_l1_active = min(img->num_ref_idx_l1_active, input->B_List1_refs * ((img->structure !=0) + 1));
+       listXsize[1] = min(listXsize[1], input->B_List1_refs * ((img->structure !=0) + 1));  
+     }
+   }
+ 
+   //Perform memory management based on poc distances for PyramidCoding
+   if (img->nal_reference_idc  && input->PyramidCoding && input->PocMemoryManagement && dpb.ref_frames_in_buffer==active_sps->num_ref_frames)
+   {    
+     poc_based_ref_management(img->frame_num);
+   }
+ 
+   // Open GOP: cut each list at the first reference whose poc lies before img->last_valid_reference
+   // while the current picture lies after it; at least one entry is always kept (max(1,i))
+   if (input->EnableOpenGOP)
+   {
+     for (i = 0; i<listXsize[0]; i++)
+     {    
+       if (listX[0][i]->poc < img->last_valid_reference && img->ThisPOC > img->last_valid_reference)      
+       {
+         listXsize[0] = img->num_ref_idx_l0_active = max(1,i);
+         break;
+       }
+     }
+     
+     for (i = 0; i<listXsize[1]; i++)
+     {
+       if (listX[1][i]->poc < img->last_valid_reference && img->ThisPOC > img->last_valid_reference)
+       {
+         listXsize[1] = img->num_ref_idx_l1_active = max(1,i);
+         break;
+       }
+     }
+   }
+ 
+   init_ref_pic_list_reordering();
+ 
+   //Perform reordering based on poc distances for PyramidCoding
+   if (img->type==P_SLICE && input->PyramidCoding && input->PyramidRefReorder)
+   {
+     
+     int i, num_ref;   // NOTE(review): this 'i' shadows the function-level 'i'
+ 
+     alloc_ref_pic_list_reordering_buffer(currSlice);
+     
+     if ((img->type != I_SLICE) && (img->type !=SI_SLICE))   // NOTE(review): presumably always true here, the enclosing block requires P_SLICE
+     {
+       // num_ref_idx + 1 entries are initialized to the end-of-list code (3)
+       for (i=0; i<img->num_ref_idx_l0_active + 1; i++)
+       {
+         currSlice->reordering_of_pic_nums_idc_l0[i] = 3;
+         currSlice->abs_diff_pic_num_minus1_l0[i] = 0;
+         currSlice->long_term_pic_idx_l0[i] = 0;
+       }
+       
+       if (img->type == B_SLICE)   // NOTE(review): presumably unreachable, the enclosing block requires P_SLICE
+       {
+         for (i=0; i<img->num_ref_idx_l1_active + 1; i++)
+         {
+           currSlice->reordering_of_pic_nums_idc_l1[i] = 3;
+           currSlice->abs_diff_pic_num_minus1_l1[i] = 0;
+           currSlice->long_term_pic_idx_l1[i] = 0;
+         }
+       }
+     }
+     
+     if ((img->type != I_SLICE) && (img->type !=SI_SLICE))
+     {
+       num_ref = img->num_ref_idx_l0_active;
+       poc_ref_pic_reorder(listX[LIST_0], 
+                           num_ref, 
+                           currSlice->reordering_of_pic_nums_idc_l0, 
+                           currSlice->abs_diff_pic_num_minus1_l0, 
+                           currSlice->long_term_pic_idx_l0, 0, LIST_0);
+       
+       //reference picture reordering
+       reorder_ref_pic_list(listX[LIST_0], &listXsize[LIST_0], 
+                            img->num_ref_idx_l0_active - 1, 
+                            currSlice->reordering_of_pic_nums_idc_l0, 
+                            currSlice->abs_diff_pic_num_minus1_l0, 
+                            currSlice->long_term_pic_idx_l0);
+       
+ 		// This is not necessary since order is already poc based...  
+       if (img->type == B_SLICE)   // NOTE(review): presumably unreachable, see above
+       {
+         num_ref = img->num_ref_idx_l1_active;
+         poc_ref_pic_reorder(listX[LIST_1], 
+                             num_ref, 
+                             currSlice->reordering_of_pic_nums_idc_l1, 
+                             currSlice->abs_diff_pic_num_minus1_l1, 
+                             currSlice->long_term_pic_idx_l1, 0, LIST_1);
+         
+         //reference picture reordering
+         reorder_ref_pic_list(listX[LIST_1], &listXsize[LIST_1], 
+                              img->num_ref_idx_l1_active - 1, 
+                              currSlice->reordering_of_pic_nums_idc_l1, 
+                              currSlice->abs_diff_pic_num_minus1_l1, 
+                              currSlice->long_term_pic_idx_l1);
+       }
+     }
+   }
+ 
+ 
+   //if (img->MbaffFrameFlag)
+   if (img->structure==FRAME)
+     init_mbaff_lists();
+ 
+   // weighted prediction: estimate weights for P/SP slices when weighted_pred_flag is 1, or for B slices when weighted_bipred_idc > 0
+   if (img->type != I_SLICE && (active_pps->weighted_pred_flag == 1 || (active_pps->weighted_bipred_idc > 0 && (img->type == B_SLICE))))
+   {
+   	if (img->type==P_SLICE || img->type==SP_SLICE)
+     {
+       if (input->GenerateMultiplePPS && input->RDPictureDecision)
+       {
+         if (enc_picture==enc_frame_picture2)
+           estimate_weighting_factor_P_slice (0);
+         else
+           estimate_weighting_factor_P_slice (1);
+       }
+       else
+         estimate_weighting_factor_P_slice (0);
+       
+     }
+     else
+        estimate_weighting_factor_B_slice ();
+   }
+ 
+   set_ref_pic_num();
+ 
+   if (img->type == B_SLICE)
+     compute_colocated(Co_located, listX);
+   if (img->type != I_SLICE && input->FMEnable == 3)
+     EPZSSliceInit(EPZSCo_located, listX);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Allocates a zero-initialized Slice together with its data
+  *    partitions, their Bitstream structures and stream buffers, and
+  *    (in CABAC mode) the motion/texture context models.
+  *    Also sets up the assignSE2partition[] mapping tables.
+  * \return
+  *    Pointer to the new Slice; every allocation failure is reported
+  *    through no_mem_exit()
+  ************************************************************************
+  */
+ static Slice *malloc_slice()
+ {
+   // KS: this is approx. max. allowed code picture size
+   const int stream_bytes = 500 + img->FrameSizeInMbs * (128 + 256 * img->bitdepth_luma + 512 * img->bitdepth_chroma);
+   Slice *new_slice;
+   DataPartition *part;
+   int p;
+ 
+   new_slice = (Slice *) calloc(1, sizeof(Slice));
+   if (new_slice == NULL)
+     no_mem_exit ("malloc_slice: slice structure");
+ 
+   if (input->symbol_mode == CABAC)
+   {
+     // create all context models
+     new_slice->mot_ctx = create_contexts_MotionInfo();
+     new_slice->tex_ctx = create_contexts_TextureInfo();
+   }
+ 
+   // a single partition without data partitioning, and always for IDR pictures
+   if (img->currentPicture->idr_flag || input->partition_mode == 0)
+     new_slice->max_part_nr = 1;
+   else
+     new_slice->max_part_nr = 3;
+ 
+   assignSE2partition[0] = assignSE2partition_NoDP;
+   // for IDR pictures all syntax elements are mapped to one partition
+   if (img->currentPicture->idr_flag || input->partition_mode != 1)
+     assignSE2partition[1] = assignSE2partition_NoDP;
+   else
+     assignSE2partition[1] = assignSE2partition_DP;
+ 
+   new_slice->num_mb = 0;          // no coded MBs so far
+ 
+   new_slice->partArr = (DataPartition *) calloc(new_slice->max_part_nr, sizeof(DataPartition));
+   if (new_slice->partArr == NULL)
+     no_mem_exit ("malloc_slice: partArr");
+ 
+   // one Bitstream plus stream buffer per data partition
+   for (p = 0; p < new_slice->max_part_nr; p++)
+   {
+     part = &(new_slice->partArr[p]);
+ 
+     part->bitstream = (Bitstream *) calloc(1, sizeof(Bitstream));
+     if (part->bitstream == NULL)
+       no_mem_exit ("malloc_slice: Bitstream");
+ 
+     part->bitstream->streamBuffer = (byte *) calloc(stream_bytes, sizeof(byte));
+     if (part->bitstream->streamBuffer == NULL)
+       no_mem_exit ("malloc_slice: StreamBuffer");
+   }
+   return new_slice;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Frees every Slice stored in a Picture (via free_slice) and clears
+  *    the corresponding entries of the slice pointer array
+  * \param currPic
+  *    Picture whose first no_slices entries of slices[] are released;
+  *    no_slices itself is left unchanged
+  ************************************************************************
+  */
+ void free_slice_list(Picture *currPic)
+ {
+   int idx = 0;
+ 
+   while (idx < currPic->no_slices)
+   {
+     free_slice (currPic->slices[idx]);
+     currPic->slices[idx] = NULL;
+     ++idx;
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Memory frees of the Slice structure and of its dependent
+  *    data structures (stream buffers, Bitstreams, partition array and,
+  *    in CABAC mode, the context models)
+  * \param slice:
+  *    Slice to be freed; NULL is accepted and ignored. A partially
+  *    constructed slice (partArr or a partition's bitstream still NULL)
+  *    is handled as well.
+  ************************************************************************
+  */
+ static void free_slice(Slice *slice)
+ {
+   int i;
+   DataPartition *dataPart;
+ 
+   if (slice == NULL)
+     return;
+ 
+   // check the array itself before indexing into it
+   if (slice->partArr != NULL)
+   {
+     for (i=0; i<slice->max_part_nr; i++) // loop over all data partitions
+     {
+       dataPart = &(slice->partArr[i]);
+       // the bitstream pointer must be validated before its streamBuffer is read
+       if (dataPart->bitstream != NULL)
+       {
+         free(dataPart->bitstream->streamBuffer);   // free(NULL) is a no-op
+         free(dataPart->bitstream);
+       }
+     }
+     free(slice->partArr);
+   }
+ 
+   if (input->symbol_mode == CABAC)
+   {
+     delete_contexts_MotionInfo(slice->mot_ctx);
+     delete_contexts_TextureInfo(slice->tex_ctx);
+   }
+ 
+   free(slice);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Sets a single bit in a byte buffer to 1. The byte is selected by
+  *    the global rpc_bytes_to_go, the bit within it by the global
+  *    rpc_bits_to_go (mask 1 << (rpc_bits_to_go-1)).
+  * \param buffer
+  *    byte buffer that is modified in place
+  ************************************************************************
+  */
+ void modify_redundant_pic_cnt(unsigned char *buffer)
+ {
+   unsigned char *target = buffer + rpc_bytes_to_go;
+ 
+   *target |= (unsigned char)(1 << (rpc_bits_to_go-1));
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fills the four reference picture number tables of enc_picture for
+  *    one reference list: every entry is twice the corresponding poc of
+  *    the reference, plus 1 for bottom-field parity.
+  * \param list_idx
+  *    index into listX / listXsize
+  ************************************************************************
+  */
+ static void fill_ref_pic_nums_for_list(int list_idx)
+ {
+   int ref;
+   StorablePicture *ref_pic;
+ 
+   for (ref = 0; ref < listXsize[list_idx]; ref++)
+   {
+     ref_pic = listX[list_idx][ref];
+ 
+     enc_picture->ref_pic_num        [list_idx][ref] = ref_pic->poc * 2 + ((ref_pic->structure==BOTTOM_FIELD)?1:0);
+     enc_picture->frm_ref_pic_num    [list_idx][ref] = ref_pic->frame_poc * 2;
+     enc_picture->top_ref_pic_num    [list_idx][ref] = ref_pic->top_poc * 2;
+     enc_picture->bottom_ref_pic_num [list_idx][ref] = ref_pic->bottom_poc * 2 + 1;
+   }
+ }
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Stores the reference picture numbers of the current reference
+  *    lists in enc_picture. LIST_0 and LIST_1 are always processed;
+  *    lists 2..5 only for frame pictures of a sequence that is not
+  *    frame_mbs_only.
+  ************************************************************************
+  */
+ void set_ref_pic_num()
+ {
+   int list_idx;
+ 
+   //! need to add field ref_pic_num that handles field pair.
+ 
+   fill_ref_pic_nums_for_list(LIST_0);
+   fill_ref_pic_nums_for_list(LIST_1);
+ 
+   if (active_sps->frame_mbs_only_flag || img->structure!=FRAME)
+     return;
+ 
+   for (list_idx = 2; list_idx < 6; list_idx++)
+     fill_ref_pic_nums_for_list(list_idx);
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    decide reference picture reordering, Frame only
+ *
+ *    The reference frames in dpb.fs_ref are sorted by absolute poc
+ *    distance to enc_picture. If the first num_ref_idx_lX_active entries
+ *    of that order differ from the default order found in list, a
+ *    sequence of reordering commands is written to the output arrays and
+ *    the slice's ref_pic_list_reordering_flag for the list is set to 1.
+ *
+ * \param list
+ *    current reference list; the pic_num of its first
+ *    num_ref_idx_lX_active entries is taken as the default order
+ * \param num_ref_idx_lX_active
+ *    number of list entries to consider
+ * \param reordering_of_pic_nums_idc
+ *    output: 0 / 1 per command, terminated by 3. The terminator can be
+ *    written at index num_ref_idx_lX_active, so the array needs
+ *    num_ref_idx_lX_active + 1 entries.
+ * \param abs_diff_pic_num_minus1
+ *    output: magnitude of each command
+ * \param long_term_pic_idx
+ *    not written by this function
+ * \param weighted_prediction
+ *    not used by this function
+ * \param list_no
+ *    LIST_0 or another list index; selects the tie-break sign and which
+ *    reordering flag of img->currentSlice is set
+ ************************************************************************
+ */
+ void poc_ref_pic_reorder(StorablePicture **list, unsigned num_ref_idx_lX_active, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx, int weighted_prediction, int list_no)
+ {
+   unsigned i,j,k;
+   
+   int currPicNum, picNumLXPred;
+ 
+   int default_order[32];
+   int re_order[32];
+   int tmp_reorder[32];
+   int list_sign[32];
+   int reorder_stop, no_reorder;
+   int poc_diff[32];
+   int tmp_value, diff;  
+ 
+   int abs_poc_dist;
+   int maxPicNum, MaxFrameNum = 1 << (log2_max_frame_num_minus4 + 4);
+   
+   if (img->structure==FRAME)
+   {
+     maxPicNum  = MaxFrameNum;
+     currPicNum = img->frame_num;
+   }
+   else
+   {
+     maxPicNum  = 2 * MaxFrameNum;
+     currPicNum = 2 * img->frame_num + 1;
+   }
+   
+   picNumLXPred = currPicNum;
+   
+   // First assign default list order. 
+   for (i=0; i<num_ref_idx_lX_active; i++)
+   {
+     default_order[i] = list[i]->pic_num;
+   }
+   
+   // Now access all references in buffer and assign them
+   // to a potential reordering list. For each one of these 
+   // references compute the poc distance compared to current
+   // frame.
+   for (i=0; i<dpb.ref_frames_in_buffer; i++)
+   {
+     re_order[i] = dpb.fs_ref[i]->frame->pic_num;
+ 
+     // Defaults for entries rejected by the filter below: the sort reads
+     // poc_diff/list_sign for every buffer entry, so they must never be
+     // left uninitialized. The large distance makes such entries sort last.
+     poc_diff[i]  = 0xFFFF;
+     list_sign[i] = 0;
+   
+     if (dpb.fs_ref[i]->is_used==3 && (dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term))
+     {
+       abs_poc_dist = abs(dpb.fs_ref[i]->frame->poc - enc_picture->poc) ;
+       poc_diff[i] = abs_poc_dist;
+       if (list_no == LIST_0)
+       {
+         list_sign[i] = (enc_picture->poc < dpb.fs_ref[i]->frame->poc) ? +1 : -1;
+       }
+       else
+       {
+         list_sign[i] = (enc_picture->poc > dpb.fs_ref[i]->frame->poc) ? +1 : -1;
+       }
+     }
+   }
+   
+   
+   // now sort these references based on poc (temporal) distance
+   // (bound written as i+1 < n so that an empty buffer cannot wrap around
+   //  in the unsigned comparison)
+   for (i=0; i+1 < dpb.ref_frames_in_buffer; i++)
+   {
+     for (j=i+1; j< dpb.ref_frames_in_buffer; j++)
+     {      
+       if (poc_diff[i]>poc_diff[j] || (poc_diff[i] == poc_diff[j] && list_sign[j] > list_sign[i]))
+       {
+         
+         tmp_value = poc_diff[i];
+         poc_diff[i] = poc_diff[j];
+         poc_diff[j] = tmp_value;
+         tmp_value  = re_order[i];
+         re_order[i] = re_order[j];
+         re_order[j] = tmp_value ;
+         tmp_value  = list_sign[i];
+         list_sign[i] = list_sign[j];        
+         list_sign[j] = tmp_value ;
+       }
+     }
+   }
+   
+   // Check versus default list to see if any
+   // change has happened
+   // NOTE(review): re_order[] is only filled for dpb.ref_frames_in_buffer
+   // entries; presumably num_ref_idx_lX_active never exceeds that -- confirm
+   no_reorder = 1;
+   for (i=0; i<num_ref_idx_lX_active; i++)
+   {
+     if (default_order[i] != re_order[i])
+     {
+       no_reorder = 0;
+     }
+   }
+   
+   // If different, then signal reordering
+   if (no_reorder==0)
+   {
+     for (i=0; i<num_ref_idx_lX_active; i++)
+     {
+       // command i moves re_order[i] to list position i, coded relative
+       // to the previously signalled picture number
+       diff = re_order[i]-picNumLXPred;
+       if (diff <= 0)
+       {
+         reordering_of_pic_nums_idc[i] = 0;
+         abs_diff_pic_num_minus1[i] = abs(diff)-1;
+         if (abs_diff_pic_num_minus1[i] < 0)   // diff == 0
+           abs_diff_pic_num_minus1[i] = maxPicNum -1; 
+       }
+       else
+       {
+         reordering_of_pic_nums_idc[i] = 1;
+         abs_diff_pic_num_minus1[i] = abs(diff)-1;
+       }
+       picNumLXPred = re_order[i];
+       
+       // simulate the list after this command: re_order[i] at position i,
+       // followed by the remaining default entries in their old order
+       tmp_reorder[i] = re_order[i];
+       
+       k = i;
+       for (j=i; j<num_ref_idx_lX_active; j++)
+       {
+         if (default_order[j] != re_order[i])
+         {
+           ++k;
+           tmp_reorder[k] = default_order[j];
+         }
+       }
+       // stop as soon as the simulated list already matches the target
+       reorder_stop = 1;
+       for(j=i+1; j<num_ref_idx_lX_active; j++)
+       {
+         if (tmp_reorder[j] != re_order[j])
+         {
+           reorder_stop = 0;
+           break;
+         }
+       }
+       
+       if (reorder_stop==1)
+       {
+         ++i;
+         break;
+       }
+       
+       
+       for(j=0; j<num_ref_idx_lX_active; j++)
+       {
+         default_order[j] = tmp_reorder[j];
+       }
+             
+     }
+     // terminate the command list
+     reordering_of_pic_nums_idc[i] = 3;
+     
+     for(j=0; j<num_ref_idx_lX_active; j++)
+     {
+       default_order[j] = tmp_reorder[j];
+     }
+     
+     if (list_no==0)
+     {
+       img->currentSlice->ref_pic_list_reordering_flag_l0=1;
+     }
+     else
+     {
+       img->currentSlice->ref_pic_list_reordering_flag_l1=1;
+     }
+   }
+ }
+ 
+ // Table defined in another translation unit. SetLagrangianMultipliers reads
+ // QP2QUANT[max(0,qp-SHIFT_QP)] for qp in 0..51 when input->rdopt is off.
+ extern int QP2QUANT[40];
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Fill img->lambda_md, img->lambda_me and img->lambda_mf for every
+  *    slice-type index and every qp in 0..51.
+  *
+  *    With input->rdopt off, all six slice indices take their value from
+  *    QP2QUANT. With input->rdopt on, indices 0..4 are derived from
+  *    pow(2, img->bitdepth_lambda_scale + qp/3) and index 5 is filled as a
+  *    side product while processing B_SLICE.
+  ************************************************************************
+  */
+ void SetLagrangianMultipliers()
+ {
+   int slice, q;
+   double lambda_scale, hadamard_scale;
+ 
+   // RDOPT off: one table lookup per entry, nothing else to do.
+   if (!input->rdopt)
+   {
+     for (slice = 0; slice < 6; slice++)
+     {
+       for (q = 0; q < 52; q++)
+       {
+         img->lambda_me[slice][q] = QP2QUANT[max(0,q-SHIFT_QP)];
+         img->lambda_md[slice][q] = img->lambda_me[slice][q];
+         img->lambda_mf[slice][q] = LAMBDA_FACTOR (img->lambda_me[slice][q]);
+       }
+     }
+     return;
+   }
+ 
+   // RDOPT on.
+   lambda_scale   = 1.0 - Clip3(0.0,0.5,0.05 * (double) input->jumpd);
+   // Scale lambda due to hadamard qpel only consideration
+   hadamard_scale = (input->hadamard == 2 ? 0.95 : 1.00);
+ 
+   for (slice = 0; slice < 5; slice++)
+   {
+     for (q = 0; q < 52; q++)
+     {
+       double qp_shifted = max(0.0,(double)q - SHIFT_QP);
+       double pow_term   = pow (2, img->bitdepth_lambda_scale + qp_shifted/3.0);
+       double slice_factor;
+ 
+       if (input->UseExplicitLambdaParams)
+       {
+         // Explicit weights: lambda = weight * 2^(...) * hadamard scale.
+         img->lambda_md[slice][q] = input->LambdaWeight[slice] * pow_term;
+         img->lambda_md[slice][q] = hadamard_scale * img->lambda_md[slice][q];
+         img->lambda_me[slice][q] = sqrt(img->lambda_md[slice][q]);
+         img->lambda_mf[slice][q] = LAMBDA_FACTOR (img->lambda_me[slice][q]);
+ 
+         // Index 5 has its own weight and is filled alongside B_SLICE.
+         if (slice == B_SLICE)
+         {
+           img->lambda_md[5][q] = input->LambdaWeight[5] * pow_term;
+           img->lambda_md[5][q] = hadamard_scale * img->lambda_md[5][q];
+           img->lambda_me[5][q] = sqrt(img->lambda_md[5][q]);
+           img->lambda_mf[5][q] = LAMBDA_FACTOR (img->lambda_me[5][q]);
+         }
+         continue;
+       }
+ 
+       // Built-in weights: the slice-type dependent factor is chosen first.
+       if (slice == B_SLICE)
+       {
+         if (input->successive_Bframe>0)
+           slice_factor = Clip3(2.00,4.00,(qp_shifted / 6.0));
+         else
+           slice_factor = 4.0;
+       }
+       else if (slice == SP_SLICE)
+       {
+         slice_factor = Clip3(1.4,3.0,(qp_shifted / 12.0));
+       }
+       else
+       {
+         slice_factor = 1.0;
+       }
+ 
+       img->lambda_md[slice][q] = (input->successive_Bframe>0 ? 0.68 : 0.85) * pow_term * slice_factor;
+       img->lambda_md[slice][q] = hadamard_scale * img->lambda_md[slice][q];
+       img->lambda_md[slice][q] = (slice == B_SLICE && input->BRefPictures == 2 && img->b_frame_to_code == 0 ? 0.50 : 1.00) * img->lambda_md[slice][q];
+ 
+       if (slice == B_SLICE)
+       {
+         // Index 5 starts from the B_SLICE value and receives its own scaling;
+         // the B_SLICE entry itself is not multiplied by lambda_scale.
+         img->lambda_md[5][q] = img->lambda_md[slice][q];
+ 
+         // NOTE(review): when img->b_frame_to_code is 0 the index below is -1;
+         // confirm callers never reach this with PyramidCoding == 2 in that state.
+         if (input->PyramidCoding == 2)
+           img->lambda_md[5][q] *= (1.0 - min(0.4,0.2 * (double) gop_structure[img->b_frame_to_code-1].pyramid_layer)) ;
+         else
+           img->lambda_md[5][q] *= 0.80;
+         img->lambda_md[5][q] *= lambda_scale;
+         img->lambda_me[5][q] = sqrt(img->lambda_md[5][q]);
+         img->lambda_mf[5][q] = LAMBDA_FACTOR (img->lambda_me[5][q]);
+       }
+       else
+       {
+         img->lambda_md[slice][q] *= lambda_scale;
+       }
+ 
+       img->lambda_me[slice][q] = sqrt(img->lambda_md[slice][q]);
+       img->lambda_mf[slice][q] = LAMBDA_FACTOR (img->lambda_me[slice][q]);
+     }
+   }
+ }
+  


Index: llvm-test/MultiSource/Applications/JM/lencod/transform8x8.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/transform8x8.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/transform8x8.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1801 ----
+ /*!
+  ***************************************************************************
+  * \file transform8x8.c
+  *
+  * \brief
+  *    8x8 transform functions
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *    - Yuri Vatis                      <vatis at hhi.de>
+  *    - Jan Muenster                    <muenster at hhi.de>
+  *    - Lowell Winger                   <lwinger at lsil.com>
+  * \date
+  *    12. October 2003
+  **************************************************************************
+  */
+ 
+ #include <stdlib.h>
+ #include <string.h>
+ #include <math.h>
+ #include <limits.h>
+ 
+ #include "global.h"
+ 
+ #include "image.h"
+ #include "mb_access.h"
+ #include "elements.h"
+ #include "cabac.h"
+ #include "vlc.h"
+ 
+ #include "transform8x8.h"
+ 
+ // Save buffer for chroma coefficients of the best 8x8 intra mode found so far.
+ // Mode_Decision_for_new_8x8IntraBlocks fills it from img->cofAC[b8+4] / [b8+8]
+ // and copies it back afterwards, in both cases only when
+ // img->residue_transform_flag is set. Indexed there as [uv][b4][j][i], i < 18.
+ int   cofAC8x8_chroma[2][4][2][18];
+ 
+ 
+ // Function-like max/min helpers. Each argument appears twice in the expansion,
+ // so an argument with side effects (e.g. i++) would be evaluated more than once.
+ #define max(a, b) (((a) > (b)) ? (a) : (b))
+ #define min(a, b) (((a) < (b)) ? (a) : (b))
+ 
+ 
+ // Six 8x8 tables of quantisation coefficients for the 8x8 transform.
+ // Within each table rows 4..7 repeat rows 0..3 and columns 4..7 repeat
+ // columns 0..3. NOTE(review): the first index is presumably qp % 6 -- the
+ // code that reads this table is not visible here, confirm at the use site.
+ const int quant_coef8[6][8][8] = 
+ {
+   { 
+     {13107, 12222,  16777,  12222,  13107,  12222,  16777,  12222},
+     {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428},
+     {16777, 15481,  20972,  15481,  16777,  15481,  20972,  15481},
+     {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428},
+     {13107, 12222,  16777,  12222,  13107,  12222,  16777,  12222},
+     {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428},
+     {16777, 15481,  20972,  15481,  16777,  15481,  20972,  15481},
+     {12222, 11428,  15481,  11428,  12222,  11428,  15481,  11428}
+   },
+   {
+     {11916, 11058,  14980,  11058,  11916,  11058,  14980,  11058},
+     {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826},
+     {14980, 14290,  19174,  14290,  14980,  14290,  19174,  14290},
+     {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826},
+     {11916, 11058,  14980,  11058,  11916,  11058,  14980,  11058},
+     {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826},
+     {14980, 14290,  19174,  14290,  14980,  14290,  19174,  14290},
+     {11058, 10826,  14290,  10826,  11058,  10826,  14290,  10826}
+   },
+   {
+     {10082, 9675,   12710,  9675,   10082,  9675, 12710,  9675},
+     {9675,  8943,   11985,  8943,   9675,   8943, 11985,  8943},
+     {12710, 11985,  15978,  11985,  12710,  11985,  15978,  11985},
+     {9675,  8943,   11985,  8943,   9675,   8943, 11985,  8943},
+     {10082, 9675,   12710,  9675,   10082,  9675, 12710,  9675},
+     {9675,  8943,   11985,  8943,   9675, 8943, 11985,  8943},
+     {12710, 11985,  15978,  11985,  12710,  11985,  15978,  11985},
+     {9675,  8943,   11985,  8943,   9675, 8943, 11985,  8943}
+   },
+   {
+     {9362,  8931, 11984,  8931, 9362, 8931, 11984,  8931},
+     {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228},
+     {11984, 11259,  14913,  11259,  11984,  11259,  14913,  11259},
+     {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228},
+     {9362,  8931, 11984,  8931, 9362, 8931, 11984,  8931},
+     {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228},
+     {11984, 11259,  14913,  11259,  11984,  11259,  14913,  11259},
+     {8931,  8228, 11259,  8228, 8931, 8228, 11259,  8228}
+   },
+   {
+     {8192,  7740, 10486,  7740, 8192, 7740, 10486,  7740},
+     {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346},
+     {10486, 9777, 13159,  9777, 10486,  9777, 13159,  9777},
+     {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346},
+     {8192,  7740, 10486,  7740, 8192, 7740, 10486,  7740},
+     {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346},
+     {10486, 9777, 13159,  9777, 10486,  9777, 13159,  9777},
+     {7740,  7346, 9777, 7346, 7740, 7346, 9777, 7346}
+   },
+   {
+     {7282,  6830, 9118, 6830, 7282, 6830, 9118, 6830},
+     {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428},
+     {9118,  8640, 11570,  8640, 9118, 8640, 11570,  8640},
+     {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428},
+     {7282,  6830, 9118, 6830, 7282, 6830, 9118, 6830},
+     {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428},
+     {9118,  8640, 11570,  8640, 9118, 8640, 11570,  8640},
+     {6830,  6428, 8640, 6428, 6830, 6428, 8640, 6428}
+   }
+ };
+ 
+ 
+ // Six 8x8 tables of dequantisation coefficients for the 8x8 transform, laid
+ // out like quant_coef8: within each table rows 4..7 repeat rows 0..3 and
+ // columns 4..7 repeat columns 0..3. NOTE(review): the first index is
+ // presumably qp % 6 -- the reading code is not visible here, confirm.
+ const int dequant_coef8[6][8][8] = 
+ {
+   {
+     {20,  19, 25, 19, 20, 19, 25, 19},
+     {19,  18, 24, 18, 19, 18, 24, 18},
+     {25,  24, 32, 24, 25, 24, 32, 24},
+     {19,  18, 24, 18, 19, 18, 24, 18},
+     {20,  19, 25, 19, 20, 19, 25, 19},
+     {19,  18, 24, 18, 19, 18, 24, 18},
+     {25,  24, 32, 24, 25, 24, 32, 24},
+     {19,  18, 24, 18, 19, 18, 24, 18}
+   },
+   {
+     {22,  21, 28, 21, 22, 21, 28, 21},
+     {21,  19, 26, 19, 21, 19, 26, 19},
+     {28,  26, 35, 26, 28, 26, 35, 26},
+     {21,  19, 26, 19, 21, 19, 26, 19},
+     {22,  21, 28, 21, 22, 21, 28, 21},
+     {21,  19, 26, 19, 21, 19, 26, 19},
+     {28,  26, 35, 26, 28, 26, 35, 26},
+     {21,  19, 26, 19, 21, 19, 26, 19}
+   },
+   {
+     {26,  24, 33, 24, 26, 24, 33, 24},
+     {24,  23, 31, 23, 24, 23, 31, 23},
+     {33,  31, 42, 31, 33, 31, 42, 31},
+     {24,  23, 31, 23, 24, 23, 31, 23},
+     {26,  24, 33, 24, 26, 24, 33, 24},
+     {24,  23, 31, 23, 24, 23, 31, 23},
+     {33,  31, 42, 31, 33, 31, 42, 31},
+     {24,  23, 31, 23, 24, 23, 31, 23}
+   },
+   {
+     {28,  26, 35, 26, 28, 26, 35, 26},
+     {26,  25, 33, 25, 26, 25, 33, 25},
+     {35,  33, 45, 33, 35, 33, 45, 33},
+     {26,  25, 33, 25, 26, 25, 33, 25},
+     {28,  26, 35, 26, 28, 26, 35, 26},
+     {26,  25, 33, 25, 26, 25, 33, 25},
+     {35,  33, 45, 33, 35, 33, 45, 33},
+     {26,  25, 33, 25, 26, 25, 33, 25}
+   },
+   {
+     {32,  30, 40, 30, 32, 30, 40, 30},
+     {30,  28, 38, 28, 30, 28, 38, 28},
+     {40,  38, 51, 38, 40, 38, 51, 38},
+     {30,  28, 38, 28, 30, 28, 38, 28},
+     {32,  30, 40, 30, 32, 30, 40, 30},
+     {30,  28, 38, 28, 30, 28, 38, 28},
+     {40,  38, 51, 38, 40, 38, 51, 38},
+     {30,  28, 38, 28, 30, 28, 38, 28}
+   },
+   {
+     {36,  34, 46, 34, 36, 34, 46, 34},
+     {34,  32, 43, 32, 34, 32, 43, 32},
+     {46,  43, 58, 43, 46, 43, 58, 43},
+     {34,  32, 43, 32, 34, 32, 43, 32},
+     {36,  34, 46, 34, 36, 34, 46, 34},
+     {34,  32, 43, 32, 34, 32, 43, 32},
+     {46,  43, 58, 43, 46, 43, 58, 43},
+     {34,  32, 43, 32, 34, 32, 43, 32}
+   }
+ 
+ };
+ 
+ 
+ //! single scan pattern
+ // 64 coordinate pairs, one per position of an 8x8 block, listed in zig-zag
+ // order starting at {0,0}. NOTE(review): which element of each pair is the
+ // column and which the row is not visible here -- confirm at the use site.
+ const byte SNGL_SCAN8x8[64][2] = {
+   {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1}, 
+   {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0},
+   {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4},
+   {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3},
+   {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4},
+   {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6},
+   {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5},
+   {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7}
+ };
+ 
+ 
+ //! field scan pattern
+ // 64 coordinate pairs, one per position of an 8x8 block, in the alternative
+ // scan order used for field coding; compared with SNGL_SCAN8x8 the second
+ // coordinate advances faster than the first. NOTE(review): the column/row
+ // meaning of the pair elements is not visible here -- confirm at the use site.
+ const byte FIELD_SCAN8x8[64][2] = {   // 8x8
+   {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2}, 
+   {2,0}, {1,3}, {0,5}, {0,6}, {0,7}, {1,4}, {2,1}, {3,0}, 
+   {2,2}, {1,5}, {1,6}, {1,7}, {2,3}, {3,1}, {4,0}, {3,2}, 
+   {2,4}, {2,5}, {2,6}, {2,7}, {3,3}, {4,1}, {5,0}, {4,2}, 
+   {3,4}, {3,5}, {3,6}, {3,7}, {4,3}, {5,1}, {6,0}, {5,2}, 
+   {4,4}, {4,5}, {4,6}, {4,7}, {5,3}, {6,1}, {6,2}, {5,4}, 
+   {5,5}, {5,6}, {5,7}, {6,3}, {7,0}, {7,1}, {6,4}, {6,5}, 
+   {6,6}, {6,7}, {7,2}, {7,3}, {7,4}, {7,5}, {7,6}, {7,7}
+ };
+ 
+ 
+ //! array used to find expensive coefficients
+ // Two rows of 64 entries. Row 0 steps down 3,2,1,0 as the position index
+ // grows; row 1 is 9 everywhere. NOTE(review): the meaning of the row selector
+ // and of the position index is not visible here -- confirm at the use site.
+ const byte COEFF_COST8x8[2][64] =
+ {
+   {3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
+   1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+   {9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
+ };
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    8x8 Intra mode decision for a macroblock
+  *
+  *    Runs Mode_Decision_for_new_8x8IntraBlocks on each of the four 8x8 blocks
+  *    and accumulates their costs on top of floor(6.0 * lambda + 0.4999).
+  *
+  * \param lambda
+  *    Lagrangian multiplier, passed through to the per-block decision
+  * \param min_cost
+  *    output: accumulated cost; saturates at INT_MAX instead of overflowing
+  * \return
+  *    coded block pattern: bit b8 is set when the decision for block b8
+  *    returned nonzero
+  *************************************************************************************
+  */
+ 
+ int Mode_Decision_for_new_Intra8x8Macroblock (double lambda, int *min_cost)
+ {
+   int  cbp=0, b8, cost8x8;
+ 
+   *min_cost = (int)floor(6.0 * lambda + 0.4999);
+ 
+   for (b8=0; b8<4; b8++)
+   {
+     if (Mode_Decision_for_new_8x8IntraBlocks (b8, lambda, &cost8x8))
+     {
+       cbp |= (1<<b8);
+     }
+ 
+     // The per-block routine reports INT_MAX when input->rdopt is on (its cost
+     // is only lowered in the non-RDO branch), so a plain += would overflow a
+     // signed int, which is undefined behaviour. Saturate instead.
+     if (cost8x8 > 0 && *min_cost > INT_MAX - cost8x8)
+       *min_cost = INT_MAX;
+     else
+       *min_cost += cost8x8;
+   }
+ 
+   return cbp;
+ }
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    8x8 Intra mode decision for one 8x8 luma block of a macroblock
+  *
+  *    Tries every available intra 8x8 prediction mode, keeps the cheapest one,
+  *    stores it in img->ipredmode8x8 and currMB->intra_pred_modes8x8 and leaves
+  *    coefficients, prediction and reconstruction of that mode in place.
+  *
+  * \param b8
+  *    index (0..3) of the 8x8 block; bit 0 selects the column and bit 1 the row
+  *    (block_x = 8*(b8 & 1), block_y = 8*(b8 >> 1))
+  * \param lambda
+  *    Lagrangian multiplier used for the mode cost
+  * \param min_cost
+  *    output: with input->rdopt off, the lowest SATD based cost found;
+  *    with input->rdopt on it is left at INT_MAX
+  * \return
+  *    nonzero-coefficient flag of the chosen mode: c_nz as reported by
+  *    RDCost_for_8x8IntraBlocks (rdopt on) or the result of dct_luma8x8 (rdopt off)
+  *************************************************************************************
+  */
+ 
+ int Mode_Decision_for_new_8x8IntraBlocks (int b8, double lambda, int *min_cost)
+ {
+   int     ipmode, best_ipmode = 0, i, j, k, x, y, cost, dummy;
+   int     c_nz, nonzero = 0, diff[64];
+   imgpel  rec8x8[8][8];
+   double  rdcost = 0.0;
+   int     block4x4_x, block4x4_y;
+   int     block_x     = 8*(b8 & 0x01);
+   int     block_y     = 8*(b8 >> 1);
+   int     pic_pix_x   = img->pix_x+block_x;
+   int     pic_pix_y   = img->pix_y+block_y;
+   int     pic_opix_x   = img->opix_x+block_x;
+   int     pic_opix_y   = img->opix_y+block_y;
+   int     pic_block_x = pic_pix_x/4;
+   int     pic_block_y = pic_pix_y/4;
+   double  min_rdcost  = 1e30;
+   imgpel    **imgY_orig  = imgY_org;
+   extern  int ****cofAC8x8; 
+   int fadjust8x8[2][16][16];
+   int left_available, up_available, all_available;
+ 
+   char   upMode;
+   char   leftMode;
+   int     mostProbableMode;  
+ 
+   PixelPos left_block;
+   PixelPos top_block;
+ 
+   // Residue Color Transform
+   int residue_R, residue_G, residue_B;
+   int rate, temp, b4;
+   int64 distortion;
+   Macroblock     *currMB       = &img->mb_data[img->current_mb_nr];
+   int c_ipmode = currMB->c_ipred_mode;
+   int rec8x8_c[2][4][4][4];   // saved chroma reconstruction [uv][b4][y][x] (ints, not imgpel)
+ 
+   //===== derive the most probable mode from the left and upper neighbours =====
+   getLuma4x4Neighbour(img->current_mb_nr, block_x/4, block_y/4, -1,  0, &left_block);
+   getLuma4x4Neighbour(img->current_mb_nr, block_x/4, block_y/4,  0, -1, &top_block);
+ 
+   if (input->UseConstrainedIntraPred)
+   {
+     top_block.available  = top_block.available ? img->intra_block [top_block.mb_addr] : 0;
+     left_block.available = left_block.available ? img->intra_block [left_block.mb_addr] : 0;
+   }
+ 
+   // neighbours inside the current macroblock are read from ipredmode8x8,
+   // neighbours outside of it from ipredmode
+   if(b8 >> 1)
+     upMode    =  top_block.available ? img->ipredmode8x8[top_block.pos_y ][top_block.pos_x ] : -1; 
+   else
+     upMode    =  top_block.available ? img->ipredmode   [top_block.pos_y ][top_block.pos_x ] : -1;
+   if(b8 & 0x01)
+     leftMode  = left_block.available ? img->ipredmode8x8[left_block.pos_y][left_block.pos_x] : -1;
+   else
+     leftMode  = left_block.available ? img->ipredmode[left_block.pos_y][left_block.pos_x] : -1;
+ 
+   mostProbableMode  = (upMode < 0 || leftMode < 0) ? DC_PRED : upMode < leftMode ? upMode : leftMode;
+ 
+   *min_cost = INT_MAX;
+ 
+   //===== INTRA PREDICTION FOR 8x8 BLOCK =====
+   intrapred_luma8x8 (pic_pix_x, pic_pix_y, &left_available, &up_available, &all_available);
+ 
+   //===== LOOP OVER ALL 8x8 INTRA PREDICTION MODES =====
+   for (ipmode=0; ipmode<NO_INTRA_PMODE; ipmode++)
+   {
+     // a mode is only tried when the neighbours it predicts from are available
+     if( (ipmode==DC_PRED) ||
+         ((ipmode==VERT_PRED||ipmode==VERT_LEFT_PRED||ipmode==DIAG_DOWN_LEFT_PRED) && up_available ) ||
+         ((ipmode==HOR_PRED||ipmode==HOR_UP_PRED) && left_available ) ||
+         (all_available) )
+     {
+       if (!input->rdopt)
+       {
+         // non-RDO: SATD of the prediction error plus a penalty of floor(4*lambda)
+         // for every mode other than the most probable one
+         for (k=j=0; j<8; j++)
+           for (i=0; i<8; i++, k++)
+           {
+             diff[k] = imgY_orig[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[ipmode][j][i];
+           }
+         cost  = (ipmode == mostProbableMode) ? 0 : (int)floor(4 * lambda );
+         cost += SATD8X8 (diff, input->hadamard);
+         if (cost < *min_cost)
+         {
+           best_ipmode = ipmode;
+           *min_cost   = cost;
+         }
+       }
+       else
+       {
+         // Residue Color Transform
+         if(!img->residue_transform_flag)
+         {
+           // get prediction and prediction error
+           for (j=0; j<8; j++)
+           {
+             memcpy(&img->mpr[block_y+j][block_x],img->mprr_3[ipmode][j], 8 * sizeof(imgpel));
+             for (i=0; i<8; i++)
+             {
+               img->m7[j][i] = imgY_orig[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[ipmode][j][i];
+             }
+           }
+           //===== store the coding state =====
+           //store_coding_state_cs_cm();
+           // get and check rate-distortion cost
+           
+           if ((rdcost = RDCost_for_8x8IntraBlocks (&c_nz, b8, ipmode, lambda, min_rdcost, mostProbableMode)) < min_rdcost)
+           {
+             //--- set coefficients ---
+             for(k=0; k<4; k++) // do 4x now
+             {
+               for (j=0; j<2; j++)
+                 memcpy(cofAC8x8[b8][k][j],img->cofAC[b8][k][j], 65 * sizeof(int));
+             }
+             
+             //--- set reconstruction ---
+             for (y=0; y<8; y++)
+             {
+               memcpy(rec8x8[y],&enc_picture->imgY[pic_pix_y+y][pic_pix_x], 8 * sizeof(imgpel));
+             }
+ 
+             if (img->AdaptiveRounding)
+             {
+               for (j=block_y; j<block_y + 8; j++)
+                 memcpy(&fadjust8x8[1][j][block_x],&img->fadjust8x8[1][j][block_x], 8 * sizeof(int));
+             }
+             
+             //--- flag if dct-coefficients must be coded ---
+             nonzero = c_nz;
+             
+             //--- set best mode update minimum cost ---
+             min_rdcost  = rdcost;
+             best_ipmode = ipmode;
+           }
+           reset_coding_state_cs_cm();  
+         }
+         else
+         {
+           // RDO with residue colour transform: the cost covers all three planes
+ 
+           for (j=0; j<8; j++)
+           for (i=0; i<8; i++)
+           {
+             residue_B = imgUV_org[0][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[0][c_ipmode][block_y+j][block_x+i];
+             residue_G = imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[ipmode][j][i];
+             residue_R = imgUV_org[1][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[1][c_ipmode][block_y+j][block_x+i];
+ 
+             /* Forward Residue Transform */
+             resTrans_R[j][i] = residue_R-residue_B;
+             temp = residue_B+(resTrans_R[j][i]>>1);
+             resTrans_B[j][i] = residue_G-temp;
+             resTrans_G[j][i] = temp+(resTrans_B[j][i]>>1);
+           }
+ 
+           for (j=0; j<8; j++)
+           for (i=0; i<8; i++)
+           {
+             img->m7[j][i]  = resTrans_G[j][i];
+           }
+ 
+           //store_coding_state_cs_cm();
+           rate = (int) RDCost_for_8x8IntraBlocks (&c_nz, b8, ipmode, lambda, min_rdcost, mostProbableMode);
+           reset_coding_state_cs_cm();
+ 
+           for (j=0; j<8; j++)
+             for (i=0; i<8; i++)
+             {
+               rec_resG[j][i] = img->m7[j][i];            
+             }
+ 
+ 
+           // the two remaining planes are coded as four 4x4 blocks each
+           for(b4=0;b4<4;b4++)
+           {
+             
+             block4x4_x = 4*(b4 & 0x01);
+             block4x4_y = 4*(b4 >> 1);
+             
+             for (j=0; j<4; j++)
+               for (i=0; i<4; i++)
+               {
+                 img->m7[j][i]  = resTrans_B[j+block4x4_y][i+block4x4_x];
+               }
+             rate += RDCost_for_4x4Blocks_Chroma (b8+4, b4, 0);
+             for (j=0; j<4; j++)
+               for (i=0; i<4; i++)
+               {
+                 rec_resB[j+block4x4_y][i+block4x4_x] = img->m7[j][i];
+                 img->m7[j][i]  = resTrans_R[j+block4x4_y][i+block4x4_x];
+               }
+             rate += RDCost_for_4x4Blocks_Chroma (b8+8, b4, 1);
+             for (j=0; j<4; j++)
+               for (i=0; i<4; i++)
+               {
+                 rec_resR[j+block4x4_y][i+block4x4_x] = img->m7[j][i];
+               }
+           }
+           reset_coding_state_cs_cm();
+ 
+           for (j=0; j<8; j++)
+           {
+             for (i=0; i<8; i++)
+             {
+               /* Inverse Residue Transform */
+               temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+               residue_G = rec_resB[j][i]+temp;
+               residue_B = temp - (rec_resR[j][i]>>1);
+               residue_R = residue_B+rec_resR[j][i];
+               enc_picture->imgUV[0][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+(int)img->mprr_c[0][c_ipmode][block_y+j][block_x+i]));
+               enc_picture->imgY[pic_pix_y+j][pic_pix_x+i]     = min(img->max_imgpel_value,max(0,residue_G+(int)img->mprr_3[ipmode][j][i]));
+               enc_picture->imgUV[1][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+(int)img->mprr_c[1][c_ipmode][block_y+j][block_x+i]));
+             }
+           }
+           //===== get distortion (SSD) of 8x8 block =====
+           // NOTE(review): the original samples are addressed with pic_pix_* here
+           // while the residues above use pic_opix_*; confirm this is intended.
+           distortion = 0;
+           for (y=0; y<8; y++)
+             for (x=pic_pix_x; x<pic_pix_x+8; x++)
+             {
+               distortion += img->quad[imgY_org    [pic_pix_y+y][x] - enc_picture->imgY    [pic_pix_y+y][x]];
+               distortion += img->quad[imgUV_org[0][pic_pix_y+y][x] - enc_picture->imgUV[0][pic_pix_y+y][x]];
+               distortion += img->quad[imgUV_org[1][pic_pix_y+y][x] - enc_picture->imgUV[1][pic_pix_y+y][x]];
+             }
+           rdcost = (double)distortion + lambda*(double)rate;
+ 
+           if (rdcost < min_rdcost)
+           {
+             //--- set coefficients ---
+             for (j=0; j<2; j++)
+               for (i=0; i<65;i++)  
+                 for(k=0; k<4; k++) //do 4x now
+                   cofAC8x8[b8][k][j][i]=img->cofAC[b8][k][j][i]; //k vs 0
+ 
+             for(b4=0; b4<4; b4++)
+             {
+               block4x4_x = 4*(b4 & 0x01);
+               block4x4_y = 4*(b4 >> 1);
+ 
+               for (j=0; j<2; j++)
+                 for (i=0; i<18;i++)  cofAC8x8_chroma[0][b4][j][i]=img->cofAC[b8+4][b4][j][i];
+               for (j=0; j<2; j++)
+                 for (i=0; i<18;i++)  cofAC8x8_chroma[1][b4][j][i]=img->cofAC[b8+8][b4][j][i];
+ 
+               for (i=0; i<2; i++)
+               { //uv
+                 dc_level[i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = dc_level_temp[i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)];
+                 cbp_chroma_block[i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = cbp_chroma_block_temp[i][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)];
+                 //--- set reconstruction ---
+                 for (y=0; y<4; y++)
+                   for (x=0; x<4; x++)  rec8x8_c[i][b4][y][x] = enc_picture->imgUV[i][pic_pix_y+y+block4x4_y][pic_pix_x+x+block4x4_x];
+               }
+             }
+ 
+             //--- set reconstruction ---
+             for (y=0; y<8; y++)
+               for (x=0; x<8; x++)  
+                 rec8x8[y][x] = enc_picture->imgY[pic_pix_y+y][pic_pix_x+x];
+ 
+             //--- flag if dct-coefficients must be coded ---
+             nonzero = c_nz;
+ 
+             //--- set best mode update minimum cost ---
+             min_rdcost  = rdcost;
+             best_ipmode = ipmode;
+           }
+          }
+       }
+     }
+   }
+ 
+   //===== set intra mode prediction =====
+   img->ipredmode8x8[pic_block_y][pic_block_x] = best_ipmode;
+   // -1 signals "use the most probable mode"; otherwise the mode index is
+   // stored with the most probable mode removed from the numbering
+   currMB->intra_pred_modes8x8[4*b8] = (mostProbableMode == best_ipmode) 
+     ? -1 
+     : (best_ipmode < mostProbableMode ? best_ipmode : best_ipmode-1);
+ 
+   for(j = img->mb_y*4+(b8 >> 1)*2; j < img->mb_y*4+(b8 >> 1)*2 + 2; j++)   //loop 4x4s in the subblock for 8x8 prediction setting
+    memset(&img->ipredmode8x8[j][img->mb_x*4+(b8 & 0x01)*2], best_ipmode, 2 * sizeof(char));
+ 
+ 
+   if (!input->rdopt)
+   {
+     // non-RDO: nothing has been transformed yet, so code the chosen mode now
+     // Residue Color Transform
+     if(!img->residue_transform_flag)
+     {
+       // get prediction and prediction error
+       for (j=0; j<8; j++)
+       {
+         memcpy(&img->mpr[block_y+j][block_x],img->mprr_3[best_ipmode][j], 8 * sizeof(imgpel));
+         for (i=0; i<8; i++)
+         {
+           img->m7[j][i] = imgY_orig[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[best_ipmode][j][i];
+         }
+       } 
+       nonzero = dct_luma8x8 (b8, &dummy, 1);
+     }
+     else 
+     {
+       for (j=0; j<8; j++)
+       {
+         for (i=0; i<8; i++)
+         {
+           img->mpr[block_y+j][block_x+i]  = img->mprr_3[best_ipmode][j][i];
+           residue_B = imgUV_org[0][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[0][c_ipmode][block_y+j][block_x+i];
+           residue_G = imgY_org[pic_opix_y+j][pic_opix_x+i] - img->mprr_3[best_ipmode][j][i];
+           residue_R = imgUV_org[1][pic_opix_y+j][pic_opix_x+i] - img->mprr_c[1][c_ipmode][block_y+j][block_x+i];
+           
+           /* Forward Residue Transform */
+           resTrans_R[j][i] = residue_R-residue_B;
+           temp = residue_B+(resTrans_R[j][i]>>1);
+           resTrans_B[j][i] = residue_G-temp;
+           resTrans_G[j][i] = temp+(resTrans_B[j][i]>>1);
+         }
+       } 
+       for (j=0; j<8; j++)
+       {
+         for (i=0; i<8; i++)
+           
+           img->m7[j][i]  = resTrans_G[j][i];
+       }
+       
+       nonzero = dct_luma8x8 (b8, &dummy, 1);
+       
+       for (j=0; j<8; j++)
+       {
+         for (i=0; i<8; i++)
+           rec_resG[j][i] = img->m7[j][i];
+       }
+       
+       for(b4=0;b4<4;b4++)
+       {
+ 
+         block4x4_x = 4*(b4 & 0x01);
+         block4x4_y = 4*(b4 >> 1);
+         
+         for (j=0; j<4; j++)
+         {
+           for (i=0; i<4; i++)                
+             img->m7[j][i]  = resTrans_B[j+block4x4_y][i+block4x4_x];
+         }
+         cbp_chroma_block[0][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = dct_chroma4x4 (0, b8+4, b4);
+         dc_level        [0][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = dc_level_temp[0][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)];
+         for (j=0; j<4; j++)
+         {
+           for (i=0; i<4; i++)
+           {
+             rec_resB[j+block4x4_y][i+block4x4_x] = img->m7[j][i];
+             img->m7[j][i]  = resTrans_R[j+block4x4_y][i+block4x4_x];
+           }
+         }
+         cbp_chroma_block[1][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = dct_chroma4x4 (1, b8+8, b4);
+         dc_level        [1][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = dc_level_temp[1][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)];
+         for (j=0; j<4; j++)
+         {
+           for (i=0; i<4; i++)
+             rec_resR[j+block4x4_y][i+block4x4_x] = img->m7[j][i];
+         }
+       }
+       
+       for (j=0; j<8; j++)
+       {
+         for (i=0; i<8; i++)
+         {
+           /* Inverse Residue Transform */
+           temp      = rec_resG[j][i]-(rec_resB[j][i]>>1);
+           residue_G = rec_resB[j][i]+temp;
+           residue_B = temp - (rec_resR[j][i]>>1);
+           residue_R = residue_B+rec_resR[j][i];
+           enc_picture->imgUV[0][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_B+(int)img->mprr_c[0][c_ipmode][block_y+j][block_x+i]));
+           enc_picture->imgY[pic_pix_y+j][pic_pix_x+i]     = min(img->max_imgpel_value,max(0,residue_G+(int)img->mprr_3[best_ipmode][j][i]));
+           enc_picture->imgUV[1][pic_pix_y+j][pic_pix_x+i] = min(img->max_imgpel_value_uv,max(0,residue_R+(int)img->mprr_c[1][c_ipmode][block_y+j][block_x+i]));
+         }
+       }
+     }
+   }
+   else
+   {
+     // RDO: the state left behind belongs to the last mode tried, so put back
+     // what was saved for the best mode
+     //===== restore coefficients =====
+     for(k=0; k<4; k++) // do 4x now    
+     {
+       for (j=0; j<2; j++)
+         memcpy(img->cofAC[b8][k][j],cofAC8x8[b8][k][j], 65 * sizeof(int));
+     }
+ 
+     // NOTE(review): the local fadjust8x8 is only filled in the branch without
+     // residue transform; with residue_transform_flag and AdaptiveRounding both
+     // set this appears to copy back values that were never saved -- confirm.
+     if (img->AdaptiveRounding)
+     {
+       for (j=0; j<8; j++)
+         memcpy(&img->fadjust8x8[1][block_y+j][block_x], &fadjust8x8[1][block_y+j][block_x], 8 * sizeof(int));
+     }
+     
+     // Residue Color Transform
+     if(img->residue_transform_flag)
+     for(b4=0; b4<4; b4++){
+       for (j=0; j<2; j++)
+       for (i=0; i<18;i++)  
+         img->cofAC[b8+4][b4][j][i]=cofAC8x8_chroma[0][b4][j][i];
+       for (j=0; j<2; j++)
+       for (i=0; i<18;i++)  
+         img->cofAC[b8+8][b4][j][i]=cofAC8x8_chroma[1][b4][j][i];
+     }
+ 
+     //===== restore reconstruction and prediction (needed if single coeffs are removed) =====
+     for (y=0; y<8; y++) 
+     {
+       memcpy(&enc_picture->imgY[pic_pix_y+y][pic_pix_x], rec8x8[y], 8 * sizeof(imgpel));
+       memcpy(&  img->mpr[block_y+y][block_x], img->mprr_3[best_ipmode][y], 8 * sizeof(imgpel));
+     }
+ 
+     // Residue Color Transform
+       if(img->residue_transform_flag)
+       {
+         for(b4=0; b4<4; b4++)
+         {
+           block4x4_x = 4*(b4 & 0x01);
+           block4x4_y = 4*(b4>>1);
+           for (i=0; i<2; i++)
+           { //uv
+             //--- set reconstruction ---
+             // rec8x8_c holds ints, so it is copied element by element; a memcpy
+             // sized in imgpel units would move the wrong bytes whenever
+             // sizeof(imgpel) differs from sizeof(int)
+             for (y=0; y<4; y++)
+               for (x=0; x<4; x++)
+                 enc_picture->imgUV[i][pic_pix_y + block4x4_y + y][pic_pix_x + block4x4_x + x] = rec8x8_c[i][b4][y][x];
+           }
+         }
+       }
+   }
+ 
+   return nonzero;
+ }
+ 
+ 
+ 
+ // Notation for comments regarding prediction and predictors.
+ // The pels of the 8x8 block are labelled a1..h8. The predictor pels above
+ // are labelled A..P, from the left Q..X, and from above left Z, as follows:
+ //
+ //  Z  A  B  C  D  E  F  G  H  I  J  K  L  M   N  O  P  
+ //  Q  a1 b1 c1 d1 e1 f1 g1 h1
+ //  R  a2 b2 c2 d2 e2 f2 g2 h2
+ //  S  a3 b3 c3 d3 e3 f3 g3 h3
+ //  T  a4 b4 c4 d4 e4 f4 g4 h4
+ //  U  a5 b5 c5 d5 e5 f5 g5 h5
+ //  V  a6 b6 c6 d6 e6 f6 g6 h6
+ //  W  a7 b7 c7 d7 e7 f7 g7 h7
+ //  X  a8 b8 c8 d8 e8 f8 g8 h8
+ 
+ 
+ // Predictor array index definitions
+ // Each macro expands to one element of an array named PredPel, which must be
+ // in scope where the macro is used (intrapred_luma8x8 declares int PredPel[25]).
+ // P_Z is the above-left pel, P_A..P_P the row above (P_I..P_P lying above
+ // right of the block) and P_Q..P_X the column to the left.
+ #define P_Z (PredPel[0])
+ #define P_A (PredPel[1])
+ #define P_B (PredPel[2])
+ #define P_C (PredPel[3])
+ #define P_D (PredPel[4])
+ #define P_E (PredPel[5])
+ #define P_F (PredPel[6])
+ #define P_G (PredPel[7])
+ #define P_H (PredPel[8])
+ #define P_I (PredPel[9])
+ #define P_J (PredPel[10])
+ #define P_K (PredPel[11])
+ #define P_L (PredPel[12])
+ #define P_M (PredPel[13])
+ #define P_N (PredPel[14])
+ #define P_O (PredPel[15])
+ #define P_P (PredPel[16])
+ #define P_Q (PredPel[17])
+ #define P_R (PredPel[18])
+ #define P_S (PredPel[19])
+ #define P_T (PredPel[20])
+ #define P_U (PredPel[21])
+ #define P_V (PredPel[22])
+ #define P_W (PredPel[23])
+ #define P_X (PredPel[24])
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Make intra 8x8 prediction according to all 9 prediction modes.
+  *    The routine uses left and upper neighbouring points from
+  *    previous coded blocks to do this (if available). Notice that
+  *    inaccessible neighbouring points are signalled with a negative
+  *    value in the predmode array .
+  *
+  *  \par Input:
+  *     Starting point of current 8x8 block image position
+  *
+  *  \par Output:
+  *      none
+  ************************************************************************
+  */
+ void intrapred_luma8x8(int img_x,int img_y, int *left_available, int *up_available, int *all_available)
+ {
+   int i,j;
+   int s0;
+   int PredPel[25];  // array of predictor pels
+   imgpel **imgY = enc_picture->imgY;  // For MB level frame/field coding tools -- set default to imgY
+ 
+   int ioff = (img_x & 15);
+   int joff = (img_y & 15);
+   int mb_nr=img->current_mb_nr;
+ 
+   PixelPos pix_a[8];
+   PixelPos pix_b, pix_c, pix_d;
+ 
+   int block_available_up;
+   int block_available_left;
+   int block_available_up_left;
+   int block_available_up_right;
+ 
+   for (i=0;i<8;i++)
+   {
+     getNeighbour(mb_nr, ioff -1 , joff +i , 1, &pix_a[i]);
+   }
+ 
+   getNeighbour(mb_nr, ioff    , joff -1 , 1, &pix_b);
+   getNeighbour(mb_nr, ioff +8 , joff -1 , 1, &pix_c);
+   getNeighbour(mb_nr, ioff -1 , joff -1 , 1, &pix_d);
+   
+   pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8);
+ 
+   if (input->UseConstrainedIntraPred)
+   {
+     for (i=0, block_available_left=1; i<8;i++)
+       block_available_left  &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0;
+     block_available_up       = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0;
+     block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0;
+     block_available_up_left  = pix_d.available ? img->intra_block [pix_d.mb_addr] : 0;
+   }
+   else
+   {
+     block_available_left     = pix_a[0].available;
+     block_available_up       = pix_b.available;
+     block_available_up_right = pix_c.available;
+     block_available_up_left  = pix_d.available;
+   }
+ 
+   *left_available = block_available_left;
+   *up_available   = block_available_up;
+   *all_available  = block_available_up && block_available_left && block_available_up_left;
+ 
+   i = (img_x & 15);
+   j = (img_y & 15);
+ 
+   // form predictor pels
+   // form predictor pels
+   if (block_available_up)
+   {
+     P_A = imgY[pix_b.pos_y][pix_b.pos_x+0];
+     P_B = imgY[pix_b.pos_y][pix_b.pos_x+1];
+     P_C = imgY[pix_b.pos_y][pix_b.pos_x+2];
+     P_D = imgY[pix_b.pos_y][pix_b.pos_x+3];
+     P_E = imgY[pix_b.pos_y][pix_b.pos_x+4];
+     P_F = imgY[pix_b.pos_y][pix_b.pos_x+5];
+     P_G = imgY[pix_b.pos_y][pix_b.pos_x+6];
+     P_H = imgY[pix_b.pos_y][pix_b.pos_x+7];
+   }
+   else
+   {
+     P_A = P_B = P_C = P_D = P_E = P_F = P_G = P_H = img->dc_pred_value;
+   }
+ 
+   if (block_available_up_right)
+   {
+     P_I = imgY[pix_c.pos_y][pix_c.pos_x+0];
+     P_J = imgY[pix_c.pos_y][pix_c.pos_x+1];
+     P_K = imgY[pix_c.pos_y][pix_c.pos_x+2];
+     P_L = imgY[pix_c.pos_y][pix_c.pos_x+3];
+     P_M = imgY[pix_c.pos_y][pix_c.pos_x+4];
+     P_N = imgY[pix_c.pos_y][pix_c.pos_x+5];
+     P_O = imgY[pix_c.pos_y][pix_c.pos_x+6];
+     P_P = imgY[pix_c.pos_y][pix_c.pos_x+7];
+ 
+   }
+   else
+   {
+     P_I = P_J = P_K = P_L = P_M = P_N = P_O = P_P = P_H;
+   }
+ 
+   if (block_available_left)
+   {
+     P_Q = imgY[pix_a[0].pos_y][pix_a[0].pos_x];
+     P_R = imgY[pix_a[1].pos_y][pix_a[1].pos_x];
+     P_S = imgY[pix_a[2].pos_y][pix_a[2].pos_x];
+     P_T = imgY[pix_a[3].pos_y][pix_a[3].pos_x];
+     P_U = imgY[pix_a[4].pos_y][pix_a[4].pos_x];
+     P_V = imgY[pix_a[5].pos_y][pix_a[5].pos_x];
+     P_W = imgY[pix_a[6].pos_y][pix_a[6].pos_x];
+     P_X = imgY[pix_a[7].pos_y][pix_a[7].pos_x];
+   }
+   else
+   {
+     P_Q = P_R = P_S = P_T = P_U = P_V = P_W = P_X = img->dc_pred_value;
+   }
+ 
+   if (block_available_up_left)
+   {
+     P_Z = imgY[pix_d.pos_y][pix_d.pos_x];
+   }
+   else
+   {
+     P_Z = img->dc_pred_value;
+   }
+ 
+   for(i=0;i<9;i++)
+     img->mprr_3[i][0][0]=-1;
+ 
+   LowPassForIntra8x8Pred(&(P_Z), block_available_up_left, block_available_up, block_available_left);
+ 
+   ///////////////////////////////
+   // make DC prediction
+   ///////////////////////////////
+   s0 = 0;
+   if (block_available_up && block_available_left)
+   {   
+     // no edge
+     s0 = (P_A + P_B + P_C + P_D + P_E + P_F + P_G + P_H + P_Q + P_R + P_S + P_T + P_U + P_V + P_W + P_X + 8) >> 4;
+   }
+   else if (!block_available_up && block_available_left)
+   {
+     // upper edge
+     s0 = (P_Q + P_R + P_S + P_T + P_U + P_V + P_W + P_X + 4) >> 3;             
+   }
+   else if (block_available_up && !block_available_left)
+   {
+     // left edge
+     s0 = (P_A + P_B + P_C + P_D + P_E + P_F + P_G + P_H + 4) >> 3;             
+   }
+   else //if (!block_available_up && !block_available_left)
+   {
+     // top left corner, nothing to predict from
+     s0 = img->dc_pred_value;                           
+   }
+   
+   // store DC prediction
+   for (j=0; j < 2*BLOCK_SIZE; j++)
+   {
+     for (i=0; i < 2*BLOCK_SIZE; i++)
+     {
+       img->mprr_3[DC_PRED][i][j] = s0;
+     }
+   }
+ 
+   
+   ///////////////////////////////
+   // make horiz and vert prediction
+   ///////////////////////////////
+ 
+   for (i=0; i < 2*BLOCK_SIZE; i++)
+   {
+     img->mprr_3[VERT_PRED][0][i] = 
+     img->mprr_3[VERT_PRED][1][i] = 
+     img->mprr_3[VERT_PRED][2][i] = 
+     img->mprr_3[VERT_PRED][3][i] = 
+     img->mprr_3[VERT_PRED][4][i] = 
+     img->mprr_3[VERT_PRED][5][i] = 
+     img->mprr_3[VERT_PRED][6][i] = 
+     img->mprr_3[VERT_PRED][7][i] = (&P_A)[i];
+     img->mprr_3[HOR_PRED][i][0]  = 
+     img->mprr_3[HOR_PRED][i][1]  = 
+     img->mprr_3[HOR_PRED][i][2]  = 
+     img->mprr_3[HOR_PRED][i][3]  = 
+     img->mprr_3[HOR_PRED][i][4]  = 
+     img->mprr_3[HOR_PRED][i][5]  = 
+     img->mprr_3[HOR_PRED][i][6]  = 
+     img->mprr_3[HOR_PRED][i][7]  = (&P_Q)[i];
+   }
+ 
+   if(!block_available_up)img->mprr_3[VERT_PRED][0][0]=-1;
+   if(!block_available_left)img->mprr_3[HOR_PRED][0][0]=-1;
+ 
+   ///////////////////////////////////
+   // make diagonal down left prediction
+   ///////////////////////////////////
+   if (block_available_up) 
+   {
+     // Mode DIAG_DOWN_LEFT_PRED
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][0] = (P_A + P_C + 2*(P_B) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][1] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][0] = (P_B + P_D + 2*(P_C) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][2] =
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][1] =
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][0] = (P_C + P_E + 2*(P_D) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][2] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][1] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][0] = (P_D + P_F + 2*(P_E) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][2] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][1] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][0] = (P_E + P_G + 2*(P_F) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][2] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][1] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][0] = (P_F + P_H + 2*(P_G) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][2] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][1] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][0] = (P_G + P_I + 2*(P_H) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][0][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][2] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][1] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][0] = (P_H + P_J + 2*(P_I) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][1][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][2] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][1] = (P_I + P_K + 2*(P_J) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][2][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][3] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][2] = (P_J + P_L + 2*(P_K) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][3][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][4] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][3] = (P_K + P_M + 2*(P_L) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][4][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][5] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][4] = (P_L + P_N + 2*(P_M) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][5][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][6] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][5] = (P_M + P_O + 2*(P_N) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][6][7] = 
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][6] = (P_N + P_P + 2*(P_O) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_LEFT_PRED][7][7] = (P_O + 3*(P_P) + 2) >> 2;
+ 
+     ///////////////////////////////////
+     // make vertical left prediction
+     ///////////////////////////////////
+     img->mprr_3[VERT_LEFT_PRED][0][0] = (P_A + P_B + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][1] = 
+     img->mprr_3[VERT_LEFT_PRED][2][0] = (P_B + P_C + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][2] = 
+     img->mprr_3[VERT_LEFT_PRED][2][1] = 
+     img->mprr_3[VERT_LEFT_PRED][4][0] = (P_C + P_D + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][3] = 
+     img->mprr_3[VERT_LEFT_PRED][2][2] = 
+     img->mprr_3[VERT_LEFT_PRED][4][1] = 
+     img->mprr_3[VERT_LEFT_PRED][6][0] = (P_D + P_E + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][4] = 
+     img->mprr_3[VERT_LEFT_PRED][2][3] = 
+     img->mprr_3[VERT_LEFT_PRED][4][2] = 
+     img->mprr_3[VERT_LEFT_PRED][6][1] = (P_E + P_F + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][5] = 
+     img->mprr_3[VERT_LEFT_PRED][2][4] = 
+     img->mprr_3[VERT_LEFT_PRED][4][3] = 
+     img->mprr_3[VERT_LEFT_PRED][6][2] = (P_F + P_G + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][6] = 
+     img->mprr_3[VERT_LEFT_PRED][2][5] = 
+     img->mprr_3[VERT_LEFT_PRED][4][4] = 
+     img->mprr_3[VERT_LEFT_PRED][6][3] = (P_G + P_H + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][0][7] = 
+     img->mprr_3[VERT_LEFT_PRED][2][6] = 
+     img->mprr_3[VERT_LEFT_PRED][4][5] = 
+     img->mprr_3[VERT_LEFT_PRED][6][4] = (P_H + P_I + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][2][7] = 
+     img->mprr_3[VERT_LEFT_PRED][4][6] = 
+     img->mprr_3[VERT_LEFT_PRED][6][5] = (P_I + P_J + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][4][7] = 
+     img->mprr_3[VERT_LEFT_PRED][6][6] = (P_J + P_K + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][6][7] = (P_K + P_L + 1) >> 1;
+     img->mprr_3[VERT_LEFT_PRED][1][0] = (P_A + P_C + 2*P_B + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][1] = 
+     img->mprr_3[VERT_LEFT_PRED][3][0] = (P_B + P_D + 2*P_C + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][2] = 
+     img->mprr_3[VERT_LEFT_PRED][3][1] = 
+     img->mprr_3[VERT_LEFT_PRED][5][0] = (P_C + P_E + 2*P_D + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][3] = 
+     img->mprr_3[VERT_LEFT_PRED][3][2] = 
+     img->mprr_3[VERT_LEFT_PRED][5][1] = 
+     img->mprr_3[VERT_LEFT_PRED][7][0] = (P_D + P_F + 2*P_E + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][4] = 
+     img->mprr_3[VERT_LEFT_PRED][3][3] = 
+     img->mprr_3[VERT_LEFT_PRED][5][2] = 
+     img->mprr_3[VERT_LEFT_PRED][7][1] = (P_E + P_G + 2*P_F + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][5] = 
+     img->mprr_3[VERT_LEFT_PRED][3][4] = 
+     img->mprr_3[VERT_LEFT_PRED][5][3] = 
+     img->mprr_3[VERT_LEFT_PRED][7][2] = (P_F + P_H + 2*P_G + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][6] = 
+     img->mprr_3[VERT_LEFT_PRED][3][5] = 
+     img->mprr_3[VERT_LEFT_PRED][5][4] = 
+     img->mprr_3[VERT_LEFT_PRED][7][3] = (P_G + P_I + 2*P_H + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][1][7] = 
+     img->mprr_3[VERT_LEFT_PRED][3][6] = 
+     img->mprr_3[VERT_LEFT_PRED][5][5] = 
+     img->mprr_3[VERT_LEFT_PRED][7][4] = (P_H + P_J + 2*P_I + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][3][7] = 
+     img->mprr_3[VERT_LEFT_PRED][5][6] = 
+     img->mprr_3[VERT_LEFT_PRED][7][5] = (P_I + P_K + 2*P_J + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][5][7] = 
+     img->mprr_3[VERT_LEFT_PRED][7][6] = (P_J + P_L + 2*P_K + 2) >> 2;
+     img->mprr_3[VERT_LEFT_PRED][7][7] = (P_K + P_M + 2*P_L + 2) >> 2;
+   }
+ 
+   ///////////////////////////////////
+   // make diagonal down right prediction
+   ///////////////////////////////////
+   if (block_available_up && block_available_left && block_available_up_left) 
+   {
+     // Mode DIAG_DOWN_RIGHT_PRED
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][0] = (P_X + P_V + 2*(P_W) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][1] = (P_W + P_U + 2*(P_V) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][2] = (P_V + P_T + 2*(P_U) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][3] = (P_U + P_S + 2*(P_T) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][4] = (P_T + P_R + 2*(P_S) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][5] = (P_S + P_Q + 2*(P_R) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][6] = (P_R + P_Z + 2*(P_Q) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][0] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][7][7] = (P_Q + P_A + 2*(P_Z) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][1] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][6][7] = (P_Z + P_B + 2*(P_A) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][2] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][5][7] = (P_A + P_C + 2*(P_B) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][3] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][4][7] = (P_B + P_D + 2*(P_C) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][4] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][3][7] = (P_C + P_E + 2*(P_D) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][5] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][2][7] = (P_D + P_F + 2*(P_E) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][6] = 
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][1][7] = (P_E + P_G + 2*(P_F) + 2) >> 2;
+     img->mprr_3[DIAG_DOWN_RIGHT_PRED][0][7] = (P_F + P_H + 2*(P_G) + 2) >> 2;
+ 
+   ///////////////////////////////////
+   // make vertical right prediction
+   ///////////////////////////////////
+     img->mprr_3[VERT_RIGHT_PRED][0][0] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][1] = 
+     img->mprr_3[VERT_RIGHT_PRED][4][2] = 
+     img->mprr_3[VERT_RIGHT_PRED][6][3] = (P_Z + P_A + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][1] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][2] = 
+     img->mprr_3[VERT_RIGHT_PRED][4][3] = 
+     img->mprr_3[VERT_RIGHT_PRED][6][4] = (P_A + P_B + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][2] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][3] = 
+     img->mprr_3[VERT_RIGHT_PRED][4][4] = 
+     img->mprr_3[VERT_RIGHT_PRED][6][5] = (P_B + P_C + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][3] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][4] = 
+     img->mprr_3[VERT_RIGHT_PRED][4][5] = 
+     img->mprr_3[VERT_RIGHT_PRED][6][6] = (P_C + P_D + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][4] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][5] = 
+     img->mprr_3[VERT_RIGHT_PRED][4][6] = 
+     img->mprr_3[VERT_RIGHT_PRED][6][7] = (P_D + P_E + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][5] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][6] = 
+     img->mprr_3[VERT_RIGHT_PRED][4][7] = (P_E + P_F + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][6] = 
+     img->mprr_3[VERT_RIGHT_PRED][2][7] = (P_F + P_G + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][0][7] = (P_G + P_H + 1) >> 1;
+     img->mprr_3[VERT_RIGHT_PRED][1][0] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][1] = 
+     img->mprr_3[VERT_RIGHT_PRED][5][2] = 
+     img->mprr_3[VERT_RIGHT_PRED][7][3] = (P_Q + P_A + 2*P_Z + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][1] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][2] = 
+     img->mprr_3[VERT_RIGHT_PRED][5][3] = 
+     img->mprr_3[VERT_RIGHT_PRED][7][4] = (P_Z + P_B + 2*P_A + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][2] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][3] = 
+     img->mprr_3[VERT_RIGHT_PRED][5][4] = 
+     img->mprr_3[VERT_RIGHT_PRED][7][5] = (P_A + P_C + 2*P_B + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][3] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][4] = 
+     img->mprr_3[VERT_RIGHT_PRED][5][5] = 
+     img->mprr_3[VERT_RIGHT_PRED][7][6] = (P_B + P_D + 2*P_C + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][4] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][5] = 
+     img->mprr_3[VERT_RIGHT_PRED][5][6] = 
+     img->mprr_3[VERT_RIGHT_PRED][7][7] = (P_C + P_E + 2*P_D + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][5] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][6] = 
+     img->mprr_3[VERT_RIGHT_PRED][5][7] = (P_D + P_F + 2*P_E + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][6] = 
+     img->mprr_3[VERT_RIGHT_PRED][3][7] = (P_E + P_G + 2*P_F + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][1][7] = (P_F + P_H + 2*P_G + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][2][0] =
+     img->mprr_3[VERT_RIGHT_PRED][4][1] =
+     img->mprr_3[VERT_RIGHT_PRED][6][2] = (P_R + P_Z + 2*P_Q + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][3][0] =
+     img->mprr_3[VERT_RIGHT_PRED][5][1] =
+     img->mprr_3[VERT_RIGHT_PRED][7][2] = (P_S + P_Q + 2*P_R + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][4][0] =
+     img->mprr_3[VERT_RIGHT_PRED][6][1] = (P_T + P_R + 2*P_S + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][5][0] =
+     img->mprr_3[VERT_RIGHT_PRED][7][1] = (P_U + P_S + 2*P_T + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][6][0] = (P_V + P_T + 2*P_U + 2) >> 2;
+     img->mprr_3[VERT_RIGHT_PRED][7][0] = (P_W + P_U + 2*P_V + 2) >> 2;
+ 
+   ///////////////////////////////////
+   // make horizontal down prediction
+   ///////////////////////////////////
+     
+     img->mprr_3[HOR_DOWN_PRED][0][0] = 
+     img->mprr_3[HOR_DOWN_PRED][1][2] = 
+     img->mprr_3[HOR_DOWN_PRED][2][4] = 
+     img->mprr_3[HOR_DOWN_PRED][3][6] = (P_Q + P_Z + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][1][0] = 
+     img->mprr_3[HOR_DOWN_PRED][2][2] = 
+     img->mprr_3[HOR_DOWN_PRED][3][4] = 
+     img->mprr_3[HOR_DOWN_PRED][4][6] = (P_R + P_Q + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][2][0] = 
+     img->mprr_3[HOR_DOWN_PRED][3][2] = 
+     img->mprr_3[HOR_DOWN_PRED][4][4] = 
+     img->mprr_3[HOR_DOWN_PRED][5][6] = (P_S + P_R + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][3][0] = 
+     img->mprr_3[HOR_DOWN_PRED][4][2] = 
+     img->mprr_3[HOR_DOWN_PRED][5][4] = 
+     img->mprr_3[HOR_DOWN_PRED][6][6] = (P_T + P_S + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][4][0] = 
+     img->mprr_3[HOR_DOWN_PRED][5][2] = 
+     img->mprr_3[HOR_DOWN_PRED][6][4] = 
+     img->mprr_3[HOR_DOWN_PRED][7][6] = (P_U + P_T + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][5][0] = 
+     img->mprr_3[HOR_DOWN_PRED][6][2] = 
+     img->mprr_3[HOR_DOWN_PRED][7][4] = (P_V + P_U + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][6][0] = 
+     img->mprr_3[HOR_DOWN_PRED][7][2] = (P_W + P_V + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][7][0] = (P_X + P_W + 1) >> 1;
+     img->mprr_3[HOR_DOWN_PRED][0][1] =
+     img->mprr_3[HOR_DOWN_PRED][1][3] =
+     img->mprr_3[HOR_DOWN_PRED][2][5] =
+     img->mprr_3[HOR_DOWN_PRED][3][7] = (P_Q + P_A + 2*P_Z + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][1][1] =
+     img->mprr_3[HOR_DOWN_PRED][2][3] =
+     img->mprr_3[HOR_DOWN_PRED][3][5] =
+     img->mprr_3[HOR_DOWN_PRED][4][7] = (P_Z + P_R + 2*P_Q + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][2][1] =
+     img->mprr_3[HOR_DOWN_PRED][3][3] =
+     img->mprr_3[HOR_DOWN_PRED][4][5] =
+     img->mprr_3[HOR_DOWN_PRED][5][7] = (P_Q + P_S + 2*P_R + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][3][1] =
+     img->mprr_3[HOR_DOWN_PRED][4][3] =
+     img->mprr_3[HOR_DOWN_PRED][5][5] =
+     img->mprr_3[HOR_DOWN_PRED][6][7] = (P_R + P_T + 2*P_S + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][4][1] =
+     img->mprr_3[HOR_DOWN_PRED][5][3] =
+     img->mprr_3[HOR_DOWN_PRED][6][5] =
+     img->mprr_3[HOR_DOWN_PRED][7][7] = (P_S + P_U + 2*P_T + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][5][1] =
+     img->mprr_3[HOR_DOWN_PRED][6][3] =
+     img->mprr_3[HOR_DOWN_PRED][7][5] = (P_T + P_V + 2*P_U + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][6][1] =
+     img->mprr_3[HOR_DOWN_PRED][7][3] = (P_U + P_W + 2*P_V + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][7][1] = (P_V + P_X + 2*P_W + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][0][2] = 
+     img->mprr_3[HOR_DOWN_PRED][1][4] = 
+     img->mprr_3[HOR_DOWN_PRED][2][6] = (P_Z + P_B + 2*P_A + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][0][3] = 
+     img->mprr_3[HOR_DOWN_PRED][1][5] = 
+     img->mprr_3[HOR_DOWN_PRED][2][7] = (P_A + P_C + 2*P_B + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][0][4] = 
+     img->mprr_3[HOR_DOWN_PRED][1][6] = (P_B + P_D + 2*P_C + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][0][5] = 
+     img->mprr_3[HOR_DOWN_PRED][1][7] = (P_C + P_E + 2*P_D + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][0][6] = (P_D + P_F + 2*P_E + 2) >> 2;
+     img->mprr_3[HOR_DOWN_PRED][0][7] = (P_E + P_G + 2*P_F + 2) >> 2;
+   }
+ 
+   ///////////////////////////////////
+   // make horizontal up prediction
+   ///////////////////////////////////
+   if (block_available_left)
+   {
+     img->mprr_3[HOR_UP_PRED][0][0] = (P_Q + P_R + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][1][0] =
+     img->mprr_3[HOR_UP_PRED][0][2] = (P_R + P_S + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][2][0] =
+     img->mprr_3[HOR_UP_PRED][1][2] =
+     img->mprr_3[HOR_UP_PRED][0][4] = (P_S + P_T + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][3][0] =
+     img->mprr_3[HOR_UP_PRED][2][2] =
+     img->mprr_3[HOR_UP_PRED][1][4] =
+     img->mprr_3[HOR_UP_PRED][0][6] = (P_T + P_U + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][4][0] =
+     img->mprr_3[HOR_UP_PRED][3][2] =
+     img->mprr_3[HOR_UP_PRED][2][4] =
+     img->mprr_3[HOR_UP_PRED][1][6] = (P_U + P_V + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][5][0] =
+     img->mprr_3[HOR_UP_PRED][4][2] =
+     img->mprr_3[HOR_UP_PRED][3][4] =
+     img->mprr_3[HOR_UP_PRED][2][6] = (P_V + P_W + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][6][0] =
+     img->mprr_3[HOR_UP_PRED][5][2] =
+     img->mprr_3[HOR_UP_PRED][4][4] =
+     img->mprr_3[HOR_UP_PRED][3][6] = (P_W + P_X + 1) >> 1;
+     img->mprr_3[HOR_UP_PRED][4][6] =
+     img->mprr_3[HOR_UP_PRED][4][7] =
+     img->mprr_3[HOR_UP_PRED][5][4] =
+     img->mprr_3[HOR_UP_PRED][5][5] =
+     img->mprr_3[HOR_UP_PRED][5][6] =
+     img->mprr_3[HOR_UP_PRED][5][7] =
+     img->mprr_3[HOR_UP_PRED][6][2] =
+     img->mprr_3[HOR_UP_PRED][6][3] =
+     img->mprr_3[HOR_UP_PRED][6][4] =
+     img->mprr_3[HOR_UP_PRED][6][5] =
+     img->mprr_3[HOR_UP_PRED][6][6] =
+     img->mprr_3[HOR_UP_PRED][6][7] =
+     img->mprr_3[HOR_UP_PRED][7][0] =
+     img->mprr_3[HOR_UP_PRED][7][1] =
+     img->mprr_3[HOR_UP_PRED][7][2] =
+     img->mprr_3[HOR_UP_PRED][7][3] =
+     img->mprr_3[HOR_UP_PRED][7][4] =
+     img->mprr_3[HOR_UP_PRED][7][5] =
+     img->mprr_3[HOR_UP_PRED][7][6] =
+     img->mprr_3[HOR_UP_PRED][7][7] = P_X;
+     img->mprr_3[HOR_UP_PRED][6][1] =
+     img->mprr_3[HOR_UP_PRED][5][3] =
+     img->mprr_3[HOR_UP_PRED][4][5] =
+     img->mprr_3[HOR_UP_PRED][3][7] = (P_W + 3*P_X + 2) >> 2;
+     img->mprr_3[HOR_UP_PRED][5][1] =
+     img->mprr_3[HOR_UP_PRED][4][3] =
+     img->mprr_3[HOR_UP_PRED][3][5] =
+     img->mprr_3[HOR_UP_PRED][2][7] = (P_X + P_V + 2*P_W + 2) >> 2;
+     img->mprr_3[HOR_UP_PRED][4][1] =
+     img->mprr_3[HOR_UP_PRED][3][3] =
+     img->mprr_3[HOR_UP_PRED][2][5] =
+     img->mprr_3[HOR_UP_PRED][1][7] = (P_W + P_U + 2*P_V + 2) >> 2;
+     img->mprr_3[HOR_UP_PRED][3][1] =
+     img->mprr_3[HOR_UP_PRED][2][3] =
+     img->mprr_3[HOR_UP_PRED][1][5] =
+     img->mprr_3[HOR_UP_PRED][0][7] = (P_V + P_T + 2*P_U + 2) >> 2;
+     img->mprr_3[HOR_UP_PRED][2][1] =
+     img->mprr_3[HOR_UP_PRED][1][3] =
+     img->mprr_3[HOR_UP_PRED][0][5] = (P_U + P_S + 2*P_T + 2) >> 2;
+     img->mprr_3[HOR_UP_PRED][1][1] =
+     img->mprr_3[HOR_UP_PRED][0][3] = (P_T + P_R + 2*P_S + 2) >> 2;
+     img->mprr_3[HOR_UP_PRED][0][1] = (P_S + P_Q + 2*P_R + 2) >> 2;
+   }
+ }
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    Prefiltering for Intra8x8 prediction.
+  *
+  *    Smooths the 25 reference samples with a rounded (1,2,1)/4 filter. Every tap
+  *    reads the unfiltered input: results are staged in a scratch array and copied
+  *    back to PredPel only at the very end. Samples whose group is not enabled keep
+  *    their incoming value.
+  *
+  * \param PredPel        in/out: the 25 reference samples (P_Z first), filtered in place
+  * \param block_up_left  non-zero if the corner sample P_Z may be used as a tap / filtered
+  * \param block_up       non-zero to filter entries 1..16 (the P_A..P_P run)
+  * \param block_left     non-zero to filter entries 17..24 (the P_Q..P_X run)
+  *************************************************************************************
+  */
+ void LowPassForIntra8x8Pred(int *PredPel, int block_up_left, int block_up, int block_left)
+ {
+   const int *pel = &P_Z;   /* read-only view of the unfiltered samples */
+   int smoothed[25];        /* staging area for the filtered samples    */
+   int k;
+ 
+   /* entries that are not filtered below simply pass through */
+   for (k = 0; k < 25; k++)
+     smoothed[k] = PredPel[k];
+ 
+   if (block_up)
+   {
+     /* first sample of the run: without a usable corner it stands in for its own left tap */
+     int outer_tap = block_up_left ? pel[0] : pel[1];
+ 
+     smoothed[1] = (outer_tap + (pel[1]<<1) + pel[2] + 2)>>2;
+ 
+     for (k = 2; k < 16; k++)
+       smoothed[k] = (pel[k-1] + (pel[k]<<1) + pel[k+1] + 2)>>2;
+ 
+     /* last sample of the run has no successor, so it is weighted three times */
+     smoothed[16] = (P_P + (P_P<<1) + P_O + 2)>>2;
+   }
+ 
+   if (block_up_left)
+   {
+     /* corner sample: the taps depend on which of the two adjoining runs exist */
+     if (block_up && block_left)
+       smoothed[0] = (P_Q + (P_Z<<1) + P_A +2)>>2;
+     else if (block_up)
+       smoothed[0] = (P_Z + (P_Z<<1) + P_A +2)>>2;
+     else if (block_left)
+       smoothed[0] = (P_Z + (P_Z<<1) + P_Q +2)>>2;
+   }
+ 
+   if (block_left)
+   {
+     /* first sample of the run: without a usable corner it stands in for its own upper tap */
+     int outer_tap = block_up_left ? P_Z : P_Q;
+ 
+     smoothed[17] = (outer_tap + (P_Q<<1) + P_R + 2)>>2;
+ 
+     for (k = 18; k < 24; k++)
+       smoothed[k] = (pel[k-1] + (pel[k]<<1) + pel[k+1] + 2)>>2;
+ 
+     /* last sample of the run has no successor, so it is weighted three times */
+     smoothed[24] = (P_W + (P_X<<1) + P_X + 2)>>2;
+   }
+ 
+   /* publish the filtered samples */
+   for (k = 0; k < 25; k++)
+     PredPel[k] = smoothed[k];
+ }
+ 
+ 
+ 
+ 
+ 
+ /*!
+  *************************************************************************************
+  * \brief
+  *    R-D Cost for an 8x8 Intra block.
+  *
+  *    Runs dct_luma8x8() on the block, accumulates the distortion (SSD, looked up
+  *    through img->quad[]) between original and reconstructed luma, and counts the
+  *    bits spent on the intra prediction mode and on the luma coefficients.
+  *
+  * \param nonzero           out: receives the return value of dct_luma8x8()
+  * \param b8                index of the 8x8 block inside the macroblock (0..3)
+  * \param ipmode            intra prediction mode under evaluation
+  * \param lambda            weight applied to the rate
+  * \param min_rdcost        not referenced by this routine
+  * \param mostProbableMode  predicted mode; ipmode is coded relative to it
+  *
+  * \return
+  *    the rate alone when img->residue_transform_flag is set, otherwise
+  *    distortion + lambda * rate
+  *************************************************************************************
+  */
+ 
+ double RDCost_for_8x8IntraBlocks(int *nonzero, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode)
+ {
+   double  cost      = 0.0;
+   int     cost_sink = 0;     /* coefficient-cost output of dct_luma8x8, ignored here */
+   int     row, col, bits;
+   int     part_idx;
+   int64   ssd       = 0;
+   int     off_x     = 8*(b8 & 0x01);
+   int     off_y     = 8*(b8 >> 1);
+   int     rec_x0    = img->pix_x+off_x;
+   int     rec_y0    = img->pix_y+off_y;
+   int     org_y0    = img->opix_y+off_y;
+   imgpel  **org     = imgY_org;
+   imgpel  **rec     = enc_picture->imgY;
+ 
+   Slice          *slice   =  img->currentSlice;
+   Macroblock     *mb      = &img->mb_data[img->current_mb_nr];
+   SyntaxElement  *se      = &img->MB_SyntaxElements[mb->currSEnr];
+   const int      *se2part = assignSE2partition[input->partition_mode];
+   DataPartition  *part;
+ 
+   //===== transform, quantise and reconstruct the block =====
+   *nonzero = dct_luma8x8 (b8, &cost_sink, 1);
+ 
+   //===== distortion of the reconstructed 8x8 block =====
+   for (row=0; row<8; row++)
+   {
+     imgpel *org_line = org[org_y0+row];
+     imgpel *rec_line = rec[rec_y0+row];
+ 
+     for (col=0; col<8; col++)
+       ssd += img->quad [org_line[rec_x0+col] - rec_line[rec_x0+col]];
+   }
+ 
+   //===== rate of the intra prediction mode =====
+   // -1 stands for "same as the most probable mode"; larger modes shift down by one
+   if (ipmode == mostProbableMode)
+     se->value1 = -1;
+   else if (ipmode < mostProbableMode)
+     se->value1 = ipmode;
+   else
+     se->value1 = ipmode-1;
+ 
+   se->context = b8;
+   se->type    = SE_INTRAPREDMODE;
+ 
+   //--- data partition depends on the slice type ---
+   part_idx = (img->type!=B_SLICE) ? se2part[SE_INTRAPREDMODE] : se2part[SE_BFRAME];
+   part     = &(slice->partArr[part_idx]);
+ 
+   //--- write the element with the active entropy coder ---
+   if (input->symbol_mode == UVLC)
+   {
+     writeSyntaxElement_Intra4x4PredictionMode(se, part);
+   }
+   else
+   {
+     se->writing = writeIntraPredMode_CABAC;
+     part->writeSyntaxElement (se, part);
+   }
+ 
+   bits = se->len;
+   mb->currSEnr++;
+ 
+   //===== rate of the luminance coefficients =====
+   if (input->symbol_mode == UVLC)
+   {
+     int blk4;
+     for (blk4=0; blk4<4; blk4++)
+       bits += writeCoeff4x4_CAVLC (LUMA, b8, blk4, 0);
+   }
+   else
+   {
+     bits += writeLumaCoeff8x8_CABAC (b8, 1);
+   }
+ 
+   cost = (double)ssd + lambda*(double)bits;
+ 
+   if (img->residue_transform_flag)
+     return (double)bits;
+ 
+   return cost;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    The routine performs transform,quantization,inverse transform, adds the diff.
+  *    to the prediction and writes the result to the decoded luma frame. Includes the
+  *    RD constrained quantization also.
+  *
+  * \par Input:
+  *    b8: Block position inside a macro block (0,1,2,3).
+  *
+  * \par Output:
+  *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.  
+  *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
+  ************************************************************************
+  */
+ 
+ #define MC(coeff) ((coeff)&3)   // low two bits of a scan index; dct_luma8x8 uses it to select one of four interleaved coefficient lists
+ /*
+  * dct_luma8x8: processes the 8x8 luma residual held in img->m7.
+  *   1. forward 8x8 integer transform (skipped when lossless_qpprime is set),
+  *   2. quantization in scan order, storing (level, run) pairs in img->cofAC[b8],
+  *   3. dequantization and inverse transform (again skipped when lossless),
+  *   4. reconstruction: prediction img->mpr is added and the result is written to
+  *      enc_picture->imgY, unless img->residue_transform_flag is set.
+  *
+  * b8         index of the 8x8 block; block_x = 8*(b8&1), block_y = 8*(b8>>1)
+  * coeff_cost incremented for every nonzero level: MAX_VALUE if level > 1,
+  *            otherwise the COEFF_COST8x8 table entry for the current run
+  * intra      first index into the LevelScale8x8Luma / LevelOffset8x8Luma /
+  *            InvLevelScale8x8Luma tables and into img->fadjust8x8
+  * returns    TRUE if at least one quantized level is nonzero, FALSE otherwise
+  */
+ int dct_luma8x8(int b8,int *coeff_cost, int intra)
+ {
+   int sign(int a,int b);   // local prototype; the definition is not part of this function
+ 
+   int i,j,ilev,coeff_ctr;
+   int level,scan_pos,run;
+   int nonzero;
+   int qp_per,qp_rem,q_bits;
+   int dq_lshift = 0, dq_rshift = 0, dq_round = 0;
+ 
+   int block_x = 8*(b8 & 0x01);   // horizontal offset of block b8 inside the macroblock
+   int block_y = 8*(b8 >> 1);     // vertical offset of block b8 inside the macroblock
+   int*  ACLevel = img->cofAC[b8][0][0];   // level list used by the non-interleaved path
+   int*  ACRun   = img->cofAC[b8][0][1];   // run list used by the non-interleaved path
+   int m6[8][8];                  // intermediate result between the two transform passes
+   int a[8], b[8];                // butterfly temporaries
+   int scan_poss[4],runs[4];      // per-list write position and zero run for the interleaved path
+   int pix_x, pix_y, ipix_y;
+   int **levelscale,**leveloffset;
+   int **invlevelscale;
+   int MCcoeff;
+   Macroblock *currMB = &img->mb_data[img->current_mb_nr];
+   short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field));
+ // lossless mode: effective QP is zero and the lossless flag is set; transform and scaling are bypassed below
+   Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1);
+   
+   qp_per    = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6;
+   qp_rem    = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6;
+   q_bits    = Q_BITS_8+qp_per;
+   levelscale    = LevelScale8x8Luma[intra][qp_rem];
+   leveloffset   = LevelOffset8x8Luma[intra][qp_per];
+   invlevelscale = InvLevelScale8x8Luma[intra][qp_rem];
+ // dequantization shifts: right shift with rounding for qp_per < 6, left shift otherwise
+   if (qp_per < 6)
+   {
+     dq_rshift = 6 - qp_per;
+     dq_round  = 1<<(5-qp_per);
+   }
+   else
+     dq_lshift = qp_per - 6;
+     
+ 
+   // horizontal transform (first pass): reads row i of img->m7, writes column i of m6
+   if (!lossless_qpprime) 
+   {
+     for( i=0; i<8; i++)
+     {
+       a[0] = img->m7[i][0] + img->m7[i][7];
+       a[1] = img->m7[i][1] + img->m7[i][6];
+       a[2] = img->m7[i][2] + img->m7[i][5];
+       a[3] = img->m7[i][3] + img->m7[i][4];
+       
+       b[0] = a[0] + a[3];
+       b[1] = a[1] + a[2];
+       b[2] = a[0] - a[3];
+       b[3] = a[1] - a[2];
+       
+       a[4] = img->m7[i][0] - img->m7[i][7];
+       a[5] = img->m7[i][1] - img->m7[i][6];
+       a[6] = img->m7[i][2] - img->m7[i][5];
+       a[7] = img->m7[i][3] - img->m7[i][4];
+       
+       b[4]= a[5] + a[6] + ((a[4]>>1) + a[4]);
+       b[5]= a[4] - a[7] - ((a[6]>>1) + a[6]);
+       b[6]= a[4] + a[7] - ((a[5]>>1) + a[5]);
+       b[7]= a[5] - a[6] + ((a[7]>>1) + a[7]);
+       
+       m6[0][i] = b[0] + b[1];
+       m6[2][i] = b[2] + (b[3]>>1);
+       m6[4][i] = b[0] - b[1];
+       m6[6][i] = (b[2]>>1) - b[3];
+       m6[1][i] =   b[4] + (b[7]>>2);
+       m6[3][i] =   b[5] + (b[6]>>2);
+       m6[5][i] =   b[6] - (b[5]>>2);
+       m6[7][i] = - b[7] + (b[4]>>2);
+       
+     }
+     // vertical transform (second pass): reads row i of m6, writes column i of img->m7
+     for( i=0; i<8; i++)
+     {
+       a[0] = m6[i][0] + m6[i][7];
+       a[1] = m6[i][1] + m6[i][6];
+       a[2] = m6[i][2] + m6[i][5];
+       a[3] = m6[i][3] + m6[i][4];
+       
+       b[0] = a[0] + a[3];
+       b[1] = a[1] + a[2];
+       b[2] = a[0] - a[3];
+       b[3] = a[1] - a[2];
+       
+       a[4] = m6[i][0] - m6[i][7];
+       a[5] = m6[i][1] - m6[i][6];
+       a[6] = m6[i][2] - m6[i][5];
+       a[7] = m6[i][3] - m6[i][4];
+       
+       b[4]= a[5] + a[6] + ((a[4]>>1) + a[4]);
+       b[5]= a[4] - a[7] - ((a[6]>>1) + a[6]);
+       b[6]= a[4] + a[7] - ((a[5]>>1) + a[5]);
+       b[7]= a[5] - a[6] + ((a[7]>>1) + a[7]);
+       
+       img->m7[0][i] = b[0] + b[1];
+       img->m7[2][i] = b[2] + (b[3]>>1);
+       img->m7[4][i] = b[0] - b[1];
+       img->m7[6][i] = (b[2]>>1) - b[3];
+       img->m7[1][i] =   b[4] + (b[7]>>2);
+       img->m7[3][i] =   b[5] + (b[6]>>2);
+       img->m7[5][i] =   b[6] - (b[5]>>2);
+       img->m7[7][i] = - b[7] + (b[4]>>2);
+     }
+   }
+ 
+   // Quant: visit all 64 coefficients in scan order and collect (level, run) pairs
+   nonzero=FALSE;
+   
+   run=-1;        // becomes 0 at the first coefficient because it is incremented before use
+   scan_pos=0;
+   
+   runs[0]=runs[1]=runs[2]=runs[3]=-1;
+   scan_poss[0]=scan_poss[1]=scan_poss[2]=scan_poss[3]=0;
+   
+   for (coeff_ctr=0;coeff_ctr < 64;coeff_ctr++)
+   {
+     
+     if (is_field_mode) 
+     {  // Alternate scan for field coding
+       i=FIELD_SCAN8x8[coeff_ctr][0];
+       j=FIELD_SCAN8x8[coeff_ctr][1];
+     }
+     else 
+     {
+       i=SNGL_SCAN8x8[coeff_ctr][0];
+       j=SNGL_SCAN8x8[coeff_ctr][1];
+     }
+     MCcoeff = MC(coeff_ctr);   // list index 0..3 for the interleaved path
+     run++;
+     ilev=0;                    // dequantized value; stays 0 when the level quantizes to zero
+     
+     runs[MCcoeff]++;
+     // lossless: level is the absolute residual itself; otherwise scale, add the rounding offset and shift
+     if(lossless_qpprime)
+       level = absm (img->m7[j][i]);
+     else 
+       level = (absm (img->m7[j][i]) * levelscale[i][j] + leveloffset[i][j]) >> q_bits;
+   
+     if (img->AdaptiveRounding)
+     {
+       if (lossless_qpprime || level == 0 )
+       {
+         img->fadjust8x8[intra][block_y+j][block_x+i] = 0;
+       }
+       else 
+       {
+         img->fadjust8x8[intra][block_y + j][block_x + i] = 
+           (AdaptRndWeight * (absm (img->m7[j][i]) * levelscale[i][j] - (level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1);       
+       }
+     }
+ 
+     if (level != 0)
+     {
+       nonzero=TRUE;
+       
+       if (currMB->luma_transform_size_8x8_flag && input->symbol_mode == UVLC)
+       { // interleaved path: the coefficient goes to list MCcoeff with that list's own run counter
+         *coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST8x8[input->disthres][runs[MCcoeff]];
+ 
+         img->cofAC[b8][MCcoeff][0][scan_poss[MCcoeff]] = sign(level,img->m7[j][i]);
+         img->cofAC[b8][MCcoeff][1][scan_poss[MCcoeff]] = runs[MCcoeff];
+         ++scan_poss[MCcoeff];
+         runs[MCcoeff]=-1;
+       }
+       else
+       { // single-list path: all coefficients go to ACLevel/ACRun
+         *coeff_cost += (level > 1) ? MAX_VALUE : COEFF_COST8x8[input->disthres][run];
+         ACLevel[scan_pos] = sign(level,img->m7[j][i]);
+         ACRun  [scan_pos] = run;
+         ++scan_pos;
+         run=-1;                     // reset zero level counter
+       }      
+       level = sign(level, img->m7[j][i]);
+       if(lossless_qpprime)
+       {
+         ilev = level;
+       }
+       else 
+       {
+         if (qp_per>=6)
+           ilev = level*invlevelscale[i][j]<<dq_lshift; // dequantization
+         else
+           ilev = (level*invlevelscale[i][j] + dq_round)>>dq_rshift; // dequantization
+       }
+     }
+     if(!lossless_qpprime)
+       img->m7[j][i] = ilev;   // replace the coefficient by its dequantized value for the inverse transform
+   }
+   if (!currMB->luma_transform_size_8x8_flag || input->symbol_mode != UVLC)
+     ACLevel[scan_pos] = 0;   // a zero level terminates the list
+   else
+   {
+     for(i=0; i<4; i++)
+       img->cofAC[b8][i][0][scan_poss[i]] = 0;   // terminate each of the four interleaved lists
+   }
+  
+   
+   //    Inverse Transform
+   // horizontal inverse transform (first pass): reads row i of img->m7, writes column i of m6
+   if (!lossless_qpprime)
+   {
+     for( i=0; i<8; i++)
+     {
+       a[0] = img->m7[i][0] + img->m7[i][4];
+       a[4] = img->m7[i][0] - img->m7[i][4];
+       a[2] = (img->m7[i][2]>>1) - img->m7[i][6];
+       a[6] = img->m7[i][2] + (img->m7[i][6]>>1);
+       
+       b[0] = a[0] + a[6];
+       b[2] = a[4] + a[2];
+       b[4] = a[4] - a[2];
+       b[6] = a[0] - a[6];
+       
+       a[1] = -img->m7[i][3] + img->m7[i][5] - img->m7[i][7] - (img->m7[i][7]>>1);
+       a[3] =  img->m7[i][1] + img->m7[i][7] - img->m7[i][3] - (img->m7[i][3]>>1);
+       a[5] = -img->m7[i][1] + img->m7[i][7] + img->m7[i][5] + (img->m7[i][5]>>1);
+       a[7] =  img->m7[i][3] + img->m7[i][5] + img->m7[i][1] + (img->m7[i][1]>>1);
+       
+       b[1] = a[1] + (a[7]>>2);
+       b[7] = -(a[1]>>2) + a[7];
+       b[3] = a[3] + (a[5]>>2);
+       b[5] = (a[3]>>2) - a[5];
+       
+       m6[0][i] = b[0] + b[7];
+       m6[1][i] = b[2] + b[5];
+       m6[2][i] = b[4] + b[3];
+       m6[3][i] = b[6] + b[1];
+       m6[4][i] = b[6] - b[1];
+       m6[5][i] = b[4] - b[3];
+       m6[6][i] = b[2] - b[5];
+       m6[7][i] = b[0] - b[7];
+     }
+     
+     // vertical inverse transform (second pass): reads row i of m6, writes column i of img->m7
+     for( i=0; i<8; i++)
+     {
+       a[0] =  m6[i][0] + m6[i][4];
+       a[4] =  m6[i][0] - m6[i][4];
+       a[2] = (m6[i][2]>>1) - m6[i][6];
+       a[6] =  m6[i][2] + (m6[i][6]>>1);
+       
+       b[0] = a[0] + a[6];
+       b[2] = a[4] + a[2];
+       b[4] = a[4] - a[2];
+       b[6] = a[0] - a[6];
+       
+       a[1] = -m6[i][3] + m6[i][5] - m6[i][7] - (m6[i][7]>>1);
+       a[3] =  m6[i][1] + m6[i][7] - m6[i][3] - (m6[i][3]>>1);
+       a[5] = -m6[i][1] + m6[i][7] + m6[i][5] + (m6[i][5]>>1);
+       a[7] =  m6[i][3] + m6[i][5] + m6[i][1] + (m6[i][1]>>1);
+       
+       b[1] =   a[1] + (a[7]>>2);
+       b[7] = -(a[1]>>2) + a[7];
+       b[3] =   a[3] + (a[5]>>2);
+       b[5] =  (a[3]>>2) - a[5];
+       
+       img->m7[0][i] = b[0] + b[7];
+       img->m7[1][i] = b[2] + b[5];
+       img->m7[2][i] = b[4] + b[3];
+       img->m7[3][i] = b[6] + b[1];
+       img->m7[4][i] = b[6] - b[1];
+       img->m7[5][i] = b[4] - b[3];
+       img->m7[6][i] = b[2] - b[5];
+       img->m7[7][i] = b[0] - b[7];
+     }
+   }
+   // reconstruction: add the prediction img->mpr and store the samples in enc_picture->imgY
+   if (!img->residue_transform_flag)
+   {
+     for( j=0; j<2*BLOCK_SIZE; j++)
+     {
+       pix_y = block_y+j;    
+       ipix_y = img->pix_y + pix_y;
+       for( i=0; i<2*BLOCK_SIZE; i++)
+       {
+         pix_x = block_x+i;
+         if(lossless_qpprime)
+           img->m7[j][i] = img->m7[j][i]+img->mpr[pix_y][block_x+i];   // lossless: plain sum, no rounding or clipping
+         else
+           img->m7[j][i] = clip1a((img->m7[j][i]+((long)img->mpr[pix_y][pix_x] << DQ_BITS_8)+DQ_ROUND_8)>>DQ_BITS_8);
+         enc_picture->imgY[ipix_y][img->pix_x + pix_x]=img->m7[j][i];
+       }
+     }
+   }
+   else if(!lossless_qpprime)
+   { // residue transform: only round and scale the residual; no prediction is added and nothing is written to imgY here
+     for( j=0; j<2*BLOCK_SIZE; j++)
+       for( i=0; i<2*BLOCK_SIZE; i++)
+         img->m7[j][i] =(img->m7[j][i]+DQ_ROUND_8)>>DQ_BITS_8;
+   }
+   
+   //  Decoded block moved to frame memory
+   
+   return nonzero;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/transform8x8.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/transform8x8.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/transform8x8.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,32 ----
+ 
+ /*!
+  ***************************************************************************
+  *
+  * \file transform8x8.h
+  *
+  * \brief
+  *    prototypes of 8x8 transform functions
+  *
+  * \date
+  *    9. October 2003
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details) 
+  *    Yuri Vatis  vatis at hhi.de
+  **************************************************************************/
+ 
+ #ifndef _TRANSFORM8X8_H_
+ #define _TRANSFORM8X8_H_
+ // NOTE(review): descriptions marked "presumably" are inferred from the prototypes only -- confirm against the definitions
+ int    Mode_Decision_for_new_Intra8x8Macroblock (double lambda, int *min_cost);
+ int    Mode_Decision_for_new_8x8IntraBlocks (int b8, double lambda, int *min_cost);
+ // presumably builds the intra 8x8 luma predictions at (img_x,img_y) and reports neighbour availability -- TODO confirm
+ void   intrapred_luma8x8(int img_x,int img_y, int *left_available, int *up_available, int *all_available);
+ // presumably returns the rate-distortion cost of coding block b8 with intra mode ipmode -- TODO confirm
+ double RDCost_for_8x8IntraBlocks(int *c_nz, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode);
+ // transforms, quantizes and reconstructs 8x8 luma block b8; adds to *coeff_cost; returns nonzero if any quantized level is nonzero
+ int    dct_luma8x8(int b8,int *coeff_cost, int intra);
+ // presumably low-pass filters the reference samples in PredPel before intra 8x8 prediction -- TODO confirm
+ void   LowPassForIntra8x8Pred(int *PredPel, int block_up_left, int block_up, int block_left);
+ 
+ #endif //_TRANSFORM8X8_H_


Index: llvm-test/MultiSource/Applications/JM/lencod/vlc.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/vlc.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/vlc.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,1265 ----
+ 
+ /*!
+  ***************************************************************************
+  * \file vlc.c
+  *
+  * \brief
+  *    (CA)VLC coding functions
+  *
+  * \author
+  *    Main contributors (see contributors.h for copyright, address and affiliation details)
+  *    - Inge Lille-Langoy               <inge.lille-langoy at telenor.com>
+  *    - Detlev Marpe                    <marpe at hhi.de>
+  *    - Stephan Wenger                  <stewe at cs.tu-berlin.de>
+  ***************************************************************************
+  */
+ 
+ #include "contributors.h"
+ 
+ #include <stdlib.h>
+ #include <math.h>
+ #include <string.h>
+ #include <assert.h>
+ 
+ #include "global.h"
+ 
+ #include "vlc.h"
+ 
+ #if TRACE
+ #define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE)
+ #else
+ #define SYMTRACESTRING(s) // do nothing
+ #endif
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    ue_v, writes an ue(v) syntax element, returns the length in bits
+  *
+  * \param tracestring
+  *    the string for the trace file (only used when TRACE is enabled)
+  * \param value
+  *    the value to be coded
+  * \param bitstream
+  *    the target bitstream the value should be coded into;
+  *    bitstream->streamBuffer must not be NULL (asserted)
+  *
+  * \return
+  *    Number of bits used by the coded syntax element
+  *
+  * \note
+  *    This function always writes the bit buffer for the progressive scan flag, and
+  *    should not be used (or should be modified appropriately) for interlaced coding.
+  *    When used in the context of the Parameter Sets, this is not a problem.
+  *
+  *************************************************************************************
+  */
+ int ue_v (char *tracestring, int value, Bitstream *bitstream)
+ {
+   SyntaxElement symbol, *sym=&symbol;
+   sym->value1 = value;
+   sym->value2 = 0;
+ 
+   assert (bitstream->streamBuffer != NULL);
+ 
+   // derive code length and info bits, then assemble the bit pattern
+   ue_linfo(sym->value1,sym->value2,&(sym->len),&(sym->inf));
+   symbol2uvlc(sym);
+ 
+   writeUVLC2buffer (sym, bitstream);
+ 
+ #if TRACE
+   // strncpy leaves the destination unterminated when the source has
+   // TRACESTRING_SIZE or more characters, so terminate explicitly
+   strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE);
+   sym->tracestring[TRACESTRING_SIZE-1] = '\0';
+   trace2out (sym);
+ #endif
+ 
+   return (sym->len);
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    se_v, writes an se(v) syntax element, returns the length in bits
+  *
+  * \param tracestring
+  *    the string for the trace file (only used when TRACE is enabled)
+  * \param value
+  *    the value to be coded
+  * \param bitstream
+  *    the target bitstream the value should be coded into;
+  *    bitstream->streamBuffer must not be NULL (asserted)
+  *
+  * \return
+  *    Number of bits used by the coded syntax element
+  *
+  * \note
+  *    This function always writes the bit buffer for the progressive scan flag, and
+  *    should not be used (or should be modified appropriately) for interlaced coding.
+  *    When used in the context of the Parameter Sets, this is not a problem.
+  *
+  *************************************************************************************
+  */
+ int se_v (char *tracestring, int value, Bitstream *bitstream)
+ {
+   SyntaxElement symbol, *sym=&symbol;
+   sym->value1 = value;
+   sym->value2 = 0;
+ 
+   assert (bitstream->streamBuffer != NULL);
+ 
+   // derive code length and info bits, then assemble the bit pattern
+   se_linfo(sym->value1,sym->value2,&(sym->len),&(sym->inf));
+   symbol2uvlc(sym);
+ 
+   writeUVLC2buffer (sym, bitstream);
+ 
+ #if TRACE
+   // strncpy leaves the destination unterminated when the source has
+   // TRACESTRING_SIZE or more characters, so terminate explicitly
+   strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE);
+   sym->tracestring[TRACESTRING_SIZE-1] = '\0';
+   trace2out (sym);
+ #endif
+ 
+   return (sym->len);
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    u_1, writes a flag (u(1) syntax element), returns the length in bits, 
+  *    always 1
+  *
+  * \param tracestring
+  *    the string for the trace file (only used when TRACE is enabled)
+  * \param value
+  *    the value to be coded; only its least significant bit is written
+  * \param bitstream
+  *    the target bitstream the value should be coded into;
+  *    bitstream->streamBuffer must not be NULL (asserted)
+  *
+  * \return
+  *    Number of bits used by the coded syntax element (always 1)
+  *
+  * \note
+  *    This function always writes the bit buffer for the progressive scan flag, and
+  *    should not be used (or should be modified appropriately) for interlaced coding.
+  *    When used in the context of the Parameter Sets, this is not a problem.
+  *
+  *************************************************************************************
+  */
+ int u_1 (char *tracestring, int value, Bitstream *bitstream)
+ {
+   SyntaxElement symbol, *sym=&symbol;
+ 
+   // fixed-length code: the value itself is the bit pattern, no mapping step
+   sym->bitpattern = value;
+   sym->len = 1;
+   sym->value1 = value;
+ 
+   assert (bitstream->streamBuffer != NULL);
+ 
+   writeUVLC2buffer(sym, bitstream);
+   
+ #if TRACE
+   // strncpy leaves the destination unterminated when the source has
+   // TRACESTRING_SIZE or more characters, so terminate explicitly
+   strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE);
+   sym->tracestring[TRACESTRING_SIZE-1] = '\0';
+   trace2out (sym);
+ #endif
+ 
+   return (sym->len);
+ }
+ 
+ 
+ /*! 
+  *************************************************************************************
+  * \brief
+  *    u_v, writes a n bit fixed length syntax element, returns the length in bits
+  *
+  * \param n
+  *    length in bits
+  * \param tracestring
+  *    the string for the trace file (only used when TRACE is enabled)
+  * \param value
+  *    the value to be coded; its n least significant bits are written
+  * \param bitstream
+  *    the target bitstream the value should be coded into;
+  *    bitstream->streamBuffer must not be NULL (asserted)
+  *
+  * \return
+  *    Number of bits used by the coded syntax element 
+  *
+  * \note
+  *    This function always writes the bit buffer for the progressive scan flag, and
+  *    should not be used (or should be modified appropriately) for interlaced coding.
+  *    When used in the context of the Parameter Sets, this is not a problem.
+  *
+  *************************************************************************************
+  */
+ 
+ int u_v (int n, char *tracestring, int value, Bitstream *bitstream)
+ {
+   SyntaxElement symbol, *sym=&symbol;
+ 
+   // fixed-length code: the value itself is the bit pattern, no mapping step
+   sym->bitpattern = value;
+   sym->len = n;
+   sym->value1 = value;
+ 
+   assert (bitstream->streamBuffer != NULL);
+ 
+   writeUVLC2buffer(sym, bitstream);
+   
+ #if TRACE
+   // strncpy leaves the destination unterminated when the source has
+   // TRACESTRING_SIZE or more characters, so terminate explicitly
+   strncpy(sym->tracestring,tracestring,TRACESTRING_SIZE);
+   sym->tracestring[TRACESTRING_SIZE-1] = '\0';
+   trace2out (sym);
+ #endif
+ 
+   return (sym->len);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    mapping for ue(v) syntax elements
+  * \param ue
+  *    value to be mapped
+  * \param dummy
+  *    dummy parameter (unused; keeps the signature common to all mappings)
+  * \param len
+  *    returns mapped value length in bits (2*i + 1)
+  * \param info
+  *    returns mapped value (the i info bits of the code word)
+  ************************************************************************
+  */
+ void ue_linfo(int ue, int dummy, int *len,int *info)
+ {
+   int i,nn;
+ 
+   nn=(ue+1)/2;
+ 
+   // i = number of info bits, capped at 16
+   for (i=0; i < 16 && nn != 0; i++)
+   {
+     nn /= 2;
+   }
+   *len= 2*i + 1;
+   // exact integer power of two; avoids a round trip through floating-point pow()
+   *info=ue+1-(1<<i);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    mapping for se(v) syntax elements
+  * \param se
+  *    value to be mapped
+  * \param dummy
+  *    dummy parameter (unused; keeps the signature common to all mappings)
+  * \param len
+  *    returns mapped value length in bits (2*i + 1)
+  * \param info
+  *    returns mapped value (the i info bits of the code word)
+  ************************************************************************
+  */
+ void se_linfo(int se, int dummy, int *len,int *info)
+ {
+ 
+   int i,n,sign,nn;
+ 
+   // values <= 0 set the lowest info bit
+   sign = (se <= 0) ? 1 : 0;
+   n=abs(se) << 1;
+ 
+   /*
+   n+1 is the number in the code table.  Based on this we find length and info
+   */
+ 
+   nn=n/2;
+   // i = number of info bits, capped at 16
+   for (i=0; i < 16 && nn != 0; i++)
+   {
+     nn /= 2;
+   }
+   *len=i*2 + 1;
+   // exact integer power of two; avoids a round trip through floating-point pow()
+   *info=n - (1<<i) + sign;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Maps a coded block pattern to its code word using column 0 of the
+  *    NCBP table; row set 1 is used when img->yuv_format is nonzero,
+  *    row set 0 otherwise.
+  * \par Input:
+  *    Number in the code table
+  * \par Output:
+  *    length and info
+  ************************************************************************
+  */
+ void cbp_linfo_intra(int cbp, int dummy, int *len,int *info)
+ {
+   extern const unsigned char NCBP[2][48][2];
+   const int table_set = (img->yuv_format != 0) ? 1 : 0;
+   const int code_num  = NCBP[table_set][cbp][0];
+ 
+   ue_linfo(code_num, dummy, len, info);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Maps a coded block pattern to its code word using column 1 of the
+  *    NCBP table; row set 1 is used when img->yuv_format is nonzero,
+  *    row set 0 otherwise.
+  * \par Input:
+  *    Number in the code table
+  * \par Output:
+  *    length and info
+  ************************************************************************
+  */
+ void cbp_linfo_inter(int cbp, int dummy, int *len,int *info)
+ {
+   extern const unsigned char NCBP[2][48][2];
+   const int table_set = (img->yuv_format != 0) ? 1 : 0;
+   const int code_num  = NCBP[table_set][cbp][1];
+ 
+   ue_linfo(code_num, dummy, len, info);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    2x2 transform of chroma DC
+  * \par Input:
+  *    level and run for coefficients; level == 0 signals EOB
+  * \par Output:
+  *    length and info (for EOB: length 1, info 0)
+  * \note
+  *    see ITU document for bit assignment
+  ************************************************************************
+  */
+ void levrun_linfo_c2x2(int level,int run,int *len,int *info)
+ {
+   // code numbers for small levels, indexed [levabs-1][run]
+   static const int NTAB[2][2]=
+   {
+     {1,5},
+     {3,0}
+   };
+   // largest |level| covered by NTAB for each run
+   static const int LEVRUN[4]=
+   {
+     2,1,0,0
+   };
+ 
+   int levabs,i,n,sign,nn;
+ 
+   if (level == 0) //  check if the coefficient sign EOB (level=0)
+   {
+     *len=1;
+     *info=0;      // give the caller a defined value instead of leaving it unset
+     return;
+   }
+   sign = (level < 0) ? 1 : 0;
+   levabs=abs(level);
+   if (levabs <= LEVRUN[run])
+   {
+     n=NTAB[levabs-1][run]+1;
+   }
+   else
+   {
+     // escape: levels beyond the table
+     n=(levabs-LEVRUN[run])*8 + run*2;
+   }
+ 
+   nn=n/2;
+ 
+   // i = number of info bits, capped at 16
+   for (i=0; i < 16 && nn != 0; i++)
+   {
+     nn /= 2;
+   }
+   *len= 2*i + 1;
+   // exact integer power of two; avoids a round trip through floating-point pow()
+   *info=n-(1<<i)+sign;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Single scan coefficients
+  * \par Input:
+  *    level and run for coefficients; level == 0 signals EOB
+  * \par Output:
+  *    length and info (for EOB: length 1, info 0)
+  * \note
+  *    see ITU document for bit assignment
+  ************************************************************************
+  */
+ void levrun_linfo_inter(int level,int run,int *len,int *info)
+ {
+   // largest |level| covered by NTAB for each run
+   static const int LEVRUN[16]=
+   {
+     4,2,2,1,1,1,1,1,1,1,0,0,0,0,0,0
+   };
+   // code numbers for small levels, indexed [levabs-1][run]
+   static const int NTAB[4][10]=
+   {
+     { 1, 3, 5, 9,11,13,21,23,25,27},
+     { 7,17,19, 0, 0, 0, 0, 0, 0, 0},
+     {15, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+     {29, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+   };
+ 
+   int levabs,i,n,sign,nn;
+ 
+   if (level == 0)           //  check for EOB
+   {
+     *len=1;
+     *info=0;                // give the caller a defined value instead of leaving it unset
+     return;
+   }
+ 
+   sign = (level < 0) ? 1 : 0;
+ 
+   levabs=abs(level);
+   if (levabs <= LEVRUN[run])
+   {
+     n=NTAB[levabs-1][run]+1;
+   }
+   else
+   {
+     // escape: levels beyond the table
+     n=(levabs-LEVRUN[run])*32 + run*2;
+   }
+ 
+   nn=n/2;
+ 
+   // i = number of info bits, capped at 16
+   for (i=0; i < 16 && nn != 0; i++)
+   {
+     nn /= 2;
+   }
+   *len= 2*i + 1;
+   // exact integer power of two; avoids a round trip through floating-point pow()
+   *info=n-(1<<i)+sign;
+ 
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Double scan coefficients
+  * \par Input:
+  *    level and run for coefficients; level == 0 signals EOB
+  * \par Output:
+  *    length and info (for EOB: length 1, info 0)
+  * \note
+  *    see ITU document for bit assignment
+  ************************************************************************
+  */
+ void levrun_linfo_intra(int level,int run,int *len,int *info)
+ {
+   // largest |level| covered by NTAB for each run
+   static const int LEVRUN[8]=
+   {
+     9,3,1,1,1,0,0,0
+   };
+ 
+   // code numbers for small levels, indexed [levabs-1][run]
+   static const int NTAB[9][5] =
+   {
+     { 1, 3, 7,15,17},
+     { 5,19, 0, 0, 0},
+     { 9,21, 0, 0, 0},
+     {11, 0, 0, 0, 0},
+     {13, 0, 0, 0, 0},
+     {23, 0, 0, 0, 0},
+     {25, 0, 0, 0, 0},
+     {27, 0, 0, 0, 0},
+     {29, 0, 0, 0, 0},
+   };
+ 
+   int levabs,i,n,sign,nn;
+ 
+   if (level == 0)     //  check for EOB
+   {
+     *len=1;
+     *info=0;          // give the caller a defined value instead of leaving it unset
+     return;
+   }
+   sign = (level < 0) ? 1 : 0;
+ 
+   levabs=abs(level);
+   if (levabs <= LEVRUN[run])
+   {
+     n=NTAB[levabs-1][run]+1;
+   }
+   else
+   {
+     // escape: levels beyond the table
+     n=(levabs-LEVRUN[run])*16 + 16 + run*2;
+   }
+ 
+   nn=n/2;
+ 
+   // i = number of info bits, capped at 16
+   for (i=0; i < 16 && nn != 0; i++)
+   {
+     nn /= 2;
+   }
+   *len= 2*i + 1;
+   // exact integer power of two; avoids a round trip through floating-point pow()
+   *info=n-(1<<i)+sign;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Builds the bit pattern of a code word from its length and info
+  *    fields and stores it in sym->bitpattern.
+  *    A code word has the following format: 0 0 0 ... 1 Xn ...X2 X1 X0.
+  *
+  * \par Input:
+  *    Info   : Xn..X2 X1 X0                                             \n
+  *    Length : Total number of bits in the codeword
+  *
+  * \return
+  *    always 0
+  ************************************************************************
+  */
+  // NOTE this function is called with sym->inf > (1<<(sym->len/2)).  The upper bits of inf are junk
+ int symbol2uvlc(SyntaxElement *sym)
+ {
+   const int info_bits = sym->len / 2;      // number of Xn..X0 bits
+   const int marker    = 1 << info_bits;    // the '1' in front of the info bits
+ 
+   // keep only the low info_bits of inf and put the marker bit in front
+   sym->bitpattern = marker | (sym->inf & (marker - 1));
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Maps a syntax element with its own mapping function, builds the
+  *    UVLC code word and writes it to the bitstream of the data partition.
+  *    The bitstream's write_flag is set for every element whose type is
+  *    not SE_HEADER.
+  *
+  * \return
+  *    length of the code word in bits
+  ************************************************************************
+  */
+ int writeSyntaxElement_UVLC(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   // (value1,value2) -> (len,inf) -> bitpattern
+   se->mapping(se->value1, se->value2, &se->len, &se->inf);
+   symbol2uvlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ 
+   if (se->type != SE_HEADER)
+   {
+     this_dataPart->bitstream->write_flag = 1;
+   }
+ 
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes an intra prediction mode: a single '1' bit when
+  *    se->value1 is -1, otherwise se->value1 as a 4 bit code.
+  *    The bitstream's write_flag is set for every element whose type is
+  *    not SE_HEADER.
+  *
+  * \return
+  *    length of the code word in bits (1 or 4)
+  ************************************************************************
+  */
+ int writeSyntaxElement_Intra4x4PredictionMode(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   const int short_code = (se->value1 == -1);
+ 
+   if (short_code)
+   {
+     se->len = 1;
+     se->inf = 1;
+   }
+   else
+   {
+     se->len = 4;
+     se->inf = se->value1;
+   }
+   se->bitpattern = se->inf;
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ 
+   if (se->type != SE_HEADER)
+   {
+     this_dataPart->bitstream->write_flag = 1;
+   }
+ 
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Maps a syntax element with its own mapping function, builds the
+  *    UVLC code word and writes it to the given bitstream.
+  * \author
+  *  Tian Dong
+  *
+  * \return
+  *    length of the code word in bits
+  ************************************************************************
+  */
+ int writeSyntaxElement2Buf_UVLC(SyntaxElement *se, Bitstream* this_streamBuffer )
+ {
+   // (value1,value2) -> (len,inf) -> bitpattern
+   se->mapping(se->value1, se->value2, &se->len, &se->inf);
+   symbol2uvlc(se);
+ 
+   writeUVLC2buffer(se, this_streamBuffer);
+ 
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    writes UVLC code to the appropriate buffer
+  *
+  *    The se->len low bits of se->bitpattern are appended to the stream,
+  *    most significant bit first. Bits are collected in byte_buf; each
+  *    completed byte is stored at streamBuffer[byte_pos] and byte_pos is
+  *    advanced. Nothing is written when se->len is zero or negative.
+  ************************************************************************
+  */
+ void  writeUVLC2buffer(SyntaxElement *se, Bitstream *currStream)
+ {
+ 
+   int i;
+   unsigned int mask;
+ 
+   // No bits to write. Returning here also avoids computing the mask with
+   // a negative shift count, which is undefined behaviour.
+   if (se->len <= 0)
+     return;
+ 
+   // Unsigned constant: shifting a signed 1 into bit 31 is undefined
+   // behaviour, which a 32 bit code word would otherwise trigger.
+   mask = 1u << (se->len-1);
+ 
+   // Add the new bits to the bitstream.
+   // Write out a byte if it is full
+   for (i=0; i<se->len; i++)
+   {
+     currStream->byte_buf <<= 1;
+     if (se->bitpattern & mask)
+       currStream->byte_buf |= 1;
+     currStream->bits_to_go--;
+     mask >>= 1;
+     if (currStream->bits_to_go==0)
+     {
+       currStream->bits_to_go = 8;
+       currStream->streamBuffer[currStream->byte_pos++]=currStream->byte_buf;
+       currStream->byte_buf = 0;
+     }
+   }
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes an already prepared code word (se->bitpattern, se->len) to
+  *    the given bitstream; no mapping step is performed here.
+  * \author
+  *  Tian Dong
+  *
+  * \return
+  *    length of the code word in bits
+  ************************************************************************
+  */
+ int writeSyntaxElement2Buf_Fixed(SyntaxElement *se, Bitstream* this_streamBuffer )
+ {
+   writeUVLC2buffer(se, this_streamBuffer);
+ 
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Copies the sym->len low bits of sym->inf into sym->bitpattern,
+  *    most significant bit first.
+  *
+  * \par Input:
+  *    Info   : Xn..X2 X1 X0                                             \n
+  *    Length : Total number of bits in the codeword
+  *
+  * \return
+  *    always 0
+  ************************************************************************
+  */
+ 
+ int symbol2vlc(SyntaxElement *sym)
+ {
+   int bit_idx;
+ 
+   // start from an empty pattern and shift the info bits in one by one
+   sym->bitpattern = 0;
+ 
+   for (bit_idx = sym->len - 1; bit_idx >= 0; bit_idx--)
+   {
+     sym->bitpattern <<= 1;
+     sym->bitpattern |= (0x01 & (sym->inf >> bit_idx));
+   }
+   return 0;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Writes a VLC code word given directly by the caller:
+  *    se->value1 holds the code bits, se->value2 the code length.
+  *
+  * \return
+  *    length of the code word in bits
+  ************************************************************************
+  */
+ int writeSyntaxElement_VLC(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   // the caller supplies code bits and length directly
+   se->inf = se->value1;
+   se->len = se->value2;
+   symbol2vlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ 
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write VLC for NumCoeff and TrailingOnes
+  *
+  *    On entry se->len holds the number of the VLC table to use,
+  *    se->value1 the number of coefficients and se->value2 the number of
+  *    trailing ones. Table number 3 selects a 6 bit fixed length code;
+  *    any other number indexes lentab/codtab. A table entry of length 0
+  *    marks an invalid combination: an error is printed and the program
+  *    exits.
+  *
+  * \return
+  *    length of the code word in bits
+  ************************************************************************
+  */
+ 
+ int writeSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   // code lengths, indexed [vlcnum][numtrailingones][numcoeff]
+   static const int lentab[3][4][17] = 
+   {
+     {   // 0702
+       { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+       { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16},
+       { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16},
+       { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16},
+     },
+     {
+       { 2, 6, 6, 7, 8, 8, 9,11,11,12,12,12,13,13,13,14,14},
+       { 0, 2, 5, 6, 6, 7, 8, 9,11,11,12,12,13,13,14,14,14},
+       { 0, 0, 3, 6, 6, 7, 8, 9,11,11,12,12,13,13,13,14,14},
+       { 0, 0, 0, 4, 4, 5, 6, 6, 7, 9,11,11,12,13,13,13,14},
+     },
+     {
+       { 4, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,10},
+       { 0, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9,10,10,10},
+       { 0, 0, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,10},
+       { 0, 0, 0, 4, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9,10,10,10},
+     },
+ 
+   };
+ 
+   // code bits, same indexing as lentab
+   static const int codtab[3][4][17] = 
+   {
+     {
+       { 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7,4}, 
+       { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10,6}, 
+       { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9,5}, 
+       { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12,8},
+     },
+     {
+       { 3,11, 7, 7, 7, 4, 7,15,11,15,11, 8,15,11, 7, 9,7}, 
+       { 0, 2, 7,10, 6, 6, 6, 6,14,10,14,10,14,10,11, 8,6}, 
+       { 0, 0, 3, 9, 5, 5, 5, 5,13, 9,13, 9,13, 9, 6,10,5}, 
+       { 0, 0, 0, 5, 4, 6, 8, 4, 4, 4,12, 8,12,12, 8, 1,4},
+     },
+     {
+       {15,15,11, 8,15,11, 9, 8,15,11,15,11, 8,13, 9, 5,1}, 
+       { 0,14,15,12,10, 8,14,10,14,14,10,14,10, 7,12, 8,4},
+       { 0, 0,13,14,11, 9,13, 9,13,10,13, 9,13, 9,11, 7,3},
+       { 0, 0, 0,12,11,10, 9, 8,13,12,12,12, 8,12,10, 6,2},
+     },
+   };
+   int vlcnum = se->len;   // table number is passed in through the len field
+ 
+   // se->value1 : numcoeff
+   // se->value2 : numtrailingones
+ 
+   if (vlcnum != 3)
+   {
+     se->len = lentab[vlcnum][se->value2][se->value1];
+     se->inf = codtab[vlcnum][se->value2][se->value1];
+   }
+   else
+   {
+     se->len = 6;  // 4 + 2 bit FLC
+     if (se->value1 > 0)
+       se->inf = ((se->value1-1) << 2) | se->value2;
+     else
+       se->inf = 3;
+   }
+ 
+   // a zero length marks a combination that has no code word
+   if (se->len == 0)
+   {
+     printf("ERROR: (numcoeff,trailingones) not valid: vlc=%d (%d, %d)\n", 
+       vlcnum, se->value1, se->value2);
+     exit(-1);
+   }
+ 
+   symbol2vlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write VLC for NumCoeff and TrailingOnes for Chroma DC
+  *
+  *    The table set is selected by img->yuv_format - 1; se->value1 holds
+  *    the number of coefficients and se->value2 the number of trailing
+  *    ones. A table entry of length 0 marks an invalid combination: an
+  *    error is printed and the program exits.
+  *
+  * \return
+  *    length of the code word in bits
+  ************************************************************************
+  */
+ int writeSyntaxElement_NumCoeffTrailingOnesChromaDC(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   // code lengths, indexed [yuv_format-1][numtrailingones][numcoeff]
+   static const int lentab[3][4][17] = 
+   {
+     //YUV420
+    {{ 2, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 1, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+     { 0, 0, 3, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+     { 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+     //YUV422
+    {{ 1, 7, 7, 9, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 2, 7, 7, 9,10,11,12,12, 0, 0, 0, 0, 0, 0, 0, 0}, 
+     { 0, 0, 3, 7, 7, 9,10,11,12, 0, 0, 0, 0, 0, 0, 0, 0}, 
+     { 0, 0, 0, 5, 6, 7, 7,10,11, 0, 0, 0, 0, 0, 0, 0, 0}},
+     //YUV444
+    {{ 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16},
+     { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16},
+     { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16},
+     { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16}}
+   };
+ 
+   // code bits, same indexing as lentab
+   static const int codtab[3][4][17] = 
+   {
+     //YUV420
+    {{ 1, 7, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 1, 6, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+     //YUV422
+    {{ 1,15,14, 7, 6, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 1,13,12, 5, 6, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 0, 1,11,10, 4, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0},
+     { 0, 0, 0, 1, 1, 9, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}},
+     //YUV444
+    {{ 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7, 4}, 
+     { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10, 6}, 
+     { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9, 5}, 
+     { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12, 8}}
+   
+   };
+   const int table_set = img->yuv_format - 1;
+ 
+   // se->value1 : numcoeff
+   // se->value2 : numtrailingones
+   se->len = lentab[table_set][se->value2][se->value1];
+   se->inf = codtab[table_set][se->value2][se->value1];
+ 
+   // a zero length marks a combination that has no code word
+   if (se->len == 0)
+   {
+     printf("ERROR: (numcoeff,trailingones) not valid: (%d, %d)\n", 
+       se->value1, se->value2);
+     exit(-1);
+   }
+ 
+   symbol2vlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ #if TRACE
+   if (se->type <= 1)
+   {
+     trace2out (se);
+   }
+ #endif
+ 
+   return se->len;
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write VLC for TotalZeros: on entry se->len selects the table row and se->value1 the column; the looked-up (len, inf) pair is written to this_dataPart->bitstream and se->len is returned
+  ************************************************************************
+  */
+ int writeSyntaxElement_TotalZeros(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   static const int lentab[TOTRUN_NUM][16] = // code lengths (stored in se->len); entries omitted from a row are zero-initialised and rejected below
+   {
+     { 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},  
+     { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},  
+     { 4,3,3,3,4,4,3,3,4,5,5,6,5,6},  
+     { 5,3,4,4,3,3,3,4,3,4,5,5,5},  
+     { 4,4,4,3,3,3,3,3,4,5,4,5},  
+     { 6,5,3,3,3,3,3,3,4,3,6},  
+     { 6,5,3,3,3,2,3,4,3,6},  
+     { 6,4,5,3,2,2,3,3,6},  
+     { 6,6,4,2,2,3,2,5},  
+     { 5,5,3,2,2,2,4},  
+     { 4,4,3,3,1,3},  
+     { 4,4,2,1,3},  
+     { 3,3,1,2},  
+     { 2,2,1},  
+     { 1,1},  
+   };
+ 
+   static const int codtab[TOTRUN_NUM][16] = // code values (stored in se->inf), same row/column layout as lentab
+   {
+     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+     {3,7,5,4,6,5,4,3,3,2,2,1,0},
+     {5,4,3,7,6,5,4,3,2,1,1,0},
+     {1,1,7,6,5,4,3,2,1,1,0},
+     {1,1,5,4,3,3,2,1,1,0},
+     {1,1,1,3,3,2,2,1,0},
+     {1,0,1,3,2,1,1,1,},
+     {1,0,1,3,2,1,1,},
+     {0,1,1,2,1,3},
+     {0,1,1,1,1},
+     {0,1,1,1},
+     {0,1,1},
+     {0,1},  
+   };
+   int vlcnum;
+ 
+   vlcnum = se->len; // the caller passes the table row in se->len; the field is overwritten with the code length below
+ 
+   // se->value1 : TotalZeros
+   se->len = lentab[vlcnum][se->value1]; // NOTE(review): neither index is range-checked before the lookup
+   se->inf = codtab[vlcnum][se->value1];
+ 
+   if (se->len == 0) // a zero length marks a (row, column) pair that has no code
+   {
+     printf("ERROR: (TotalZeros) not valid: (%d)\n",se->value1);
+     exit(-1);
+   }
+ 
+   symbol2vlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ #if TRACE
+   if (se->type <= 1)
+     trace2out (se);
+ #endif
+ 
+   return (se->len);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write VLC for TotalZeros for Chroma DC: table set chosen by img->yuv_format - 1, row by se->len (on entry), column by se->value1; returns se->len after writing to this_dataPart->bitstream
+  ************************************************************************
+  */
+ int writeSyntaxElement_TotalZerosChromaDC(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   static const int lentab[3][TOTRUN_NUM][16] = // code lengths; rows and entries left out of the initialiser are zero and rejected below
+   {
+     //YUV420
+    {{ 1,2,3,3},
+     { 1,2,2},
+     { 1,1}},
+     //YUV422
+    {{ 1,3,3,4,4,4,5,5},
+     { 3,2,3,3,3,3,3},
+     { 3,3,2,2,3,3},
+     { 3,2,2,2,3},
+     { 2,2,2,2},
+     { 2,2,1},   
+     { 1,1}},
+     //YUV444
+    {{ 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},  
+     { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},  
+     { 4,3,3,3,4,4,3,3,4,5,5,6,5,6},  
+     { 5,3,4,4,3,3,3,4,3,4,5,5,5},  
+     { 4,4,4,3,3,3,3,3,4,5,4,5},  
+     { 6,5,3,3,3,3,3,3,4,3,6},  
+     { 6,5,3,3,3,2,3,4,3,6},  
+     { 6,4,5,3,2,2,3,3,6},  
+     { 6,6,4,2,2,3,2,5},  
+     { 5,5,3,2,2,2,4},  
+     { 4,4,3,3,1,3},  
+     { 4,4,2,1,3},  
+     { 3,3,1,2},  
+     { 2,2,1},  
+     { 1,1}}  
+   };
+ 
+   static const int codtab[3][TOTRUN_NUM][16] = // code values (stored in se->inf), same layout as lentab
+   {
+     //YUV420
+    {{ 1,1,1,0},
+     { 1,1,0},
+     { 1,0}},
+     //YUV422
+    {{ 1,2,3,2,3,1,1,0},
+     { 0,1,1,4,5,6,7},
+     { 0,1,1,2,6,7},
+     { 6,0,1,2,7},
+     { 0,1,2,3},
+     { 0,1,1},   
+     { 0,1}},
+     //YUV444
+    {{1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+     {3,7,5,4,6,5,4,3,3,2,2,1,0},
+     {5,4,3,7,6,5,4,3,2,1,1,0},
+     {1,1,7,6,5,4,3,2,1,1,0},
+     {1,1,5,4,3,3,2,1,1,0},
+     {1,1,1,3,3,2,2,1,0},
+     {1,0,1,3,2,1,1,1,},
+     {1,0,1,3,2,1,1,},
+     {0,1,1,2,1,3},
+     {0,1,1,1,1},
+     {0,1,1,1},
+     {0,1,1},
+     {0,1}}  
+   };
+   int vlcnum;
+   int yuv = img->yuv_format - 1; // 0..2 index into the tables above; NOTE(review): yuv_format 0 would give -1 — confirm callers exclude it
+   
+   vlcnum = se->len; // the caller passes the table row in se->len; the field is overwritten with the code length below
+ 
+   // se->value1 : TotalZeros
+   se->len = lentab[yuv][vlcnum][se->value1]; // NOTE(review): none of the three indices is range-checked
+   se->inf = codtab[yuv][vlcnum][se->value1];
+ 
+   if (se->len == 0) // a zero length marks a combination that has no code
+   {
+     printf("ERROR: (TotalZeros) not valid: (%d)\n",se->value1);
+     exit(-1);
+   }
+ 
+   symbol2vlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ #if TRACE
+   if (se->type <= 1)
+     trace2out (se);
+ #endif
+ 
+   return (se->len);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    write VLC for Run Before Next Coefficient, VLC0: on entry se->len selects the table row and se->value1 (the run) the column; returns se->len after writing to this_dataPart->bitstream
+  ************************************************************************
+  */
+ int writeSyntaxElement_Run(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   static const int lentab[TOTRUN_NUM][16] = // code lengths; only 7 rows are initialised, the remaining rows/entries are zero and rejected below
+   {
+     {1,1},
+     {1,2,2},
+     {2,2,2,2},
+     {2,2,2,3,3},
+     {2,2,3,3,3,3},
+     {2,3,3,3,3,3,3},
+     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
+   };
+ 
+   static const int codtab[TOTRUN_NUM][16] = // code values (stored in se->inf), same layout as lentab
+   {
+     {1,0},
+     {1,1,0},
+     {3,2,1,0},
+     {3,2,1,1,0},
+     {3,2,3,2,1,0},
+     {3,0,1,3,2,5,4},
+     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
+   };
+   int vlcnum;
+ 
+   vlcnum = se->len; // the caller passes the table row in se->len; the field is overwritten with the code length below
+ 
+   // se->value1 : run
+   se->len = lentab[vlcnum][se->value1]; // NOTE(review): neither index is range-checked before the lookup
+   se->inf = codtab[vlcnum][se->value1];
+ 
+   if (se->len == 0) // a zero length marks a (row, run) pair that has no code
+   {
+     printf("ERROR: (run) not valid: (%d)\n",se->value1);
+     exit(-1);
+   }
+ 
+   symbol2vlc(se);
+ 
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ #if TRACE
+   if (se->type <= 1)
+     trace2out (se);
+ #endif
+ 
+   return (se->len);
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Write a coefficient level (se->value1) with the VLC1 code: a short form for |level| < 8 and two escape forms above that; returns se->len
+  ************************************************************************
+  */
+ int writeSyntaxElement_Level_VLC1(SyntaxElement *se, DataPartition *this_dataPart)
+ {
+   const int coeff     = se->value1;
+   const int negative  = (coeff < 0);   // 1 for negative levels, 0 otherwise
+   const int magnitude = abs(coeff);
+ 
+   if (magnitude >= 16)
+   {
+     // second escape form: length 14 + 2 + 12, payload is (|level| - 16) followed by the sign bit
+     se->len = 28;
+     se->inf = (1 << 12) | ((magnitude - 16) << 1) | negative;
+   }
+   else if (magnitude >= 8)
+   {
+     // first escape form: length 14 + 1 + 4, payload is (|level| - 8) followed by the sign bit
+     se->len = 19;
+     se->inf = (1 << 4) | ((magnitude - 8) << 1) | negative;
+   }
+   else
+   {
+     // short form: the sign is carried by the length, the code value is always 1
+     se->len = 2 * magnitude - 1 + negative;
+     se->inf = 1;
+   }
+ 
+   // hand the (len, inf) pair to the shared VLC path and append it to the partition's bitstream
+   symbol2vlc(se);
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ 
+ #if TRACE
+   if (se->type <= 1)
+     trace2out (se);
+ #endif
+ 
+   // se->len as left by the calls above
+   return (se->len);
+ 
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Write a coefficient level (se->value1) with level table number vlc: prefix/suffix form below the escape threshold, fixed 28-bit escape form at or above it; returns se->len
+  ************************************************************************
+  */
+ int writeSyntaxElement_Level_VLCN(SyntaxElement *se, int vlc, DataPartition *this_dataPart)
+ {
+   const int coeff     = se->value1;
+   const int negative  = (coeff < 0);   // 1 for negative levels, 0 otherwise
+   const int magnitude = abs(coeff);
+ 
+   // number of suffix bits, and the smallest magnitude that needs the escape form
+   const int suffix_bits  = vlc-1;
+   const int first_escape = (15<<suffix_bits)+1;
+ 
+   // (magnitude - 1) is split into a prefix count (high part) and a suffix (low suffix_bits bits)
+   const int low_mask     = ~((0xffffffff)<<suffix_bits);
+   const int prefix_count = (magnitude-1)>>suffix_bits;
+   const int low_bits     = (magnitude-1)&low_mask;
+ 
+   if (magnitude >= first_escape)
+   {
+     // escape form: distance from the threshold followed by the sign bit
+     se->len = 28;
+     se->inf = (1<<12)|((magnitude-first_escape)<<1)|negative;
+   }
+   else
+   {
+     // regular form: marker bit, suffix, sign bit
+     se->len = prefix_count + vlc + 1;
+     se->inf = (1<<(suffix_bits+1))|(low_bits<<1)|negative;
+   }
+ 
+   // hand the (len, inf) pair to the shared VLC path and append it to the partition's bitstream
+   symbol2vlc(se);
+   writeUVLC2buffer(se, this_dataPart->bitstream);
+ 
+ #if TRACE
+   if (se->type <= 1)
+     trace2out (se);
+ #endif
+ 
+   // se->len as left by the calls above
+   return (se->len);
+ 
+ }
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Write out a trace string on the trace file: bit counter, sym->tracestring, the sym->len bits of sym->bitpattern and sym->value1. Does nothing when p_trace is NULL.
+  ************************************************************************
+  */
+ #if TRACE
+ void trace2out(SyntaxElement *sym)
+ {
+   static int bitcounter = 0; // running total of sym->len over all traced symbols
+   int i, chars;
+ 
+   if (p_trace != NULL)
+   {
+     putc('@', p_trace);
+     chars = fprintf(p_trace, "%i", bitcounter);
+     while(chars++ < 6) // pad the counter column with blanks
+       putc(' ',p_trace);
+ 
+     chars += fprintf(p_trace, "%s", sym->tracestring);
+     while(chars++ < 55) // pad the description column with blanks
+       putc(' ',p_trace);
+ 
+     // align bit pattern
+     if(sym->len<15)
+     {
+       for(i=0 ; i<15-sym->len ; i++)
+         fputc(' ', p_trace);
+     }
+     
+     // print bit pattern
+     bitcounter += sym->len;
+     for(i=1 ; i<=sym->len ; i++) // most significant of the sym->len bits first
+     {
+       if((sym->bitpattern >> (sym->len-i)) & 0x1)
+         fputc('1', p_trace);
+       else
+         fputc('0', p_trace);
+     }
+     fprintf(p_trace, " (%3d) \n",sym->value1);
+     fflush (p_trace); // kept inside the NULL check: fflush(NULL) would flush every open output stream
+   }
+ }
+ #endif
+ 
+ 
+ /*!
+  ************************************************************************
+  * \brief
+  *    Completes a partially filled byte buffer of the Bitstream and
+  *    appends it to the streamBuffer; the unused low bits are set to 1.
+  *
+  * \param
+  *   currStream: the Bitstream on which byte alignment is to be established
+  *
+  ************************************************************************
+  */
+ void writeVlcByteAlign(Bitstream* currStream)
+ {
+   const int pad = currStream->bits_to_go;   // number of low bits that get filled with ones
+   if (pad >= 8) return;                     // no partial byte pending
+   // move the pending bits up by pad positions and set the pad low bits to 1
+   currStream->byte_buf = (currStream->byte_buf << pad) | (0xff >> (8 - pad));
+   stats->bit_use_stuffingBits[img->type] += pad;
+   currStream->streamBuffer[currStream->byte_pos++] = currStream->byte_buf;
+   currStream->bits_to_go = 8;
+ }
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/vlc.h
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/vlc.h:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/vlc.h	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,52 ----
+ 
+ /*!
+  *************************************************************************************
+  * \file vlc.h
+  *
+  * \brief
+  *    Prototypes for VLC coding functions
+  * \author
+  *     Karsten Suehring
+  *************************************************************************************
+  */
+ 
+ #ifndef _VLC_H_ // NOTE(review): identifiers starting with underscore + uppercase letter are reserved in C
+ #define _VLC_H_
+ // This header includes nothing itself: Bitstream, SyntaxElement and DataPartition must already be declared by the including file.
+ int u_1  (char *tracestring, int value, Bitstream *bitstream);
+ int se_v (char *tracestring, int value, Bitstream *bitstream);
+ int ue_v (char *tracestring, int value, Bitstream *bitstream);
+ int u_v  (int n, char *tracestring, int value, Bitstream *bitstream);
+ 
+ // The *_linfo functions return nothing; presumably their results are delivered through the len/info pointers — confirm in vlc.c.
+ void levrun_linfo_c2x2(int level,int run,int *len,int *info);
+ void levrun_linfo_intra(int level,int run,int *len,int *info);
+ void levrun_linfo_inter(int level,int run,int *len,int *info);
+ 
+ int   writeSyntaxElement_UVLC(SyntaxElement *se, DataPartition *this_dataPart);
+ 
+ int   writeSyntaxElement2Buf_UVLC(SyntaxElement *se, Bitstream* this_streamBuffer );
+ void  writeUVLC2buffer(SyntaxElement *se, Bitstream *currStream);
+ int   writeSyntaxElement2Buf_Fixed(SyntaxElement *se, Bitstream* this_streamBuffer );
+ int   symbol2uvlc(SyntaxElement *se);
+ void  ue_linfo(int n, int dummy, int *len,int *info);
+ void  se_linfo(int mvd, int dummy, int *len,int *info);
+ void  cbp_linfo_intra(int cbp, int dummy, int *len,int *info);
+ void  cbp_linfo_inter(int cbp, int dummy, int *len,int *info);
+ 
+ // CAVLC
+ void  CAVLC_init(); // NOTE(review): an empty parameter list is not a prototype in C; (void) would enable argument checking
+ int writeCoeff4x4_CAVLC (int block_type, int b8, int b4, int param);
+ // symbol2vlc, trace2out and writeVlcByteAlign are used/defined in vlc.c but not declared in this header — presumably declared elsewhere.
+ int   writeSyntaxElement_VLC(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_TotalZeros(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_TotalZerosChromaDC(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_Run(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_NumCoeffTrailingOnesChromaDC(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_Level_VLC1(SyntaxElement *se, DataPartition *this_dataPart);
+ int   writeSyntaxElement_Level_VLCN(SyntaxElement *se, int vlc, DataPartition *this_dataPart);
+ int   writeSyntaxElement_Intra4x4PredictionMode(SyntaxElement *se, DataPartition *this_dataPart);
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Applications/JM/lencod/weighted_prediction.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/weighted_prediction.c:1.1
*** /dev/null	Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/lencod/weighted_prediction.c	Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,764 ----
+ 
+ /*!
+ *************************************************************************************
+ * \file weighted_prediction.c
+ *
+ * \brief
+ *    Estimate weights for WP
+ *
+ * \author
+ *    Main contributors (see contributors.h for copyright, address and affiliation details)
+ *     - Alexis Michael Tourapis         <alexismt at ieee.org>
+ *************************************************************************************
+ */
+ #include <stdlib.h>
+ #include "contributors.h"
+ 
+ #include "global.h"
+ #include <memory.h>
+ #include "image.h"
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Estimates reference picture weighting factors for P slices. With select_offset == 0 the luma weight is the ratio of the sample sums of imgY_org and the reference's imgY_11; otherwise the luma offset is their mean difference. Results go to wp_weight / wp_offset and to the weighted reference planes (imgY_11_w, imgY_ups_w).
+ ************************************************************************
+ */
+ 
+ void estimate_weighting_factor_P_slice(int select_offset)
+ {
+   int i, j, n;
+   
+   double dc_org = 0.0;
+   int index;
+   int comp;
+   double dc_ref[MAX_REFERENCE_PICTURES];
+ 
+   pel_t*  ref_pic;   
+   pel_t*  ref_pic_w;   
+   int default_weight[3];
+ 
+   int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+   int weight[6][MAX_REFERENCE_PICTURES][3]; // first dimension must cover clist < 2 + list_offset, i.e. up to 6
+   int offset[6][MAX_REFERENCE_PICTURES][3]; // (was 2, which overflowed whenever list_offset was 2 or 4)
+   int clist;
+   
+   // fixed denominators: weights are expressed in units of 1/32
+   
+   luma_log_weight_denom = 5;
+   chroma_log_weight_denom = 5;
+   wp_luma_round = 1 << (luma_log_weight_denom - 1);
+   wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+   default_weight[0] = 1<<luma_log_weight_denom;
+   default_weight[1] = default_weight[2] = 1<<chroma_log_weight_denom;
+   
+   /* set all values to defaults */
+   for (i = 0; i < 2 + list_offset; i++)
+   {
+     for (j = 0; j < listXsize[i]; j++)
+     {
+       for (n = 0; n < 3; n++)
+       {
+         weight[i][j][n] = default_weight[n];
+         wp_weight[i][j][n] = default_weight[n];
+         wp_offset[i][j][n] = 0;
+         offset[i][j][n] = 0;
+       }
+     }
+   }
+   
+   for (i = 0; i < img->height; i++)
+   {
+     for (j = 0; j < img->width; j++)
+     {
+       dc_org += (double) imgY_org[i][j];
+     }
+   } 
+     
+   for (clist=0; clist<2 + list_offset; clist++)
+   {
+     for (n = 0; n < listXsize[clist]; n++)
+     {
+       dc_ref[n] = 0.0;
+       
+       ref_pic       = listX[clist][n]->imgY_11;
+       ref_pic_w     = listX[clist][n]->imgY_11_w;
+       
+       // Y
+       for (i = 0; i < img->height * img->width; i++)
+       {
+         dc_ref[n] += (double) ref_pic[i];
+       }
+       
+       if (select_offset == 0) // select_offset is an int; compare against an integer constant
+       {
+         if (dc_ref[n] != 0)
+           weight[clist][n][0] = (int) (default_weight[0] * dc_org / dc_ref[n] + 0.5);
+         else
+           weight[clist][n][0] = default_weight[0];  // only used when reference picture is black
+         if (weight[clist][n][0] < -64 || weight[clist][n][0] >127)
+           weight[clist][n][0] = default_weight[0];
+       }
+       else
+       {        
+         offset[clist][n][0] = (int) ((dc_org-dc_ref[n])/(img->height*img->width)+0.5);
+         offset[clist][n][0] = (offset[clist][n][0]<-128) ? -128: (offset[clist][n][0]>127) ? 127:offset[clist][n][0];
+         weight[clist][n][0] = default_weight[0];
+       }
+       
+       
+       /* for now always use default weight for chroma weight */
+       weight[clist][n][1] = default_weight[1];
+       weight[clist][n][2] = default_weight[2];
+       
+       
+       
+       /* store weighted reference pic for motion estimation */
+       for (i = 0; i < img->height * img->width; i++)
+       {          
+         ref_pic_w[i] = Clip3 (0, img->max_imgpel_value , (((int) ref_pic[i] * weight[clist][n][0] + wp_luma_round) >> luma_log_weight_denom) + offset[clist][n][0]);
+       }
+       for (i = 0; i < 4*(img->height + 2*IMG_PAD_SIZE) ; i++) // upsampled plane of the same picture: index by clist, not LIST_0
+       {
+         for (j = 0; j< 4*(img->width + 2*IMG_PAD_SIZE); j++)
+         {
+           listX[clist][n]->imgY_ups_w[i][j] =   Clip3 (0, img->max_imgpel_value, (((int) listX[clist][n]->imgY_ups[i][j] * weight[clist][n][0] + wp_luma_round) >> luma_log_weight_denom) + offset[clist][n][0]);
+         }
+       }
+     }
+   }
+   
+   for (clist=0; clist<2 + list_offset; clist++) // publish the local estimates through the global tables
+   {
+     for (index = 0; index < listXsize[clist]; index++)
+     {
+       for (comp=0; comp < 3; comp ++)
+       {
+         wp_weight[clist][index][comp] = weight[clist][index][comp];
+         wp_offset[clist][index][comp] = offset[clist][index][comp];
+         // printf("index %d component %d weight %d offset %d\n",index,comp,weight[0][index][comp],offset[0][index][comp]);
+       }
+     }
+   }
+   
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    Estimates reference picture weighting factors for B slices. With active_pps->weighted_bipred_idc == 2 the POC-distance based (implicit) weights are copied to wbp_weight and the reference planes are copied unweighted; otherwise luma weights are derived from the ratio of sample sums and the weighted reference planes are regenerated.
+ ************************************************************************
+ */
+ void estimate_weighting_factor_B_slice()
+ {
+   int i, j, k, n;
+   
+   int tx,DistScaleFactor;
+   double dc_org = 0.0;
+   int index;
+   int comp;
+   double dc_ref[6][MAX_REFERENCE_PICTURES];
+   
+   int log_weight_denom; // assigned in two loops below but never read in this function
+   
+   pel_t*  ref_pic;   
+   pel_t*  ref_pic_w;   
+   pel_t**  ref_qpic;   
+   pel_t**  ref_qpic_w;   
+ 
+   int default_weight[3];
+   int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+   int weight[6][MAX_REFERENCE_PICTURES][3]; 
+   int offset[6][MAX_REFERENCE_PICTURES][3];       
+   int im_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; // only rows 0 and 1 are used below
+   int im_offset[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; // only ever written with 0 and never read
+   int clist;
+   int wf_weight, wf_offset;
+   
+   if (active_pps->weighted_bipred_idc == 2) //! implicit mode
+   {
+     luma_log_weight_denom = 5;
+     chroma_log_weight_denom = 5;
+   }
+   else
+   {
+     luma_log_weight_denom = 6;
+     chroma_log_weight_denom = 6;
+   }
+ 
+   wp_luma_round = 1 << (luma_log_weight_denom - 1);
+   wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+   default_weight[0] = 1<<luma_log_weight_denom;
+   default_weight[1] = 1<<chroma_log_weight_denom;
+   default_weight[2] = 1<<chroma_log_weight_denom;
+   
+   /* set all values to defaults */
+   for (i = 0; i < 2 + list_offset; i++)
+   {
+     for (j = 0; j < listXsize[i]; j++)
+     {
+       for (n = 0; n < 3; n++)
+       {
+         wp_weight[i][j][n] = default_weight[n];
+         wp_offset[i][j][n] = 0;
+         offset   [i][j][n] = 0;
+         weight   [i][j][n] = default_weight[n];
+       }
+     }
+   }
+   
+   for (i = 0; i < listXsize[LIST_0]; i++) // computed in every mode, but im_weight is only consumed in the implicit branch below
+   {
+     for (j = 0; j < listXsize[LIST_1]; j++)
+     {
+       int td, tb;
+       td = Clip3(-128,127,(listX[LIST_1][j]->poc - listX[LIST_0][i]->poc));
+       tb = Clip3(-128,127,(enc_picture->poc - listX[LIST_0][i]->poc));
+       for (comp = 0; comp < 3; comp++)
+       {
+         // implicit weights          
+         if (td == 0)
+         {
+           im_weight[1][i][j][comp] = default_weight[comp];
+           im_weight[0][i][j][comp] = default_weight[comp];
+           im_offset[1][i][j][comp] = 0;
+           im_offset[0][i][j][comp] = 0;
+         }
+         else
+         {            
+           tx = (16384 + absm(td/2))/td;
+           DistScaleFactor = Clip3(-1024, 1023, (tx*tb + 32 )>>6);
+           im_weight[1][i][j][comp] = DistScaleFactor>>2;
+           if (im_weight[1][i][j][comp] < -64 || im_weight[1][i][j][comp] >128) // upper bound is 128 here, 127 for the explicit weights further down
+             im_weight[1][i][j][comp] = default_weight[comp];
+           im_weight[0][i][j][comp] = 64 - im_weight[1][i][j][comp];            
+           im_offset[1][i][j][comp] = 0;
+           im_offset[0][i][j][comp] = 0;
+         }        
+       }
+       /*
+       printf ("%d imp weight[%d][%d] = %d  , %d (%d %d %d) (%d %d) (%d %d)\n",enc_picture->poc, i, j,  im_weight[0][i][j][0], im_weight[1][i][j][0],
+         enc_picture->poc,listX[LIST_0][i]->poc, listX[LIST_1][j]->poc,
+         DistScaleFactor ,tx,td,tb);
+       */
+     }
+   }
+   
+   
+   if (active_pps->weighted_bipred_idc == 2) //! implicit mode
+   {
+     
+     for (i = 0; i < listXsize[LIST_0]; i++)
+     {
+       for (j = 0; j < listXsize[LIST_1]; j++)
+       {
+         for (comp = 0; comp < 3; comp++)
+         {
+           log_weight_denom = (comp == 0) ? luma_log_weight_denom : chroma_log_weight_denom;         
+           wbp_weight[1][i][j][comp] = im_weight[1][i][j][comp] ;
+           wbp_weight[0][i][j][comp] = im_weight[0][i][j][comp];
+         }
+       }
+     }
+     
+     for (clist=0; clist<2 + list_offset; clist++)
+     {
+       for (index = 0; index < listXsize[clist]; index++)
+       {
+         wp_weight[clist][index][0] = default_weight[0];
+         wp_weight[clist][index][1] = default_weight[1];
+         wp_weight[clist][index][2] = default_weight[2];
+         wp_offset[clist][index][0] = 0;
+         wp_offset[clist][index][1] = 0;
+         wp_offset[clist][index][2] = 0;
+       }
+     }
+         
+     for (k= 0; k<2; k++) // the "weighted" planes become plain copies of the unweighted ones
+     {
+       for (i = 0; i < listXsize[k]; i++)
+       {
+         memcpy(listX[k][i]->imgY_11_w, listX[k][i]->imgY_11, img->height * img->width * sizeof(imgpel));
+         memcpy(&listX[k][i]->imgY_ups_w[0][0], &listX[k][i]->imgY_ups[0][0], // assumes the 2-D planes are one contiguous allocation — TODO confirm against the allocator
+           16 * (img->height + 2*IMG_PAD_SIZE) * (img->width + 2*IMG_PAD_SIZE) * sizeof(imgpel));
+       }
+     }
+   }
+   else
+   {
+     for (i = 0; i < img->height; i++)
+     {
+       for (j = 0; j < img->width; j++)
+       {
+         dc_org += (double) imgY_org[i][j];
+       }
+     }
+     
+     for (clist=0; clist<2 + list_offset; clist++)
+     {
+       for (n = 0; n < listXsize[clist]; n++)
+       {
+         dc_ref[clist][n] = 0;
+         
+         ref_pic       = listX[clist][n]->imgY_11;
+         ref_pic_w     = listX[clist][n]->imgY_11_w;
+         ref_qpic      = listX[clist][n]->imgY_ups;
+         ref_qpic_w    = listX[clist][n]->imgY_ups_w;
+         // Y
+         for (i = 0; i < img->height * img->width; i++)
+         {
+           dc_ref[clist][n] += (double) ref_pic[i];
+         }
+         if (dc_ref[clist][n] != 0.0)
+           wf_weight = (int) (default_weight[0] * dc_org / dc_ref[clist][n] + 0.5);
+         else
+           wf_weight = default_weight[0];  // only used when reference picture is black
+         
+         if ( (wf_weight<-64) || (wf_weight>127) )
+         {
+           wf_weight = default_weight[0];
+         }
+         wf_offset = 0;
+         
+         //    printf("dc_org = %d, dc_ref = %d, weight[%d] = %d\n",dc_org, dc_ref[n],n,weight[n][0]);        
+         
+         weight[clist][n][0] = wf_weight;
+         weight[clist][n][1] = default_weight[1];
+         weight[clist][n][2] = default_weight[2];
+         offset[clist][n][0] = 0;
+         offset[clist][n][1] = 0;
+         offset[clist][n][2] = 0;
+         
+         /* store weighted reference pic for motion estimation */
+         for (i = 0; i < img->height * img->width; i++)
+         {
+           ref_pic_w[i] = Clip3 (0, img->max_imgpel_value, (((int) ref_pic[i] * wf_weight + wp_luma_round) >> luma_log_weight_denom) + wf_offset);
+         }
+         for (i = 0; i < 4*(img->height + 2*IMG_PAD_SIZE) ; i++)
+         {
+           for (j = 0; j< 4*(img->width + 2*IMG_PAD_SIZE); j++)
+           {
+             ref_qpic_w[i][j] =   Clip3 (0, img->max_imgpel_value, (((int) ref_qpic[i][j] * wf_weight + wp_luma_round) >> (luma_log_weight_denom)) + wf_offset );      	
+           }
+         }
+       }
+     }
+     
+     if (active_pps->weighted_bipred_idc == 1) // publish the estimated weights only in this mode; any other value falls back to defaults
+     {
+       for (clist=0; clist<2 + list_offset; clist++)
+       {
+         for (index = 0; index < listXsize[clist]; index++)
+         {
+           for (comp = 0; comp < 3; comp++)
+           {
+             wp_weight[clist][index][comp] = weight[clist][index][comp];
+             wp_offset[clist][index][comp] = offset[clist][index][comp];
+             //printf("%d %d\n",wp_weight[clist][index][comp],wp_offset[clist][index][comp]);
+           }
+         }
+       }
+     }
+     else
+     {    
+       for (clist=0; clist<2 + list_offset; clist++)
+       {
+         for (index = 0; index < listXsize[clist]; index++)
+         {
+           wp_weight[clist][index][0] = default_weight[0];
+           wp_weight[clist][index][1] = default_weight[1];
+           wp_weight[clist][index][2] = default_weight[2];
+           wp_offset[clist][index][0] = 0;
+           wp_offset[clist][index][1] = 0;
+           wp_offset[clist][index][2] = 0;
+         }
+       }
+     }
+     for (i = 0; i < listXsize[LIST_0]; i++) // bi-predictive table: each list contributes its own single-list weight
+     {
+       for (j = 0; j < listXsize[LIST_1]; j++)
+       {
+         for (comp = 0; comp < 3; comp++)
+         {
+           log_weight_denom = (comp == 0) ? luma_log_weight_denom : chroma_log_weight_denom;
+           wbp_weight[0][i][j][comp] = wp_weight[0][i][comp];
+           wbp_weight[1][i][j][comp] = wp_weight[1][j][comp];
+         }
+         /*
+         printf ("bpw weight[%d][%d] = %d  , %d (%d %d %d) (%d %d) (%d %d)\n", i, j, wbp_weight[0][i][j][0], wbp_weight[1][i][j][0],
+           enc_picture->poc,listX[LIST_0][i]->poc, listX[LIST_1][j]->poc,
+           DistScaleFactor ,tx,tx,tx);
+         */
+       }
+     }    
+   }
+ }
+     
+     
+ /*!
+ ************************************************************************
+ * \brief
+ *    Tests P slice weighting factors to perform or not WP RD decision: estimates luma weight (select_offset == 0) or luma offset (otherwise) per reference picture and returns 1 as soon as one differs from the default, else 0. Also resets wp_weight / wp_offset and the weight denominators to their defaults.
+ ************************************************************************
+ */
+ 
+ int test_wp_P_slice(int select_offset)
+ {
+   int i, j, n;
+   
+   double dc_org = 0.0;
+   int index;
+   int comp;
+   double dc_ref[MAX_REFERENCE_PICTURES];
+   
+   pel_t*  ref_pic;   
+   pel_t*  ref_pic_w;   
+   int default_weight;
+   int default_weight_chroma;
+   int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+   int weight[6][MAX_REFERENCE_PICTURES][3]; // first dimension must cover clist < 2 + list_offset, i.e. up to 6
+   int offset[6][MAX_REFERENCE_PICTURES][3]; // (was 2, which overflowed whenever list_offset was 2 or 4)
+   int clist;
+   int perform_wp = 0;      
+   
+   // fixed denominators: weights are expressed in units of 1/32
+   luma_log_weight_denom = 5;
+   chroma_log_weight_denom = 5;
+   wp_luma_round = 1 << (luma_log_weight_denom - 1);
+   wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+   default_weight = 1<<luma_log_weight_denom;
+   default_weight_chroma = 1<<chroma_log_weight_denom;
+   
+   /* set all values to defaults */
+   for (i = 0; i < 2 + list_offset; i++)
+   {
+     for (j = 0; j < listXsize[i]; j++)
+     {
+       for (n = 0; n < 3; n++)
+       {
+         weight[i][j][n] = default_weight;
+         wp_weight[i][j][n] = default_weight;
+         wp_offset[i][j][n] = 0;
+         offset[i][j][n] = 0;
+       }
+     }
+   }
+   
+   for (i = 0; i < img->height; i++)
+   {
+     for (j = 0; j < img->width; j++)
+     {
+       dc_org += (double) imgY_org[i][j];
+     }
+   } 
+     
+   for (clist=0; clist<2 + list_offset; clist++)
+   {
+     for (n = 0; n < listXsize[clist]; n++)
+     {
+       dc_ref[n] = 0.0;
+       
+       ref_pic       = listX[clist][n]->imgY_11;
+       ref_pic_w     = listX[clist][n]->imgY_11_w;
+       
+       // Y
+       for (i = 0; i < img->height * img->width; i++)
+       {
+         dc_ref[n] += (double) ref_pic[i];
+       }
+       
+       if (select_offset==0)
+       {
+         if (dc_ref[n] != 0.0)
+           weight[clist][n][0] = (int) (default_weight * dc_org / dc_ref[n] + 0.5);
+         else
+           weight[clist][n][0] = default_weight;  // only used when reference picture is black
+         if (weight[clist][n][0] < -64 || weight[clist][n][0] >127)
+           weight[clist][n][0] = default_weight;  // same value as the former literal 32 (1 << 5)
+       }
+       else
+       {        
+         offset[clist][n][0] = (int) ((dc_org-dc_ref[n])/(img->height*img->width)+0.5);
+         offset[clist][n][0] = (offset[clist][n][0]<-128) ? -128: (offset[clist][n][0]>127) ? 127:offset[clist][n][0];
+         weight[clist][n][0] = default_weight;
+       }
+       
+       /* for now always use default weight for chroma weight */
+       weight[clist][n][1] = default_weight_chroma;
+       weight[clist][n][2] = default_weight_chroma;
+                   
+     }
+   }
+   
+   for (clist=0; clist<2 + list_offset; clist++) // stop at the first non-default weight or significant offset
+   {
+     for (index = 0; index < listXsize[clist]; index++)
+     {
+       for (comp=0; comp < 3; comp ++)
+       {
+         int offset_test = input->RDPSliceBTest && active_sps->profile_idc != 66 
+           ? abs(offset[clist][index][comp]) > 2 
+           : offset[clist][index][comp] != 0;
+         // chroma is compared against the luma default too; both defaults are equal here because both denominators are 5
+         if (weight[clist][index][comp] != default_weight ||  offset_test)
+         {
+           perform_wp = 1;
+           break;
+         }
+       }
+       if (perform_wp == 1) break;
+     }
+     if (perform_wp == 1) break;
+   }
+   
+   return perform_wp;
+ }
+ 
+ /*!
+ ************************************************************************
+ * \brief
+ *    test_wp_B_slice:
+ *    Tests B slice weighting prediction
+ ************************************************************************
+ */
+ int test_wp_B_slice(int select_method)
+ {
+   int i, j, n;
+   
+   int tx,DistScaleFactor;
+   double dc_org = 0.0;
+   int index;
+   int comp;
+   double dc_ref[6][MAX_REFERENCE_PICTURES];
+   
+   int log_weight_denom;
+   
+   pel_t*  ref_pic;   
+   int default_weight[3];
+   // this needs to be fixed. 
+   int list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))? (img->current_mb_nr & 0x01) ? 4 : 2 : 0;
+   int weight[6][MAX_REFERENCE_PICTURES][3]; 
+   int offset[6][MAX_REFERENCE_PICTURES][3];       
+   int im_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; 
+   int im_offset[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; 
+   int clist;
+   int wf_weight, wf_offset;
+   int perform_wp = 0;      
+   
+   if (select_method == 1) //! implicit mode
+   {
+     luma_log_weight_denom = 5;
+     chroma_log_weight_denom = 5;
+   }
+   else
+   {
+     luma_log_weight_denom = 6;
+     chroma_log_weight_denom = 6;
+   }
+ 
+   wp_luma_round = 1 << (luma_log_weight_denom - 1);
+   wp_chroma_round = 1 << (chroma_log_weight_denom - 1);
+   default_weight[0] = 1<<luma_log_weight_denom;
+   default_weight[1] = 1<<chroma_log_weight_denom;
+   default_weight[2] = 1<<chroma_log_weight_denom;
+   
+   /* set all values to defaults */
+   for (i = 0; i < 2 + list_offset; i++)
+   {
+     for (j = 0; j < listXsize[i]; j++)
+     {
+       for (n = 0; n < 3; n++)
+       {
+         wp_weight[i][j][n] = default_weight[n];
+         wp_offset[i][j][n] = 0;
+         offset   [i][j][n] = 0;
+         weight   [i][j][n] = default_weight[n];
+       }
+     }
+   }
+   
+   for (i = 0; i < listXsize[LIST_0]; i++)
+   {
+     for (j = 0; j < listXsize[LIST_1]; j++)
+     {
+       int td, tb;
+       td = Clip3(-128,127,(listX[LIST_1][j]->poc - listX[LIST_0][i]->poc));
+       tb = Clip3(-128,127,(enc_picture->poc - listX[LIST_0][i]->poc));
+       for (comp = 0; comp < 3; comp++)
+       {
+         // implicit weights          
+         if (td == 0)
+         {
+           im_weight[1][i][j][comp] = default_weight[comp];
+           im_weight[0][i][j][comp] = default_weight[comp];
+           im_offset[1][i][j][comp] = 0;
+           im_offset[0][i][j][comp] = 0;
+         }
+         else
+         {            
+           tx = (16384 + absm(td/2))/td;
+           DistScaleFactor = Clip3(-1024, 1023, (tx*tb + 32 )>>6);
+           im_weight[1][i][j][comp] = DistScaleFactor>>2;
+           if (im_weight[1][i][j][comp] < -64 || im_weight[1][i][j][comp] >128)
+             im_weight[1][i][j][comp] = 32;
+           im_weight[0][i][j][comp] = 64 - im_weight[1][i][j][comp];            
+           im_offset[1][i][j][comp] = 0;
+           im_offset[0][i][j][comp] = 0;
+         }        
+       }
+     }
+   }
+   
+   
+   if (select_method == 1) //! implicit mode
+   {
+     
+     for (i = 0; i < listXsize[LIST_0]; i++)
+     {
+       for (j = 0; j < listXsize[LIST_1]; j++)
+       {
+         for (comp = 0; comp < 3; comp++)
+         {
+           log_weight_denom = (comp == 0) ? luma_log_weight_denom : chroma_log_weight_denom;         
+           wbp_weight[1][i][j][comp] = im_weight[1][i][j][comp] ;
+           wbp_weight[0][i][j][comp] = im_weight[0][i][j][comp];
+         }
+       }
+     }
+     
+     for (clist=0; clist<2 + list_offset; clist++)
+     {
+       for (index = 0; index < listXsize[clist]; index++)
+       {
+         wp_weight[clist][index][0] = default_weight[0];
+         wp_weight[clist][index][1] = default_weight[1];
+         wp_weight[clist][index][2] = default_weight[2];
+         wp_offset[clist][index][0] = 0;
+         wp_offset[clist][index][1] = 0;
+         wp_offset[clist][index][2] = 0;
+       }
+     }
+   }
+   else
+   {
+     for (i = 0; i < img->height; i++)
+     {
+       for (j = 0; j < img->width; j++)
+       {
+         dc_org += (double) imgY_org[i][j];
+       }
+     }
+     
+     for (clist=0; clist<2 + list_offset; clist++)
+     {
+       for (n = 0; n < listXsize[clist]; n++)
+       {
+         dc_ref[clist][n] = 0;
+         
+ 
+         ref_pic       = listX[clist][n]->imgY_11;
+         
+         // Y
+         for (i = 0; i < img->height * img->width; i++)
+         {
+           dc_ref[clist][n] += (double) ref_pic[i];
+         }
+ 
+         if (dc_ref[clist][n] != 0.0)
+           wf_weight = (int) (default_weight[0] * dc_org / dc_ref[clist][n] + 0.5);
+         else
+           wf_weight = default_weight[0];  // only used when reference picture is black
+ 
+         if ( (wf_weight<-64) || (wf_weight>127) )
+         {
+           wf_weight = default_weight[0];
+         }
+         wf_offset = 0;
+         
+                
+         weight[clist][n][0] = wf_weight;
+         weight[clist][n][1] = default_weight[1];
+         weight[clist][n][2] = default_weight[2];
+         offset[clist][n][0] = 0;
+         offset[clist][n][1] = 0;
+         offset[clist][n][2] = 0;
+        
+       }
+     }
+     
+     if (select_method == 0) //! explicit mode
+     {
+       for (clist=0; clist<2 + list_offset; clist++)
+       {
+         for (index = 0; index < listXsize[clist]; index++)
+         {
+           for (comp = 0; comp < 3; comp++)
+           {
+             wp_weight[clist][index][comp] = weight[clist][index][comp];
+             wp_offset[clist][index][comp] = offset[clist][index][comp];
+           }
+         }
+       }
+     }
+     else
+     {    
+       for (clist=0; clist<2 + list_offset; clist++)
+       {
+         for (index = 0; index < listXsize[clist]; index++)
+         {
+           wp_weight[clist][index][0] = default_weight[0];
+           wp_weight[clist][index][1] = default_weight[1];
+           wp_weight[clist][index][2] = default_weight[2];
+           wp_offset[clist][index][0] = 0;
+           wp_offset[clist][index][1] = 0;
+           wp_offset[clist][index][2] = 0;
+         }
+       }
+     }
+     for (i = 0; i < listXsize[LIST_0]; i++)
+     {
+       for (j = 0; j < listXsize[LIST_1]; j++)
+       {
+         for (comp = 0; comp < 3; comp++)
+         {
+           log_weight_denom = (comp == 0) ? luma_log_weight_denom : chroma_log_weight_denom;
+           wbp_weight[0][i][j][comp] = wp_weight[0][i][comp];
+           wbp_weight[1][i][j][comp] = wp_weight[1][j][comp];
+         }
+         /*
+         printf ("bpw weight[%d][%d] = %d  , %d (%d %d %d) (%d %d) (%d %d)\n", i, j, wbp_weight[0][i][j][0], wbp_weight[1][i][j][0],
+           enc_picture->poc,listX[LIST_0][i]->poc, listX[LIST_1][j]->poc,
+           DistScaleFactor ,tx,tx,tx);
+         */
+       }
+     }
+   }
+ 
+   if (select_method == 0) //! explicit mode
+   {
+     int active_refs[2];
+ 
+     active_refs[0]=input->B_List0_refs == 0 ? listXsize[0] : min(input->B_List0_refs,listXsize[0]);
+     active_refs[1]=input->B_List1_refs == 0 ? listXsize[1] : min(input->B_List0_refs,listXsize[1]);
+ 
+     for (clist=0; clist<2 + list_offset; clist++)
+     {
+       for (index = 0; index < active_refs[clist]; index++)
+       {
+         for (comp=0; comp < 3; comp ++)
+         {
+           if (wp_weight[clist][index][comp] != default_weight[comp])
+           {
+             perform_wp = 1;
+             break;
+           }
+         }
+         if (perform_wp == 1) break;
+       }
+       if (perform_wp == 1) break;
+     }
+   }
+   return perform_wp;
+ }
+     
+     






More information about the llvm-commits mailing list