1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "faandct.h"
34 #include <limits.h>
35
36 #ifdef USE_FASTMEMCPY
37 #include "libvo/fastmemcpy.h"
38 #endif
39
40 //#undef NDEBUG
41 //#include <assert.h>
42
43 #ifdef CONFIG_ENCODERS
44 static int encode_picture(MpegEncContext *s, int picture_number);
45 #endif //CONFIG_ENCODERS
46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
55                                    DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
59                                   DCTELEM *block, int n, int qscale);
60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
61 #ifdef CONFIG_ENCODERS
62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
65 static int sse_mb(MpegEncContext *s);
66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
67 #endif //CONFIG_ENCODERS
68
69 #ifdef HAVE_XVMC
70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
71 extern void XVMC_field_end(MpegEncContext *s);
72 extern void XVMC_decode_mb(MpegEncContext *s);
73 #endif
74
75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
76
77
78 /* enable all paranoid tests for rounding, overflows, etc... */
79 //#define PARANOID
80
81 //#define DEBUG
82
83
84 /* for jpeg fast DCT */
85 #define CONST_BITS 14
86
87 static const uint16_t aanscales[64] = {
88     /* precomputed values scaled up by 14 bits */
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
91     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
92     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
93     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
94     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
95     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
96     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
97 };
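/* the aanscales[] entries above are approximately 2^14 * C(u) * C(v) for row u,
 * column v, with C(0)=1 and C(k)=sqrt(2)*cos(k*pi/16) for k>0, i.e. the
 * post-scale factors of the AAN fast DCT; for example aanscales[0]=16384=2^14
 * and aanscales[9] ~ 16384*1.387^2 ~ 31521 */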
98
99 static const uint8_t h263_chroma_roundtab[16] = {
100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
101     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
102 };
103
104 static const uint8_t ff_default_chroma_qscale_table[32]={
105 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
106     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
107 };
108
109 #ifdef CONFIG_ENCODERS
110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
111 static uint8_t default_fcode_tab[MAX_MV*2+1];
112
113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
114
115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
117 {
118     int qscale;
119     int shift=0;
120
121     for(qscale=qmin; qscale<=qmax; qscale++){
122         int i;
123         if (dsp->fdct == ff_jpeg_fdct_islow
124 #ifdef FAAN_POSTSCALE
125             || dsp->fdct == ff_faandct
126 #endif
127             ) {
128             for(i=0;i<64;i++) {
129                 const int j= dsp->idct_permutation[i];
130                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
131                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
132                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
133                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
134
135                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
136                                 (qscale * quant_matrix[j]));
137             }
138         } else if (dsp->fdct == fdct_ifast
139 #ifndef FAAN_POSTSCALE
140                    || dsp->fdct == ff_faandct
141 #endif
142                    ) {
143             for(i=0;i<64;i++) {
144                 const int j= dsp->idct_permutation[i];
145                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
146                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
147                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
148                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
149
150                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
151                                 (aanscales[i] * qscale * quant_matrix[j]));
152             }
153         } else {
154             for(i=0;i<64;i++) {
155                 const int j= dsp->idct_permutation[i];
156                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
157                    So 16           <= qscale * quant_matrix[i]             <= 7905
158                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
159                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
160                 */
161                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
162 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
163                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
164
165                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
166                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
167             }
168         }
169
170         for(i=intra; i<64; i++){
171             int64_t max= 8191;
172             if (dsp->fdct == fdct_ifast
173 #ifndef FAAN_POSTSCALE
174                    || dsp->fdct == ff_faandct
175 #endif
176                    ) {
177                 max= (8191LL*aanscales[i]) >> 14;
178             }
179             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
180                 shift++;
181             }
182         }
183     }
184     if(shift){
185     av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
186     }
187 }
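/* convert_matrix() above turns the division by the quantizer step into a
 * multiply + shift, which is what the dct_quantize_* functions rely on;
 * roughly
 *     level ~ (dct_coef * qmat[qscale][i]) >> QMAT_SHIFT
 *           ~ dct_coef / (qscale * quant_matrix[i])
 * with the fdct-dependent scaling folded in for the ifast/AAN case, and a
 * 16-bit variant in qmat16[][0] plus a precomputed bias in qmat16[][1] for the
 * MMX code path */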
188
189 static inline void update_qscale(MpegEncContext *s){
190     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
191     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
192
193     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
194 }
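/* with FF_LAMBDA_SHIFT == 7 and FF_LAMBDA_SCALE == 1<<7 the first line above
 * is qscale ~ (lambda*139 + 8192) / 16384, i.e. roughly lambda / 118, undoing
 * the usual lambda = qscale * FF_QP2LAMBDA mapping and clipping to the user's
 * qmin/qmax; lambda2 is lambda squared scaled back down by FF_LAMBDA_SCALE,
 * for comparisons against squared-error distortion */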
195 #endif //CONFIG_ENCODERS
196
197 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
198     int i;
199     int end;
200
201     st->scantable= src_scantable;
202
203     for(i=0; i<64; i++){
204         int j;
205         j = src_scantable[i];
206         st->permutated[i] = permutation[j];
207 #ifdef ARCH_POWERPC
208         st->inverse[j] = i;
209 #endif
210     }
211
212     end=-1;
213     for(i=0; i<64; i++){
214         int j;
215         j = st->permutated[i];
216         if(j>end) end=j;
217         st->raster_end[i]= end;
218     }
219 }
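/* after ff_init_scantable(), st->permutated[i] is where the i-th coefficient
 * of the scan lives inside the IDCT-permuted block (decoders store
 * block[ st->permutated[i] ]= level), and st->raster_end[i] is the highest
 * permuted index seen up to scan position i, which the dct_unquantize_*
 * functions use as a loop bound for sparse blocks */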
220
221 #ifdef CONFIG_ENCODERS
222 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
223     int i;
224
225     if(matrix){
226         put_bits(pb, 1, 1);
227         for(i=0;i<64;i++) {
228             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
229         }
230     }else
231         put_bits(pb, 1, 0);
232 }
233 #endif //CONFIG_ENCODERS
234
235 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
236     int i;
237
238     assert(p<=end);
239     if(p>=end)
240         return end;
241
242     for(i=0; i<3; i++){
243         uint32_t tmp= *state << 8;
244         *state= tmp + *(p++);
245         if(tmp == 0x100 || p==end)
246             return p;
247     }
248
249     while(p<end){
250         if     (p[-1] > 1      ) p+= 3;
251         else if(p[-2]          ) p+= 2;
252         else if(p[-3]|(p[-1]-1)) p++;
253         else{
254             p++;
255             break;
256         }
257     }
258
259     p= FFMIN(p, end)-4;
260     *state=  be2me_32(unaligned32(p));
261
262     return p+4;
263 }
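/* ff_find_start_code() scans for MPEG-style start codes (00 00 01 xx): the
 * inner loop skips 3, 2 or 1 bytes depending on which of the last three bytes
 * could still belong to a 00 00 01 prefix, and *state holds the four bytes
 * just before the returned position, so a caller can resume scanning across
 * buffer boundaries.  A minimal usage sketch follows (buf, size and
 * handle_start_code are just illustrative names, not part of this file): */
#if 0
    uint32_t state= ~0;
    const uint8_t *ptr= buf, *end= buf + size;
    while(ptr < end){
        ptr= ff_find_start_code(ptr, end, &state);
        if((state & 0xFFFFFF00) == 0x100)         /* found 00 00 01 xx */
            handle_start_code(state & 0xFF, ptr); /* hypothetical callback */
    }
#endif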
264
265 /* init common dct for both encoder and decoder */
266 int DCT_common_init(MpegEncContext *s)
267 {
268     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
269     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
270     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
271     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
272     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
273     if(s->flags & CODEC_FLAG_BITEXACT)
274         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
275     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
276
277 #ifdef CONFIG_ENCODERS
278     s->dct_quantize= dct_quantize_c;
279     s->denoise_dct= denoise_dct_c;
280 #endif //CONFIG_ENCODERS
281
282 #ifdef HAVE_MMX
283     MPV_common_init_mmx(s);
284 #endif
285 #ifdef ARCH_ALPHA
286     MPV_common_init_axp(s);
287 #endif
288 #ifdef HAVE_MLIB
289     MPV_common_init_mlib(s);
290 #endif
291 #ifdef HAVE_MMI
292     MPV_common_init_mmi(s);
293 #endif
294 #ifdef ARCH_ARMV4L
295     MPV_common_init_armv4l(s);
296 #endif
297 #ifdef ARCH_POWERPC
298     MPV_common_init_ppc(s);
299 #endif
300
301 #ifdef CONFIG_ENCODERS
302     s->fast_dct_quantize= s->dct_quantize;
303
304     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
305         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
306     }
307
308 #endif //CONFIG_ENCODERS
309
310     /* load & permute scantables
311        note: only wmv uses different ones
312     */
313     if(s->alternate_scan){
314         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
315         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
316     }else{
317         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
318         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
319     }
320     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
322
323     return 0;
324 }
325
326 static void copy_picture(Picture *dst, Picture *src){
327     *dst = *src;
328     dst->type= FF_BUFFER_TYPE_COPY;
329 }
330
331 #ifdef CONFIG_ENCODERS
332 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
333     int i;
334
335     dst->pict_type              = src->pict_type;
336     dst->quality                = src->quality;
337     dst->coded_picture_number   = src->coded_picture_number;
338     dst->display_picture_number = src->display_picture_number;
339 //    dst->reference              = src->reference;
340     dst->pts                    = src->pts;
341     dst->interlaced_frame       = src->interlaced_frame;
342     dst->top_field_first        = src->top_field_first;
343
344     if(s->avctx->me_threshold){
345         if(!src->motion_val[0])
346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
347         if(!src->mb_type)
348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
349         if(!src->ref_index[0])
350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
351         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
352             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
353             src->motion_subsample_log2, dst->motion_subsample_log2);
354
355         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
356
357         for(i=0; i<2; i++){
358             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
359             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
360
361             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
362                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
363             }
364             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
365                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
366             }
367         }
368     }
369 }
370 #endif
371
372 /**
373  * allocates a Picture
374  * The pixels are allocated/set by calling get_buffer() if shared=0
375  */
376 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
377     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
378     const int mb_array_size= s->mb_stride*s->mb_height;
379     const int b8_array_size= s->b8_stride*s->mb_height*2;
380     const int b4_array_size= s->b4_stride*s->mb_height*4;
381     int i;
382
383     if(shared){
384         assert(pic->data[0]);
385         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
386         pic->type= FF_BUFFER_TYPE_SHARED;
387     }else{
388         int r;
389
390         assert(!pic->data[0]);
391
392         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
393
394         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
395             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
396             return -1;
397         }
398
399         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
400             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
401             return -1;
402         }
403
404         if(pic->linesize[1] != pic->linesize[2]){
405             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
406             return -1;
407         }
408
409         s->linesize  = pic->linesize[0];
410         s->uvlinesize= pic->linesize[1];
411     }
412
413     if(pic->qscale_table==NULL){
414         if (s->encoding) {
415             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
416             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
417             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
418         }
419
420         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
421         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
422         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
423         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
424         if(s->out_format == FMT_H264){
425             for(i=0; i<2; i++){
426                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
427                 pic->motion_val[i]= pic->motion_val_base[i]+4;
428                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
429             }
430             pic->motion_subsample_log2= 2;
431         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
432             for(i=0; i<2; i++){
433                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
434                 pic->motion_val[i]= pic->motion_val_base[i]+4;
435                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
436             }
437             pic->motion_subsample_log2= 3;
438         }
439         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
440             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
441         }
442         pic->qstride= s->mb_stride;
443         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
444     }
445
446     //it might be nicer if the application kept track of these, but it would require an API change
447     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
448     s->prev_pict_types[0]= s->pict_type;
449     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
450         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
451
452     return 0;
453 fail: //for the CHECKED_ALLOCZ macro
454     return -1;
455 }
456
457 /**
458  * deallocates a picture
459  */
460 static void free_picture(MpegEncContext *s, Picture *pic){
461     int i;
462
463     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
464         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
465     }
466
467     av_freep(&pic->mb_var);
468     av_freep(&pic->mc_mb_var);
469     av_freep(&pic->mb_mean);
470     av_freep(&pic->mbskip_table);
471     av_freep(&pic->qscale_table);
472     av_freep(&pic->mb_type_base);
473     av_freep(&pic->dct_coeff);
474     av_freep(&pic->pan_scan);
475     pic->mb_type= NULL;
476     for(i=0; i<2; i++){
477         av_freep(&pic->motion_val_base[i]);
478         av_freep(&pic->ref_index[i]);
479     }
480
481     if(pic->type == FF_BUFFER_TYPE_SHARED){
482         for(i=0; i<4; i++){
483             pic->base[i]=
484             pic->data[i]= NULL;
485         }
486         pic->type= 0;
487     }
488 }
489
490 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
491     int i;
492
493     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
494     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
495     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
496
497      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
498     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
499     s->rd_scratchpad=   s->me.scratchpad;
500     s->b_scratchpad=    s->me.scratchpad;
501     s->obmc_scratchpad= s->me.scratchpad + 16;
502     if (s->encoding) {
503         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
504         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
505         if(s->avctx->noise_reduction){
506             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
507         }
508     }
509     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
510     s->block= s->blocks[0];
511
512     for(i=0;i<12;i++){
513         s->pblocks[i] = (short *)(&s->block[i]);
514     }
515     return 0;
516 fail:
517     return -1; //free() through MPV_common_end()
518 }
519
520 static void free_duplicate_context(MpegEncContext *s){
521     if(s==NULL) return;
522
523     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
524     av_freep(&s->me.scratchpad);
525     s->rd_scratchpad=
526     s->b_scratchpad=
527     s->obmc_scratchpad= NULL;
528
529     av_freep(&s->dct_error_sum);
530     av_freep(&s->me.map);
531     av_freep(&s->me.score_map);
532     av_freep(&s->blocks);
533     s->block= NULL;
534 }
535
536 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
537 #define COPY(a) bak->a= src->a
538     COPY(allocated_edge_emu_buffer);
539     COPY(edge_emu_buffer);
540     COPY(me.scratchpad);
541     COPY(rd_scratchpad);
542     COPY(b_scratchpad);
543     COPY(obmc_scratchpad);
544     COPY(me.map);
545     COPY(me.score_map);
546     COPY(blocks);
547     COPY(block);
548     COPY(start_mb_y);
549     COPY(end_mb_y);
550     COPY(me.map_generation);
551     COPY(pb);
552     COPY(dct_error_sum);
553     COPY(dct_count[0]);
554     COPY(dct_count[1]);
555 #undef COPY
556 }
557
558 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
559     MpegEncContext bak;
560     int i;
561     //FIXME copy only needed parts
562 //START_TIMER
563     backup_duplicate_context(&bak, dst);
564     memcpy(dst, src, sizeof(MpegEncContext));
565     backup_duplicate_context(dst, &bak);
566     for(i=0;i<12;i++){
567         dst->pblocks[i] = (short *)(&dst->block[i]);
568     }
569 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
570 }
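/* slice threads share most state with the main context, so an update saves the
 * thread's private pointers (scratch buffers, PutBitContext, block arrays,
 * mb row range) with backup_duplicate_context(), memcpy()s the whole master
 * context over it and restores them; pblocks[] is rebuilt because it points
 * into the per-thread block[] array that was just copied */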
571
572 #ifdef CONFIG_ENCODERS
573 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
574 #define COPY(a) dst->a= src->a
575     COPY(pict_type);
576     COPY(current_picture);
577     COPY(f_code);
578     COPY(b_code);
579     COPY(qscale);
580     COPY(lambda);
581     COPY(lambda2);
582     COPY(picture_in_gop_number);
583     COPY(gop_picture_number);
584     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
585     COPY(progressive_frame); //FIXME don't set in encode_header
586     COPY(partitioned_frame); //FIXME don't set in encode_header
587 #undef COPY
588 }
589 #endif
590
591 /**
592  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
593  * the changed fields will not depend upon the prior state of the MpegEncContext.
594  */
595 static void MPV_common_defaults(MpegEncContext *s){
596     s->y_dc_scale_table=
597     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
598     s->chroma_qscale_table= ff_default_chroma_qscale_table;
599     s->progressive_frame= 1;
600     s->progressive_sequence= 1;
601     s->picture_structure= PICT_FRAME;
602
603     s->coded_picture_number = 0;
604     s->picture_number = 0;
605     s->input_picture_number = 0;
606
607     s->picture_in_gop_number = 0;
608
609     s->f_code = 1;
610     s->b_code = 1;
611 }
612
613 /**
614  * sets the given MpegEncContext to defaults for decoding.
615  * the changed fields will not depend upon the prior state of the MpegEncContext.
616  */
617 void MPV_decode_defaults(MpegEncContext *s){
618     MPV_common_defaults(s);
619 }
620
621 /**
622  * sets the given MpegEncContext to defaults for encoding.
623  * the changed fields will not depend upon the prior state of the MpegEncContext.
624  */
625
626 #ifdef CONFIG_ENCODERS
627 static void MPV_encode_defaults(MpegEncContext *s){
628     static int done=0;
629
630     MPV_common_defaults(s);
631
632     if(!done){
633         int i;
634         done=1;
635
636         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
637         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
638
639         for(i=-16; i<16; i++){
640             default_fcode_tab[i + MAX_MV]= 1;
641         }
642     }
643     s->me.mv_penalty= default_mv_penalty;
644     s->fcode_tab= default_fcode_tab;
645 }
646 #endif //CONFIG_ENCODERS
647
648 /**
649  * init common structure for both encoder and decoder.
650  * this assumes that some variables like width/height are already set
651  */
652 int MPV_common_init(MpegEncContext *s)
653 {
654     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
655
656     s->mb_height = (s->height + 15) / 16;
657
658     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
659         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
660         return -1;
661     }
662
663     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
664         return -1;
665
666     dsputil_init(&s->dsp, s->avctx);
667     DCT_common_init(s);
668
669     s->flags= s->avctx->flags;
670     s->flags2= s->avctx->flags2;
671
672     s->mb_width  = (s->width  + 15) / 16;
673     s->mb_stride = s->mb_width + 1;
674     s->b8_stride = s->mb_width*2 + 1;
675     s->b4_stride = s->mb_width*4 + 1;
676     mb_array_size= s->mb_height * s->mb_stride;
677     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
678
679     /* set chroma shifts */
680     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
681                                                     &(s->chroma_y_shift) );
682
683     /* set default edge pos, will be overridden in decode_header if needed */
684     s->h_edge_pos= s->mb_width*16;
685     s->v_edge_pos= s->mb_height*16;
686
687     s->mb_num = s->mb_width * s->mb_height;
688
689     s->block_wrap[0]=
690     s->block_wrap[1]=
691     s->block_wrap[2]=
692     s->block_wrap[3]= s->b8_stride;
693     s->block_wrap[4]=
694     s->block_wrap[5]= s->mb_stride;
695
696     y_size = s->b8_stride * (2 * s->mb_height + 1);
697     c_size = s->mb_stride * (s->mb_height + 1);
698     yc_size = y_size + 2 * c_size;
699
700     /* convert fourcc to upper case */
701     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
702                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
703                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
704                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
705
706     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
707                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
708                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
709                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
710
711     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
712
713     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
714     for(y=0; y<s->mb_height; y++){
715         for(x=0; x<s->mb_width; x++){
716             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
717         }
718     }
719     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
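    /* mb_index2xy[] maps a linear macroblock number (without padding) to its
     * index in the mb_stride-padded per-MB arrays; the extra final entry gives
     * the error resilience code a valid index one past the last macroblock */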
720
721     if (s->encoding) {
722         /* Allocate MV tables */
723         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
724         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
725         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
726         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
727         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
728         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
729         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
730         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
731         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
732         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
733         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
734         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
735
736         if(s->msmpeg4_version){
737             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
738         }
739         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
740
741         /* Allocate MB type table */
742         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
743
744         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
745
746         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
747         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
748         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
749         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
750         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
751         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
752
753         if(s->avctx->noise_reduction){
754             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
755         }
756     }
757     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
758
759     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
760
761     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
762         /* interlaced direct mode decoding tables */
763             for(i=0; i<2; i++){
764                 int j, k;
765                 for(j=0; j<2; j++){
766                     for(k=0; k<2; k++){
767                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
768                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
769                     }
770                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
771                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
772                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
773                 }
774                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
775             }
776     }
777     if (s->out_format == FMT_H263) {
778         /* ac values */
779         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
780         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
781         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
782         s->ac_val[2] = s->ac_val[1] + c_size;
783
784         /* cbp values */
785         CHECKED_ALLOCZ(s->coded_block_base, y_size);
786         s->coded_block= s->coded_block_base + s->b8_stride + 1;
787
788         /* cbp, ac_pred, pred_dir */
789         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
790         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
791     }
792
793     if (s->h263_pred || s->h263_plus || !s->encoding) {
794         /* dc values */
795         //MN: we need these for error resilience of intra-frames
796         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
797         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
798         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
799         s->dc_val[2] = s->dc_val[1] + c_size;
800         for(i=0;i<yc_size;i++)
801             s->dc_val_base[i] = 1024;
802     }
803
804     /* which mb is an intra block */
805     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
806     memset(s->mbintra_table, 1, mb_array_size);
807
808     /* init macroblock skip table */
809     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
810     //Note: the extra bytes are for quicker mpeg4 slice_end detection
811     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
812
813     s->parse_context.state= -1;
814     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
815        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
816        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
817        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
818     }
819
820     s->context_initialized = 1;
821
822     s->thread_context[0]= s;
823     for(i=1; i<s->avctx->thread_count; i++){
824         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
825         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
826     }
827
828     for(i=0; i<s->avctx->thread_count; i++){
829         if(init_duplicate_context(s->thread_context[i], s) < 0)
830            goto fail;
831         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
832         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
833     }
834
835     return 0;
836  fail:
837     MPV_common_end(s);
838     return -1;
839 }
840
841 /* free the common structures of both encoder and decoder */
842 void MPV_common_end(MpegEncContext *s)
843 {
844     int i, j, k;
845
846     for(i=0; i<s->avctx->thread_count; i++){
847         free_duplicate_context(s->thread_context[i]);
848     }
849     for(i=1; i<s->avctx->thread_count; i++){
850         av_freep(&s->thread_context[i]);
851     }
852
853     av_freep(&s->parse_context.buffer);
854     s->parse_context.buffer_size=0;
855
856     av_freep(&s->mb_type);
857     av_freep(&s->p_mv_table_base);
858     av_freep(&s->b_forw_mv_table_base);
859     av_freep(&s->b_back_mv_table_base);
860     av_freep(&s->b_bidir_forw_mv_table_base);
861     av_freep(&s->b_bidir_back_mv_table_base);
862     av_freep(&s->b_direct_mv_table_base);
863     s->p_mv_table= NULL;
864     s->b_forw_mv_table= NULL;
865     s->b_back_mv_table= NULL;
866     s->b_bidir_forw_mv_table= NULL;
867     s->b_bidir_back_mv_table= NULL;
868     s->b_direct_mv_table= NULL;
869     for(i=0; i<2; i++){
870         for(j=0; j<2; j++){
871             for(k=0; k<2; k++){
872                 av_freep(&s->b_field_mv_table_base[i][j][k]);
873                 s->b_field_mv_table[i][j][k]=NULL;
874             }
875             av_freep(&s->b_field_select_table[i][j]);
876             av_freep(&s->p_field_mv_table_base[i][j]);
877             s->p_field_mv_table[i][j]=NULL;
878         }
879         av_freep(&s->p_field_select_table[i]);
880     }
881
882     av_freep(&s->dc_val_base);
883     av_freep(&s->ac_val_base);
884     av_freep(&s->coded_block_base);
885     av_freep(&s->mbintra_table);
886     av_freep(&s->cbp_table);
887     av_freep(&s->pred_dir_table);
888
889     av_freep(&s->mbskip_table);
890     av_freep(&s->prev_pict_types);
891     av_freep(&s->bitstream_buffer);
892     s->allocated_bitstream_buffer_size=0;
893
894     av_freep(&s->avctx->stats_out);
895     av_freep(&s->ac_stats);
896     av_freep(&s->error_status_table);
897     av_freep(&s->mb_index2xy);
898     av_freep(&s->lambda_table);
899     av_freep(&s->q_intra_matrix);
900     av_freep(&s->q_inter_matrix);
901     av_freep(&s->q_intra_matrix16);
902     av_freep(&s->q_inter_matrix16);
903     av_freep(&s->input_picture);
904     av_freep(&s->reordered_input_picture);
905     av_freep(&s->dct_offset);
906
907     if(s->picture){
908         for(i=0; i<MAX_PICTURE_COUNT; i++){
909             free_picture(s, &s->picture[i]);
910         }
911     }
912     av_freep(&s->picture);
913     s->context_initialized = 0;
914     s->last_picture_ptr=
915     s->next_picture_ptr=
916     s->current_picture_ptr= NULL;
917     s->linesize= s->uvlinesize= 0;
918
919     for(i=0; i<3; i++)
920         av_freep(&s->visualization_buffer[i]);
921
922     avcodec_default_free_buffers(s->avctx);
923 }
924
925 #ifdef CONFIG_ENCODERS
926
927 /* init video encoder */
928 int MPV_encode_init(AVCodecContext *avctx)
929 {
930     MpegEncContext *s = avctx->priv_data;
931     int i;
932     int chroma_h_shift, chroma_v_shift;
933
934     MPV_encode_defaults(s);
935
936     switch (avctx->codec_id) {
937     case CODEC_ID_MPEG2VIDEO:
938         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
939             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
940             return -1;
941         }
942         break;
943     case CODEC_ID_LJPEG:
944     case CODEC_ID_MJPEG:
945         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
946            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
947             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
948             return -1;
949         }
950         break;
951     default:
952         if(avctx->pix_fmt != PIX_FMT_YUV420P){
953             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
954             return -1;
955         }
956     }
957
958     switch (avctx->pix_fmt) {
959     case PIX_FMT_YUVJ422P:
960     case PIX_FMT_YUV422P:
961         s->chroma_format = CHROMA_422;
962         break;
963     case PIX_FMT_YUVJ420P:
964     case PIX_FMT_YUV420P:
965     default:
966         s->chroma_format = CHROMA_420;
967         break;
968     }
969
970     s->bit_rate = avctx->bit_rate;
971     s->width = avctx->width;
972     s->height = avctx->height;
973     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
974         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
975         avctx->gop_size=600;
976     }
977     s->gop_size = avctx->gop_size;
978     s->avctx = avctx;
979     s->flags= avctx->flags;
980     s->flags2= avctx->flags2;
981     s->max_b_frames= avctx->max_b_frames;
982     s->codec_id= avctx->codec->id;
983     s->luma_elim_threshold  = avctx->luma_elim_threshold;
984     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
985     s->strict_std_compliance= avctx->strict_std_compliance;
986     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
987     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
988     s->mpeg_quant= avctx->mpeg_quant;
989     s->rtp_mode= !!avctx->rtp_payload_size;
990     s->intra_dc_precision= avctx->intra_dc_precision;
991     s->user_specified_pts = AV_NOPTS_VALUE;
992
993     if (s->gop_size <= 1) {
994         s->intra_only = 1;
995         s->gop_size = 12;
996     } else {
997         s->intra_only = 0;
998     }
999
1000     s->me_method = avctx->me_method;
1001
1002     /* Fixed QSCALE */
1003     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1004
1005     s->adaptive_quant= (   s->avctx->lumi_masking
1006                         || s->avctx->dark_masking
1007                         || s->avctx->temporal_cplx_masking
1008                         || s->avctx->spatial_cplx_masking
1009                         || s->avctx->p_masking
1010                         || s->avctx->border_masking
1011                         || (s->flags&CODEC_FLAG_QP_RD))
1012                        && !s->fixed_qscale;
1013
1014     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1015     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1016     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1017     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1018
1019     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1020         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
1021         return -1;
1022     }
1023
1024     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1025         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1026     }
1027
1028     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1029         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1030         return -1;
1031     }
1032
1033     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1034         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1035         return -1;
1036     }
1037
1038     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1039        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1040        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1041
1042         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1043     }
1044
1045     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1046        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1047         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1048         return -1;
1049     }
1050
1051     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1052         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1053         return -1;
1054     }
1055
1056     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1057         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1058         return -1;
1059     }
1060
1061     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1062         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1063         return -1;
1064     }
1065
1066     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1067         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1068         return -1;
1069     }
1070
1071     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1072         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1073         return -1;
1074     }
1075
1076     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1077        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1078         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1079         return -1;
1080     }
1081
1082     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1083         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1084         return -1;
1085     }
1086
1087     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1088         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1089         return -1;
1090     }
1091
1092     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1093         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1094         return -1;
1095     }
1096
1097     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1098         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1099         return -1;
1100     }
1101
1102     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1103         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->flags & CODEC_FLAG_LOW_DELAY){
1108         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1109             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1110             return -1;
1111         }
1112         if (s->max_b_frames != 0){
1113             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1114             return -1;
1115         }
1116     }
1117
1118     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1119        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1120        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1121         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1122         return -1;
1123     }
1124
1125     if(s->avctx->thread_count > 1)
1126         s->rtp_mode= 1;
1127
1128     if(!avctx->time_base.den || !avctx->time_base.num){
1129         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1130         return -1;
1131     }
1132
1133     i= (INT_MAX/2+128)>>8;
1134     if(avctx->me_threshold >= i){
1135         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1136         return -1;
1137     }
1138     if(avctx->mb_threshold >= i){
1139         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1140         return -1;
1141     }
1142
1143     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1144         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1145         avctx->b_frame_strategy = 0;
1146     }
1147
1148     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1149     if(i > 1){
1150         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1151         avctx->time_base.den /= i;
1152         avctx->time_base.num /= i;
1153 //        return -1;
1154     }
1155
1156     if(s->codec_id==CODEC_ID_MJPEG){
1157         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1158         s->inter_quant_bias= 0;
1159     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1160         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1161         s->inter_quant_bias= 0;
1162     }else{
1163         s->intra_quant_bias=0;
1164         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1165     }
1166
1167     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1168         s->intra_quant_bias= avctx->intra_quant_bias;
1169     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1170         s->inter_quant_bias= avctx->inter_quant_bias;
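    /* the quant bias values above are in units of 1/(1<<QUANT_BIAS_SHIFT) of a
     * quantization step, so 1<<(QUANT_BIAS_SHIFT-1) means round to nearest,
     * 0 means truncate toward zero, and the negative inter bias creates a
     * small dead zone around zero, which usually saves bits on inter blocks at
     * little quality cost */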
1171
1172     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1173
1174     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1175         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1176         return -1;
1177     }
1178     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
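    /* i.e. the number of bits needed to code any value in [0, time_base.den-1]
     * for the MPEG-4 vop_time_increment field; e.g. a 30000/1001 time base
     * gives av_log2(29999)+1 = 15 bits */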
1179
1180     switch(avctx->codec->id) {
1181     case CODEC_ID_MPEG1VIDEO:
1182         s->out_format = FMT_MPEG1;
1183         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1184         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1185         break;
1186     case CODEC_ID_MPEG2VIDEO:
1187         s->out_format = FMT_MPEG1;
1188         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1189         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1190         s->rtp_mode= 1;
1191         break;
1192     case CODEC_ID_LJPEG:
1193     case CODEC_ID_JPEGLS:
1194     case CODEC_ID_MJPEG:
1195         s->out_format = FMT_MJPEG;
1196         s->intra_only = 1; /* force intra only for jpeg */
1197         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1198         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1199         s->mjpeg_vsample[0] = 2;
1200         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1201         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1202         s->mjpeg_hsample[0] = 2;
1203         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1204         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1205         if (mjpeg_init(s) < 0)
1206             return -1;
1207         avctx->delay=0;
1208         s->low_delay=1;
1209         break;
1210     case CODEC_ID_H261:
1211         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1212             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1213             return -1;
1214         }
1215         s->out_format = FMT_H261;
1216         avctx->delay=0;
1217         s->low_delay=1;
1218         break;
1219     case CODEC_ID_H263:
1220         if (h263_get_picture_format(s->width, s->height) == 7) {
1221             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1222             return -1;
1223         }
1224         s->out_format = FMT_H263;
1225         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1226         avctx->delay=0;
1227         s->low_delay=1;
1228         break;
1229     case CODEC_ID_H263P:
1230         s->out_format = FMT_H263;
1231         s->h263_plus = 1;
1232         /* Fx */
1233         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1234         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1235         s->modified_quant= s->h263_aic;
1236         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1237         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1238         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1239         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1240         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1241
1242         /* /Fx */
1243         /* These are just to be sure */
1244         avctx->delay=0;
1245         s->low_delay=1;
1246         break;
1247     case CODEC_ID_FLV1:
1248         s->out_format = FMT_H263;
1249         s->h263_flv = 2; /* format = 1; 11-bit codes */
1250         s->unrestricted_mv = 1;
1251         s->rtp_mode=0; /* don't allow GOB */
1252         avctx->delay=0;
1253         s->low_delay=1;
1254         break;
1255     case CODEC_ID_RV10:
1256         s->out_format = FMT_H263;
1257         avctx->delay=0;
1258         s->low_delay=1;
1259         break;
1260     case CODEC_ID_RV20:
1261         s->out_format = FMT_H263;
1262         avctx->delay=0;
1263         s->low_delay=1;
1264         s->modified_quant=1;
1265         s->h263_aic=1;
1266         s->h263_plus=1;
1267         s->loop_filter=1;
1268         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1269         break;
1270     case CODEC_ID_MPEG4:
1271         s->out_format = FMT_H263;
1272         s->h263_pred = 1;
1273         s->unrestricted_mv = 1;
1274         s->low_delay= s->max_b_frames ? 0 : 1;
1275         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1276         break;
1277     case CODEC_ID_MSMPEG4V1:
1278         s->out_format = FMT_H263;
1279         s->h263_msmpeg4 = 1;
1280         s->h263_pred = 1;
1281         s->unrestricted_mv = 1;
1282         s->msmpeg4_version= 1;
1283         avctx->delay=0;
1284         s->low_delay=1;
1285         break;
1286     case CODEC_ID_MSMPEG4V2:
1287         s->out_format = FMT_H263;
1288         s->h263_msmpeg4 = 1;
1289         s->h263_pred = 1;
1290         s->unrestricted_mv = 1;
1291         s->msmpeg4_version= 2;
1292         avctx->delay=0;
1293         s->low_delay=1;
1294         break;
1295     case CODEC_ID_MSMPEG4V3:
1296         s->out_format = FMT_H263;
1297         s->h263_msmpeg4 = 1;
1298         s->h263_pred = 1;
1299         s->unrestricted_mv = 1;
1300         s->msmpeg4_version= 3;
1301         s->flipflop_rounding=1;
1302         avctx->delay=0;
1303         s->low_delay=1;
1304         break;
1305     case CODEC_ID_WMV1:
1306         s->out_format = FMT_H263;
1307         s->h263_msmpeg4 = 1;
1308         s->h263_pred = 1;
1309         s->unrestricted_mv = 1;
1310         s->msmpeg4_version= 4;
1311         s->flipflop_rounding=1;
1312         avctx->delay=0;
1313         s->low_delay=1;
1314         break;
1315     case CODEC_ID_WMV2:
1316         s->out_format = FMT_H263;
1317         s->h263_msmpeg4 = 1;
1318         s->h263_pred = 1;
1319         s->unrestricted_mv = 1;
1320         s->msmpeg4_version= 5;
1321         s->flipflop_rounding=1;
1322         avctx->delay=0;
1323         s->low_delay=1;
1324         break;
1325     default:
1326         return -1;
1327     }
1328
1329     avctx->has_b_frames= !s->low_delay;
1330
1331     s->encoding = 1;
1332
1333     /* init */
1334     if (MPV_common_init(s) < 0)
1335         return -1;
1336
1337     if(s->modified_quant)
1338         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1339     s->progressive_frame=
1340     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1341     s->quant_precision=5;
1342
1343     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1344     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1345
1346 #ifdef CONFIG_H261_ENCODER
1347     if (s->out_format == FMT_H261)
1348         ff_h261_encode_init(s);
1349 #endif
1350     if (s->out_format == FMT_H263)
1351         h263_encode_init(s);
1352     if(s->msmpeg4_version)
1353         ff_msmpeg4_encode_init(s);
1354     if (s->out_format == FMT_MPEG1)
1355         ff_mpeg1_encode_init(s);
1356
1357     /* init q matrix */
1358     for(i=0;i<64;i++) {
1359         int j= s->dsp.idct_permutation[i];
1360         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1361             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1362             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1363         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1364             s->intra_matrix[j] =
1365             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1366         }else
1367         { /* mpeg1/2 */
1368             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1369             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1370         }
1371         if(s->avctx->intra_matrix)
1372             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1373         if(s->avctx->inter_matrix)
1374             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1375     }
1376
1377     /* precompute matrix */
1378     /* for mjpeg, we do include qscale in the matrix */
1379     if (s->out_format != FMT_MJPEG) {
1380         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1381                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1382         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1383                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1384     }
1385
1386     if(ff_rate_control_init(s) < 0)
1387         return -1;
1388
1389     return 0;
1390 }
1391
1392 int MPV_encode_end(AVCodecContext *avctx)
1393 {
1394     MpegEncContext *s = avctx->priv_data;
1395
1396     ff_rate_control_uninit(s);
1397
1398     MPV_common_end(s);
1399     if (s->out_format == FMT_MJPEG)
1400         mjpeg_close(s);
1401
1402     av_freep(&avctx->extradata);
1403
1404     return 0;
1405 }
1406
1407 #endif //CONFIG_ENCODERS
1408
1409 void init_rl(RLTable *rl, int use_static)
1410 {
1411     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1412     uint8_t index_run[MAX_RUN+1];
1413     int last, run, level, start, end, i;
1414
1415     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1416     if(use_static && rl->max_level[0])
1417         return;
1418
1419     /* compute max_level[], max_run[] and index_run[] */
1420     for(last=0;last<2;last++) {
1421         if (last == 0) {
1422             start = 0;
1423             end = rl->last;
1424         } else {
1425             start = rl->last;
1426             end = rl->n;
1427         }
1428
1429         memset(max_level, 0, MAX_RUN + 1);
1430         memset(max_run, 0, MAX_LEVEL + 1);
1431         memset(index_run, rl->n, MAX_RUN + 1);
1432         for(i=start;i<end;i++) {
1433             run = rl->table_run[i];
1434             level = rl->table_level[i];
1435             if (index_run[run] == rl->n)
1436                 index_run[run] = i;
1437             if (level > max_level[run])
1438                 max_level[run] = level;
1439             if (run > max_run[level])
1440                 max_run[level] = run;
1441         }
1442         if(use_static)
1443             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1444         else
1445             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1446         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1447         if(use_static)
1448             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1449         else
1450             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1451         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1452         if(use_static)
1453             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1454         else
1455             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1456         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1457     }
1458 }
1459
1460 /* draw the edges of width 'w' of an image of size width, height */
1461 //FIXME check that this is ok for mpeg4 interlaced
1462 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1463 {
1464     uint8_t *ptr, *last_line;
1465     int i;
1466
1467     last_line = buf + (height - 1) * wrap;
1468     for(i=0;i<w;i++) {
1469         /* top and bottom */
1470         memcpy(buf - (i + 1) * wrap, buf, width);
1471         memcpy(last_line + (i + 1) * wrap, last_line, width);
1472     }
1473     /* left and right */
1474     ptr = buf;
1475     for(i=0;i<height;i++) {
1476         memset(ptr - w, ptr[0], w);
1477         memset(ptr + width, ptr[width-1], w);
1478         ptr += wrap;
1479     }
1480     /* corners */
1481     for(i=0;i<w;i++) {
1482         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1483         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1484         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1485         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1486     }
1487 }
1488
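/**
 * Returns the index of a free slot in s->picture[].
 * For shared buffers only entries that were never used (type==0) qualify;
 * otherwise slots whose buffer has already been released are preferred over
 * completely unused ones. Asserts if no slot is available.
 */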
1489 int ff_find_unused_picture(MpegEncContext *s, int shared){
1490     int i;
1491
1492     if(shared){
1493         for(i=0; i<MAX_PICTURE_COUNT; i++){
1494             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1495         }
1496     }else{
1497         for(i=0; i<MAX_PICTURE_COUNT; i++){
1498             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1499         }
1500         for(i=0; i<MAX_PICTURE_COUNT; i++){
1501             if(s->picture[i].data[0]==NULL) return i;
1502         }
1503     }
1504
1505     assert(0);
1506     return -1;
1507 }
1508
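/**
 * Updates the per-coefficient DCT offsets used for noise reduction.
 * dct_offset[intra][i] ends up roughly at noise_reduction * dct_count /
 * dct_error_sum[intra][i], so coefficients with a large accumulated error get
 * a small offset; both accumulators are halved once dct_count exceeds 2^16 so
 * the statistics stay a running average.
 */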
1509 static void update_noise_reduction(MpegEncContext *s){
1510     int intra, i;
1511
1512     for(intra=0; intra<2; intra++){
1513         if(s->dct_count[intra] > (1<<16)){
1514             for(i=0; i<64; i++){
1515                 s->dct_error_sum[intra][i] >>=1;
1516             }
1517             s->dct_count[intra] >>= 1;
1518         }
1519
1520         for(i=0; i<64; i++){
1521             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1522         }
1523     }
1524 }
1525
1526 /**
1527  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1528  */
1529 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1530 {
1531     int i;
1532     AVFrame *pic;
1533     s->mb_skipped = 0;
1534
1535     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1536
1537     /* mark&release old frames */
1538     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1539         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1540
1541         /* release forgotten pictures */
1542         /* if(mpeg124/h263) */
1543         if(!s->encoding){
1544             for(i=0; i<MAX_PICTURE_COUNT; i++){
1545                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1546                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1547                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1548                 }
1549             }
1550         }
1551     }
1552 alloc:
1553     if(!s->encoding){
1554         /* release non reference frames */
1555         for(i=0; i<MAX_PICTURE_COUNT; i++){
1556             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1557                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1558             }
1559         }
1560
1561         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1562             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1563         else{
1564             i= ff_find_unused_picture(s, 0);
1565             pic= (AVFrame*)&s->picture[i];
1566         }
1567
1568         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1569                         && !s->dropable ? 3 : 0;
1570
1571         pic->coded_picture_number= s->coded_picture_number++;
1572
1573         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1574             return -1;
1575
1576         s->current_picture_ptr= (Picture*)pic;
1577         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1578         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1579     }
1580
1581     s->current_picture_ptr->pict_type= s->pict_type;
1582 //    if(s->flags && CODEC_FLAG_QSCALE)
1583   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1584     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1585
1586     copy_picture(&s->current_picture, s->current_picture_ptr);
1587
1588   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1589     if (s->pict_type != B_TYPE) {
1590         s->last_picture_ptr= s->next_picture_ptr;
1591         if(!s->dropable)
1592             s->next_picture_ptr= s->current_picture_ptr;
1593     }
1594 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1595         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1596         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1597         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1598         s->pict_type, s->dropable);*/
1599
1600     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1601     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1602
1603     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1604         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1605         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1606         goto alloc;
1607     }
1608
1609     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1610
1611     if(s->picture_structure!=PICT_FRAME){
1612         int i;
1613         for(i=0; i<4; i++){
1614             if(s->picture_structure == PICT_BOTTOM_FIELD){
1615                  s->current_picture.data[i] += s->current_picture.linesize[i];
1616             }
1617             s->current_picture.linesize[i] *= 2;
1618             s->last_picture.linesize[i] *=2;
1619             s->next_picture.linesize[i] *=2;
1620         }
1621     }
1622   }
1623
1624     s->hurry_up= s->avctx->hurry_up;
1625     s->error_resilience= avctx->error_resilience;
1626
1627     /* set dequantizer, we can't do it during init as it might change for mpeg4
1628        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1629     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1630         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1631         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1632     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1633         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1634         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1635     }else{
1636         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1637         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1638     }
1639
1640     if(s->dct_error_sum){
1641         assert(s->avctx->noise_reduction && s->encoding);
1642
1643         update_noise_reduction(s);
1644     }
1645
1646 #ifdef HAVE_XVMC
1647     if(s->avctx->xvmc_acceleration)
1648         return XVMC_field_start(s, avctx);
1649 #endif
1650     return 0;
1651 }
1652
1653 /* generic function for encode/decode called after a frame has been coded/decoded */
1654 void MPV_frame_end(MpegEncContext *s)
1655 {
1656     int i;
1657     /* draw edge for correct motion prediction if outside */
1658 #ifdef HAVE_XVMC
1659 //just to make sure that all data is rendered.
1660     if(s->avctx->xvmc_acceleration){
1661         XVMC_field_end(s);
1662     }else
1663 #endif
1664     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1665             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1666             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1667             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1668     }
1669     emms_c();
1670
1671     s->last_pict_type    = s->pict_type;
1672     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1673     if(s->pict_type!=B_TYPE){
1674         s->last_non_b_pict_type= s->pict_type;
1675     }
1676 #if 0
1677         /* copy back current_picture variables */
1678     for(i=0; i<MAX_PICTURE_COUNT; i++){
1679         if(s->picture[i].data[0] == s->current_picture.data[0]){
1680             s->picture[i]= s->current_picture;
1681             break;
1682         }
1683     }
1684     assert(i<MAX_PICTURE_COUNT);
1685 #endif
1686
1687     if(s->encoding){
1688         /* release non-reference frames */
1689         for(i=0; i<MAX_PICTURE_COUNT; i++){
1690             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1691                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1692             }
1693         }
1694     }
1695     // clear copies, to avoid confusion
1696 #if 0
1697     memset(&s->last_picture, 0, sizeof(Picture));
1698     memset(&s->next_picture, 0, sizeof(Picture));
1699     memset(&s->current_picture, 0, sizeof(Picture));
1700 #endif
1701     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1702 }
1703
1704 /**
1705  * draws a line from (ex, ey) -> (sx, sy).
1706  * @param w width of the image
1707  * @param h height of the image
1708  * @param stride stride/linesize of the image
1709  * @param color color of the line
1710  */
1711 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1712     int x, y, fr, f;
1713
1714     sx= clip(sx, 0, w-1);
1715     sy= clip(sy, 0, h-1);
1716     ex= clip(ex, 0, w-1);
1717     ey= clip(ey, 0, h-1);
1718
1719     buf[sy*stride + sx]+= color;
1720
1721     if(FFABS(ex - sx) > FFABS(ey - sy)){
1722         if(sx > ex){
1723             FFSWAP(int, sx, ex);
1724             FFSWAP(int, sy, ey);
1725         }
1726         buf+= sx + sy*stride;
1727         ex-= sx;
1728         f= ((ey-sy)<<16)/ex;
1729         for(x= 0; x <= ex; x++){
1730             y = (x*f)>>16;
1731             fr= (x*f)&0xFFFF;
1732             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1733             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1734         }
1735     }else{
1736         if(sy > ey){
1737             FFSWAP(int, sx, ex);
1738             FFSWAP(int, sy, ey);
1739         }
1740         buf+= sx + sy*stride;
1741         ey-= sy;
1742         if(ey) f= ((ex-sx)<<16)/ey;
1743         else   f= 0;
1744         for(y= 0; y <= ey; y++){
1745             x = (y*f)>>16;
1746             fr= (y*f)&0xFFFF;
1747             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1748             buf[y*stride + x+1]+= (color*         fr )>>16;
1749         }
1750     }
1751 }
1752
1753 /**
1754  * draws an arrow from (ex, ey) -> (sx, sy).
1755  * @param w width of the image
1756  * @param h height of the image
1757  * @param stride stride/linesize of the image
1758  * @param color color of the arrow
1759  */
1760 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1761     int dx,dy;
1762
1763     sx= clip(sx, -100, w+100);
1764     sy= clip(sy, -100, h+100);
1765     ex= clip(ex, -100, w+100);
1766     ey= clip(ey, -100, h+100);
1767
1768     dx= ex - sx;
1769     dy= ey - sy;
1770
1771     if(dx*dx + dy*dy > 3*3){
1772         int rx=  dx + dy;
1773         int ry= -dx + dy;
1774         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1775
1776         //FIXME subpixel accuracy
1777         rx= ROUNDED_DIV(rx*3<<4, length);
1778         ry= ROUNDED_DIV(ry*3<<4, length);
1779
1780         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1781         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1782     }
1783     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1784 }
1785
1786 /**
1787  * prints debugging info for the given picture.
1788  */
1789 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1790
1791     if(!pict || !pict->mb_type) return;
1792
1793     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1794         int x,y;
1795
1796         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1797         switch (pict->pict_type) {
1798             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1799             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1800             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1801             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1802             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1803             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1804         }
1805         for(y=0; y<s->mb_height; y++){
1806             for(x=0; x<s->mb_width; x++){
1807                 if(s->avctx->debug&FF_DEBUG_SKIP){
1808                     int count= s->mbskip_table[x + y*s->mb_stride];
1809                     if(count>9) count=9;
1810                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1811                 }
1812                 if(s->avctx->debug&FF_DEBUG_QP){
1813                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1814                 }
1815                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1816                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1817                     //Type & MV direction
1818                     if(IS_PCM(mb_type))
1819                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1820                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1821                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1822                     else if(IS_INTRA4x4(mb_type))
1823                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1824                     else if(IS_INTRA16x16(mb_type))
1825                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1826                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1827                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1828                     else if(IS_DIRECT(mb_type))
1829                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1830                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1832                     else if(IS_GMC(mb_type))
1833                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1834                     else if(IS_SKIP(mb_type))
1835                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1836                     else if(!USES_LIST(mb_type, 1))
1837                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1838                     else if(!USES_LIST(mb_type, 0))
1839                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1840                     else{
1841                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1842                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1843                     }
1844
1845                     //segmentation
1846                     if(IS_8X8(mb_type))
1847                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1848                     else if(IS_16X8(mb_type))
1849                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1850                     else if(IS_8X16(mb_type))
1851                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1852                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1853                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1854                     else
1855                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1856
1857
1858                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1859                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1860                     else
1861                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1862                 }
1863 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1864             }
1865             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1866         }
1867     }
1868
1869     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1870         const int shift= 1 + s->quarter_sample;
1871         int mb_y;
1872         uint8_t *ptr;
1873         int i;
1874         int h_chroma_shift, v_chroma_shift;
1875         const int width = s->avctx->width;
1876         const int height= s->avctx->height;
1877         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1878         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1879         s->low_delay=0; //needed to see the vectors without trashing the buffers
1880
1881         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1882         for(i=0; i<3; i++){
1883             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1884             pict->data[i]= s->visualization_buffer[i];
1885         }
1886         pict->type= FF_BUFFER_TYPE_COPY;
1887         ptr= pict->data[0];
1888
1889         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1890             int mb_x;
1891             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1892                 const int mb_index= mb_x + mb_y*s->mb_stride;
1893                 if((s->avctx->debug_mv) && pict->motion_val){
1894                   int type;
1895                   for(type=0; type<3; type++){
1896                     int direction = 0;
1897                     switch (type) {
1898                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1899                                 continue;
1900                               direction = 0;
1901                               break;
1902                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1903                                 continue;
1904                               direction = 0;
1905                               break;
1906                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1907                                 continue;
1908                               direction = 1;
1909                               break;
1910                     }
1911                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1912                         continue;
1913
1914                     if(IS_8X8(pict->mb_type[mb_index])){
1915                       int i;
1916                       for(i=0; i<4; i++){
1917                         int sx= mb_x*16 + 4 + 8*(i&1);
1918                         int sy= mb_y*16 + 4 + 8*(i>>1);
1919                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1920                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1921                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1922                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1923                       }
1924                     }else if(IS_16X8(pict->mb_type[mb_index])){
1925                       int i;
1926                       for(i=0; i<2; i++){
1927                         int sx=mb_x*16 + 8;
1928                         int sy=mb_y*16 + 4 + 8*i;
1929                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1930                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1931                         int my=(pict->motion_val[direction][xy][1]>>shift);
1932
1933                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1934                             my*=2;
1935
1936                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1937                       }
1938                     }else if(IS_8X16(pict->mb_type[mb_index])){
1939                       int i;
1940                       for(i=0; i<2; i++){
1941                         int sx=mb_x*16 + 4 + 8*i;
1942                         int sy=mb_y*16 + 8;
1943                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1944                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1945                         int my=(pict->motion_val[direction][xy][1]>>shift);
1946
1947                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1948                             my*=2;
1949
1950                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1951                       }
1952                     }else{
1953                       int sx= mb_x*16 + 8;
1954                       int sy= mb_y*16 + 8;
1955                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1956                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1957                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1958                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1959                     }
1960                   }
1961                 }
1962                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1963                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1964                     int y;
1965                     for(y=0; y<8; y++){
1966                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1967                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1968                     }
1969                 }
1970                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1971                     int mb_type= pict->mb_type[mb_index];
1972                     uint64_t u,v;
1973                     int y;
1974 #define COLOR(theta, r)\
1975 u= (int)(128 + r*cos(theta*3.141592/180));\
1976 v= (int)(128 + r*sin(theta*3.141592/180));
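                    /* COLOR(theta, r) maps a hue angle in degrees and a
                       saturation radius onto the U/V chroma plane around the
                       neutral value 128, giving each macroblock type its own
                       tint in the visualization below. */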
1977
1978
1979                     u=v=128;
1980                     if(IS_PCM(mb_type)){
1981                         COLOR(120,48)
1982                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1983                         COLOR(30,48)
1984                     }else if(IS_INTRA4x4(mb_type)){
1985                         COLOR(90,48)
1986                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1987 //                        COLOR(120,48)
1988                     }else if(IS_DIRECT(mb_type)){
1989                         COLOR(150,48)
1990                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1991                         COLOR(170,48)
1992                     }else if(IS_GMC(mb_type)){
1993                         COLOR(190,48)
1994                     }else if(IS_SKIP(mb_type)){
1995 //                        COLOR(180,48)
1996                     }else if(!USES_LIST(mb_type, 1)){
1997                         COLOR(240,48)
1998                     }else if(!USES_LIST(mb_type, 0)){
1999                         COLOR(0,48)
2000                     }else{
2001                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2002                         COLOR(300,48)
2003                     }
2004
2005                     u*= 0x0101010101010101ULL;
2006                     v*= 0x0101010101010101ULL;
2007                     for(y=0; y<8; y++){
2008                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2009                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2010                     }
2011
2012                     //segmentation
2013                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2014                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2015                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2016                     }
2017                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2018                         for(y=0; y<16; y++)
2019                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2020                     }
2021                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2022                         int dm= 1 << (mv_sample_log2-2);
2023                         for(i=0; i<4; i++){
2024                             int sx= mb_x*16 + 8*(i&1);
2025                             int sy= mb_y*16 + 8*(i>>1);
2026                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2027                             //FIXME bidir
2028                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2029                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2030                                 for(y=0; y<8; y++)
2031                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2032                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2033                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2034                         }
2035                     }
2036
2037                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2038                         // hmm
2039                     }
2040                 }
2041                 s->mbskip_table[mb_index]=0;
2042             }
2043         }
2044     }
2045 }
2046
2047 #ifdef CONFIG_ENCODERS
2048
2049 static int get_sae(uint8_t *src, int ref, int stride){
2050     int x,y;
2051     int acc=0;
2052
2053     for(y=0; y<16; y++){
2054         for(x=0; x<16; x++){
2055             acc+= FFABS(src[x+y*stride] - ref);
2056         }
2057     }
2058
2059     return acc;
2060 }
2061
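/**
 * Counts the 16x16 blocks of the picture for which coding against a flat
 * block of their own mean (a rough intra-cost estimate) is clearly cheaper
 * than coding against the previous input frame (a rough inter-cost estimate).
 * Used by b_frame_strategy 1 as a crude change/motion measure between frames.
 */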
2062 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2063     int x, y, w, h;
2064     int acc=0;
2065
2066     w= s->width &~15;
2067     h= s->height&~15;
2068
2069     for(y=0; y<h; y+=16){
2070         for(x=0; x<w; x+=16){
2071             int offset= x + y*stride;
2072             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2073             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2074             int sae = get_sae(src + offset, mean, stride);
2075
2076             acc+= sae + 500 < sad;
2077         }
2078     }
2079     return acc;
2080 }
2081
2082
2083 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2084     AVFrame *pic=NULL;
2085     int64_t pts;
2086     int i;
2087     const int encoding_delay= s->max_b_frames;
2088     int direct=1;
2089
2090     if(pic_arg){
2091         pts= pic_arg->pts;
2092         pic_arg->display_picture_number= s->input_picture_number++;
2093
2094         if(pts != AV_NOPTS_VALUE){
2095             if(s->user_specified_pts != AV_NOPTS_VALUE){
2096                 int64_t time= pts;
2097                 int64_t last= s->user_specified_pts;
2098
2099                 if(time <= last){
2100                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2101                     return -1;
2102                 }
2103             }
2104             s->user_specified_pts= pts;
2105         }else{
2106             if(s->user_specified_pts != AV_NOPTS_VALUE){
2107                 s->user_specified_pts=
2108                 pts= s->user_specified_pts + 1;
2109                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2110             }else{
2111                 pts= pic_arg->display_picture_number;
2112             }
2113         }
2114     }
2115
2116   if(pic_arg){
2117     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2118     if(pic_arg->linesize[0] != s->linesize) direct=0;
2119     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2120     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2121
2122 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2123
2124     if(direct){
2125         i= ff_find_unused_picture(s, 1);
2126
2127         pic= (AVFrame*)&s->picture[i];
2128         pic->reference= 3;
2129
2130         for(i=0; i<4; i++){
2131             pic->data[i]= pic_arg->data[i];
2132             pic->linesize[i]= pic_arg->linesize[i];
2133         }
2134         alloc_picture(s, (Picture*)pic, 1);
2135     }else{
2136         i= ff_find_unused_picture(s, 0);
2137
2138         pic= (AVFrame*)&s->picture[i];
2139         pic->reference= 3;
2140
2141         alloc_picture(s, (Picture*)pic, 0);
2142
2143         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2144            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2145            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2146        // empty
2147         }else{
2148             int h_chroma_shift, v_chroma_shift;
2149             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2150
2151             for(i=0; i<3; i++){
2152                 int src_stride= pic_arg->linesize[i];
2153                 int dst_stride= i ? s->uvlinesize : s->linesize;
2154                 int h_shift= i ? h_chroma_shift : 0;
2155                 int v_shift= i ? v_chroma_shift : 0;
2156                 int w= s->width >>h_shift;
2157                 int h= s->height>>v_shift;
2158                 uint8_t *src= pic_arg->data[i];
2159                 uint8_t *dst= pic->data[i];
2160
2161                 if(!s->avctx->rc_buffer_size)
2162                     dst +=INPLACE_OFFSET;
2163
2164                 if(src_stride==dst_stride)
2165                     memcpy(dst, src, src_stride*h);
2166                 else{
2167                     while(h--){
2168                         memcpy(dst, src, w);
2169                         dst += dst_stride;
2170                         src += src_stride;
2171                     }
2172                 }
2173             }
2174         }
2175     }
2176     copy_picture_attributes(s, pic, pic_arg);
2177     pic->pts= pts; //we set this here to avoid modifying pic_arg
2178   }
2179
2180     /* shift buffer entries */
2181     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2182         s->input_picture[i-1]= s->input_picture[i];
2183
2184     s->input_picture[encoding_delay]= (Picture*)pic;
2185
2186     return 0;
2187 }
2188
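/**
 * Decides whether the current input frame may be skipped entirely.
 * Compares p against ref in 8x8 blocks with frame_skip_cmp, combines the
 * per-block results according to frame_skip_exp and returns 1 if the total
 * stays below frame_skip_threshold or below the lambda-scaled
 * frame_skip_factor.
 */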
2189 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2190     int x, y, plane;
2191     int score=0;
2192     int64_t score64=0;
2193
2194     for(plane=0; plane<3; plane++){
2195         const int stride= p->linesize[plane];
2196         const int bw= plane ? 1 : 2;
2197         for(y=0; y<s->mb_height*bw; y++){
2198             for(x=0; x<s->mb_width*bw; x++){
2199                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2200                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2201
2202                 switch(s->avctx->frame_skip_exp){
2203                     case 0: score= FFMAX(score, v); break;
2204                     case 1: score+= FFABS(v);break;
2205                     case 2: score+= v*v;break;
2206                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2207                     case 4: score64+= v*v*(int64_t)(v*v);break;
2208                 }
2209             }
2210         }
2211     }
2212
2213     if(score) score64= score;
2214
2215     if(score64 < s->avctx->frame_skip_threshold)
2216         return 1;
2217     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2218         return 1;
2219     return 0;
2220 }
2221
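/**
 * Estimates the best number of B-frames to place before the next P-frame
 * (b_frame_strategy 2): the buffered input pictures are downscaled by
 * 2^brd_scale, encoded once for every candidate B-frame count, and the count
 * with the lowest combined bits + SSE cost is returned.
 */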
2222 static int estimate_best_b_count(MpegEncContext *s){
2223     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2224     AVCodecContext *c= avcodec_alloc_context();
2225     AVFrame input[FF_MAX_B_FRAMES+2];
2226     const int scale= s->avctx->brd_scale;
2227     int i, j, out_size, p_lambda, b_lambda, lambda2;
2228     int outbuf_size= s->width * s->height; //FIXME
2229     uint8_t *outbuf= av_malloc(outbuf_size);
2230     int64_t best_rd= INT64_MAX;
2231     int best_b_count= -1;
2232
2233     assert(scale>=0 && scale <=3);
2234
2235 //    emms_c();
2236     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2237     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2238     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2239     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2240
2241     c->width = s->width >> scale;
2242     c->height= s->height>> scale;
2243     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2244     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2245     c->mb_decision= s->avctx->mb_decision;
2246     c->me_cmp= s->avctx->me_cmp;
2247     c->mb_cmp= s->avctx->mb_cmp;
2248     c->me_sub_cmp= s->avctx->me_sub_cmp;
2249     c->pix_fmt = PIX_FMT_YUV420P;
2250     c->time_base= s->avctx->time_base;
2251     c->max_b_frames= s->max_b_frames;
2252
2253     if (avcodec_open(c, codec) < 0)
2254         return -1;
2255
2256     for(i=0; i<s->max_b_frames+2; i++){
2257         int ysize= c->width*c->height;
2258         int csize= (c->width/2)*(c->height/2);
2259         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2260
2261         if(pre_input_ptr)
2262             pre_input= *pre_input_ptr;
2263
2264         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2265             pre_input.data[0]+=INPLACE_OFFSET;
2266             pre_input.data[1]+=INPLACE_OFFSET;
2267             pre_input.data[2]+=INPLACE_OFFSET;
2268         }
2269
2270         avcodec_get_frame_defaults(&input[i]);
2271         input[i].data[0]= av_malloc(ysize + 2*csize);
2272         input[i].data[1]= input[i].data[0] + ysize;
2273         input[i].data[2]= input[i].data[1] + csize;
2274         input[i].linesize[0]= c->width;
2275         input[i].linesize[1]=
2276         input[i].linesize[2]= c->width/2;
2277
2278         if(!i || s->input_picture[i-1]){
2279             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2280             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2281             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2282         }
2283     }
2284
2285     for(j=0; j<s->max_b_frames+1; j++){
2286         int64_t rd=0;
2287
2288         if(!s->input_picture[j])
2289             break;
2290
2291         c->error[0]= c->error[1]= c->error[2]= 0;
2292
2293         input[0].pict_type= I_TYPE;
2294         input[0].quality= 1 * FF_QP2LAMBDA;
2295         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2296 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2297
2298         for(i=0; i<s->max_b_frames+1; i++){
2299             int is_p= i % (j+1) == j || i==s->max_b_frames;
2300
2301             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2302             input[i+1].quality= is_p ? p_lambda : b_lambda;
2303             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2304             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2305         }
2306
2307         /* get the delayed frames */
2308         while(out_size){
2309             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2310             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2311         }
2312
2313         rd += c->error[0] + c->error[1] + c->error[2];
2314
2315         if(rd < best_rd){
2316             best_rd= rd;
2317             best_b_count= j;
2318         }
2319     }
2320
2321     av_freep(&outbuf);
2322     avcodec_close(c);
2323     av_freep(&c);
2324
2325     for(i=0; i<s->max_b_frames+2; i++){
2326         av_freep(&input[i].data[0]);
2327     }
2328
2329     return best_b_count;
2330 }
2331
2332 static void select_input_picture(MpegEncContext *s){
2333     int i;
2334
2335     for(i=1; i<MAX_PICTURE_COUNT; i++)
2336         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2337     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2338
2339     /* set next picture type & ordering */
2340     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2341         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2342             s->reordered_input_picture[0]= s->input_picture[0];
2343             s->reordered_input_picture[0]->pict_type= I_TYPE;
2344             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2345         }else{
2346             int b_frames;
2347
2348             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2349                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2350                 //FIXME check that the gop check above is +-1 correct
2351 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2352
2353                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2354                         for(i=0; i<4; i++)
2355                             s->input_picture[0]->data[i]= NULL;
2356                         s->input_picture[0]->type= 0;
2357                     }else{
2358                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2359                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2360
2361                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2362                     }
2363
2364                     emms_c();
2365                     ff_vbv_update(s, 0);
2366
2367                     goto no_output_pic;
2368                 }
2369             }
2370
2371             if(s->flags&CODEC_FLAG_PASS2){
2372                 for(i=0; i<s->max_b_frames+1; i++){
2373                     int pict_num= s->input_picture[0]->display_picture_number + i;
2374
2375                     if(pict_num >= s->rc_context.num_entries)
2376                         break;
2377                     if(!s->input_picture[i]){
2378                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2379                         break;
2380                     }
2381
2382                     s->input_picture[i]->pict_type=
2383                         s->rc_context.entry[pict_num].new_pict_type;
2384                 }
2385             }
2386
2387             if(s->avctx->b_frame_strategy==0){
2388                 b_frames= s->max_b_frames;
2389                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2390             }else if(s->avctx->b_frame_strategy==1){
2391                 for(i=1; i<s->max_b_frames+1; i++){
2392                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2393                         s->input_picture[i]->b_frame_score=
2394                             get_intra_count(s, s->input_picture[i  ]->data[0],
2395                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2396                     }
2397                 }
2398                 for(i=0; i<s->max_b_frames+1; i++){
2399                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2400                 }
2401
2402                 b_frames= FFMAX(0, i-1);
2403
2404                 /* reset scores */
2405                 for(i=0; i<b_frames+1; i++){
2406                     s->input_picture[i]->b_frame_score=0;
2407                 }
2408             }else if(s->avctx->b_frame_strategy==2){
2409                 b_frames= estimate_best_b_count(s);
2410             }else{
2411                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2412                 b_frames=0;
2413             }
2414
2415             emms_c();
2416 //static int b_count=0;
2417 //b_count+= b_frames;
2418 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2419
2420             for(i= b_frames - 1; i>=0; i--){
2421                 int type= s->input_picture[i]->pict_type;
2422                 if(type && type != B_TYPE)
2423                     b_frames= i;
2424             }
2425             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2426                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2427             }
2428
2429             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2430               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2431                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2432               }else{
2433                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2434                     b_frames=0;
2435                 s->input_picture[b_frames]->pict_type= I_TYPE;
2436               }
2437             }
2438
2439             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2440                && b_frames
2441                && s->input_picture[b_frames]->pict_type== I_TYPE)
2442                 b_frames--;
2443
2444             s->reordered_input_picture[0]= s->input_picture[b_frames];
2445             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2446                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2447             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2448             for(i=0; i<b_frames; i++){
2449                 s->reordered_input_picture[i+1]= s->input_picture[i];
2450                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2451                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2452             }
2453         }
2454     }
2455 no_output_pic:
2456     if(s->reordered_input_picture[0]){
2457         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2458
2459         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2460
2461         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2462             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2463
2464             int i= ff_find_unused_picture(s, 0);
2465             Picture *pic= &s->picture[i];
2466
2467             pic->reference              = s->reordered_input_picture[0]->reference;
2468             alloc_picture(s, pic, 0);
2469
2470             /* mark us unused / free shared pic */
2471             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2472                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2473             for(i=0; i<4; i++)
2474                 s->reordered_input_picture[0]->data[i]= NULL;
2475             s->reordered_input_picture[0]->type= 0;
2476
2477             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2478
2479             s->current_picture_ptr= pic;
2480         }else{
2481             // input is not a shared pix -> reuse buffer for current_pix
2482
2483             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2484                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2485
2486             s->current_picture_ptr= s->reordered_input_picture[0];
2487             for(i=0; i<4; i++){
2488                 s->new_picture.data[i]+= INPLACE_OFFSET;
2489             }
2490         }
2491         copy_picture(&s->current_picture, s->current_picture_ptr);
2492
2493         s->picture_number= s->new_picture.display_picture_number;
2494 //printf("dpn:%d\n", s->picture_number);
2495     }else{
2496        memset(&s->new_picture, 0, sizeof(Picture));
2497     }
2498 }
2499
2500 int MPV_encode_picture(AVCodecContext *avctx,
2501                        unsigned char *buf, int buf_size, void *data)
2502 {
2503     MpegEncContext *s = avctx->priv_data;
2504     AVFrame *pic_arg = data;
2505     int i, stuffing_count;
2506
2507     for(i=0; i<avctx->thread_count; i++){
2508         int start_y= s->thread_context[i]->start_mb_y;
2509         int   end_y= s->thread_context[i]->  end_mb_y;
2510         int h= s->mb_height;
2511         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2512         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2513
2514         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2515     }
2516
2517     s->picture_in_gop_number++;
2518
2519     if(load_input_picture(s, pic_arg) < 0)
2520         return -1;
2521
2522     select_input_picture(s);
2523
2524     /* output? */
2525     if(s->new_picture.data[0]){
2526         s->pict_type= s->new_picture.pict_type;
2527 //emms_c();
2528 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2529         MPV_frame_start(s, avctx);
2530 vbv_retry:
2531         if (encode_picture(s, s->picture_number) < 0)
2532             return -1;
2533
2534         avctx->real_pict_num  = s->picture_number;
2535         avctx->header_bits = s->header_bits;
2536         avctx->mv_bits     = s->mv_bits;
2537         avctx->misc_bits   = s->misc_bits;
2538         avctx->i_tex_bits  = s->i_tex_bits;
2539         avctx->p_tex_bits  = s->p_tex_bits;
2540         avctx->i_count     = s->i_count;
2541         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2542         avctx->skip_count  = s->skip_count;
2543
2544         MPV_frame_end(s);
2545
2546         if (s->out_format == FMT_MJPEG)
2547             mjpeg_picture_trailer(s);
2548
2549         if(avctx->rc_buffer_size){
2550             RateControlContext *rcc= &s->rc_context;
2551             int max_size= rcc->buffer_index/3;
2552
2553             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2554                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2555                 s->mb_skipped = 0;        //done in MPV_frame_start()
2556                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2557                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2558                         s->no_rounding ^= 1;
2559                 }
2560                 if(s->pict_type!=B_TYPE){
2561                     s->time_base= s->last_time_base;
2562                     s->last_non_b_time= s->time - s->pp_time;
2563                 }
2564 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
2565                 for(i=0; i<avctx->thread_count; i++){
2566                     PutBitContext *pb= &s->thread_context[i]->pb;
2567                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2568                 }
2569                 goto vbv_retry;
2570             }
2571
2572             assert(s->avctx->rc_max_rate);
2573         }
2574
2575         if(s->flags&CODEC_FLAG_PASS1)
2576             ff_write_pass1_stats(s);
2577
2578         for(i=0; i<4; i++){
2579             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2580             avctx->error[i] += s->current_picture_ptr->error[i];
2581         }
2582
2583         if(s->flags&CODEC_FLAG_PASS1)
2584             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2585         flush_put_bits(&s->pb);
2586         s->frame_bits  = put_bits_count(&s->pb);
2587
2588         stuffing_count= ff_vbv_update(s, s->frame_bits);
2589         if(stuffing_count){
2590             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2591                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2592                 return -1;
2593             }
2594
2595             switch(s->codec_id){
2596             case CODEC_ID_MPEG1VIDEO:
2597             case CODEC_ID_MPEG2VIDEO:
2598                 while(stuffing_count--){
2599                     put_bits(&s->pb, 8, 0);
2600                 }
2601             break;
2602             case CODEC_ID_MPEG4:
2603                 put_bits(&s->pb, 16, 0);
2604                 put_bits(&s->pb, 16, 0x1C3);
2605                 stuffing_count -= 4;
2606                 while(stuffing_count--){
2607                     put_bits(&s->pb, 8, 0xFF);
2608                 }
2609             break;
2610             default:
2611                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2612             }
2613             flush_put_bits(&s->pb);
2614             s->frame_bits  = put_bits_count(&s->pb);
2615         }
2616
2617         /* update mpeg1/2 vbv_delay for CBR */
2618         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2619            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2620             int vbv_delay;
2621
2622             assert(s->repeat_first_field==0);
2623
2624             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2625             assert(vbv_delay < 0xFFFF);
2626
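            /* the 16 bit vbv_delay field straddles three bytes of the already
               written picture header: the top 3 bits go into the low bits of
               the first byte, the middle 8 bits fill the second byte and the
               low 5 bits end up in the high bits of the third byte. */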
2627             s->vbv_delay_ptr[0] &= 0xF8;
2628             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2629             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2630             s->vbv_delay_ptr[2] &= 0x07;
2631             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2632         }
2633         s->total_bits += s->frame_bits;
2634         avctx->frame_bits  = s->frame_bits;
2635     }else{
2636         assert((pbBufPtr(&s->pb) == s->pb.buf));
2637         s->frame_bits=0;
2638     }
2639     assert((s->frame_bits&7)==0);
2640
2641     return s->frame_bits/8;
2642 }
2643
2644 #endif //CONFIG_ENCODERS
2645
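/**
 * Global motion compensation for the simple one-warp-point case: the single
 * translational sprite offset (with sprite_warping_accuracy fractional bits)
 * is applied to the luma and chroma planes via dsp.gmc1(), falling back to
 * plain (no-rounding aware) half-pel copies when no finer interpolation is
 * needed.
 */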
2646 static inline void gmc1_motion(MpegEncContext *s,
2647                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2648                                uint8_t **ref_picture)
2649 {
2650     uint8_t *ptr;
2651     int offset, src_x, src_y, linesize, uvlinesize;
2652     int motion_x, motion_y;
2653     int emu=0;
2654
2655     motion_x= s->sprite_offset[0][0];
2656     motion_y= s->sprite_offset[0][1];
2657     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2658     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2659     motion_x<<=(3-s->sprite_warping_accuracy);
2660     motion_y<<=(3-s->sprite_warping_accuracy);
2661     src_x = clip(src_x, -16, s->width);
2662     if (src_x == s->width)
2663         motion_x =0;
2664     src_y = clip(src_y, -16, s->height);
2665     if (src_y == s->height)
2666         motion_y =0;
2667
2668     linesize = s->linesize;
2669     uvlinesize = s->uvlinesize;
2670
2671     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2672
2673     if(s->flags&CODEC_FLAG_EMU_EDGE){
2674         if(   (unsigned)src_x >= s->h_edge_pos - 17
2675            || (unsigned)src_y >= s->v_edge_pos - 17){
2676             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2677             ptr= s->edge_emu_buffer;
2678         }
2679     }
2680
2681     if((motion_x|motion_y)&7){
2682         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2683         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2684     }else{
2685         int dxy;
2686
2687         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2688         if (s->no_rounding){
2689             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2690         }else{
2691             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2692         }
2693     }
2694
2695     if(s->flags&CODEC_FLAG_GRAY) return;
2696
2697     motion_x= s->sprite_offset[1][0];
2698     motion_y= s->sprite_offset[1][1];
2699     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2700     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2701     motion_x<<=(3-s->sprite_warping_accuracy);
2702     motion_y<<=(3-s->sprite_warping_accuracy);
2703     src_x = clip(src_x, -8, s->width>>1);
2704     if (src_x == s->width>>1)
2705         motion_x =0;
2706     src_y = clip(src_y, -8, s->height>>1);
2707     if (src_y == s->height>>1)
2708         motion_y =0;
2709
2710     offset = (src_y * uvlinesize) + src_x;
2711     ptr = ref_picture[1] + offset;
2712     if(s->flags&CODEC_FLAG_EMU_EDGE){
2713         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2714            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2715             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2716             ptr= s->edge_emu_buffer;
2717             emu=1;
2718         }
2719     }
2720     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2721
2722     ptr = ref_picture[2] + offset;
2723     if(emu){
2724         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2725         ptr= s->edge_emu_buffer;
2726     }
2727     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2728
2729     return;
2730 }
2731
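/* General global motion compensation: sprite_offset gives the translation and
 * sprite_delta the 2x2 warp matrix, both evaluated here at the macroblock's
 * top-left corner. dsp.gmc() interpolates luma as two 8x16 halves and each
 * chroma plane as one 8x8 block, clipping against h_edge_pos/v_edge_pos. */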
2732 static inline void gmc_motion(MpegEncContext *s,
2733                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2734                                uint8_t **ref_picture)
2735 {
2736     uint8_t *ptr;
2737     int linesize, uvlinesize;
2738     const int a= s->sprite_warping_accuracy;
2739     int ox, oy;
2740
2741     linesize = s->linesize;
2742     uvlinesize = s->uvlinesize;
2743
2744     ptr = ref_picture[0];
2745
2746     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2747     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2748
2749     s->dsp.gmc(dest_y, ptr, linesize, 16,
2750            ox,
2751            oy,
2752            s->sprite_delta[0][0], s->sprite_delta[0][1],
2753            s->sprite_delta[1][0], s->sprite_delta[1][1],
2754            a+1, (1<<(2*a+1)) - s->no_rounding,
2755            s->h_edge_pos, s->v_edge_pos);
2756     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2757            ox + s->sprite_delta[0][0]*8,
2758            oy + s->sprite_delta[1][0]*8,
2759            s->sprite_delta[0][0], s->sprite_delta[0][1],
2760            s->sprite_delta[1][0], s->sprite_delta[1][1],
2761            a+1, (1<<(2*a+1)) - s->no_rounding,
2762            s->h_edge_pos, s->v_edge_pos);
2763
2764     if(s->flags&CODEC_FLAG_GRAY) return;
2765
2766     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2767     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2768
2769     ptr = ref_picture[1];
2770     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2771            ox,
2772            oy,
2773            s->sprite_delta[0][0], s->sprite_delta[0][1],
2774            s->sprite_delta[1][0], s->sprite_delta[1][1],
2775            a+1, (1<<(2*a+1)) - s->no_rounding,
2776            s->h_edge_pos>>1, s->v_edge_pos>>1);
2777
2778     ptr = ref_picture[2];
2779     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2780            ox,
2781            oy,
2782            s->sprite_delta[0][0], s->sprite_delta[0][1],
2783            s->sprite_delta[1][0], s->sprite_delta[1][1],
2784            a+1, (1<<(2*a+1)) - s->no_rounding,
2785            s->h_edge_pos>>1, s->v_edge_pos>>1);
2786 }
2787
2788 /**
2789  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2790  * @param buf destination buffer
2791  * @param src source buffer
2792  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2793  * @param block_w width of block
2794  * @param block_h height of block
2795  * @param src_x x coordinate of the top left sample of the block in the source buffer
2796  * @param src_y y coordinate of the top left sample of the block in the source buffer
2797  * @param w width of the source buffer
2798  * @param h height of the source buffer
2799  */
2800 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2801                                     int src_x, int src_y, int w, int h){
2802     int x, y;
2803     int start_y, start_x, end_y, end_x;
2804
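    /* First clamp the source position so the block still overlaps the picture
     * by at least one sample in each direction; the loops below then only
     * need to copy the valid part and replicate it outwards. */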
2805     if(src_y>= h){
2806         src+= (h-1-src_y)*linesize;
2807         src_y=h-1;
2808     }else if(src_y<=-block_h){
2809         src+= (1-block_h-src_y)*linesize;
2810         src_y=1-block_h;
2811     }
2812     if(src_x>= w){
2813         src+= (w-1-src_x);
2814         src_x=w-1;
2815     }else if(src_x<=-block_w){
2816         src+= (1-block_w-src_x);
2817         src_x=1-block_w;
2818     }
2819
2820     start_y= FFMAX(0, -src_y);
2821     start_x= FFMAX(0, -src_x);
2822     end_y= FFMIN(block_h, h-src_y);
2823     end_x= FFMIN(block_w, w-src_x);
2824
2825     // copy existing part
2826     for(y=start_y; y<end_y; y++){
2827         for(x=start_x; x<end_x; x++){
2828             buf[x + y*linesize]= src[x + y*linesize];
2829         }
2830     }
2831
2832     //top
2833     for(y=0; y<start_y; y++){
2834         for(x=start_x; x<end_x; x++){
2835             buf[x + y*linesize]= buf[x + start_y*linesize];
2836         }
2837     }
2838
2839     //bottom
2840     for(y=end_y; y<block_h; y++){
2841         for(x=start_x; x<end_x; x++){
2842             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2843         }
2844     }
2845
2846     for(y=0; y<block_h; y++){
2847        //left
2848         for(x=0; x<start_x; x++){
2849             buf[x + y*linesize]= buf[start_x + y*linesize];
2850         }
2851
2852        //right
2853         for(x=end_x; x<block_w; x++){
2854             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2855         }
2856     }
2857 }
2858
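/* Half-pel motion compensation of one w x h block from a single reference
 * plane. dxy packs the sub-pel phase (bit 0 = horizontal half-pel, bit 1 =
 * vertical half-pel) and selects the matching pix_op entry; references that
 * reach outside the picture go through ff_emulated_edge_mc() first. */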
2859 static inline int hpel_motion(MpegEncContext *s,
2860                                   uint8_t *dest, uint8_t *src,
2861                                   int field_based, int field_select,
2862                                   int src_x, int src_y,
2863                                   int width, int height, int stride,
2864                                   int h_edge_pos, int v_edge_pos,
2865                                   int w, int h, op_pixels_func *pix_op,
2866                                   int motion_x, int motion_y)
2867 {
2868     int dxy;
2869     int emu=0;
2870
2871     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2872     src_x += motion_x >> 1;
2873     src_y += motion_y >> 1;
2874
2875     /* WARNING: do not forget half pels */
2876     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2877     if (src_x == width)
2878         dxy &= ~1;
2879     src_y = clip(src_y, -16, height);
2880     if (src_y == height)
2881         dxy &= ~2;
2882     src += src_y * stride + src_x;
2883
2884     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2885         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2886            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2887             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2888                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2889             src= s->edge_emu_buffer;
2890             emu=1;
2891         }
2892     }
2893     if(field_select)
2894         src += s->linesize;
2895     pix_op[dxy](dest, src, stride, h);
2896     return emu;
2897 }
2898
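/* Lowres variant of hpel_motion(): every output sample covers 2^lowres source
 * samples, so the lowres+1 low bits of the motion vector become a sub-pel
 * offset (sx/sy, rescaled to eighth-pel units below) that is handled by the
 * h264-style bilinear chroma MC routine in pix_op. */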
2899 static inline int hpel_motion_lowres(MpegEncContext *s,
2900                                   uint8_t *dest, uint8_t *src,
2901                                   int field_based, int field_select,
2902                                   int src_x, int src_y,
2903                                   int width, int height, int stride,
2904                                   int h_edge_pos, int v_edge_pos,
2905                                   int w, int h, h264_chroma_mc_func *pix_op,
2906                                   int motion_x, int motion_y)
2907 {
2908     const int lowres= s->avctx->lowres;
2909     const int s_mask= (2<<lowres)-1;
2910     int emu=0;
2911     int sx, sy;
2912
2913     if(s->quarter_sample){
2914         motion_x/=2;
2915         motion_y/=2;
2916     }
2917
2918     sx= motion_x & s_mask;
2919     sy= motion_y & s_mask;
2920     src_x += motion_x >> (lowres+1);
2921     src_y += motion_y >> (lowres+1);
2922
2923     src += src_y * stride + src_x;
2924
2925     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2926        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2927         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2928                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2929         src= s->edge_emu_buffer;
2930         emu=1;
2931     }
2932
2933     sx <<= 2 - lowres;
2934     sy <<= 2 - lowres;
2935     if(field_select)
2936         src += s->linesize;
2937     pix_op[lowres](dest, src, stride, h, sx, sy);
2938     return emu;
2939 }
2940
2941 /* apply one mpeg motion vector to the three components */
2942 static always_inline void mpeg_motion(MpegEncContext *s,
2943                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2944                                int field_based, int bottom_field, int field_select,
2945                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2946                                int motion_x, int motion_y, int h)
2947 {
2948     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2949     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2950
2951 #if 0
2952 if(s->quarter_sample)
2953 {
2954     motion_x>>=1;
2955     motion_y>>=1;
2956 }
2957 #endif
2958
2959     v_edge_pos = s->v_edge_pos >> field_based;
2960     linesize   = s->current_picture.linesize[0] << field_based;
2961     uvlinesize = s->current_picture.linesize[1] << field_based;
2962
2963     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2964     src_x = s->mb_x* 16               + (motion_x >> 1);
2965     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2966
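    /* Derive the chroma source position and sub-pel phase; the rule depends on
     * the output format: H.263-style halving (with the FF_BUG_HPEL_CHROMA
     * workaround for field pictures), full-pel chroma for H.261, and plain
     * 4:2:0 / 4:2:2 / 4:4:4 scaling otherwise. */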
2967     if (s->out_format == FMT_H263) {
2968         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2969             mx = (motion_x>>1)|(motion_x&1);
2970             my = motion_y >>1;
2971             uvdxy = ((my & 1) << 1) | (mx & 1);
2972             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2973             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2974         }else{
2975             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2976             uvsrc_x = src_x>>1;
2977             uvsrc_y = src_y>>1;
2978         }
2979     }else if(s->out_format == FMT_H261){//in H.261 even the chroma MVs are full-pel
2980         mx = motion_x / 4;
2981         my = motion_y / 4;
2982         uvdxy = 0;
2983         uvsrc_x = s->mb_x*8 + mx;
2984         uvsrc_y = s->mb_y*8 + my;
2985     } else {
2986         if(s->chroma_y_shift){
2987             mx = motion_x / 2;
2988             my = motion_y / 2;
2989             uvdxy = ((my & 1) << 1) | (mx & 1);
2990             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2991             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2992         } else {
2993             if(s->chroma_x_shift){
2994             //Chroma422
2995                 mx = motion_x / 2;
2996                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2997                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2998                 uvsrc_y = src_y;
2999             } else {
3000             //Chroma444
3001                 uvdxy = dxy;
3002                 uvsrc_x = src_x;
3003                 uvsrc_y = src_y;
3004             }
3005         }
3006     }
3007
3008     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3009     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3010     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3011
3012     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
3013        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
3014             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3015                s->codec_id == CODEC_ID_MPEG1VIDEO){
3016                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3017                 return ;
3018             }
3019             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3020                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3021             ptr_y = s->edge_emu_buffer;
3022             if(!(s->flags&CODEC_FLAG_GRAY)){
3023                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3024                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3025                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3026                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3027                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3028                 ptr_cb= uvbuf;
3029                 ptr_cr= uvbuf+16;
3030             }
3031     }
3032
3033     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3034         dest_y += s->linesize;
3035         dest_cb+= s->uvlinesize;
3036         dest_cr+= s->uvlinesize;
3037     }
3038
3039     if(field_select){
3040         ptr_y += s->linesize;
3041         ptr_cb+= s->uvlinesize;
3042         ptr_cr+= s->uvlinesize;
3043     }
3044
3045     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3046
3047     if(!(s->flags&CODEC_FLAG_GRAY)){
3048         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3049         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3050     }
3051 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3052     if(s->out_format == FMT_H261){
3053         ff_h261_loop_filter(s);
3054     }
3055 #endif
3056 }
3057
3058 /* apply one mpeg motion vector to the three components */
3059 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
3060                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3061                                int field_based, int bottom_field, int field_select,
3062                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3063                                int motion_x, int motion_y, int h)
3064 {
3065     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3066     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3067     const int lowres= s->avctx->lowres;
3068     const int block_s= 8>>lowres;
3069     const int s_mask= (2<<lowres)-1;
3070     const int h_edge_pos = s->h_edge_pos >> lowres;
3071     const int v_edge_pos = s->v_edge_pos >> lowres;
3072     linesize   = s->current_picture.linesize[0] << field_based;
3073     uvlinesize = s->current_picture.linesize[1] << field_based;
3074
3075     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
3076         motion_x/=2;
3077         motion_y/=2;
3078     }
3079
3080     if(field_based){
3081         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3082     }
3083
3084     sx= motion_x & s_mask;
3085     sy= motion_y & s_mask;
3086     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3087     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3088
3089     if (s->out_format == FMT_H263) {
3090         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3091         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3092         uvsrc_x = src_x>>1;
3093         uvsrc_y = src_y>>1;
3094     }else if(s->out_format == FMT_H261){//in H.261 even the chroma MVs are full-pel
3095         mx = motion_x / 4;
3096         my = motion_y / 4;
3097         uvsx = (2*mx) & s_mask;
3098         uvsy = (2*my) & s_mask;
3099         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3100         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3101     } else {
3102         mx = motion_x / 2;
3103         my = motion_y / 2;
3104         uvsx = mx & s_mask;
3105         uvsy = my & s_mask;
3106         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3107         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3108     }
3109
3110     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3111     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3112     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3113
3114     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3115        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3116             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3117                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3118             ptr_y = s->edge_emu_buffer;
3119             if(!(s->flags&CODEC_FLAG_GRAY)){
3120                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3121                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3122                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3123                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3124                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3125                 ptr_cb= uvbuf;
3126                 ptr_cr= uvbuf+16;
3127             }
3128     }
3129
3130     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3131         dest_y += s->linesize;
3132         dest_cb+= s->uvlinesize;
3133         dest_cr+= s->uvlinesize;
3134     }
3135
3136     if(field_select){
3137         ptr_y += s->linesize;
3138         ptr_cb+= s->uvlinesize;
3139         ptr_cr+= s->uvlinesize;
3140     }
3141
3142     sx <<= 2 - lowres;
3143     sy <<= 2 - lowres;
3144     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3145
3146     if(!(s->flags&CODEC_FLAG_GRAY)){
3147         uvsx <<= 2 - lowres;
3148         uvsy <<= 2 - lowres;
3149         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3150         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3151     }
3152     //FIXME h261 lowres loop filter
3153 }
3154
3155 //FIXME move to dsputil, avg variant, 16x16 version
3156 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3157     int x;
3158     uint8_t * const top   = src[1];
3159     uint8_t * const left  = src[2];
3160     uint8_t * const mid   = src[0];
3161     uint8_t * const right = src[3];
3162     uint8_t * const bottom= src[4];
3163 #define OBMC_FILTER(x, t, l, m, r, b)\
3164     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3165 #define OBMC_FILTER4(x, t, l, m, r, b)\
3166     OBMC_FILTER(x         , t, l, m, r, b);\
3167     OBMC_FILTER(x+1       , t, l, m, r, b);\
3168     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3169     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3170
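    /* The filter weights below are in eighths and t+l+m+r+b == 8 for every
     * sample, so together with the +4 bias in OBMC_FILTER each output pixel is
     * a correctly rounded weighted average of the five overlapping
     * predictions (mid, top, left, right, bottom). */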
3171     x=0;
3172     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3173     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3174     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3175     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3176     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3177     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3178     x+= stride;
3179     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3180     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3181     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3182     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3183     x+= stride;
3184     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3185     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3186     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3187     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3188     x+= 2*stride;
3189     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3190     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3191     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3192     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3193     x+= 2*stride;
3194     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3195     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3196     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3197     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3198     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3199     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3200     x+= stride;
3201     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3202     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3203     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3204     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3205 }
3206
3207 /* obmc for 1 8x8 luma block */
3208 static inline void obmc_motion(MpegEncContext *s,
3209                                uint8_t *dest, uint8_t *src,
3210                                int src_x, int src_y,
3211                                op_pixels_func *pix_op,
3212                                int16_t mv[5][2]/* mid top left right bottom*/)
3213 #define MID    0
3214 {
3215     int i;
3216     uint8_t *ptr[5];
3217
3218     assert(s->quarter_sample==0);
3219
3220     for(i=0; i<5; i++){
3221         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3222             ptr[i]= ptr[MID];
3223         }else{
3224             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3225             hpel_motion(s, ptr[i], src, 0, 0,
3226                         src_x, src_y,
3227                         s->width, s->height, s->linesize,
3228                         s->h_edge_pos, s->v_edge_pos,
3229                         8, 8, pix_op,
3230                         mv[i][0], mv[i][1]);
3231         }
3232     }
3233
3234     put_obmc(dest, ptr, s->linesize);
3235 }
3236
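/* Quarter-pel motion compensation of one macroblock (or one field half of it).
 * Chroma is only coded at half-pel resolution, so the quarter-pel luma vector
 * is first reduced to a half-pel chroma vector; the FF_BUG_QPEL_CHROMA*
 * paths mimic the chroma rounding of known buggy encoders. */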
3237 static inline void qpel_motion(MpegEncContext *s,
3238                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3239                                int field_based, int bottom_field, int field_select,
3240                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3241                                qpel_mc_func (*qpix_op)[16],
3242                                int motion_x, int motion_y, int h)
3243 {
3244     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3245     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3246
3247     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3248     src_x = s->mb_x *  16                 + (motion_x >> 2);
3249     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3250
3251     v_edge_pos = s->v_edge_pos >> field_based;
3252     linesize = s->linesize << field_based;
3253     uvlinesize = s->uvlinesize << field_based;
3254
3255     if(field_based){
3256         mx= motion_x/2;
3257         my= motion_y>>1;
3258     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3259         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3260         mx= (motion_x>>1) + rtab[motion_x&7];
3261         my= (motion_y>>1) + rtab[motion_y&7];
3262     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3263         mx= (motion_x>>1)|(motion_x&1);
3264         my= (motion_y>>1)|(motion_y&1);
3265     }else{
3266         mx= motion_x/2;
3267         my= motion_y/2;
3268     }
3269     mx= (mx>>1)|(mx&1);
3270     my= (my>>1)|(my&1);
3271
3272     uvdxy= (mx&1) | ((my&1)<<1);
3273     mx>>=1;
3274     my>>=1;
3275
3276     uvsrc_x = s->mb_x *  8                 + mx;
3277     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3278
3279     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3280     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3281     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3282
3283     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3284        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3285         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3286                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3287         ptr_y= s->edge_emu_buffer;
3288         if(!(s->flags&CODEC_FLAG_GRAY)){
3289             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3290             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3291                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3292             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3293                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3294             ptr_cb= uvbuf;
3295             ptr_cr= uvbuf + 16;
3296         }
3297     }
3298
3299     if(!field_based)
3300         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3301     else{
3302         if(bottom_field){
3303             dest_y += s->linesize;
3304             dest_cb+= s->uvlinesize;
3305             dest_cr+= s->uvlinesize;
3306         }
3307
3308         if(field_select){
3309             ptr_y  += s->linesize;
3310             ptr_cb += s->uvlinesize;
3311             ptr_cr += s->uvlinesize;
3312         }
3313         //damn interlaced mode
3314         //FIXME boundary mirroring is not exactly correct here
3315         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3316         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3317     }
3318     if(!(s->flags&CODEC_FLAG_GRAY)){
3319         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3320         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3321     }
3322 }
3323
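/* Reduce a (possibly accumulated) luma motion vector component to the
 * corresponding chroma component using the special H.263 rounding table;
 * negative values are mirrored so the rounding stays symmetric around zero. */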
3324 inline int ff_h263_round_chroma(int x){
3325     if (x >= 0)
3326         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3327     else {
3328         x = -x;
3329         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3330     }
3331 }
3332
3333 /**
3334  * h263 chroma 4mv motion compensation.
3335  */
3336 static inline void chroma_4mv_motion(MpegEncContext *s,
3337                                      uint8_t *dest_cb, uint8_t *dest_cr,
3338                                      uint8_t **ref_picture,
3339                                      op_pixels_func *pix_op,
3340                                      int mx, int my){
3341     int dxy, emu=0, src_x, src_y, offset;
3342     uint8_t *ptr;
3343
3344     /* In case of 8X8, we construct a single chroma motion vector
3345        with a special rounding */
3346     mx= ff_h263_round_chroma(mx);
3347     my= ff_h263_round_chroma(my);
3348
3349     dxy = ((my & 1) << 1) | (mx & 1);
3350     mx >>= 1;
3351     my >>= 1;
3352
3353     src_x = s->mb_x * 8 + mx;
3354     src_y = s->mb_y * 8 + my;
3355     src_x = clip(src_x, -8, s->width/2);
3356     if (src_x == s->width/2)
3357         dxy &= ~1;
3358     src_y = clip(src_y, -8, s->height/2);
3359     if (src_y == s->height/2)
3360         dxy &= ~2;
3361
3362     offset = (src_y * (s->uvlinesize)) + src_x;
3363     ptr = ref_picture[1] + offset;
3364     if(s->flags&CODEC_FLAG_EMU_EDGE){
3365         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3366            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3367             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3368             ptr= s->edge_emu_buffer;
3369             emu=1;
3370         }
3371     }
3372     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3373
3374     ptr = ref_picture[2] + offset;
3375     if(emu){
3376         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3377         ptr= s->edge_emu_buffer;
3378     }
3379     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3380 }
3381
3382 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3383                                      uint8_t *dest_cb, uint8_t *dest_cr,
3384                                      uint8_t **ref_picture,
3385                                      h264_chroma_mc_func *pix_op,
3386                                      int mx, int my){
3387     const int lowres= s->avctx->lowres;
3388     const int block_s= 8>>lowres;
3389     const int s_mask= (2<<lowres)-1;
3390     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3391     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3392     int emu=0, src_x, src_y, offset, sx, sy;
3393     uint8_t *ptr;
3394
3395     if(s->quarter_sample){
3396         mx/=2;
3397         my/=2;
3398     }
3399
3400     /* In case of 8X8, we construct a single chroma motion vector
3401        with a special rounding */
3402     mx= ff_h263_round_chroma(mx);
3403     my= ff_h263_round_chroma(my);
3404
3405     sx= mx & s_mask;
3406     sy= my & s_mask;
3407     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3408     src_y = s->mb_y*block_s + (my >> (lowres+1));
3409
3410     offset = src_y * s->uvlinesize + src_x;
3411     ptr = ref_picture[1] + offset;
3412     if(s->flags&CODEC_FLAG_EMU_EDGE){
3413         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3414            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3415             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3416             ptr= s->edge_emu_buffer;
3417             emu=1;
3418         }
3419     }
3420     sx <<= 2 - lowres;
3421     sy <<= 2 - lowres;
3422     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3423
3424     ptr = ref_picture[2] + offset;
3425     if(emu){
3426         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3427         ptr= s->edge_emu_buffer;
3428     }
3429     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3430 }
3431
3432 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3433     /* fetch pixels for the estimated MV four macroblocks ahead;
3434      * optimized for 64-byte cache lines */
3435     const int shift = s->quarter_sample ? 2 : 1;
3436     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3437     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3438     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3439     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3440     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3441     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3442 }
3443
3444 /**
3445  * motion compensation of a single macroblock
3446  * @param s context
3447  * @param dest_y luma destination pointer
3448  * @param dest_cb chroma cb/u destination pointer
3449  * @param dest_cr chroma cr/v destination pointer
3450  * @param dir direction (0->forward, 1->backward)
3451  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3452  * @param pix_op halfpel motion compensation function (average or put normally)
3453  * @param qpix_op qpel motion compensation function (average or put normally)
3454  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3455  */
3456 static inline void MPV_motion(MpegEncContext *s,
3457                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3458                               int dir, uint8_t **ref_picture,
3459                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3460 {
3461     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3462     int mb_x, mb_y, i;
3463     uint8_t *ptr, *dest;
3464
3465     mb_x = s->mb_x;
3466     mb_y = s->mb_y;
3467
3468     prefetch_motion(s, ref_picture, dir);
3469
3470     if(s->obmc && s->pict_type != B_TYPE){
3471         int16_t mv_cache[4][4][2];
3472         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3473         const int mot_stride= s->b8_stride;
3474         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3475
3476         assert(!s->mb_skipped);
3477
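        /* Build a 4x4 cache of 8x8-block motion vectors around this MB:
         * rows 1-2 hold the current MB's four vectors, row 3 repeats row 2
         * (the MVs below are not available yet), and row 0 plus the outer
         * columns come from the neighbouring MBs, falling back to duplicates
         * at picture borders and next to intra MBs. */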
3478         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3479         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3480         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3481
3482         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3483             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3484         }else{
3485             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3486         }
3487
3488         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3489             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3490             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3491         }else{
3492             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3493             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3494         }
3495
3496         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3497             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3498             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3499         }else{
3500             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3501             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3502         }
3503
3504         mx = 0;
3505         my = 0;
3506         for(i=0;i<4;i++) {
3507             const int x= (i&1)+1;
3508             const int y= (i>>1)+1;
3509             int16_t mv[5][2]= {
3510                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3511                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3512                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3513                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3514                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3515             //FIXME cleanup
3516             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3517                         ref_picture[0],
3518                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3519                         pix_op[1],
3520                         mv);
3521
3522             mx += mv[0][0];
3523             my += mv[0][1];
3524         }
3525         if(!(s->flags&CODEC_FLAG_GRAY))
3526             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3527
3528         return;
3529     }
3530
3531     switch(s->mv_type) {
3532     case MV_TYPE_16X16:
3533         if(s->mcsel){
3534             if(s->real_sprite_warping_points==1){
3535                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3536                             ref_picture);
3537             }else{
3538                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3539                             ref_picture);
3540             }
3541         }else if(s->quarter_sample){
3542             qpel_motion(s, dest_y, dest_cb, dest_cr,
3543                         0, 0, 0,
3544                         ref_picture, pix_op, qpix_op,
3545                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3546         }else if(s->mspel){
3547             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3548                         ref_picture, pix_op,
3549                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3550         }else
3551         {
3552             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3553                         0, 0, 0,
3554                         ref_picture, pix_op,
3555                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3556         }
3557         break;
3558     case MV_TYPE_8X8:
3559         mx = 0;
3560         my = 0;
3561         if(s->quarter_sample){
3562             for(i=0;i<4;i++) {
3563                 motion_x = s->mv[dir][i][0];
3564                 motion_y = s->mv[dir][i][1];
3565
3566                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3567                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3568                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3569
3570                 /* WARNING: do not forget half pels */
3571                 src_x = clip(src_x, -16, s->width);
3572                 if (src_x == s->width)
3573                     dxy &= ~3;
3574                 src_y = clip(src_y, -16, s->height);
3575                 if (src_y == s->height)
3576                     dxy &= ~12;
3577
3578                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3579                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3580                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3581                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3582                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3583                         ptr= s->edge_emu_buffer;
3584                     }
3585                 }
3586                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3587                 qpix_op[1][dxy](dest, ptr, s->linesize);
3588
3589                 mx += s->mv[dir][i][0]/2;
3590                 my += s->mv[dir][i][1]/2;
3591             }
3592         }else{
3593             for(i=0;i<4;i++) {
3594                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3595                             ref_picture[0], 0, 0,
3596                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3597                             s->width, s->height, s->linesize,
3598                             s->h_edge_pos, s->v_edge_pos,
3599                             8, 8, pix_op[1],
3600                             s->mv[dir][i][0], s->mv[dir][i][1]);
3601
3602                 mx += s->mv[dir][i][0];
3603                 my += s->mv[dir][i][1];
3604             }
3605         }
3606
3607         if(!(s->flags&CODEC_FLAG_GRAY))
3608             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3609         break;
3610     case MV_TYPE_FIELD:
3611         if (s->picture_structure == PICT_FRAME) {
3612             if(s->quarter_sample){
3613                 for(i=0; i<2; i++){
3614                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3615                                 1, i, s->field_select[dir][i],
3616                                 ref_picture, pix_op, qpix_op,
3617                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3618                 }
3619             }else{
3620                 /* top field */
3621                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3622                             1, 0, s->field_select[dir][0],
3623                             ref_picture, pix_op,
3624                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3625                 /* bottom field */
3626                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3627                             1, 1, s->field_select[dir][1],
3628                             ref_picture, pix_op,
3629                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3630             }
3631         } else {
3632             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3633                 ref_picture= s->current_picture_ptr->data;
3634             }
3635
3636             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3637                         0, 0, s->field_select[dir][0],
3638                         ref_picture, pix_op,
3639                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3640         }
3641         break;
3642     case MV_TYPE_16X8:
3643         for(i=0; i<2; i++){
3644             uint8_t ** ref2picture;
3645
3646             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3647                 ref2picture= ref_picture;
3648             }else{
3649                 ref2picture= s->current_picture_ptr->data;
3650             }
3651
3652             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3653                         0, 0, s->field_select[dir][i],
3654                         ref2picture, pix_op,
3655                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3656
3657             dest_y += 16*s->linesize;
3658             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3659             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3660         }
3661         break;
3662     case MV_TYPE_DMV:
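        /* MV_TYPE_DMV (dual prime): predict from both field parities and
         * average the results; the first pass uses the put operator and
         * later passes avg into the same block. */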
3663         if(s->picture_structure == PICT_FRAME){
3664             for(i=0; i<2; i++){
3665                 int j;
3666                 for(j=0; j<2; j++){
3667                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3668                                 1, j, j^i,
3669                                 ref_picture, pix_op,
3670                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3671                 }
3672                 pix_op = s->dsp.avg_pixels_tab;
3673             }
3674         }else{
3675             for(i=0; i<2; i++){
3676                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3677                             0, 0, s->picture_structure != i+1,
3678                             ref_picture, pix_op,
3679                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3680
3681                 // after the first (put) pass, the remaining prediction is averaged into the same block
3682                 pix_op=s->dsp.avg_pixels_tab;
3683
3684                 //the opposite-parity field is always in the current frame if this is the second field
3685                 if(!s->first_field){
3686                     ref_picture = s->current_picture_ptr->data;
3687                 }
3688             }
3689         }
3690     break;
3691     default: assert(0);
3692     }
3693 }
3694
3695 /**
3696  * motion compensation of a single macroblock
3697  * @param s context
3698  * @param dest_y luma destination pointer
3699  * @param dest_cb chroma cb/u destination pointer
3700  * @param dest_cr chroma cr/v destination pointer
3701  * @param dir direction (0->forward, 1->backward)
3702  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3703  * @param pix_op halfpel motion compensation function (average or put normally)
3704  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3705  */
3706 static inline void MPV_motion_lowres(MpegEncContext *s,
3707                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3708                               int dir, uint8_t **ref_picture,
3709                               h264_chroma_mc_func *pix_op)
3710 {
3711     int mx, my;
3712     int mb_x, mb_y, i;
3713     const int lowres= s->avctx->lowres;
3714     const int block_s= 8>>lowres;
3715
3716     mb_x = s->mb_x;
3717     mb_y = s->mb_y;
3718
3719     switch(s->mv_type) {
3720     case MV_TYPE_16X16:
3721         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3722                     0, 0, 0,
3723                     ref_picture, pix_op,
3724                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3725         break;
3726     case MV_TYPE_8X8:
3727         mx = 0;
3728         my = 0;
3729             for(i=0;i<4;i++) {
3730                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3731                             ref_picture[0], 0, 0,
3732                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3733                             s->width, s->height, s->linesize,
3734                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3735                             block_s, block_s, pix_op,
3736                             s->mv[dir][i][0], s->mv[dir][i][1]);
3737
3738                 mx += s->mv[dir][i][0];
3739                 my += s->mv[dir][i][1];
3740             }
3741
3742         if(!(s->flags&CODEC_FLAG_GRAY))
3743             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3744         break;
3745     case MV_TYPE_FIELD:
3746         if (s->picture_structure == PICT_FRAME) {
3747             /* top field */
3748             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3749                         1, 0, s->field_select[dir][0],
3750                         ref_picture, pix_op,
3751                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3752             /* bottom field */
3753             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3754                         1, 1, s->field_select[dir][1],
3755                         ref_picture, pix_op,
3756                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3757         } else {
3758             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3759                 ref_picture= s->current_picture_ptr->data;
3760             }
3761
3762             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3763                         0, 0, s->field_select[dir][0],
3764                         ref_picture, pix_op,
3765                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3766         }
3767         break;
3768     case MV_TYPE_16X8:
3769         for(i=0; i<2; i++){
3770             uint8_t ** ref2picture;
3771
3772             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3773                 ref2picture= ref_picture;
3774             }else{
3775                 ref2picture= s->current_picture_ptr->data;
3776             }
3777
3778             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3779                         0, 0, s->field_select[dir][i],
3780                         ref2picture, pix_op,
3781                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3782
3783             dest_y += 2*block_s*s->linesize;
3784             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3785             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3786         }
3787         break;
3788     case MV_TYPE_DMV:
3789         if(s->picture_structure == PICT_FRAME){
3790             for(i=0; i<2; i++){
3791                 int j;
3792                 for(j=0; j<2; j++){
3793                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3794                                 1, j, j^i,
3795                                 ref_picture, pix_op,
3796                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3797                 }
3798                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3799             }
3800         }else{
3801             for(i=0; i<2; i++){
3802                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3803                             0, 0, s->picture_structure != i+1,
3804                             ref_picture, pix_op,
3805                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3806
3807                 // after the first (put) pass, the remaining prediction is averaged into the same block
3808                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3809
3810                 //the opposite-parity field is always in the current frame if this is the second field
3811                 if(!s->first_field){
3812                     ref_picture = s->current_picture_ptr->data;
3813                 }
3814             }
3815         }
3816     break;
3817     default: assert(0);
3818     }
3819 }
3820
3821 /* put block[] to dest[] */
3822 static inline void put_dct(MpegEncContext *s,
3823                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3824 {
3825     s->dct_unquantize_intra(s, block, i, qscale);
3826     s->dsp.idct_put (dest, line_size, block);
3827 }
3828
3829 /* add block[] to dest[] */
3830 static inline void add_dct(MpegEncContext *s,
3831                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3832 {
3833     if (s->block_last_index[i] >= 0) {
3834         s->dsp.idct_add (dest, line_size, block);
3835     }
3836 }
3837
3838 static inline void add_dequant_dct(MpegEncContext *s,
3839                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3840 {
3841     if (s->block_last_index[i] >= 0) {
3842         s->dct_unquantize_inter(s, block, i, qscale);
3843
3844         s->dsp.idct_add (dest, line_size, block);
3845     }
3846 }
3847
3848 /**
3849  * cleans dc, ac and coded_block for the current non-intra MB
3850  */
3851 void ff_clean_intra_table_entries(MpegEncContext *s)
3852 {
3853     int wrap = s->b8_stride;
3854     int xy = s->block_index[0];
3855
3856     s->dc_val[0][xy           ] =
3857     s->dc_val[0][xy + 1       ] =
3858     s->dc_val[0][xy     + wrap] =
3859     s->dc_val[0][xy + 1 + wrap] = 1024;
3860     /* ac pred */
3861     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3862     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3863     if (s->msmpeg4_version>=3) {
3864         s->coded_block[xy           ] =
3865         s->coded_block[xy + 1       ] =
3866         s->coded_block[xy     + wrap] =
3867         s->coded_block[xy + 1 + wrap] = 0;
3868     }
3869     /* chroma */
3870     wrap = s->mb_stride;
3871     xy = s->mb_x + s->mb_y * wrap;
3872     s->dc_val[1][xy] =
3873     s->dc_val[2][xy] = 1024;
3874     /* ac pred */
3875     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3876     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3877
3878     s->mbintra_table[xy]= 0;
3879 }
3880
3881 /* generic function called after a macroblock has been parsed by the
3882    decoder or after it has been encoded by the encoder.
3883
3884    Important variables used:
3885    s->mb_intra : true if intra macroblock
3886    s->mv_dir   : motion vector direction
3887    s->mv_type  : motion vector type
3888    s->mv       : motion vector
3889    s->interlaced_dct : true if interlaced dct used (mpeg2)
3890  */
3891 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3892 {
3893     int mb_x, mb_y;
3894     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3895 #ifdef HAVE_XVMC
3896     if(s->avctx->xvmc_acceleration){
3897         XVMC_decode_mb(s);//xvmc uses pblocks
3898         return;
3899     }
3900 #endif
3901
3902     mb_x = s->mb_x;
3903     mb_y = s->mb_y;
3904
3905     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3906        /* save DCT coefficients */
3907        int i,j;
3908        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3909        for(i=0; i<6; i++)
3910            for(j=0; j<64; j++)
3911                *dct++ = block[i][s->dsp.idct_permutation[j]];
3912     }
3913
3914     s->current_picture.qscale_table[mb_xy]= s->qscale;
3915
3916     /* update DC predictors for P macroblocks */
3917     if (!s->mb_intra) {
3918         if (s->h263_pred || s->h263_aic) {
3919             if(s->mbintra_table[mb_xy])
3920                 ff_clean_intra_table_entries(s);
3921         } else {
3922             s->last_dc[0] =
3923             s->last_dc[1] =
3924             s->last_dc[2] = 128 << s->intra_dc_precision;
3925         }
3926     }
3927     else if (s->h263_pred || s->h263_aic)
3928         s->mbintra_table[mb_xy]=1;
3929
3930     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3931         uint8_t *dest_y, *dest_cb, *dest_cr;
3932         int dct_linesize, dct_offset;
3933         op_pixels_func (*op_pix)[4];
3934         qpel_mc_func (*op_qpix)[16];
3935         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3936         const int uvlinesize= s->current_picture.linesize[1];
3937         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3938         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3939
3940         /* avoid copy if macroblock skipped in last frame too */
3941         /* skip only during decoding as we might trash the buffers during encoding a bit */
3942         if(!s->encoding){
3943             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3944             const int age= s->current_picture.age;
3945
3946             assert(age);
3947
3948             if (s->mb_skipped) {
3949                 s->mb_skipped= 0;
3950                 assert(s->pict_type!=I_TYPE);
3951
3952                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3953                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3954
3955                 /* if previous was skipped too, then nothing to do !  */
3956                 if (*mbskip_ptr >= age && s->current_picture.reference){
3957                     return;
3958                 }
3959             } else if(!s->current_picture.reference){
3960                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3961                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3962             } else{
3963                 *mbskip_ptr = 0; /* not skipped */
3964             }
3965         }
3966
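        /* With interlaced DCT each 8x8 block covers every other picture line
         * (one field), so the per-row stride doubles and the lower blocks
         * start one picture line (instead of block_size lines) further down. */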
3967         dct_linesize = linesize << s->interlaced_dct;
3968         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3969
3970         if(readable){
3971             dest_y=  s->dest[0];
3972             dest_cb= s->dest[1];
3973             dest_cr= s->dest[2];
3974         }else{
3975             dest_y = s->b_scratchpad;
3976             dest_cb= s->b_scratchpad+16*linesize;
3977             dest_cr= s->b_scratchpad+32*linesize;
3978         }
3979
3980         if (!s->mb_intra) {
3981             /* motion handling */
3982             /* decoding or more than one mb_type (MC was already done otherwise) */
3983             if(!s->encoding){
3984                 if(lowres_flag){
3985                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3986
3987                     if (s->mv_dir & MV_DIR_FORWARD) {
3988                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3989                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3990                     }
3991                     if (s->mv_dir & MV_DIR_BACKWARD) {
3992                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3993                     }
3994                 }else{
3995                     op_qpix= s->me.qpel_put;
3996                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3997                         op_pix = s->dsp.put_pixels_tab;
3998                     }else{
3999                         op_pix = s->dsp.put_no_rnd_pixels_tab;
4000                     }
4001                     if (s->mv_dir & MV_DIR_FORWARD) {
4002                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4003                         op_pix = s->dsp.avg_pixels_tab;
4004                         op_qpix= s->me.qpel_avg;
4005                     }
4006                     if (s->mv_dir & MV_DIR_BACKWARD) {
4007                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4008                     }
4009                 }
4010             }
4011
4012             /* skip dequant / idct if we are really late ;) */
4013             if(s->hurry_up>1) goto skip_idct;
4014             if(s->avctx->skip_idct){
4015                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4016                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4017                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4018                     goto skip_idct;
4019             }
4020
4021             /* add dct residue */
4022             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4023                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4024                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4025                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4026                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4027                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4028
4029                 if(!(s->flags&CODEC_FLAG_GRAY)){
4030                     if (s->chroma_y_shift){
4031                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4032                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4033                     }else{
4034                         dct_linesize >>= 1;
4035                         dct_offset >>=1;
4036                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4037                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4038                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4039                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4040                     }
4041                 }
4042             } else if(s->codec_id != CODEC_ID_WMV2){
4043                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4044                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4045                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4046                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4047
4048                 if(!(s->flags&CODEC_FLAG_GRAY)){
4049                     if(s->chroma_y_shift){//Chroma420
4050                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4051                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4052                     }else{
4053                         //chroma422
4054                         dct_linesize = uvlinesize << s->interlaced_dct;
4055                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4056
4057                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4058                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4059                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4060                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4061                         if(!s->chroma_x_shift){//Chroma444
4062                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4063                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4064                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4065                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4066                         }
4067                     }
4068                 }//fi gray
4069             }
4070             else{
4071                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4072             }
4073         } else {
4074             /* dct only in intra block */
4075             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4076                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4077                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4078                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4079                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4080
4081                 if(!(s->flags&CODEC_FLAG_GRAY)){
4082                     if(s->chroma_y_shift){
4083                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4084                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4085                     }else{
4086                         dct_offset >>=1;
4087                         dct_linesize >>=1;
4088                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4089                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4090                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4091                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4092                     }
4093                 }
4094             }else{
4095                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4096                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4097                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4098                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4099
4100                 if(!(s->flags&CODEC_FLAG_GRAY)){
4101                     if(s->chroma_y_shift){
4102                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4103                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4104                     }else{
4105
4106                         dct_linesize = uvlinesize << s->interlaced_dct;
4107                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4108
4109                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4110                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4111                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4112                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4113                         if(!s->chroma_x_shift){//Chroma444
4114                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4115                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4116                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4117                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4118                         }
4119                     }
4120                 }//gray
4121             }
4122         }
4123 skip_idct:
4124         if(!readable){
4125             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4126             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4127             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4128         }
4129     }
4130 }
4131
4132 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4133     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4134     else                  MPV_decode_mb_internal(s, block, 0);
4135 }
4136
4137 #ifdef CONFIG_ENCODERS
4138
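     /**
      * Zero an 8x8 block if it contains only a few small (+-1) coefficients
      * whose run-weighted score stays below the threshold. A negative
      * threshold means the DC coefficient may be eliminated as well;
      * otherwise the DC coefficient is left untouched.
      */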
4139 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4140 {
4141     static const char tab[64]=
4142         {3,2,2,1,1,1,1,1,
4143          1,1,1,1,1,1,1,1,
4144          1,1,1,1,1,1,1,1,
4145          0,0,0,0,0,0,0,0,
4146          0,0,0,0,0,0,0,0,
4147          0,0,0,0,0,0,0,0,
4148          0,0,0,0,0,0,0,0,
4149          0,0,0,0,0,0,0,0};
4150     int score=0;
4151     int run=0;
4152     int i;
4153     DCTELEM *block= s->block[n];
4154     const int last_index= s->block_last_index[n];
4155     int skip_dc;
4156
4157     if(threshold<0){
4158         skip_dc=0;
4159         threshold= -threshold;
4160     }else
4161         skip_dc=1;
4162
4163     /* are all the coefficients we could set to zero already zero? */
4164     if(last_index<=skip_dc - 1) return;
4165
4166     for(i=0; i<=last_index; i++){
4167         const int j = s->intra_scantable.permutated[i];
4168         const int level = FFABS(block[j]);
4169         if(level==1){
4170             if(skip_dc && i==0) continue;
4171             score+= tab[run];
4172             run=0;
4173         }else if(level>1){
4174             return;
4175         }else{
4176             run++;
4177         }
4178     }
4179     if(score >= threshold) return;
4180     for(i=skip_dc; i<=last_index; i++){
4181         const int j = s->intra_scantable.permutated[i];
4182         block[j]=0;
4183     }
4184     if(block[0]) s->block_last_index[n]= 0;
4185     else         s->block_last_index[n]= -1;
4186 }
4187
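     /**
      * Clip the quantized coefficients of a block to [min_qcoeff, max_qcoeff].
      * The intra DC coefficient is never clipped; a warning is printed if
      * clipping occurred and the simple macroblock decision is in use.
      */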
4188 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4189 {
4190     int i;
4191     const int maxlevel= s->max_qcoeff;
4192     const int minlevel= s->min_qcoeff;
4193     int overflow=0;
4194
4195     if(s->mb_intra){
4196         i=1; //skip clipping of intra dc
4197     }else
4198         i=0;
4199
4200     for(;i<=last_index; i++){
4201         const int j= s->intra_scantable.permutated[i];
4202         int level = block[j];
4203
4204         if     (level>maxlevel){
4205             level=maxlevel;
4206             overflow++;
4207         }else if(level<minlevel){
4208             level=minlevel;
4209             overflow++;
4210         }
4211
4212         block[j]= level;
4213     }
4214
4215     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4216         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4217 }
4218
4219 #endif //CONFIG_ENCODERS
4220
4221 /**
4222  * Call the draw_horiz_band() callback of the codec context, if it is set.
4223  * @param h the normal height; it will be reduced automatically if needed for the last row
4224  */
4225 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4226     if (s->avctx->draw_horiz_band) {
4227         AVFrame *src;
4228         int offset[4];
4229
4230         if(s->picture_structure != PICT_FRAME){
4231             h <<= 1;
4232             y <<= 1;
4233             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4234         }
4235
4236         h= FFMIN(h, s->avctx->height - y);
4237
4238         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4239             src= (AVFrame*)s->current_picture_ptr;
4240         else if(s->last_picture_ptr)
4241             src= (AVFrame*)s->last_picture_ptr;
4242         else
4243             return;
4244
4245         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4246             offset[0]=
4247             offset[1]=
4248             offset[2]=
4249             offset[3]= 0;
4250         }else{
4251             offset[0]= y * s->linesize;
4252             offset[1]=
4253             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4254             offset[3]= 0;
4255         }
4256
4257         emms_c();
4258
4259         s->avctx->draw_horiz_band(s->avctx, src, offset,
4260                                   y, s->picture_structure, h);
4261     }
4262 }
4263
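     /**
      * Initialize s->block_index[] and the s->dest[] luma/chroma pointers
      * for the current macroblock position.
      */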
4264 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4265     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4266     const int uvlinesize= s->current_picture.linesize[1];
4267     const int mb_size= 4 - s->avctx->lowres;
4268
4269     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4270     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4271     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4272     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4273     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4274     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4275     //block_index is not used by mpeg2, so it is not affected by chroma_format
4276
4277     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4278     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4279     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4280
4281     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4282     {
4283         s->dest[0] += s->mb_y *   linesize << mb_size;
4284         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4285         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4286     }
4287 }
4288
4289 #ifdef CONFIG_ENCODERS
4290
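     /**
      * Compute a perceptual weight for every pixel of an 8x8 block from the
      * standard deviation of its 3x3 neighbourhood; used by the
      * quantizer_noise_shaping code below.
      */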
4291 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4292     int x, y;
4293 //FIXME optimize
4294     for(y=0; y<8; y++){
4295         for(x=0; x<8; x++){
4296             int x2, y2;
4297             int sum=0;
4298             int sqr=0;
4299             int count=0;
4300
4301             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4302                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4303                     int v= ptr[x2 + y2*stride];
4304                     sum += v;
4305                     sqr += v*v;
4306                     count++;
4307                 }
4308             }
4309             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4310         }
4311     }
4312 }
4313
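     /**
      * Encode one macroblock: build the intra source or the inter prediction
      * error, optionally pick an interlaced DCT, quantize the blocks (with
      * optional noise shaping and coefficient elimination) and emit the
      * bitstream for the active codec.
      */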
4314 static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4315 {
4316     int16_t weight[8][64];
4317     DCTELEM orig[8][64];
4318     const int mb_x= s->mb_x;
4319     const int mb_y= s->mb_y;
4320     int i;
4321     int skip_dct[8];
4322     int dct_offset   = s->linesize*8; //default for progressive frames
4323     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4324     int wrap_y, wrap_c;
4325
4326     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4327
4328     if(s->adaptive_quant){
4329         const int last_qp= s->qscale;
4330         const int mb_xy= mb_x + mb_y*s->mb_stride;
4331
4332         s->lambda= s->lambda_table[mb_xy];
4333         update_qscale(s);
4334
4335         if(!(s->flags&CODEC_FLAG_QP_RD)){
4336             s->dquant= s->qscale - last_qp;
4337
4338             if(s->out_format==FMT_H263){
4339                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4340
4341                 if(s->codec_id==CODEC_ID_MPEG4){
4342                     if(!s->mb_intra){
4343                         if(s->pict_type == B_TYPE){
4344                             if(s->dquant&1)
4345                                 s->dquant= (s->dquant/2)*2;
4346                             if(s->mv_dir&MV_DIRECT)
4347                                 s->dquant= 0;
4348                         }
4349                         if(s->mv_type==MV_TYPE_8X8)
4350                             s->dquant=0;
4351                     }
4352                 }
4353             }
4354         }
4355         ff_set_qscale(s, last_qp + s->dquant);
4356     }else if(s->flags&CODEC_FLAG_QP_RD)
4357         ff_set_qscale(s, s->qscale + s->dquant);
4358
4359     wrap_y = s->linesize;
4360     wrap_c = s->uvlinesize;
4361     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4362     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4363     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4364
4365     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4366         uint8_t *ebuf= s->edge_emu_buffer + 32;
4367         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4368         ptr_y= ebuf;
4369         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4370         ptr_cb= ebuf+18*wrap_y;
4371         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4372         ptr_cr= ebuf+18*wrap_y+8;
4373     }
4374
4375     if (s->mb_intra) {
4376         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4377             int progressive_score, interlaced_score;
4378
4379             s->interlaced_dct=0;
4380             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4381                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4382
4383             if(progressive_score > 0){
4384                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4385                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4386                 if(progressive_score > interlaced_score){
4387                     s->interlaced_dct=1;
4388
4389                     dct_offset= wrap_y;
4390                     wrap_y<<=1;
4391                     if (s->chroma_format == CHROMA_422)
4392                         wrap_c<<=1;
4393                 }
4394             }
4395         }
4396
4397         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4398         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4399         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4400         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4401
4402         if(s->flags&CODEC_FLAG_GRAY){
4403             skip_dct[4]= 1;
4404             skip_dct[5]= 1;
4405         }else{
4406             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4407             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4408             if(!s->chroma_y_shift){ /* 422 */
4409                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4410                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4411             }
4412         }
4413     }else{
4414         op_pixels_func (*op_pix)[4];
4415         qpel_mc_func (*op_qpix)[16];
4416         uint8_t *dest_y, *dest_cb, *dest_cr;
4417
4418         dest_y  = s->dest[0];
4419         dest_cb = s->dest[1];
4420         dest_cr = s->dest[2];
4421
4422         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4423             op_pix = s->dsp.put_pixels_tab;
4424             op_qpix= s->dsp.put_qpel_pixels_tab;
4425         }else{
4426             op_pix = s->dsp.put_no_rnd_pixels_tab;
4427             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4428         }
4429
4430         if (s->mv_dir & MV_DIR_FORWARD) {
4431             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4432             op_pix = s->dsp.avg_pixels_tab;
4433             op_qpix= s->dsp.avg_qpel_pixels_tab;
4434         }
4435         if (s->mv_dir & MV_DIR_BACKWARD) {
4436             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4437         }
4438
4439         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4440             int progressive_score, interlaced_score;
4441
4442             s->interlaced_dct=0;
4443             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4444                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4445
4446             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4447
4448             if(progressive_score>0){
4449                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4450                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4451
4452                 if(progressive_score > interlaced_score){
4453                     s->interlaced_dct=1;
4454
4455                     dct_offset= wrap_y;
4456                     wrap_y<<=1;
4457                     if (s->chroma_format == CHROMA_422)
4458                         wrap_c<<=1;
4459                 }
4460             }
4461         }
4462
4463         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4464         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4465         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4466         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4467
4468         if(s->flags&CODEC_FLAG_GRAY){
4469             skip_dct[4]= 1;
4470             skip_dct[5]= 1;
4471         }else{
4472             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4473             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4474             if(!s->chroma_y_shift){ /* 422 */
4475                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4476                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4477             }
4478         }
4479         /* pre quantization */
4480         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4481             //FIXME optimize
4482             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4483             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4484             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4485             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4486             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4487             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4488             if(!s->chroma_y_shift){ /* 422 */
4489                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4490                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4491             }
4492         }
4493     }
4494
4495     if(s->avctx->quantizer_noise_shaping){
4496         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4497         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4498         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4499         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4500         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4501         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4502         if(!s->chroma_y_shift){ /* 422 */
4503             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4504             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4505         }
4506         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4507     }
4508
4509     /* DCT & quantize */
4510     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4511     {
4512         for(i=0;i<mb_block_count;i++) {
4513             if(!skip_dct[i]){
4514                 int overflow;
4515                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4516             // FIXME we could decide to change the quantizer instead of clipping
4517             // JS: I don't think that would be a good idea, it could lower quality instead
4518             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
4519                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4520             }else
4521                 s->block_last_index[i]= -1;
4522         }
4523         if(s->avctx->quantizer_noise_shaping){
4524             for(i=0;i<mb_block_count;i++) {
4525                 if(!skip_dct[i]){
4526                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4527                 }
4528             }
4529         }
4530
4531         if(s->luma_elim_threshold && !s->mb_intra)
4532             for(i=0; i<4; i++)
4533                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4534         if(s->chroma_elim_threshold && !s->mb_intra)
4535             for(i=4; i<mb_block_count; i++)
4536                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4537
4538         if(s->flags & CODEC_FLAG_CBP_RD){
4539             for(i=0;i<mb_block_count;i++) {
4540                 if(s->block_last_index[i] == -1)
4541                     s->coded_score[i]= INT_MAX/256;
4542             }
4543         }
4544     }
4545
4546     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4547         s->block_last_index[4]=
4548         s->block_last_index[5]= 0;
4549         s->block[4][0]=
4550         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4551     }
4552
4553     //FIXME: the non-C quantize code returns an incorrect block_last_index
4554     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4555         for(i=0; i<mb_block_count; i++){
4556             int j;
4557             if(s->block_last_index[i]>0){
4558                 for(j=63; j>0; j--){
4559                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4560                 }
4561                 s->block_last_index[i]= j;
4562             }
4563         }
4564     }
4565
4566     /* huffman encode */
4567     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4568     case CODEC_ID_MPEG1VIDEO:
4569     case CODEC_ID_MPEG2VIDEO:
4570         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4571     case CODEC_ID_MPEG4:
4572         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4573     case CODEC_ID_MSMPEG4V2:
4574     case CODEC_ID_MSMPEG4V3:
4575     case CODEC_ID_WMV1:
4576         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4577     case CODEC_ID_WMV2:
4578          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4579 #ifdef CONFIG_H261_ENCODER
4580     case CODEC_ID_H261:
4581         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4582 #endif
4583     case CODEC_ID_H263:
4584     case CODEC_ID_H263P:
4585     case CODEC_ID_FLV1:
4586     case CODEC_ID_RV10:
4587     case CODEC_ID_RV20:
4588         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4589     case CODEC_ID_MJPEG:
4590         mjpeg_encode_mb(s, s->block); break;
4591     default:
4592         assert(0);
4593     }
4594 }
4595
4596 static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4597 {
4598     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4599     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4600 }
4601
4602 #endif //CONFIG_ENCODERS
4603
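     /**
      * Release every internally held picture buffer and reset the parse
      * context; called when the codec is flushed.
      */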
4604 void ff_mpeg_flush(AVCodecContext *avctx){
4605     int i;
4606     MpegEncContext *s = avctx->priv_data;
4607
4608     if(s==NULL || s->picture==NULL)
4609         return;
4610
4611     for(i=0; i<MAX_PICTURE_COUNT; i++){
4612        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4613                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4614         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4615     }
4616     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4617
4618     s->mb_x= s->mb_y= 0;
4619
4620     s->parse_context.state= -1;
4621     s->parse_context.frame_start_found= 0;
4622     s->parse_context.overread= 0;
4623     s->parse_context.overread_index= 0;
4624     s->parse_context.index= 0;
4625     s->parse_context.last_index= 0;
4626     s->bitstream_buffer_size=0;
4627 }
4628
4629 #ifdef CONFIG_ENCODERS
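     /**
      * Append length bits from src to the PutBitContext. Short or unaligned
      * runs are copied 16 bits at a time; long byte-aligned runs are flushed
      * and memcpy()ed, with the trailing bits written afterwards.
      */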
4630 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4631 {
4632     const uint16_t *srcw= (uint16_t*)src;
4633     int words= length>>4;
4634     int bits= length&15;
4635     int i;
4636
4637     if(length==0) return;
4638
4639     if(words < 16){
4640         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4641     }else if(put_bits_count(pb)&7){
4642         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4643     }else{
4644         for(i=0; put_bits_count(pb)&31; i++)
4645             put_bits(pb, 8, src[i]);
4646         flush_put_bits(pb);
4647         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4648         skip_put_bytes(pb, 2*words-i);
4649     }
4650
4651     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4652 }
4653
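     /**
      * Copy the small part of the encoder state that encoding a macroblock
      * modifies from *s into *d, so that several candidate macroblock types
      * can be tried from the same starting point (restored with
      * copy_context_after_encode()).
      */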
4654 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4655     int i;
4656
4657     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4658
4659     /* mpeg1 */
4660     d->mb_skip_run= s->mb_skip_run;
4661     for(i=0; i<3; i++)
4662         d->last_dc[i]= s->last_dc[i];
4663
4664     /* statistics */
4665     d->mv_bits= s->mv_bits;
4666     d->i_tex_bits= s->i_tex_bits;
4667     d->p_tex_bits= s->p_tex_bits;
4668     d->i_count= s->i_count;
4669     d->f_count= s->f_count;
4670     d->b_count= s->b_count;
4671     d->skip_count= s->skip_count;
4672     d->misc_bits= s->misc_bits;
4673     d->last_bits= 0;
4674
4675     d->mb_skipped= 0;
4676     d->qscale= s->qscale;
4677     d->dquant= s->dquant;
4678 }
4679
4680 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4681     int i;
4682
4683     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4684     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4685
4686     /* mpeg1 */
4687     d->mb_skip_run= s->mb_skip_run;
4688     for(i=0; i<3; i++)
4689         d->last_dc[i]= s->last_dc[i];
4690
4691     /* statistics */
4692     d->mv_bits= s->mv_bits;
4693     d->i_tex_bits= s->i_tex_bits;
4694     d->p_tex_bits= s->p_tex_bits;
4695     d->i_count= s->i_count;
4696     d->f_count= s->f_count;
4697     d->b_count= s->b_count;
4698     d->skip_count= s->skip_count;
4699     d->misc_bits= s->misc_bits;
4700
4701     d->mb_intra= s->mb_intra;
4702     d->mb_skipped= s->mb_skipped;
4703     d->mv_type= s->mv_type;
4704     d->mv_dir= s->mv_dir;
4705     d->pb= s->pb;
4706     if(s->data_partitioning){
4707         d->pb2= s->pb2;
4708         d->tex_pb= s->tex_pb;
4709     }
4710     d->block= s->block;
4711     for(i=0; i<8; i++)
4712         d->block_last_index[i]= s->block_last_index[i];
4713     d->interlaced_dct= s->interlaced_dct;
4714     d->qscale= s->qscale;
4715 }
4716
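     /**
      * Trial-encode the current macroblock as the given type into one of two
      * scratch bitstream buffers, score the result (bit count, or a
      * rate-distortion cost when FF_MB_DECISION_RD is used) and keep it if it
      * beats *dmin.
      */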
4717 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4718                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4719                            int *dmin, int *next_block, int motion_x, int motion_y)
4720 {
4721     int score;
4722     uint8_t *dest_backup[3];
4723
4724     copy_context_before_encode(s, backup, type);
4725
4726     s->block= s->blocks[*next_block];
4727     s->pb= pb[*next_block];
4728     if(s->data_partitioning){
4729         s->pb2   = pb2   [*next_block];
4730         s->tex_pb= tex_pb[*next_block];
4731     }
4732
4733     if(*next_block){
4734         memcpy(dest_backup, s->dest, sizeof(s->dest));
4735         s->dest[0] = s->rd_scratchpad;
4736         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4737         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4738         assert(s->linesize >= 32); //FIXME
4739     }
4740
4741     encode_mb(s, motion_x, motion_y);
4742
4743     score= put_bits_count(&s->pb);
4744     if(s->data_partitioning){
4745         score+= put_bits_count(&s->pb2);
4746         score+= put_bits_count(&s->tex_pb);
4747     }
4748
4749     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4750         MPV_decode_mb(s, s->block);
4751
4752         score *= s->lambda2;
4753         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4754     }
4755
4756     if(*next_block){
4757         memcpy(s->dest, dest_backup, sizeof(s->dest));
4758     }
4759
4760     if(score<*dmin){
4761         *dmin= score;
4762         *next_block^=1;
4763
4764         copy_context_after_encode(best, s, type);
4765     }
4766 }
4767
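     /**
      * Sum of squared errors between two w x h blocks, using the dsputil
      * 16x16 and 8x8 fast paths when they apply.
      */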
4768 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4769     uint32_t *sq = ff_squareTbl + 256;
4770     int acc=0;
4771     int x,y;
4772
4773     if(w==16 && h==16)
4774         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4775     else if(w==8 && h==8)
4776         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4777
4778     for(y=0; y<h; y++){
4779         for(x=0; x<w; x++){
4780             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4781         }
4782     }
4783
4784     assert(acc>=0);
4785
4786     return acc;
4787 }
4788
4789 static int sse_mb(MpegEncContext *s){
4790     int w= 16;
4791     int h= 16;
4792
4793     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4794     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4795
4796     if(w==16 && h==16)
4797       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4798         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4799                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4800                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4801       }else{
4802         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4803                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4804                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4805       }
4806     else
4807         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4808                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4809                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4810 }
4811
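     /**
      * Motion-estimation pre-pass over this slice, scanning the macroblocks
      * bottom-up and right-to-left.
      */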
4812 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4813     MpegEncContext *s= arg;
4814
4815
4816     s->me.pre_pass=1;
4817     s->me.dia_size= s->avctx->pre_dia_size;
4818     s->first_slice_line=1;
4819     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4820         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4821             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4822         }
4823         s->first_slice_line=0;
4824     }
4825
4826     s->me.pre_pass=0;
4827
4828     return 0;
4829 }
4830
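     /**
      * Estimate the motion vectors (and candidate macroblock types) for every
      * macroblock of this slice.
      */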
4831 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4832     MpegEncContext *s= arg;
4833
4834     s->me.dia_size= s->avctx->dia_size;
4835     s->first_slice_line=1;
4836     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4837         s->mb_x=0; //for block init below
4838         ff_init_block_index(s);
4839         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4840             s->block_index[0]+=2;
4841             s->block_index[1]+=2;
4842             s->block_index[2]+=2;
4843             s->block_index[3]+=2;
4844
4845             /* compute motion vector & mb_type and store in context */
4846             if(s->pict_type==B_TYPE)
4847                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4848             else
4849                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4850         }
4851         s->first_slice_line=0;
4852     }
4853     return 0;
4854 }
4855
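     /**
      * Compute the spatial variance and mean of every source macroblock of
      * this slice and accumulate the variance sum.
      */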
4856 static int mb_var_thread(AVCodecContext *c, void *arg){
4857     MpegEncContext *s= arg;
4858     int mb_x, mb_y;
4859
4860     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4861         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4862             int xx = mb_x * 16;
4863             int yy = mb_y * 16;
4864             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4865             int varc;
4866             int sum = s->dsp.pix_sum(pix, s->linesize);
4867
4868             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4869
4870             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4871             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4872             s->me.mb_var_sum_temp    += varc;
4873         }
4874     }
4875     return 0;
4876 }
4877
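     /**
      * Terminate the current slice: merge MPEG-4 data partitions, write the
      * stuffing the format requires, then byte-align and flush the bitstream.
      */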
4878 static void write_slice_end(MpegEncContext *s){
4879     if(s->codec_id==CODEC_ID_MPEG4){
4880         if(s->partitioned_frame){
4881             ff_mpeg4_merge_partitions(s);
4882         }
4883
4884         ff_mpeg4_stuffing(&s->pb);
4885     }else if(s->out_format == FMT_MJPEG){
4886         ff_mjpeg_stuffing(&s->pb);
4887     }
4888
4889     align_put_bits(&s->pb);
4890     flush_put_bits(&s->pb);
4891
4892     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4893         s->misc_bits+= get_bits_diff(s);
4894 }
4895
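     /**
      * Encode all macroblocks of one slice: write GOB / video packet headers
      * where needed, try the candidate macroblock types (optionally with full
      * rate-distortion comparison) and keep per-slice statistics.
      */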
4896 static int encode_thread(AVCodecContext *c, void *arg){
4897     MpegEncContext *s= arg;
4898     int mb_x, mb_y, pdif = 0;
4899     int i, j;
4900     MpegEncContext best_s, backup_s;
4901     uint8_t bit_buf[2][MAX_MB_BYTES];
4902     uint8_t bit_buf2[2][MAX_MB_BYTES];
4903     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4904     PutBitContext pb[2], pb2[2], tex_pb[2];
4905 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4906
4907     for(i=0; i<2; i++){
4908         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4909         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4910         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4911     }
4912
4913     s->last_bits= put_bits_count(&s->pb);
4914     s->mv_bits=0;
4915     s->misc_bits=0;
4916     s->i_tex_bits=0;
4917     s->p_tex_bits=0;
4918     s->i_count=0;
4919     s->f_count=0;
4920     s->b_count=0;
4921     s->skip_count=0;
4922
4923     for(i=0; i<3; i++){
4924         /* init last dc values */
4925         /* note: quant matrix value (8) is implied here */
4926         s->last_dc[i] = 128 << s->intra_dc_precision;
4927
4928         s->current_picture.error[i] = 0;
4929     }
4930     s->mb_skip_run = 0;
4931     memset(s->last_mv, 0, sizeof(s->last_mv));
4932
4933     s->last_mv_dir = 0;
4934
4935     switch(s->codec_id){
4936     case CODEC_ID_H263:
4937     case CODEC_ID_H263P:
4938     case CODEC_ID_FLV1:
4939         s->gob_index = ff_h263_get_gob_height(s);
4940         break;
4941     case CODEC_ID_MPEG4:
4942         if(s->partitioned_frame)
4943             ff_mpeg4_init_partitions(s);
4944         break;
4945     }
4946
4947     s->resync_mb_x=0;
4948     s->resync_mb_y=0;
4949     s->first_slice_line = 1;
4950     s->ptr_lastgob = s->pb.buf;
4951     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4952 //    printf("row %d at %X\n", s->mb_y, (int)s);
4953         s->mb_x=0;
4954         s->mb_y= mb_y;
4955
4956         ff_set_qscale(s, s->qscale);
4957         ff_init_block_index(s);
4958
4959         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4960             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4961             int mb_type= s->mb_type[xy];
4962 //            int d;
4963             int dmin= INT_MAX;
4964             int dir;
4965
4966             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4967                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4968                 return -1;
4969             }
4970             if(s->data_partitioning){
4971                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4972                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4973                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4974                     return -1;
4975                 }
4976             }
4977
4978             s->mb_x = mb_x;
4979             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4980             ff_update_block_index(s);
4981
4982 #ifdef CONFIG_H261_ENCODER
4983             if(s->codec_id == CODEC_ID_H261){
4984                 ff_h261_reorder_mb_index(s);
4985                 xy= s->mb_y*s->mb_stride + s->mb_x;
4986                 mb_type= s->mb_type[xy];
4987             }
4988 #endif
4989
4990             /* write gob / video packet header  */
4991             if(s->rtp_mode){
4992                 int current_packet_size, is_gob_start;
4993
4994                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4995
4996                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4997
4998                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4999
5000                 switch(s->codec_id){
5001                 case CODEC_ID_H263:
5002                 case CODEC_ID_H263P:
5003                     if(!s->h263_slice_structured)
5004                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5005                     break;
5006                 case CODEC_ID_MPEG2VIDEO:
5007                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5008                 case CODEC_ID_MPEG1VIDEO:
5009                     if(s->mb_skip_run) is_gob_start=0;
5010                     break;
5011                 }
5012
5013                 if(is_gob_start){
5014                     if(s->start_mb_y != mb_y || mb_x!=0){
5015                         write_slice_end(s);
5016
5017                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5018                             ff_mpeg4_init_partitions(s);
5019                         }
5020                     }
5021
5022                     assert((put_bits_count(&s->pb)&7) == 0);
5023                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5024
5025                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5026                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5027                         int d= 100 / s->avctx->error_rate;
5028                         if(r % d == 0){
5029                             current_packet_size=0;
5030 #ifndef ALT_BITSTREAM_WRITER
5031                             s->pb.buf_ptr= s->ptr_lastgob;
5032 #endif
5033                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5034                         }
5035                     }
5036
5037                     if (s->avctx->rtp_callback){
5038                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5039                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5040                     }
5041
5042                     switch(s->codec_id){
5043                     case CODEC_ID_MPEG4:
5044                         ff_mpeg4_encode_video_packet_header(s);
5045                         ff_mpeg4_clean_buffers(s);
5046                     break;
5047                     case CODEC_ID_MPEG1VIDEO:
5048                     case CODEC_ID_MPEG2VIDEO:
5049                         ff_mpeg1_encode_slice_header(s);
5050                         ff_mpeg1_clean_buffers(s);
5051                     break;
5052                     case CODEC_ID_H263:
5053                     case CODEC_ID_H263P:
5054                         h263_encode_gob_header(s, mb_y);
5055                     break;
5056                     }
5057
5058                     if(s->flags&CODEC_FLAG_PASS1){
5059                         int bits= put_bits_count(&s->pb);
5060                         s->misc_bits+= bits - s->last_bits;
5061                         s->last_bits= bits;
5062                     }
5063
5064                     s->ptr_lastgob += current_packet_size;
5065                     s->first_slice_line=1;
5066                     s->resync_mb_x=mb_x;
5067                     s->resync_mb_y=mb_y;
5068                 }
5069             }
5070
5071             if(  (s->resync_mb_x   == s->mb_x)
5072                && s->resync_mb_y+1 == s->mb_y){
5073                 s->first_slice_line=0;
5074             }
5075
5076             s->mb_skipped=0;
5077             s->dquant=0; //only for QP_RD
5078
5079             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5080                 int next_block=0;
5081                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5082
5083                 copy_context_before_encode(&backup_s, s, -1);
5084                 backup_s.pb= s->pb;
5085                 best_s.data_partitioning= s->data_partitioning;
5086                 best_s.partitioned_frame= s->partitioned_frame;
5087                 if(s->data_partitioning){
5088                     backup_s.pb2= s->pb2;
5089                     backup_s.tex_pb= s->tex_pb;
5090                 }
5091
5092                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5093                     s->mv_dir = MV_DIR_FORWARD;
5094                     s->mv_type = MV_TYPE_16X16;
5095                     s->mb_intra= 0;
5096                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5097                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5098                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5099                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5100                 }
5101                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5102                     s->mv_dir = MV_DIR_FORWARD;
5103                     s->mv_type = MV_TYPE_FIELD;
5104                     s->mb_intra= 0;
5105                     for(i=0; i<2; i++){
5106                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5107                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5108                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5109                     }
5110                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5111                                  &dmin, &next_block, 0, 0);
5112                 }
5113                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5114                     s->mv_dir = MV_DIR_FORWARD;
5115                     s->mv_type = MV_TYPE_16X16;
5116                     s->mb_intra= 0;
5117                     s->mv[0][0][0] = 0;
5118                     s->mv[0][0][1] = 0;
5119                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5120                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5121                 }
5122                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5123                     s->mv_dir = MV_DIR_FORWARD;
5124                     s->mv_type = MV_TYPE_8X8;
5125                     s->mb_intra= 0;
5126                     for(i=0; i<4; i++){
5127                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5128                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5129                     }
5130                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5131                                  &dmin, &next_block, 0, 0);
5132                 }
5133                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5134                     s->mv_dir = MV_DIR_FORWARD;
5135                     s->mv_type = MV_TYPE_16X16;
5136                     s->mb_intra= 0;
5137                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5138                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5139                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5140                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5141                 }
5142                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5143                     s->mv_dir = MV_DIR_BACKWARD;
5144                     s->mv_type = MV_TYPE_16X16;
5145                     s->mb_intra= 0;
5146                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5147                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5148                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5149                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5150                 }
5151                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5152                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5153                     s->mv_type = MV_TYPE_16X16;
5154                     s->mb_intra= 0;
5155                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5156                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5157                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5158                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5159                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5160                                  &dmin, &next_block, 0, 0);
5161                 }
5162                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5163                     int mx= s->b_direct_mv_table[xy][0];
5164                     int my= s->b_direct_mv_table[xy][1];
5165
5166                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5167                     s->mb_intra= 0;
5168                     ff_mpeg4_set_direct_mv(s, mx, my);
5169                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5170                                  &dmin, &next_block, mx, my);
5171                 }
5172                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5173                     s->mv_dir = MV_DIR_FORWARD;
5174                     s->mv_type = MV_TYPE_FIELD;
5175                     s->mb_intra= 0;
5176                     for(i=0; i<2; i++){
5177                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5178                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5179                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5180                     }
5181                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5182                                  &dmin, &next_block, 0, 0);
5183                 }
5184                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5185                     s->mv_dir = MV_DIR_BACKWARD;
5186                     s->mv_type = MV_TYPE_FIELD;
5187                     s->mb_intra= 0;
5188                     for(i=0; i<2; i++){
5189                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5190                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5191                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5192                     }
5193                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5194                                  &dmin, &next_block, 0, 0);
5195                 }
5196                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5197                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5198                     s->mv_type = MV_TYPE_FIELD;
5199                     s->mb_intra= 0;
5200                     for(dir=0; dir<2; dir++){
5201                         for(i=0; i<2; i++){
5202                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5203                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5204                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5205                         }
5206                     }
5207                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5208                                  &dmin, &next_block, 0, 0);
5209                 }
5210                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5211                     s->mv_dir = 0;
5212                     s->mv_type = MV_TYPE_16X16;
5213                     s->mb_intra= 1;
5214                     s->mv[0][0][0] = 0;
5215                     s->mv[0][0][1] = 0;
5216                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5217                                  &dmin, &next_block, 0, 0);
5218                     if(s->h263_pred || s->h263_aic){
5219                         if(best_s.mb_intra)
5220                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5221                         else
5222                             ff_clean_intra_table_entries(s); //old mode?
5223                     }
5224                 }
5225
5226                 if(s->flags & CODEC_FLAG_QP_RD){
5227                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5228                         const int last_qp= backup_s.qscale;
5229                         int qpi, qp, dc[6];
5230                         DCTELEM ac[6][16];
5231                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5232                         static const int dquant_tab[4]={-1,1,-2,2};
5233
5234                         assert(backup_s.dquant == 0);
5235
5236                         //FIXME intra
5237                         s->mv_dir= best_s.mv_dir;
5238                         s->mv_type = MV_TYPE_16X16;
5239                         s->mb_intra= best_s.mb_intra;
5240                         s->mv[0][0][0] = best_s.mv[0][0][0];
5241                         s->mv[0][0][1] = best_s.mv[0][0][1];
5242                         s->mv[1][0][0] = best_s.mv[1][0][0];
5243                         s->mv[1][0][1] = best_s.mv[1][0][1];
5244
5245                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5246                         for(; qpi<4; qpi++){
5247                             int dquant= dquant_tab[qpi];
5248                             qp= last_qp + dquant;
5249                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5250                                 continue;
5251                             backup_s.dquant= dquant;
5252                             if(s->mb_intra && s->dc_val[0]){
5253                                 for(i=0; i<6; i++){
5254                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5255                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5256                                 }
5257                             }
5258
5259                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5260                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5261                             if(best_s.qscale != qp){
5262                                 if(s->mb_intra && s->dc_val[0]){
5263                                     for(i=0; i<6; i++){
5264                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5265                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5266                                     }
5267                                 }
5268                             }
5269                         }
5270                         qp= best_s.qscale;
5271                         s->current_picture.qscale_table[xy]= qp;
5272                     }
5273                 }
5274
5275                 copy_context_after_encode(s, &best_s, -1);
5276
5277                 pb_bits_count= put_bits_count(&s->pb);
5278                 flush_put_bits(&s->pb);
5279                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5280                 s->pb= backup_s.pb;
5281
5282                 if(s->data_partitioning){
5283                     pb2_bits_count= put_bits_count(&s->pb2);
5284                     flush_put_bits(&s->pb2);
5285                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5286                     s->pb2= backup_s.pb2;
5287
5288                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5289                     flush_put_bits(&s->tex_pb);
5290                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5291                     s->tex_pb= backup_s.tex_pb;
5292                 }
5293                 s->last_bits= put_bits_count(&s->pb);
5294
5295                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5296                     ff_h263_update_motion_val(s);
5297
5298                 if(next_block==0){ //FIXME 16 vs linesize16
5299                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5300                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5301                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5302                 }
5303
5304                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5305                     MPV_decode_mb(s, s->block);
5306             } else {
5307                 int motion_x, motion_y;
5308                 s->mv_type=MV_TYPE_16X16;
5309                 // only one MB-Type possible
5310
5311                 switch(mb_type){
5312                 case CANDIDATE_MB_TYPE_INTRA:
5313                     s->mv_dir = 0;
5314                     s->mb_intra= 1;
5315                     motion_x= s->mv[0][0][0] = 0;
5316                     motion_y= s->mv[0][0][1] = 0;
5317                     break;
5318                 case CANDIDATE_MB_TYPE_INTER:
5319                     s->mv_dir = MV_DIR_FORWARD;
5320                     s->mb_intra= 0;
5321                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5322                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5323                     break;
5324                 case CANDIDATE_MB_TYPE_INTER_I:
5325                     s->mv_dir = MV_DIR_FORWARD;
5326                     s->mv_type = MV_TYPE_FIELD;
5327                     s->mb_intra= 0;
5328                     for(i=0; i<2; i++){
5329                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5330                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5331                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5332                     }
5333                     motion_x = motion_y = 0;
5334                     break;
5335                 case CANDIDATE_MB_TYPE_INTER4V:
5336                     s->mv_dir = MV_DIR_FORWARD;
5337                     s->mv_type = MV_TYPE_8X8;
5338                     s->mb_intra= 0;
5339                     for(i=0; i<4; i++){
5340                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5341                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5342                     }
5343                     motion_x= motion_y= 0;
5344                     break;
5345                 case CANDIDATE_MB_TYPE_DIRECT:
5346                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5347                     s->mb_intra= 0;
5348                     motion_x=s->b_direct_mv_table[xy][0];
5349                     motion_y=s->b_direct_mv_table[xy][1];
5350                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5351                     break;
5352                 case CANDIDATE_MB_TYPE_BIDIR:
5353                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5354                     s->mb_intra= 0;
5355                     motion_x=0;
5356                     motion_y=0;
5357                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5358                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5359                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5360                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5361                     break;
5362                 case CANDIDATE_MB_TYPE_BACKWARD:
5363                     s->mv_dir = MV_DIR_BACKWARD;
5364                     s->mb_intra= 0;
5365                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5366                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5367                     break;
5368                 case CANDIDATE_MB_TYPE_FORWARD:
5369                     s->mv_dir = MV_DIR_FORWARD;
5370                     s->mb_intra= 0;
5371                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5372                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5373 //                    printf(" %d %d ", motion_x, motion_y);
5374                     break;
5375                 case CANDIDATE_MB_TYPE_FORWARD_I:
5376                     s->mv_dir = MV_DIR_FORWARD;
5377                     s->mv_type = MV_TYPE_FIELD;
5378                     s->mb_intra= 0;
5379                     for(i=0; i<2; i++){
5380                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5381                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5382                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5383                     }
5384                     motion_x=motion_y=0;
5385                     break;
5386                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5387                     s->mv_dir = MV_DIR_BACKWARD;
5388                     s->mv_type = MV_TYPE_FIELD;
5389                     s->mb_intra= 0;
5390                     for(i=0; i<2; i++){
5391                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5392                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5393                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5394                     }
5395                     motion_x=motion_y=0;
5396                     break;
5397                 case CANDIDATE_MB_TYPE_BIDIR_I:
5398                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5399                     s->mv_type = MV_TYPE_FIELD;
5400                     s->mb_intra= 0;
5401                     for(dir=0; dir<2; dir++){
5402                         for(i=0; i<2; i++){
5403                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5404                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5405                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5406                         }
5407                     }
5408                     motion_x=motion_y=0;
5409                     break;
5410                 default:
5411                     motion_x=motion_y=0; //gcc warning fix
5412                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5413                 }
5414
5415                 encode_mb(s, motion_x, motion_y);
5416
5417                 // RAL: Update last macroblock type
5418                 s->last_mv_dir = s->mv_dir;
5419
5420                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5421                     ff_h263_update_motion_val(s);
5422
5423                 MPV_decode_mb(s, s->block);
5424             }
5425
5426             /* clean the MV table in I/P/S frames; it is used by direct mode in B-frames */
5427             if(s->mb_intra /* && I,P,S_TYPE */){
5428                 s->p_mv_table[xy][0]=0;
5429                 s->p_mv_table[xy][1]=0;
5430             }
5431
5432             if(s->flags&CODEC_FLAG_PSNR){
5433                 int w= 16;
5434                 int h= 16;
5435
5436                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5437                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5438
5439                 s->current_picture.error[0] += sse(
5440                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5441                     s->dest[0], w, h, s->linesize);
5442                 s->current_picture.error[1] += sse(
5443                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5444                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5445                 s->current_picture.error[2] += sse(
5446                     s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5447                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5448             }
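            /* Added note: these per-plane sums of squared errors are what the reported
             * PSNR is later derived from; for 8-bit video the usual relation is
             *   PSNR = 10 * log10(255*255 * pixel_count / SSE). */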
5449             if(s->loop_filter){
5450                 if(s->out_format == FMT_H263)
5451                     ff_h263_loop_filter(s);
5452             }
5453 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5454         }
5455     }
5456
5457     //not pretty, but this must be written before flushing, so it has to be here
5458     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5459         msmpeg4_encode_ext_header(s);
5460
5461     write_slice_end(s);
5462
5463     /* Send the last GOB if RTP */
5464     if (s->avctx->rtp_callback) {
5465         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5466         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5467         /* Call the RTP callback to send the last GOB */
5468         emms_c();
5469         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5470     }
5471
5472     return 0;
5473 }
5474
5475 #define MERGE(field) dst->field += src->field; src->field=0
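/* For illustration: MERGE(mv_bits) expands to
 *   dst->mv_bits += src->mv_bits; src->mv_bits=0;
 * i.e. each per-thread counter is accumulated into the main context and then
 * cleared, so merging twice cannot double-count. */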
5476 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5477     MERGE(me.scene_change_score);
5478     MERGE(me.mc_mb_var_sum_temp);
5479     MERGE(me.mb_var_sum_temp);
5480 }
5481
5482 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5483     int i;
5484
5485     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5486     MERGE(dct_count[1]);
5487     MERGE(mv_bits);
5488     MERGE(i_tex_bits);
5489     MERGE(p_tex_bits);
5490     MERGE(i_count);
5491     MERGE(f_count);
5492     MERGE(b_count);
5493     MERGE(skip_count);
5494     MERGE(misc_bits);
5495     MERGE(error_count);
5496     MERGE(padding_bug_score);
5497     MERGE(current_picture.error[0]);
5498     MERGE(current_picture.error[1]);
5499     MERGE(current_picture.error[2]);
5500
5501     if(dst->avctx->noise_reduction){
5502         for(i=0; i<64; i++){
5503             MERGE(dct_error_sum[0][i]);
5504             MERGE(dct_error_sum[1][i]);
5505         }
5506     }
5507
5508     assert(put_bits_count(&src->pb) % 8 ==0);
5509     assert(put_bits_count(&dst->pb) % 8 ==0);
5510     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5511     flush_put_bits(&dst->pb);
5512 }
5513
5514 static int estimate_qp(MpegEncContext *s, int dry_run){
5515     if (s->next_lambda){
5516         s->current_picture_ptr->quality=
5517         s->current_picture.quality = s->next_lambda;
5518         if(!dry_run) s->next_lambda= 0;
5519     } else if (!s->fixed_qscale) {
5520         s->current_picture_ptr->quality=
5521         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5522         if (s->current_picture.quality < 0)
5523             return -1;
5524     }
5525
5526     if(s->adaptive_quant){
5527         switch(s->codec_id){
5528         case CODEC_ID_MPEG4:
5529             ff_clean_mpeg4_qscales(s);
5530             break;
5531         case CODEC_ID_H263:
5532         case CODEC_ID_H263P:
5533         case CODEC_ID_FLV1:
5534             ff_clean_h263_qscales(s);
5535             break;
5536         }
5537
5538         s->lambda= s->lambda_table[0];
5539         //FIXME broken
5540     }else
5541         s->lambda= s->current_picture.quality;
5542 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5543     update_qscale(s);
5544     return 0;
5545 }
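/* Added note (rough sketch of the convention assumed throughout this file):
 * rate control works on lambda rather than qscale; update_qscale() derives qscale
 * from lambda (approximately lambda ~= qscale * FF_QP2LAMBDA), clips it to
 * [qmin, qmax] and updates lambda2 (~ lambda^2 / FF_LAMBDA_SCALE), which is what
 * the rate-distortion decisions below use. */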
5546
5547 static int encode_picture(MpegEncContext *s, int picture_number)
5548 {
5549     int i;
5550     int bits;
5551
5552     s->picture_number = picture_number;
5553
5554     /* Reset the average MB variance */
5555     s->me.mb_var_sum_temp    =
5556     s->me.mc_mb_var_sum_temp = 0;
5557
5558     /* we need to initialize some time vars before we can encode b-frames */
5559     // RAL: Condition added for MPEG1VIDEO
5560     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5561         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5562
5563     s->me.scene_change_score=0;
5564
5565 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5566
5567     if(s->pict_type==I_TYPE){
5568         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5569         else                        s->no_rounding=0;
5570     }else if(s->pict_type!=B_TYPE){
5571         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5572             s->no_rounding ^= 1;
5573     }
5574
5575     if(s->flags & CODEC_FLAG_PASS2){
5576         if (estimate_qp(s,1) < 0)
5577             return -1;
5578         ff_get_2pass_fcode(s);
5579     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5580         if(s->pict_type==B_TYPE)
5581             s->lambda= s->last_lambda_for[s->pict_type];
5582         else
5583             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5584         update_qscale(s);
5585     }
5586
5587     s->mb_intra=0; //for the rate distortion & bit compare functions
5588     for(i=1; i<s->avctx->thread_count; i++){
5589         ff_update_duplicate_context(s->thread_context[i], s);
5590     }
5591
5592     ff_init_me(s);
5593
5594     /* Estimate motion for every MB */
5595     if(s->pict_type != I_TYPE){
5596         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5597         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
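        /* Added note: me_penalty_compensation is applied as a fixed-point factor of
         * x/256, so (lambda * x + 128) >> 8 scales lambda by x/256 with rounding to
         * nearest. */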
5598         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5599             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5600                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5601             }
5602         }
5603
5604         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5605     }else /* if(s->pict_type == I_TYPE) */{
5606         /* I-Frame */
5607         for(i=0; i<s->mb_stride*s->mb_height; i++)
5608             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5609
5610         if(!s->fixed_qscale){
5611             /* finding spatial complexity for I-frame rate control */
5612             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5613         }
5614     }
5615     for(i=1; i<s->avctx->thread_count; i++){
5616         merge_context_after_me(s, s->thread_context[i]);
5617     }
5618     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5619     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5620     emms_c();
5621
5622     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5623         s->pict_type= I_TYPE;
5624         for(i=0; i<s->mb_stride*s->mb_height; i++)
5625             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5626 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5627     }
5628
5629     if(!s->umvplus){
5630         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5631             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5632
5633             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5634                 int a,b;
5635                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5636                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5637                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5638             }
5639
5640             ff_fix_long_p_mvs(s);
5641             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5642             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5643                 int j;
5644                 for(i=0; i<2; i++){
5645                     for(j=0; j<2; j++)
5646                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5647                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5648                 }
5649             }
5650         }
5651
5652         if(s->pict_type==B_TYPE){
5653             int a, b;
5654
5655             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5656             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5657             s->f_code = FFMAX(a, b);
5658
5659             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5660             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5661             s->b_code = FFMAX(a, b);
5662
5663             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5664             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5665             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5666             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5667             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5668                 int dir, j;
5669                 for(dir=0; dir<2; dir++){
5670                     for(i=0; i<2; i++){
5671                         for(j=0; j<2; j++){
5672                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5673                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5674                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5675                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5676                         }
5677                     }
5678                 }
5679             }
5680         }
5681     }
5682
5683     if (estimate_qp(s, 0) < 0)
5684         return -1;
5685
5686     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5687         s->qscale= 3; //reduce clipping problems
5688
5689     if (s->out_format == FMT_MJPEG) {
5690         /* for mjpeg, we do include qscale in the matrix */
5691         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5692         for(i=1;i<64;i++){
5693             int j= s->dsp.idct_permutation[i];
5694
5695             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5696         }
5697         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5698                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5699         s->qscale= 8;
5700     }
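    /* Worked example (added): with qscale == 4 and a default matrix entry of 22, the
     * stored entry becomes (22*4) >> 3 == 11; qscale is then pinned to 8, so the
     * chosen quality is carried entirely by the quantization table that ends up in
     * the JPEG headers. */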
5701
5702     //FIXME var duplication
5703     s->current_picture_ptr->key_frame=
5704     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5705     s->current_picture_ptr->pict_type=
5706     s->current_picture.pict_type= s->pict_type;
5707
5708     if(s->current_picture.key_frame)
5709         s->picture_in_gop_number=0;
5710
5711     s->last_bits= put_bits_count(&s->pb);
5712     switch(s->out_format) {
5713     case FMT_MJPEG:
5714         mjpeg_picture_header(s);
5715         break;
5716 #ifdef CONFIG_H261_ENCODER
5717     case FMT_H261:
5718         ff_h261_encode_picture_header(s, picture_number);
5719         break;
5720 #endif
5721     case FMT_H263:
5722         if (s->codec_id == CODEC_ID_WMV2)
5723             ff_wmv2_encode_picture_header(s, picture_number);
5724         else if (s->h263_msmpeg4)
5725             msmpeg4_encode_picture_header(s, picture_number);
5726         else if (s->h263_pred)
5727             mpeg4_encode_picture_header(s, picture_number);
5728 #ifdef CONFIG_RV10_ENCODER
5729         else if (s->codec_id == CODEC_ID_RV10)
5730             rv10_encode_picture_header(s, picture_number);
5731 #endif
5732 #ifdef CONFIG_RV20_ENCODER
5733         else if (s->codec_id == CODEC_ID_RV20)
5734             rv20_encode_picture_header(s, picture_number);
5735 #endif
5736         else if (s->codec_id == CODEC_ID_FLV1)
5737             ff_flv_encode_picture_header(s, picture_number);
5738         else
5739             h263_encode_picture_header(s, picture_number);
5740         break;
5741     case FMT_MPEG1:
5742         mpeg1_encode_picture_header(s, picture_number);
5743         break;
5744     case FMT_H264:
5745         break;
5746     default:
5747         assert(0);
5748     }
5749     bits= put_bits_count(&s->pb);
5750     s->header_bits= bits - s->last_bits;
5751
5752     for(i=1; i<s->avctx->thread_count; i++){
5753         update_duplicate_context_after_me(s->thread_context[i], s);
5754     }
5755     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5756     for(i=1; i<s->avctx->thread_count; i++){
5757         merge_context_after_encode(s, s->thread_context[i]);
5758     }
5759     emms_c();
5760     return 0;
5761 }
5762
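/* Added description: denoise_dct_c() implements the noise reduction used when
 * avctx->noise_reduction is set.  Each nonzero DCT coefficient is shrunk towards
 * zero by dct_offset[intra][i] (computed elsewhere from the dct_error_sum
 * statistics accumulated below) and never changes sign. */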
5763 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5764     const int intra= s->mb_intra;
5765     int i;
5766
5767     s->dct_count[intra]++;
5768
5769     for(i=0; i<64; i++){
5770         int level= block[i];
5771
5772         if(level){
5773             if(level>0){
5774                 s->dct_error_sum[intra][i] += level;
5775                 level -= s->dct_offset[intra][i];
5776                 if(level<0) level=0;
5777             }else{
5778                 s->dct_error_sum[intra][i] -= level;
5779                 level += s->dct_offset[intra][i];
5780                 if(level>0) level=0;
5781             }
5782             block[i]= level;
5783         }
5784     }
5785 }
5786
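/* Added description: dct_quantize_trellis_c() does rate-distortion optimized
 * quantization.  For every scan position it keeps up to two candidate levels in
 * coeff[][], then runs a Viterbi-like search where score_tab[i] is the cheapest
 * rate + lambda*distortion cost of coding all coefficients before position i and
 * survivor[] holds the predecessor positions that can still be optimal. */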
5787 static int dct_quantize_trellis_c(MpegEncContext *s,
5788                         DCTELEM *block, int n,
5789                         int qscale, int *overflow){
5790     const int *qmat;
5791     const uint8_t *scantable= s->intra_scantable.scantable;
5792     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5793     int max=0;
5794     unsigned int threshold1, threshold2;
5795     int bias=0;
5796     int run_tab[65];
5797     int level_tab[65];
5798     int score_tab[65];
5799     int survivor[65];
5800     int survivor_count;
5801     int last_run=0;
5802     int last_level=0;
5803     int last_score= 0;
5804     int last_i;
5805     int coeff[2][64];
5806     int coeff_count[64];
5807     int qmul, qadd, start_i, last_non_zero, i, dc;
5808     const int esc_length= s->ac_esc_length;
5809     uint8_t * length;
5810     uint8_t * last_length;
5811     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5812
5813     s->dsp.fdct (block);
5814
5815     if(s->dct_error_sum)
5816         s->denoise_dct(s, block);
5817     qmul= qscale*16;
5818     qadd= ((qscale-1)|1)*8;
5819
5820     if (s->mb_intra) {
5821         int q;
5822         if (!s->h263_aic) {
5823             if (n < 4)
5824                 q = s->y_dc_scale;
5825             else
5826                 q = s->c_dc_scale;
5827             q = q << 3;
5828         } else{
5829             /* For AIC we skip quant/dequant of INTRADC */
5830             q = 1 << 3;
5831             qadd=0;
5832         }
5833
5834         /* note: block[0] is assumed to be positive */
5835         block[0] = (block[0] + (q >> 1)) / q;
5836         start_i = 1;
5837         last_non_zero = 0;
5838         qmat = s->q_intra_matrix[qscale];
5839         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5840             bias= 1<<(QMAT_SHIFT-1);
5841         length     = s->intra_ac_vlc_length;
5842         last_length= s->intra_ac_vlc_last_length;
5843     } else {
5844         start_i = 0;
5845         last_non_zero = -1;
5846         qmat = s->q_inter_matrix[qscale];
5847         length     = s->inter_ac_vlc_length;
5848         last_length= s->inter_ac_vlc_last_length;
5849     }
5850     last_i= start_i;
5851
5852     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5853     threshold2= (threshold1<<1);
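    /* Added note: the unsigned compares below are a cheap test for
     * |level| > threshold1: adding threshold1 maps [-threshold1, threshold1] onto
     * [0, threshold2], so anything outside that interval either exceeds threshold2
     * or wraps around to a huge unsigned value. */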
5854
5855     for(i=63; i>=start_i; i--) {
5856         const int j = scantable[i];
5857         int level = block[j] * qmat[j];
5858
5859         if(((unsigned)(level+threshold1))>threshold2){
5860             last_non_zero = i;
5861             break;
5862         }
5863     }
5864
5865     for(i=start_i; i<=last_non_zero; i++) {
5866         const int j = scantable[i];
5867         int level = block[j] * qmat[j];
5868
5869 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5870 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5871         if(((unsigned)(level+threshold1))>threshold2){
5872             if(level>0){
5873                 level= (bias + level)>>QMAT_SHIFT;
5874                 coeff[0][i]= level;
5875                 coeff[1][i]= level-1;
5876 //                coeff[2][k]= level-2;
5877             }else{
5878                 level= (bias - level)>>QMAT_SHIFT;
5879                 coeff[0][i]= -level;
5880                 coeff[1][i]= -level+1;
5881 //                coeff[2][k]= -level+2;
5882             }
5883             coeff_count[i]= FFMIN(level, 2);
5884             assert(coeff_count[i]);
5885             max |=level;
5886         }else{
5887             coeff[0][i]= (level>>31)|1;
5888             coeff_count[i]= 1;
5889         }
5890     }
5891
5892     *overflow= s->max_qcoeff < max; //overflow might have happened
5893
5894     if(last_non_zero < start_i){
5895         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5896         return last_non_zero;
5897     }
5898
5899     score_tab[start_i]= 0;
5900     survivor[0]= start_i;
5901     survivor_count= 1;
5902
5903     for(i=start_i; i<=last_non_zero; i++){
5904         int level_index, j;
5905         const int dct_coeff= FFABS(block[ scantable[i] ]);
5906         const int zero_distortion= dct_coeff*dct_coeff;
5907         int best_score=256*256*256*120;
5908         for(level_index=0; level_index < coeff_count[i]; level_index++){
5909             int distortion;
5910             int level= coeff[level_index][i];
5911             const int alevel= FFABS(level);
5912             int unquant_coeff;
5913
5914             assert(level);
5915
5916             if(s->out_format == FMT_H263){
5917                 unquant_coeff= alevel*qmul + qadd;
5918             }else{ //MPEG1
5919                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5920                 if(s->mb_intra){
5921                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5922                         unquant_coeff =   (unquant_coeff - 1) | 1;
5923                 }else{
5924                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5925                         unquant_coeff =   (unquant_coeff - 1) | 1;
5926                 }
5927                 unquant_coeff<<= 3;
5928             }
5929
5930             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5931             level+=64;
5932             if((level&(~127)) == 0){
5933                 for(j=survivor_count-1; j>=0; j--){
5934                     int run= i - survivor[j];
5935                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5936                     score += score_tab[i-run];
5937
5938                     if(score < best_score){
5939                         best_score= score;
5940                         run_tab[i+1]= run;
5941                         level_tab[i+1]= level-64;
5942                     }
5943                 }
5944
5945                 if(s->out_format == FMT_H263){
5946                     for(j=survivor_count-1; j>=0; j--){
5947                         int run= i - survivor[j];
5948                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5949                         score += score_tab[i-run];
5950                         if(score < last_score){
5951                             last_score= score;
5952                             last_run= run;
5953                             last_level= level-64;
5954                             last_i= i+1;
5955                         }
5956                     }
5957                 }
5958             }else{
5959                 distortion += esc_length*lambda;
5960                 for(j=survivor_count-1; j>=0; j--){
5961                     int run= i - survivor[j];
5962                     int score= distortion + score_tab[i-run];
5963
5964                     if(score < best_score){
5965                         best_score= score;
5966                         run_tab[i+1]= run;
5967                         level_tab[i+1]= level-64;
5968                     }
5969                 }
5970
5971                 if(s->out_format == FMT_H263){
5972                   for(j=survivor_count-1; j>=0; j--){
5973                         int run= i - survivor[j];
5974                         int score= distortion + score_tab[i-run];
5975                         if(score < last_score){
5976                             last_score= score;
5977                             last_run= run;
5978                             last_level= level-64;
5979                             last_i= i+1;
5980                         }
5981                     }
5982                 }
5983             }
5984         }
5985
5986         score_tab[i+1]= best_score;
5987
5988         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
5989         if(last_non_zero <= 27){
5990             for(; survivor_count; survivor_count--){
5991                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5992                     break;
5993             }
5994         }else{
5995             for(; survivor_count; survivor_count--){
5996                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5997                     break;
5998             }
5999         }
6000
6001         survivor[ survivor_count++ ]= i+1;
6002     }
6003
6004     if(s->out_format != FMT_H263){
6005         last_score= 256*256*256*120;
6006         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6007             int score= score_tab[i];
6008             if(i) score += lambda*2; //FIXME be more exact?
6009
6010             if(score < last_score){
6011                 last_score= score;
6012                 last_i= i;
6013                 last_level= level_tab[i];
6014                 last_run= run_tab[i];
6015             }
6016         }
6017     }
6018
6019     s->coded_score[n] = last_score;
6020
6021     dc= FFABS(block[0]);
6022     last_non_zero= last_i - 1;
6023     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6024
6025     if(last_non_zero < start_i)
6026         return last_non_zero;
6027
6028     if(last_non_zero == 0 && start_i == 0){
6029         int best_level= 0;
6030         int best_score= dc * dc;
6031
6032         for(i=0; i<coeff_count[0]; i++){
6033             int level= coeff[i][0];
6034             int alevel= FFABS(level);
6035             int unquant_coeff, score, distortion;
6036
6037             if(s->out_format == FMT_H263){
6038                     unquant_coeff= (alevel*qmul + qadd)>>3;
6039             }else{ //MPEG1
6040                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6041                     unquant_coeff =   (unquant_coeff - 1) | 1;
6042             }
6043             unquant_coeff = (unquant_coeff + 4) >> 3;
6044             unquant_coeff<<= 3 + 3;
6045
6046             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6047             level+=64;
6048             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6049             else                    score= distortion + esc_length*lambda;
6050
6051             if(score < best_score){
6052                 best_score= score;
6053                 best_level= level - 64;
6054             }
6055         }
6056         block[0]= best_level;
6057         s->coded_score[n] = best_score - dc*dc;
6058         if(best_level == 0) return -1;
6059         else                return last_non_zero;
6060     }
6061
6062     i= last_i;
6063     assert(last_level);
6064
6065     block[ perm_scantable[last_non_zero] ]= last_level;
6066     i -= last_run + 1;
6067
6068     for(; i>start_i; i -= run_tab[i] + 1){
6069         block[ perm_scantable[i-1] ]= level_tab[i];
6070     }
6071
6072     return last_non_zero;
6073 }
6074
6075 //#define REFINE_STATS 1
6076 static int16_t basis[64][64];
6077
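/* Added note: basis[k] holds the k-th 8x8 DCT basis function sampled in the
 * spatial domain, in BASIS_SHIFT fixed point:
 *   0.25 * C(i) * C(j) * cos((2x+1)*i*pi/16) * cos((2y+1)*j*pi/16)
 * with C(0) = sqrt(1/2) and C(n>0) = 1, stored under the IDCT's coefficient
 * permutation. */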
6078 static void build_basis(uint8_t *perm){
6079     int i, j, x, y;
6080     emms_c();
6081     for(i=0; i<8; i++){
6082         for(j=0; j<8; j++){
6083             for(y=0; y<8; y++){
6084                 for(x=0; x<8; x++){
6085                     double s= 0.25*(1<<BASIS_SHIFT);
6086                     int index= 8*i + j;
6087                     int perm_index= perm[index];
6088                     if(i==0) s*= sqrt(0.5);
6089                     if(j==0) s*= sqrt(0.5);
6090                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6091                 }
6092             }
6093         }
6094     }
6095 }
6096
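/* Added description: dct_quantize_refine() iteratively improves an already
 * quantized block when quantizer_noise_shaping is enabled.  rem[] keeps the current
 * reconstruction error against orig[] (expressed through the basis[] tables); each
 * pass tries a +/-1 change on every coefficient, estimates the bit-cost change from
 * the VLC length tables and the distortion change with dsp.try_8x8basis(), and
 * commits the single best change until nothing helps. */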
6097 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6098                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6099                         int n, int qscale){
6100     int16_t rem[64];
6101     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6102     const int *qmat;
6103     const uint8_t *scantable= s->intra_scantable.scantable;
6104     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6105 //    unsigned int threshold1, threshold2;
6106 //    int bias=0;
6107     int run_tab[65];
6108     int prev_run=0;
6109     int prev_level=0;
6110     int qmul, qadd, start_i, last_non_zero, i, dc;
6111     uint8_t * length;
6112     uint8_t * last_length;
6113     int lambda;
6114     int rle_index, run, q, sum;
6115 #ifdef REFINE_STATS
6116 static int count=0;
6117 static int after_last=0;
6118 static int to_zero=0;
6119 static int from_zero=0;
6120 static int raise=0;
6121 static int lower=0;
6122 static int messed_sign=0;
6123 #endif
6124
6125     if(basis[0][0] == 0)
6126         build_basis(s->dsp.idct_permutation);
6127
6128     qmul= qscale*2;
6129     qadd= (qscale-1)|1;
6130     if (s->mb_intra) {
6131         if (!s->h263_aic) {
6132             if (n < 4)
6133                 q = s->y_dc_scale;
6134             else
6135                 q = s->c_dc_scale;
6136         } else{
6137             /* For AIC we skip quant/dequant of INTRADC */
6138             q = 1;
6139             qadd=0;
6140         }
6141         q <<= RECON_SHIFT-3;
6142         /* note: block[0] is assumed to be positive */
6143         dc= block[0]*q;
6144 //        block[0] = (block[0] + (q >> 1)) / q;
6145         start_i = 1;
6146         qmat = s->q_intra_matrix[qscale];
6147 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6148 //            bias= 1<<(QMAT_SHIFT-1);
6149         length     = s->intra_ac_vlc_length;
6150         last_length= s->intra_ac_vlc_last_length;
6151     } else {
6152         dc= 0;
6153         start_i = 0;
6154         qmat = s->q_inter_matrix[qscale];
6155         length     = s->inter_ac_vlc_length;
6156         last_length= s->inter_ac_vlc_last_length;
6157     }
6158     last_non_zero = s->block_last_index[n];
6159
6160 #ifdef REFINE_STATS
6161 {START_TIMER
6162 #endif
6163     dc += (1<<(RECON_SHIFT-1));
6164     for(i=0; i<64; i++){
6165         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
6166     }
6167 #ifdef REFINE_STATS
6168 STOP_TIMER("memset rem[]")}
6169 #endif
6170     sum=0;
6171     for(i=0; i<64; i++){
6172         int one= 36;
6173         int qns=4;
6174         int w;
6175
6176         w= FFABS(weight[i]) + qns*one;
6177         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6178
6179         weight[i] = w;
6180 //        w=weight[i] = (63*qns + (w/2)) / w;
6181
6182         assert(w>0);
6183         assert(w<(1<<6));
6184         sum += w*w;
6185     }
6186     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6187 #ifdef REFINE_STATS
6188 {START_TIMER
6189 #endif
6190     run=0;
6191     rle_index=0;
6192     for(i=start_i; i<=last_non_zero; i++){
6193         int j= perm_scantable[i];
6194         const int level= block[j];
6195         int coeff;
6196
6197         if(level){
6198             if(level<0) coeff= qmul*level - qadd;
6199             else        coeff= qmul*level + qadd;
6200             run_tab[rle_index++]=run;
6201             run=0;
6202
6203             s->dsp.add_8x8basis(rem, basis[j], coeff);
6204         }else{
6205             run++;
6206         }
6207     }
6208 #ifdef REFINE_STATS
6209 if(last_non_zero>0){
6210 STOP_TIMER("init rem[]")
6211 }
6212 }
6213
6214 {START_TIMER
6215 #endif
6216     for(;;){
6217         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6218         int best_coeff=0;
6219         int best_change=0;
6220         int run2, best_unquant_change=0, analyze_gradient;
6221 #ifdef REFINE_STATS
6222 {START_TIMER
6223 #endif
6224         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6225
6226         if(analyze_gradient){
6227 #ifdef REFINE_STATS
6228 {START_TIMER
6229 #endif
6230             for(i=0; i<64; i++){
6231                 int w= weight[i];
6232
6233                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6234             }
6235 #ifdef REFINE_STATS
6236 STOP_TIMER("rem*w*w")}
6237 {START_TIMER
6238 #endif
6239             s->dsp.fdct(d1);
6240 #ifdef REFINE_STATS
6241 STOP_TIMER("dct")}
6242 #endif
6243         }
6244
6245         if(start_i){
6246             const int level= block[0];
6247             int change, old_coeff;
6248
6249             assert(s->mb_intra);
6250
6251             old_coeff= q*level;
6252
6253             for(change=-1; change<=1; change+=2){
6254                 int new_level= level + change;
6255                 int score, new_coeff;
6256
6257                 new_coeff= q*new_level;
6258                 if(new_coeff >= 2048 || new_coeff < 0)
6259                     continue;
6260
6261                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6262                 if(score<best_score){
6263                     best_score= score;
6264                     best_coeff= 0;
6265                     best_change= change;
6266                     best_unquant_change= new_coeff - old_coeff;
6267                 }
6268             }
6269         }
6270
6271         run=0;
6272         rle_index=0;
6273         run2= run_tab[rle_index++];
6274         prev_level=0;
6275         prev_run=0;
6276
6277         for(i=start_i; i<64; i++){
6278             int j= perm_scantable[i];
6279             const int level= block[j];
6280             int change, old_coeff;
6281
6282             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6283                 break;
6284
6285             if(level){
6286                 if(level<0) old_coeff= qmul*level - qadd;
6287                 else        old_coeff= qmul*level + qadd;
6288                 run2= run_tab[rle_index++]; //FIXME: this may read one entry past the last run_tab slot
6289             }else{
6290                 old_coeff=0;
6291                 run2--;
6292                 assert(run2>=0 || i >= last_non_zero );
6293             }
6294
6295             for(change=-1; change<=1; change+=2){
6296                 int new_level= level + change;
6297                 int score, new_coeff, unquant_change;
6298
6299                 score=0;
6300                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6301                    continue;
6302
6303                 if(new_level){
6304                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6305                     else            new_coeff= qmul*new_level + qadd;
6306                     if(new_coeff >= 2048 || new_coeff <= -2048)
6307                         continue;
6308                     //FIXME check for overflow
6309
6310                     if(level){
6311                         if(level < 63 && level > -63){
6312                             if(i < last_non_zero)
6313                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6314                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6315                             else
6316                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6317                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6318                         }
6319                     }else{
6320                         assert(FFABS(new_level)==1);
6321
6322                         if(analyze_gradient){
6323                             int g= d1[ scantable[i] ];
6324                             if(g && (g^new_level) >= 0)
6325                                 continue;
6326                         }
6327
6328                         if(i < last_non_zero){
6329                             int next_i= i + run2 + 1;
6330                             int next_level= block[ perm_scantable[next_i] ] + 64;
6331
6332                             if(next_level&(~127))
6333                                 next_level= 0;
6334
6335                             if(next_i < last_non_zero)
6336                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6337                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6338                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6339                             else
6340                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6341                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6342                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6343                         }else{
6344                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6345                             if(prev_level){
6346                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6347                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6348                             }
6349                         }
6350                     }
6351                 }else{
6352                     new_coeff=0;
6353                     assert(FFABS(level)==1);
6354
6355                     if(i < last_non_zero){
6356                         int next_i= i + run2 + 1;
6357                         int next_level= block[ perm_scantable[next_i] ] + 64;
6358
6359                         if(next_level&(~127))
6360                             next_level= 0;
6361
6362                         if(next_i < last_non_zero)
6363                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6364                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6365                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6366                         else
6367                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6368                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6369                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6370                     }else{
6371                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6372                         if(prev_level){
6373                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6374                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6375                         }
6376                     }
6377                 }
6378
6379                 score *= lambda;
6380
6381                 unquant_change= new_coeff - old_coeff;
6382                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6383
6384                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6385                 if(score<best_score){
6386                     best_score= score;
6387                     best_coeff= i;
6388                     best_change= change;
6389                     best_unquant_change= unquant_change;
6390                 }
6391             }
6392             if(level){
6393                 prev_level= level + 64;
6394                 if(prev_level&(~127))
6395                     prev_level= 0;
6396                 prev_run= run;
6397                 run=0;
6398             }else{
6399                 run++;
6400             }
6401         }
6402 #ifdef REFINE_STATS
6403 STOP_TIMER("iterative step")}
6404 #endif
6405
6406         if(best_change){
6407             int j= perm_scantable[ best_coeff ];
6408
6409             block[j] += best_change;
6410
6411             if(best_coeff > last_non_zero){
6412                 last_non_zero= best_coeff;
6413                 assert(block[j]);
6414 #ifdef REFINE_STATS
6415 after_last++;
6416 #endif
6417             }else{
6418 #ifdef REFINE_STATS
6419 if(block[j]){
6420     if(block[j] - best_change){
6421         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6422             raise++;
6423         }else{
6424             lower++;
6425         }
6426     }else{
6427         from_zero++;
6428     }
6429 }else{
6430     to_zero++;
6431 }
6432 #endif
6433                 for(; last_non_zero>=start_i; last_non_zero--){
6434                     if(block[perm_scantable[last_non_zero]])
6435                         break;
6436                 }
6437             }
6438 #ifdef REFINE_STATS
6439 count++;
6440 if(256*256*256*64 % count == 0){
6441     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6442 }
6443 #endif
6444             run=0;
6445             rle_index=0;
6446             for(i=start_i; i<=last_non_zero; i++){
6447                 int j= perm_scantable[i];
6448                 const int level= block[j];
6449
6450                  if(level){
6451                      run_tab[rle_index++]=run;
6452                      run=0;
6453                  }else{
6454                      run++;
6455                  }
6456             }
6457
6458             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6459         }else{
6460             break;
6461         }
6462     }
6463 #ifdef REFINE_STATS
6464 if(last_non_zero>0){
6465 STOP_TIMER("iterative search")
6466 }
6467 }
6468 #endif
6469
6470     return last_non_zero;
6471 }
6472
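/* Added description: dct_quantize_c() is the plain (non-trellis) quantizer.  After
 * the forward DCT each coefficient is scaled by the precomputed reciprocal matrix
 * and rounded with the intra/inter quantizer bias, roughly
 *   level = (|block[j]| * qmat[j] + bias) >> QMAT_SHIFT
 * with the sign restored afterwards. */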
6473 static int dct_quantize_c(MpegEncContext *s,
6474                         DCTELEM *block, int n,
6475                         int qscale, int *overflow)
6476 {
6477     int i, j, level, last_non_zero, q, start_i;
6478     const int *qmat;
6479     const uint8_t *scantable= s->intra_scantable.scantable;
6480     int bias;
6481     int max=0;
6482     unsigned int threshold1, threshold2;
6483
6484     s->dsp.fdct (block);
6485
6486     if(s->dct_error_sum)
6487         s->denoise_dct(s, block);
6488
6489     if (s->mb_intra) {
6490         if (!s->h263_aic) {
6491             if (n < 4)
6492                 q = s->y_dc_scale;
6493             else
6494                 q = s->c_dc_scale;
6495             q = q << 3;
6496         } else
6497             /* For AIC we skip quant/dequant of INTRADC */
6498             q = 1 << 3;
6499
6500         /* note: block[0] is assumed to be positive */
6501         block[0] = (block[0] + (q >> 1)) / q;
6502         start_i = 1;
6503         last_non_zero = 0;
6504         qmat = s->q_intra_matrix[qscale];
6505         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6506     } else {
6507         start_i = 0;
6508         last_non_zero = -1;
6509         qmat = s->q_inter_matrix[qscale];
6510         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6511     }
6512     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6513     threshold2= (threshold1<<1);
6514     for(i=63;i>=start_i;i--) {
6515         j = scantable[i];
6516         level = block[j] * qmat[j];
6517
6518         if(((unsigned)(level+threshold1))>threshold2){
6519             last_non_zero = i;
6520             break;
6521         }else{
6522             block[j]=0;
6523         }
6524     }
6525     for(i=start_i; i<=last_non_zero; i++) {
6526         j = scantable[i];
6527         level = block[j] * qmat[j];
6528
6529 //        if(   bias+level >= (1<<QMAT_SHIFT)
6530 //           || bias-level >= (1<<QMAT_SHIFT)){
6531         if(((unsigned)(level+threshold1))>threshold2){
6532             if(level>0){
6533                 level= (bias + level)>>QMAT_SHIFT;
6534                 block[j]= level;
6535             }else{
6536                 level= (bias - level)>>QMAT_SHIFT;
6537                 block[j]= -level;
6538             }
6539             max |=level;
6540         }else{
6541             block[j]=0;
6542         }
6543     }
6544     *overflow= s->max_qcoeff < max; //overflow might have happened
6545
6546     /* we need this permutation so that the IDCT receives its coefficients in the expected order; only the !=0 elements are permuted */
6547     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6548         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6549
6550     return last_non_zero;
6551 }
6552
6553 #endif //CONFIG_ENCODERS
6554
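/* Added note: the MPEG-1 dequantizers below reconstruct
 *   level' = ( level      * qscale * quant_matrix[j]) >> 3   (intra)
 *   level' = ((2*level+1) * qscale * quant_matrix[j]) >> 4   (inter)
 * and then force the result odd with (level' - 1) | 1, MPEG-1's "oddification" for
 * IDCT mismatch control. */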
6555 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6556                                    DCTELEM *block, int n, int qscale)
6557 {
6558     int i, level, nCoeffs;
6559     const uint16_t *quant_matrix;
6560
6561     nCoeffs= s->block_last_index[n];
6562
6563     if (n < 4)
6564         block[0] = block[0] * s->y_dc_scale;
6565     else
6566         block[0] = block[0] * s->c_dc_scale;
6567     /* XXX: only mpeg1 */
6568     quant_matrix = s->intra_matrix;
6569     for(i=1;i<=nCoeffs;i++) {
6570         int j= s->intra_scantable.permutated[i];
6571         level = block[j];
6572         if (level) {
6573             if (level < 0) {
6574                 level = -level;
6575                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6576                 level = (level - 1) | 1;
6577                 level = -level;
6578             } else {
6579                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6580                 level = (level - 1) | 1;
6581             }
6582             block[j] = level;
6583         }
6584     }
6585 }
6586
6587 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6588                                    DCTELEM *block, int n, int qscale)
6589 {
6590     int i, level, nCoeffs;
6591     const uint16_t *quant_matrix;
6592
6593     nCoeffs= s->block_last_index[n];
6594
6595     quant_matrix = s->inter_matrix;
6596     for(i=0; i<=nCoeffs; i++) {
6597         int j= s->intra_scantable.permutated[i];
6598         level = block[j];
6599         if (level) {
6600             if (level < 0) {
6601                 level = -level;
6602                 level = (((level << 1) + 1) * qscale *
6603                          ((int) (quant_matrix[j]))) >> 4;
6604                 level = (level - 1) | 1;
6605                 level = -level;
6606             } else {
6607                 level = (((level << 1) + 1) * qscale *
6608                          ((int) (quant_matrix[j]))) >> 4;
6609                 level = (level - 1) | 1;
6610             }
6611             block[j] = level;
6612         }
6613     }
6614 }
6615
6616 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6617                                    DCTELEM *block, int n, int qscale)
6618 {
6619     int i, level, nCoeffs;
6620     const uint16_t *quant_matrix;
6621
6622     if(s->alternate_scan) nCoeffs= 63;
6623     else nCoeffs= s->block_last_index[n];
6624
6625     if (n < 4)
6626         block[0] = block[0] * s->y_dc_scale;
6627     else
6628         block[0] = block[0] * s->c_dc_scale;
6629     quant_matrix = s->intra_matrix;
6630     for(i=1;i<=nCoeffs;i++) {
6631         int j= s->intra_scantable.permutated[i];
6632         level = block[j];
6633         if (level) {
6634             if (level < 0) {
6635                 level = -level;
6636                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6637                 level = -level;
6638             } else {
6639                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6640             }
6641             block[j] = level;
6642         }
6643     }
6644 }
6645
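/* Added note: the MPEG-2 dequantizers below additionally apply the standard's
 * mismatch control: the parity of the sum of all reconstructed coefficients is
 * folded into the last coefficient via block[63] ^= sum & 1. */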
6646 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6647                                    DCTELEM *block, int n, int qscale)
6648 {
6649     int i, level, nCoeffs;
6650     const uint16_t *quant_matrix;
6651     int sum=-1;
6652
6653     if(s->alternate_scan) nCoeffs= 63;
6654     else nCoeffs= s->block_last_index[n];
6655
6656     if (n < 4)
6657         block[0] = block[0] * s->y_dc_scale;
6658     else
6659         block[0] = block[0] * s->c_dc_scale;
6660     quant_matrix = s->intra_matrix;
6661     for(i=1;i<=nCoeffs;i++) {
6662         int j= s->intra_scantable.permutated[i];
6663         level = block[j];
6664         if (level) {
6665             if (level < 0) {
6666                 level = -level;
6667                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6668                 level = -level;
6669             } else {
6670                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6671             }
6672             block[j] = level;
6673             sum+=level;
6674         }
6675     }
6676     block[63]^=sum&1;
6677 }
6678
6679 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6680                                    DCTELEM *block, int n, int qscale)
6681 {
6682     int i, level, nCoeffs;
6683     const uint16_t *quant_matrix;
6684     int sum=-1;
6685
6686     if(s->alternate_scan) nCoeffs= 63;
6687     else nCoeffs= s->block_last_index[n];
6688
6689     quant_matrix = s->inter_matrix;
6690     for(i=0; i<=nCoeffs; i++) {
6691         int j= s->intra_scantable.permutated[i];
6692         level = block[j];
6693         if (level) {
6694             if (level < 0) {
6695                 level = -level;
6696                 level = (((level << 1) + 1) * qscale *
6697                          ((int) (quant_matrix[j]))) >> 4;
6698                 level = -level;
6699             } else {
6700                 level = (((level << 1) + 1) * qscale *
6701                          ((int) (quant_matrix[j]))) >> 4;
6702             }
6703             block[j] = level;
6704             sum+=level;
6705         }
6706     }
6707     block[63]^=sum&1;
6708 }
6709
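/* Added note: H.263-style dequantization is uniform with a dead zone:
 *   level' = 2*qscale*level +/- ((qscale-1)|1)
 * e.g. with qscale == 6 a level of 3 reconstructs to 3*12 + 5 = 41.  With advanced
 * intra coding (h263_aic) the DC scaling is skipped and qadd is 0. */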
6710 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6711                                   DCTELEM *block, int n, int qscale)
6712 {
6713     int i, level, qmul, qadd;
6714     int nCoeffs;
6715
6716     assert(s->block_last_index[n]>=0);
6717
6718     qmul = qscale << 1;
6719
6720     if (!s->h263_aic) {
6721         if (n < 4)
6722             block[0] = block[0] * s->y_dc_scale;
6723         else
6724             block[0] = block[0] * s->c_dc_scale;
6725         qadd = (qscale - 1) | 1;
6726     }else{
6727         qadd = 0;
6728     }
6729     if(s->ac_pred)
6730         nCoeffs=63;
6731     else
6732         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6733
6734     for(i=1; i<=nCoeffs; i++) {
6735         level = block[i];
6736         if (level) {
6737             if (level < 0) {
6738                 level = level * qmul - qadd;
6739             } else {
6740                 level = level * qmul + qadd;
6741             }
6742             block[i] = level;
6743         }
6744     }
6745 }
6746
6747 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6748                                   DCTELEM *block, int n, int qscale)
6749 {
6750     int i, level, qmul, qadd;
6751     int nCoeffs;
6752
6753     assert(s->block_last_index[n]>=0);
6754
6755     qadd = (qscale - 1) | 1;
6756     qmul = qscale << 1;
6757
6758     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6759
6760     for(i=0; i<=nCoeffs; i++) {
6761         level = block[i];
6762         if (level) {
6763             if (level < 0) {
6764                 level = level * qmul - qadd;
6765             } else {
6766                 level = level * qmul + qadd;
6767             }
6768             block[i] = level;
6769         }
6770     }
6771 }
6772
6773 #ifdef CONFIG_ENCODERS
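/* Added note: the AVCodec tables below use positional initialization; the fields
 * filled in are, in order, name, type, id, priv_data_size, init, encode and close,
 * followed by designated initializers for optional fields such as .pix_fmts and
 * .capabilities. */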
6774 AVCodec h263_encoder = {
6775     "h263",
6776     CODEC_TYPE_VIDEO,
6777     CODEC_ID_H263,
6778     sizeof(MpegEncContext),
6779     MPV_encode_init,
6780     MPV_encode_picture,
6781     MPV_encode_end,
6782     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6783 };
6784
6785 AVCodec h263p_encoder = {
6786     "h263p",
6787     CODEC_TYPE_VIDEO,
6788     CODEC_ID_H263P,
6789     sizeof(MpegEncContext),
6790     MPV_encode_init,
6791     MPV_encode_picture,
6792     MPV_encode_end,
6793     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6794 };
6795
6796 AVCodec flv_encoder = {
6797     "flv",
6798     CODEC_TYPE_VIDEO,
6799     CODEC_ID_FLV1,
6800     sizeof(MpegEncContext),
6801     MPV_encode_init,
6802     MPV_encode_picture,
6803     MPV_encode_end,
6804     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6805 };
6806
6807 AVCodec rv10_encoder = {
6808     "rv10",
6809     CODEC_TYPE_VIDEO,
6810     CODEC_ID_RV10,
6811     sizeof(MpegEncContext),
6812     MPV_encode_init,
6813     MPV_encode_picture,
6814     MPV_encode_end,
6815     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6816 };
6817
6818 AVCodec rv20_encoder = {
6819     "rv20",
6820     CODEC_TYPE_VIDEO,
6821     CODEC_ID_RV20,
6822     sizeof(MpegEncContext),
6823     MPV_encode_init,
6824     MPV_encode_picture,
6825     MPV_encode_end,
6826     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6827 };
6828
6829 AVCodec mpeg4_encoder = {
6830     "mpeg4",
6831     CODEC_TYPE_VIDEO,
6832     CODEC_ID_MPEG4,
6833     sizeof(MpegEncContext),
6834     MPV_encode_init,
6835     MPV_encode_picture,
6836     MPV_encode_end,
6837     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6838     .capabilities= CODEC_CAP_DELAY,
6839 };
6840
6841 AVCodec msmpeg4v1_encoder = {
6842     "msmpeg4v1",
6843     CODEC_TYPE_VIDEO,
6844     CODEC_ID_MSMPEG4V1,
6845     sizeof(MpegEncContext),
6846     MPV_encode_init,
6847     MPV_encode_picture,
6848     MPV_encode_end,
6849     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6850 };
6851
6852 AVCodec msmpeg4v2_encoder = {
6853     "msmpeg4v2",
6854     CODEC_TYPE_VIDEO,
6855     CODEC_ID_MSMPEG4V2,
6856     sizeof(MpegEncContext),
6857     MPV_encode_init,
6858     MPV_encode_picture,
6859     MPV_encode_end,
6860     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6861 };
6862
6863 AVCodec msmpeg4v3_encoder = {
6864     "msmpeg4",
6865     CODEC_TYPE_VIDEO,
6866     CODEC_ID_MSMPEG4V3,
6867     sizeof(MpegEncContext),
6868     MPV_encode_init,
6869     MPV_encode_picture,
6870     MPV_encode_end,
6871     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6872 };
6873
6874 AVCodec wmv1_encoder = {
6875     "wmv1",
6876     CODEC_TYPE_VIDEO,
6877     CODEC_ID_WMV1,
6878     sizeof(MpegEncContext),
6879     MPV_encode_init,
6880     MPV_encode_picture,
6881     MPV_encode_end,
6882     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6883 };
6884
6885 AVCodec mjpeg_encoder = {
6886     "mjpeg",
6887     CODEC_TYPE_VIDEO,
6888     CODEC_ID_MJPEG,
6889     sizeof(MpegEncContext),
6890     MPV_encode_init,
6891     MPV_encode_picture,
6892     MPV_encode_end,
6893     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
6894 };
6895
6896 #endif //CONFIG_ENCODERS