git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard.
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  *
  22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  23  */
  24
  25 /**
  26  * @file mpegvideo.c
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "avcodec.h"
  31 #include "dsputil.h"
  32 #include "mpegvideo.h"
  33 #include "faandct.h"
  34 #include <limits.h>
  35
  36 #ifdef USE_FASTMEMCPY
  37 #include "libvo/fastmemcpy.h"
  38 #endif
  39
  40 //#undef NDEBUG
  41 //#include <assert.h>
  42
  43 #ifdef CONFIG_ENCODERS
  44 static int encode_picture(MpegEncContext *s, int picture_number);
  45 #endif //CONFIG_ENCODERS
  46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
  47                                    DCTELEM *block, int n, int qscale);
  48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
  49                                    DCTELEM *block, int n, int qscale);
  50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
  51                                    DCTELEM *block, int n, int qscale);
  52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
  53                                    DCTELEM *block, int n, int qscale);
  54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
  55                                    DCTELEM *block, int n, int qscale);
  56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
  57                                   DCTELEM *block, int n, int qscale);
  58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
  59                                   DCTELEM *block, int n, int qscale);
  60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
  61 #ifdef CONFIG_ENCODERS
  62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  65 static int sse_mb(MpegEncContext *s);
  66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  67 #endif //CONFIG_ENCODERS
  68
  69 #ifdef HAVE_XVMC
  70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
  71 extern void XVMC_field_end(MpegEncContext *s);
  72 extern void XVMC_decode_mb(MpegEncContext *s);
  73 #endif
  74
  75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
  76
  77
  78 /* enable all paranoid tests for rounding, overflows, etc... */
  79 //#define PARANOID
  80
  81 //#define DEBUG
  82
  83
  84 /* for jpeg fast DCT */
  85 #define CONST_BITS 14
  86
  87 static const uint16_t aanscales[64] = {
  88     /* precomputed values scaled up by 14 bits */
  89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  90     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
  91     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
  92     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
  93     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  94     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
  95     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
  96     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
  97 };
  98
  99 static const uint8_t h263_chroma_roundtab[16] = {
 100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
 101     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
 102 };
 103
 104 static const uint8_t ff_default_chroma_qscale_table[32]={
 105 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
 106     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 107 };
 108
 109 #ifdef CONFIG_ENCODERS
 110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
 111 static uint8_t default_fcode_tab[MAX_MV*2+1];
 112
 113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
 114
 115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
 116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
 117 {
 118     int qscale;
 119     int shift=0;
 120
 121     for(qscale=qmin; qscale<=qmax; qscale++){
 122         int i;
 123         if (dsp->fdct == ff_jpeg_fdct_islow
 124 #ifdef FAAN_POSTSCALE
 125             || dsp->fdct == ff_faandct
 126 #endif
 127             ) {
 128             for(i=0;i<64;i++) {
 129                 const int j= dsp->idct_permutation[i];
 130                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 131                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 132                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 133                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 134
 135                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
 136                                 (qscale * quant_matrix[j]));
 137             }
 138         } else if (dsp->fdct == fdct_ifast
 139 #ifndef FAAN_POSTSCALE
 140                    || dsp->fdct == ff_faandct
 141 #endif
 142                    ) {
 143             for(i=0;i<64;i++) {
 144                 const int j= dsp->idct_permutation[i];
 145                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 146                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 147                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 148                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 149
 150                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
 151                                 (aanscales[i] * qscale * quant_matrix[j]));
 152             }
 153         } else {
 154             for(i=0;i<64;i++) {
 155                 const int j= dsp->idct_permutation[i];
 156                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 157                    So 16           <= qscale * quant_matrix[i]             <= 7905
 158                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 159                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 160                 */
 161                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
 162 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
 163                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 164
 165                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
 166                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
 167             }
 168         }
 169
 170         for(i=intra; i<64; i++){
 171             int64_t max= 8191;
 172             if (dsp->fdct == fdct_ifast
 173 #ifndef FAAN_POSTSCALE
 174                    || dsp->fdct == ff_faandct
 175 #endif
 176                    ) {
 177                 max= (8191LL*aanscales[i]) >> 14;
 178             }
 179             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
 180                 shift++;
 181             }
 182         }
 183     }
 184     if(shift){
 185         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
 186     }
 187 }
 188
 189 static inline void update_qscale(MpegEncContext *s){
 190     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
 191     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 192
 193     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
 194 }
 195 #endif //CONFIG_ENCODERS
 196
 197 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
 198     int i;
 199     int end;
 200
 201     st->scantable= src_scantable;
 202
 203     for(i=0; i<64; i++){
 204         int j;
 205         j = src_scantable[i];
 206         st->permutated[i] = permutation[j];
 207 #ifdef ARCH_POWERPC
 208         st->inverse[j] = i;
 209 #endif
 210     }
 211
 212     end=-1;
 213     for(i=0; i<64; i++){
 214         int j;
 215         j = st->permutated[i];
 216         if(j>end) end=j;
 217         st->raster_end[i]= end;
 218     }
 219 }
 220
 221 #ifdef CONFIG_ENCODERS
 222 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
 223     int i;
 224
 225     if(matrix){
 226         put_bits(pb, 1, 1);
 227         for(i=0;i<64;i++) {
 228             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
 229         }
 230     }else
 231         put_bits(pb, 1, 0);
 232 }
 233 #endif //CONFIG_ENCODERS
 234
 235 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
 236     int i;
 237
 238     assert(p<=end);
 239     if(p>=end)
 240         return end;
 241
 242     for(i=0; i<3; i++){
 243         uint32_t tmp= *state << 8;
 244         *state= tmp + *(p++);
 245         if(tmp == 0x100 || p==end)
 246             return p;
 247     }
 248
 249     while(p<end){
 250         if     (p[-1] > 1      ) p+= 3;
 251         else if(p[-2]          ) p+= 2;
 252         else if(p[-3]|(p[-1]-1)) p++;
 253         else{
 254             p++;
 255             break;
 256         }
 257     }
 258
 259     p= FFMIN(p, end)-4;
 260     *state=  be2me_32(unaligned32(p));
 261
 262     return p+4;
 263 }
 264
 265 /* init common dct for both encoder and decoder */
 266 int DCT_common_init(MpegEncContext *s)
 267 {
 268     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
 269     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
 270     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
 271     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
 272     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
 273     if(s->flags & CODEC_FLAG_BITEXACT)
 274         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
 275     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
 276
 277 #ifdef CONFIG_ENCODERS
 278     s->dct_quantize= dct_quantize_c;
 279     s->denoise_dct= denoise_dct_c;
 280 #endif //CONFIG_ENCODERS
 281
 282 #ifdef HAVE_MMX
 283     MPV_common_init_mmx(s);
 284 #endif
 285 #ifdef ARCH_ALPHA
 286     MPV_common_init_axp(s);
 287 #endif
 288 #ifdef HAVE_MLIB
 289     MPV_common_init_mlib(s);
 290 #endif
 291 #ifdef HAVE_MMI
 292     MPV_common_init_mmi(s);
 293 #endif
 294 #ifdef ARCH_ARMV4L
 295     MPV_common_init_armv4l(s);
 296 #endif
 297 #ifdef ARCH_POWERPC
 298     MPV_common_init_ppc(s);
 299 #endif
 300
 301 #ifdef CONFIG_ENCODERS
 302     s->fast_dct_quantize= s->dct_quantize;
 303
 304     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
 305         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
 306     }
 307
 308 #endif //CONFIG_ENCODERS
 309
 310     /* load & permutate scantables
 311        note: only wmv uses different ones
 312     */
 313     if(s->alternate_scan){
 314         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
 315         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
 316     }else{
 317         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
 318         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
 319     }
 320     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
 321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
 322
 323     return 0;
 324 }
 325
 326 static void copy_picture(Picture *dst, Picture *src){
 327     *dst = *src;
 328     dst->type= FF_BUFFER_TYPE_COPY;
 329 }
 330
 331 #ifdef CONFIG_ENCODERS
 332 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
 333     int i;
 334
 335     dst->pict_type              = src->pict_type;
 336     dst->quality                = src->quality;
 337     dst->coded_picture_number   = src->coded_picture_number;
 338     dst->display_picture_number = src->display_picture_number;
 339 //    dst->reference              = src->reference;
 340     dst->pts                    = src->pts;
 341     dst->interlaced_frame       = src->interlaced_frame;
 342     dst->top_field_first        = src->top_field_first;
 343
 344     if(s->avctx->me_threshold){
 345         if(!src->motion_val[0])
 346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 347         if(!src->mb_type)
 348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 349         if(!src->ref_index[0])
 350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 351         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
 352             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 353             src->motion_subsample_log2, dst->motion_subsample_log2);
 354
 355         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 356
 357         for(i=0; i<2; i++){
 358             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
 359             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
 360
 361             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
 362                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
 363             }
 364             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
 365                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
 366             }
 367         }
 368     }
 369 }
 370 #endif
 371
 372 /**
 373  * allocates a Picture
 374  * The pixels are allocated/set by calling get_buffer() if shared=0
 375  */
 376 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
 377     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
 378     const int mb_array_size= s->mb_stride*s->mb_height;
 379     const int b8_array_size= s->b8_stride*s->mb_height*2;
 380     const int b4_array_size= s->b4_stride*s->mb_height*4;
 381     int i;
 382
 383     if(shared){
 384         assert(pic->data[0]);
 385         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
 386         pic->type= FF_BUFFER_TYPE_SHARED;
 387     }else{
 388         int r;
 389
 390         assert(!pic->data[0]);
 391
 392         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
 393
 394         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
 395             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
 396             return -1;
 397         }
 398
 399         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
 400             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
 401             return -1;
 402         }
 403
 404         if(pic->linesize[1] != pic->linesize[2]){
 405             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
 406             return -1;
 407         }
 408
 409         s->linesize  = pic->linesize[0];
 410         s->uvlinesize= pic->linesize[1];
 411     }
 412
 413     if(pic->qscale_table==NULL){
 414         if (s->encoding) {
 415             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
 416             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
 417             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
 418         }
 419
 420         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
 421         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
 422         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
 423         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
 424         if(s->out_format == FMT_H264){
 425             for(i=0; i<2; i++){
 426                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
 427                 pic->motion_val[i]= pic->motion_val_base[i]+4;
 428                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
 429             }
 430             pic->motion_subsample_log2= 2;
 431         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
 432             for(i=0; i<2; i++){
 433                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
 434                 pic->motion_val[i]= pic->motion_val_base[i]+4;
 435                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
 436             }
 437             pic->motion_subsample_log2= 3;
 438         }
 439         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
 440             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
 441         }
 442         pic->qstride= s->mb_stride;
 443         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
 444     }
 445
 446     //it might be nicer if the application would keep track of these but it would require a API change
 447     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
 448     s->prev_pict_types[0]= s->pict_type;
 449     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
 450         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
 451
 452     return 0;
 453 fail: //for the CHECKED_ALLOCZ macro
 454     return -1;
 455 }
 456
 457 /**
 458  * deallocates a picture
 459  */
 460 static void free_picture(MpegEncContext *s, Picture *pic){
 461     int i;
 462
 463     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
 464         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
 465     }
 466
 467     av_freep(&pic->mb_var);
 468     av_freep(&pic->mc_mb_var);
 469     av_freep(&pic->mb_mean);
 470     av_freep(&pic->mbskip_table);
 471     av_freep(&pic->qscale_table);
 472     av_freep(&pic->mb_type_base);
 473     av_freep(&pic->dct_coeff);
 474     av_freep(&pic->pan_scan);
 475     pic->mb_type= NULL;
 476     for(i=0; i<2; i++){
 477         av_freep(&pic->motion_val_base[i]);
 478         av_freep(&pic->ref_index[i]);
 479     }
 480
 481     if(pic->type == FF_BUFFER_TYPE_SHARED){
 482         for(i=0; i<4; i++){
 483             pic->base[i]=
 484             pic->data[i]= NULL;
 485         }
 486         pic->type= 0;
 487     }
 488 }
 489
 490 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
 491     int i;
 492
 493     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
 494     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
 495     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
 496
 497      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
 498     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
 499     s->rd_scratchpad=   s->me.scratchpad;
 500     s->b_scratchpad=    s->me.scratchpad;
 501     s->obmc_scratchpad= s->me.scratchpad + 16;
 502     if (s->encoding) {
 503         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
 504         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
 505         if(s->avctx->noise_reduction){
 506             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
 507         }
 508     }
 509     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
 510     s->block= s->blocks[0];
 511
 512     for(i=0;i<12;i++){
 513         s->pblocks[i] = (short *)(&s->block[i]);
 514     }
 515     return 0;
 516 fail:
 517     return -1; //free() through MPV_common_end()
 518 }
 519
 520 static void free_duplicate_context(MpegEncContext *s){
 521     if(s==NULL) return;
 522
 523     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
 524     av_freep(&s->me.scratchpad);
 525     s->rd_scratchpad=
 526     s->b_scratchpad=
 527     s->obmc_scratchpad= NULL;
 528
 529     av_freep(&s->dct_error_sum);
 530     av_freep(&s->me.map);
 531     av_freep(&s->me.score_map);
 532     av_freep(&s->blocks);
 533     s->block= NULL;
 534 }
 535
 536 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
 537 #define COPY(a) bak->a= src->a
 538     COPY(allocated_edge_emu_buffer);
 539     COPY(edge_emu_buffer);
 540     COPY(me.scratchpad);
 541     COPY(rd_scratchpad);
 542     COPY(b_scratchpad);
 543     COPY(obmc_scratchpad);
 544     COPY(me.map);
 545     COPY(me.score_map);
 546     COPY(blocks);
 547     COPY(block);
 548     COPY(start_mb_y);
 549     COPY(end_mb_y);
 550     COPY(me.map_generation);
 551     COPY(pb);
 552     COPY(dct_error_sum);
 553     COPY(dct_count[0]);
 554     COPY(dct_count[1]);
 555 #undef COPY
 556 }
 557
 558 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
 559     MpegEncContext bak;
 560     int i;
 561     //FIXME copy only needed parts
 562 //START_TIMER
 563     backup_duplicate_context(&bak, dst);
 564     memcpy(dst, src, sizeof(MpegEncContext));
 565     backup_duplicate_context(dst, &bak);
 566     for(i=0;i<12;i++){
 567         dst->pblocks[i] = (short *)(&dst->block[i]);
 568     }
 569 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 570 }
 571
 572 #ifdef CONFIG_ENCODERS
 573 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
 574 #define COPY(a) dst->a= src->a
 575     COPY(pict_type);
 576     COPY(current_picture);
 577     COPY(f_code);
 578     COPY(b_code);
 579     COPY(qscale);
 580     COPY(lambda);
 581     COPY(lambda2);
 582     COPY(picture_in_gop_number);
 583     COPY(gop_picture_number);
 584     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
 585     COPY(progressive_frame); //FIXME don't set in encode_header
 586     COPY(partitioned_frame); //FIXME don't set in encode_header
 587 #undef COPY
 588 }
 589 #endif
 590
 591 /**
 592  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
 593  * the changed fields will not depend upon the prior state of the MpegEncContext.
 594  */
 595 static void MPV_common_defaults(MpegEncContext *s){
 596     s->y_dc_scale_table=
 597     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 598     s->chroma_qscale_table= ff_default_chroma_qscale_table;
 599     s->progressive_frame= 1;
 600     s->progressive_sequence= 1;
 601     s->picture_structure= PICT_FRAME;
 602
 603     s->coded_picture_number = 0;
 604     s->picture_number = 0;
 605     s->input_picture_number = 0;
 606
 607     s->picture_in_gop_number = 0;
 608
 609     s->f_code = 1;
 610     s->b_code = 1;
 611 }
 612
 613 /**
 614  * sets the given MpegEncContext to defaults for decoding.
 615  * the changed fields will not depend upon the prior state of the MpegEncContext.
 616  */
 617 void MPV_decode_defaults(MpegEncContext *s){
 618     MPV_common_defaults(s);
 619 }
 620
 621 /**
 622  * sets the given MpegEncContext to defaults for encoding.
 623  * the changed fields will not depend upon the prior state of the MpegEncContext.
 624  */
 625
 626 #ifdef CONFIG_ENCODERS
 627 static void MPV_encode_defaults(MpegEncContext *s){
 628     static int done=0;
 629
 630     MPV_common_defaults(s);
 631
 632     if(!done){
 633         int i;
 634         done=1;
 635
 636         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
 637         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
 638
 639         for(i=-16; i<16; i++){
 640             default_fcode_tab[i + MAX_MV]= 1;
 641         }
 642     }
 643     s->me.mv_penalty= default_mv_penalty;
 644     s->fcode_tab= default_fcode_tab;
 645 }
 646 #endif //CONFIG_ENCODERS
 647
 648 /**
 649  * init common structure for both encoder and decoder.
 650  * this assumes that some variables like width/height are already set
 651  */
 652 int MPV_common_init(MpegEncContext *s)
 653 {
 654     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
 655
 656     s->mb_height = (s->height + 15) / 16;
 657
 658     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
 659         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
 660         return -1;
 661     }
 662
 663     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
 664         return -1;
 665
 666     dsputil_init(&s->dsp, s->avctx);
 667     DCT_common_init(s);
 668
 669     s->flags= s->avctx->flags;
 670     s->flags2= s->avctx->flags2;
 671
 672     s->mb_width  = (s->width  + 15) / 16;
 673     s->mb_stride = s->mb_width + 1;
 674     s->b8_stride = s->mb_width*2 + 1;
 675     s->b4_stride = s->mb_width*4 + 1;
 676     mb_array_size= s->mb_height * s->mb_stride;
 677     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
 678
 679     /* set chroma shifts */
 680     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
 681                                                     &(s->chroma_y_shift) );
 682
 683     /* set default edge pos, will be overriden in decode_header if needed */
 684     s->h_edge_pos= s->mb_width*16;
 685     s->v_edge_pos= s->mb_height*16;
 686
 687     s->mb_num = s->mb_width * s->mb_height;
 688
 689     s->block_wrap[0]=
 690     s->block_wrap[1]=
 691     s->block_wrap[2]=
 692     s->block_wrap[3]= s->b8_stride;
 693     s->block_wrap[4]=
 694     s->block_wrap[5]= s->mb_stride;
 695
 696     y_size = s->b8_stride * (2 * s->mb_height + 1);
 697     c_size = s->mb_stride * (s->mb_height + 1);
 698     yc_size = y_size + 2 * c_size;
 699
 700     /* convert fourcc to upper case */
 701     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
 702                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
 703                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
 704                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
 705
 706     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
 707                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
 708                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
 709                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
 710
 711     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
 712
 713     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
 714     for(y=0; y<s->mb_height; y++){
 715         for(x=0; x<s->mb_width; x++){
 716             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
 717         }
 718     }
 719     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
 720
 721     if (s->encoding) {
 722         /* Allocate MV tables */
 723         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
 724         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 725         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 726         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 727         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 728         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
 729         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
 730         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
 731         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
 732         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
 733         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
 734         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
 735
 736         if(s->msmpeg4_version){
 737             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
 738         }
 739         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
 740
 741         /* Allocate MB type table */
 742         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
 743
 744         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
 745
 746         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
 747         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
 748         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
 749         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
 750         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
 751         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
 752
 753         if(s->avctx->noise_reduction){
 754             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
 755         }
 756     }
 757     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
 758
 759     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
 760
 761     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
 762         /* interlaced direct mode decoding tables */
 763             for(i=0; i<2; i++){
 764                 int j, k;
 765                 for(j=0; j<2; j++){
 766                     for(k=0; k<2; k++){
 767                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
 768                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
 769                     }
 770                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
 771                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
 772                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
 773                 }
 774                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
 775             }
 776     }
 777     if (s->out_format == FMT_H263) {
 778         /* ac values */
 779         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
 780         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
 781         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
 782         s->ac_val[2] = s->ac_val[1] + c_size;
 783
 784         /* cbp values */
 785         CHECKED_ALLOCZ(s->coded_block_base, y_size);
 786         s->coded_block= s->coded_block_base + s->b8_stride + 1;
 787
 788         /* cbp, ac_pred, pred_dir */
 789         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
 790         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
 791     }
 792
 793     if (s->h263_pred || s->h263_plus || !s->encoding) {
 794         /* dc values */
 795         //MN: we need these for error resilience of intra-frames
 796         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
 797         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
 798         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
 799         s->dc_val[2] = s->dc_val[1] + c_size;
 800         for(i=0;i<yc_size;i++)
 801             s->dc_val_base[i] = 1024;
 802     }
 803
 804     /* which mb is a intra block */
 805     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
 806     memset(s->mbintra_table, 1, mb_array_size);
 807
 808     /* init macroblock skip table */
 809     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
 810     //Note the +1 is for a quicker mpeg4 slice_end detection
 811     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
 812
 813     s->parse_context.state= -1;
 814     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
 815        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
 816        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
 817        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
 818     }
 819
 820     s->context_initialized = 1;
 821
 822     s->thread_context[0]= s;
 823     for(i=1; i<s->avctx->thread_count; i++){
 824         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
 825         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
 826     }
 827
 828     for(i=0; i<s->avctx->thread_count; i++){
 829         if(init_duplicate_context(s->thread_context[i], s) < 0)
 830            goto fail;
 831         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
 832         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
 833     }
 834
 835     return 0;
 836  fail:
 837     MPV_common_end(s);
 838     return -1;
 839 }
 840
 841 /* init common structure for both encoder and decoder */
 842 void MPV_common_end(MpegEncContext *s)
 843 {
 844     int i, j, k;
 845
 846     for(i=0; i<s->avctx->thread_count; i++){
 847         free_duplicate_context(s->thread_context[i]);
 848     }
 849     for(i=1; i<s->avctx->thread_count; i++){
 850         av_freep(&s->thread_context[i]);
 851     }
 852
 853     av_freep(&s->parse_context.buffer);
 854     s->parse_context.buffer_size=0;
 855
 856     av_freep(&s->mb_type);
 857     av_freep(&s->p_mv_table_base);
 858     av_freep(&s->b_forw_mv_table_base);
 859     av_freep(&s->b_back_mv_table_base);
 860     av_freep(&s->b_bidir_forw_mv_table_base);
 861     av_freep(&s->b_bidir_back_mv_table_base);
 862     av_freep(&s->b_direct_mv_table_base);
 863     s->p_mv_table= NULL;
 864     s->b_forw_mv_table= NULL;
 865     s->b_back_mv_table= NULL;
 866     s->b_bidir_forw_mv_table= NULL;
 867     s->b_bidir_back_mv_table= NULL;
 868     s->b_direct_mv_table= NULL;
 869     for(i=0; i<2; i++){
 870         for(j=0; j<2; j++){
 871             for(k=0; k<2; k++){
 872                 av_freep(&s->b_field_mv_table_base[i][j][k]);
 873                 s->b_field_mv_table[i][j][k]=NULL;
 874             }
 875             av_freep(&s->b_field_select_table[i][j]);
 876             av_freep(&s->p_field_mv_table_base[i][j]);
 877             s->p_field_mv_table[i][j]=NULL;
 878         }
 879         av_freep(&s->p_field_select_table[i]);
 880     }
 881
 882     av_freep(&s->dc_val_base);
 883     av_freep(&s->ac_val_base);
 884     av_freep(&s->coded_block_base);
 885     av_freep(&s->mbintra_table);
 886     av_freep(&s->cbp_table);
 887     av_freep(&s->pred_dir_table);
 888
 889     av_freep(&s->mbskip_table);
 890     av_freep(&s->prev_pict_types);
 891     av_freep(&s->bitstream_buffer);
 892     s->allocated_bitstream_buffer_size=0;
 893
 894     av_freep(&s->avctx->stats_out);
 895     av_freep(&s->ac_stats);
 896     av_freep(&s->error_status_table);
 897     av_freep(&s->mb_index2xy);
 898     av_freep(&s->lambda_table);
 899     av_freep(&s->q_intra_matrix);
 900     av_freep(&s->q_inter_matrix);
 901     av_freep(&s->q_intra_matrix16);
 902     av_freep(&s->q_inter_matrix16);
 903     av_freep(&s->input_picture);
 904     av_freep(&s->reordered_input_picture);
 905     av_freep(&s->dct_offset);
 906
 907     if(s->picture){
 908         for(i=0; i<MAX_PICTURE_COUNT; i++){
 909             free_picture(s, &s->picture[i]);
 910         }
 911     }
 912     av_freep(&s->picture);
 913     s->context_initialized = 0;
 914     s->last_picture_ptr=
 915     s->next_picture_ptr=
 916     s->current_picture_ptr= NULL;
 917     s->linesize= s->uvlinesize= 0;
 918
 919     for(i=0; i<3; i++)
 920         av_freep(&s->visualization_buffer[i]);
 921
 922     avcodec_default_free_buffers(s->avctx);
 923 }
 924
 925 #ifdef CONFIG_ENCODERS
 926
 927 /* init video encoder */
 928 int MPV_encode_init(AVCodecContext *avctx)
 929 {
 930     MpegEncContext *s = avctx->priv_data;
 931     int i;
 932     int chroma_h_shift, chroma_v_shift;
 933
 934     MPV_encode_defaults(s);
 935
 936     switch (avctx->codec_id) {
 937     case CODEC_ID_MPEG2VIDEO:
 938         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
 939             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
 940             return -1;
 941         }
 942         break;
 943     case CODEC_ID_LJPEG:
 944     case CODEC_ID_MJPEG:
 945         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 946            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
 947             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 948             return -1;
 949         }
 950         break;
 951     default:
 952         if(avctx->pix_fmt != PIX_FMT_YUV420P){
 953             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 954             return -1;
 955         }
 956     }
 957
 958     switch (avctx->pix_fmt) {
 959     case PIX_FMT_YUVJ422P:
 960     case PIX_FMT_YUV422P:
 961         s->chroma_format = CHROMA_422;
 962         break;
 963     case PIX_FMT_YUVJ420P:
 964     case PIX_FMT_YUV420P:
 965     default:
 966         s->chroma_format = CHROMA_420;
 967         break;
 968     }
 969
 970     s->bit_rate = avctx->bit_rate;
 971     s->width = avctx->width;
 972     s->height = avctx->height;
 973     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
 974         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
 975         avctx->gop_size=600;
 976     }
 977     s->gop_size = avctx->gop_size;
 978     s->avctx = avctx;
 979     s->flags= avctx->flags;
 980     s->flags2= avctx->flags2;
 981     s->max_b_frames= avctx->max_b_frames;
 982     s->codec_id= avctx->codec->id;
 983     s->luma_elim_threshold  = avctx->luma_elim_threshold;
 984     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
 985     s->strict_std_compliance= avctx->strict_std_compliance;
 986     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
 987     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
 988     s->mpeg_quant= avctx->mpeg_quant;
 989     s->rtp_mode= !!avctx->rtp_payload_size;
 990     s->intra_dc_precision= avctx->intra_dc_precision;
 991     s->user_specified_pts = AV_NOPTS_VALUE;
 992
 993     if (s->gop_size <= 1) {
 994         s->intra_only = 1;
 995         s->gop_size = 12;
 996     } else {
 997         s->intra_only = 0;
 998     }
 999
1000     s->me_method = avctx->me_method;
1001
1002     /* Fixed QSCALE */
1003     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1004
1005     s->adaptive_quant= (   s->avctx->lumi_masking
1006                         || s->avctx->dark_masking
1007                         || s->avctx->temporal_cplx_masking
1008                         || s->avctx->spatial_cplx_masking
1009                         || s->avctx->p_masking
1010                         || s->avctx->border_masking
1011                         || (s->flags&CODEC_FLAG_QP_RD))
1012                        && !s->fixed_qscale;
1013
1014     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1015     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1016     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1017     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1018
1019     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1020         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1021         return -1;
1022     }
1023
1024     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1025         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1026     }
1027
1028     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1029         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1030         return -1;
1031     }
1032
1033     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1034         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1035         return -1;
1036     }
1037
1038     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1039        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1040        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1041
1042         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1043     }
1044
1045     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1046        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1047         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1048         return -1;
1049     }
1050
1051     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1052         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1053         return -1;
1054     }
1055
1056     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1057         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1058         return -1;
1059     }
1060
1061     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1062         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1063         return -1;
1064     }
1065
1066     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1067         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1068         return -1;
1069     }
1070
1071     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1072         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1073         return -1;
1074     }
1075
1076     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1077        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1078         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1079         return -1;
1080     }
1081
1082     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1083         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1084         return -1;
1085     }
1086
1087     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1088         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1089         return -1;
1090     }
1091
1092     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1093         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1094         return -1;
1095     }
1096
1097     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1098         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1099         return -1;
1100     }
1101
1102     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1103         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1108        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1109        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1110         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1111         return -1;
1112     }
1113
1114     if(s->avctx->thread_count > 1)
1115         s->rtp_mode= 1;
1116
1117     if(!avctx->time_base.den || !avctx->time_base.num){
1118         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1119         return -1;
1120     }
1121
1122     i= (INT_MAX/2+128)>>8;
1123     if(avctx->me_threshold >= i){
1124         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1125         return -1;
1126     }
1127     if(avctx->mb_threshold >= i){
1128         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1129         return -1;
1130     }
1131
1132     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1133         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1134         avctx->b_frame_strategy = 0;
1135     }
1136
1137     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1138     if(i > 1){
1139         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1140         avctx->time_base.den /= i;
1141         avctx->time_base.num /= i;
1142 //        return -1;
1143     }
1144
1145     if(s->codec_id==CODEC_ID_MJPEG){
1146         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1147         s->inter_quant_bias= 0;
1148     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1149         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1150         s->inter_quant_bias= 0;
1151     }else{
1152         s->intra_quant_bias=0;
1153         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1154     }
1155
1156     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1157         s->intra_quant_bias= avctx->intra_quant_bias;
1158     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1159         s->inter_quant_bias= avctx->inter_quant_bias;
1160
1161     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1162
1163     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1164         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1165         return -1;
1166     }
1167     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1168
1169     switch(avctx->codec->id) {
1170     case CODEC_ID_MPEG1VIDEO:
1171         s->out_format = FMT_MPEG1;
1172         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1173         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1174         break;
1175     case CODEC_ID_MPEG2VIDEO:
1176         s->out_format = FMT_MPEG1;
1177         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1178         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1179         s->rtp_mode= 1;
1180         break;
1181     case CODEC_ID_LJPEG:
1182     case CODEC_ID_JPEGLS:
1183     case CODEC_ID_MJPEG:
1184         s->out_format = FMT_MJPEG;
1185         s->intra_only = 1; /* force intra only for jpeg */
1186         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1187         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1188         s->mjpeg_vsample[0] = 2;
1189         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1190         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1191         s->mjpeg_hsample[0] = 2;
1192         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1193         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1194         if (mjpeg_init(s) < 0)
1195             return -1;
1196         avctx->delay=0;
1197         s->low_delay=1;
1198         break;
1199     case CODEC_ID_H261:
1200         s->out_format = FMT_H261;
1201         avctx->delay=0;
1202         s->low_delay=1;
1203         break;
1204     case CODEC_ID_H263:
1205         if (h263_get_picture_format(s->width, s->height) == 7) {
1206             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1207             return -1;
1208         }
1209         s->out_format = FMT_H263;
1210         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1211         avctx->delay=0;
1212         s->low_delay=1;
1213         break;
1214     case CODEC_ID_H263P:
1215         s->out_format = FMT_H263;
1216         s->h263_plus = 1;
1217         /* Fx */
1218         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1219         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1220         s->modified_quant= s->h263_aic;
1221         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1222         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1223         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1224         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1225         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1226
1227         /* /Fx */
1228         /* These are just to be sure */
1229         avctx->delay=0;
1230         s->low_delay=1;
1231         break;
1232     case CODEC_ID_FLV1:
1233         s->out_format = FMT_H263;
1234         s->h263_flv = 2; /* format = 1; 11-bit codes */
1235         s->unrestricted_mv = 1;
1236         s->rtp_mode=0; /* don't allow GOB */
1237         avctx->delay=0;
1238         s->low_delay=1;
1239         break;
1240     case CODEC_ID_RV10:
1241         s->out_format = FMT_H263;
1242         avctx->delay=0;
1243         s->low_delay=1;
1244         break;
1245     case CODEC_ID_RV20:
1246         s->out_format = FMT_H263;
1247         avctx->delay=0;
1248         s->low_delay=1;
1249         s->modified_quant=1;
1250         s->h263_aic=1;
1251         s->h263_plus=1;
1252         s->loop_filter=1;
1253         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1254         break;
1255     case CODEC_ID_MPEG4:
1256         s->out_format = FMT_H263;
1257         s->h263_pred = 1;
1258         s->unrestricted_mv = 1;
1259         s->low_delay= s->max_b_frames ? 0 : 1;
1260         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1261         break;
1262     case CODEC_ID_MSMPEG4V1:
1263         s->out_format = FMT_H263;
1264         s->h263_msmpeg4 = 1;
1265         s->h263_pred = 1;
1266         s->unrestricted_mv = 1;
1267         s->msmpeg4_version= 1;
1268         avctx->delay=0;
1269         s->low_delay=1;
1270         break;
1271     case CODEC_ID_MSMPEG4V2:
1272         s->out_format = FMT_H263;
1273         s->h263_msmpeg4 = 1;
1274         s->h263_pred = 1;
1275         s->unrestricted_mv = 1;
1276         s->msmpeg4_version= 2;
1277         avctx->delay=0;
1278         s->low_delay=1;
1279         break;
1280     case CODEC_ID_MSMPEG4V3:
1281         s->out_format = FMT_H263;
1282         s->h263_msmpeg4 = 1;
1283         s->h263_pred = 1;
1284         s->unrestricted_mv = 1;
1285         s->msmpeg4_version= 3;
1286         s->flipflop_rounding=1;
1287         avctx->delay=0;
1288         s->low_delay=1;
1289         break;
1290     case CODEC_ID_WMV1:
1291         s->out_format = FMT_H263;
1292         s->h263_msmpeg4 = 1;
1293         s->h263_pred = 1;
1294         s->unrestricted_mv = 1;
1295         s->msmpeg4_version= 4;
1296         s->flipflop_rounding=1;
1297         avctx->delay=0;
1298         s->low_delay=1;
1299         break;
1300     case CODEC_ID_WMV2:
1301         s->out_format = FMT_H263;
1302         s->h263_msmpeg4 = 1;
1303         s->h263_pred = 1;
1304         s->unrestricted_mv = 1;
1305         s->msmpeg4_version= 5;
1306         s->flipflop_rounding=1;
1307         avctx->delay=0;
1308         s->low_delay=1;
1309         break;
1310     default:
1311         return -1;
1312     }
1313
1314     avctx->has_b_frames= !s->low_delay;
1315
1316     s->encoding = 1;
1317
1318     /* init */
1319     if (MPV_common_init(s) < 0)
1320         return -1;
1321
1322     if(s->modified_quant)
1323         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1324     s->progressive_frame=
1325     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1326     s->quant_precision=5;
1327
1328     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1329     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1330
1331 #ifdef CONFIG_H261_ENCODER
1332     if (s->out_format == FMT_H261)
1333         ff_h261_encode_init(s);
1334 #endif
1335     if (s->out_format == FMT_H263)
1336         h263_encode_init(s);
1337     if(s->msmpeg4_version)
1338         ff_msmpeg4_encode_init(s);
1339     if (s->out_format == FMT_MPEG1)
1340         ff_mpeg1_encode_init(s);
1341
1342     /* init q matrix */
1343     for(i=0;i<64;i++) {
1344         int j= s->dsp.idct_permutation[i];
1345         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1346             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1347             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1348         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1349             s->intra_matrix[j] =
1350             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1351         }else
1352         { /* mpeg1/2 */
1353             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1354             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1355         }
1356         if(s->avctx->intra_matrix)
1357             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1358         if(s->avctx->inter_matrix)
1359             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1360     }
1361
1362     /* precompute matrix */
1363     /* for mjpeg, we do include qscale in the matrix */
1364     if (s->out_format != FMT_MJPEG) {
1365         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1366                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1367         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1368                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1369     }
1370
1371     if(ff_rate_control_init(s) < 0)
1372         return -1;
1373
1374     return 0;
1375 }
1376
1377 int MPV_encode_end(AVCodecContext *avctx)
1378 {
1379     MpegEncContext *s = avctx->priv_data;
1380
1381     ff_rate_control_uninit(s);
1382
1383     MPV_common_end(s);
1384     if (s->out_format == FMT_MJPEG)
1385         mjpeg_close(s);
1386
1387     av_freep(&avctx->extradata);
1388
1389     return 0;
1390 }
1391
1392 #endif //CONFIG_ENCODERS
1393
1394 void init_rl(RLTable *rl, int use_static)
1395 {
1396     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1397     uint8_t index_run[MAX_RUN+1];
1398     int last, run, level, start, end, i;
1399
1400     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1401     if(use_static && rl->max_level[0])
1402         return;
1403
1404     /* compute max_level[], max_run[] and index_run[] */
1405     for(last=0;last<2;last++) {
1406         if (last == 0) {
1407             start = 0;
1408             end = rl->last;
1409         } else {
1410             start = rl->last;
1411             end = rl->n;
1412         }
1413
1414         memset(max_level, 0, MAX_RUN + 1);
1415         memset(max_run, 0, MAX_LEVEL + 1);
1416         memset(index_run, rl->n, MAX_RUN + 1);
1417         for(i=start;i<end;i++) {
1418             run = rl->table_run[i];
1419             level = rl->table_level[i];
1420             if (index_run[run] == rl->n)
1421                 index_run[run] = i;
1422             if (level > max_level[run])
1423                 max_level[run] = level;
1424             if (run > max_run[level])
1425                 max_run[level] = run;
1426         }
1427         if(use_static)
1428             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1429         else
1430             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1431         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1432         if(use_static)
1433             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1434         else
1435             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1436         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1437         if(use_static)
1438             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1439         else
1440             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1441         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1442     }
1443 }
1444
1445 /* draw the edges of width 'w' of an image of size width, height */
1446 //FIXME check that this is ok for mpeg4 interlaced
1447 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1448 {
1449     uint8_t *ptr, *last_line;
1450     int i;
1451
1452     last_line = buf + (height - 1) * wrap;
1453     for(i=0;i<w;i++) {
1454         /* top and bottom */
1455         memcpy(buf - (i + 1) * wrap, buf, width);
1456         memcpy(last_line + (i + 1) * wrap, last_line, width);
1457     }
1458     /* left and right */
1459     ptr = buf;
1460     for(i=0;i<height;i++) {
1461         memset(ptr - w, ptr[0], w);
1462         memset(ptr + width, ptr[width-1], w);
1463         ptr += wrap;
1464     }
1465     /* corners */
1466     for(i=0;i<w;i++) {
1467         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1468         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1469         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
1470         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
1471     }
1472 }
1473
1474 int ff_find_unused_picture(MpegEncContext *s, int shared){
1475     int i;
1476
1477     if(shared){
1478         for(i=0; i<MAX_PICTURE_COUNT; i++){
1479             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1480         }
1481     }else{
1482         for(i=0; i<MAX_PICTURE_COUNT; i++){
1483             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1484         }
1485         for(i=0; i<MAX_PICTURE_COUNT; i++){
1486             if(s->picture[i].data[0]==NULL) return i;
1487         }
1488     }
1489
1490     assert(0);
1491     return -1;
1492 }
1493
1494 static void update_noise_reduction(MpegEncContext *s){
1495     int intra, i;
1496
1497     for(intra=0; intra<2; intra++){
1498         if(s->dct_count[intra] > (1<<16)){
1499             for(i=0; i<64; i++){
1500                 s->dct_error_sum[intra][i] >>=1;
1501             }
1502             s->dct_count[intra] >>= 1;
1503         }
1504
1505         for(i=0; i<64; i++){
1506             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1507         }
1508     }
1509 }
1510
1511 /**
1512  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1513  */
1514 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1515 {
1516     int i;
1517     AVFrame *pic;
1518     s->mb_skipped = 0;
1519
1520     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1521
1522     /* mark&release old frames */
1523     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1524         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1525
1526         /* release forgotten pictures */
1527         /* if(mpeg124/h263) */
1528         if(!s->encoding){
1529             for(i=0; i<MAX_PICTURE_COUNT; i++){
1530                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1531                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1532                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1533                 }
1534             }
1535         }
1536     }
1537 alloc:
1538     if(!s->encoding){
1539         /* release non reference frames */
1540         for(i=0; i<MAX_PICTURE_COUNT; i++){
1541             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1542                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1543             }
1544         }
1545
1546         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1547             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1548         else{
1549             i= ff_find_unused_picture(s, 0);
1550             pic= (AVFrame*)&s->picture[i];
1551         }
1552
1553         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1554                         && !s->dropable ? 3 : 0;
1555
1556         pic->coded_picture_number= s->coded_picture_number++;
1557
1558         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1559             return -1;
1560
1561         s->current_picture_ptr= (Picture*)pic;
1562         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1563         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1564     }
1565
1566     s->current_picture_ptr->pict_type= s->pict_type;
1567 //    if(s->flags && CODEC_FLAG_QSCALE)
1568   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1569     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1570
1571     copy_picture(&s->current_picture, s->current_picture_ptr);
1572
1573   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1574     if (s->pict_type != B_TYPE) {
1575         s->last_picture_ptr= s->next_picture_ptr;
1576         if(!s->dropable)
1577             s->next_picture_ptr= s->current_picture_ptr;
1578     }
1579 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1580         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1581         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1582         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1583         s->pict_type, s->dropable);*/
1584
1585     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1586     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1587
1588     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1589         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1590         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1591         goto alloc;
1592     }
1593
1594     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1595
1596     if(s->picture_structure!=PICT_FRAME){
1597         int i;
1598         for(i=0; i<4; i++){
1599             if(s->picture_structure == PICT_BOTTOM_FIELD){
1600                  s->current_picture.data[i] += s->current_picture.linesize[i];
1601             }
1602             s->current_picture.linesize[i] *= 2;
1603             s->last_picture.linesize[i] *=2;
1604             s->next_picture.linesize[i] *=2;
1605         }
1606     }
1607   }
1608
1609     s->hurry_up= s->avctx->hurry_up;
1610     s->error_resilience= avctx->error_resilience;
1611
1612     /* set dequantizer, we can't do it during init as it might change for mpeg4
1613        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1614     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1615         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1616         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1617     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1618         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1619         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1620     }else{
1621         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1622         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1623     }
1624
1625     if(s->dct_error_sum){
1626         assert(s->avctx->noise_reduction && s->encoding);
1627
1628         update_noise_reduction(s);
1629     }
1630
1631 #ifdef HAVE_XVMC
1632     if(s->avctx->xvmc_acceleration)
1633         return XVMC_field_start(s, avctx);
1634 #endif
1635     return 0;
1636 }
1637
1638 /* generic function for encode/decode called after a frame has been coded/decoded */
1639 void MPV_frame_end(MpegEncContext *s)
1640 {
1641     int i;
1642     /* draw edge for correct motion prediction if outside */
1643 #ifdef HAVE_XVMC
1644 //just to make sure that all data is rendered.
1645     if(s->avctx->xvmc_acceleration){
1646         XVMC_field_end(s);
1647     }else
1648 #endif
1649     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1650             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1651             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1652             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1653     }
1654     emms_c();
1655
1656     s->last_pict_type    = s->pict_type;
1657     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1658     if(s->pict_type!=B_TYPE){
1659         s->last_non_b_pict_type= s->pict_type;
1660     }
1661 #if 0
1662         /* copy back current_picture variables */
1663     for(i=0; i<MAX_PICTURE_COUNT; i++){
1664         if(s->picture[i].data[0] == s->current_picture.data[0]){
1665             s->picture[i]= s->current_picture;
1666             break;
1667         }
1668     }
1669     assert(i<MAX_PICTURE_COUNT);
1670 #endif
1671
1672     if(s->encoding){
1673         /* release non-reference frames */
1674         for(i=0; i<MAX_PICTURE_COUNT; i++){
1675             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1676                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1677             }
1678         }
1679     }
1680     // clear copies, to avoid confusion
1681 #if 0
1682     memset(&s->last_picture, 0, sizeof(Picture));
1683     memset(&s->next_picture, 0, sizeof(Picture));
1684     memset(&s->current_picture, 0, sizeof(Picture));
1685 #endif
1686     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1687 }
1688
1689 /**
1690  * draws an line from (ex, ey) -> (sx, sy).
1691  * @param w width of the image
1692  * @param h height of the image
1693  * @param stride stride/linesize of the image
1694  * @param color color of the arrow
1695  */
1696 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1697     int x, y, fr, f;
1698
1699     sx= clip(sx, 0, w-1);
1700     sy= clip(sy, 0, h-1);
1701     ex= clip(ex, 0, w-1);
1702     ey= clip(ey, 0, h-1);
1703
1704     buf[sy*stride + sx]+= color;
1705
1706     if(FFABS(ex - sx) > FFABS(ey - sy)){
1707         if(sx > ex){
1708             FFSWAP(int, sx, ex);
1709             FFSWAP(int, sy, ey);
1710         }
1711         buf+= sx + sy*stride;
1712         ex-= sx;
1713         f= ((ey-sy)<<16)/ex;
1714         for(x= 0; x <= ex; x++){
1715             y = (x*f)>>16;
1716             fr= (x*f)&0xFFFF;
1717             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1718             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1719         }
1720     }else{
1721         if(sy > ey){
1722             FFSWAP(int, sx, ex);
1723             FFSWAP(int, sy, ey);
1724         }
1725         buf+= sx + sy*stride;
1726         ey-= sy;
1727         if(ey) f= ((ex-sx)<<16)/ey;
1728         else   f= 0;
1729         for(y= 0; y <= ey; y++){
1730             x = (y*f)>>16;
1731             fr= (y*f)&0xFFFF;
1732             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1733             buf[y*stride + x+1]+= (color*         fr )>>16;;
1734         }
1735     }
1736 }
1737
1738 /**
1739  * draws an arrow from (ex, ey) -> (sx, sy).
1740  * @param w width of the image
1741  * @param h height of the image
1742  * @param stride stride/linesize of the image
1743  * @param color color of the arrow
1744  */
1745 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1746     int dx,dy;
1747
1748     sx= clip(sx, -100, w+100);
1749     sy= clip(sy, -100, h+100);
1750     ex= clip(ex, -100, w+100);
1751     ey= clip(ey, -100, h+100);
1752
1753     dx= ex - sx;
1754     dy= ey - sy;
1755
1756     if(dx*dx + dy*dy > 3*3){
1757         int rx=  dx + dy;
1758         int ry= -dx + dy;
1759         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1760
1761         //FIXME subpixel accuracy
1762         rx= ROUNDED_DIV(rx*3<<4, length);
1763         ry= ROUNDED_DIV(ry*3<<4, length);
1764
1765         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1766         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1767     }
1768     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1769 }
1770
1771 /**
1772  * prints debuging info for the given picture.
1773  */
1774 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1775
1776     if(!pict || !pict->mb_type) return;
1777
1778     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1779         int x,y;
1780
1781         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1782         switch (pict->pict_type) {
1783             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1784             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1785             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1786             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1787             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1788             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1789         }
1790         for(y=0; y<s->mb_height; y++){
1791             for(x=0; x<s->mb_width; x++){
1792                 if(s->avctx->debug&FF_DEBUG_SKIP){
1793                     int count= s->mbskip_table[x + y*s->mb_stride];
1794                     if(count>9) count=9;
1795                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1796                 }
1797                 if(s->avctx->debug&FF_DEBUG_QP){
1798                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1799                 }
1800                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1801                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1802                     //Type & MV direction
1803                     if(IS_PCM(mb_type))
1804                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1805                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1806                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1807                     else if(IS_INTRA4x4(mb_type))
1808                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1809                     else if(IS_INTRA16x16(mb_type))
1810                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1811                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1812                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1813                     else if(IS_DIRECT(mb_type))
1814                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1815                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1816                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1817                     else if(IS_GMC(mb_type))
1818                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1819                     else if(IS_SKIP(mb_type))
1820                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1821                     else if(!USES_LIST(mb_type, 1))
1822                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1823                     else if(!USES_LIST(mb_type, 0))
1824                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1825                     else{
1826                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1827                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1828                     }
1829
1830                     //segmentation
1831                     if(IS_8X8(mb_type))
1832                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1833                     else if(IS_16X8(mb_type))
1834                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1835                     else if(IS_8X16(mb_type))
1836                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1837                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1838                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1839                     else
1840                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1841
1842
1843                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1844                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1845                     else
1846                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1847                 }
1848 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1849             }
1850             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1851         }
1852     }
1853
1854     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1855         const int shift= 1 + s->quarter_sample;
1856         int mb_y;
1857         uint8_t *ptr;
1858         int i;
1859         int h_chroma_shift, v_chroma_shift;
1860         const int width = s->avctx->width;
1861         const int height= s->avctx->height;
1862         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1863         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1864         s->low_delay=0; //needed to see the vectors without trashing the buffers
1865
1866         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1867         for(i=0; i<3; i++){
1868             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1869             pict->data[i]= s->visualization_buffer[i];
1870         }
1871         pict->type= FF_BUFFER_TYPE_COPY;
1872         ptr= pict->data[0];
1873
1874         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1875             int mb_x;
1876             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1877                 const int mb_index= mb_x + mb_y*s->mb_stride;
1878                 if((s->avctx->debug_mv) && pict->motion_val){
1879                   int type;
1880                   for(type=0; type<3; type++){
1881                     int direction = 0;
1882                     switch (type) {
1883                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1884                                 continue;
1885                               direction = 0;
1886                               break;
1887                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1888                                 continue;
1889                               direction = 0;
1890                               break;
1891                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1892                                 continue;
1893                               direction = 1;
1894                               break;
1895                     }
1896                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1897                         continue;
1898
1899                     if(IS_8X8(pict->mb_type[mb_index])){
1900                       int i;
1901                       for(i=0; i<4; i++){
1902                         int sx= mb_x*16 + 4 + 8*(i&1);
1903                         int sy= mb_y*16 + 4 + 8*(i>>1);
1904                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1905                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1906                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1907                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1908                       }
1909                     }else if(IS_16X8(pict->mb_type[mb_index])){
1910                       int i;
1911                       for(i=0; i<2; i++){
1912                         int sx=mb_x*16 + 8;
1913                         int sy=mb_y*16 + 4 + 8*i;
1914                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1915                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1916                         int my=(pict->motion_val[direction][xy][1]>>shift);
1917
1918                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1919                             my*=2;
1920
1921                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1922                       }
1923                     }else if(IS_8X16(pict->mb_type[mb_index])){
1924                       int i;
1925                       for(i=0; i<2; i++){
1926                         int sx=mb_x*16 + 4 + 8*i;
1927                         int sy=mb_y*16 + 8;
1928                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1929                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1930                         int my=(pict->motion_val[direction][xy][1]>>shift);
1931
1932                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1933                             my*=2;
1934
1935                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1936                       }
1937                     }else{
1938                       int sx= mb_x*16 + 8;
1939                       int sy= mb_y*16 + 8;
1940                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1941                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1942                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1943                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1944                     }
1945                   }
1946                 }
1947                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1948                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1949                     int y;
1950                     for(y=0; y<8; y++){
1951                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1952                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1953                     }
1954                 }
1955                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1956                     int mb_type= pict->mb_type[mb_index];
1957                     uint64_t u,v;
1958                     int y;
1959 #define COLOR(theta, r)\
1960 u= (int)(128 + r*cos(theta*3.141592/180));\
1961 v= (int)(128 + r*sin(theta*3.141592/180));
1962
1963
1964                     u=v=128;
1965                     if(IS_PCM(mb_type)){
1966                         COLOR(120,48)
1967                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1968                         COLOR(30,48)
1969                     }else if(IS_INTRA4x4(mb_type)){
1970                         COLOR(90,48)
1971                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1972 //                        COLOR(120,48)
1973                     }else if(IS_DIRECT(mb_type)){
1974                         COLOR(150,48)
1975                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1976                         COLOR(170,48)
1977                     }else if(IS_GMC(mb_type)){
1978                         COLOR(190,48)
1979                     }else if(IS_SKIP(mb_type)){
1980 //                        COLOR(180,48)
1981                     }else if(!USES_LIST(mb_type, 1)){
1982                         COLOR(240,48)
1983                     }else if(!USES_LIST(mb_type, 0)){
1984                         COLOR(0,48)
1985                     }else{
1986                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1987                         COLOR(300,48)
1988                     }
1989
1990                     u*= 0x0101010101010101ULL;
1991                     v*= 0x0101010101010101ULL;
1992                     for(y=0; y<8; y++){
1993                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1994                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1995                     }
1996
1997                     //segmentation
1998                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1999                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2000                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2001                     }
2002                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2003                         for(y=0; y<16; y++)
2004                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2005                     }
2006                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2007                         int dm= 1 << (mv_sample_log2-2);
2008                         for(i=0; i<4; i++){
2009                             int sx= mb_x*16 + 8*(i&1);
2010                             int sy= mb_y*16 + 8*(i>>1);
2011                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2012                             //FIXME bidir
2013                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2014                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2015                                 for(y=0; y<8; y++)
2016                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2017                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2018                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2019                         }
2020                     }
2021
2022                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2023                         // hmm
2024                     }
2025                 }
2026                 s->mbskip_table[mb_index]=0;
2027             }
2028         }
2029     }
2030 }
2031
2032 #ifdef CONFIG_ENCODERS
2033
2034 static int get_sae(uint8_t *src, int ref, int stride){
2035     int x,y;
2036     int acc=0;
2037
2038     for(y=0; y<16; y++){
2039         for(x=0; x<16; x++){
2040             acc+= FFABS(src[x+y*stride] - ref);
2041         }
2042     }
2043
2044     return acc;
2045 }
2046
2047 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2048     int x, y, w, h;
2049     int acc=0;
2050
2051     w= s->width &~15;
2052     h= s->height&~15;
2053
2054     for(y=0; y<h; y+=16){
2055         for(x=0; x<w; x+=16){
2056             int offset= x + y*stride;
2057             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2058             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2059             int sae = get_sae(src + offset, mean, stride);
2060
2061             acc+= sae + 500 < sad;
2062         }
2063     }
2064     return acc;
2065 }
2066
2067
2068 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2069     AVFrame *pic=NULL;
2070     int64_t pts;
2071     int i;
2072     const int encoding_delay= s->max_b_frames;
2073     int direct=1;
2074
2075     if(pic_arg){
2076         pts= pic_arg->pts;
2077         pic_arg->display_picture_number= s->input_picture_number++;
2078
2079         if(pts != AV_NOPTS_VALUE){
2080             if(s->user_specified_pts != AV_NOPTS_VALUE){
2081                 int64_t time= pts;
2082                 int64_t last= s->user_specified_pts;
2083
2084                 if(time <= last){
2085                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2086                     return -1;
2087                 }
2088             }
2089             s->user_specified_pts= pts;
2090         }else{
2091             if(s->user_specified_pts != AV_NOPTS_VALUE){
2092                 s->user_specified_pts=
2093                 pts= s->user_specified_pts + 1;
2094                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2095             }else{
2096                 pts= pic_arg->display_picture_number;
2097             }
2098         }
2099     }
2100
2101   if(pic_arg){
2102     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2103     if(pic_arg->linesize[0] != s->linesize) direct=0;
2104     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2105     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2106
2107 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2108
2109     if(direct){
2110         i= ff_find_unused_picture(s, 1);
2111
2112         pic= (AVFrame*)&s->picture[i];
2113         pic->reference= 3;
2114
2115         for(i=0; i<4; i++){
2116             pic->data[i]= pic_arg->data[i];
2117             pic->linesize[i]= pic_arg->linesize[i];
2118         }
2119         alloc_picture(s, (Picture*)pic, 1);
2120     }else{
2121         i= ff_find_unused_picture(s, 0);
2122
2123         pic= (AVFrame*)&s->picture[i];
2124         pic->reference= 3;
2125
2126         alloc_picture(s, (Picture*)pic, 0);
2127
2128         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2129            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2130            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2131        // empty
2132         }else{
2133             int h_chroma_shift, v_chroma_shift;
2134             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2135
2136             for(i=0; i<3; i++){
2137                 int src_stride= pic_arg->linesize[i];
2138                 int dst_stride= i ? s->uvlinesize : s->linesize;
2139                 int h_shift= i ? h_chroma_shift : 0;
2140                 int v_shift= i ? v_chroma_shift : 0;
2141                 int w= s->width >>h_shift;
2142                 int h= s->height>>v_shift;
2143                 uint8_t *src= pic_arg->data[i];
2144                 uint8_t *dst= pic->data[i];
2145
2146                 if(!s->avctx->rc_buffer_size)
2147                     dst +=INPLACE_OFFSET;
2148
2149                 if(src_stride==dst_stride)
2150                     memcpy(dst, src, src_stride*h);
2151                 else{
2152                     while(h--){
2153                         memcpy(dst, src, w);
2154                         dst += dst_stride;
2155                         src += src_stride;
2156                     }
2157                 }
2158             }
2159         }
2160     }
2161     copy_picture_attributes(s, pic, pic_arg);
2162     pic->pts= pts; //we set this here to avoid modifiying pic_arg
2163   }
2164
2165     /* shift buffer entries */
2166     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2167         s->input_picture[i-1]= s->input_picture[i];
2168
2169     s->input_picture[encoding_delay]= (Picture*)pic;
2170
2171     return 0;
2172 }
2173
2174 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2175     int x, y, plane;
2176     int score=0;
2177     int64_t score64=0;
2178
2179     for(plane=0; plane<3; plane++){
2180         const int stride= p->linesize[plane];
2181         const int bw= plane ? 1 : 2;
2182         for(y=0; y<s->mb_height*bw; y++){
2183             for(x=0; x<s->mb_width*bw; x++){
2184                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2185                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2186
2187                 switch(s->avctx->frame_skip_exp){
2188                     case 0: score= FFMAX(score, v); break;
2189                     case 1: score+= FFABS(v);break;
2190                     case 2: score+= v*v;break;
2191                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2192                     case 4: score64+= v*v*(int64_t)(v*v);break;
2193                 }
2194             }
2195         }
2196     }
2197
2198     if(score) score64= score;
2199
2200     if(score64 < s->avctx->frame_skip_threshold)
2201         return 1;
2202     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2203         return 1;
2204     return 0;
2205 }
2206
2207 static int estimate_best_b_count(MpegEncContext *s){
2208     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2209     AVCodecContext *c= avcodec_alloc_context();
2210     AVFrame input[FF_MAX_B_FRAMES+2];
2211     const int scale= s->avctx->brd_scale;
2212     int i, j, out_size, p_lambda, b_lambda, lambda2;
2213     int outbuf_size= s->width * s->height; //FIXME
2214     uint8_t *outbuf= av_malloc(outbuf_size);
2215     int64_t best_rd= INT64_MAX;
2216     int best_b_count= -1;
2217
2218     assert(scale>=0 && scale <=3);
2219
2220 //    emms_c();
2221     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2222     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2223     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2224     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2225
2226     c->width = s->width >> scale;
2227     c->height= s->height>> scale;
2228     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2229     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2230     c->mb_decision= s->avctx->mb_decision;
2231     c->me_cmp= s->avctx->me_cmp;
2232     c->mb_cmp= s->avctx->mb_cmp;
2233     c->me_sub_cmp= s->avctx->me_sub_cmp;
2234     c->pix_fmt = PIX_FMT_YUV420P;
2235     c->time_base= s->avctx->time_base;
2236     c->max_b_frames= s->max_b_frames;
2237
2238     if (avcodec_open(c, codec) < 0)
2239         return -1;
2240
2241     for(i=0; i<s->max_b_frames+2; i++){
2242         int ysize= c->width*c->height;
2243         int csize= (c->width/2)*(c->height/2);
2244         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2245
2246         if(pre_input_ptr)
2247             pre_input= *pre_input_ptr;
2248
2249         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2250             pre_input.data[0]+=INPLACE_OFFSET;
2251             pre_input.data[1]+=INPLACE_OFFSET;
2252             pre_input.data[2]+=INPLACE_OFFSET;
2253         }
2254
2255         avcodec_get_frame_defaults(&input[i]);
2256         input[i].data[0]= av_malloc(ysize + 2*csize);
2257         input[i].data[1]= input[i].data[0] + ysize;
2258         input[i].data[2]= input[i].data[1] + csize;
2259         input[i].linesize[0]= c->width;
2260         input[i].linesize[1]=
2261         input[i].linesize[2]= c->width/2;
2262
2263         if(!i || s->input_picture[i-1]){
2264             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2265             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2266             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2267         }
2268     }
2269
2270     for(j=0; j<s->max_b_frames+1; j++){
2271         int64_t rd=0;
2272
2273         if(!s->input_picture[j])
2274             break;
2275
2276         c->error[0]= c->error[1]= c->error[2]= 0;
2277
2278         input[0].pict_type= I_TYPE;
2279         input[0].quality= 1 * FF_QP2LAMBDA;
2280         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2281 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2282
2283         for(i=0; i<s->max_b_frames+1; i++){
2284             int is_p= i % (j+1) == j || i==s->max_b_frames;
2285
2286             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2287             input[i+1].quality= is_p ? p_lambda : b_lambda;
2288             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2289             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2290         }
2291
2292         /* get the delayed frames */
2293         while(out_size){
2294             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2295             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2296         }
2297
2298         rd += c->error[0] + c->error[1] + c->error[2];
2299
2300         if(rd < best_rd){
2301             best_rd= rd;
2302             best_b_count= j;
2303         }
2304     }
2305
2306     av_freep(&outbuf);
2307     avcodec_close(c);
2308     av_freep(&c);
2309
2310     for(i=0; i<s->max_b_frames+2; i++){
2311         av_freep(&input[i].data[0]);
2312     }
2313
2314     return best_b_count;
2315 }
2316
2317 static void select_input_picture(MpegEncContext *s){
2318     int i;
2319
2320     for(i=1; i<MAX_PICTURE_COUNT; i++)
2321         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2322     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2323
2324     /* set next picture type & ordering */
2325     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2326         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2327             s->reordered_input_picture[0]= s->input_picture[0];
2328             s->reordered_input_picture[0]->pict_type= I_TYPE;
2329             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2330         }else{
2331             int b_frames;
2332
2333             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2334                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2335                 //FIXME check that te gop check above is +-1 correct
2336 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2337
2338                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2339                         for(i=0; i<4; i++)
2340                             s->input_picture[0]->data[i]= NULL;
2341                         s->input_picture[0]->type= 0;
2342                     }else{
2343                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2344                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2345
2346                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2347                     }
2348
2349                     emms_c();
2350                     ff_vbv_update(s, 0);
2351
2352                     goto no_output_pic;
2353                 }
2354             }
2355
2356             if(s->flags&CODEC_FLAG_PASS2){
2357                 for(i=0; i<s->max_b_frames+1; i++){
2358                     int pict_num= s->input_picture[0]->display_picture_number + i;
2359
2360                     if(pict_num >= s->rc_context.num_entries)
2361                         break;
2362                     if(!s->input_picture[i]){
2363                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2364                         break;
2365                     }
2366
2367                     s->input_picture[i]->pict_type=
2368                         s->rc_context.entry[pict_num].new_pict_type;
2369                 }
2370             }
2371
2372             if(s->avctx->b_frame_strategy==0){
2373                 b_frames= s->max_b_frames;
2374                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2375             }else if(s->avctx->b_frame_strategy==1){
2376                 for(i=1; i<s->max_b_frames+1; i++){
2377                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2378                         s->input_picture[i]->b_frame_score=
2379                             get_intra_count(s, s->input_picture[i  ]->data[0],
2380                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2381                     }
2382                 }
2383                 for(i=0; i<s->max_b_frames+1; i++){
2384                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2385                 }
2386
2387                 b_frames= FFMAX(0, i-1);
2388
2389                 /* reset scores */
2390                 for(i=0; i<b_frames+1; i++){
2391                     s->input_picture[i]->b_frame_score=0;
2392                 }
2393             }else if(s->avctx->b_frame_strategy==2){
2394                 b_frames= estimate_best_b_count(s);
2395             }else{
2396                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2397                 b_frames=0;
2398             }
2399
2400             emms_c();
2401 //static int b_count=0;
2402 //b_count+= b_frames;
2403 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2404
2405             for(i= b_frames - 1; i>=0; i--){
2406                 int type= s->input_picture[i]->pict_type;
2407                 if(type && type != B_TYPE)
2408                     b_frames= i;
2409             }
2410             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2411                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2412             }
2413
2414             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2415               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2416                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2417               }else{
2418                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2419                     b_frames=0;
2420                 s->input_picture[b_frames]->pict_type= I_TYPE;
2421               }
2422             }
2423
2424             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2425                && b_frames
2426                && s->input_picture[b_frames]->pict_type== I_TYPE)
2427                 b_frames--;
2428
2429             s->reordered_input_picture[0]= s->input_picture[b_frames];
2430             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2431                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2432             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2433             for(i=0; i<b_frames; i++){
2434                 s->reordered_input_picture[i+1]= s->input_picture[i];
2435                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2436                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2437             }
2438         }
2439     }
2440 no_output_pic:
2441     if(s->reordered_input_picture[0]){
2442         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2443
2444         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2445
2446         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2447             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2448
2449             int i= ff_find_unused_picture(s, 0);
2450             Picture *pic= &s->picture[i];
2451
2452             pic->reference              = s->reordered_input_picture[0]->reference;
2453             alloc_picture(s, pic, 0);
2454
2455             /* mark us unused / free shared pic */
2456             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2457                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2458             for(i=0; i<4; i++)
2459                 s->reordered_input_picture[0]->data[i]= NULL;
2460             s->reordered_input_picture[0]->type= 0;
2461
2462             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2463
2464             s->current_picture_ptr= pic;
2465         }else{
2466             // input is not a shared pix -> reuse buffer for current_pix
2467
2468             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2469                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2470
2471             s->current_picture_ptr= s->reordered_input_picture[0];
2472             for(i=0; i<4; i++){
2473                 s->new_picture.data[i]+= INPLACE_OFFSET;
2474             }
2475         }
2476         copy_picture(&s->current_picture, s->current_picture_ptr);
2477
2478         s->picture_number= s->new_picture.display_picture_number;
2479 //printf("dpn:%d\n", s->picture_number);
2480     }else{
2481        memset(&s->new_picture, 0, sizeof(Picture));
2482     }
2483 }
2484
2485 int MPV_encode_picture(AVCodecContext *avctx,
2486                        unsigned char *buf, int buf_size, void *data)
2487 {
2488     MpegEncContext *s = avctx->priv_data;
2489     AVFrame *pic_arg = data;
2490     int i, stuffing_count;
2491
2492     for(i=0; i<avctx->thread_count; i++){
2493         int start_y= s->thread_context[i]->start_mb_y;
2494         int   end_y= s->thread_context[i]->  end_mb_y;
2495         int h= s->mb_height;
2496         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2497         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2498
2499         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2500     }
2501
2502     s->picture_in_gop_number++;
2503
2504     if(load_input_picture(s, pic_arg) < 0)
2505         return -1;
2506
2507     select_input_picture(s);
2508
2509     /* output? */
2510     if(s->new_picture.data[0]){
2511         s->pict_type= s->new_picture.pict_type;
2512 //emms_c();
2513 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2514         MPV_frame_start(s, avctx);
2515 vbv_retry:
2516         if (encode_picture(s, s->picture_number) < 0)
2517             return -1;
2518
2519         avctx->real_pict_num  = s->picture_number;
2520         avctx->header_bits = s->header_bits;
2521         avctx->mv_bits     = s->mv_bits;
2522         avctx->misc_bits   = s->misc_bits;
2523         avctx->i_tex_bits  = s->i_tex_bits;
2524         avctx->p_tex_bits  = s->p_tex_bits;
2525         avctx->i_count     = s->i_count;
2526         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2527         avctx->skip_count  = s->skip_count;
2528
2529         MPV_frame_end(s);
2530
2531         if (s->out_format == FMT_MJPEG)
2532             mjpeg_picture_trailer(s);
2533
2534         if(avctx->rc_buffer_size){
2535             RateControlContext *rcc= &s->rc_context;
2536             int max_size= rcc->buffer_index/3;
2537
2538             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2539                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2540                 s->mb_skipped = 0;        //done in MPV_frame_start()
2541                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2542                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2543                         s->no_rounding ^= 1;
2544                 }
2545                 if(s->pict_type!=B_TYPE){
2546                     s->time_base= s->last_time_base;
2547                     s->last_non_b_time= s->time - s->pp_time;
2548                 }
2549 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
2550                 for(i=0; i<avctx->thread_count; i++){
2551                     PutBitContext *pb= &s->thread_context[i]->pb;
2552                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2553                 }
2554                 goto vbv_retry;
2555             }
2556
2557             assert(s->avctx->rc_max_rate);
2558         }
2559
2560         if(s->flags&CODEC_FLAG_PASS1)
2561             ff_write_pass1_stats(s);
2562
2563         for(i=0; i<4; i++){
2564             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2565             avctx->error[i] += s->current_picture_ptr->error[i];
2566         }
2567
2568         if(s->flags&CODEC_FLAG_PASS1)
2569             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2570         flush_put_bits(&s->pb);
2571         s->frame_bits  = put_bits_count(&s->pb);
2572
2573         stuffing_count= ff_vbv_update(s, s->frame_bits);
2574         if(stuffing_count){
2575             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2576                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2577                 return -1;
2578             }
2579
2580             switch(s->codec_id){
2581             case CODEC_ID_MPEG1VIDEO:
2582             case CODEC_ID_MPEG2VIDEO:
2583                 while(stuffing_count--){
2584                     put_bits(&s->pb, 8, 0);
2585                 }
2586             break;
2587             case CODEC_ID_MPEG4:
2588                 put_bits(&s->pb, 16, 0);
2589                 put_bits(&s->pb, 16, 0x1C3);
2590                 stuffing_count -= 4;
2591                 while(stuffing_count--){
2592                     put_bits(&s->pb, 8, 0xFF);
2593                 }
2594             break;
2595             default:
2596                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2597             }
2598             flush_put_bits(&s->pb);
2599             s->frame_bits  = put_bits_count(&s->pb);
2600         }
2601
2602         /* update mpeg1/2 vbv_delay for CBR */
2603         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2604            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2605             int vbv_delay;
2606
2607             assert(s->repeat_first_field==0);
2608
2609             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2610             assert(vbv_delay < 0xFFFF);
2611
2612             s->vbv_delay_ptr[0] &= 0xF8;
2613             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2614             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2615             s->vbv_delay_ptr[2] &= 0x07;
2616             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2617         }
2618         s->total_bits += s->frame_bits;
2619         avctx->frame_bits  = s->frame_bits;
2620     }else{
2621         assert((pbBufPtr(&s->pb) == s->pb.buf));
2622         s->frame_bits=0;
2623     }
2624     assert((s->frame_bits&7)==0);
2625
2626     return s->frame_bits/8;
2627 }
2628
2629 #endif //CONFIG_ENCODERS
2630
2631 static inline void gmc1_motion(MpegEncContext *s,
2632                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2633                                uint8_t **ref_picture)
2634 {
2635     uint8_t *ptr;
2636     int offset, src_x, src_y, linesize, uvlinesize;
2637     int motion_x, motion_y;
2638     int emu=0;
2639
2640     motion_x= s->sprite_offset[0][0];
2641     motion_y= s->sprite_offset[0][1];
2642     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2643     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2644     motion_x<<=(3-s->sprite_warping_accuracy);
2645     motion_y<<=(3-s->sprite_warping_accuracy);
2646     src_x = clip(src_x, -16, s->width);
2647     if (src_x == s->width)
2648         motion_x =0;
2649     src_y = clip(src_y, -16, s->height);
2650     if (src_y == s->height)
2651         motion_y =0;
2652
2653     linesize = s->linesize;
2654     uvlinesize = s->uvlinesize;
2655
2656     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2657
2658     if(s->flags&CODEC_FLAG_EMU_EDGE){
2659         if(   (unsigned)src_x >= s->h_edge_pos - 17
2660            || (unsigned)src_y >= s->v_edge_pos - 17){
2661             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2662             ptr= s->edge_emu_buffer;
2663         }
2664     }
2665
2666     if((motion_x|motion_y)&7){
2667         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2668         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2669     }else{
2670         int dxy;
2671
2672         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2673         if (s->no_rounding){
2674             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2675         }else{
2676             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2677         }
2678     }
2679
2680     if(s->flags&CODEC_FLAG_GRAY) return;
2681
2682     motion_x= s->sprite_offset[1][0];
2683     motion_y= s->sprite_offset[1][1];
2684     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2685     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2686     motion_x<<=(3-s->sprite_warping_accuracy);
2687     motion_y<<=(3-s->sprite_warping_accuracy);
2688     src_x = clip(src_x, -8, s->width>>1);
2689     if (src_x == s->width>>1)
2690         motion_x =0;
2691     src_y = clip(src_y, -8, s->height>>1);
2692     if (src_y == s->height>>1)
2693         motion_y =0;
2694
2695     offset = (src_y * uvlinesize) + src_x;
2696     ptr = ref_picture[1] + offset;
2697     if(s->flags&CODEC_FLAG_EMU_EDGE){
2698         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2699            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2700             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2701             ptr= s->edge_emu_buffer;
2702             emu=1;
2703         }
2704     }
2705     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2706
2707     ptr = ref_picture[2] + offset;
2708     if(emu){
2709         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2710         ptr= s->edge_emu_buffer;
2711     }
2712     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2713
2714     return;
2715 }
2716
2717 static inline void gmc_motion(MpegEncContext *s,
2718                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2719                                uint8_t **ref_picture)
2720 {
2721     uint8_t *ptr;
2722     int linesize, uvlinesize;
2723     const int a= s->sprite_warping_accuracy;
2724     int ox, oy;
2725
2726     linesize = s->linesize;
2727     uvlinesize = s->uvlinesize;
2728
2729     ptr = ref_picture[0];
2730
2731     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2732     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2733
2734     s->dsp.gmc(dest_y, ptr, linesize, 16,
2735            ox,
2736            oy,
2737            s->sprite_delta[0][0], s->sprite_delta[0][1],
2738            s->sprite_delta[1][0], s->sprite_delta[1][1],
2739            a+1, (1<<(2*a+1)) - s->no_rounding,
2740            s->h_edge_pos, s->v_edge_pos);
2741     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2742            ox + s->sprite_delta[0][0]*8,
2743            oy + s->sprite_delta[1][0]*8,
2744            s->sprite_delta[0][0], s->sprite_delta[0][1],
2745            s->sprite_delta[1][0], s->sprite_delta[1][1],
2746            a+1, (1<<(2*a+1)) - s->no_rounding,
2747            s->h_edge_pos, s->v_edge_pos);
2748
2749     if(s->flags&CODEC_FLAG_GRAY) return;
2750
2751     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2752     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2753
2754     ptr = ref_picture[1];
2755     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2756            ox,
2757            oy,
2758            s->sprite_delta[0][0], s->sprite_delta[0][1],
2759            s->sprite_delta[1][0], s->sprite_delta[1][1],
2760            a+1, (1<<(2*a+1)) - s->no_rounding,
2761            s->h_edge_pos>>1, s->v_edge_pos>>1);
2762
2763     ptr = ref_picture[2];
2764     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2765            ox,
2766            oy,
2767            s->sprite_delta[0][0], s->sprite_delta[0][1],
2768            s->sprite_delta[1][0], s->sprite_delta[1][1],
2769            a+1, (1<<(2*a+1)) - s->no_rounding,
2770            s->h_edge_pos>>1, s->v_edge_pos>>1);
2771 }
2772
2773 /**
2774  * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
2775  * @param buf destination buffer
2776  * @param src source buffer
2777  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2778  * @param block_w width of block
2779  * @param block_h height of block
2780  * @param src_x x coordinate of the top left sample of the block in the source buffer
2781  * @param src_y y coordinate of the top left sample of the block in the source buffer
2782  * @param w width of the source buffer
2783  * @param h height of the source buffer
2784  */
2785 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2786                                     int src_x, int src_y, int w, int h){
2787     int x, y;
2788     int start_y, start_x, end_y, end_x;
2789
2790     if(src_y>= h){
2791         src+= (h-1-src_y)*linesize;
2792         src_y=h-1;
2793     }else if(src_y<=-block_h){
2794         src+= (1-block_h-src_y)*linesize;
2795         src_y=1-block_h;
2796     }
2797     if(src_x>= w){
2798         src+= (w-1-src_x);
2799         src_x=w-1;
2800     }else if(src_x<=-block_w){
2801         src+= (1-block_w-src_x);
2802         src_x=1-block_w;
2803     }
2804
2805     start_y= FFMAX(0, -src_y);
2806     start_x= FFMAX(0, -src_x);
2807     end_y= FFMIN(block_h, h-src_y);
2808     end_x= FFMIN(block_w, w-src_x);
2809
2810     // copy existing part
2811     for(y=start_y; y<end_y; y++){
2812         for(x=start_x; x<end_x; x++){
2813             buf[x + y*linesize]= src[x + y*linesize];
2814         }
2815     }
2816
2817     //top
2818     for(y=0; y<start_y; y++){
2819         for(x=start_x; x<end_x; x++){
2820             buf[x + y*linesize]= buf[x + start_y*linesize];
2821         }
2822     }
2823
2824     //bottom
2825     for(y=end_y; y<block_h; y++){
2826         for(x=start_x; x<end_x; x++){
2827             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2828         }
2829     }
2830
2831     for(y=0; y<block_h; y++){
2832        //left
2833         for(x=0; x<start_x; x++){
2834             buf[x + y*linesize]= buf[start_x + y*linesize];
2835         }
2836
2837        //right
2838         for(x=end_x; x<block_w; x++){
2839             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2840         }
2841     }
2842 }
2843
2844 static inline int hpel_motion(MpegEncContext *s,
2845                                   uint8_t *dest, uint8_t *src,
2846                                   int field_based, int field_select,
2847                                   int src_x, int src_y,
2848                                   int width, int height, int stride,
2849                                   int h_edge_pos, int v_edge_pos,
2850                                   int w, int h, op_pixels_func *pix_op,
2851                                   int motion_x, int motion_y)
2852 {
2853     int dxy;
2854     int emu=0;
2855
2856     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2857     src_x += motion_x >> 1;
2858     src_y += motion_y >> 1;
2859
2860     /* WARNING: do no forget half pels */
2861     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2862     if (src_x == width)
2863         dxy &= ~1;
2864     src_y = clip(src_y, -16, height);
2865     if (src_y == height)
2866         dxy &= ~2;
2867     src += src_y * stride + src_x;
2868
2869     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2870         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2871            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2872             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2873                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2874             src= s->edge_emu_buffer;
2875             emu=1;
2876         }
2877     }
2878     if(field_select)
2879         src += s->linesize;
2880     pix_op[dxy](dest, src, stride, h);
2881     return emu;
2882 }
2883
2884 static inline int hpel_motion_lowres(MpegEncContext *s,
2885                                   uint8_t *dest, uint8_t *src,
2886                                   int field_based, int field_select,
2887                                   int src_x, int src_y,
2888                                   int width, int height, int stride,
2889                                   int h_edge_pos, int v_edge_pos,
2890                                   int w, int h, h264_chroma_mc_func *pix_op,
2891                                   int motion_x, int motion_y)
2892 {
2893     const int lowres= s->avctx->lowres;
2894     const int s_mask= (2<<lowres)-1;
2895     int emu=0;
2896     int sx, sy;
2897
2898     if(s->quarter_sample){
2899         motion_x/=2;
2900         motion_y/=2;
2901     }
2902
2903     sx= motion_x & s_mask;
2904     sy= motion_y & s_mask;
2905     src_x += motion_x >> (lowres+1);
2906     src_y += motion_y >> (lowres+1);
2907
2908     src += src_y * stride + src_x;
2909
2910     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2911        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2912         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2913                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2914         src= s->edge_emu_buffer;
2915         emu=1;
2916     }
2917
2918     sx <<= 2 - lowres;
2919     sy <<= 2 - lowres;
2920     if(field_select)
2921         src += s->linesize;
2922     pix_op[lowres](dest, src, stride, h, sx, sy);
2923     return emu;
2924 }
2925
2926 /* apply one mpeg motion vector to the three components */
2927 static always_inline void mpeg_motion(MpegEncContext *s,
2928                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2929                                int field_based, int bottom_field, int field_select,
2930                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2931                                int motion_x, int motion_y, int h)
2932 {
2933     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2934     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2935
2936 #if 0
2937 if(s->quarter_sample)
2938 {
2939     motion_x>>=1;
2940     motion_y>>=1;
2941 }
2942 #endif
2943
2944     v_edge_pos = s->v_edge_pos >> field_based;
2945     linesize   = s->current_picture.linesize[0] << field_based;
2946     uvlinesize = s->current_picture.linesize[1] << field_based;
2947
2948     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2949     src_x = s->mb_x* 16               + (motion_x >> 1);
2950     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2951
2952     if (s->out_format == FMT_H263) {
2953         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2954             mx = (motion_x>>1)|(motion_x&1);
2955             my = motion_y >>1;
2956             uvdxy = ((my & 1) << 1) | (mx & 1);
2957             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2958             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2959         }else{
2960             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2961             uvsrc_x = src_x>>1;
2962             uvsrc_y = src_y>>1;
2963         }
2964     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2965         mx = motion_x / 4;
2966         my = motion_y / 4;
2967         uvdxy = 0;
2968         uvsrc_x = s->mb_x*8 + mx;
2969         uvsrc_y = s->mb_y*8 + my;
2970     } else {
2971         if(s->chroma_y_shift){
2972             mx = motion_x / 2;
2973             my = motion_y / 2;
2974             uvdxy = ((my & 1) << 1) | (mx & 1);
2975             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2976             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2977         } else {
2978             if(s->chroma_x_shift){
2979             //Chroma422
2980                 mx = motion_x / 2;
2981                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2982                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2983                 uvsrc_y = src_y;
2984             } else {
2985             //Chroma444
2986                 uvdxy = dxy;
2987                 uvsrc_x = src_x;
2988                 uvsrc_y = src_y;
2989             }
2990         }
2991     }
2992
2993     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2994     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2995     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2996
2997     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2998        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2999             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3000                s->codec_id == CODEC_ID_MPEG1VIDEO){
3001                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3002                 return ;
3003             }
3004             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3005                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3006             ptr_y = s->edge_emu_buffer;
3007             if(!(s->flags&CODEC_FLAG_GRAY)){
3008                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3009                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3010                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3011                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3012                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3013                 ptr_cb= uvbuf;
3014                 ptr_cr= uvbuf+16;
3015             }
3016     }
3017
3018     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3019         dest_y += s->linesize;
3020         dest_cb+= s->uvlinesize;
3021         dest_cr+= s->uvlinesize;
3022     }
3023
3024     if(field_select){
3025         ptr_y += s->linesize;
3026         ptr_cb+= s->uvlinesize;
3027         ptr_cr+= s->uvlinesize;
3028     }
3029
3030     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3031
3032     if(!(s->flags&CODEC_FLAG_GRAY)){
3033         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3034         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3035     }
3036 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3037     if(s->out_format == FMT_H261){
3038         ff_h261_loop_filter(s);
3039     }
3040 #endif
3041 }
3042
3043 /* apply one mpeg motion vector to the three components */
3044 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
3045                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3046                                int field_based, int bottom_field, int field_select,
3047                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3048                                int motion_x, int motion_y, int h)
3049 {
3050     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3051     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3052     const int lowres= s->avctx->lowres;
3053     const int block_s= 8>>lowres;
3054     const int s_mask= (2<<lowres)-1;
3055     const int h_edge_pos = s->h_edge_pos >> lowres;
3056     const int v_edge_pos = s->v_edge_pos >> lowres;
3057     linesize   = s->current_picture.linesize[0] << field_based;
3058     uvlinesize = s->current_picture.linesize[1] << field_based;
3059
3060     if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
3061         motion_x/=2;
3062         motion_y/=2;
3063     }
3064
3065     if(field_based){
3066         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3067     }
3068
3069     sx= motion_x & s_mask;
3070     sy= motion_y & s_mask;
3071     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3072     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3073
3074     if (s->out_format == FMT_H263) {
3075         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3076         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3077         uvsrc_x = src_x>>1;
3078         uvsrc_y = src_y>>1;
3079     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
3080         mx = motion_x / 4;
3081         my = motion_y / 4;
3082         uvsx = (2*mx) & s_mask;
3083         uvsy = (2*my) & s_mask;
3084         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3085         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3086     } else {
3087         mx = motion_x / 2;
3088         my = motion_y / 2;
3089         uvsx = mx & s_mask;
3090         uvsy = my & s_mask;
3091         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3092         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3093     }
3094
3095     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3096     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3097     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3098
3099     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3100        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3101             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3102                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3103             ptr_y = s->edge_emu_buffer;
3104             if(!(s->flags&CODEC_FLAG_GRAY)){
3105                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3106                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3107                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3108                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3109                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3110                 ptr_cb= uvbuf;
3111                 ptr_cr= uvbuf+16;
3112             }
3113     }
3114
3115     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3116         dest_y += s->linesize;
3117         dest_cb+= s->uvlinesize;
3118         dest_cr+= s->uvlinesize;
3119     }
3120
3121     if(field_select){
3122         ptr_y += s->linesize;
3123         ptr_cb+= s->uvlinesize;
3124         ptr_cr+= s->uvlinesize;
3125     }
3126
3127     sx <<= 2 - lowres;
3128     sy <<= 2 - lowres;
3129     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3130
3131     if(!(s->flags&CODEC_FLAG_GRAY)){
3132         uvsx <<= 2 - lowres;
3133         uvsy <<= 2 - lowres;
3134         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3135         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3136     }
3137     //FIXME h261 lowres loop filter
3138 }
3139
3140 //FIXME move to dsputil, avg variant, 16x16 version
3141 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3142     int x;
3143     uint8_t * const top   = src[1];
3144     uint8_t * const left  = src[2];
3145     uint8_t * const mid   = src[0];
3146     uint8_t * const right = src[3];
3147     uint8_t * const bottom= src[4];
3148 #define OBMC_FILTER(x, t, l, m, r, b)\
3149     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3150 #define OBMC_FILTER4(x, t, l, m, r, b)\
3151     OBMC_FILTER(x         , t, l, m, r, b);\
3152     OBMC_FILTER(x+1       , t, l, m, r, b);\
3153     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3154     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3155
3156     x=0;
3157     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3158     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3159     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3160     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3161     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3162     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3163     x+= stride;
3164     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3165     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3166     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3167     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3168     x+= stride;
3169     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3170     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3171     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3172     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3173     x+= 2*stride;
3174     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3175     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3176     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3177     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3178     x+= 2*stride;
3179     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3180     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3181     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3182     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3183     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3184     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3185     x+= stride;
3186     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3187     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3188     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3189     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3190 }
3191
3192 /* obmc for 1 8x8 luma block */
3193 static inline void obmc_motion(MpegEncContext *s,
3194                                uint8_t *dest, uint8_t *src,
3195                                int src_x, int src_y,
3196                                op_pixels_func *pix_op,
3197                                int16_t mv[5][2]/* mid top left right bottom*/)
3198 #define MID    0
3199 {
3200     int i;
3201     uint8_t *ptr[5];
3202
3203     assert(s->quarter_sample==0);
3204
3205     for(i=0; i<5; i++){
3206         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3207             ptr[i]= ptr[MID];
3208         }else{
3209             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3210             hpel_motion(s, ptr[i], src, 0, 0,
3211                         src_x, src_y,
3212                         s->width, s->height, s->linesize,
3213                         s->h_edge_pos, s->v_edge_pos,
3214                         8, 8, pix_op,
3215                         mv[i][0], mv[i][1]);
3216         }
3217     }
3218
3219     put_obmc(dest, ptr, s->linesize);
3220 }
3221
3222 static inline void qpel_motion(MpegEncContext *s,
3223                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3224                                int field_based, int bottom_field, int field_select,
3225                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3226                                qpel_mc_func (*qpix_op)[16],
3227                                int motion_x, int motion_y, int h)
3228 {
3229     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3230     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3231
3232     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3233     src_x = s->mb_x *  16                 + (motion_x >> 2);
3234     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3235
3236     v_edge_pos = s->v_edge_pos >> field_based;
3237     linesize = s->linesize << field_based;
3238     uvlinesize = s->uvlinesize << field_based;
3239
3240     if(field_based){
3241         mx= motion_x/2;
3242         my= motion_y>>1;
3243     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3244         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3245         mx= (motion_x>>1) + rtab[motion_x&7];
3246         my= (motion_y>>1) + rtab[motion_y&7];
3247     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3248         mx= (motion_x>>1)|(motion_x&1);
3249         my= (motion_y>>1)|(motion_y&1);
3250     }else{
3251         mx= motion_x/2;
3252         my= motion_y/2;
3253     }
3254     mx= (mx>>1)|(mx&1);
3255     my= (my>>1)|(my&1);
3256
3257     uvdxy= (mx&1) | ((my&1)<<1);
3258     mx>>=1;
3259     my>>=1;
3260
3261     uvsrc_x = s->mb_x *  8                 + mx;
3262     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3263
3264     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3265     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3266     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3267
3268     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3269        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3270         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3271                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3272         ptr_y= s->edge_emu_buffer;
3273         if(!(s->flags&CODEC_FLAG_GRAY)){
3274             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3275             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3276                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3277             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3278                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3279             ptr_cb= uvbuf;
3280             ptr_cr= uvbuf + 16;
3281         }
3282     }
3283
3284     if(!field_based)
3285         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3286     else{
3287         if(bottom_field){
3288             dest_y += s->linesize;
3289             dest_cb+= s->uvlinesize;
3290             dest_cr+= s->uvlinesize;
3291         }
3292
3293         if(field_select){
3294             ptr_y  += s->linesize;
3295             ptr_cb += s->uvlinesize;
3296             ptr_cr += s->uvlinesize;
3297         }
3298         //damn interlaced mode
3299         //FIXME boundary mirroring is not exactly correct here
3300         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3301         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3302     }
3303     if(!(s->flags&CODEC_FLAG_GRAY)){
3304         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3305         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3306     }
3307 }
3308
3309 inline int ff_h263_round_chroma(int x){
3310     if (x >= 0)
3311         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3312     else {
3313         x = -x;
3314         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3315     }
3316 }
3317
3318 /**
3319  * h263 chorma 4mv motion compensation.
3320  */
3321 static inline void chroma_4mv_motion(MpegEncContext *s,
3322                                      uint8_t *dest_cb, uint8_t *dest_cr,
3323                                      uint8_t **ref_picture,
3324                                      op_pixels_func *pix_op,
3325                                      int mx, int my){
3326     int dxy, emu=0, src_x, src_y, offset;
3327     uint8_t *ptr;
3328
3329     /* In case of 8X8, we construct a single chroma motion vector
3330        with a special rounding */
3331     mx= ff_h263_round_chroma(mx);
3332     my= ff_h263_round_chroma(my);
3333
3334     dxy = ((my & 1) << 1) | (mx & 1);
3335     mx >>= 1;
3336     my >>= 1;
3337
3338     src_x = s->mb_x * 8 + mx;
3339     src_y = s->mb_y * 8 + my;
3340     src_x = clip(src_x, -8, s->width/2);
3341     if (src_x == s->width/2)
3342         dxy &= ~1;
3343     src_y = clip(src_y, -8, s->height/2);
3344     if (src_y == s->height/2)
3345         dxy &= ~2;
3346
3347     offset = (src_y * (s->uvlinesize)) + src_x;
3348     ptr = ref_picture[1] + offset;
3349     if(s->flags&CODEC_FLAG_EMU_EDGE){
3350         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3351            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3352             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3353             ptr= s->edge_emu_buffer;
3354             emu=1;
3355         }
3356     }
3357     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3358
3359     ptr = ref_picture[2] + offset;
3360     if(emu){
3361         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3362         ptr= s->edge_emu_buffer;
3363     }
3364     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3365 }
3366
3367 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3368                                      uint8_t *dest_cb, uint8_t *dest_cr,
3369                                      uint8_t **ref_picture,
3370                                      h264_chroma_mc_func *pix_op,
3371                                      int mx, int my){
3372     const int lowres= s->avctx->lowres;
3373     const int block_s= 8>>lowres;
3374     const int s_mask= (2<<lowres)-1;
3375     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3376     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3377     int emu=0, src_x, src_y, offset, sx, sy;
3378     uint8_t *ptr;
3379
3380     if(s->quarter_sample){
3381         mx/=2;
3382         my/=2;
3383     }
3384
3385     /* In case of 8X8, we construct a single chroma motion vector
3386        with a special rounding */
3387     mx= ff_h263_round_chroma(mx);
3388     my= ff_h263_round_chroma(my);
3389
3390     sx= mx & s_mask;
3391     sy= my & s_mask;
3392     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3393     src_y = s->mb_y*block_s + (my >> (lowres+1));
3394
3395     offset = src_y * s->uvlinesize + src_x;
3396     ptr = ref_picture[1] + offset;
3397     if(s->flags&CODEC_FLAG_EMU_EDGE){
3398         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3399            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3400             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3401             ptr= s->edge_emu_buffer;
3402             emu=1;
3403         }
3404     }
3405     sx <<= 2 - lowres;
3406     sy <<= 2 - lowres;
3407     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3408
3409     ptr = ref_picture[2] + offset;
3410     if(emu){
3411         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3412         ptr= s->edge_emu_buffer;
3413     }
3414     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3415 }
3416
3417 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3418     /* fetch pixels for estimated mv 4 macroblocks ahead
3419      * optimized for 64byte cache lines */
3420     const int shift = s->quarter_sample ? 2 : 1;
3421     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3422     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3423     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3424     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3425     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3426     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3427 }
3428
3429 /**
3430  * motion compensation of a single macroblock
3431  * @param s context
3432  * @param dest_y luma destination pointer
3433  * @param dest_cb chroma cb/u destination pointer
3434  * @param dest_cr chroma cr/v destination pointer
3435  * @param dir direction (0->forward, 1->backward)
3436  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3437  * @param pic_op halfpel motion compensation function (average or put normally)
3438  * @param pic_op qpel motion compensation function (average or put normally)
3439  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3440  */
3441 static inline void MPV_motion(MpegEncContext *s,
3442                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3443                               int dir, uint8_t **ref_picture,
3444                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3445 {
3446     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3447     int mb_x, mb_y, i;
3448     uint8_t *ptr, *dest;
3449
3450     mb_x = s->mb_x;
3451     mb_y = s->mb_y;
3452
3453     prefetch_motion(s, ref_picture, dir);
3454
3455     if(s->obmc && s->pict_type != B_TYPE){
3456         int16_t mv_cache[4][4][2];
3457         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3458         const int mot_stride= s->b8_stride;
3459         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3460
3461         assert(!s->mb_skipped);
3462
3463         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3464         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3465         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3466
3467         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3468             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3469         }else{
3470             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3471         }
3472
3473         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3474             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3475             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3476         }else{
3477             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3478             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3479         }
3480
3481         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3482             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3483             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3484         }else{
3485             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3486             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3487         }
3488
3489         mx = 0;
3490         my = 0;
3491         for(i=0;i<4;i++) {
3492             const int x= (i&1)+1;
3493             const int y= (i>>1)+1;
3494             int16_t mv[5][2]= {
3495                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3496                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3497                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3498                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3499                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3500             //FIXME cleanup
3501             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3502                         ref_picture[0],
3503                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3504                         pix_op[1],
3505                         mv);
3506
3507             mx += mv[0][0];
3508             my += mv[0][1];
3509         }
3510         if(!(s->flags&CODEC_FLAG_GRAY))
3511             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3512
3513         return;
3514     }
3515
3516     switch(s->mv_type) {
3517     case MV_TYPE_16X16:
3518         if(s->mcsel){
3519             if(s->real_sprite_warping_points==1){
3520                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3521                             ref_picture);
3522             }else{
3523                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3524                             ref_picture);
3525             }
3526         }else if(s->quarter_sample){
3527             qpel_motion(s, dest_y, dest_cb, dest_cr,
3528                         0, 0, 0,
3529                         ref_picture, pix_op, qpix_op,
3530                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3531         }else if(s->mspel){
3532             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3533                         ref_picture, pix_op,
3534                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3535         }else
3536         {
3537             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3538                         0, 0, 0,
3539                         ref_picture, pix_op,
3540                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3541         }
3542         break;
3543     case MV_TYPE_8X8:
3544         mx = 0;
3545         my = 0;
3546         if(s->quarter_sample){
3547             for(i=0;i<4;i++) {
3548                 motion_x = s->mv[dir][i][0];
3549                 motion_y = s->mv[dir][i][1];
3550
3551                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3552                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3553                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3554
3555                 /* WARNING: do no forget half pels */
3556                 src_x = clip(src_x, -16, s->width);
3557                 if (src_x == s->width)
3558                     dxy &= ~3;
3559                 src_y = clip(src_y, -16, s->height);
3560                 if (src_y == s->height)
3561                     dxy &= ~12;
3562
3563                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3564                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3565                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3566                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3567                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3568                         ptr= s->edge_emu_buffer;
3569                     }
3570                 }
3571                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3572                 qpix_op[1][dxy](dest, ptr, s->linesize);
3573
3574                 mx += s->mv[dir][i][0]/2;
3575                 my += s->mv[dir][i][1]/2;
3576             }
3577         }else{
3578             for(i=0;i<4;i++) {
3579                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3580                             ref_picture[0], 0, 0,
3581                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3582                             s->width, s->height, s->linesize,
3583                             s->h_edge_pos, s->v_edge_pos,
3584                             8, 8, pix_op[1],
3585                             s->mv[dir][i][0], s->mv[dir][i][1]);
3586
3587                 mx += s->mv[dir][i][0];
3588                 my += s->mv[dir][i][1];
3589             }
3590         }
3591
3592         if(!(s->flags&CODEC_FLAG_GRAY))
3593             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3594         break;
3595     case MV_TYPE_FIELD:
3596         if (s->picture_structure == PICT_FRAME) {
3597             if(s->quarter_sample){
3598                 for(i=0; i<2; i++){
3599                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3600                                 1, i, s->field_select[dir][i],
3601                                 ref_picture, pix_op, qpix_op,
3602                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3603                 }
3604             }else{
3605                 /* top field */
3606                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3607                             1, 0, s->field_select[dir][0],
3608                             ref_picture, pix_op,
3609                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3610                 /* bottom field */
3611                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3612                             1, 1, s->field_select[dir][1],
3613                             ref_picture, pix_op,
3614                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3615             }
3616         } else {
3617             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3618                 ref_picture= s->current_picture_ptr->data;
3619             }
3620
3621             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3622                         0, 0, s->field_select[dir][0],
3623                         ref_picture, pix_op,
3624                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3625         }
3626         break;
3627     case MV_TYPE_16X8:
3628         for(i=0; i<2; i++){
3629             uint8_t ** ref2picture;
3630
3631             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3632                 ref2picture= ref_picture;
3633             }else{
3634                 ref2picture= s->current_picture_ptr->data;
3635             }
3636
3637             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3638                         0, 0, s->field_select[dir][i],
3639                         ref2picture, pix_op,
3640                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3641
3642             dest_y += 16*s->linesize;
3643             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3644             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3645         }
3646         break;
3647     case MV_TYPE_DMV:
3648         if(s->picture_structure == PICT_FRAME){
3649             for(i=0; i<2; i++){
3650                 int j;
3651                 for(j=0; j<2; j++){
3652                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3653                                 1, j, j^i,
3654                                 ref_picture, pix_op,
3655                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3656                 }
3657                 pix_op = s->dsp.avg_pixels_tab;
3658             }
3659         }else{
3660             for(i=0; i<2; i++){
3661                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3662                             0, 0, s->picture_structure != i+1,
3663                             ref_picture, pix_op,
3664                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3665
3666                 // after put we make avg of the same block
3667                 pix_op=s->dsp.avg_pixels_tab;
3668
3669                 //opposite parity is always in the same frame if this is second field
3670                 if(!s->first_field){
3671                     ref_picture = s->current_picture_ptr->data;
3672                 }
3673             }
3674         }
3675     break;
3676     default: assert(0);
3677     }
3678 }
3679
3680 /**
3681  * motion compensation of a single macroblock
3682  * @param s context
3683  * @param dest_y luma destination pointer
3684  * @param dest_cb chroma cb/u destination pointer
3685  * @param dest_cr chroma cr/v destination pointer
3686  * @param dir direction (0->forward, 1->backward)
3687  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3688  * @param pic_op halfpel motion compensation function (average or put normally)
3689  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3690  */
3691 static inline void MPV_motion_lowres(MpegEncContext *s,
3692                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3693                               int dir, uint8_t **ref_picture,
3694                               h264_chroma_mc_func *pix_op)
3695 {
3696     int mx, my;
3697     int mb_x, mb_y, i;
3698     const int lowres= s->avctx->lowres;
3699     const int block_s= 8>>lowres;
3700
3701     mb_x = s->mb_x;
3702     mb_y = s->mb_y;
3703
3704     switch(s->mv_type) {
3705     case MV_TYPE_16X16:
3706         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3707                     0, 0, 0,
3708                     ref_picture, pix_op,
3709                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3710         break;
3711     case MV_TYPE_8X8:
3712         mx = 0;
3713         my = 0;
3714             for(i=0;i<4;i++) {
3715                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3716                             ref_picture[0], 0, 0,
3717                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3718                             s->width, s->height, s->linesize,
3719                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3720                             block_s, block_s, pix_op,
3721                             s->mv[dir][i][0], s->mv[dir][i][1]);
3722
3723                 mx += s->mv[dir][i][0];
3724                 my += s->mv[dir][i][1];
3725             }
3726
3727         if(!(s->flags&CODEC_FLAG_GRAY))
3728             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3729         break;
3730     case MV_TYPE_FIELD:
3731         if (s->picture_structure == PICT_FRAME) {
3732             /* top field */
3733             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3734                         1, 0, s->field_select[dir][0],
3735                         ref_picture, pix_op,
3736                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3737             /* bottom field */
3738             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3739                         1, 1, s->field_select[dir][1],
3740                         ref_picture, pix_op,
3741                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3742         } else {
3743             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3744                 ref_picture= s->current_picture_ptr->data;
3745             }
3746
3747             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3748                         0, 0, s->field_select[dir][0],
3749                         ref_picture, pix_op,
3750                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3751         }
3752         break;
3753     case MV_TYPE_16X8:
3754         for(i=0; i<2; i++){
3755             uint8_t ** ref2picture;
3756
3757             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3758                 ref2picture= ref_picture;
3759             }else{
3760                 ref2picture= s->current_picture_ptr->data;
3761             }
3762
3763             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3764                         0, 0, s->field_select[dir][i],
3765                         ref2picture, pix_op,
3766                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3767
3768             dest_y += 2*block_s*s->linesize;
3769             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3770             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3771         }
3772         break;
3773     case MV_TYPE_DMV:
3774         if(s->picture_structure == PICT_FRAME){
3775             for(i=0; i<2; i++){
3776                 int j;
3777                 for(j=0; j<2; j++){
3778                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3779                                 1, j, j^i,
3780                                 ref_picture, pix_op,
3781                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3782                 }
3783                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3784             }
3785         }else{
3786             for(i=0; i<2; i++){
3787                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3788                             0, 0, s->picture_structure != i+1,
3789                             ref_picture, pix_op,
3790                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3791
3792                 // after put we make avg of the same block
3793                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3794
3795                 //opposite parity is always in the same frame if this is second field
3796                 if(!s->first_field){
3797                     ref_picture = s->current_picture_ptr->data;
3798                 }
3799             }
3800         }
3801     break;
3802     default: assert(0);
3803     }
3804 }
3805
3806 /* put block[] to dest[] */
3807 static inline void put_dct(MpegEncContext *s,
3808                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3809 {
3810     s->dct_unquantize_intra(s, block, i, qscale);
3811     s->dsp.idct_put (dest, line_size, block);
3812 }
3813
3814 /* add block[] to dest[] */
3815 static inline void add_dct(MpegEncContext *s,
3816                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3817 {
3818     if (s->block_last_index[i] >= 0) {
3819         s->dsp.idct_add (dest, line_size, block);
3820     }
3821 }
3822
3823 static inline void add_dequant_dct(MpegEncContext *s,
3824                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3825 {
3826     if (s->block_last_index[i] >= 0) {
3827         s->dct_unquantize_inter(s, block, i, qscale);
3828
3829         s->dsp.idct_add (dest, line_size, block);
3830     }
3831 }
3832
3833 /**
3834  * cleans dc, ac, coded_block for the current non intra MB
3835  */
3836 void ff_clean_intra_table_entries(MpegEncContext *s)
3837 {
3838     int wrap = s->b8_stride;
3839     int xy = s->block_index[0];
3840
3841     s->dc_val[0][xy           ] =
3842     s->dc_val[0][xy + 1       ] =
3843     s->dc_val[0][xy     + wrap] =
3844     s->dc_val[0][xy + 1 + wrap] = 1024;
3845     /* ac pred */
3846     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3847     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3848     if (s->msmpeg4_version>=3) {
3849         s->coded_block[xy           ] =
3850         s->coded_block[xy + 1       ] =
3851         s->coded_block[xy     + wrap] =
3852         s->coded_block[xy + 1 + wrap] = 0;
3853     }
3854     /* chroma */
3855     wrap = s->mb_stride;
3856     xy = s->mb_x + s->mb_y * wrap;
3857     s->dc_val[1][xy] =
3858     s->dc_val[2][xy] = 1024;
3859     /* ac pred */
3860     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3861     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3862
3863     s->mbintra_table[xy]= 0;
3864 }
3865
3866 /* generic function called after a macroblock has been parsed by the
3867    decoder or after it has been encoded by the encoder.
3868
3869    Important variables used:
3870    s->mb_intra : true if intra macroblock
3871    s->mv_dir   : motion vector direction
3872    s->mv_type  : motion vector type
3873    s->mv       : motion vector
3874    s->interlaced_dct : true if interlaced dct used (mpeg2)
3875  */
3876 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3877 {
3878     int mb_x, mb_y;
3879     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3880 #ifdef HAVE_XVMC
3881     if(s->avctx->xvmc_acceleration){
3882         XVMC_decode_mb(s);//xvmc uses pblocks
3883         return;
3884     }
3885 #endif
3886
3887     mb_x = s->mb_x;
3888     mb_y = s->mb_y;
3889
3890     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3891        /* save DCT coefficients */
3892        int i,j;
3893        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3894        for(i=0; i<6; i++)
3895            for(j=0; j<64; j++)
3896                *dct++ = block[i][s->dsp.idct_permutation[j]];
3897     }
3898
3899     s->current_picture.qscale_table[mb_xy]= s->qscale;
3900
3901     /* update DC predictors for P macroblocks */
3902     if (!s->mb_intra) {
3903         if (s->h263_pred || s->h263_aic) {
3904             if(s->mbintra_table[mb_xy])
3905                 ff_clean_intra_table_entries(s);
3906         } else {
3907             s->last_dc[0] =
3908             s->last_dc[1] =
3909             s->last_dc[2] = 128 << s->intra_dc_precision;
3910         }
3911     }
3912     else if (s->h263_pred || s->h263_aic)
3913         s->mbintra_table[mb_xy]=1;
3914
3915     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3916         uint8_t *dest_y, *dest_cb, *dest_cr;
3917         int dct_linesize, dct_offset;
3918         op_pixels_func (*op_pix)[4];
3919         qpel_mc_func (*op_qpix)[16];
3920         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3921         const int uvlinesize= s->current_picture.linesize[1];
3922         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3923         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3924
3925         /* avoid copy if macroblock skipped in last frame too */
3926         /* skip only during decoding as we might trash the buffers during encoding a bit */
3927         if(!s->encoding){
3928             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3929             const int age= s->current_picture.age;
3930
3931             assert(age);
3932
3933             if (s->mb_skipped) {
3934                 s->mb_skipped= 0;
3935                 assert(s->pict_type!=I_TYPE);
3936
3937                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3938                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3939
3940                 /* if previous was skipped too, then nothing to do !  */
3941                 if (*mbskip_ptr >= age && s->current_picture.reference){
3942                     return;
3943                 }
3944             } else if(!s->current_picture.reference){
3945                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3946                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3947             } else{
3948                 *mbskip_ptr = 0; /* not skipped */
3949             }
3950         }
3951
3952         dct_linesize = linesize << s->interlaced_dct;
3953         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3954
3955         if(readable){
3956             dest_y=  s->dest[0];
3957             dest_cb= s->dest[1];
3958             dest_cr= s->dest[2];
3959         }else{
3960             dest_y = s->b_scratchpad;
3961             dest_cb= s->b_scratchpad+16*linesize;
3962             dest_cr= s->b_scratchpad+32*linesize;
3963         }
3964
3965         if (!s->mb_intra) {
3966             /* motion handling */
3967             /* decoding or more than one mb_type (MC was already done otherwise) */
3968             if(!s->encoding){
3969                 if(lowres_flag){
3970                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3971
3972                     if (s->mv_dir & MV_DIR_FORWARD) {
3973                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3974                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3975                     }
3976                     if (s->mv_dir & MV_DIR_BACKWARD) {
3977                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3978                     }
3979                 }else{
3980                     op_qpix= s->me.qpel_put;
3981                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3982                         op_pix = s->dsp.put_pixels_tab;
3983                     }else{
3984                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3985                     }
3986                     if (s->mv_dir & MV_DIR_FORWARD) {
3987                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3988                         op_pix = s->dsp.avg_pixels_tab;
3989                         op_qpix= s->me.qpel_avg;
3990                     }
3991                     if (s->mv_dir & MV_DIR_BACKWARD) {
3992                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3993                     }
3994                 }
3995             }
3996
3997             /* skip dequant / idct if we are really late ;) */
3998             if(s->hurry_up>1) goto skip_idct;
3999             if(s->avctx->skip_idct){
4000                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4001                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4002                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4003                     goto skip_idct;
4004             }
4005
4006             /* add dct residue */
4007             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4008                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4009                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4010                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4011                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4012                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4013
4014                 if(!(s->flags&CODEC_FLAG_GRAY)){
4015                     if (s->chroma_y_shift){
4016                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4017                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4018                     }else{
4019                         dct_linesize >>= 1;
4020                         dct_offset >>=1;
4021                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4022                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4023                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4024                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4025                     }
4026                 }
4027             } else if(s->codec_id != CODEC_ID_WMV2){
4028                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4029                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4030                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4031                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4032
4033                 if(!(s->flags&CODEC_FLAG_GRAY)){
4034                     if(s->chroma_y_shift){//Chroma420
4035                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4036                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4037                     }else{
4038                         //chroma422
4039                         dct_linesize = uvlinesize << s->interlaced_dct;
4040                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4041
4042                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4043                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4044                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4045                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4046                         if(!s->chroma_x_shift){//Chroma444
4047                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4048                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4049                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4050                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4051                         }
4052                     }
4053                 }//fi gray
4054             }
4055             else{
4056                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4057             }
4058         } else {
4059             /* dct only in intra block */
4060             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4061                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4062                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4063                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4064                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4065
4066                 if(!(s->flags&CODEC_FLAG_GRAY)){
4067                     if(s->chroma_y_shift){
4068                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4069                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4070                     }else{
4071                         dct_offset >>=1;
4072                         dct_linesize >>=1;
4073                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4074                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4075                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4076                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4077                     }
4078                 }
4079             }else{
4080                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4081                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4082                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4083                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4084
4085                 if(!(s->flags&CODEC_FLAG_GRAY)){
4086                     if(s->chroma_y_shift){
4087                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4088                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4089                     }else{
4090
4091                         dct_linesize = uvlinesize << s->interlaced_dct;
4092                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4093
4094                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4095                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4096                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4097                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4098                         if(!s->chroma_x_shift){//Chroma444
4099                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4100                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4101                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4102                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4103                         }
4104                     }
4105                 }//gray
4106             }
4107         }
4108 skip_idct:
4109         if(!readable){
4110             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4111             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4112             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4113         }
4114     }
4115 }
4116
4117 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4118     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4119     else                  MPV_decode_mb_internal(s, block, 0);
4120 }
4121
4122 #ifdef CONFIG_ENCODERS
4123
4124 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4125 {
4126     static const char tab[64]=
4127         {3,2,2,1,1,1,1,1,
4128          1,1,1,1,1,1,1,1,
4129          1,1,1,1,1,1,1,1,
4130          0,0,0,0,0,0,0,0,
4131          0,0,0,0,0,0,0,0,
4132          0,0,0,0,0,0,0,0,
4133          0,0,0,0,0,0,0,0,
4134          0,0,0,0,0,0,0,0};
4135     int score=0;
4136     int run=0;
4137     int i;
4138     DCTELEM *block= s->block[n];
4139     const int last_index= s->block_last_index[n];
4140     int skip_dc;
4141
4142     if(threshold<0){
4143         skip_dc=0;
4144         threshold= -threshold;
4145     }else
4146         skip_dc=1;
4147
4148     /* are all which we could set to zero are allready zero? */
4149     if(last_index<=skip_dc - 1) return;
4150
4151     for(i=0; i<=last_index; i++){
4152         const int j = s->intra_scantable.permutated[i];
4153         const int level = FFABS(block[j]);
4154         if(level==1){
4155             if(skip_dc && i==0) continue;
4156             score+= tab[run];
4157             run=0;
4158         }else if(level>1){
4159             return;
4160         }else{
4161             run++;
4162         }
4163     }
4164     if(score >= threshold) return;
4165     for(i=skip_dc; i<=last_index; i++){
4166         const int j = s->intra_scantable.permutated[i];
4167         block[j]=0;
4168     }
4169     if(block[0]) s->block_last_index[n]= 0;
4170     else         s->block_last_index[n]= -1;
4171 }
4172
4173 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4174 {
4175     int i;
4176     const int maxlevel= s->max_qcoeff;
4177     const int minlevel= s->min_qcoeff;
4178     int overflow=0;
4179
4180     if(s->mb_intra){
4181         i=1; //skip clipping of intra dc
4182     }else
4183         i=0;
4184
4185     for(;i<=last_index; i++){
4186         const int j= s->intra_scantable.permutated[i];
4187         int level = block[j];
4188
4189         if     (level>maxlevel){
4190             level=maxlevel;
4191             overflow++;
4192         }else if(level<minlevel){
4193             level=minlevel;
4194             overflow++;
4195         }
4196
4197         block[j]= level;
4198     }
4199
4200     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4201         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4202 }
4203
4204 #endif //CONFIG_ENCODERS
4205
4206 /**
4207  *
4208  * @param h is the normal height, this will be reduced automatically if needed for the last row
4209  */
4210 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4211     if (s->avctx->draw_horiz_band) {
4212         AVFrame *src;
4213         int offset[4];
4214
4215         if(s->picture_structure != PICT_FRAME){
4216             h <<= 1;
4217             y <<= 1;
4218             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4219         }
4220
4221         h= FFMIN(h, s->avctx->height - y);
4222
4223         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4224             src= (AVFrame*)s->current_picture_ptr;
4225         else if(s->last_picture_ptr)
4226             src= (AVFrame*)s->last_picture_ptr;
4227         else
4228             return;
4229
4230         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4231             offset[0]=
4232             offset[1]=
4233             offset[2]=
4234             offset[3]= 0;
4235         }else{
4236             offset[0]= y * s->linesize;;
4237             offset[1]=
4238             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4239             offset[3]= 0;
4240         }
4241
4242         emms_c();
4243
4244         s->avctx->draw_horiz_band(s->avctx, src, offset,
4245                                   y, s->picture_structure, h);
4246     }
4247 }
4248
4249 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4250     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4251     const int uvlinesize= s->current_picture.linesize[1];
4252     const int mb_size= 4 - s->avctx->lowres;
4253
4254     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4255     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4256     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4257     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4258     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4259     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4260     //block_index is not used by mpeg2, so it is not affected by chroma_format
4261
4262     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4263     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4264     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4265
4266     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4267     {
4268         s->dest[0] += s->mb_y *   linesize << mb_size;
4269         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4270         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4271     }
4272 }
4273
4274 #ifdef CONFIG_ENCODERS
4275
4276 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4277     int x, y;
4278 //FIXME optimize
4279     for(y=0; y<8; y++){
4280         for(x=0; x<8; x++){
4281             int x2, y2;
4282             int sum=0;
4283             int sqr=0;
4284             int count=0;
4285
4286             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4287                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4288                     int v= ptr[x2 + y2*stride];
4289                     sum += v;
4290                     sqr += v*v;
4291                     count++;
4292                 }
4293             }
4294             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4295         }
4296     }
4297 }
4298
4299 static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4300 {
4301     int16_t weight[8][64];
4302     DCTELEM orig[8][64];
4303     const int mb_x= s->mb_x;
4304     const int mb_y= s->mb_y;
4305     int i;
4306     int skip_dct[8];
4307     int dct_offset   = s->linesize*8; //default for progressive frames
4308     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4309     int wrap_y, wrap_c;
4310
4311     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4312
4313     if(s->adaptive_quant){
4314         const int last_qp= s->qscale;
4315         const int mb_xy= mb_x + mb_y*s->mb_stride;
4316
4317         s->lambda= s->lambda_table[mb_xy];
4318         update_qscale(s);
4319
4320         if(!(s->flags&CODEC_FLAG_QP_RD)){
4321             s->dquant= s->qscale - last_qp;
4322
4323             if(s->out_format==FMT_H263){
4324                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4325
4326                 if(s->codec_id==CODEC_ID_MPEG4){
4327                     if(!s->mb_intra){
4328                         if(s->pict_type == B_TYPE){
4329                             if(s->dquant&1)
4330                                 s->dquant= (s->dquant/2)*2;
4331                             if(s->mv_dir&MV_DIRECT)
4332                                 s->dquant= 0;
4333                         }
4334                         if(s->mv_type==MV_TYPE_8X8)
4335                             s->dquant=0;
4336                     }
4337                 }
4338             }
4339         }
4340         ff_set_qscale(s, last_qp + s->dquant);
4341     }else if(s->flags&CODEC_FLAG_QP_RD)
4342         ff_set_qscale(s, s->qscale + s->dquant);
4343
4344     wrap_y = s->linesize;
4345     wrap_c = s->uvlinesize;
4346     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4347     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4348     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4349
4350     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4351         uint8_t *ebuf= s->edge_emu_buffer + 32;
4352         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4353         ptr_y= ebuf;
4354         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4355         ptr_cb= ebuf+18*wrap_y;
4356         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4357         ptr_cr= ebuf+18*wrap_y+8;
4358     }
4359
4360     if (s->mb_intra) {
4361         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4362             int progressive_score, interlaced_score;
4363
4364             s->interlaced_dct=0;
4365             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4366                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4367
4368             if(progressive_score > 0){
4369                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4370                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4371                 if(progressive_score > interlaced_score){
4372                     s->interlaced_dct=1;
4373
4374                     dct_offset= wrap_y;
4375                     wrap_y<<=1;
4376                     if (s->chroma_format == CHROMA_422)
4377                         wrap_c<<=1;
4378                 }
4379             }
4380         }
4381
4382         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4383         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4384         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4385         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4386
4387         if(s->flags&CODEC_FLAG_GRAY){
4388             skip_dct[4]= 1;
4389             skip_dct[5]= 1;
4390         }else{
4391             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4392             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4393             if(!s->chroma_y_shift){ /* 422 */
4394                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4395                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4396             }
4397         }
4398     }else{
4399         op_pixels_func (*op_pix)[4];
4400         qpel_mc_func (*op_qpix)[16];
4401         uint8_t *dest_y, *dest_cb, *dest_cr;
4402
4403         dest_y  = s->dest[0];
4404         dest_cb = s->dest[1];
4405         dest_cr = s->dest[2];
4406
4407         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4408             op_pix = s->dsp.put_pixels_tab;
4409             op_qpix= s->dsp.put_qpel_pixels_tab;
4410         }else{
4411             op_pix = s->dsp.put_no_rnd_pixels_tab;
4412             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4413         }
4414
4415         if (s->mv_dir & MV_DIR_FORWARD) {
4416             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4417             op_pix = s->dsp.avg_pixels_tab;
4418             op_qpix= s->dsp.avg_qpel_pixels_tab;
4419         }
4420         if (s->mv_dir & MV_DIR_BACKWARD) {
4421             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4422         }
4423
4424         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4425             int progressive_score, interlaced_score;
4426
4427             s->interlaced_dct=0;
4428             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4429                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4430
4431             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4432
4433             if(progressive_score>0){
4434                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4435                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4436
4437                 if(progressive_score > interlaced_score){
4438                     s->interlaced_dct=1;
4439
4440                     dct_offset= wrap_y;
4441                     wrap_y<<=1;
4442                     if (s->chroma_format == CHROMA_422)
4443                         wrap_c<<=1;
4444                 }
4445             }
4446         }
4447
4448         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4449         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4450         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4451         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4452
4453         if(s->flags&CODEC_FLAG_GRAY){
4454             skip_dct[4]= 1;
4455             skip_dct[5]= 1;
4456         }else{
4457             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4458             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4459             if(!s->chroma_y_shift){ /* 422 */
4460                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4461                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4462             }
4463         }
4464         /* pre quantization */
4465         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4466             //FIXME optimize
4467             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4468             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4469             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4470             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4471             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4472             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4473             if(!s->chroma_y_shift){ /* 422 */
4474                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4475                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4476             }
4477         }
4478     }
4479
4480     if(s->avctx->quantizer_noise_shaping){
4481         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4482         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4483         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4484         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4485         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4486         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4487         if(!s->chroma_y_shift){ /* 422 */
4488             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4489             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4490         }
4491         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4492     }
4493
4494     /* DCT & quantize */
4495     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4496     {
4497         for(i=0;i<mb_block_count;i++) {
4498             if(!skip_dct[i]){
4499                 int overflow;
4500                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4501             // FIXME we could decide to change to quantizer instead of clipping
4502             // JS: I don't think that would be a good idea it could lower quality instead
4503             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4504                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4505             }else
4506                 s->block_last_index[i]= -1;
4507         }
4508         if(s->avctx->quantizer_noise_shaping){
4509             for(i=0;i<mb_block_count;i++) {
4510                 if(!skip_dct[i]){
4511                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4512                 }
4513             }
4514         }
4515
4516         if(s->luma_elim_threshold && !s->mb_intra)
4517             for(i=0; i<4; i++)
4518                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4519         if(s->chroma_elim_threshold && !s->mb_intra)
4520             for(i=4; i<mb_block_count; i++)
4521                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4522
4523         if(s->flags & CODEC_FLAG_CBP_RD){
4524             for(i=0;i<mb_block_count;i++) {
4525                 if(s->block_last_index[i] == -1)
4526                     s->coded_score[i]= INT_MAX/256;
4527             }
4528         }
4529     }
4530
4531     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4532         s->block_last_index[4]=
4533         s->block_last_index[5]= 0;
4534         s->block[4][0]=
4535         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4536     }
4537
4538     //non c quantize code returns incorrect block_last_index FIXME
4539     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4540         for(i=0; i<mb_block_count; i++){
4541             int j;
4542             if(s->block_last_index[i]>0){
4543                 for(j=63; j>0; j--){
4544                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4545                 }
4546                 s->block_last_index[i]= j;
4547             }
4548         }
4549     }
4550
4551     /* huffman encode */
4552     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4553     case CODEC_ID_MPEG1VIDEO:
4554     case CODEC_ID_MPEG2VIDEO:
4555         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4556     case CODEC_ID_MPEG4:
4557         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4558     case CODEC_ID_MSMPEG4V2:
4559     case CODEC_ID_MSMPEG4V3:
4560     case CODEC_ID_WMV1:
4561         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4562     case CODEC_ID_WMV2:
4563          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4564 #ifdef CONFIG_H261_ENCODER
4565     case CODEC_ID_H261:
4566         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4567 #endif
4568     case CODEC_ID_H263:
4569     case CODEC_ID_H263P:
4570     case CODEC_ID_FLV1:
4571     case CODEC_ID_RV10:
4572     case CODEC_ID_RV20:
4573         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4574     case CODEC_ID_MJPEG:
4575         mjpeg_encode_mb(s, s->block); break;
4576     default:
4577         assert(0);
4578     }
4579 }
4580
4581 static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4582 {
4583     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4584     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4585 }
4586
4587 #endif //CONFIG_ENCODERS
4588
4589 void ff_mpeg_flush(AVCodecContext *avctx){
4590     int i;
4591     MpegEncContext *s = avctx->priv_data;
4592
4593     if(s==NULL || s->picture==NULL)
4594         return;
4595
4596     for(i=0; i<MAX_PICTURE_COUNT; i++){
4597        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4598                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4599         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4600     }
4601     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4602
4603     s->mb_x= s->mb_y= 0;
4604
4605     s->parse_context.state= -1;
4606     s->parse_context.frame_start_found= 0;
4607     s->parse_context.overread= 0;
4608     s->parse_context.overread_index= 0;
4609     s->parse_context.index= 0;
4610     s->parse_context.last_index= 0;
4611     s->bitstream_buffer_size=0;
4612 }
4613
4614 #ifdef CONFIG_ENCODERS
4615 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4616 {
4617     const uint16_t *srcw= (uint16_t*)src;
4618     int words= length>>4;
4619     int bits= length&15;
4620     int i;
4621
4622     if(length==0) return;
4623
4624     if(words < 16){
4625         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4626     }else if(put_bits_count(pb)&7){
4627         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4628     }else{
4629         for(i=0; put_bits_count(pb)&31; i++)
4630             put_bits(pb, 8, src[i]);
4631         flush_put_bits(pb);
4632         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4633         skip_put_bytes(pb, 2*words-i);
4634     }
4635
4636     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4637 }
4638
4639 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4640     int i;
4641
4642     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4643
4644     /* mpeg1 */
4645     d->mb_skip_run= s->mb_skip_run;
4646     for(i=0; i<3; i++)
4647         d->last_dc[i]= s->last_dc[i];
4648
4649     /* statistics */
4650     d->mv_bits= s->mv_bits;
4651     d->i_tex_bits= s->i_tex_bits;
4652     d->p_tex_bits= s->p_tex_bits;
4653     d->i_count= s->i_count;
4654     d->f_count= s->f_count;
4655     d->b_count= s->b_count;
4656     d->skip_count= s->skip_count;
4657     d->misc_bits= s->misc_bits;
4658     d->last_bits= 0;
4659
4660     d->mb_skipped= 0;
4661     d->qscale= s->qscale;
4662     d->dquant= s->dquant;
4663 }
4664
4665 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4666     int i;
4667
4668     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4669     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4670
4671     /* mpeg1 */
4672     d->mb_skip_run= s->mb_skip_run;
4673     for(i=0; i<3; i++)
4674         d->last_dc[i]= s->last_dc[i];
4675
4676     /* statistics */
4677     d->mv_bits= s->mv_bits;
4678     d->i_tex_bits= s->i_tex_bits;
4679     d->p_tex_bits= s->p_tex_bits;
4680     d->i_count= s->i_count;
4681     d->f_count= s->f_count;
4682     d->b_count= s->b_count;
4683     d->skip_count= s->skip_count;
4684     d->misc_bits= s->misc_bits;
4685
4686     d->mb_intra= s->mb_intra;
4687     d->mb_skipped= s->mb_skipped;
4688     d->mv_type= s->mv_type;
4689     d->mv_dir= s->mv_dir;
4690     d->pb= s->pb;
4691     if(s->data_partitioning){
4692         d->pb2= s->pb2;
4693         d->tex_pb= s->tex_pb;
4694     }
4695     d->block= s->block;
4696     for(i=0; i<8; i++)
4697         d->block_last_index[i]= s->block_last_index[i];
4698     d->interlaced_dct= s->interlaced_dct;
4699     d->qscale= s->qscale;
4700 }
4701
4702 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4703                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4704                            int *dmin, int *next_block, int motion_x, int motion_y)
4705 {
4706     int score;
4707     uint8_t *dest_backup[3];
4708
4709     copy_context_before_encode(s, backup, type);
4710
4711     s->block= s->blocks[*next_block];
4712     s->pb= pb[*next_block];
4713     if(s->data_partitioning){
4714         s->pb2   = pb2   [*next_block];
4715         s->tex_pb= tex_pb[*next_block];
4716     }
4717
4718     if(*next_block){
4719         memcpy(dest_backup, s->dest, sizeof(s->dest));
4720         s->dest[0] = s->rd_scratchpad;
4721         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4722         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4723         assert(s->linesize >= 32); //FIXME
4724     }
4725
4726     encode_mb(s, motion_x, motion_y);
4727
4728     score= put_bits_count(&s->pb);
4729     if(s->data_partitioning){
4730         score+= put_bits_count(&s->pb2);
4731         score+= put_bits_count(&s->tex_pb);
4732     }
4733
4734     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4735         MPV_decode_mb(s, s->block);
4736
4737         score *= s->lambda2;
4738         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4739     }
4740
4741     if(*next_block){
4742         memcpy(s->dest, dest_backup, sizeof(s->dest));
4743     }
4744
4745     if(score<*dmin){
4746         *dmin= score;
4747         *next_block^=1;
4748
4749         copy_context_after_encode(best, s, type);
4750     }
4751 }
4752
4753 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4754     uint32_t *sq = squareTbl + 256;
4755     int acc=0;
4756     int x,y;
4757
4758     if(w==16 && h==16)
4759         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4760     else if(w==8 && h==8)
4761         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4762
4763     for(y=0; y<h; y++){
4764         for(x=0; x<w; x++){
4765             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4766         }
4767     }
4768
4769     assert(acc>=0);
4770
4771     return acc;
4772 }
4773
4774 static int sse_mb(MpegEncContext *s){
4775     int w= 16;
4776     int h= 16;
4777
4778     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4779     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4780
4781     if(w==16 && h==16)
4782       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4783         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4784                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4785                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4786       }else{
4787         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4788                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4789                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4790       }
4791     else
4792         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4793                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4794                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4795 }
4796
4797 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4798     MpegEncContext *s= arg;
4799
4800
4801     s->me.pre_pass=1;
4802     s->me.dia_size= s->avctx->pre_dia_size;
4803     s->first_slice_line=1;
4804     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4805         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4806             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4807         }
4808         s->first_slice_line=0;
4809     }
4810
4811     s->me.pre_pass=0;
4812
4813     return 0;
4814 }
4815
4816 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4817     MpegEncContext *s= arg;
4818
4819     s->me.dia_size= s->avctx->dia_size;
4820     s->first_slice_line=1;
4821     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4822         s->mb_x=0; //for block init below
4823         ff_init_block_index(s);
4824         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4825             s->block_index[0]+=2;
4826             s->block_index[1]+=2;
4827             s->block_index[2]+=2;
4828             s->block_index[3]+=2;
4829
4830             /* compute motion vector & mb_type and store in context */
4831             if(s->pict_type==B_TYPE)
4832                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4833             else
4834                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4835         }
4836         s->first_slice_line=0;
4837     }
4838     return 0;
4839 }
4840
4841 static int mb_var_thread(AVCodecContext *c, void *arg){
4842     MpegEncContext *s= arg;
4843     int mb_x, mb_y;
4844
4845     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4846         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4847             int xx = mb_x * 16;
4848             int yy = mb_y * 16;
4849             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4850             int varc;
4851             int sum = s->dsp.pix_sum(pix, s->linesize);
4852
4853             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4854
4855             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4856             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4857             s->me.mb_var_sum_temp    += varc;
4858         }
4859     }
4860     return 0;
4861 }
4862
4863 static void write_slice_end(MpegEncContext *s){
4864     if(s->codec_id==CODEC_ID_MPEG4){
4865         if(s->partitioned_frame){
4866             ff_mpeg4_merge_partitions(s);
4867         }
4868
4869         ff_mpeg4_stuffing(&s->pb);
4870     }else if(s->out_format == FMT_MJPEG){
4871         ff_mjpeg_stuffing(&s->pb);
4872     }
4873
4874     align_put_bits(&s->pb);
4875     flush_put_bits(&s->pb);
4876
4877     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4878         s->misc_bits+= get_bits_diff(s);
4879 }
4880
4881 static int encode_thread(AVCodecContext *c, void *arg){
4882     MpegEncContext *s= arg;
4883     int mb_x, mb_y, pdif = 0;
4884     int i, j;
4885     MpegEncContext best_s, backup_s;
4886     uint8_t bit_buf[2][MAX_MB_BYTES];
4887     uint8_t bit_buf2[2][MAX_MB_BYTES];
4888     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4889     PutBitContext pb[2], pb2[2], tex_pb[2];
4890 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4891
4892     for(i=0; i<2; i++){
4893         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4894         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4895         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4896     }
4897
4898     s->last_bits= put_bits_count(&s->pb);
4899     s->mv_bits=0;
4900     s->misc_bits=0;
4901     s->i_tex_bits=0;
4902     s->p_tex_bits=0;
4903     s->i_count=0;
4904     s->f_count=0;
4905     s->b_count=0;
4906     s->skip_count=0;
4907
4908     for(i=0; i<3; i++){
4909         /* init last dc values */
4910         /* note: quant matrix value (8) is implied here */
4911         s->last_dc[i] = 128 << s->intra_dc_precision;
4912
4913         s->current_picture.error[i] = 0;
4914     }
4915     s->mb_skip_run = 0;
4916     memset(s->last_mv, 0, sizeof(s->last_mv));
4917
4918     s->last_mv_dir = 0;
4919
4920     switch(s->codec_id){
4921     case CODEC_ID_H263:
4922     case CODEC_ID_H263P:
4923     case CODEC_ID_FLV1:
4924         s->gob_index = ff_h263_get_gob_height(s);
4925         break;
4926     case CODEC_ID_MPEG4:
4927         if(s->partitioned_frame)
4928             ff_mpeg4_init_partitions(s);
4929         break;
4930     }
4931
4932     s->resync_mb_x=0;
4933     s->resync_mb_y=0;
4934     s->first_slice_line = 1;
4935     s->ptr_lastgob = s->pb.buf;
4936     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4937 //    printf("row %d at %X\n", s->mb_y, (int)s);
4938         s->mb_x=0;
4939         s->mb_y= mb_y;
4940
4941         ff_set_qscale(s, s->qscale);
4942         ff_init_block_index(s);
4943
4944         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4945             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4946             int mb_type= s->mb_type[xy];
4947 //            int d;
4948             int dmin= INT_MAX;
4949             int dir;
4950
4951             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4952                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4953                 return -1;
4954             }
4955             if(s->data_partitioning){
4956                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4957                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4958                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4959                     return -1;
4960                 }
4961             }
4962
4963             s->mb_x = mb_x;
4964             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4965             ff_update_block_index(s);
4966
4967 #ifdef CONFIG_H261_ENCODER
4968             if(s->codec_id == CODEC_ID_H261){
4969                 ff_h261_reorder_mb_index(s);
4970                 xy= s->mb_y*s->mb_stride + s->mb_x;
4971                 mb_type= s->mb_type[xy];
4972             }
4973 #endif
4974
4975             /* write gob / video packet header  */
4976             if(s->rtp_mode){
4977                 int current_packet_size, is_gob_start;
4978
4979                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4980
4981                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4982
4983                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4984
4985                 switch(s->codec_id){
4986                 case CODEC_ID_H263:
4987                 case CODEC_ID_H263P:
4988                     if(!s->h263_slice_structured)
4989                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4990                     break;
4991                 case CODEC_ID_MPEG2VIDEO:
4992                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4993                 case CODEC_ID_MPEG1VIDEO:
4994                     if(s->mb_skip_run) is_gob_start=0;
4995                     break;
4996                 }
4997
4998                 if(is_gob_start){
4999                     if(s->start_mb_y != mb_y || mb_x!=0){
5000                         write_slice_end(s);
5001
5002                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5003                             ff_mpeg4_init_partitions(s);
5004                         }
5005                     }
5006
5007                     assert((put_bits_count(&s->pb)&7) == 0);
5008                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5009
5010                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5011                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5012                         int d= 100 / s->avctx->error_rate;
5013                         if(r % d == 0){
5014                             current_packet_size=0;
5015 #ifndef ALT_BITSTREAM_WRITER
5016                             s->pb.buf_ptr= s->ptr_lastgob;
5017 #endif
5018                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5019                         }
5020                     }
5021
5022                     if (s->avctx->rtp_callback){
5023                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5024                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5025                     }
5026
5027                     switch(s->codec_id){
5028                     case CODEC_ID_MPEG4:
5029                         ff_mpeg4_encode_video_packet_header(s);
5030                         ff_mpeg4_clean_buffers(s);
5031                     break;
5032                     case CODEC_ID_MPEG1VIDEO:
5033                     case CODEC_ID_MPEG2VIDEO:
5034                         ff_mpeg1_encode_slice_header(s);
5035                         ff_mpeg1_clean_buffers(s);
5036                     break;
5037                     case CODEC_ID_H263:
5038                     case CODEC_ID_H263P:
5039                         h263_encode_gob_header(s, mb_y);
5040                     break;
5041                     }
5042
5043                     if(s->flags&CODEC_FLAG_PASS1){
5044                         int bits= put_bits_count(&s->pb);
5045                         s->misc_bits+= bits - s->last_bits;
5046                         s->last_bits= bits;
5047                     }
5048
5049                     s->ptr_lastgob += current_packet_size;
5050                     s->first_slice_line=1;
5051                     s->resync_mb_x=mb_x;
5052                     s->resync_mb_y=mb_y;
5053                 }
5054             }
5055
5056             if(  (s->resync_mb_x   == s->mb_x)
5057                && s->resync_mb_y+1 == s->mb_y){
5058                 s->first_slice_line=0;
5059             }
5060
5061             s->mb_skipped=0;
5062             s->dquant=0; //only for QP_RD
5063
5064             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5065                 int next_block=0;
5066                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5067
5068                 copy_context_before_encode(&backup_s, s, -1);
5069                 backup_s.pb= s->pb;
5070                 best_s.data_partitioning= s->data_partitioning;
5071                 best_s.partitioned_frame= s->partitioned_frame;
5072                 if(s->data_partitioning){
5073                     backup_s.pb2= s->pb2;
5074                     backup_s.tex_pb= s->tex_pb;
5075                 }
5076
5077                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5078                     s->mv_dir = MV_DIR_FORWARD;
5079                     s->mv_type = MV_TYPE_16X16;
5080                     s->mb_intra= 0;
5081                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5082                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5083                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5084                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5085                 }
5086                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5087                     s->mv_dir = MV_DIR_FORWARD;
5088                     s->mv_type = MV_TYPE_FIELD;
5089                     s->mb_intra= 0;
5090                     for(i=0; i<2; i++){
5091                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5092                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5093                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5094                     }
5095                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5096                                  &dmin, &next_block, 0, 0);
5097                 }
5098                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5099                     s->mv_dir = MV_DIR_FORWARD;
5100                     s->mv_type = MV_TYPE_16X16;
5101                     s->mb_intra= 0;
5102                     s->mv[0][0][0] = 0;
5103                     s->mv[0][0][1] = 0;
5104                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5105                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5106                 }
5107                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5108                     s->mv_dir = MV_DIR_FORWARD;
5109                     s->mv_type = MV_TYPE_8X8;
5110                     s->mb_intra= 0;
5111                     for(i=0; i<4; i++){
5112                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5113                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5114                     }
5115                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5116                                  &dmin, &next_block, 0, 0);
5117                 }
5118                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5119                     s->mv_dir = MV_DIR_FORWARD;
5120                     s->mv_type = MV_TYPE_16X16;
5121                     s->mb_intra= 0;
5122                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5123                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5124                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5125                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5126                 }
5127                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5128                     s->mv_dir = MV_DIR_BACKWARD;
5129                     s->mv_type = MV_TYPE_16X16;
5130                     s->mb_intra= 0;
5131                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5132                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5133                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5134                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5135                 }
5136                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5137                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5138                     s->mv_type = MV_TYPE_16X16;
5139                     s->mb_intra= 0;
5140                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5141                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5142                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5143                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5144                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5145                                  &dmin, &next_block, 0, 0);
5146                 }
5147                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5148                     int mx= s->b_direct_mv_table[xy][0];
5149                     int my= s->b_direct_mv_table[xy][1];
5150
5151                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5152                     s->mb_intra= 0;
5153                     ff_mpeg4_set_direct_mv(s, mx, my);
5154                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5155                                  &dmin, &next_block, mx, my);
5156                 }
5157                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5158                     s->mv_dir = MV_DIR_FORWARD;
5159                     s->mv_type = MV_TYPE_FIELD;
5160                     s->mb_intra= 0;
5161                     for(i=0; i<2; i++){
5162                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5163                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5164                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5165                     }
5166                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5167                                  &dmin, &next_block, 0, 0);
5168                 }
5169                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5170                     s->mv_dir = MV_DIR_BACKWARD;
5171                     s->mv_type = MV_TYPE_FIELD;
5172                     s->mb_intra= 0;
5173                     for(i=0; i<2; i++){
5174                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5175                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5176                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5177                     }
5178                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5179                                  &dmin, &next_block, 0, 0);
5180                 }
5181                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5182                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5183                     s->mv_type = MV_TYPE_FIELD;
5184                     s->mb_intra= 0;
5185                     for(dir=0; dir<2; dir++){
5186                         for(i=0; i<2; i++){
5187                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5188                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5189                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5190                         }
5191                     }
5192                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5193                                  &dmin, &next_block, 0, 0);
5194                 }
5195                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5196                     s->mv_dir = 0;
5197                     s->mv_type = MV_TYPE_16X16;
5198                     s->mb_intra= 1;
5199                     s->mv[0][0][0] = 0;
5200                     s->mv[0][0][1] = 0;
5201                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5202                                  &dmin, &next_block, 0, 0);
5203                     if(s->h263_pred || s->h263_aic){
5204                         if(best_s.mb_intra)
5205                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5206                         else
5207                             ff_clean_intra_table_entries(s); //old mode?
5208                     }
5209                 }
5210
5211                 if(s->flags & CODEC_FLAG_QP_RD){
5212                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5213                         const int last_qp= backup_s.qscale;
5214                         int dquant, dir, qp, dc[6];
5215                         DCTELEM ac[6][16];
5216                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5217
5218                         assert(backup_s.dquant == 0);
5219
5220                         //FIXME intra
5221                         s->mv_dir= best_s.mv_dir;
5222                         s->mv_type = MV_TYPE_16X16;
5223                         s->mb_intra= best_s.mb_intra;
5224                         s->mv[0][0][0] = best_s.mv[0][0][0];
5225                         s->mv[0][0][1] = best_s.mv[0][0][1];
5226                         s->mv[1][0][0] = best_s.mv[1][0][0];
5227                         s->mv[1][0][1] = best_s.mv[1][0][1];
5228
5229                         dir= s->pict_type == B_TYPE ? 2 : 1;
5230                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5231                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5232                             qp= last_qp + dquant;
5233                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5234                                 break;
5235                             backup_s.dquant= dquant;
5236                             if(s->mb_intra && s->dc_val[0]){
5237                                 for(i=0; i<6; i++){
5238                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5239                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5240                                 }
5241                             }
5242
5243                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5244                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5245                             if(best_s.qscale != qp){
5246                                 if(s->mb_intra && s->dc_val[0]){
5247                                     for(i=0; i<6; i++){
5248                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5249                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5250                                     }
5251                                 }
5252                                 if(dir > 0 && dquant==dir){
5253                                     dquant= 0;
5254                                     dir= -dir;
5255                                 }else
5256                                     break;
5257                             }
5258                         }
5259                         qp= best_s.qscale;
5260                         s->current_picture.qscale_table[xy]= qp;
5261                     }
5262                 }
5263
5264                 copy_context_after_encode(s, &best_s, -1);
5265
5266                 pb_bits_count= put_bits_count(&s->pb);
5267                 flush_put_bits(&s->pb);
5268                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5269                 s->pb= backup_s.pb;
5270
5271                 if(s->data_partitioning){
5272                     pb2_bits_count= put_bits_count(&s->pb2);
5273                     flush_put_bits(&s->pb2);
5274                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5275                     s->pb2= backup_s.pb2;
5276
5277                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5278                     flush_put_bits(&s->tex_pb);
5279                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5280                     s->tex_pb= backup_s.tex_pb;
5281                 }
5282                 s->last_bits= put_bits_count(&s->pb);
5283
5284                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5285                     ff_h263_update_motion_val(s);
5286
5287                 if(next_block==0){ //FIXME 16 vs linesize16
5288                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5289                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5290                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5291                 }
5292
5293                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5294                     MPV_decode_mb(s, s->block);
5295             } else {
5296                 int motion_x, motion_y;
5297                 s->mv_type=MV_TYPE_16X16;
5298                 // only one MB-Type possible
5299
5300                 switch(mb_type){
5301                 case CANDIDATE_MB_TYPE_INTRA:
5302                     s->mv_dir = 0;
5303                     s->mb_intra= 1;
5304                     motion_x= s->mv[0][0][0] = 0;
5305                     motion_y= s->mv[0][0][1] = 0;
5306                     break;
5307                 case CANDIDATE_MB_TYPE_INTER:
5308                     s->mv_dir = MV_DIR_FORWARD;
5309                     s->mb_intra= 0;
5310                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5311                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5312                     break;
5313                 case CANDIDATE_MB_TYPE_INTER_I:
5314                     s->mv_dir = MV_DIR_FORWARD;
5315                     s->mv_type = MV_TYPE_FIELD;
5316                     s->mb_intra= 0;
5317                     for(i=0; i<2; i++){
5318                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5319                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5320                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5321                     }
5322                     motion_x = motion_y = 0;
5323                     break;
5324                 case CANDIDATE_MB_TYPE_INTER4V:
5325                     s->mv_dir = MV_DIR_FORWARD;
5326                     s->mv_type = MV_TYPE_8X8;
5327                     s->mb_intra= 0;
5328                     for(i=0; i<4; i++){
5329                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5330                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5331                     }
5332                     motion_x= motion_y= 0;
5333                     break;
5334                 case CANDIDATE_MB_TYPE_DIRECT:
5335                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5336                     s->mb_intra= 0;
5337                     motion_x=s->b_direct_mv_table[xy][0];
5338                     motion_y=s->b_direct_mv_table[xy][1];
5339                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5340                     break;
5341                 case CANDIDATE_MB_TYPE_BIDIR:
5342                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5343                     s->mb_intra= 0;
5344                     motion_x=0;
5345                     motion_y=0;
5346                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5347                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5348                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5349                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5350                     break;
5351                 case CANDIDATE_MB_TYPE_BACKWARD:
5352                     s->mv_dir = MV_DIR_BACKWARD;
5353                     s->mb_intra= 0;
5354                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5355                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5356                     break;
5357                 case CANDIDATE_MB_TYPE_FORWARD:
5358                     s->mv_dir = MV_DIR_FORWARD;
5359                     s->mb_intra= 0;
5360                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5361                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5362 //                    printf(" %d %d ", motion_x, motion_y);
5363                     break;
5364                 case CANDIDATE_MB_TYPE_FORWARD_I:
5365                     s->mv_dir = MV_DIR_FORWARD;
5366                     s->mv_type = MV_TYPE_FIELD;
5367                     s->mb_intra= 0;
5368                     for(i=0; i<2; i++){
5369                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5370                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5371                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5372                     }
5373                     motion_x=motion_y=0;
5374                     break;
5375                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5376                     s->mv_dir = MV_DIR_BACKWARD;
5377                     s->mv_type = MV_TYPE_FIELD;
5378                     s->mb_intra= 0;
5379                     for(i=0; i<2; i++){
5380                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5381                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5382                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5383                     }
5384                     motion_x=motion_y=0;
5385                     break;
5386                 case CANDIDATE_MB_TYPE_BIDIR_I:
5387                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5388                     s->mv_type = MV_TYPE_FIELD;
5389                     s->mb_intra= 0;
5390                     for(dir=0; dir<2; dir++){
5391                         for(i=0; i<2; i++){
5392                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5393                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5394                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5395                         }
5396                     }
5397                     motion_x=motion_y=0;
5398                     break;
5399                 default:
5400                     motion_x=motion_y=0; //gcc warning fix
5401                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5402                 }
5403
5404                 encode_mb(s, motion_x, motion_y);
5405
5406                 // RAL: Update last macroblock type
5407                 s->last_mv_dir = s->mv_dir;
5408
5409                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5410                     ff_h263_update_motion_val(s);
5411
5412                 MPV_decode_mb(s, s->block);
5413             }
5414
5415             /* clean the MV table in IPS frames for direct mode in B frames */
5416             if(s->mb_intra /* && I,P,S_TYPE */){
5417                 s->p_mv_table[xy][0]=0;
5418                 s->p_mv_table[xy][1]=0;
5419             }
5420
5421             if(s->flags&CODEC_FLAG_PSNR){
5422                 int w= 16;
5423                 int h= 16;
5424
5425                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5426                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5427
5428                 s->current_picture.error[0] += sse(
5429                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5430                     s->dest[0], w, h, s->linesize);
5431                 s->current_picture.error[1] += sse(
5432                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5433                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5434                 s->current_picture.error[2] += sse(
5435                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5436                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5437             }
5438             if(s->loop_filter){
5439                 if(s->out_format == FMT_H263)
5440                     ff_h263_loop_filter(s);
5441             }
5442 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5443         }
5444     }
5445
5446     //not beautiful here but we must write it before flushing so it has to be here
5447     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5448         msmpeg4_encode_ext_header(s);
5449
5450     write_slice_end(s);
5451
5452     /* Send the last GOB if RTP */
5453     if (s->avctx->rtp_callback) {
5454         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5455         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5456         /* Call the RTP callback to send the last GOB */
5457         emms_c();
5458         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5459     }
5460
5461     return 0;
5462 }
5463
5464 #define MERGE(field) dst->field += src->field; src->field=0
5465 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5466     MERGE(me.scene_change_score);
5467     MERGE(me.mc_mb_var_sum_temp);
5468     MERGE(me.mb_var_sum_temp);
5469 }
5470
5471 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5472     int i;
5473
5474     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5475     MERGE(dct_count[1]);
5476     MERGE(mv_bits);
5477     MERGE(i_tex_bits);
5478     MERGE(p_tex_bits);
5479     MERGE(i_count);
5480     MERGE(f_count);
5481     MERGE(b_count);
5482     MERGE(skip_count);
5483     MERGE(misc_bits);
5484     MERGE(error_count);
5485     MERGE(padding_bug_score);
5486     MERGE(current_picture.error[0]);
5487     MERGE(current_picture.error[1]);
5488     MERGE(current_picture.error[2]);
5489
5490     if(dst->avctx->noise_reduction){
5491         for(i=0; i<64; i++){
5492             MERGE(dct_error_sum[0][i]);
5493             MERGE(dct_error_sum[1][i]);
5494         }
5495     }
5496
5497     assert(put_bits_count(&src->pb) % 8 ==0);
5498     assert(put_bits_count(&dst->pb) % 8 ==0);
5499     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5500     flush_put_bits(&dst->pb);
5501 }
5502
5503 static int estimate_qp(MpegEncContext *s, int dry_run){
5504     if (s->next_lambda){
5505         s->current_picture_ptr->quality=
5506         s->current_picture.quality = s->next_lambda;
5507         if(!dry_run) s->next_lambda= 0;
5508     } else if (!s->fixed_qscale) {
5509         s->current_picture_ptr->quality=
5510         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5511         if (s->current_picture.quality < 0)
5512             return -1;
5513     }
5514
5515     if(s->adaptive_quant){
5516         switch(s->codec_id){
5517         case CODEC_ID_MPEG4:
5518             ff_clean_mpeg4_qscales(s);
5519             break;
5520         case CODEC_ID_H263:
5521         case CODEC_ID_H263P:
5522         case CODEC_ID_FLV1:
5523             ff_clean_h263_qscales(s);
5524             break;
5525         }
5526
5527         s->lambda= s->lambda_table[0];
5528         //FIXME broken
5529     }else
5530         s->lambda= s->current_picture.quality;
5531 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5532     update_qscale(s);
5533     return 0;
5534 }
5535
5536 static int encode_picture(MpegEncContext *s, int picture_number)
5537 {
5538     int i;
5539     int bits;
5540
5541     s->picture_number = picture_number;
5542
5543     /* Reset the average MB variance */
5544     s->me.mb_var_sum_temp    =
5545     s->me.mc_mb_var_sum_temp = 0;
5546
5547     /* we need to initialize some time vars before we can encode b-frames */
5548     // RAL: Condition added for MPEG1VIDEO
5549     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5550         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5551
5552     s->me.scene_change_score=0;
5553
5554 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
5555
5556     if(s->pict_type==I_TYPE){
5557         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5558         else                        s->no_rounding=0;
5559     }else if(s->pict_type!=B_TYPE){
5560         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5561             s->no_rounding ^= 1;
5562     }
5563
5564     if(s->flags & CODEC_FLAG_PASS2){
5565         if (estimate_qp(s,1) < 0)
5566             return -1;
5567         ff_get_2pass_fcode(s);
5568     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5569         if(s->pict_type==B_TYPE)
5570             s->lambda= s->last_lambda_for[s->pict_type];
5571         else
5572             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5573         update_qscale(s);
5574     }
5575
5576     s->mb_intra=0; //for the rate distortion & bit compare functions
5577     for(i=1; i<s->avctx->thread_count; i++){
5578         ff_update_duplicate_context(s->thread_context[i], s);
5579     }
5580
5581     ff_init_me(s);
5582
5583     /* Estimate motion for every MB */
5584     if(s->pict_type != I_TYPE){
5585         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5586         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
5587         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5588             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5589                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5590             }
5591         }
5592
5593         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5594     }else /* if(s->pict_type == I_TYPE) */{
5595         /* I-Frame */
5596         for(i=0; i<s->mb_stride*s->mb_height; i++)
5597             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5598
5599         if(!s->fixed_qscale){
5600             /* finding spatial complexity for I-frame rate control */
5601             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5602         }
5603     }
5604     for(i=1; i<s->avctx->thread_count; i++){
5605         merge_context_after_me(s, s->thread_context[i]);
5606     }
5607     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5608     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5609     emms_c();
5610
5611     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5612         s->pict_type= I_TYPE;
5613         for(i=0; i<s->mb_stride*s->mb_height; i++)
5614             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5615 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5616     }
5617
5618     if(!s->umvplus){
5619         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5620             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5621
5622             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5623                 int a,b;
5624                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5625                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5626                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5627             }
5628
5629             ff_fix_long_p_mvs(s);
5630             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5631             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5632                 int j;
5633                 for(i=0; i<2; i++){
5634                     for(j=0; j<2; j++)
5635                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5636                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5637                 }
5638             }
5639         }
5640
5641         if(s->pict_type==B_TYPE){
5642             int a, b;
5643
5644             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5645             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5646             s->f_code = FFMAX(a, b);
5647
5648             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5649             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5650             s->b_code = FFMAX(a, b);
5651
5652             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5653             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5654             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5655             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5656             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5657                 int dir, j;
5658                 for(dir=0; dir<2; dir++){
5659                     for(i=0; i<2; i++){
5660                         for(j=0; j<2; j++){
5661                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5662                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5663                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5664                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5665                         }
5666                     }
5667                 }
5668             }
5669         }
5670     }
5671
5672     if (estimate_qp(s, 0) < 0)
5673         return -1;
5674
5675     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5676         s->qscale= 3; //reduce clipping problems
5677
5678     if (s->out_format == FMT_MJPEG) {
5679         /* for mjpeg, we do include qscale in the matrix */
5680         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5681         for(i=1;i<64;i++){
5682             int j= s->dsp.idct_permutation[i];
5683
5684             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5685         }
5686         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5687                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5688         s->qscale= 8;
5689     }
5690
5691     //FIXME var duplication
5692     s->current_picture_ptr->key_frame=
5693     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5694     s->current_picture_ptr->pict_type=
5695     s->current_picture.pict_type= s->pict_type;
5696
5697     if(s->current_picture.key_frame)
5698         s->picture_in_gop_number=0;
5699
5700     s->last_bits= put_bits_count(&s->pb);
5701     switch(s->out_format) {
5702     case FMT_MJPEG:
5703         mjpeg_picture_header(s);
5704         break;
5705 #ifdef CONFIG_H261_ENCODER
5706     case FMT_H261:
5707         ff_h261_encode_picture_header(s, picture_number);
5708         break;
5709 #endif
5710     case FMT_H263:
5711         if (s->codec_id == CODEC_ID_WMV2)
5712             ff_wmv2_encode_picture_header(s, picture_number);
5713         else if (s->h263_msmpeg4)
5714             msmpeg4_encode_picture_header(s, picture_number);
5715         else if (s->h263_pred)
5716             mpeg4_encode_picture_header(s, picture_number);
5717 #ifdef CONFIG_RV10_ENCODER
5718         else if (s->codec_id == CODEC_ID_RV10)
5719             rv10_encode_picture_header(s, picture_number);
5720 #endif
5721 #ifdef CONFIG_RV20_ENCODER
5722         else if (s->codec_id == CODEC_ID_RV20)
5723             rv20_encode_picture_header(s, picture_number);
5724 #endif
5725         else if (s->codec_id == CODEC_ID_FLV1)
5726             ff_flv_encode_picture_header(s, picture_number);
5727         else
5728             h263_encode_picture_header(s, picture_number);
5729         break;
5730     case FMT_MPEG1:
5731         mpeg1_encode_picture_header(s, picture_number);
5732         break;
5733     case FMT_H264:
5734         break;
5735     default:
5736         assert(0);
5737     }
5738     bits= put_bits_count(&s->pb);
5739     s->header_bits= bits - s->last_bits;
5740
5741     for(i=1; i<s->avctx->thread_count; i++){
5742         update_duplicate_context_after_me(s->thread_context[i], s);
5743     }
5744     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5745     for(i=1; i<s->avctx->thread_count; i++){
5746         merge_context_after_encode(s, s->thread_context[i]);
5747     }
5748     emms_c();
5749     return 0;
5750 }
5751
5752 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5753     const int intra= s->mb_intra;
5754     int i;
5755
5756     s->dct_count[intra]++;
5757
5758     for(i=0; i<64; i++){
5759         int level= block[i];
5760
5761         if(level){
5762             if(level>0){
5763                 s->dct_error_sum[intra][i] += level;
5764                 level -= s->dct_offset[intra][i];
5765                 if(level<0) level=0;
5766             }else{
5767                 s->dct_error_sum[intra][i] -= level;
5768                 level += s->dct_offset[intra][i];
5769                 if(level>0) level=0;
5770             }
5771             block[i]= level;
5772         }
5773     }
5774 }
5775
5776 static int dct_quantize_trellis_c(MpegEncContext *s,
5777                         DCTELEM *block, int n,
5778                         int qscale, int *overflow){
5779     const int *qmat;
5780     const uint8_t *scantable= s->intra_scantable.scantable;
5781     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5782     int max=0;
5783     unsigned int threshold1, threshold2;
5784     int bias=0;
5785     int run_tab[65];
5786     int level_tab[65];
5787     int score_tab[65];
5788     int survivor[65];
5789     int survivor_count;
5790     int last_run=0;
5791     int last_level=0;
5792     int last_score= 0;
5793     int last_i;
5794     int coeff[2][64];
5795     int coeff_count[64];
5796     int qmul, qadd, start_i, last_non_zero, i, dc;
5797     const int esc_length= s->ac_esc_length;
5798     uint8_t * length;
5799     uint8_t * last_length;
5800     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5801
5802     s->dsp.fdct (block);
5803
5804     if(s->dct_error_sum)
5805         s->denoise_dct(s, block);
5806     qmul= qscale*16;
5807     qadd= ((qscale-1)|1)*8;
5808
5809     if (s->mb_intra) {
5810         int q;
5811         if (!s->h263_aic) {
5812             if (n < 4)
5813                 q = s->y_dc_scale;
5814             else
5815                 q = s->c_dc_scale;
5816             q = q << 3;
5817         } else{
5818             /* For AIC we skip quant/dequant of INTRADC */
5819             q = 1 << 3;
5820             qadd=0;
5821         }
5822
5823         /* note: block[0] is assumed to be positive */
5824         block[0] = (block[0] + (q >> 1)) / q;
5825         start_i = 1;
5826         last_non_zero = 0;
5827         qmat = s->q_intra_matrix[qscale];
5828         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5829             bias= 1<<(QMAT_SHIFT-1);
5830         length     = s->intra_ac_vlc_length;
5831         last_length= s->intra_ac_vlc_last_length;
5832     } else {
5833         start_i = 0;
5834         last_non_zero = -1;
5835         qmat = s->q_inter_matrix[qscale];
5836         length     = s->inter_ac_vlc_length;
5837         last_length= s->inter_ac_vlc_last_length;
5838     }
5839     last_i= start_i;
5840
5841     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5842     threshold2= (threshold1<<1);
5843
5844     for(i=63; i>=start_i; i--) {
5845         const int j = scantable[i];
5846         int level = block[j] * qmat[j];
5847
5848         if(((unsigned)(level+threshold1))>threshold2){
5849             last_non_zero = i;
5850             break;
5851         }
5852     }
5853
5854     for(i=start_i; i<=last_non_zero; i++) {
5855         const int j = scantable[i];
5856         int level = block[j] * qmat[j];
5857
5858 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5859 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5860         if(((unsigned)(level+threshold1))>threshold2){
5861             if(level>0){
5862                 level= (bias + level)>>QMAT_SHIFT;
5863                 coeff[0][i]= level;
5864                 coeff[1][i]= level-1;
5865 //                coeff[2][k]= level-2;
5866             }else{
5867                 level= (bias - level)>>QMAT_SHIFT;
5868                 coeff[0][i]= -level;
5869                 coeff[1][i]= -level+1;
5870 //                coeff[2][k]= -level+2;
5871             }
5872             coeff_count[i]= FFMIN(level, 2);
5873             assert(coeff_count[i]);
5874             max |=level;
5875         }else{
5876             coeff[0][i]= (level>>31)|1;
5877             coeff_count[i]= 1;
5878         }
5879     }
5880
5881     *overflow= s->max_qcoeff < max; //overflow might have happened
5882
5883     if(last_non_zero < start_i){
5884         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5885         return last_non_zero;
5886     }
5887
5888     score_tab[start_i]= 0;
5889     survivor[0]= start_i;
5890     survivor_count= 1;
5891
5892     for(i=start_i; i<=last_non_zero; i++){
5893         int level_index, j;
5894         const int dct_coeff= FFABS(block[ scantable[i] ]);
5895         const int zero_distoration= dct_coeff*dct_coeff;
5896         int best_score=256*256*256*120;
5897         for(level_index=0; level_index < coeff_count[i]; level_index++){
5898             int distoration;
5899             int level= coeff[level_index][i];
5900             const int alevel= FFABS(level);
5901             int unquant_coeff;
5902
5903             assert(level);
5904
5905             if(s->out_format == FMT_H263){
5906                 unquant_coeff= alevel*qmul + qadd;
5907             }else{ //MPEG1
5908                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5909                 if(s->mb_intra){
5910                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5911                         unquant_coeff =   (unquant_coeff - 1) | 1;
5912                 }else{
5913                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5914                         unquant_coeff =   (unquant_coeff - 1) | 1;
5915                 }
5916                 unquant_coeff<<= 3;
5917             }
5918
5919             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5920             level+=64;
5921             if((level&(~127)) == 0){
5922                 for(j=survivor_count-1; j>=0; j--){
5923                     int run= i - survivor[j];
5924                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5925                     score += score_tab[i-run];
5926
5927                     if(score < best_score){
5928                         best_score= score;
5929                         run_tab[i+1]= run;
5930                         level_tab[i+1]= level-64;
5931                     }
5932                 }
5933
5934                 if(s->out_format == FMT_H263){
5935                     for(j=survivor_count-1; j>=0; j--){
5936                         int run= i - survivor[j];
5937                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5938                         score += score_tab[i-run];
5939                         if(score < last_score){
5940                             last_score= score;
5941                             last_run= run;
5942                             last_level= level-64;
5943                             last_i= i+1;
5944                         }
5945                     }
5946                 }
5947             }else{
5948                 distoration += esc_length*lambda;
5949                 for(j=survivor_count-1; j>=0; j--){
5950                     int run= i - survivor[j];
5951                     int score= distoration + score_tab[i-run];
5952
5953                     if(score < best_score){
5954                         best_score= score;
5955                         run_tab[i+1]= run;
5956                         level_tab[i+1]= level-64;
5957                     }
5958                 }
5959
5960                 if(s->out_format == FMT_H263){
5961                   for(j=survivor_count-1; j>=0; j--){
5962                         int run= i - survivor[j];
5963                         int score= distoration + score_tab[i-run];
5964                         if(score < last_score){
5965                             last_score= score;
5966                             last_run= run;
5967                             last_level= level-64;
5968                             last_i= i+1;
5969                         }
5970                     }
5971                 }
5972             }
5973         }
5974
5975         score_tab[i+1]= best_score;
5976
5977         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
5978         if(last_non_zero <= 27){
5979             for(; survivor_count; survivor_count--){
5980                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5981                     break;
5982             }
5983         }else{
5984             for(; survivor_count; survivor_count--){
5985                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5986                     break;
5987             }
5988         }
5989
5990         survivor[ survivor_count++ ]= i+1;
5991     }
5992
5993     if(s->out_format != FMT_H263){
5994         last_score= 256*256*256*120;
5995         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5996             int score= score_tab[i];
5997             if(i) score += lambda*2; //FIXME exacter?
5998
5999             if(score < last_score){
6000                 last_score= score;
6001                 last_i= i;
6002                 last_level= level_tab[i];
6003                 last_run= run_tab[i];
6004             }
6005         }
6006     }
6007
6008     s->coded_score[n] = last_score;
6009
6010     dc= FFABS(block[0]);
6011     last_non_zero= last_i - 1;
6012     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6013
6014     if(last_non_zero < start_i)
6015         return last_non_zero;
6016
6017     if(last_non_zero == 0 && start_i == 0){
6018         int best_level= 0;
6019         int best_score= dc * dc;
6020
6021         for(i=0; i<coeff_count[0]; i++){
6022             int level= coeff[i][0];
6023             int alevel= FFABS(level);
6024             int unquant_coeff, score, distortion;
6025
6026             if(s->out_format == FMT_H263){
6027                     unquant_coeff= (alevel*qmul + qadd)>>3;
6028             }else{ //MPEG1
6029                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6030                     unquant_coeff =   (unquant_coeff - 1) | 1;
6031             }
6032             unquant_coeff = (unquant_coeff + 4) >> 3;
6033             unquant_coeff<<= 3 + 3;
6034
6035             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6036             level+=64;
6037             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6038             else                    score= distortion + esc_length*lambda;
6039
6040             if(score < best_score){
6041                 best_score= score;
6042                 best_level= level - 64;
6043             }
6044         }
6045         block[0]= best_level;
6046         s->coded_score[n] = best_score - dc*dc;
6047         if(best_level == 0) return -1;
6048         else                return last_non_zero;
6049     }
6050
6051     i= last_i;
6052     assert(last_level);
6053
6054     block[ perm_scantable[last_non_zero] ]= last_level;
6055     i -= last_run + 1;
6056
6057     for(; i>start_i; i -= run_tab[i] + 1){
6058         block[ perm_scantable[i-1] ]= level_tab[i];
6059     }
6060
6061     return last_non_zero;
6062 }
6063
6064 //#define REFINE_STATS 1
6065 static int16_t basis[64][64];
6066
6067 static void build_basis(uint8_t *perm){
6068     int i, j, x, y;
6069     emms_c();
6070     for(i=0; i<8; i++){
6071         for(j=0; j<8; j++){
6072             for(y=0; y<8; y++){
6073                 for(x=0; x<8; x++){
6074                     double s= 0.25*(1<<BASIS_SHIFT);
6075                     int index= 8*i + j;
6076                     int perm_index= perm[index];
6077                     if(i==0) s*= sqrt(0.5);
6078                     if(j==0) s*= sqrt(0.5);
6079                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6080                 }
6081             }
6082         }
6083     }
6084 }
6085
6086 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6087                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6088                         int n, int qscale){
6089     int16_t rem[64];
6090     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6091     const int *qmat;
6092     const uint8_t *scantable= s->intra_scantable.scantable;
6093     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6094 //    unsigned int threshold1, threshold2;
6095 //    int bias=0;
6096     int run_tab[65];
6097     int prev_run=0;
6098     int prev_level=0;
6099     int qmul, qadd, start_i, last_non_zero, i, dc;
6100     uint8_t * length;
6101     uint8_t * last_length;
6102     int lambda;
6103     int rle_index, run, q, sum;
6104 #ifdef REFINE_STATS
6105 static int count=0;
6106 static int after_last=0;
6107 static int to_zero=0;
6108 static int from_zero=0;
6109 static int raise=0;
6110 static int lower=0;
6111 static int messed_sign=0;
6112 #endif
6113
6114     if(basis[0][0] == 0)
6115         build_basis(s->dsp.idct_permutation);
6116
6117     qmul= qscale*2;
6118     qadd= (qscale-1)|1;
6119     if (s->mb_intra) {
6120         if (!s->h263_aic) {
6121             if (n < 4)
6122                 q = s->y_dc_scale;
6123             else
6124                 q = s->c_dc_scale;
6125         } else{
6126             /* For AIC we skip quant/dequant of INTRADC */
6127             q = 1;
6128             qadd=0;
6129         }
6130         q <<= RECON_SHIFT-3;
6131         /* note: block[0] is assumed to be positive */
6132         dc= block[0]*q;
6133 //        block[0] = (block[0] + (q >> 1)) / q;
6134         start_i = 1;
6135         qmat = s->q_intra_matrix[qscale];
6136 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6137 //            bias= 1<<(QMAT_SHIFT-1);
6138         length     = s->intra_ac_vlc_length;
6139         last_length= s->intra_ac_vlc_last_length;
6140     } else {
6141         dc= 0;
6142         start_i = 0;
6143         qmat = s->q_inter_matrix[qscale];
6144         length     = s->inter_ac_vlc_length;
6145         last_length= s->inter_ac_vlc_last_length;
6146     }
6147     last_non_zero = s->block_last_index[n];
6148
6149 #ifdef REFINE_STATS
6150 {START_TIMER
6151 #endif
6152     dc += (1<<(RECON_SHIFT-1));
6153     for(i=0; i<64; i++){
6154         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
6155     }
6156 #ifdef REFINE_STATS
6157 STOP_TIMER("memset rem[]")}
6158 #endif
6159     sum=0;
6160     for(i=0; i<64; i++){
6161         int one= 36;
6162         int qns=4;
6163         int w;
6164
6165         w= FFABS(weight[i]) + qns*one;
6166         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6167
6168         weight[i] = w;
6169 //        w=weight[i] = (63*qns + (w/2)) / w;
6170
6171         assert(w>0);
6172         assert(w<(1<<6));
6173         sum += w*w;
6174     }
6175     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6176 #ifdef REFINE_STATS
6177 {START_TIMER
6178 #endif
6179     run=0;
6180     rle_index=0;
6181     for(i=start_i; i<=last_non_zero; i++){
6182         int j= perm_scantable[i];
6183         const int level= block[j];
6184         int coeff;
6185
6186         if(level){
6187             if(level<0) coeff= qmul*level - qadd;
6188             else        coeff= qmul*level + qadd;
6189             run_tab[rle_index++]=run;
6190             run=0;
6191
6192             s->dsp.add_8x8basis(rem, basis[j], coeff);
6193         }else{
6194             run++;
6195         }
6196     }
6197 #ifdef REFINE_STATS
6198 if(last_non_zero>0){
6199 STOP_TIMER("init rem[]")
6200 }
6201 }
6202
6203 {START_TIMER
6204 #endif
6205     for(;;){
6206         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6207         int best_coeff=0;
6208         int best_change=0;
6209         int run2, best_unquant_change=0, analyze_gradient;
6210 #ifdef REFINE_STATS
6211 {START_TIMER
6212 #endif
6213         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6214
6215         if(analyze_gradient){
6216 #ifdef REFINE_STATS
6217 {START_TIMER
6218 #endif
6219             for(i=0; i<64; i++){
6220                 int w= weight[i];
6221
6222                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6223             }
6224 #ifdef REFINE_STATS
6225 STOP_TIMER("rem*w*w")}
6226 {START_TIMER
6227 #endif
6228             s->dsp.fdct(d1);
6229 #ifdef REFINE_STATS
6230 STOP_TIMER("dct")}
6231 #endif
6232         }
6233
6234         if(start_i){
6235             const int level= block[0];
6236             int change, old_coeff;
6237
6238             assert(s->mb_intra);
6239
6240             old_coeff= q*level;
6241
6242             for(change=-1; change<=1; change+=2){
6243                 int new_level= level + change;
6244                 int score, new_coeff;
6245
6246                 new_coeff= q*new_level;
6247                 if(new_coeff >= 2048 || new_coeff < 0)
6248                     continue;
6249
6250                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6251                 if(score<best_score){
6252                     best_score= score;
6253                     best_coeff= 0;
6254                     best_change= change;
6255                     best_unquant_change= new_coeff - old_coeff;
6256                 }
6257             }
6258         }
6259
6260         run=0;
6261         rle_index=0;
6262         run2= run_tab[rle_index++];
6263         prev_level=0;
6264         prev_run=0;
6265
6266         for(i=start_i; i<64; i++){
6267             int j= perm_scantable[i];
6268             const int level= block[j];
6269             int change, old_coeff;
6270
6271             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6272                 break;
6273
6274             if(level){
6275                 if(level<0) old_coeff= qmul*level - qadd;
6276                 else        old_coeff= qmul*level + qadd;
6277                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6278             }else{
6279                 old_coeff=0;
6280                 run2--;
6281                 assert(run2>=0 || i >= last_non_zero );
6282             }
6283
6284             for(change=-1; change<=1; change+=2){
6285                 int new_level= level + change;
6286                 int score, new_coeff, unquant_change;
6287
6288                 score=0;
6289                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6290                    continue;
6291
6292                 if(new_level){
6293                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6294                     else            new_coeff= qmul*new_level + qadd;
6295                     if(new_coeff >= 2048 || new_coeff <= -2048)
6296                         continue;
6297                     //FIXME check for overflow
6298
6299                     if(level){
6300                         if(level < 63 && level > -63){
6301                             if(i < last_non_zero)
6302                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6303                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6304                             else
6305                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6306                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6307                         }
6308                     }else{
6309                         assert(FFABS(new_level)==1);
6310
6311                         if(analyze_gradient){
6312                             int g= d1[ scantable[i] ];
6313                             if(g && (g^new_level) >= 0)
6314                                 continue;
6315                         }
6316
6317                         if(i < last_non_zero){
6318                             int next_i= i + run2 + 1;
6319                             int next_level= block[ perm_scantable[next_i] ] + 64;
6320
6321                             if(next_level&(~127))
6322                                 next_level= 0;
6323
6324                             if(next_i < last_non_zero)
6325                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6326                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6327                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6328                             else
6329                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6330                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6331                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6332                         }else{
6333                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6334                             if(prev_level){
6335                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6336                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6337                             }
6338                         }
6339                     }
6340                 }else{
6341                     new_coeff=0;
6342                     assert(FFABS(level)==1);
6343
6344                     if(i < last_non_zero){
6345                         int next_i= i + run2 + 1;
6346                         int next_level= block[ perm_scantable[next_i] ] + 64;
6347
6348                         if(next_level&(~127))
6349                             next_level= 0;
6350
6351                         if(next_i < last_non_zero)
6352                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6353                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6354                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6355                         else
6356                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6357                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6358                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6359                     }else{
6360                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6361                         if(prev_level){
6362                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6363                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6364                         }
6365                     }
6366                 }
6367
6368                 score *= lambda;
6369
6370                 unquant_change= new_coeff - old_coeff;
6371                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6372
6373                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6374                 if(score<best_score){
6375                     best_score= score;
6376                     best_coeff= i;
6377                     best_change= change;
6378                     best_unquant_change= unquant_change;
6379                 }
6380             }
6381             if(level){
6382                 prev_level= level + 64;
6383                 if(prev_level&(~127))
6384                     prev_level= 0;
6385                 prev_run= run;
6386                 run=0;
6387             }else{
6388                 run++;
6389             }
6390         }
6391 #ifdef REFINE_STATS
6392 STOP_TIMER("iterative step")}
6393 #endif
6394
6395         if(best_change){
6396             int j= perm_scantable[ best_coeff ];
6397
6398             block[j] += best_change;
6399
6400             if(best_coeff > last_non_zero){
6401                 last_non_zero= best_coeff;
6402                 assert(block[j]);
6403 #ifdef REFINE_STATS
6404 after_last++;
6405 #endif
6406             }else{
6407 #ifdef REFINE_STATS
6408 if(block[j]){
6409     if(block[j] - best_change){
6410         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6411             raise++;
6412         }else{
6413             lower++;
6414         }
6415     }else{
6416         from_zero++;
6417     }
6418 }else{
6419     to_zero++;
6420 }
6421 #endif
6422                 for(; last_non_zero>=start_i; last_non_zero--){
6423                     if(block[perm_scantable[last_non_zero]])
6424                         break;
6425                 }
6426             }
6427 #ifdef REFINE_STATS
6428 count++;
6429 if(256*256*256*64 % count == 0){
6430     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6431 }
6432 #endif
6433             run=0;
6434             rle_index=0;
6435             for(i=start_i; i<=last_non_zero; i++){
6436                 int j= perm_scantable[i];
6437                 const int level= block[j];
6438
6439                  if(level){
6440                      run_tab[rle_index++]=run;
6441                      run=0;
6442                  }else{
6443                      run++;
6444                  }
6445             }
6446
6447             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6448         }else{
6449             break;
6450         }
6451     }
6452 #ifdef REFINE_STATS
6453 if(last_non_zero>0){
6454 STOP_TIMER("iterative search")
6455 }
6456 }
6457 #endif
6458
6459     return last_non_zero;
6460 }
6461
6462 static int dct_quantize_c(MpegEncContext *s,
6463                         DCTELEM *block, int n,
6464                         int qscale, int *overflow)
6465 {
6466     int i, j, level, last_non_zero, q, start_i;
6467     const int *qmat;
6468     const uint8_t *scantable= s->intra_scantable.scantable;
6469     int bias;
6470     int max=0;
6471     unsigned int threshold1, threshold2;
6472
6473     s->dsp.fdct (block);
6474
6475     if(s->dct_error_sum)
6476         s->denoise_dct(s, block);
6477
6478     if (s->mb_intra) {
6479         if (!s->h263_aic) {
6480             if (n < 4)
6481                 q = s->y_dc_scale;
6482             else
6483                 q = s->c_dc_scale;
6484             q = q << 3;
6485         } else
6486             /* For AIC we skip quant/dequant of INTRADC */
6487             q = 1 << 3;
6488
6489         /* note: block[0] is assumed to be positive */
6490         block[0] = (block[0] + (q >> 1)) / q;
6491         start_i = 1;
6492         last_non_zero = 0;
6493         qmat = s->q_intra_matrix[qscale];
6494         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6495     } else {
6496         start_i = 0;
6497         last_non_zero = -1;
6498         qmat = s->q_inter_matrix[qscale];
6499         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6500     }
6501     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6502     threshold2= (threshold1<<1);
6503     for(i=63;i>=start_i;i--) {
6504         j = scantable[i];
6505         level = block[j] * qmat[j];
6506
6507         if(((unsigned)(level+threshold1))>threshold2){
6508             last_non_zero = i;
6509             break;
6510         }else{
6511             block[j]=0;
6512         }
6513     }
6514     for(i=start_i; i<=last_non_zero; i++) {
6515         j = scantable[i];
6516         level = block[j] * qmat[j];
6517
6518 //        if(   bias+level >= (1<<QMAT_SHIFT)
6519 //           || bias-level >= (1<<QMAT_SHIFT)){
6520         if(((unsigned)(level+threshold1))>threshold2){
6521             if(level>0){
6522                 level= (bias + level)>>QMAT_SHIFT;
6523                 block[j]= level;
6524             }else{
6525                 level= (bias - level)>>QMAT_SHIFT;
6526                 block[j]= -level;
6527             }
6528             max |=level;
6529         }else{
6530             block[j]=0;
6531         }
6532     }
6533     *overflow= s->max_qcoeff < max; //overflow might have happened
6534
6535     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6536     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6537         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6538
6539     return last_non_zero;
6540 }
6541
6542 #endif //CONFIG_ENCODERS
6543
6544 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6545                                    DCTELEM *block, int n, int qscale)
6546 {
6547     int i, level, nCoeffs;
6548     const uint16_t *quant_matrix;
6549
6550     nCoeffs= s->block_last_index[n];
6551
6552     if (n < 4)
6553         block[0] = block[0] * s->y_dc_scale;
6554     else
6555         block[0] = block[0] * s->c_dc_scale;
6556     /* XXX: only mpeg1 */
6557     quant_matrix = s->intra_matrix;
6558     for(i=1;i<=nCoeffs;i++) {
6559         int j= s->intra_scantable.permutated[i];
6560         level = block[j];
6561         if (level) {
6562             if (level < 0) {
6563                 level = -level;
6564                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6565                 level = (level - 1) | 1;
6566                 level = -level;
6567             } else {
6568                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6569                 level = (level - 1) | 1;
6570             }
6571             block[j] = level;
6572         }
6573     }
6574 }
6575
6576 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6577                                    DCTELEM *block, int n, int qscale)
6578 {
6579     int i, level, nCoeffs;
6580     const uint16_t *quant_matrix;
6581
6582     nCoeffs= s->block_last_index[n];
6583
6584     quant_matrix = s->inter_matrix;
6585     for(i=0; i<=nCoeffs; i++) {
6586         int j= s->intra_scantable.permutated[i];
6587         level = block[j];
6588         if (level) {
6589             if (level < 0) {
6590                 level = -level;
6591                 level = (((level << 1) + 1) * qscale *
6592                          ((int) (quant_matrix[j]))) >> 4;
6593                 level = (level - 1) | 1;
6594                 level = -level;
6595             } else {
6596                 level = (((level << 1) + 1) * qscale *
6597                          ((int) (quant_matrix[j]))) >> 4;
6598                 level = (level - 1) | 1;
6599             }
6600             block[j] = level;
6601         }
6602     }
6603 }
6604
6605 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6606                                    DCTELEM *block, int n, int qscale)
6607 {
6608     int i, level, nCoeffs;
6609     const uint16_t *quant_matrix;
6610
6611     if(s->alternate_scan) nCoeffs= 63;
6612     else nCoeffs= s->block_last_index[n];
6613
6614     if (n < 4)
6615         block[0] = block[0] * s->y_dc_scale;
6616     else
6617         block[0] = block[0] * s->c_dc_scale;
6618     quant_matrix = s->intra_matrix;
6619     for(i=1;i<=nCoeffs;i++) {
6620         int j= s->intra_scantable.permutated[i];
6621         level = block[j];
6622         if (level) {
6623             if (level < 0) {
6624                 level = -level;
6625                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6626                 level = -level;
6627             } else {
6628                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6629             }
6630             block[j] = level;
6631         }
6632     }
6633 }
6634
6635 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6636                                    DCTELEM *block, int n, int qscale)
6637 {
6638     int i, level, nCoeffs;
6639     const uint16_t *quant_matrix;
6640     int sum=-1;
6641
6642     if(s->alternate_scan) nCoeffs= 63;
6643     else nCoeffs= s->block_last_index[n];
6644
6645     if (n < 4)
6646         block[0] = block[0] * s->y_dc_scale;
6647     else
6648         block[0] = block[0] * s->c_dc_scale;
6649     quant_matrix = s->intra_matrix;
6650     for(i=1;i<=nCoeffs;i++) {
6651         int j= s->intra_scantable.permutated[i];
6652         level = block[j];
6653         if (level) {
6654             if (level < 0) {
6655                 level = -level;
6656                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6657                 level = -level;
6658             } else {
6659                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6660             }
6661             block[j] = level;
6662             sum+=level;
6663         }
6664     }
6665     block[63]^=sum&1;
6666 }
6667
6668 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6669                                    DCTELEM *block, int n, int qscale)
6670 {
6671     int i, level, nCoeffs;
6672     const uint16_t *quant_matrix;
6673     int sum=-1;
6674
6675     if(s->alternate_scan) nCoeffs= 63;
6676     else nCoeffs= s->block_last_index[n];
6677
6678     quant_matrix = s->inter_matrix;
6679     for(i=0; i<=nCoeffs; i++) {
6680         int j= s->intra_scantable.permutated[i];
6681         level = block[j];
6682         if (level) {
6683             if (level < 0) {
6684                 level = -level;
6685                 level = (((level << 1) + 1) * qscale *
6686                          ((int) (quant_matrix[j]))) >> 4;
6687                 level = -level;
6688             } else {
6689                 level = (((level << 1) + 1) * qscale *
6690                          ((int) (quant_matrix[j]))) >> 4;
6691             }
6692             block[j] = level;
6693             sum+=level;
6694         }
6695     }
6696     block[63]^=sum&1;
6697 }
6698
6699 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6700                                   DCTELEM *block, int n, int qscale)
6701 {
6702     int i, level, qmul, qadd;
6703     int nCoeffs;
6704
6705     assert(s->block_last_index[n]>=0);
6706
6707     qmul = qscale << 1;
6708
6709     if (!s->h263_aic) {
6710         if (n < 4)
6711             block[0] = block[0] * s->y_dc_scale;
6712         else
6713             block[0] = block[0] * s->c_dc_scale;
6714         qadd = (qscale - 1) | 1;
6715     }else{
6716         qadd = 0;
6717     }
6718     if(s->ac_pred)
6719         nCoeffs=63;
6720     else
6721         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6722
6723     for(i=1; i<=nCoeffs; i++) {
6724         level = block[i];
6725         if (level) {
6726             if (level < 0) {
6727                 level = level * qmul - qadd;
6728             } else {
6729                 level = level * qmul + qadd;
6730             }
6731             block[i] = level;
6732         }
6733     }
6734 }
6735
6736 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6737                                   DCTELEM *block, int n, int qscale)
6738 {
6739     int i, level, qmul, qadd;
6740     int nCoeffs;
6741
6742     assert(s->block_last_index[n]>=0);
6743
6744     qadd = (qscale - 1) | 1;
6745     qmul = qscale << 1;
6746
6747     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6748
6749     for(i=0; i<=nCoeffs; i++) {
6750         level = block[i];
6751         if (level) {
6752             if (level < 0) {
6753                 level = level * qmul - qadd;
6754             } else {
6755                 level = level * qmul + qadd;
6756             }
6757             block[i] = level;
6758         }
6759     }
6760 }
6761
6762 #ifdef CONFIG_ENCODERS
6763 AVCodec h263_encoder = {
6764     "h263",
6765     CODEC_TYPE_VIDEO,
6766     CODEC_ID_H263,
6767     sizeof(MpegEncContext),
6768     MPV_encode_init,
6769     MPV_encode_picture,
6770     MPV_encode_end,
6771     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6772 };
6773
6774 AVCodec h263p_encoder = {
6775     "h263p",
6776     CODEC_TYPE_VIDEO,
6777     CODEC_ID_H263P,
6778     sizeof(MpegEncContext),
6779     MPV_encode_init,
6780     MPV_encode_picture,
6781     MPV_encode_end,
6782     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6783 };
6784
6785 AVCodec flv_encoder = {
6786     "flv",
6787     CODEC_TYPE_VIDEO,
6788     CODEC_ID_FLV1,
6789     sizeof(MpegEncContext),
6790     MPV_encode_init,
6791     MPV_encode_picture,
6792     MPV_encode_end,
6793     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6794 };
6795
6796 AVCodec rv10_encoder = {
6797     "rv10",
6798     CODEC_TYPE_VIDEO,
6799     CODEC_ID_RV10,
6800     sizeof(MpegEncContext),
6801     MPV_encode_init,
6802     MPV_encode_picture,
6803     MPV_encode_end,
6804     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6805 };
6806
6807 AVCodec rv20_encoder = {
6808     "rv20",
6809     CODEC_TYPE_VIDEO,
6810     CODEC_ID_RV20,
6811     sizeof(MpegEncContext),
6812     MPV_encode_init,
6813     MPV_encode_picture,
6814     MPV_encode_end,
6815     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6816 };
6817
6818 AVCodec mpeg4_encoder = {
6819     "mpeg4",
6820     CODEC_TYPE_VIDEO,
6821     CODEC_ID_MPEG4,
6822     sizeof(MpegEncContext),
6823     MPV_encode_init,
6824     MPV_encode_picture,
6825     MPV_encode_end,
6826     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6827     .capabilities= CODEC_CAP_DELAY,
6828 };
6829
6830 AVCodec msmpeg4v1_encoder = {
6831     "msmpeg4v1",
6832     CODEC_TYPE_VIDEO,
6833     CODEC_ID_MSMPEG4V1,
6834     sizeof(MpegEncContext),
6835     MPV_encode_init,
6836     MPV_encode_picture,
6837     MPV_encode_end,
6838     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6839 };
6840
6841 AVCodec msmpeg4v2_encoder = {
6842     "msmpeg4v2",
6843     CODEC_TYPE_VIDEO,
6844     CODEC_ID_MSMPEG4V2,
6845     sizeof(MpegEncContext),
6846     MPV_encode_init,
6847     MPV_encode_picture,
6848     MPV_encode_end,
6849     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6850 };
6851
6852 AVCodec msmpeg4v3_encoder = {
6853     "msmpeg4",
6854     CODEC_TYPE_VIDEO,
6855     CODEC_ID_MSMPEG4V3,
6856     sizeof(MpegEncContext),
6857     MPV_encode_init,
6858     MPV_encode_picture,
6859     MPV_encode_end,
6860     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6861 };
6862
6863 AVCodec wmv1_encoder = {
6864     "wmv1",
6865     CODEC_TYPE_VIDEO,
6866     CODEC_ID_WMV1,
6867     sizeof(MpegEncContext),
6868     MPV_encode_init,
6869     MPV_encode_picture,
6870     MPV_encode_end,
6871     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6872 };
6873
6874 AVCodec mjpeg_encoder = {
6875     "mjpeg",
6876     CODEC_TYPE_VIDEO,
6877     CODEC_ID_MJPEG,
6878     sizeof(MpegEncContext),
6879     MPV_encode_init,
6880     MPV_encode_picture,
6881     MPV_encode_end,
6882     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
6883 };
6884
6885 #endif //CONFIG_ENCODERS