]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
10l in h264 vismb/vismv (broke on 2006-03-09)
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
#ifdef CONFIG_ENCODERS
static void encode_picture(MpegEncContext *s, int picture_number);
#endif //CONFIG_ENCODERS
/* C reference implementations of the per-standard dequantizers; installed
   as function pointers in DCT_common_init(), where arch-specific init
   (MPV_common_init_*) may presumably replace them -- see below */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
#ifdef CONFIG_ENCODERS
/* encoder-only quantizers and helpers, defined later in this file */
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
#endif //CONFIG_ENCODERS

#ifdef HAVE_XVMC
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
extern void XVMC_field_end(MpegEncContext *s);
extern void XVMC_decode_mb(MpegEncContext *s);
#endif

/* global function pointer, defaults to the C implementation; non-static so
   arch-specific code can presumably override it -- TODO confirm callers */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
/* for jpeg fast DCT */
#define CONST_BITS 14

static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* chroma MV rounding table; presumably implements the H.263/MPEG-4 chroma
   rounding rule -- the users of this table are outside this chunk */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity mapping: by default chroma uses the same qscale as luma */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};

#ifdef CONFIG_ENCODERS
/* lazily initialized once in MPV_encode_defaults() and then shared by all
   encoder instances */
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
static uint8_t default_fcode_tab[MAX_MV*2+1];

enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
/**
 * Finds the next start code prefix (00 00 01 xx) in [p, end).
 * *state carries the last bytes seen across calls, so a start code split
 * between two buffers is still detected.
 * @return pointer just past the start code, or end if none was found;
 *         *state is updated with the bytes consumed
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first check whether the carried-over state plus up to 3 new bytes
       already complete a start code */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* scan for 00 00 01, skipping as many bytes per step as the byte values
       at p[-1]/p[-2]/p[-3] permit */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* reload *state with the last 4 bytes before the return position */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
260
261 /* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* install the C reference implementations first; the arch-specific
       init calls below may replace them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* remember the (possibly arch-optimized) quantizer before the trellis
       quantizer potentially replaces the default one */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
319
320 static void copy_picture(Picture *dst, Picture *src){
321     *dst = *src;
322     dst->type= FF_BUFFER_TYPE_COPY;
323 }
324
325 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
326     int i;
327
328     dst->pict_type              = src->pict_type;
329     dst->quality                = src->quality;
330     dst->coded_picture_number   = src->coded_picture_number;
331     dst->display_picture_number = src->display_picture_number;
332 //    dst->reference              = src->reference;
333     dst->pts                    = src->pts;
334     dst->interlaced_frame       = src->interlaced_frame;
335     dst->top_field_first        = src->top_field_first;
336
337     if(s->avctx->me_threshold){
338         if(!src->motion_val[0])
339             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
340         if(!src->mb_type)
341             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
342         if(!src->ref_index[0])
343             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
344         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
345             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
346             src->motion_subsample_log2, dst->motion_subsample_log2);
347
348         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
349
350         for(i=0; i<2; i++){
351             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
352             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
353
354             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
355                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
356             }
357             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
358                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
359             }
360         }
361     }
362 }
363
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0
 * @param shared if set, pic->data[] must already be valid (user-supplied
 *               buffer, no get_buffer() call is made)
 * @return 0 on success, -1 on allocation failure or bad strides
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant once established */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* qscale_table doubles as the "side data already allocated" marker */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* h264 stores motion vectors on a 4x4 grid */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* everything else uses an 8x8 grid */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
448
449 /**
450  * deallocates a picture
451  */
452 static void free_picture(MpegEncContext *s, Picture *pic){
453     int i;
454
455     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
456         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
457     }
458
459     av_freep(&pic->mb_var);
460     av_freep(&pic->mc_mb_var);
461     av_freep(&pic->mb_mean);
462     av_freep(&pic->mbskip_table);
463     av_freep(&pic->qscale_table);
464     av_freep(&pic->mb_type_base);
465     av_freep(&pic->dct_coeff);
466     av_freep(&pic->pan_scan);
467     pic->mb_type= NULL;
468     for(i=0; i<2; i++){
469         av_freep(&pic->motion_val_base[i]);
470         av_freep(&pic->ref_index[i]);
471     }
472
473     if(pic->type == FF_BUFFER_TYPE_SHARED){
474         for(i=0; i<4; i++){
475             pic->base[i]=
476             pic->data[i]= NULL;
477         }
478         pic->type= 0;
479     }
480 }
481
/**
 * allocates the per-thread scratch buffers and DCT block arrays of s.
 * @param base currently unused -- NOTE(review): confirm before removing
 * @return 0 on success, -1 on allocation failure (already-allocated parts
 *         are freed later through MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads share one allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
511
512 static void free_duplicate_context(MpegEncContext *s){
513     if(s==NULL) return;
514
515     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
516     av_freep(&s->me.scratchpad);
517     s->rd_scratchpad=
518     s->b_scratchpad=
519     s->obmc_scratchpad= NULL;
520
521     av_freep(&s->dct_error_sum);
522     av_freep(&s->me.map);
523     av_freep(&s->me.score_map);
524     av_freep(&s->blocks);
525     s->block= NULL;
526 }
527
528 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
529 #define COPY(a) bak->a= src->a
530     COPY(allocated_edge_emu_buffer);
531     COPY(edge_emu_buffer);
532     COPY(me.scratchpad);
533     COPY(rd_scratchpad);
534     COPY(b_scratchpad);
535     COPY(obmc_scratchpad);
536     COPY(me.map);
537     COPY(me.score_map);
538     COPY(blocks);
539     COPY(block);
540     COPY(start_mb_y);
541     COPY(end_mb_y);
542     COPY(me.map_generation);
543     COPY(pb);
544     COPY(dct_error_sum);
545     COPY(dct_count[0]);
546     COPY(dct_count[1]);
547 #undef COPY
548 }
549
550 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
551     MpegEncContext bak;
552     int i;
553     //FIXME copy only needed parts
554 //START_TIMER
555     backup_duplicate_context(&bak, dst);
556     memcpy(dst, src, sizeof(MpegEncContext));
557     backup_duplicate_context(dst, &bak);
558     for(i=0;i<12;i++){
559         dst->pblocks[i] = (short *)(&dst->block[i]);
560     }
561 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
562 }
563
564 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
565 #define COPY(a) dst->a= src->a
566     COPY(pict_type);
567     COPY(current_picture);
568     COPY(f_code);
569     COPY(b_code);
570     COPY(qscale);
571     COPY(lambda);
572     COPY(lambda2);
573     COPY(picture_in_gop_number);
574     COPY(gop_picture_number);
575     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
576     COPY(progressive_frame); //FIXME don't set in encode_header
577     COPY(partitioned_frame); //FIXME don't set in encode_header
578 #undef COPY
579 }
580
581 /**
582  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585 static void MPV_common_defaults(MpegEncContext *s){
586     s->y_dc_scale_table=
587     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
588     s->chroma_qscale_table= ff_default_chroma_qscale_table;
589     s->progressive_frame= 1;
590     s->progressive_sequence= 1;
591     s->picture_structure= PICT_FRAME;
592
593     s->coded_picture_number = 0;
594     s->picture_number = 0;
595     s->input_picture_number = 0;
596
597     s->picture_in_gop_number = 0;
598
599     s->f_code = 1;
600     s->b_code = 1;
601 }
602
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 * (currently decoding needs no defaults beyond the common ones)
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
610
611 /**
612  * sets the given MpegEncContext to defaults for encoding.
613  * the changed fields will not depend upon the prior state of the MpegEncContext.
614  */
615
616 #ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0; // guards one-time init of the shared static tables
                       // NOTE(review): not thread-safe if two encoders are
                       // opened concurrently -- confirm callers serialize init

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        // NOTE(review): av_mallocz() result is unchecked; on OOM
        // s->me.mv_penalty ends up NULL
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers MVs in -16..15 */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
636 #endif //CONFIG_ENCODERS
637
638 /**
639  * init common structure for both encoder and decoder.
640  * this assumes that some variables like width/height are already set
641  */
642 int MPV_common_init(MpegEncContext *s)
643 {
644     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
645
646     s->mb_height = (s->height + 15) / 16;
647
648     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
649         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
650         return -1;
651     }
652
653     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
654         return -1;
655
656     dsputil_init(&s->dsp, s->avctx);
657     DCT_common_init(s);
658
659     s->flags= s->avctx->flags;
660     s->flags2= s->avctx->flags2;
661
662     s->mb_width  = (s->width  + 15) / 16;
663     s->mb_stride = s->mb_width + 1;
664     s->b8_stride = s->mb_width*2 + 1;
665     s->b4_stride = s->mb_width*4 + 1;
666     mb_array_size= s->mb_height * s->mb_stride;
667     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
668
669     /* set chroma shifts */
670     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
671                                                     &(s->chroma_y_shift) );
672
673     /* set default edge pos, will be overriden in decode_header if needed */
674     s->h_edge_pos= s->mb_width*16;
675     s->v_edge_pos= s->mb_height*16;
676
677     s->mb_num = s->mb_width * s->mb_height;
678
679     s->block_wrap[0]=
680     s->block_wrap[1]=
681     s->block_wrap[2]=
682     s->block_wrap[3]= s->b8_stride;
683     s->block_wrap[4]=
684     s->block_wrap[5]= s->mb_stride;
685
686     y_size = s->b8_stride * (2 * s->mb_height + 1);
687     c_size = s->mb_stride * (s->mb_height + 1);
688     yc_size = y_size + 2 * c_size;
689
690     /* convert fourcc to upper case */
691     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
692                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
693                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
694                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
695
696     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
697                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
698                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
699                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
700
701     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
702
703     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
704     for(y=0; y<s->mb_height; y++){
705         for(x=0; x<s->mb_width; x++){
706             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
707         }
708     }
709     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
710
711     if (s->encoding) {
712         /* Allocate MV tables */
713         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
714         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
715         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
716         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
717         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
719         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
720         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
721         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
722         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
723         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
724         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
725
726         if(s->msmpeg4_version){
727             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
728         }
729         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
730
731         /* Allocate MB type table */
732         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
733
734         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
735
736         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
737         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
738         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
739         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
740         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
741         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
742
743         if(s->avctx->noise_reduction){
744             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
745         }
746     }
747     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
748
749     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
750
751     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
752         /* interlaced direct mode decoding tables */
753             for(i=0; i<2; i++){
754                 int j, k;
755                 for(j=0; j<2; j++){
756                     for(k=0; k<2; k++){
757                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
758                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
759                     }
760                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
761                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
762                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
763                 }
764                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
765             }
766     }
767     if (s->out_format == FMT_H263) {
768         /* ac values */
769         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
770         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
771         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
772         s->ac_val[2] = s->ac_val[1] + c_size;
773
774         /* cbp values */
775         CHECKED_ALLOCZ(s->coded_block_base, y_size);
776         s->coded_block= s->coded_block_base + s->b8_stride + 1;
777
778         /* cbp, ac_pred, pred_dir */
779         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
780         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
781     }
782
783     if (s->h263_pred || s->h263_plus || !s->encoding) {
784         /* dc values */
785         //MN: we need these for error resilience of intra-frames
786         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
787         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
788         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
789         s->dc_val[2] = s->dc_val[1] + c_size;
790         for(i=0;i<yc_size;i++)
791             s->dc_val_base[i] = 1024;
792     }
793
794     /* which mb is a intra block */
795     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
796     memset(s->mbintra_table, 1, mb_array_size);
797
798     /* init macroblock skip table */
799     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
800     //Note the +1 is for a quicker mpeg4 slice_end detection
801     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
802
803     s->parse_context.state= -1;
804     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
805        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
806        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
807        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
808     }
809
810     s->context_initialized = 1;
811
812     s->thread_context[0]= s;
813     for(i=1; i<s->avctx->thread_count; i++){
814         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
815         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
816     }
817
818     for(i=0; i<s->avctx->thread_count; i++){
819         if(init_duplicate_context(s->thread_context[i], s) < 0)
820            goto fail;
821         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
822         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
823     }
824
825     return 0;
826  fail:
827     MPV_common_end(s);
828     return -1;
829 }
830
/* Free everything allocated by MPV_common_init() for both encoder and
 * decoder; the context can be reinitialized afterwards. */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* uninit all per-thread duplicate contexts; slot 0 is s itself,
       so only slots 1..thread_count-1 are actually freed */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: only the *_base pointers own memory; the
       plain pointers are offsets into them and are just cleared */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field (interlaced) MV tables, allocated per [field][dir][list] */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction / coded-block state (H.263 family, error resilience) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    /* encoder-side statistics and quantization tables */
    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every picture before freeing the array that holds them */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear dangling references into the freed picture array */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
914
915 #ifdef CONFIG_ENCODERS
916
917 /* init video encoder */
918 int MPV_encode_init(AVCodecContext *avctx)
919 {
920     MpegEncContext *s = avctx->priv_data;
921     int i;
922     int chroma_h_shift, chroma_v_shift;
923
924     MPV_encode_defaults(s);
925
926     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
927         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
928         return -1;
929     }
930
931     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
932         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
933             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
934             return -1;
935         }
936     }else{
937         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
938             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
939             return -1;
940         }
941     }
942
943     s->bit_rate = avctx->bit_rate;
944     s->width = avctx->width;
945     s->height = avctx->height;
946     if(avctx->gop_size > 600){
947         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
948         avctx->gop_size=600;
949     }
950     s->gop_size = avctx->gop_size;
951     s->avctx = avctx;
952     s->flags= avctx->flags;
953     s->flags2= avctx->flags2;
954     s->max_b_frames= avctx->max_b_frames;
955     s->codec_id= avctx->codec->id;
956     s->luma_elim_threshold  = avctx->luma_elim_threshold;
957     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
958     s->strict_std_compliance= avctx->strict_std_compliance;
959     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
960     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
961     s->mpeg_quant= avctx->mpeg_quant;
962     s->rtp_mode= !!avctx->rtp_payload_size;
963     s->intra_dc_precision= avctx->intra_dc_precision;
964     s->user_specified_pts = AV_NOPTS_VALUE;
965
966     if (s->gop_size <= 1) {
967         s->intra_only = 1;
968         s->gop_size = 12;
969     } else {
970         s->intra_only = 0;
971     }
972
973     s->me_method = avctx->me_method;
974
975     /* Fixed QSCALE */
976     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
977
978     s->adaptive_quant= (   s->avctx->lumi_masking
979                         || s->avctx->dark_masking
980                         || s->avctx->temporal_cplx_masking
981                         || s->avctx->spatial_cplx_masking
982                         || s->avctx->p_masking
983                         || s->avctx->border_masking
984                         || (s->flags&CODEC_FLAG_QP_RD))
985                        && !s->fixed_qscale;
986
987     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
988     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
989     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
990
991     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
992         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
993         return -1;
994     }
995
996     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
997         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
998     }
999
1000     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1001         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1002         return -1;
1003     }
1004
1005     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1006         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1007         return -1;
1008     }
1009
1010     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1011        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1012        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1013
1014         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1015     }
1016
1017     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1018        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1019         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1020         return -1;
1021     }
1022
1023     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1024         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1025         return -1;
1026     }
1027
1028     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1029         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1030         return -1;
1031     }
1032
1033     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1034         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1035         return -1;
1036     }
1037
1038     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1039         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1040         return -1;
1041     }
1042
1043     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1044         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1045         return -1;
1046     }
1047
1048     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1049        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1050         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1051         return -1;
1052     }
1053
1054     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1055         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1056         return -1;
1057     }
1058
1059     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1060         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1061         return -1;
1062     }
1063
1064     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1065         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1066         return -1;
1067     }
1068
1069     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1070         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1071         return -1;
1072     }
1073
1074     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1075        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1076        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1077         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1078         return -1;
1079     }
1080
1081     if(s->avctx->thread_count > 1)
1082         s->rtp_mode= 1;
1083
1084     if(!avctx->time_base.den || !avctx->time_base.num){
1085         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1086         return -1;
1087     }
1088
1089     i= (INT_MAX/2+128)>>8;
1090     if(avctx->me_threshold >= i){
1091         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1092         return -1;
1093     }
1094     if(avctx->mb_threshold >= i){
1095         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1096         return -1;
1097     }
1098
1099     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1100         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
1101         return -1;
1102     }
1103
1104     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1105     if(i > 1){
1106         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1107         avctx->time_base.den /= i;
1108         avctx->time_base.num /= i;
1109 //        return -1;
1110     }
1111
1112     if(s->codec_id==CODEC_ID_MJPEG){
1113         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1114         s->inter_quant_bias= 0;
1115     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1116         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1117         s->inter_quant_bias= 0;
1118     }else{
1119         s->intra_quant_bias=0;
1120         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1121     }
1122
1123     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1124         s->intra_quant_bias= avctx->intra_quant_bias;
1125     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1126         s->inter_quant_bias= avctx->inter_quant_bias;
1127
1128     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1129
1130     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1131         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1132         return -1;
1133     }
1134     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1135
1136     switch(avctx->codec->id) {
1137     case CODEC_ID_MPEG1VIDEO:
1138         s->out_format = FMT_MPEG1;
1139         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1140         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1141         break;
1142     case CODEC_ID_MPEG2VIDEO:
1143         s->out_format = FMT_MPEG1;
1144         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1145         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1146         s->rtp_mode= 1;
1147         break;
1148     case CODEC_ID_LJPEG:
1149     case CODEC_ID_JPEGLS:
1150     case CODEC_ID_MJPEG:
1151         s->out_format = FMT_MJPEG;
1152         s->intra_only = 1; /* force intra only for jpeg */
1153         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1154         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1155         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1156         s->mjpeg_vsample[1] = 1;
1157         s->mjpeg_vsample[2] = 1;
1158         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1159         s->mjpeg_hsample[1] = 1;
1160         s->mjpeg_hsample[2] = 1;
1161         if (mjpeg_init(s) < 0)
1162             return -1;
1163         avctx->delay=0;
1164         s->low_delay=1;
1165         break;
1166     case CODEC_ID_H261:
1167         s->out_format = FMT_H261;
1168         avctx->delay=0;
1169         s->low_delay=1;
1170         break;
1171     case CODEC_ID_H263:
1172         if (h263_get_picture_format(s->width, s->height) == 7) {
1173             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1174             return -1;
1175         }
1176         s->out_format = FMT_H263;
1177         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1178         avctx->delay=0;
1179         s->low_delay=1;
1180         break;
1181     case CODEC_ID_H263P:
1182         s->out_format = FMT_H263;
1183         s->h263_plus = 1;
1184         /* Fx */
1185         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1186         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1187         s->modified_quant= s->h263_aic;
1188         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1189         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1190         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1191         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1192         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1193
1194         /* /Fx */
1195         /* These are just to be sure */
1196         avctx->delay=0;
1197         s->low_delay=1;
1198         break;
1199     case CODEC_ID_FLV1:
1200         s->out_format = FMT_H263;
1201         s->h263_flv = 2; /* format = 1; 11-bit codes */
1202         s->unrestricted_mv = 1;
1203         s->rtp_mode=0; /* don't allow GOB */
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_RV10:
1208         s->out_format = FMT_H263;
1209         avctx->delay=0;
1210         s->low_delay=1;
1211         break;
1212     case CODEC_ID_RV20:
1213         s->out_format = FMT_H263;
1214         avctx->delay=0;
1215         s->low_delay=1;
1216         s->modified_quant=1;
1217         s->h263_aic=1;
1218         s->h263_plus=1;
1219         s->loop_filter=1;
1220         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1221         break;
1222     case CODEC_ID_MPEG4:
1223         s->out_format = FMT_H263;
1224         s->h263_pred = 1;
1225         s->unrestricted_mv = 1;
1226         s->low_delay= s->max_b_frames ? 0 : 1;
1227         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1228         break;
1229     case CODEC_ID_MSMPEG4V1:
1230         s->out_format = FMT_H263;
1231         s->h263_msmpeg4 = 1;
1232         s->h263_pred = 1;
1233         s->unrestricted_mv = 1;
1234         s->msmpeg4_version= 1;
1235         avctx->delay=0;
1236         s->low_delay=1;
1237         break;
1238     case CODEC_ID_MSMPEG4V2:
1239         s->out_format = FMT_H263;
1240         s->h263_msmpeg4 = 1;
1241         s->h263_pred = 1;
1242         s->unrestricted_mv = 1;
1243         s->msmpeg4_version= 2;
1244         avctx->delay=0;
1245         s->low_delay=1;
1246         break;
1247     case CODEC_ID_MSMPEG4V3:
1248         s->out_format = FMT_H263;
1249         s->h263_msmpeg4 = 1;
1250         s->h263_pred = 1;
1251         s->unrestricted_mv = 1;
1252         s->msmpeg4_version= 3;
1253         s->flipflop_rounding=1;
1254         avctx->delay=0;
1255         s->low_delay=1;
1256         break;
1257     case CODEC_ID_WMV1:
1258         s->out_format = FMT_H263;
1259         s->h263_msmpeg4 = 1;
1260         s->h263_pred = 1;
1261         s->unrestricted_mv = 1;
1262         s->msmpeg4_version= 4;
1263         s->flipflop_rounding=1;
1264         avctx->delay=0;
1265         s->low_delay=1;
1266         break;
1267     case CODEC_ID_WMV2:
1268         s->out_format = FMT_H263;
1269         s->h263_msmpeg4 = 1;
1270         s->h263_pred = 1;
1271         s->unrestricted_mv = 1;
1272         s->msmpeg4_version= 5;
1273         s->flipflop_rounding=1;
1274         avctx->delay=0;
1275         s->low_delay=1;
1276         break;
1277     default:
1278         return -1;
1279     }
1280
1281     avctx->has_b_frames= !s->low_delay;
1282
1283     s->encoding = 1;
1284
1285     /* init */
1286     if (MPV_common_init(s) < 0)
1287         return -1;
1288
1289     if(s->modified_quant)
1290         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1291     s->progressive_frame=
1292     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1293     s->quant_precision=5;
1294
1295     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1296     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1297
1298 #ifdef CONFIG_H261_ENCODER
1299     if (s->out_format == FMT_H261)
1300         ff_h261_encode_init(s);
1301 #endif
1302     if (s->out_format == FMT_H263)
1303         h263_encode_init(s);
1304     if(s->msmpeg4_version)
1305         ff_msmpeg4_encode_init(s);
1306     if (s->out_format == FMT_MPEG1)
1307         ff_mpeg1_encode_init(s);
1308
1309     /* init q matrix */
1310     for(i=0;i<64;i++) {
1311         int j= s->dsp.idct_permutation[i];
1312         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1313             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1314             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1315         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1316             s->intra_matrix[j] =
1317             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1318         }else
1319         { /* mpeg1/2 */
1320             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1321             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1322         }
1323         if(s->avctx->intra_matrix)
1324             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1325         if(s->avctx->inter_matrix)
1326             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1327     }
1328
1329     /* precompute matrix */
1330     /* for mjpeg, we do include qscale in the matrix */
1331     if (s->out_format != FMT_MJPEG) {
1332         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1333                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1334         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1335                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1336     }
1337
1338     if(ff_rate_control_init(s) < 0)
1339         return -1;
1340
1341     return 0;
1342 }
1343
/* Uninit the video encoder; frees everything the matching
 * MPV_encode_init() allocated. Always returns 0. */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    /* tear down in reverse order of init: rate control first, then the
       common context, then the mjpeg specific state */
    ff_rate_control_uninit(s);

    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1362
1363 #endif //CONFIG_ENCODERS
1364
1365 void init_rl(RLTable *rl, int use_static)
1366 {
1367     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1368     uint8_t index_run[MAX_RUN+1];
1369     int last, run, level, start, end, i;
1370
1371     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1372     if(use_static && rl->max_level[0])
1373         return;
1374
1375     /* compute max_level[], max_run[] and index_run[] */
1376     for(last=0;last<2;last++) {
1377         if (last == 0) {
1378             start = 0;
1379             end = rl->last;
1380         } else {
1381             start = rl->last;
1382             end = rl->n;
1383         }
1384
1385         memset(max_level, 0, MAX_RUN + 1);
1386         memset(max_run, 0, MAX_LEVEL + 1);
1387         memset(index_run, rl->n, MAX_RUN + 1);
1388         for(i=start;i<end;i++) {
1389             run = rl->table_run[i];
1390             level = rl->table_level[i];
1391             if (index_run[run] == rl->n)
1392                 index_run[run] = i;
1393             if (level > max_level[run])
1394                 max_level[run] = level;
1395             if (run > max_run[level])
1396                 max_run[level] = run;
1397         }
1398         if(use_static)
1399             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1400         else
1401             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1402         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1403         if(use_static)
1404             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1405         else
1406             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1407         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1408         if(use_static)
1409             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1410         else
1411             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1412         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1413     }
1414 }
1415
/* Pad an image of size width x height (line stride 'wrap') with an edge
 * border of 'w' pixels on every side by replicating the border pixels,
 * as needed for unrestricted motion vectors. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom_row = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* replicate the first row upward and the last row downward */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap, buf, width);
        memcpy(bottom_row + i * wrap, bottom_row, width);
    }

    /* replicate the first/last pixel of each row to the left/right */
    for (row = buf, i = 0; i < height; i++, row += wrap) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
    }

    /* fill each corner area from its nearest corner pixel */
    for (i = 1; i <= w; i++) {
        memset(buf - i * wrap - w, buf[0], w);                           /* top left */
        memset(buf - i * wrap + width, buf[width - 1], w);               /* top right */
        memset(bottom_row + i * wrap - w, bottom_row[0], w);             /* bottom left */
        memset(bottom_row + i * wrap + width, bottom_row[width - 1], w); /* bottom right */
    }
}
1444
1445 int ff_find_unused_picture(MpegEncContext *s, int shared){
1446     int i;
1447
1448     if(shared){
1449         for(i=0; i<MAX_PICTURE_COUNT; i++){
1450             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1451         }
1452     }else{
1453         for(i=0; i<MAX_PICTURE_COUNT; i++){
1454             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1455         }
1456         for(i=0; i<MAX_PICTURE_COUNT; i++){
1457             if(s->picture[i].data[0]==NULL) return i;
1458         }
1459     }
1460
1461     assert(0);
1462     return -1;
1463 }
1464
1465 static void update_noise_reduction(MpegEncContext *s){
1466     int intra, i;
1467
1468     for(intra=0; intra<2; intra++){
1469         if(s->dct_count[intra] > (1<<16)){
1470             for(i=0; i<64; i++){
1471                 s->dct_error_sum[intra][i] >>=1;
1472             }
1473             s->dct_count[intra] >>= 1;
1474         }
1475
1476         for(i=0; i<64; i++){
1477             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1478         }
1479     }
1480 }
1481
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded.
 * Selects/allocates the current picture, updates the last/next reference
 * picture pointers and picks the dequantizers for the frame.
 * @return 0 on success, -1 on picture allocation failure
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither last nor next should
                   not exist anymore at this point */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* non-dropable non-B frames become references (3 = both fields) */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  /* h264 (except svq3) manages its own reference lists, so skip the
     last/next picture bookkeeping for it */
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* a non-intra frame with no usable reference: complain and retry the
       allocation so decoding can continue with a dummy reference */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: double the line strides so each field is addressed
       as its own half-height image */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1608
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* pad the reconstructed frame's borders so motion compensation may read
       outside the picture; skipped when the user handles edges (EMU_EDGE)
       or the frame will never be used as a reference */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember per-type state used by rate control / picture-type decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1659
/**
 * draws a line from (ex, ey) -> (sx, sy).
 * Endpoints are clipped to the image; pixels are accumulated (+=) with
 * 16.16 fixed-point interpolation spreading intensity over 2 rows/columns.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the line
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){ //mostly horizontal -> iterate over x
        if(sx > ex){ //ensure left-to-right iteration
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex; //16.16 fixed-point slope
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF; //fractional part weights the 2 adjacent rows
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{ //mostly vertical -> iterate over y
        if(sy > ey){ //ensure top-to-bottom iteration
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0; //degenerate single-point line: avoid division by zero
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1708
/**
 * draws an arrow from (ex, ey) -> (sx, sy).
 * The shaft is a plain line; for vectors longer than 3 pixels two short
 * head strokes are added at the (sx, sy) end.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    /* keep the endpoints in a sane range; draw_line() clips exactly */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    vx= ex - sx;
    vy= ey - sy;

    if(vx*vx + vy*vy > 3*3){ //long enough to deserve an arrow head
        int rx=  vx + vy;    //vector rotated by 45 degrees
        int ry= -vx + vy;
        int length= ff_sqrt((rx*rx + ry*ry)<<8);

        //FIXME subpixel accuracy
        rx= ROUNDED_DIV(rx*3<<4, length); //normalize head strokes to fixed size
        ry= ROUNDED_DIV(ry*3<<4, length);

        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1741
/**
 * Prints debugging info for the given picture.
 * Depending on s->avctx->debug this logs a textual per-macroblock map
 * (skip count, qscale, MB type), and depending on debug / debug_mv it
 * draws motion vectors, QP and MB-type visualizations into a private
 * copy of the picture (pict->data[] is redirected to
 * s->visualization_buffer so the reference frames stay untouched).
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* ---- textual per-macroblock dump ---- */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        /* one log line of characters per macroblock row */
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9; //clamp to one digit
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">"); //forward prediction only
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<"); //backward prediction only
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X"); //bidirectional
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");

                    //interlaced prediction (h264 only)
                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* ---- graphical visualization drawn into the picture itself ---- */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample; //MV fractional bits to drop
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* draw into a copy so the reference frames are not modified */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  /* type 0: forward MVs of P frames,
                     type 1: forward MVs of B frames,
                     type 2: backward MVs of B frames */
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2; //field MVs cover half the vertical distance

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* paint the chroma planes with a gray level proportional to qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* colorize chroma by MB type and mark partition boundaries in luma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* pick u/v from a hue angle (degrees) and saturation radius */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128; //default: neutral gray
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL; //replicate the byte across 8 pixels
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    /* mark 4x4 sub-partitions whose MVs actually differ */
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
2002
2003 #ifdef CONFIG_ENCODERS
2004
/**
 * Returns the sum of absolute differences between a 16x16 block of
 * pixels and a constant reference value.
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int i, j;
    int sum= 0;

    for(j=0; j<16; j++){
        for(i=0; i<16; i++){
            const int diff= src[i + j*stride] - ref;
            sum+= diff < 0 ? -diff : diff;
        }
    }

    return sum;
}
2017
2018 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2019     int x, y, w, h;
2020     int acc=0;
2021
2022     w= s->width &~15;
2023     h= s->height&~15;
2024
2025     for(y=0; y<h; y+=16){
2026         for(x=0; x<w; x+=16){
2027             int offset= x + y*stride;
2028             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2029             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2030             int sae = get_sae(src + offset, mean, stride);
2031
2032             acc+= sae + 500 < sad;
2033         }
2034     }
2035     return acc;
2036 }
2037
2038
/**
 * Takes one user-supplied input frame and queues it into
 * s->input_picture[] for encoding, either by referencing the user's
 * buffers directly or by copying into an internal picture.
 * @return 0 on success, -1 on invalid (non-monotone) timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1; //use the user's buffers without copying, if possible

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        /* validate / synthesize the presentation timestamp */
        if(pts != AV_NOPTS_VALUE){
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                /* timestamps must be strictly increasing */
                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                /* no pts given but earlier frames had one: extrapolate */
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                /* never saw a pts: fall back to the display order counter */
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct use requires the user to preserve the buffer for the whole
       encoding delay and the strides to match the internal layout */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* share the caller's buffers, no pixel copy */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        /* if the user already wrote into our internal buffer (offset by
           INPLACE_OFFSET) the copy can be skipped entirely */
        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            /* plane-by-plane copy, honoring chroma subsampling */
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h); //single bulk copy
                else{
                    while(h--){
                        memcpy(dst, src, w); //row by row when strides differ
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2141
2142 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2143     int x, y, plane;
2144     int score=0;
2145     int64_t score64=0;
2146
2147     for(plane=0; plane<3; plane++){
2148         const int stride= p->linesize[plane];
2149         const int bw= plane ? 1 : 2;
2150         for(y=0; y<s->mb_height*bw; y++){
2151             for(x=0; x<s->mb_width*bw; x++){
2152                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2153                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2154
2155                 switch(s->avctx->frame_skip_exp){
2156                     case 0: score= FFMAX(score, v); break;
2157                     case 1: score+= ABS(v);break;
2158                     case 2: score+= v*v;break;
2159                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2160                     case 4: score64+= v*v*(int64_t)(v*v);break;
2161                 }
2162             }
2163         }
2164     }
2165
2166     if(score) score64= score;
2167
2168     if(score64 < s->avctx->frame_skip_threshold)
2169         return 1;
2170     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2171         return 1;
2172     return 0;
2173 }
2174
2175 static int estimate_best_b_count(MpegEncContext *s){
2176     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2177     AVCodecContext *c= avcodec_alloc_context();
2178     AVFrame input[FF_MAX_B_FRAMES+2];
2179     const int scale= s->avctx->brd_scale;
2180     int i, j, out_size, p_lambda, b_lambda, lambda2;
2181     int outbuf_size= s->width * s->height; //FIXME
2182     uint8_t *outbuf= av_malloc(outbuf_size);
2183     int64_t best_rd= INT64_MAX;
2184     int best_b_count= -1;
2185
2186     assert(scale>=0 && scale <=3);
2187
2188 //    emms_c();
2189     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2190     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2191     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2192     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2193
2194     c->width = s->width >> scale;
2195     c->height= s->height>> scale;
2196     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2197     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2198     c->mb_decision= s->avctx->mb_decision;
2199     c->me_cmp= s->avctx->me_cmp;
2200     c->mb_cmp= s->avctx->mb_cmp;
2201     c->me_sub_cmp= s->avctx->me_sub_cmp;
2202     c->pix_fmt = PIX_FMT_YUV420P;
2203     c->time_base= s->avctx->time_base;
2204     c->max_b_frames= s->max_b_frames;
2205
2206     if (avcodec_open(c, codec) < 0)
2207         return -1;
2208
2209     for(i=0; i<s->max_b_frames+2; i++){
2210         int ysize= c->width*c->height;
2211         int csize= (c->width/2)*(c->height/2);
2212         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2213
2214         if(pre_input_ptr)
2215             pre_input= *pre_input_ptr;
2216
2217         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2218             pre_input.data[0]+=INPLACE_OFFSET;
2219             pre_input.data[1]+=INPLACE_OFFSET;
2220             pre_input.data[2]+=INPLACE_OFFSET;
2221         }
2222
2223         avcodec_get_frame_defaults(&input[i]);
2224         input[i].data[0]= av_malloc(ysize + 2*csize);
2225         input[i].data[1]= input[i].data[0] + ysize;
2226         input[i].data[2]= input[i].data[1] + csize;
2227         input[i].linesize[0]= c->width;
2228         input[i].linesize[1]=
2229         input[i].linesize[2]= c->width/2;
2230
2231         if(!i || s->input_picture[i-1]){
2232             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2233             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2234             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2235         }
2236     }
2237
2238     for(j=0; j<s->max_b_frames+1; j++){
2239         int64_t rd=0;
2240
2241         if(!s->input_picture[j])
2242             break;
2243
2244         c->error[0]= c->error[1]= c->error[2]= 0;
2245
2246         input[0].pict_type= I_TYPE;
2247         input[0].quality= 1 * FF_QP2LAMBDA;
2248         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2249 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2250
2251         for(i=0; i<s->max_b_frames+1; i++){
2252             int is_p= i % (j+1) == j || i==s->max_b_frames;
2253
2254             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2255             input[i+1].quality= is_p ? p_lambda : b_lambda;
2256             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2257             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2258         }
2259
2260         /* get the delayed frames */
2261         while(out_size){
2262             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2263             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2264         }
2265
2266         rd += c->error[0] + c->error[1] + c->error[2];
2267
2268         if(rd < best_rd){
2269             best_rd= rd;
2270             best_b_count= j;
2271         }
2272     }
2273
2274     av_freep(&outbuf);
2275     avcodec_close(c);
2276     av_freep(&c);
2277
2278     for(i=0; i<s->max_b_frames+2; i++){
2279         av_freep(&input[i].data[0]);
2280     }
2281
2282     return best_b_count;
2283 }
2284
/**
 * Chooses the next picture to be coded and decides its coding type
 * (I/P/B): shifts the reorder queue, optionally drops skippable frames,
 * decides how many B frames to use, forces I frames at GOP boundaries
 * and finally fills s->reordered_input_picture[] in coding order and
 * s->new_picture / s->current_picture_ptr for the picture to code now.
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* shift the already reordered pictures down by one slot */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            /* no reference picture available (or intra-only coding) -> code an I frame */
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
                //FIXME check that the gop check above is +-1 correct
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    /* drop this frame: release or unreference its buffer depending on ownership */
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    emms_c();
                    ff_vbv_update(s, 0);

                    goto no_output_pic;
                }
            }

            if(s->flags&CODEC_FLAG_PASS2){
                /* 2nd pass: take the picture types recorded in the rate-control stats */
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries)
                        break;
                    if(!s->input_picture[i]){
                        /* queue ends early -> the last available entry must not be a B frame */
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type=
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            if(s->avctx->b_frame_strategy==0){
                /* strategy 0: always use as many B frames as are buffered (up to max) */
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* strategy 1: stop inserting B frames once a frame scores too many intra MBs */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else if(s->avctx->b_frame_strategy==2){
                /* strategy 2: trial-encode to estimate the best B frame count */
                b_frames= estimate_best_b_count(s);
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* honor picture types forced earlier (e.g. by 2nd pass stats):
               a forced non-B frame terminates the run of B frames before it */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* force an I frame when the GOP size is reached */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* in a closed GOP no B frame may reference across the leading I frame */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* coding order: the non-B (reference) frame first, then its B frames */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        /* B frames are not used as references */
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
            // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            /* mark us unused / free shared pic */
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            pic->reference              = s->reordered_input_picture[0]->reference;

            alloc_picture(s, pic, 0);

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                s->new_picture.data[i]+= INPLACE_OFFSET;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       /* nothing to code this call (delay filling or a skipped frame) */
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2451
/**
 * Encodes one video frame.
 * @param avctx the codec context
 * @param buf output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data the input AVFrame (may be NULL to flush delayed frames)
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* give each slice thread a proportional share of the output buffer */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export the statistics of the just coded frame */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* append stuffing bytes if the rate control / VBV model asks for them */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* 32 bit stuffing start code 0x000001C3 followed by 0xFF bytes.
                 * NOTE(review): if stuffing_count were < 4 the subtraction below
                 * would go negative and the loop would run far too long —
                 * presumably ff_vbv_update never returns 1..3 here; verify. */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16 bit vbv_delay field of the already written picture header */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* nothing coded this call (encoder delay or a skipped frame) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2573
2574 #endif //CONFIG_ENCODERS
2575
/**
 * Global motion compensation, fast single-vector variant ("gmc1"):
 * one translational vector from sprite_offset is applied to the whole
 * macroblock, predicting 16x16 luma and two 8x8 chroma blocks.
 * @param dest_y destination for the luma block
 * @param dest_cb destination for the Cb block
 * @param dest_cr destination for the Cr block
 * @param ref_picture the three reference planes to predict from
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* luma: integer source position + sub-pel remainder in 1/16 pel units */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            /* block reaches outside the padded picture -> use the emulated edge */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* true sub-pel offset -> gmc1 interpolation, two 8-wide halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        /* offset is a multiple of 1/2 pel -> plain (no-)rounding half-pel copy */
        int dxy;

        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same procedure with the chroma sprite offset at half resolution */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    /* Cr uses the same clipped position, so reuse the emu decision made for Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2661
/**
 * Full global motion compensation for one macroblock using the affine
 * warp described by sprite_offset (translation) and the 2x2 sprite_delta
 * matrix; edge handling is done inside dsp.gmc via h/v_edge_pos.
 * @param dest_y destination for the luma block (written as two 8-wide halves)
 * @param dest_cb destination for the Cb block
 * @param dest_cr destination for the Cr block
 * @param ref_picture the three reference planes to predict from
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a= s->sprite_warping_accuracy;
    int ox, oy;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    /* warped source coordinates of the macroblock's top-left luma sample */
    ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
    oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;

    s->dsp.gmc(dest_y, ptr, linesize, 16,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);
    /* right 8-wide half: advance the warp origin by 8 columns */
    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
           ox + s->sprite_delta[0][0]*8,
           oy + s->sprite_delta[1][0]*8,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same warp at half resolution with the chroma sprite offset */
    ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
    oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;

    ptr = ref_picture[1];
    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);

    ptr = ref_picture[2];
    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);
}
2717
2718 /**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2720  * @param buf destination buffer
2721  * @param src source buffer
2722  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2723  * @param block_w width of block
2724  * @param block_h height of block
2725  * @param src_x x coordinate of the top left sample of the block in the source buffer
2726  * @param src_y y coordinate of the top left sample of the block in the source buffer
2727  * @param w width of the source buffer
2728  * @param h height of the source buffer
2729  */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, left, bottom, right;

    /* clamp the block position so that at least one sample row/column
       of the source picture overlaps the requested block */
    if(src_y >= h){
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    }else if(src_y <= -block_h){
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if(src_x >= w){
        src  += w - 1 - src_x;
        src_x = w - 1;
    }else if(src_x <= -block_w){
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    top   = -src_y > 0 ? -src_y : 0;                      /* first row inside the picture */
    left  = -src_x > 0 ? -src_x : 0;                      /* first column inside the picture */
    bottom= h - src_y < block_h ? h - src_y : block_h;    /* one past the last inside row */
    right = w - src_x < block_w ? w - src_x : block_w;    /* one past the last inside column */

    /* copy the part of the block that lies inside the picture */
    for(y = top; y < bottom; y++)
        for(x = left; x < right; x++)
            buf[y * linesize + x] = src[y * linesize + x];

    /* replicate the first inside row upwards */
    for(y = 0; y < top; y++)
        for(x = left; x < right; x++)
            buf[y * linesize + x] = buf[top * linesize + x];

    /* replicate the last inside row downwards */
    for(y = bottom; y < block_h; y++)
        for(x = left; x < right; x++)
            buf[y * linesize + x] = buf[(bottom - 1) * linesize + x];

    /* replicate the first/last inside columns sideways, for every row
       (the corners are filled because top/bottom rows were done first) */
    for(y = 0; y < block_h; y++){
        for(x = 0; x < left; x++)
            buf[y * linesize + x] = buf[y * linesize + left];
        for(x = right; x < block_w; x++)
            buf[y * linesize + x] = buf[y * linesize + right - 1];
    }
}
2788
2789 static inline int hpel_motion(MpegEncContext *s,
2790                                   uint8_t *dest, uint8_t *src,
2791                                   int field_based, int field_select,
2792                                   int src_x, int src_y,
2793                                   int width, int height, int stride,
2794                                   int h_edge_pos, int v_edge_pos,
2795                                   int w, int h, op_pixels_func *pix_op,
2796                                   int motion_x, int motion_y)
2797 {
2798     int dxy;
2799     int emu=0;
2800
2801     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2802     src_x += motion_x >> 1;
2803     src_y += motion_y >> 1;
2804
2805     /* WARNING: do no forget half pels */
2806     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2807     if (src_x == width)
2808         dxy &= ~1;
2809     src_y = clip(src_y, -16, height);
2810     if (src_y == height)
2811         dxy &= ~2;
2812     src += src_y * stride + src_x;
2813
2814     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2815         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2816            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2817             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2818                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2819             src= s->edge_emu_buffer;
2820             emu=1;
2821         }
2822     }
2823     if(field_select)
2824         src += s->linesize;
2825     pix_op[dxy](dest, src, stride, h);
2826     return emu;
2827 }
2828
/**
 * Motion compensation for one block when decoding at reduced resolution
 * (lowres): the sub-pel remainder is rescaled to the 1/8-pel grid of the
 * h264 chroma MC functions, which do the interpolation.
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1;       /* mask for the sub-pel remainder at this lowres */
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        /* reduce qpel vectors to half-pel precision first */
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pel remainder */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        /* NOTE(review): edge emulation uses s->linesize while pix_op below uses
           stride — presumably identical for all callers; verify */
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the remainder to the 1/8-pel units expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2870
/**
 * Applies one half-pel motion vector to all three components of a macroblock.
 * Derives the chroma vector/position from the luma vector according to the
 * output format and chroma subsampling, handles edge emulation and
 * field-based (interlaced) addressing.
 * @param field_based nonzero when predicting within fields (halved height)
 * @param bottom_field nonzero to write the bottom field lines of dest
 * @param field_select which source field to read from
 * @param pix_op put/avg pixel operation table, indexed [chroma_x_shift][dxy]
 * @param h height of the luma block in lines
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma: half-pel flags and integer source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma vector and position from the luma vector */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: round the chroma vector differently */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0 */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* invalid stream: refuse rather than read outside the picture */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2987
/**
 * Applies one mpeg motion vector to all three components at reduced
 * resolution (lowres). The sub-pel remainders are rescaled to the
 * 1/8-pel grid of the h264 chroma MC functions which interpolate.
 * @param field_based nonzero when predicting within fields (halved height)
 * @param bottom_field nonzero to write the bottom field lines of dest
 * @param field_select which source field to read from
 * @param pix_op h264 chroma MC function table, indexed by lowres level
 * @param h height of the luma block in lines (full resolution units)
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          /* block size at this lowres (8, 4 or 2) */
    const int s_mask= (2<<lowres)-1;       /* mask for the sub-pel remainder */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* luma: integer source position + sub-pel remainder */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma position/remainder from the luma vector per format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            /* block reaches outside the padded picture -> read via the emulated edge */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the remainders to the 1/8-pel units expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3084
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Overlapped block motion compensation for one 8x8 block: every output
 * pixel is a weighted average of the co-located pixels from the current
 * block prediction (mid) and the predictions made with the top, left,
 * right and bottom neighbours' motion vectors.
 * The five weights of each OBMC_FILTER call sum to 8; "+ 4" rounds.
 * @param dst destination 8x8 block
 * @param src the 5 predictor blocks: {mid, top, left, right, bottom}
 * @param stride line size of dst and all src blocks
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* the weight tables below are position dependent: neighbours get more
       weight near their shared edge, the mid prediction more in the center */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3136
3137 /* obmc for 1 8x8 luma block */
3138 static inline void obmc_motion(MpegEncContext *s,
3139                                uint8_t *dest, uint8_t *src,
3140                                int src_x, int src_y,
3141                                op_pixels_func *pix_op,
3142                                int16_t mv[5][2]/* mid top left right bottom*/)
3143 #define MID    0
3144 {
3145     int i;
3146     uint8_t *ptr[5];
3147
3148     assert(s->quarter_sample==0);
3149
3150     for(i=0; i<5; i++){
3151         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3152             ptr[i]= ptr[MID];
3153         }else{
3154             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3155             hpel_motion(s, ptr[i], src, 0, 0,
3156                         src_x, src_y,
3157                         s->width, s->height, s->linesize,
3158                         s->h_edge_pos, s->v_edge_pos,
3159                         8, 8, pix_op,
3160                         mv[i][0], mv[i][1]);
3161         }
3162     }
3163
3164     put_obmc(dest, ptr, s->linesize);
3165 }
3166
/**
 * Quarter-pel motion compensation of one 16-wide luma block (frame or field)
 * plus half-pel chroma (4:2:0).
 * @param field_based 1 for field prediction (halves vertical resolution)
 * @param bottom_field write to the bottom field lines of dest
 * @param field_select read from the bottom field lines of the reference
 * @param motion_x/motion_y MV in quarter-pel units
 * @param h height in luma lines (16 for frame, 8 per field)
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* quarter-pel fractional part selects one of the 16 qpel filters */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the chroma MV; the two FF_BUG_QPEL_CHROMA* branches reproduce
     * the (buggy) rounding of specific broken encoders */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round towards half-pel positions */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the 17x17 source area leaves the padded picture, use the edge
     * emulation buffer instead of reading out of bounds */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3253
3254 inline int ff_h263_round_chroma(int x){
3255     if (x >= 0)
3256         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3257     else {
3258         x = -x;
3259         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3260     }
3261 }
3262
/**
 * h263 chroma 4mv motion compensation.
 * Builds one chroma MV from the sum of the four luma MVs (mx, my) with the
 * special H.263 rounding, then does half-pel MC of both 8x8 chroma planes.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* half-pel fractional part selects one of 4 pix_op variants */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clip to the picture; on the right/bottom edge drop the half-pel
     * interpolation so we do not read past the last column/row */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 9x9 source area would leave the padded reference -> emulate edge */
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* Cr uses the same offset; reuse the emulation decision made for Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3311
/**
 * Lowres variant of chroma_4mv_motion: same single-chroma-MV construction,
 * but block size, fractional-sample mask and edge positions are scaled by
 * s->avctx->lowres, and MC goes through the h264 chroma (bilinear) functions.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;                   // chroma block size at this lowres
    const int s_mask= (2<<lowres)-1;                // mask for the fractional MV part
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    /* quarter-sample MVs are halved so the rounding below sees half-pel units */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* rescale the fractional part to the 1/8-pel range pix_op expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* Cr: same offset, reuse the Cb emulation decision */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3361
/* Prefetch reference pixels for the MV of this MB applied 4 MBs ahead,
 * as a cheap estimate of where future MC will read. */
static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    const int shift = s->quarter_sample ? 2 : 1;   // MV units -> full pel
    const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
    const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
    int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
    s->dsp.prefetch(pix[0]+off, s->linesize, 4);
    /* chroma: both planes in one prefetch, using their pointer distance as stride */
    off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
    s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
}
3373
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    /* OBMC path: blend each 8x8 luma block with predictions from the
     * neighbouring blocks' MVs (H.263 Annex F style) */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        /* rows 1/2 of the cache = the two 8x8 rows of the current MB;
         * row 3 (below) duplicates row 2 — presumably because the MB below
         * is not decoded yet; TODO confirm */
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* row 0 = MB above, or the current row when unavailable/intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* column 0 = MB to the left, or the current column when unavailable */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* column 3 = MB to the right, or the current column when unavailable */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            /* accumulate the four mid MVs for the single chroma MV below */
            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation (MPEG-4 GMC) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            /* four independent 8x8 quarter-pel blocks, done inline */
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture predicting from the opposite parity of the
             * current frame -> reference is the current picture itself */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: average the same-parity and derived opposite-parity
         * predictions (put first, then avg) */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3624
/**
 * motion compensation of a single macroblock, lowres variant
 * (same structure as MPV_motion but without OBMC/qpel/GMC paths;
 * all block sizes are scaled by s->avctx->lowres and chroma MC goes
 * through the h264 chroma functions)
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   // 8x8 block size at this lowres

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                /* accumulate the four luma MVs for the single chroma MV below */
                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture predicting from the opposite parity of the
             * current frame -> reference is the current picture itself */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: put the first prediction, then average in the rest */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3750
/* put block[] to dest[]: dequantize as intra with the given qscale,
 * then store (not add) the inverse DCT result */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3758
3759 /* add block[] to dest[] */
3760 static inline void add_dct(MpegEncContext *s,
3761                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3762 {
3763     if (s->block_last_index[i] >= 0) {
3764         s->dsp.idct_add (dest, line_size, block);
3765     }
3766 }
3767
3768 static inline void add_dequant_dct(MpegEncContext *s,
3769                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3770 {
3771     if (s->block_last_index[i] >= 0) {
3772         s->dct_unquantize_inter(s, block, i, qscale);
3773
3774         s->dsp.idct_add (dest, line_size, block);
3775     }
3776 }
3777
3778 /**
3779  * cleans dc, ac, coded_block for the current non intra MB
3780  */
3781 void ff_clean_intra_table_entries(MpegEncContext *s)
3782 {
3783     int wrap = s->b8_stride;
3784     int xy = s->block_index[0];
3785
3786     s->dc_val[0][xy           ] =
3787     s->dc_val[0][xy + 1       ] =
3788     s->dc_val[0][xy     + wrap] =
3789     s->dc_val[0][xy + 1 + wrap] = 1024;
3790     /* ac pred */
3791     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3792     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3793     if (s->msmpeg4_version>=3) {
3794         s->coded_block[xy           ] =
3795         s->coded_block[xy + 1       ] =
3796         s->coded_block[xy     + wrap] =
3797         s->coded_block[xy + 1 + wrap] = 0;
3798     }
3799     /* chroma */
3800     wrap = s->mb_stride;
3801     xy = s->mb_x + s->mb_y * wrap;
3802     s->dc_val[1][xy] =
3803     s->dc_val[2][xy] = 1024;
3804     /* ac pred */
3805     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3806     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3807
3808     s->mbintra_table[xy]= 0;
3809 }
3810
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   Reconstructs the current macroblock into s->dest[] (or a scratchpad,
   see `readable` below): motion compensation, then dequant/IDCT of the
   residue or intra coefficients.  lowres_flag selects the reduced
   resolution decode path; it is a compile-time constant in each of the
   two specializations created by MPV_decode_mb().
 */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    /* hardware acceleration reconstructs the MB by itself */
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                /* (the MB has been skipped for at least `age` frames, so the
                   reference already contains the right pixels) */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* reconstruct into the scratchpad; blitted to s->dest[]
               after skip_idct below */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    /* lowres MC uses the h264 chroma MC functions for all planes */
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                        op_qpix= s->dsp.put_qpel_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                    }
                    /* after the forward prediction the backward one must average */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->dsp.avg_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            /* codecs whose coefficients were not dequantized during parsing
               take the dequant+idct+add path, the rest only idct+add */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                /* WMV2 has its own transform/add path */
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            }else{
                /* mpeg1/2 intra blocks are already dequantized: idct only */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy the scratchpad reconstruction into the visible picture */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4044
4045 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4046     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4047     else                  MPV_decode_mb_internal(s, block, 0);
4048 }
4049
4050 #ifdef CONFIG_ENCODERS
4051
4052 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4053 {
4054     static const char tab[64]=
4055         {3,2,2,1,1,1,1,1,
4056          1,1,1,1,1,1,1,1,
4057          1,1,1,1,1,1,1,1,
4058          0,0,0,0,0,0,0,0,
4059          0,0,0,0,0,0,0,0,
4060          0,0,0,0,0,0,0,0,
4061          0,0,0,0,0,0,0,0,
4062          0,0,0,0,0,0,0,0};
4063     int score=0;
4064     int run=0;
4065     int i;
4066     DCTELEM *block= s->block[n];
4067     const int last_index= s->block_last_index[n];
4068     int skip_dc;
4069
4070     if(threshold<0){
4071         skip_dc=0;
4072         threshold= -threshold;
4073     }else
4074         skip_dc=1;
4075
4076     /* are all which we could set to zero are allready zero? */
4077     if(last_index<=skip_dc - 1) return;
4078
4079     for(i=0; i<=last_index; i++){
4080         const int j = s->intra_scantable.permutated[i];
4081         const int level = ABS(block[j]);
4082         if(level==1){
4083             if(skip_dc && i==0) continue;
4084             score+= tab[run];
4085             run=0;
4086         }else if(level>1){
4087             return;
4088         }else{
4089             run++;
4090         }
4091     }
4092     if(score >= threshold) return;
4093     for(i=skip_dc; i<=last_index; i++){
4094         const int j = s->intra_scantable.permutated[i];
4095         block[j]=0;
4096     }
4097     if(block[0]) s->block_last_index[n]= 0;
4098     else         s->block_last_index[n]= -1;
4099 }
4100
4101 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4102 {
4103     int i;
4104     const int maxlevel= s->max_qcoeff;
4105     const int minlevel= s->min_qcoeff;
4106     int overflow=0;
4107
4108     if(s->mb_intra){
4109         i=1; //skip clipping of intra dc
4110     }else
4111         i=0;
4112
4113     for(;i<=last_index; i++){
4114         const int j= s->intra_scantable.permutated[i];
4115         int level = block[j];
4116
4117         if     (level>maxlevel){
4118             level=maxlevel;
4119             overflow++;
4120         }else if(level<minlevel){
4121             level=minlevel;
4122             overflow++;
4123         }
4124
4125         block[j]= level;
4126     }
4127
4128     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4129         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4130 }
4131
4132 #endif //CONFIG_ENCODERS
4133
4134 /**
4135  *
4136  * @param h is the normal height, this will be reduced automatically if needed for the last row
4137  */
4138 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4139     if (s->avctx->draw_horiz_band) {
4140         AVFrame *src;
4141         int offset[4];
4142
4143         if(s->picture_structure != PICT_FRAME){
4144             h <<= 1;
4145             y <<= 1;
4146             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4147         }
4148
4149         h= FFMIN(h, s->avctx->height - y);
4150
4151         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4152             src= (AVFrame*)s->current_picture_ptr;
4153         else if(s->last_picture_ptr)
4154             src= (AVFrame*)s->last_picture_ptr;
4155         else
4156             return;
4157
4158         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4159             offset[0]=
4160             offset[1]=
4161             offset[2]=
4162             offset[3]= 0;
4163         }else{
4164             offset[0]= y * s->linesize;;
4165             offset[1]=
4166             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4167             offset[3]= 0;
4168         }
4169
4170         emms_c();
4171
4172         s->avctx->draw_horiz_band(s->avctx, src, offset,
4173                                   y, s->picture_structure, h);
4174     }
4175 }
4176
/**
 * Initialize s->block_index[] and the s->dest[] plane pointers for the
 * macroblock row at (s->mb_x, s->mb_y).
 * NOTE(review): the -2/-1 and (mb_x - 1) biases appear to anticipate a
 * per-macroblock advance done by the callers — confirm against the callers.
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    /* log2 of the macroblock size; shrunk in lowres decoding mode */
    const int mb_size= 4 - s->avctx->lowres;

    /* four luma 8x8 blocks on the b8 grid */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma blocks, stored after the whole luma area (b8_stride*mb_height*2) */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    /* for B frames with draw_horiz_band the row offset is applied elsewhere */
    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4201
4202 #ifdef CONFIG_ENCODERS
4203
4204 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4205     int x, y;
4206 //FIXME optimize
4207     for(y=0; y<8; y++){
4208         for(x=0; x<8; x++){
4209             int x2, y2;
4210             int sum=0;
4211             int sqr=0;
4212             int count=0;
4213
4214             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4215                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4216                     int v= ptr[x2 + y2*stride];
4217                     sum += v;
4218                     sqr += v*v;
4219                     count++;
4220                 }
4221             }
4222             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4223         }
4224     }
4225 }
4226
/**
 * Encode one macroblock: adaptive quantizer update, pixel/difference
 * extraction (with interlaced-DCT decision), DCT + quantization with
 * optional noise shaping and coefficient elimination, and finally the
 * codec-specific entropy coding of s->block[].
 * (motion_x, motion_y) is the motion vector handed to the entropy coder.
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* B frames can only signal even dquant, and
                               direct-mode MBs cannot change qscale at all */
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* MB sticks out of the picture: extend the edges into a temp buffer */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* decide frame vs field DCT by comparing the two interleavings
           of the luma source rows */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* run motion compensation to get the prediction to subtract */
        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* interlaced-DCT decision on the residual (source vs prediction) */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* in low-variance MBs, blocks whose SAD against the prediction is
           small enough would quantize to zero anyway: skip their DCT */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping needs the per-pixel weights and the pre-quantization
       coefficients (orig) for the refinement pass below */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop blocks containing only cheap isolated +-1 coefficients */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* gray intra MBs still need a neutral chroma DC */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4488
4489 #endif //CONFIG_ENCODERS
4490
4491 void ff_mpeg_flush(AVCodecContext *avctx){
4492     int i;
4493     MpegEncContext *s = avctx->priv_data;
4494
4495     if(s==NULL || s->picture==NULL)
4496         return;
4497
4498     for(i=0; i<MAX_PICTURE_COUNT; i++){
4499        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4500                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4501         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4502     }
4503     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4504
4505     s->mb_x= s->mb_y= 0;
4506
4507     s->parse_context.state= -1;
4508     s->parse_context.frame_start_found= 0;
4509     s->parse_context.overread= 0;
4510     s->parse_context.overread_index= 0;
4511     s->parse_context.index= 0;
4512     s->parse_context.last_index= 0;
4513     s->bitstream_buffer_size=0;
4514 }
4515
4516 #ifdef CONFIG_ENCODERS
4517 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4518 {
4519     const uint16_t *srcw= (uint16_t*)src;
4520     int words= length>>4;
4521     int bits= length&15;
4522     int i;
4523
4524     if(length==0) return;
4525
4526     if(words < 16){
4527         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4528     }else if(put_bits_count(pb)&7){
4529         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4530     }else{
4531         for(i=0; put_bits_count(pb)&31; i++)
4532             put_bits(pb, 8, src[i]);
4533         flush_put_bits(pb);
4534         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4535         skip_put_bytes(pb, 2*words-i);
4536     }
4537
4538     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4539 }
4540
4541 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4542     int i;
4543
4544     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4545
4546     /* mpeg1 */
4547     d->mb_skip_run= s->mb_skip_run;
4548     for(i=0; i<3; i++)
4549         d->last_dc[i]= s->last_dc[i];
4550
4551     /* statistics */
4552     d->mv_bits= s->mv_bits;
4553     d->i_tex_bits= s->i_tex_bits;
4554     d->p_tex_bits= s->p_tex_bits;
4555     d->i_count= s->i_count;
4556     d->f_count= s->f_count;
4557     d->b_count= s->b_count;
4558     d->skip_count= s->skip_count;
4559     d->misc_bits= s->misc_bits;
4560     d->last_bits= 0;
4561
4562     d->mb_skipped= 0;
4563     d->qscale= s->qscale;
4564     d->dquant= s->dquant;
4565 }
4566
4567 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4568     int i;
4569
4570     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4571     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4572
4573     /* mpeg1 */
4574     d->mb_skip_run= s->mb_skip_run;
4575     for(i=0; i<3; i++)
4576         d->last_dc[i]= s->last_dc[i];
4577
4578     /* statistics */
4579     d->mv_bits= s->mv_bits;
4580     d->i_tex_bits= s->i_tex_bits;
4581     d->p_tex_bits= s->p_tex_bits;
4582     d->i_count= s->i_count;
4583     d->f_count= s->f_count;
4584     d->b_count= s->b_count;
4585     d->skip_count= s->skip_count;
4586     d->misc_bits= s->misc_bits;
4587
4588     d->mb_intra= s->mb_intra;
4589     d->mb_skipped= s->mb_skipped;
4590     d->mv_type= s->mv_type;
4591     d->mv_dir= s->mv_dir;
4592     d->pb= s->pb;
4593     if(s->data_partitioning){
4594         d->pb2= s->pb2;
4595         d->tex_pb= s->tex_pb;
4596     }
4597     d->block= s->block;
4598     for(i=0; i<6; i++)
4599         d->block_last_index[i]= s->block_last_index[i];
4600     d->interlaced_dct= s->interlaced_dct;
4601     d->qscale= s->qscale;
4602 }
4603
/**
 * Trial-encode the current macroblock as candidate @p type and keep the
 * result if it beats the best score so far.
 *
 * Double-buffering: two scratch bitstreams (pb/pb2/tex_pb[0..1]) and two
 * coefficient-block sets are alternated via *next_block; buffer index
 * (*next_block)^1 always holds the best candidate found so far.
 *
 * The score is the bit count of the encoded MB; with FF_MB_DECISION_RD
 * it becomes a full rate-distortion cost (bits*lambda2 + SSE).
 *
 * @param s          encoder context (trial state, clobbered)
 * @param backup     state snapshot to restore before encoding (see
 *                   copy_context_before_encode())
 * @param best       receives the winning candidate's state
 * @param type       CANDIDATE_MB_TYPE_* being tried
 * @param pb,pb2,tex_pb  the two scratch bitstream writers per stream
 * @param dmin       in/out: best (lowest) score so far
 * @param next_block in/out: which scratch buffer set to encode into
 * @param motion_x,motion_y  MV passed through to encode_mb()
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* restore the pre-MB encoder state for a fair trial */
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    if(*next_block){
        /* buffer 1: decode into the RD scratchpad instead of the real
           destination, so the current best reconstruction is preserved */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* base score: bits used by this candidate */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* reconstruct and add the distortion term: bits*lambda2 + SSE */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        /* restore the real destination pointers */
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        /* new best: flip buffers so this result is kept, and save state */
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4654
4655 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4656     uint32_t *sq = squareTbl + 256;
4657     int acc=0;
4658     int x,y;
4659
4660     if(w==16 && h==16)
4661         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4662     else if(w==8 && h==8)
4663         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4664
4665     for(y=0; y<h; y++){
4666         for(x=0; x<w; x++){
4667             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4668         }
4669     }
4670
4671     assert(acc>=0);
4672
4673     return acc;
4674 }
4675
4676 static int sse_mb(MpegEncContext *s){
4677     int w= 16;
4678     int h= 16;
4679
4680     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4681     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4682
4683     if(w==16 && h==16)
4684       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4685         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4686                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4687                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4688       }else{
4689         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4690                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4691                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4692       }
4693     else
4694         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4695                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4696                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4697 }
4698
4699 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4700     MpegEncContext *s= arg;
4701
4702
4703     s->me.pre_pass=1;
4704     s->me.dia_size= s->avctx->pre_dia_size;
4705     s->first_slice_line=1;
4706     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4707         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4708             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4709         }
4710         s->first_slice_line=0;
4711     }
4712
4713     s->me.pre_pass=0;
4714
4715     return 0;
4716 }
4717
4718 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4719     MpegEncContext *s= arg;
4720
4721     s->me.dia_size= s->avctx->dia_size;
4722     s->first_slice_line=1;
4723     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4724         s->mb_x=0; //for block init below
4725         ff_init_block_index(s);
4726         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4727             s->block_index[0]+=2;
4728             s->block_index[1]+=2;
4729             s->block_index[2]+=2;
4730             s->block_index[3]+=2;
4731
4732             /* compute motion vector & mb_type and store in context */
4733             if(s->pict_type==B_TYPE)
4734                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4735             else
4736                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4737         }
4738         s->first_slice_line=0;
4739     }
4740     return 0;
4741 }
4742
4743 static int mb_var_thread(AVCodecContext *c, void *arg){
4744     MpegEncContext *s= arg;
4745     int mb_x, mb_y;
4746
4747     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4748         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4749             int xx = mb_x * 16;
4750             int yy = mb_y * 16;
4751             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4752             int varc;
4753             int sum = s->dsp.pix_sum(pix, s->linesize);
4754
4755             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4756
4757             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4758             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4759             s->me.mb_var_sum_temp    += varc;
4760         }
4761     }
4762     return 0;
4763 }
4764
/**
 * Terminate the current slice/packet in the output bitstream:
 * merge MPEG-4 data partitions (when used), emit codec-specific stuffing,
 * then byte-align and flush the bitstream writer. For a first-pass encode
 * the alignment/stuffing bits are accounted as misc_bits.
 */
static void write_slice_end(MpegEncContext *s){
    if(s->codec_id==CODEC_ID_MPEG4){
        if(s->partitioned_frame){
            ff_mpeg4_merge_partitions(s);
        }

        ff_mpeg4_stuffing(&s->pb);
    }else if(s->out_format == FMT_MJPEG){
        ff_mjpeg_stuffing(&s->pb);
    }

    /* pad to a byte boundary and push everything to the buffer */
    align_put_bits(&s->pb);
    flush_put_bits(&s->pb);

    /* NOTE(review): skipped for partitioned frames, presumably because
       the partition merge already did the bit accounting — confirm */
    if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
        s->misc_bits+= get_bits_diff(s);
}
4782
4783 static int encode_thread(AVCodecContext *c, void *arg){
4784     MpegEncContext *s= arg;
4785     int mb_x, mb_y, pdif = 0;
4786     int i, j;
4787     MpegEncContext best_s, backup_s;
4788     uint8_t bit_buf[2][MAX_MB_BYTES];
4789     uint8_t bit_buf2[2][MAX_MB_BYTES];
4790     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4791     PutBitContext pb[2], pb2[2], tex_pb[2];
4792 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4793
4794     for(i=0; i<2; i++){
4795         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4796         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4797         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4798     }
4799
4800     s->last_bits= put_bits_count(&s->pb);
4801     s->mv_bits=0;
4802     s->misc_bits=0;
4803     s->i_tex_bits=0;
4804     s->p_tex_bits=0;
4805     s->i_count=0;
4806     s->f_count=0;
4807     s->b_count=0;
4808     s->skip_count=0;
4809
4810     for(i=0; i<3; i++){
4811         /* init last dc values */
4812         /* note: quant matrix value (8) is implied here */
4813         s->last_dc[i] = 128 << s->intra_dc_precision;
4814
4815         s->current_picture.error[i] = 0;
4816     }
4817     s->mb_skip_run = 0;
4818     memset(s->last_mv, 0, sizeof(s->last_mv));
4819
4820     s->last_mv_dir = 0;
4821
4822     switch(s->codec_id){
4823     case CODEC_ID_H263:
4824     case CODEC_ID_H263P:
4825     case CODEC_ID_FLV1:
4826         s->gob_index = ff_h263_get_gob_height(s);
4827         break;
4828     case CODEC_ID_MPEG4:
4829         if(s->partitioned_frame)
4830             ff_mpeg4_init_partitions(s);
4831         break;
4832     }
4833
4834     s->resync_mb_x=0;
4835     s->resync_mb_y=0;
4836     s->first_slice_line = 1;
4837     s->ptr_lastgob = s->pb.buf;
4838     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4839 //    printf("row %d at %X\n", s->mb_y, (int)s);
4840         s->mb_x=0;
4841         s->mb_y= mb_y;
4842
4843         ff_set_qscale(s, s->qscale);
4844         ff_init_block_index(s);
4845
4846         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4847             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4848             int mb_type= s->mb_type[xy];
4849 //            int d;
4850             int dmin= INT_MAX;
4851             int dir;
4852
4853             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4854                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4855                 return -1;
4856             }
4857             if(s->data_partitioning){
4858                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4859                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4860                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4861                     return -1;
4862                 }
4863             }
4864
4865             s->mb_x = mb_x;
4866             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4867             ff_update_block_index(s);
4868
4869 #ifdef CONFIG_H261_ENCODER
4870             if(s->codec_id == CODEC_ID_H261){
4871                 ff_h261_reorder_mb_index(s);
4872                 xy= s->mb_y*s->mb_stride + s->mb_x;
4873                 mb_type= s->mb_type[xy];
4874             }
4875 #endif
4876
4877             /* write gob / video packet header  */
4878             if(s->rtp_mode){
4879                 int current_packet_size, is_gob_start;
4880
4881                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4882
4883                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4884
4885                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4886
4887                 switch(s->codec_id){
4888                 case CODEC_ID_H263:
4889                 case CODEC_ID_H263P:
4890                     if(!s->h263_slice_structured)
4891                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4892                     break;
4893                 case CODEC_ID_MPEG2VIDEO:
4894                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4895                 case CODEC_ID_MPEG1VIDEO:
4896                     if(s->mb_skip_run) is_gob_start=0;
4897                     break;
4898                 }
4899
4900                 if(is_gob_start){
4901                     if(s->start_mb_y != mb_y || mb_x!=0){
4902                         write_slice_end(s);
4903
4904                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4905                             ff_mpeg4_init_partitions(s);
4906                         }
4907                     }
4908
4909                     assert((put_bits_count(&s->pb)&7) == 0);
4910                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4911
4912                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4913                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4914                         int d= 100 / s->avctx->error_rate;
4915                         if(r % d == 0){
4916                             current_packet_size=0;
4917 #ifndef ALT_BITSTREAM_WRITER
4918                             s->pb.buf_ptr= s->ptr_lastgob;
4919 #endif
4920                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4921                         }
4922                     }
4923
4924                     if (s->avctx->rtp_callback){
4925                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4926                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4927                     }
4928
4929                     switch(s->codec_id){
4930                     case CODEC_ID_MPEG4:
4931                         ff_mpeg4_encode_video_packet_header(s);
4932                         ff_mpeg4_clean_buffers(s);
4933                     break;
4934                     case CODEC_ID_MPEG1VIDEO:
4935                     case CODEC_ID_MPEG2VIDEO:
4936                         ff_mpeg1_encode_slice_header(s);
4937                         ff_mpeg1_clean_buffers(s);
4938                     break;
4939                     case CODEC_ID_H263:
4940                     case CODEC_ID_H263P:
4941                         h263_encode_gob_header(s, mb_y);
4942                     break;
4943                     }
4944
4945                     if(s->flags&CODEC_FLAG_PASS1){
4946                         int bits= put_bits_count(&s->pb);
4947                         s->misc_bits+= bits - s->last_bits;
4948                         s->last_bits= bits;
4949                     }
4950
4951                     s->ptr_lastgob += current_packet_size;
4952                     s->first_slice_line=1;
4953                     s->resync_mb_x=mb_x;
4954                     s->resync_mb_y=mb_y;
4955                 }
4956             }
4957
4958             if(  (s->resync_mb_x   == s->mb_x)
4959                && s->resync_mb_y+1 == s->mb_y){
4960                 s->first_slice_line=0;
4961             }
4962
4963             s->mb_skipped=0;
4964             s->dquant=0; //only for QP_RD
4965
4966             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4967                 int next_block=0;
4968                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4969
4970                 copy_context_before_encode(&backup_s, s, -1);
4971                 backup_s.pb= s->pb;
4972                 best_s.data_partitioning= s->data_partitioning;
4973                 best_s.partitioned_frame= s->partitioned_frame;
4974                 if(s->data_partitioning){
4975                     backup_s.pb2= s->pb2;
4976                     backup_s.tex_pb= s->tex_pb;
4977                 }
4978
4979                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4980                     s->mv_dir = MV_DIR_FORWARD;
4981                     s->mv_type = MV_TYPE_16X16;
4982                     s->mb_intra= 0;
4983                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4984                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4985                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4986                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4987                 }
4988                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4989                     s->mv_dir = MV_DIR_FORWARD;
4990                     s->mv_type = MV_TYPE_FIELD;
4991                     s->mb_intra= 0;
4992                     for(i=0; i<2; i++){
4993                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4994                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4995                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4996                     }
4997                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4998                                  &dmin, &next_block, 0, 0);
4999                 }
5000                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5001                     s->mv_dir = MV_DIR_FORWARD;
5002                     s->mv_type = MV_TYPE_16X16;
5003                     s->mb_intra= 0;
5004                     s->mv[0][0][0] = 0;
5005                     s->mv[0][0][1] = 0;
5006                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5007                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5008                 }
5009                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5010                     s->mv_dir = MV_DIR_FORWARD;
5011                     s->mv_type = MV_TYPE_8X8;
5012                     s->mb_intra= 0;
5013                     for(i=0; i<4; i++){
5014                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5015                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5016                     }
5017                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5018                                  &dmin, &next_block, 0, 0);
5019                 }
5020                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5021                     s->mv_dir = MV_DIR_FORWARD;
5022                     s->mv_type = MV_TYPE_16X16;
5023                     s->mb_intra= 0;
5024                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5025                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5026                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5027                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5028                 }
5029                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5030                     s->mv_dir = MV_DIR_BACKWARD;
5031                     s->mv_type = MV_TYPE_16X16;
5032                     s->mb_intra= 0;
5033                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5034                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5036                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5037                 }
5038                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5039                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5040                     s->mv_type = MV_TYPE_16X16;
5041                     s->mb_intra= 0;
5042                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5043                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5044                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5045                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5046                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5047                                  &dmin, &next_block, 0, 0);
5048                 }
5049                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5050                     int mx= s->b_direct_mv_table[xy][0];
5051                     int my= s->b_direct_mv_table[xy][1];
5052
5053                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5054                     s->mb_intra= 0;
5055                     ff_mpeg4_set_direct_mv(s, mx, my);
5056                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5057                                  &dmin, &next_block, mx, my);
5058                 }
5059                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5060                     s->mv_dir = MV_DIR_FORWARD;
5061                     s->mv_type = MV_TYPE_FIELD;
5062                     s->mb_intra= 0;
5063                     for(i=0; i<2; i++){
5064                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5065                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5066                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5067                     }
5068                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5069                                  &dmin, &next_block, 0, 0);
5070                 }
5071                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5072                     s->mv_dir = MV_DIR_BACKWARD;
5073                     s->mv_type = MV_TYPE_FIELD;
5074                     s->mb_intra= 0;
5075                     for(i=0; i<2; i++){
5076                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5077                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5078                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5079                     }
5080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5081                                  &dmin, &next_block, 0, 0);
5082                 }
5083                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5084                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5085                     s->mv_type = MV_TYPE_FIELD;
5086                     s->mb_intra= 0;
5087                     for(dir=0; dir<2; dir++){
5088                         for(i=0; i<2; i++){
5089                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5090                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5091                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5092                         }
5093                     }
5094                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5095                                  &dmin, &next_block, 0, 0);
5096                 }
5097                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5098                     s->mv_dir = 0;
5099                     s->mv_type = MV_TYPE_16X16;
5100                     s->mb_intra= 1;
5101                     s->mv[0][0][0] = 0;
5102                     s->mv[0][0][1] = 0;
5103                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5104                                  &dmin, &next_block, 0, 0);
5105                     if(s->h263_pred || s->h263_aic){
5106                         if(best_s.mb_intra)
5107                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5108                         else
5109                             ff_clean_intra_table_entries(s); //old mode?
5110                     }
5111                 }
5112
5113                 if(s->flags & CODEC_FLAG_QP_RD){
5114                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5115                         const int last_qp= backup_s.qscale;
5116                         int dquant, dir, qp, dc[6];
5117                         DCTELEM ac[6][16];
5118                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5119
5120                         assert(backup_s.dquant == 0);
5121
5122                         //FIXME intra
5123                         s->mv_dir= best_s.mv_dir;
5124                         s->mv_type = MV_TYPE_16X16;
5125                         s->mb_intra= best_s.mb_intra;
5126                         s->mv[0][0][0] = best_s.mv[0][0][0];
5127                         s->mv[0][0][1] = best_s.mv[0][0][1];
5128                         s->mv[1][0][0] = best_s.mv[1][0][0];
5129                         s->mv[1][0][1] = best_s.mv[1][0][1];
5130
5131                         dir= s->pict_type == B_TYPE ? 2 : 1;
5132                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5133                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5134                             qp= last_qp + dquant;
5135                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5136                                 break;
5137                             backup_s.dquant= dquant;
5138                             if(s->mb_intra && s->dc_val[0]){
5139                                 for(i=0; i<6; i++){
5140                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5141                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5142                                 }
5143                             }
5144
5145                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5146                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5147                             if(best_s.qscale != qp){
5148                                 if(s->mb_intra && s->dc_val[0]){
5149                                     for(i=0; i<6; i++){
5150                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5151                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5152                                     }
5153                                 }
5154                                 if(dir > 0 && dquant==dir){
5155                                     dquant= 0;
5156                                     dir= -dir;
5157                                 }else
5158                                     break;
5159                             }
5160                         }
5161                         qp= best_s.qscale;
5162                         s->current_picture.qscale_table[xy]= qp;
5163                     }
5164                 }
5165
5166                 copy_context_after_encode(s, &best_s, -1);
5167
5168                 pb_bits_count= put_bits_count(&s->pb);
5169                 flush_put_bits(&s->pb);
5170                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5171                 s->pb= backup_s.pb;
5172
5173                 if(s->data_partitioning){
5174                     pb2_bits_count= put_bits_count(&s->pb2);
5175                     flush_put_bits(&s->pb2);
5176                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5177                     s->pb2= backup_s.pb2;
5178
5179                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5180                     flush_put_bits(&s->tex_pb);
5181                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5182                     s->tex_pb= backup_s.tex_pb;
5183                 }
5184                 s->last_bits= put_bits_count(&s->pb);
5185
5186                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5187                     ff_h263_update_motion_val(s);
5188
5189                 if(next_block==0){ //FIXME 16 vs linesize16
5190                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5191                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5192                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5193                 }
5194
5195                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5196                     MPV_decode_mb(s, s->block);
5197             } else {
5198                 int motion_x, motion_y;
5199                 s->mv_type=MV_TYPE_16X16;
5200                 // only one MB-Type possible
5201
5202                 switch(mb_type){
5203                 case CANDIDATE_MB_TYPE_INTRA:
5204                     s->mv_dir = 0;
5205                     s->mb_intra= 1;
5206                     motion_x= s->mv[0][0][0] = 0;
5207                     motion_y= s->mv[0][0][1] = 0;
5208                     break;
5209                 case CANDIDATE_MB_TYPE_INTER:
5210                     s->mv_dir = MV_DIR_FORWARD;
5211                     s->mb_intra= 0;
5212                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5213                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5214                     break;
5215                 case CANDIDATE_MB_TYPE_INTER_I:
5216                     s->mv_dir = MV_DIR_FORWARD;
5217                     s->mv_type = MV_TYPE_FIELD;
5218                     s->mb_intra= 0;
5219                     for(i=0; i<2; i++){
5220                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5221                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5222                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5223                     }
5224                     motion_x = motion_y = 0;
5225                     break;
5226                 case CANDIDATE_MB_TYPE_INTER4V:
5227                     s->mv_dir = MV_DIR_FORWARD;
5228                     s->mv_type = MV_TYPE_8X8;
5229                     s->mb_intra= 0;
5230                     for(i=0; i<4; i++){
5231                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5232                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5233                     }
5234                     motion_x= motion_y= 0;
5235                     break;
5236                 case CANDIDATE_MB_TYPE_DIRECT:
5237                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5238                     s->mb_intra= 0;
5239                     motion_x=s->b_direct_mv_table[xy][0];
5240                     motion_y=s->b_direct_mv_table[xy][1];
5241                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5242                     break;
5243                 case CANDIDATE_MB_TYPE_BIDIR:
5244                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5245                     s->mb_intra= 0;
5246                     motion_x=0;
5247                     motion_y=0;
5248                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5249                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5250                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5251                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5252                     break;
5253                 case CANDIDATE_MB_TYPE_BACKWARD:
5254                     s->mv_dir = MV_DIR_BACKWARD;
5255                     s->mb_intra= 0;
5256                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5257                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5258                     break;
5259                 case CANDIDATE_MB_TYPE_FORWARD:
5260                     s->mv_dir = MV_DIR_FORWARD;
5261                     s->mb_intra= 0;
5262                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5263                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5264 //                    printf(" %d %d ", motion_x, motion_y);
5265                     break;
5266                 case CANDIDATE_MB_TYPE_FORWARD_I:
5267                     s->mv_dir = MV_DIR_FORWARD;
5268                     s->mv_type = MV_TYPE_FIELD;
5269                     s->mb_intra= 0;
5270                     for(i=0; i<2; i++){
5271                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5272                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5273                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5274                     }
5275                     motion_x=motion_y=0;
5276                     break;
5277                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5278                     s->mv_dir = MV_DIR_BACKWARD;
5279                     s->mv_type = MV_TYPE_FIELD;
5280                     s->mb_intra= 0;
5281                     for(i=0; i<2; i++){
5282                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5283                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5284                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5285                     }
5286                     motion_x=motion_y=0;
5287                     break;
5288                 case CANDIDATE_MB_TYPE_BIDIR_I:
5289                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5290                     s->mv_type = MV_TYPE_FIELD;
5291                     s->mb_intra= 0;
5292                     for(dir=0; dir<2; dir++){
5293                         for(i=0; i<2; i++){
5294                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5295                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5296                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5297                         }
5298                     }
5299                     motion_x=motion_y=0;
5300                     break;
5301                 default:
5302                     motion_x=motion_y=0; //gcc warning fix
5303                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5304                 }
5305
5306                 encode_mb(s, motion_x, motion_y);
5307
5308                 // RAL: Update last macroblock type
5309                 s->last_mv_dir = s->mv_dir;
5310
5311                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5312                     ff_h263_update_motion_val(s);
5313
5314                 MPV_decode_mb(s, s->block);
5315             }
5316
5317             /* clean the MV table in IPS frames for direct mode in B frames */
5318             if(s->mb_intra /* && I,P,S_TYPE */){
5319                 s->p_mv_table[xy][0]=0;
5320                 s->p_mv_table[xy][1]=0;
5321             }
5322
5323             if(s->flags&CODEC_FLAG_PSNR){
5324                 int w= 16;
5325                 int h= 16;
5326
5327                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5328                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5329
5330                 s->current_picture.error[0] += sse(
5331                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5332                     s->dest[0], w, h, s->linesize);
5333                 s->current_picture.error[1] += sse(
5334                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5335                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5336                 s->current_picture.error[2] += sse(
5337                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5338                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5339             }
5340             if(s->loop_filter){
5341                 if(s->out_format == FMT_H263)
5342                     ff_h263_loop_filter(s);
5343             }
5344 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5345         }
5346     }
5347
5348     //not beautiful here but we must write it before flushing so it has to be here
5349     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5350         msmpeg4_encode_ext_header(s);
5351
5352     write_slice_end(s);
5353
5354     /* Send the last GOB if RTP */
5355     if (s->avctx->rtp_callback) {
5356         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5357         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5358         /* Call the RTP callback to send the last GOB */
5359         emms_c();
5360         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5361     }
5362
5363     return 0;
5364 }
5365
5366 #define MERGE(field) dst->field += src->field; src->field=0
5367 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5368     MERGE(me.scene_change_score);
5369     MERGE(me.mc_mb_var_sum_temp);
5370     MERGE(me.mb_var_sum_temp);
5371 }
5372
5373 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5374     int i;
5375
5376     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5377     MERGE(dct_count[1]);
5378     MERGE(mv_bits);
5379     MERGE(i_tex_bits);
5380     MERGE(p_tex_bits);
5381     MERGE(i_count);
5382     MERGE(f_count);
5383     MERGE(b_count);
5384     MERGE(skip_count);
5385     MERGE(misc_bits);
5386     MERGE(error_count);
5387     MERGE(padding_bug_score);
5388     MERGE(current_picture.error[0]);
5389     MERGE(current_picture.error[1]);
5390     MERGE(current_picture.error[2]);
5391
5392     if(dst->avctx->noise_reduction){
5393         for(i=0; i<64; i++){
5394             MERGE(dct_error_sum[0][i]);
5395             MERGE(dct_error_sum[1][i]);
5396         }
5397     }
5398
5399     assert(put_bits_count(&src->pb) % 8 ==0);
5400     assert(put_bits_count(&dst->pb) % 8 ==0);
5401     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5402     flush_put_bits(&dst->pb);
5403 }
5404
5405 static void estimate_qp(MpegEncContext *s, int dry_run){
5406     if (!s->fixed_qscale)
5407         s->current_picture_ptr->quality=
5408         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5409
5410     if(s->adaptive_quant){
5411         switch(s->codec_id){
5412         case CODEC_ID_MPEG4:
5413             ff_clean_mpeg4_qscales(s);
5414             break;
5415         case CODEC_ID_H263:
5416         case CODEC_ID_H263P:
5417         case CODEC_ID_FLV1:
5418             ff_clean_h263_qscales(s);
5419             break;
5420         }
5421
5422         s->lambda= s->lambda_table[0];
5423         //FIXME broken
5424     }else
5425         s->lambda= s->current_picture.quality;
5426 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5427     update_qscale(s);
5428 }
5429
/**
 * Encode one picture: set up rounding and the quantizer, run motion
 * estimation across all slice-thread contexts, fix up f_code/b_code and
 * overly long motion vectors, write the picture header, then run the
 * slice encoder threads and merge their results back into *s.
 *
 * @param s main encoder context (thread_context[0])
 * @param picture_number display/coding number passed to the header writers
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* rounding control: alternate on non-B frames for codecs that use
       flipflop rounding, reset on I frames */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* lambda/qscale for the motion-estimation pass: 2nd pass asks the
       rate controller; otherwise reuse the last lambda of this picture
       type unless a fixed qscale was requested */
    if(s->flags & CODEC_FLAG_PASS2){
        estimate_qp(s, 1);
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* collect per-thread ME statistics into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene-change detection: re-code the P frame as intra */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the MV tables and clip MVs that do not
       fit into the chosen range (skipped for H.263+ unlimited MVs) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    estimate_qp(s, 0);

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the picture header for the chosen output format; header size
       is measured via the bit count before/after */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* run the slice encoders and merge their statistics/bitstreams */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5642
5643 #endif //CONFIG_ENCODERS
5644
5645 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5646     const int intra= s->mb_intra;
5647     int i;
5648
5649     s->dct_count[intra]++;
5650
5651     for(i=0; i<64; i++){
5652         int level= block[i];
5653
5654         if(level){
5655             if(level>0){
5656                 s->dct_error_sum[intra][i] += level;
5657                 level -= s->dct_offset[intra][i];
5658                 if(level<0) level=0;
5659             }else{
5660                 s->dct_error_sum[intra][i] -= level;
5661                 level += s->dct_offset[intra][i];
5662                 if(level>0) level=0;
5663             }
5664             block[i]= level;
5665         }
5666     }
5667 }
5668
5669 #ifdef CONFIG_ENCODERS
5670
/**
 * Rate-distortion optimal ("trellis") quantization of one 8x8 block.
 *
 * The block is transformed (fdct), optionally denoised, then for each
 * scan position up to two candidate quantized levels are generated.
 * A dynamic program over scan positions (with a pruned "survivor" list
 * of candidate run starts) picks the level/run combination minimizing
 * distortion + lambda * bits.
 *
 * @param s        encoder context (quant matrices, VLC length tables, ...)
 * @param block    spatial-domain samples in, quantized coefficients out
 * @param n        block index within the macroblock (<4 = luma)
 * @param qscale   quantizer scale for this block
 * @param overflow set nonzero if a coefficient exceeded s->max_qcoeff
 * @return index of the last nonzero coefficient, or a value < start_i
 *         (e.g. -1) if the block quantized to all zeros
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    /* per-position DP backtracking info: run/level chosen when position
       i is the end of the coded prefix, and its accumulated score */
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];
    int survivor[65];       // candidate start positions still worth extending
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];       // up to two candidate levels per position
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    /* intra blocks: quantize the DC separately and pick the intra
       matrices/VLC tables; inter blocks start the AC scan at 0 */
    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last coefficient that survives quantization */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate levels: the rounded level and level-1 for
       coefficients above threshold, otherwise +/-1 as the only option */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* the dynamic program: for each position try each candidate level
       combined with every surviving run start, tracking both the best
       continuing path and the best path that terminates here */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;

            assert(level);

            /* reconstruct the dequantized value the decoder would see */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits in the VLC length tables */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    /* also try ending the block at this coefficient,
                       using the "last" VLC table */
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* level outside the tables: costed as an escape code */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* non-H.263 formats signal "last" via EOB instead of a dedicated
       table, so pick the best termination point here */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: only the first (DC for inter) coefficient remains;
       re-evaluate whether coding it at all beats dropping it */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack through run_tab/level_tab and write the chosen levels
       into the block (in IDCT permutation order) */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5958
5959 //#define REFINE_STATS 1
5960 static int16_t basis[64][64];
5961
5962 static void build_basis(uint8_t *perm){
5963     int i, j, x, y;
5964     emms_c();
5965     for(i=0; i<8; i++){
5966         for(j=0; j<8; j++){
5967             for(y=0; y<8; y++){
5968                 for(x=0; x<8; x++){
5969                     double s= 0.25*(1<<BASIS_SHIFT);
5970                     int index= 8*i + j;
5971                     int perm_index= perm[index];
5972                     if(i==0) s*= sqrt(0.5);
5973                     if(j==0) s*= sqrt(0.5);
5974                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5975                 }
5976             }
5977         }
5978     }
5979 }
5980
5981 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5982                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5983                         int n, int qscale){
5984     int16_t rem[64];
5985     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
5986     const int *qmat;
5987     const uint8_t *scantable= s->intra_scantable.scantable;
5988     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5989 //    unsigned int threshold1, threshold2;
5990 //    int bias=0;
5991     int run_tab[65];
5992     int prev_run=0;
5993     int prev_level=0;
5994     int qmul, qadd, start_i, last_non_zero, i, dc;
5995     uint8_t * length;
5996     uint8_t * last_length;
5997     int lambda;
5998     int rle_index, run, q, sum;
5999 #ifdef REFINE_STATS
6000 static int count=0;
6001 static int after_last=0;
6002 static int to_zero=0;
6003 static int from_zero=0;
6004 static int raise=0;
6005 static int lower=0;
6006 static int messed_sign=0;
6007 #endif
6008
6009     if(basis[0][0] == 0)
6010         build_basis(s->dsp.idct_permutation);
6011
6012     qmul= qscale*2;
6013     qadd= (qscale-1)|1;
6014     if (s->mb_intra) {
6015         if (!s->h263_aic) {
6016             if (n < 4)
6017                 q = s->y_dc_scale;
6018             else
6019                 q = s->c_dc_scale;
6020         } else{
6021             /* For AIC we skip quant/dequant of INTRADC */
6022             q = 1;
6023             qadd=0;
6024         }
6025         q <<= RECON_SHIFT-3;
6026         /* note: block[0] is assumed to be positive */
6027         dc= block[0]*q;
6028 //        block[0] = (block[0] + (q >> 1)) / q;
6029         start_i = 1;
6030         qmat = s->q_intra_matrix[qscale];
6031 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6032 //            bias= 1<<(QMAT_SHIFT-1);
6033         length     = s->intra_ac_vlc_length;
6034         last_length= s->intra_ac_vlc_last_length;
6035     } else {
6036         dc= 0;
6037         start_i = 0;
6038         qmat = s->q_inter_matrix[qscale];
6039         length     = s->inter_ac_vlc_length;
6040         last_length= s->inter_ac_vlc_last_length;
6041     }
6042     last_non_zero = s->block_last_index[n];
6043
6044 #ifdef REFINE_STATS
6045 {START_TIMER
6046 #endif
6047     dc += (1<<(RECON_SHIFT-1));
6048     for(i=0; i<64; i++){
6049         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
6050     }
6051 #ifdef REFINE_STATS
6052 STOP_TIMER("memset rem[]")}
6053 #endif
6054     sum=0;
6055     for(i=0; i<64; i++){
6056         int one= 36;
6057         int qns=4;
6058         int w;
6059
6060         w= ABS(weight[i]) + qns*one;
6061         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6062
6063         weight[i] = w;
6064 //        w=weight[i] = (63*qns + (w/2)) / w;
6065
6066         assert(w>0);
6067         assert(w<(1<<6));
6068         sum += w*w;
6069     }
6070     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6071 #ifdef REFINE_STATS
6072 {START_TIMER
6073 #endif
6074     run=0;
6075     rle_index=0;
6076     for(i=start_i; i<=last_non_zero; i++){
6077         int j= perm_scantable[i];
6078         const int level= block[j];
6079         int coeff;
6080
6081         if(level){
6082             if(level<0) coeff= qmul*level - qadd;
6083             else        coeff= qmul*level + qadd;
6084             run_tab[rle_index++]=run;
6085             run=0;
6086
6087             s->dsp.add_8x8basis(rem, basis[j], coeff);
6088         }else{
6089             run++;
6090         }
6091     }
6092 #ifdef REFINE_STATS
6093 if(last_non_zero>0){
6094 STOP_TIMER("init rem[]")
6095 }
6096 }
6097
6098 {START_TIMER
6099 #endif
6100     for(;;){
6101         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6102         int best_coeff=0;
6103         int best_change=0;
6104         int run2, best_unquant_change=0, analyze_gradient;
6105 #ifdef REFINE_STATS
6106 {START_TIMER
6107 #endif
6108         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6109
6110         if(analyze_gradient){
6111 #ifdef REFINE_STATS
6112 {START_TIMER
6113 #endif
6114             for(i=0; i<64; i++){
6115                 int w= weight[i];
6116
6117                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6118             }
6119 #ifdef REFINE_STATS
6120 STOP_TIMER("rem*w*w")}
6121 {START_TIMER
6122 #endif
6123             s->dsp.fdct(d1);
6124 #ifdef REFINE_STATS
6125 STOP_TIMER("dct")}
6126 #endif
6127         }
6128
6129         if(start_i){
6130             const int level= block[0];
6131             int change, old_coeff;
6132
6133             assert(s->mb_intra);
6134
6135             old_coeff= q*level;
6136
6137             for(change=-1; change<=1; change+=2){
6138                 int new_level= level + change;
6139                 int score, new_coeff;
6140
6141                 new_coeff= q*new_level;
6142                 if(new_coeff >= 2048 || new_coeff < 0)
6143                     continue;
6144
6145                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6146                 if(score<best_score){
6147                     best_score= score;
6148                     best_coeff= 0;
6149                     best_change= change;
6150                     best_unquant_change= new_coeff - old_coeff;
6151                 }
6152             }
6153         }
6154
6155         run=0;
6156         rle_index=0;
6157         run2= run_tab[rle_index++];
6158         prev_level=0;
6159         prev_run=0;
6160
6161         for(i=start_i; i<64; i++){
6162             int j= perm_scantable[i];
6163             const int level= block[j];
6164             int change, old_coeff;
6165
6166             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6167                 break;
6168
6169             if(level){
6170                 if(level<0) old_coeff= qmul*level - qadd;
6171                 else        old_coeff= qmul*level + qadd;
6172                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6173             }else{
6174                 old_coeff=0;
6175                 run2--;
6176                 assert(run2>=0 || i >= last_non_zero );
6177             }
6178
6179             for(change=-1; change<=1; change+=2){
6180                 int new_level= level + change;
6181                 int score, new_coeff, unquant_change;
6182
6183                 score=0;
6184                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6185                    continue;
6186
6187                 if(new_level){
6188                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6189                     else            new_coeff= qmul*new_level + qadd;
6190                     if(new_coeff >= 2048 || new_coeff <= -2048)
6191                         continue;
6192                     //FIXME check for overflow
6193
6194                     if(level){
6195                         if(level < 63 && level > -63){
6196                             if(i < last_non_zero)
6197                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6198                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6199                             else
6200                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6201                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6202                         }
6203                     }else{
6204                         assert(ABS(new_level)==1);
6205
6206                         if(analyze_gradient){
6207                             int g= d1[ scantable[i] ];
6208                             if(g && (g^new_level) >= 0)
6209                                 continue;
6210                         }
6211
6212                         if(i < last_non_zero){
6213                             int next_i= i + run2 + 1;
6214                             int next_level= block[ perm_scantable[next_i] ] + 64;
6215
6216                             if(next_level&(~127))
6217                                 next_level= 0;
6218
6219                             if(next_i < last_non_zero)
6220                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6221                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6222                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6223                             else
6224                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6225                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6226                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6227                         }else{
6228                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6229                             if(prev_level){
6230                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6231                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6232                             }
6233                         }
6234                     }
6235                 }else{
6236                     new_coeff=0;
6237                     assert(ABS(level)==1);
6238
6239                     if(i < last_non_zero){
6240                         int next_i= i + run2 + 1;
6241                         int next_level= block[ perm_scantable[next_i] ] + 64;
6242
6243                         if(next_level&(~127))
6244                             next_level= 0;
6245
6246                         if(next_i < last_non_zero)
6247                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6248                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6249                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6250                         else
6251                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6252                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6253                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6254                     }else{
6255                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6256                         if(prev_level){
6257                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6258                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6259                         }
6260                     }
6261                 }
6262
6263                 score *= lambda;
6264
6265                 unquant_change= new_coeff - old_coeff;
6266                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6267
6268                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6269                 if(score<best_score){
6270                     best_score= score;
6271                     best_coeff= i;
6272                     best_change= change;
6273                     best_unquant_change= unquant_change;
6274                 }
6275             }
6276             if(level){
6277                 prev_level= level + 64;
6278                 if(prev_level&(~127))
6279                     prev_level= 0;
6280                 prev_run= run;
6281                 run=0;
6282             }else{
6283                 run++;
6284             }
6285         }
6286 #ifdef REFINE_STATS
6287 STOP_TIMER("iterative step")}
6288 #endif
6289
6290         if(best_change){
6291             int j= perm_scantable[ best_coeff ];
6292
6293             block[j] += best_change;
6294
6295             if(best_coeff > last_non_zero){
6296                 last_non_zero= best_coeff;
6297                 assert(block[j]);
6298 #ifdef REFINE_STATS
6299 after_last++;
6300 #endif
6301             }else{
6302 #ifdef REFINE_STATS
6303 if(block[j]){
6304     if(block[j] - best_change){
6305         if(ABS(block[j]) > ABS(block[j] - best_change)){
6306             raise++;
6307         }else{
6308             lower++;
6309         }
6310     }else{
6311         from_zero++;
6312     }
6313 }else{
6314     to_zero++;
6315 }
6316 #endif
6317                 for(; last_non_zero>=start_i; last_non_zero--){
6318                     if(block[perm_scantable[last_non_zero]])
6319                         break;
6320                 }
6321             }
6322 #ifdef REFINE_STATS
6323 count++;
6324 if(256*256*256*64 % count == 0){
6325     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6326 }
6327 #endif
6328             run=0;
6329             rle_index=0;
6330             for(i=start_i; i<=last_non_zero; i++){
6331                 int j= perm_scantable[i];
6332                 const int level= block[j];
6333
6334                  if(level){
6335                      run_tab[rle_index++]=run;
6336                      run=0;
6337                  }else{
6338                      run++;
6339                  }
6340             }
6341
6342             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6343         }else{
6344             break;
6345         }
6346     }
6347 #ifdef REFINE_STATS
6348 if(last_non_zero>0){
6349 STOP_TIMER("iterative search")
6350 }
6351 }
6352 #endif
6353
6354     return last_non_zero;
6355 }
6356
/**
 * Forward-DCT and quantize one 8x8 block (generic C implementation).
 * @param block    spatial-domain coefficients on input, quantized
 *                 transform coefficients on output
 * @param n        block index (<4 = luma, else chroma; selects DC scale)
 * @param qscale   quantizer scale selecting the precomputed q matrix
 * @param overflow set nonzero if a quantized level may exceed s->max_qcoeff
 * @return scan-order index of the last non-zero coefficient
 *         (0 for intra / -1 for inter when only zeros remain)
 */
static int dct_quantize_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow)
{
    int i, j, level, last_non_zero, q, start_i;
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->dsp.fdct (block);

    /* optional noise-reduction pass using the accumulated DCT error */
    if(s->dct_error_sum)
        s->denoise_dct(s, block);

    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;  /* DC quantized separately with rounding */
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
       level > threshold1 || level < -threshold1, i.e. "outside the dead zone" */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);
    /* scan backwards to find the last coefficient that will survive
       quantization; everything after it is cleared immediately */
    for(i=63;i>=start_i;i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }else{
            block[j]=0;
        }
    }
    /* quantize the remaining coefficients in scan order */
    for(i=start_i; i<=last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

//        if(   bias+level >= (1<<QMAT_SHIFT)
//           || bias-level >= (1<<QMAT_SHIFT)){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            max |=level;  /* OR of magnitudes: conservative overflow detector */
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happened

    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
6436
6437 #endif //CONFIG_ENCODERS
6438
6439 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6440                                    DCTELEM *block, int n, int qscale)
6441 {
6442     int i, level, nCoeffs;
6443     const uint16_t *quant_matrix;
6444
6445     nCoeffs= s->block_last_index[n];
6446
6447     if (n < 4)
6448         block[0] = block[0] * s->y_dc_scale;
6449     else
6450         block[0] = block[0] * s->c_dc_scale;
6451     /* XXX: only mpeg1 */
6452     quant_matrix = s->intra_matrix;
6453     for(i=1;i<=nCoeffs;i++) {
6454         int j= s->intra_scantable.permutated[i];
6455         level = block[j];
6456         if (level) {
6457             if (level < 0) {
6458                 level = -level;
6459                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6460                 level = (level - 1) | 1;
6461                 level = -level;
6462             } else {
6463                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6464                 level = (level - 1) | 1;
6465             }
6466             block[j] = level;
6467         }
6468     }
6469 }
6470
6471 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6472                                    DCTELEM *block, int n, int qscale)
6473 {
6474     int i, level, nCoeffs;
6475     const uint16_t *quant_matrix;
6476
6477     nCoeffs= s->block_last_index[n];
6478
6479     quant_matrix = s->inter_matrix;
6480     for(i=0; i<=nCoeffs; i++) {
6481         int j= s->intra_scantable.permutated[i];
6482         level = block[j];
6483         if (level) {
6484             if (level < 0) {
6485                 level = -level;
6486                 level = (((level << 1) + 1) * qscale *
6487                          ((int) (quant_matrix[j]))) >> 4;
6488                 level = (level - 1) | 1;
6489                 level = -level;
6490             } else {
6491                 level = (((level << 1) + 1) * qscale *
6492                          ((int) (quant_matrix[j]))) >> 4;
6493                 level = (level - 1) | 1;
6494             }
6495             block[j] = level;
6496         }
6497     }
6498 }
6499
6500 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6501                                    DCTELEM *block, int n, int qscale)
6502 {
6503     int i, level, nCoeffs;
6504     const uint16_t *quant_matrix;
6505
6506     if(s->alternate_scan) nCoeffs= 63;
6507     else nCoeffs= s->block_last_index[n];
6508
6509     if (n < 4)
6510         block[0] = block[0] * s->y_dc_scale;
6511     else
6512         block[0] = block[0] * s->c_dc_scale;
6513     quant_matrix = s->intra_matrix;
6514     for(i=1;i<=nCoeffs;i++) {
6515         int j= s->intra_scantable.permutated[i];
6516         level = block[j];
6517         if (level) {
6518             if (level < 0) {
6519                 level = -level;
6520                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6521                 level = -level;
6522             } else {
6523                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6524             }
6525             block[j] = level;
6526         }
6527     }
6528 }
6529
6530 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6531                                    DCTELEM *block, int n, int qscale)
6532 {
6533     int i, level, nCoeffs;
6534     const uint16_t *quant_matrix;
6535     int sum=-1;
6536
6537     if(s->alternate_scan) nCoeffs= 63;
6538     else nCoeffs= s->block_last_index[n];
6539
6540     quant_matrix = s->inter_matrix;
6541     for(i=0; i<=nCoeffs; i++) {
6542         int j= s->intra_scantable.permutated[i];
6543         level = block[j];
6544         if (level) {
6545             if (level < 0) {
6546                 level = -level;
6547                 level = (((level << 1) + 1) * qscale *
6548                          ((int) (quant_matrix[j]))) >> 4;
6549                 level = -level;
6550             } else {
6551                 level = (((level << 1) + 1) * qscale *
6552                          ((int) (quant_matrix[j]))) >> 4;
6553             }
6554             block[j] = level;
6555             sum+=level;
6556         }
6557     }
6558     block[63]^=sum&1;
6559 }
6560
6561 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6562                                   DCTELEM *block, int n, int qscale)
6563 {
6564     int i, level, qmul, qadd;
6565     int nCoeffs;
6566
6567     assert(s->block_last_index[n]>=0);
6568
6569     qmul = qscale << 1;
6570
6571     if (!s->h263_aic) {
6572         if (n < 4)
6573             block[0] = block[0] * s->y_dc_scale;
6574         else
6575             block[0] = block[0] * s->c_dc_scale;
6576         qadd = (qscale - 1) | 1;
6577     }else{
6578         qadd = 0;
6579     }
6580     if(s->ac_pred)
6581         nCoeffs=63;
6582     else
6583         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6584
6585     for(i=1; i<=nCoeffs; i++) {
6586         level = block[i];
6587         if (level) {
6588             if (level < 0) {
6589                 level = level * qmul - qadd;
6590             } else {
6591                 level = level * qmul + qadd;
6592             }
6593             block[i] = level;
6594         }
6595     }
6596 }
6597
6598 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6599                                   DCTELEM *block, int n, int qscale)
6600 {
6601     int i, level, qmul, qadd;
6602     int nCoeffs;
6603
6604     assert(s->block_last_index[n]>=0);
6605
6606     qadd = (qscale - 1) | 1;
6607     qmul = qscale << 1;
6608
6609     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6610
6611     for(i=0; i<=nCoeffs; i++) {
6612         level = block[i];
6613         if (level) {
6614             if (level < 0) {
6615                 level = level * qmul - qadd;
6616             } else {
6617                 level = level * qmul + qadd;
6618             }
6619             block[i] = level;
6620         }
6621     }
6622 }
6623
6624 #ifdef CONFIG_ENCODERS
/* H.263 encoder, built on the shared MPV_encode_* entry points (YUV420P only). */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6635
/* H.263+ (H.263 version 2) encoder, same MPV_encode_* entry points. */
AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6646
/* Flash Video (FLV1, Sorenson H.263 variant) encoder. */
AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6657
/* RealVideo 1.0 encoder. */
AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6668
/* RealVideo 2.0 encoder. */
AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6679
/* MPEG-4 part 2 encoder; CODEC_CAP_DELAY: output may lag input
 * (frames can be buffered before being emitted). */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};
6691
/* Microsoft MPEG-4 version 1 encoder. */
AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6702
/* Microsoft MPEG-4 version 2 encoder. */
AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6713
/* Microsoft MPEG-4 version 3 encoder (registered under the short name "msmpeg4"). */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6724
/* Windows Media Video 7 (WMV1) encoder. */
AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6735
/* Motion JPEG encoder; note the full-range (JPEG) YUV420 pixel format. */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};
6746
6747 #endif //CONFIG_ENCODERS