git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard.
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  *
  22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  23  */
  24
  25 /**
  26  * @file mpegvideo.c
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "avcodec.h"
  31 #include "dsputil.h"
  32 #include "mpegvideo.h"
  33 #include "faandct.h"
  34 #include <limits.h>
  35
  36 #ifdef USE_FASTMEMCPY
  37 #include "libvo/fastmemcpy.h"
  38 #endif
  39
  40 //#undef NDEBUG
  41 //#include <assert.h>
  42
  43 #ifdef CONFIG_ENCODERS
  44 static int encode_picture(MpegEncContext *s, int picture_number);
  45 #endif //CONFIG_ENCODERS
  46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
  47                                    DCTELEM *block, int n, int qscale);
  48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
  49                                    DCTELEM *block, int n, int qscale);
  50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
  51                                    DCTELEM *block, int n, int qscale);
  52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
  53                                    DCTELEM *block, int n, int qscale);
  54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
  55                                    DCTELEM *block, int n, int qscale);
  56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
  57                                   DCTELEM *block, int n, int qscale);
  58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
  59                                   DCTELEM *block, int n, int qscale);
  60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
  61 #ifdef CONFIG_ENCODERS
  62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  65 static int sse_mb(MpegEncContext *s);
  66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  67 #endif //CONFIG_ENCODERS
  68
  69 #ifdef HAVE_XVMC
  70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
  71 extern void XVMC_field_end(MpegEncContext *s);
  72 extern void XVMC_decode_mb(MpegEncContext *s);
  73 #endif
  74
  75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
  76
  77
  78 /* enable all paranoid tests for rounding, overflows, etc... */
  79 //#define PARANOID
  80
  81 //#define DEBUG
  82
  83
  84 /* for jpeg fast DCT */
  85 #define CONST_BITS 14
  86
  87 static const uint16_t aanscales[64] = {
  88     /* precomputed values scaled up by 14 bits */
  89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  90     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
  91     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
  92     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
  93     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  94     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
  95     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
  96     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
  97 };
  98
  99 static const uint8_t h263_chroma_roundtab[16] = {
 100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
 101     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
 102 };
 103
 104 static const uint8_t ff_default_chroma_qscale_table[32]={
 105 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
 106     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 107 };
 108
 109 #ifdef CONFIG_ENCODERS
 110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
 111 static uint8_t default_fcode_tab[MAX_MV*2+1];
 112
 113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
 114
 115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
 116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
 117 {
 118     int qscale;
 119     int shift=0;
 120
 121     for(qscale=qmin; qscale<=qmax; qscale++){
 122         int i;
 123         if (dsp->fdct == ff_jpeg_fdct_islow
 124 #ifdef FAAN_POSTSCALE
 125             || dsp->fdct == ff_faandct
 126 #endif
 127             ) {
 128             for(i=0;i<64;i++) {
 129                 const int j= dsp->idct_permutation[i];
 130                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 131                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 132                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 133                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 134
 135                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 136                                 (qscale * quant_matrix[j]));
 137             }
 138         } else if (dsp->fdct == fdct_ifast
 139 #ifndef FAAN_POSTSCALE
 140                    || dsp->fdct == ff_faandct
 141 #endif
 142                    ) {
 143             for(i=0;i<64;i++) {
 144                 const int j= dsp->idct_permutation[i];
 145                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 146                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 147                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 148                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 149
 150                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 151                                 (aanscales[i] * qscale * quant_matrix[j]));
 152             }
 153         } else {
 154             for(i=0;i<64;i++) {
 155                 const int j= dsp->idct_permutation[i];
 156                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 157                    So 16           <= qscale * quant_matrix[i]             <= 7905
 158                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 159                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 160                 */
 161                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
 162 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
 163                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 164
 165                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
 166                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
 167             }
 168         }
 169
 170         for(i=intra; i<64; i++){
 171             int64_t max= 8191;
 172             if (dsp->fdct == fdct_ifast
 173 #ifndef FAAN_POSTSCALE
 174                    || dsp->fdct == ff_faandct
 175 #endif
 176                    ) {
 177                 max= (8191LL*aanscales[i]) >> 14;
 178             }
 179             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
 180                 shift++;
 181             }
 182         }
 183     }
 184     if(shift){
 185         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
 186     }
 187 }
 188
 189 static inline void update_qscale(MpegEncContext *s){
 190     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
 191     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 192
 193     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
 194 }
 195 #endif //CONFIG_ENCODERS
 196
 197 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
 198     int i;
 199     int end;
 200
 201     st->scantable= src_scantable;
 202
 203     for(i=0; i<64; i++){
 204         int j;
 205         j = src_scantable[i];
 206         st->permutated[i] = permutation[j];
 207 #ifdef ARCH_POWERPC
 208         st->inverse[j] = i;
 209 #endif
 210     }
 211
 212     end=-1;
 213     for(i=0; i<64; i++){
 214         int j;
 215         j = st->permutated[i];
 216         if(j>end) end=j;
 217         st->raster_end[i]= end;
 218     }
 219 }
 220
 221 #ifdef CONFIG_ENCODERS
 222 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
 223     int i;
 224
 225     if(matrix){
 226         put_bits(pb, 1, 1);
 227         for(i=0;i<64;i++) {
 228             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
 229         }
 230     }else
 231         put_bits(pb, 1, 0);
 232 }
 233 #endif //CONFIG_ENCODERS
 234
 235 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
 236     int i;
 237
 238     assert(p<=end);
 239     if(p>=end)
 240         return end;
 241
 242     for(i=0; i<3; i++){
 243         uint32_t tmp= *state << 8;
 244         *state= tmp + *(p++);
 245         if(tmp == 0x100 || p==end)
 246             return p;
 247     }
 248
 249     while(p<end){
 250         if     (p[-1] > 1      ) p+= 3;
 251         else if(p[-2]          ) p+= 2;
 252         else if(p[-3]|(p[-1]-1)) p++;
 253         else{
 254             p++;
 255             break;
 256         }
 257     }
 258
 259     p= FFMIN(p, end)-4;
 260     *state=  be2me_32(unaligned32(p));
 261
 262     return p+4;
 263 }
 264
 265 /* init common dct for both encoder and decoder */
 266 int DCT_common_init(MpegEncContext *s)
 267 {
 268     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
 269     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
 270     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
 271     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
 272     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
 273     if(s->flags & CODEC_FLAG_BITEXACT)
 274         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
 275     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
 276
 277 #ifdef CONFIG_ENCODERS
 278     s->dct_quantize= dct_quantize_c;
 279     s->denoise_dct= denoise_dct_c;
 280 #endif //CONFIG_ENCODERS
 281
 282 #ifdef HAVE_MMX
 283     MPV_common_init_mmx(s);
 284 #endif
 285 #ifdef ARCH_ALPHA
 286     MPV_common_init_axp(s);
 287 #endif
 288 #ifdef HAVE_MLIB
 289     MPV_common_init_mlib(s);
 290 #endif
 291 #ifdef HAVE_MMI
 292     MPV_common_init_mmi(s);
 293 #endif
 294 #ifdef ARCH_ARMV4L
 295     MPV_common_init_armv4l(s);
 296 #endif
 297 #ifdef ARCH_POWERPC
 298     MPV_common_init_ppc(s);
 299 #endif
 300
 301 #ifdef CONFIG_ENCODERS
 302     s->fast_dct_quantize= s->dct_quantize;
 303
 304     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
 305         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
 306     }
 307
 308 #endif //CONFIG_ENCODERS
 309
 310     /* load & permutate scantables
 311        note: only wmv uses different ones
 312     */
 313     if(s->alternate_scan){
 314         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
 315         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
 316     }else{
 317         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
 318         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
 319     }
 320     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
 321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
 322
 323     return 0;
 324 }
 325
 326 static void copy_picture(Picture *dst, Picture *src){
 327     *dst = *src;
 328     dst->type= FF_BUFFER_TYPE_COPY;
 329 }
 330
 331 #ifdef CONFIG_ENCODERS
 332 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
 333     int i;
 334
 335     dst->pict_type              = src->pict_type;
 336     dst->quality                = src->quality;
 337     dst->coded_picture_number   = src->coded_picture_number;
 338     dst->display_picture_number = src->display_picture_number;
 339 //    dst->reference              = src->reference;
 340     dst->pts                    = src->pts;
 341     dst->interlaced_frame       = src->interlaced_frame;
 342     dst->top_field_first        = src->top_field_first;
 343
 344     if(s->avctx->me_threshold){
 345         if(!src->motion_val[0])
 346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 347         if(!src->mb_type)
 348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 349         if(!src->ref_index[0])
 350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 351         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
 352             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 353             src->motion_subsample_log2, dst->motion_subsample_log2);
 354
 355         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 356
 357         for(i=0; i<2; i++){
 358             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
 359             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
 360
 361             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
 362                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
 363             }
 364             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
 365                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
 366             }
 367         }
 368     }
 369 }
 370 #endif
 371
 372 /**
 373  * allocates a Picture
 374  * The pixels are allocated/set by calling get_buffer() if shared=0
 375  */
 376 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
 377     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
 378     const int mb_array_size= s->mb_stride*s->mb_height;
 379     const int b8_array_size= s->b8_stride*s->mb_height*2;
 380     const int b4_array_size= s->b4_stride*s->mb_height*4;
 381     int i;
 382
 383     if(shared){
 384         assert(pic->data[0]);
 385         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
 386         pic->type= FF_BUFFER_TYPE_SHARED;
 387     }else{
 388         int r;
 389
 390         assert(!pic->data[0]);
 391
 392         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
 393
 394         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
 395             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
 396             return -1;
 397         }
 398
 399         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
 400             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
 401             return -1;
 402         }
 403
 404         if(pic->linesize[1] != pic->linesize[2]){
 405             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
 406             return -1;
 407         }
 408
 409         s->linesize  = pic->linesize[0];
 410         s->uvlinesize= pic->linesize[1];
 411     }
 412
 413     if(pic->qscale_table==NULL){
 414         if (s->encoding) {
 415             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
 416             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
 417             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
 418         }
 419
 420         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
 421         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
 422         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
 423         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
 424         if(s->out_format == FMT_H264){
 425             for(i=0; i<2; i++){
 426                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
 427                 pic->motion_val[i]= pic->motion_val_base[i]+4;
 428                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
 429             }
 430             pic->motion_subsample_log2= 2;
 431         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
 432             for(i=0; i<2; i++){
 433                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
 434                 pic->motion_val[i]= pic->motion_val_base[i]+4;
 435                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
 436             }
 437             pic->motion_subsample_log2= 3;
 438         }
 439         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
 440             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
 441         }
 442         pic->qstride= s->mb_stride;
 443         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
 444     }
 445
 446     //it might be nicer if the application would keep track of these but it would require a API change
 447     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
 448     s->prev_pict_types[0]= s->pict_type;
 449     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
 450         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
 451
 452     return 0;
 453 fail: //for the CHECKED_ALLOCZ macro
 454     return -1;
 455 }
 456
 457 /**
 458  * deallocates a picture
 459  */
 460 static void free_picture(MpegEncContext *s, Picture *pic){
 461     int i;
 462
 463     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
 464         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
 465     }
 466
 467     av_freep(&pic->mb_var);
 468     av_freep(&pic->mc_mb_var);
 469     av_freep(&pic->mb_mean);
 470     av_freep(&pic->mbskip_table);
 471     av_freep(&pic->qscale_table);
 472     av_freep(&pic->mb_type_base);
 473     av_freep(&pic->dct_coeff);
 474     av_freep(&pic->pan_scan);
 475     pic->mb_type= NULL;
 476     for(i=0; i<2; i++){
 477         av_freep(&pic->motion_val_base[i]);
 478         av_freep(&pic->ref_index[i]);
 479     }
 480
 481     if(pic->type == FF_BUFFER_TYPE_SHARED){
 482         for(i=0; i<4; i++){
 483             pic->base[i]=
 484             pic->data[i]= NULL;
 485         }
 486         pic->type= 0;
 487     }
 488 }
 489
 490 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
 491     int i;
 492
 493     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
 494     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
 495     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
 496
 497      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
 498     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
 499     s->rd_scratchpad=   s->me.scratchpad;
 500     s->b_scratchpad=    s->me.scratchpad;
 501     s->obmc_scratchpad= s->me.scratchpad + 16;
 502     if (s->encoding) {
 503         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
 504         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
 505         if(s->avctx->noise_reduction){
 506             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
 507         }
 508     }
 509     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
 510     s->block= s->blocks[0];
 511
 512     for(i=0;i<12;i++){
 513         s->pblocks[i] = (short *)(&s->block[i]);
 514     }
 515     return 0;
 516 fail:
 517     return -1; //free() through MPV_common_end()
 518 }
 519
 520 static void free_duplicate_context(MpegEncContext *s){
 521     if(s==NULL) return;
 522
 523     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
 524     av_freep(&s->me.scratchpad);
 525     s->rd_scratchpad=
 526     s->b_scratchpad=
 527     s->obmc_scratchpad= NULL;
 528
 529     av_freep(&s->dct_error_sum);
 530     av_freep(&s->me.map);
 531     av_freep(&s->me.score_map);
 532     av_freep(&s->blocks);
 533     s->block= NULL;
 534 }
 535
 536 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
 537 #define COPY(a) bak->a= src->a
 538     COPY(allocated_edge_emu_buffer);
 539     COPY(edge_emu_buffer);
 540     COPY(me.scratchpad);
 541     COPY(rd_scratchpad);
 542     COPY(b_scratchpad);
 543     COPY(obmc_scratchpad);
 544     COPY(me.map);
 545     COPY(me.score_map);
 546     COPY(blocks);
 547     COPY(block);
 548     COPY(start_mb_y);
 549     COPY(end_mb_y);
 550     COPY(me.map_generation);
 551     COPY(pb);
 552     COPY(dct_error_sum);
 553     COPY(dct_count[0]);
 554     COPY(dct_count[1]);
 555 #undef COPY
 556 }
 557
 558 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
 559     MpegEncContext bak;
 560     int i;
 561     //FIXME copy only needed parts
 562 //START_TIMER
 563     backup_duplicate_context(&bak, dst);
 564     memcpy(dst, src, sizeof(MpegEncContext));
 565     backup_duplicate_context(dst, &bak);
 566     for(i=0;i<12;i++){
 567         dst->pblocks[i] = (short *)(&dst->block[i]);
 568     }
 569 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 570 }
 571
 572 #ifdef CONFIG_ENCODERS
 573 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
 574 #define COPY(a) dst->a= src->a
 575     COPY(pict_type);
 576     COPY(current_picture);
 577     COPY(f_code);
 578     COPY(b_code);
 579     COPY(qscale);
 580     COPY(lambda);
 581     COPY(lambda2);
 582     COPY(picture_in_gop_number);
 583     COPY(gop_picture_number);
 584     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
 585     COPY(progressive_frame); //FIXME don't set in encode_header
 586     COPY(partitioned_frame); //FIXME don't set in encode_header
 587 #undef COPY
 588 }
 589 #endif
 590
 591 /**
 592  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
 593  * the changed fields will not depend upon the prior state of the MpegEncContext.
 594  */
 595 static void MPV_common_defaults(MpegEncContext *s){
 596     s->y_dc_scale_table=
 597     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 598     s->chroma_qscale_table= ff_default_chroma_qscale_table;
 599     s->progressive_frame= 1;
 600     s->progressive_sequence= 1;
 601     s->picture_structure= PICT_FRAME;
 602
 603     s->coded_picture_number = 0;
 604     s->picture_number = 0;
 605     s->input_picture_number = 0;
 606
 607     s->picture_in_gop_number = 0;
 608
 609     s->f_code = 1;
 610     s->b_code = 1;
 611 }
 612
 613 /**
 614  * sets the given MpegEncContext to defaults for decoding.
 615  * the changed fields will not depend upon the prior state of the MpegEncContext.
 616  */
 617 void MPV_decode_defaults(MpegEncContext *s){
 618     MPV_common_defaults(s);
 619 }
 620
 621 /**
 622  * sets the given MpegEncContext to defaults for encoding.
 623  * the changed fields will not depend upon the prior state of the MpegEncContext.
 624  */
 625
 626 #ifdef CONFIG_ENCODERS
 627 static void MPV_encode_defaults(MpegEncContext *s){
 628     static int done=0;
 629
 630     MPV_common_defaults(s);
 631
 632     if(!done){
 633         int i;
 634         done=1;
 635
 636         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
 637         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
 638
 639         for(i=-16; i<16; i++){
 640             default_fcode_tab[i + MAX_MV]= 1;
 641         }
 642     }
 643     s->me.mv_penalty= default_mv_penalty;
 644     s->fcode_tab= default_fcode_tab;
 645 }
 646 #endif //CONFIG_ENCODERS
 647
 648 /**
 649  * init common structure for both encoder and decoder.
 650  * this assumes that some variables like width/height are already set
 651  */
 652 int MPV_common_init(MpegEncContext *s)
 653 {
 654     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
 655
 656     s->mb_height = (s->height + 15) / 16;
 657
 658     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
 659         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
 660         return -1;
 661     }
 662
 663     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
 664         return -1;
 665
 666     dsputil_init(&s->dsp, s->avctx);
 667     DCT_common_init(s);
 668
 669     s->flags= s->avctx->flags;
 670     s->flags2= s->avctx->flags2;
 671
 672     s->mb_width  = (s->width  + 15) / 16;
 673     s->mb_stride = s->mb_width + 1;
 674     s->b8_stride = s->mb_width*2 + 1;
 675     s->b4_stride = s->mb_width*4 + 1;
 676     mb_array_size= s->mb_height * s->mb_stride;
 677     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
 678
 679     /* set chroma shifts */
 680     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
 681                                                     &(s->chroma_y_shift) );
 682
 683     /* set default edge pos, will be overriden in decode_header if needed */
 684     s->h_edge_pos= s->mb_width*16;
 685     s->v_edge_pos= s->mb_height*16;
 686
 687     s->mb_num = s->mb_width * s->mb_height;
 688
 689     s->block_wrap[0]=
 690     s->block_wrap[1]=
 691     s->block_wrap[2]=
 692     s->block_wrap[3]= s->b8_stride;
 693     s->block_wrap[4]=
 694     s->block_wrap[5]= s->mb_stride;
 695
 696     y_size = s->b8_stride * (2 * s->mb_height + 1);
 697     c_size = s->mb_stride * (s->mb_height + 1);
 698     yc_size = y_size + 2 * c_size;
 699
 700     /* convert fourcc to upper case */
 701     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
 702                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
 703                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
 704                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
 705
 706     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
 707                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
 708                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
 709                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
 710
 711     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
 712
 713     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
 714     for(y=0; y<s->mb_height; y++){
 715         for(x=0; x<s->mb_width; x++){
 716             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
 717         }
 718     }
 719     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
 720
 721     if (s->encoding) {
 722         /* Allocate MV tables */
 723         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
 724         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 725         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 726         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 727         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 728         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
 729         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
 730         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
 731         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
 732         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
 733         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
 734         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
 735
 736         if(s->msmpeg4_version){
 737             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
 738         }
 739         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
 740
 741         /* Allocate MB type table */
 742         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
 743
 744         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
 745
 746         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
 747         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
 748         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
 749         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
 750         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
 751         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
 752
 753         if(s->avctx->noise_reduction){
 754             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
 755         }
 756     }
 757     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
 758
 759     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
 760
 761     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
 762         /* interlaced direct mode decoding tables */
 763             for(i=0; i<2; i++){
 764                 int j, k;
 765                 for(j=0; j<2; j++){
 766                     for(k=0; k<2; k++){
 767                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
 768                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
 769                     }
 770                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
 771                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
 772                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
 773                 }
 774                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
 775             }
 776     }
 777     if (s->out_format == FMT_H263) {
 778         /* ac values */
 779         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
 780         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
 781         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
 782         s->ac_val[2] = s->ac_val[1] + c_size;
 783
 784         /* cbp values */
 785         CHECKED_ALLOCZ(s->coded_block_base, y_size);
 786         s->coded_block= s->coded_block_base + s->b8_stride + 1;
 787
 788         /* cbp, ac_pred, pred_dir */
 789         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
 790         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
 791     }
 792
 793     if (s->h263_pred || s->h263_plus || !s->encoding) {
 794         /* dc values */
 795         //MN: we need these for error resilience of intra-frames
 796         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
 797         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
 798         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
 799         s->dc_val[2] = s->dc_val[1] + c_size;
 800         for(i=0;i<yc_size;i++)
 801             s->dc_val_base[i] = 1024;
 802     }
 803
 804     /* which mb is a intra block */
 805     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
 806     memset(s->mbintra_table, 1, mb_array_size);
 807
 808     /* init macroblock skip table */
 809     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
 810     //Note the +1 is for a quicker mpeg4 slice_end detection
 811     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
 812
 813     s->parse_context.state= -1;
 814     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
 815        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
 816        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
 817        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
 818     }
 819
 820     s->context_initialized = 1;
 821
 822     s->thread_context[0]= s;
 823     for(i=1; i<s->avctx->thread_count; i++){
 824         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
 825         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
 826     }
 827
 828     for(i=0; i<s->avctx->thread_count; i++){
 829         if(init_duplicate_context(s->thread_context[i], s) < 0)
 830            goto fail;
 831         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
 832         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
 833     }
 834
 835     return 0;
 836  fail:
 837     MPV_common_end(s);
 838     return -1;
 839 }
 840
 841 /* init common structure for both encoder and decoder */
 842 void MPV_common_end(MpegEncContext *s)
 843 {
 844     int i, j, k;
 845
 846     for(i=0; i<s->avctx->thread_count; i++){
 847         free_duplicate_context(s->thread_context[i]);
 848     }
 849     for(i=1; i<s->avctx->thread_count; i++){
 850         av_freep(&s->thread_context[i]);
 851     }
 852
 853     av_freep(&s->parse_context.buffer);
 854     s->parse_context.buffer_size=0;
 855
 856     av_freep(&s->mb_type);
 857     av_freep(&s->p_mv_table_base);
 858     av_freep(&s->b_forw_mv_table_base);
 859     av_freep(&s->b_back_mv_table_base);
 860     av_freep(&s->b_bidir_forw_mv_table_base);
 861     av_freep(&s->b_bidir_back_mv_table_base);
 862     av_freep(&s->b_direct_mv_table_base);
 863     s->p_mv_table= NULL;
 864     s->b_forw_mv_table= NULL;
 865     s->b_back_mv_table= NULL;
 866     s->b_bidir_forw_mv_table= NULL;
 867     s->b_bidir_back_mv_table= NULL;
 868     s->b_direct_mv_table= NULL;
 869     for(i=0; i<2; i++){
 870         for(j=0; j<2; j++){
 871             for(k=0; k<2; k++){
 872                 av_freep(&s->b_field_mv_table_base[i][j][k]);
 873                 s->b_field_mv_table[i][j][k]=NULL;
 874             }
 875             av_freep(&s->b_field_select_table[i][j]);
 876             av_freep(&s->p_field_mv_table_base[i][j]);
 877             s->p_field_mv_table[i][j]=NULL;
 878         }
 879         av_freep(&s->p_field_select_table[i]);
 880     }
 881
 882     av_freep(&s->dc_val_base);
 883     av_freep(&s->ac_val_base);
 884     av_freep(&s->coded_block_base);
 885     av_freep(&s->mbintra_table);
 886     av_freep(&s->cbp_table);
 887     av_freep(&s->pred_dir_table);
 888
 889     av_freep(&s->mbskip_table);
 890     av_freep(&s->prev_pict_types);
 891     av_freep(&s->bitstream_buffer);
 892     s->allocated_bitstream_buffer_size=0;
 893
 894     av_freep(&s->avctx->stats_out);
 895     av_freep(&s->ac_stats);
 896     av_freep(&s->error_status_table);
 897     av_freep(&s->mb_index2xy);
 898     av_freep(&s->lambda_table);
 899     av_freep(&s->q_intra_matrix);
 900     av_freep(&s->q_inter_matrix);
 901     av_freep(&s->q_intra_matrix16);
 902     av_freep(&s->q_inter_matrix16);
 903     av_freep(&s->input_picture);
 904     av_freep(&s->reordered_input_picture);
 905     av_freep(&s->dct_offset);
 906
 907     if(s->picture){
 908         for(i=0; i<MAX_PICTURE_COUNT; i++){
 909             free_picture(s, &s->picture[i]);
 910         }
 911     }
 912     av_freep(&s->picture);
 913     s->context_initialized = 0;
 914     s->last_picture_ptr=
 915     s->next_picture_ptr=
 916     s->current_picture_ptr= NULL;
 917     s->linesize= s->uvlinesize= 0;
 918
 919     for(i=0; i<3; i++)
 920         av_freep(&s->visualization_buffer[i]);
 921
 922     avcodec_default_free_buffers(s->avctx);
 923 }
 924
 925 #ifdef CONFIG_ENCODERS
 926
 927 /* init video encoder */
 928 int MPV_encode_init(AVCodecContext *avctx)
 929 {
 930     MpegEncContext *s = avctx->priv_data;
 931     int i;
 932     int chroma_h_shift, chroma_v_shift;
 933
 934     MPV_encode_defaults(s);
 935
 936     switch (avctx->codec_id) {
 937     case CODEC_ID_MPEG2VIDEO:
 938         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
 939             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
 940             return -1;
 941         }
 942         break;
 943     case CODEC_ID_LJPEG:
 944     case CODEC_ID_MJPEG:
 945         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 946            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
 947             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 948             return -1;
 949         }
 950         break;
 951     default:
 952         if(avctx->pix_fmt != PIX_FMT_YUV420P){
 953             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 954             return -1;
 955         }
 956     }
 957
 958     switch (avctx->pix_fmt) {
 959     case PIX_FMT_YUVJ422P:
 960     case PIX_FMT_YUV422P:
 961         s->chroma_format = CHROMA_422;
 962         break;
 963     case PIX_FMT_YUVJ420P:
 964     case PIX_FMT_YUV420P:
 965     default:
 966         s->chroma_format = CHROMA_420;
 967         break;
 968     }
 969
 970     s->bit_rate = avctx->bit_rate;
 971     s->width = avctx->width;
 972     s->height = avctx->height;
 973     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
 974         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
 975         avctx->gop_size=600;
 976     }
 977     s->gop_size = avctx->gop_size;
 978     s->avctx = avctx;
 979     s->flags= avctx->flags;
 980     s->flags2= avctx->flags2;
 981     s->max_b_frames= avctx->max_b_frames;
 982     s->codec_id= avctx->codec->id;
 983     s->luma_elim_threshold  = avctx->luma_elim_threshold;
 984     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
 985     s->strict_std_compliance= avctx->strict_std_compliance;
 986     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
 987     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
 988     s->mpeg_quant= avctx->mpeg_quant;
 989     s->rtp_mode= !!avctx->rtp_payload_size;
 990     s->intra_dc_precision= avctx->intra_dc_precision;
 991     s->user_specified_pts = AV_NOPTS_VALUE;
 992
 993     if (s->gop_size <= 1) {
 994         s->intra_only = 1;
 995         s->gop_size = 12;
 996     } else {
 997         s->intra_only = 0;
 998     }
 999
1000     s->me_method = avctx->me_method;
1001
1002     /* Fixed QSCALE */
1003     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1004
1005     s->adaptive_quant= (   s->avctx->lumi_masking
1006                         || s->avctx->dark_masking
1007                         || s->avctx->temporal_cplx_masking
1008                         || s->avctx->spatial_cplx_masking
1009                         || s->avctx->p_masking
1010                         || s->avctx->border_masking
1011                         || (s->flags&CODEC_FLAG_QP_RD))
1012                        && !s->fixed_qscale;
1013
1014     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1015     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1016     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1017     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1018
1019     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1020         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1021         return -1;
1022     }
1023
1024     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1025         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1026     }
1027
1028     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1029         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1030         return -1;
1031     }
1032
1033     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1034         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1035         return -1;
1036     }
1037
1038     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1039        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1040        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1041
1042         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1043     }
1044
1045     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1046        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1047         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1048         return -1;
1049     }
1050
1051     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1052         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1053         return -1;
1054     }
1055
1056     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1057         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1058         return -1;
1059     }
1060
1061     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1062         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1063         return -1;
1064     }
1065
1066     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1067         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1068         return -1;
1069     }
1070
1071     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1072         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1073         return -1;
1074     }
1075
1076     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1077        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1078         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1079         return -1;
1080     }
1081
1082     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1083         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1084         return -1;
1085     }
1086
1087     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1088         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1089         return -1;
1090     }
1091
1092     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1093         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1094         return -1;
1095     }
1096
1097     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1098         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1099         return -1;
1100     }
1101
1102     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1103         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->flags & CODEC_FLAG_LOW_DELAY){
1108         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1109             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1110             return -1;
1111         }
1112         if (s->max_b_frames != 0){
1113             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1114             return -1;
1115         }
1116     }
1117
1118     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1119        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1120        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1121         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1122         return -1;
1123     }
1124
1125     if(s->avctx->thread_count > 1)
1126         s->rtp_mode= 1;
1127
1128     if(!avctx->time_base.den || !avctx->time_base.num){
1129         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1130         return -1;
1131     }
1132
1133     i= (INT_MAX/2+128)>>8;
1134     if(avctx->me_threshold >= i){
1135         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1136         return -1;
1137     }
1138     if(avctx->mb_threshold >= i){
1139         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1140         return -1;
1141     }
1142
1143     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1144         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1145         avctx->b_frame_strategy = 0;
1146     }
1147
1148     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1149     if(i > 1){
1150         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1151         avctx->time_base.den /= i;
1152         avctx->time_base.num /= i;
1153 //        return -1;
1154     }
1155
1156     if(s->codec_id==CODEC_ID_MJPEG){
1157         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1158         s->inter_quant_bias= 0;
1159     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1160         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1161         s->inter_quant_bias= 0;
1162     }else{
1163         s->intra_quant_bias=0;
1164         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1165     }
1166
1167     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1168         s->intra_quant_bias= avctx->intra_quant_bias;
1169     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1170         s->inter_quant_bias= avctx->inter_quant_bias;
1171
1172     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1173
1174     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1175         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1176         return -1;
1177     }
1178     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1179
1180     switch(avctx->codec->id) {
1181     case CODEC_ID_MPEG1VIDEO:
1182         s->out_format = FMT_MPEG1;
1183         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1184         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1185         break;
1186     case CODEC_ID_MPEG2VIDEO:
1187         s->out_format = FMT_MPEG1;
1188         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1189         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1190         s->rtp_mode= 1;
1191         break;
1192     case CODEC_ID_LJPEG:
1193     case CODEC_ID_JPEGLS:
1194     case CODEC_ID_MJPEG:
1195         s->out_format = FMT_MJPEG;
1196         s->intra_only = 1; /* force intra only for jpeg */
1197         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1198         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1199         s->mjpeg_vsample[0] = 2;
1200         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1201         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1202         s->mjpeg_hsample[0] = 2;
1203         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1204         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1205         if (mjpeg_init(s) < 0)
1206             return -1;
1207         avctx->delay=0;
1208         s->low_delay=1;
1209         break;
1210 #ifdef CONFIG_H261_ENCODER
1211     case CODEC_ID_H261:
1212         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1213             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1214             return -1;
1215         }
1216         s->out_format = FMT_H261;
1217         avctx->delay=0;
1218         s->low_delay=1;
1219         break;
1220 #endif
1221     case CODEC_ID_H263:
1222         if (h263_get_picture_format(s->width, s->height) == 7) {
1223             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1224             return -1;
1225         }
1226         s->out_format = FMT_H263;
1227         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1228         avctx->delay=0;
1229         s->low_delay=1;
1230         break;
1231     case CODEC_ID_H263P:
1232         s->out_format = FMT_H263;
1233         s->h263_plus = 1;
1234         /* Fx */
1235         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1236         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1237         s->modified_quant= s->h263_aic;
1238         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1239         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1240         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1241         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1242         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1243
1244         /* /Fx */
1245         /* These are just to be sure */
1246         avctx->delay=0;
1247         s->low_delay=1;
1248         break;
1249     case CODEC_ID_FLV1:
1250         s->out_format = FMT_H263;
1251         s->h263_flv = 2; /* format = 1; 11-bit codes */
1252         s->unrestricted_mv = 1;
1253         s->rtp_mode=0; /* don't allow GOB */
1254         avctx->delay=0;
1255         s->low_delay=1;
1256         break;
1257     case CODEC_ID_RV10:
1258         s->out_format = FMT_H263;
1259         avctx->delay=0;
1260         s->low_delay=1;
1261         break;
1262     case CODEC_ID_RV20:
1263         s->out_format = FMT_H263;
1264         avctx->delay=0;
1265         s->low_delay=1;
1266         s->modified_quant=1;
1267         s->h263_aic=1;
1268         s->h263_plus=1;
1269         s->loop_filter=1;
1270         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1271         break;
1272     case CODEC_ID_MPEG4:
1273         s->out_format = FMT_H263;
1274         s->h263_pred = 1;
1275         s->unrestricted_mv = 1;
1276         s->low_delay= s->max_b_frames ? 0 : 1;
1277         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1278         break;
1279     case CODEC_ID_MSMPEG4V1:
1280         s->out_format = FMT_H263;
1281         s->h263_msmpeg4 = 1;
1282         s->h263_pred = 1;
1283         s->unrestricted_mv = 1;
1284         s->msmpeg4_version= 1;
1285         avctx->delay=0;
1286         s->low_delay=1;
1287         break;
1288     case CODEC_ID_MSMPEG4V2:
1289         s->out_format = FMT_H263;
1290         s->h263_msmpeg4 = 1;
1291         s->h263_pred = 1;
1292         s->unrestricted_mv = 1;
1293         s->msmpeg4_version= 2;
1294         avctx->delay=0;
1295         s->low_delay=1;
1296         break;
1297     case CODEC_ID_MSMPEG4V3:
1298         s->out_format = FMT_H263;
1299         s->h263_msmpeg4 = 1;
1300         s->h263_pred = 1;
1301         s->unrestricted_mv = 1;
1302         s->msmpeg4_version= 3;
1303         s->flipflop_rounding=1;
1304         avctx->delay=0;
1305         s->low_delay=1;
1306         break;
1307     case CODEC_ID_WMV1:
1308         s->out_format = FMT_H263;
1309         s->h263_msmpeg4 = 1;
1310         s->h263_pred = 1;
1311         s->unrestricted_mv = 1;
1312         s->msmpeg4_version= 4;
1313         s->flipflop_rounding=1;
1314         avctx->delay=0;
1315         s->low_delay=1;
1316         break;
1317     case CODEC_ID_WMV2:
1318         s->out_format = FMT_H263;
1319         s->h263_msmpeg4 = 1;
1320         s->h263_pred = 1;
1321         s->unrestricted_mv = 1;
1322         s->msmpeg4_version= 5;
1323         s->flipflop_rounding=1;
1324         avctx->delay=0;
1325         s->low_delay=1;
1326         break;
1327     default:
1328         return -1;
1329     }
1330
1331     avctx->has_b_frames= !s->low_delay;
1332
1333     s->encoding = 1;
1334
1335     /* init */
1336     if (MPV_common_init(s) < 0)
1337         return -1;
1338
1339     if(s->modified_quant)
1340         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1341     s->progressive_frame=
1342     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1343     s->quant_precision=5;
1344
1345     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1346     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1347
1348 #ifdef CONFIG_H261_ENCODER
1349     if (s->out_format == FMT_H261)
1350         ff_h261_encode_init(s);
1351 #endif
1352     if (s->out_format == FMT_H263)
1353         h263_encode_init(s);
1354     if(s->msmpeg4_version)
1355         ff_msmpeg4_encode_init(s);
1356     if (s->out_format == FMT_MPEG1)
1357         ff_mpeg1_encode_init(s);
1358
1359     /* init q matrix */
1360     for(i=0;i<64;i++) {
1361         int j= s->dsp.idct_permutation[i];
1362         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1363             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1364             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1365         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1366             s->intra_matrix[j] =
1367             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1368         }else
1369         { /* mpeg1/2 */
1370             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1371             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1372         }
1373         if(s->avctx->intra_matrix)
1374             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1375         if(s->avctx->inter_matrix)
1376             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1377     }
1378
1379     /* precompute matrix */
1380     /* for mjpeg, we do include qscale in the matrix */
1381     if (s->out_format != FMT_MJPEG) {
1382         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1383                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1384         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1385                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1386     }
1387
1388     if(ff_rate_control_init(s) < 0)
1389         return -1;
1390
1391     return 0;
1392 }
1393
1394 int MPV_encode_end(AVCodecContext *avctx)
1395 {
1396     MpegEncContext *s = avctx->priv_data;
1397
1398     ff_rate_control_uninit(s);
1399
1400     MPV_common_end(s);
1401     if (s->out_format == FMT_MJPEG)
1402         mjpeg_close(s);
1403
1404     av_freep(&avctx->extradata);
1405
1406     return 0;
1407 }
1408
1409 #endif //CONFIG_ENCODERS
1410
1411 void init_rl(RLTable *rl, int use_static)
1412 {
1413     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1414     uint8_t index_run[MAX_RUN+1];
1415     int last, run, level, start, end, i;
1416
1417     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1418     if(use_static && rl->max_level[0])
1419         return;
1420
1421     /* compute max_level[], max_run[] and index_run[] */
1422     for(last=0;last<2;last++) {
1423         if (last == 0) {
1424             start = 0;
1425             end = rl->last;
1426         } else {
1427             start = rl->last;
1428             end = rl->n;
1429         }
1430
1431         memset(max_level, 0, MAX_RUN + 1);
1432         memset(max_run, 0, MAX_LEVEL + 1);
1433         memset(index_run, rl->n, MAX_RUN + 1);
1434         for(i=start;i<end;i++) {
1435             run = rl->table_run[i];
1436             level = rl->table_level[i];
1437             if (index_run[run] == rl->n)
1438                 index_run[run] = i;
1439             if (level > max_level[run])
1440                 max_level[run] = level;
1441             if (run > max_run[level])
1442                 max_run[level] = run;
1443         }
1444         if(use_static)
1445             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1446         else
1447             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1448         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1449         if(use_static)
1450             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1451         else
1452             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1453         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1454         if(use_static)
1455             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1456         else
1457             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1458         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1459     }
1460 }
1461
/**
 * Pad an image with a border of w pixels by replicating its outermost
 * pixels.
 *
 * @param buf    pointer to the top left pixel of the image; the w rows
 *               above and below and the w bytes left and right of every
 *               row are written
 * @param wrap   distance in bytes between the starts of two rows
 * @param width  image width in pixels
 * @param height image height in rows
 * @param w      border width in pixels
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *above, *below, *row;
    int n;

    /* top and bottom: repeat the first row upwards and the last row downwards */
    above = buf;
    below = bottom;
    for (n = 0; n < w; n++) {
        above -= wrap;
        below += wrap;
        memcpy(above, buf, width);
        memcpy(below, bottom, width);
    }

    /* left and right: extend the first and last pixel of every image row */
    row = buf;
    for (n = 0; n < height; n++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width-1], w);
        row += wrap;
    }

    /* corners: filled with the value of the nearest image corner pixel */
    above = buf;
    below = bottom;
    for (n = 0; n < w; n++) {
        above -= wrap;
        below += wrap;
        memset(above - w, buf[0], w);                /* top left */
        memset(above + width, buf[width-1], w);      /* top right */
        memset(below - w, bottom[0], w);             /* bottom left */
        memset(below + width, bottom[width-1], w);   /* bottom right */
    }
}
1490
1491 int ff_find_unused_picture(MpegEncContext *s, int shared){
1492     int i;
1493
1494     if(shared){
1495         for(i=0; i<MAX_PICTURE_COUNT; i++){
1496             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1497         }
1498     }else{
1499         for(i=0; i<MAX_PICTURE_COUNT; i++){
1500             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1501         }
1502         for(i=0; i<MAX_PICTURE_COUNT; i++){
1503             if(s->picture[i].data[0]==NULL) return i;
1504         }
1505     }
1506
1507     assert(0);
1508     return -1;
1509 }
1510
1511 static void update_noise_reduction(MpegEncContext *s){
1512     int intra, i;
1513
1514     for(intra=0; intra<2; intra++){
1515         if(s->dct_count[intra] > (1<<16)){
1516             for(i=0; i<64; i++){
1517                 s->dct_error_sum[intra][i] >>=1;
1518             }
1519             s->dct_count[intra] >>= 1;
1520         }
1521
1522         for(i=0; i<64; i++){
1523             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1524         }
1525     }
1526 }
1527
1528 /**
1529  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1530  */
1531 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1532 {
1533     int i;
1534     AVFrame *pic;
1535     s->mb_skipped = 0;
1536
1537     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1538
1539     /* mark&release old frames */
1540     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1541         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1542
1543         /* release forgotten pictures */
1544         /* if(mpeg124/h263) */
1545         if(!s->encoding){
1546             for(i=0; i<MAX_PICTURE_COUNT; i++){
1547                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1548                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1549                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1550                 }
1551             }
1552         }
1553     }
1554 alloc:
1555     if(!s->encoding){
1556         /* release non reference frames */
1557         for(i=0; i<MAX_PICTURE_COUNT; i++){
1558             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1559                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1560             }
1561         }
1562
1563         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1564             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1565         else{
1566             i= ff_find_unused_picture(s, 0);
1567             pic= (AVFrame*)&s->picture[i];
1568         }
1569
1570         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1571                         && !s->dropable ? 3 : 0;
1572
1573         pic->coded_picture_number= s->coded_picture_number++;
1574
1575         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1576             return -1;
1577
1578         s->current_picture_ptr= (Picture*)pic;
1579         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1580         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1581     }
1582
1583     s->current_picture_ptr->pict_type= s->pict_type;
1584 //    if(s->flags && CODEC_FLAG_QSCALE)
1585   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1586     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1587
1588     copy_picture(&s->current_picture, s->current_picture_ptr);
1589
1590   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1591     if (s->pict_type != B_TYPE) {
1592         s->last_picture_ptr= s->next_picture_ptr;
1593         if(!s->dropable)
1594             s->next_picture_ptr= s->current_picture_ptr;
1595     }
1596 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1597         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1598         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1599         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1600         s->pict_type, s->dropable);*/
1601
1602     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1603     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1604
1605     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1606         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1607         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1608         goto alloc;
1609     }
1610
1611     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1612
1613     if(s->picture_structure!=PICT_FRAME){
1614         int i;
1615         for(i=0; i<4; i++){
1616             if(s->picture_structure == PICT_BOTTOM_FIELD){
1617                  s->current_picture.data[i] += s->current_picture.linesize[i];
1618             }
1619             s->current_picture.linesize[i] *= 2;
1620             s->last_picture.linesize[i] *=2;
1621             s->next_picture.linesize[i] *=2;
1622         }
1623     }
1624   }
1625
1626     s->hurry_up= s->avctx->hurry_up;
1627     s->error_resilience= avctx->error_resilience;
1628
1629     /* set dequantizer, we can't do it during init as it might change for mpeg4
1630        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1631     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1632         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1633         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1634     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1635         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1636         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1637     }else{
1638         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1639         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1640     }
1641
1642     if(s->dct_error_sum){
1643         assert(s->avctx->noise_reduction && s->encoding);
1644
1645         update_noise_reduction(s);
1646     }
1647
1648 #ifdef HAVE_XVMC
1649     if(s->avctx->xvmc_acceleration)
1650         return XVMC_field_start(s, avctx);
1651 #endif
1652     return 0;
1653 }
1654
1655 /* generic function for encode/decode called after a frame has been coded/decoded */
1656 void MPV_frame_end(MpegEncContext *s)
1657 {
1658     int i;
1659     /* draw edge for correct motion prediction if outside */
1660 #ifdef HAVE_XVMC
1661 //just to make sure that all data is rendered.
1662     if(s->avctx->xvmc_acceleration){
1663         XVMC_field_end(s);
1664     }else
1665 #endif
1666     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1667             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1668             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1669             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1670     }
1671     emms_c();
1672
1673     s->last_pict_type    = s->pict_type;
1674     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1675     if(s->pict_type!=B_TYPE){
1676         s->last_non_b_pict_type= s->pict_type;
1677     }
1678 #if 0
1679         /* copy back current_picture variables */
1680     for(i=0; i<MAX_PICTURE_COUNT; i++){
1681         if(s->picture[i].data[0] == s->current_picture.data[0]){
1682             s->picture[i]= s->current_picture;
1683             break;
1684         }
1685     }
1686     assert(i<MAX_PICTURE_COUNT);
1687 #endif
1688
1689     if(s->encoding){
1690         /* release non-reference frames */
1691         for(i=0; i<MAX_PICTURE_COUNT; i++){
1692             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1693                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1694             }
1695         }
1696     }
1697     // clear copies, to avoid confusion
1698 #if 0
1699     memset(&s->last_picture, 0, sizeof(Picture));
1700     memset(&s->next_picture, 0, sizeof(Picture));
1701     memset(&s->current_picture, 0, sizeof(Picture));
1702 #endif
1703     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1704 }
1705
1706 /**
1707  * draws an line from (ex, ey) -> (sx, sy).
1708  * @param w width of the image
1709  * @param h height of the image
1710  * @param stride stride/linesize of the image
1711  * @param color color of the arrow
1712  */
1713 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1714     int x, y, fr, f;
1715
1716     sx= clip(sx, 0, w-1);
1717     sy= clip(sy, 0, h-1);
1718     ex= clip(ex, 0, w-1);
1719     ey= clip(ey, 0, h-1);
1720
1721     buf[sy*stride + sx]+= color;
1722
1723     if(FFABS(ex - sx) > FFABS(ey - sy)){
1724         if(sx > ex){
1725             FFSWAP(int, sx, ex);
1726             FFSWAP(int, sy, ey);
1727         }
1728         buf+= sx + sy*stride;
1729         ex-= sx;
1730         f= ((ey-sy)<<16)/ex;
1731         for(x= 0; x <= ex; x++){
1732             y = (x*f)>>16;
1733             fr= (x*f)&0xFFFF;
1734             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1735             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1736         }
1737     }else{
1738         if(sy > ey){
1739             FFSWAP(int, sx, ex);
1740             FFSWAP(int, sy, ey);
1741         }
1742         buf+= sx + sy*stride;
1743         ey-= sy;
1744         if(ey) f= ((ex-sx)<<16)/ey;
1745         else   f= 0;
1746         for(y= 0; y <= ey; y++){
1747             x = (y*f)>>16;
1748             fr= (y*f)&0xFFFF;
1749             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1750             buf[y*stride + x+1]+= (color*         fr )>>16;;
1751         }
1752     }
1753 }
1754
/**
 * Draws an arrow whose shaft runs between (sx, sy) and (ex, ey); the two
 * strokes forming the arrow head are attached to (sx, sy).
 *
 * The coordinates may lie up to 100 samples outside of the image, anything
 * farther away is pulled back to that limit; the final clipping to the image
 * is done by draw_line(). The head is only drawn when the shaft is longer
 * than 3 samples.
 *
 * @param w      width of the image
 * @param h      height of the image
 * @param stride stride/linesize of the image
 * @param color  color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    const int tip_x  = clip(sx, -100, w+100);
    const int tip_y  = clip(sy, -100, h+100);
    const int tail_x = clip(ex, -100, w+100);
    const int tail_y = clip(ey, -100, h+100);
    const int dx     = tail_x - tip_x;
    const int dy     = tail_y - tip_y;

    if(dx*dx + dy*dy > 3*3){
        /* shaft direction rotated by 45 degrees, then scaled to a fixed length */
        int head_x=  dx + dy;
        int head_y= -dx + dy;
        const int norm= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, norm);
        head_y= ROUNDED_DIV(head_y*3<<4, norm);

        draw_line(buf, tip_x, tip_y, tip_x + head_x, tip_y + head_y, w, h, stride, color);
        draw_line(buf, tip_x, tip_y, tip_x - head_y, tip_y + head_x, w, h, stride, color);
    }

    /* the shaft itself */
    draw_line(buf, tip_x, tip_y, tail_x, tail_y, w, h, stride, color);
}
1787
1788 /**
1789  * prints debuging info for the given picture.
1790  */
1791 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1792
1793     if(!pict || !pict->mb_type) return;
1794
1795     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1796         int x,y;
1797
1798         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1799         switch (pict->pict_type) {
1800             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1801             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1802             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1803             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1804             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1805             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1806         }
1807         for(y=0; y<s->mb_height; y++){
1808             for(x=0; x<s->mb_width; x++){
1809                 if(s->avctx->debug&FF_DEBUG_SKIP){
1810                     int count= s->mbskip_table[x + y*s->mb_stride];
1811                     if(count>9) count=9;
1812                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1813                 }
1814                 if(s->avctx->debug&FF_DEBUG_QP){
1815                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1816                 }
1817                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1818                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1819                     //Type & MV direction
1820                     if(IS_PCM(mb_type))
1821                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1822                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1823                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1824                     else if(IS_INTRA4x4(mb_type))
1825                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1826                     else if(IS_INTRA16x16(mb_type))
1827                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1828                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1829                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1830                     else if(IS_DIRECT(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1832                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1833                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1834                     else if(IS_GMC(mb_type))
1835                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1836                     else if(IS_SKIP(mb_type))
1837                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1838                     else if(!USES_LIST(mb_type, 1))
1839                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1840                     else if(!USES_LIST(mb_type, 0))
1841                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1842                     else{
1843                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1844                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1845                     }
1846
1847                     //segmentation
1848                     if(IS_8X8(mb_type))
1849                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1850                     else if(IS_16X8(mb_type))
1851                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1852                     else if(IS_8X16(mb_type))
1853                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1854                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1855                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1856                     else
1857                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1858
1859
1860                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1861                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1862                     else
1863                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1864                 }
1865 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1866             }
1867             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1868         }
1869     }
1870
1871     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1872         const int shift= 1 + s->quarter_sample;
1873         int mb_y;
1874         uint8_t *ptr;
1875         int i;
1876         int h_chroma_shift, v_chroma_shift;
1877         const int width = s->avctx->width;
1878         const int height= s->avctx->height;
1879         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1880         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1881         s->low_delay=0; //needed to see the vectors without trashing the buffers
1882
1883         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1884         for(i=0; i<3; i++){
1885             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1886             pict->data[i]= s->visualization_buffer[i];
1887         }
1888         pict->type= FF_BUFFER_TYPE_COPY;
1889         ptr= pict->data[0];
1890
1891         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1892             int mb_x;
1893             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1894                 const int mb_index= mb_x + mb_y*s->mb_stride;
1895                 if((s->avctx->debug_mv) && pict->motion_val){
1896                   int type;
1897                   for(type=0; type<3; type++){
1898                     int direction = 0;
1899                     switch (type) {
1900                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1901                                 continue;
1902                               direction = 0;
1903                               break;
1904                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1905                                 continue;
1906                               direction = 0;
1907                               break;
1908                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1909                                 continue;
1910                               direction = 1;
1911                               break;
1912                     }
1913                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1914                         continue;
1915
1916                     if(IS_8X8(pict->mb_type[mb_index])){
1917                       int i;
1918                       for(i=0; i<4; i++){
1919                         int sx= mb_x*16 + 4 + 8*(i&1);
1920                         int sy= mb_y*16 + 4 + 8*(i>>1);
1921                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1922                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1923                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1924                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1925                       }
1926                     }else if(IS_16X8(pict->mb_type[mb_index])){
1927                       int i;
1928                       for(i=0; i<2; i++){
1929                         int sx=mb_x*16 + 8;
1930                         int sy=mb_y*16 + 4 + 8*i;
1931                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1932                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1933                         int my=(pict->motion_val[direction][xy][1]>>shift);
1934
1935                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1936                             my*=2;
1937
1938                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1939                       }
1940                     }else if(IS_8X16(pict->mb_type[mb_index])){
1941                       int i;
1942                       for(i=0; i<2; i++){
1943                         int sx=mb_x*16 + 4 + 8*i;
1944                         int sy=mb_y*16 + 8;
1945                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1946                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1947                         int my=(pict->motion_val[direction][xy][1]>>shift);
1948
1949                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1950                             my*=2;
1951
1952                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1953                       }
1954                     }else{
1955                       int sx= mb_x*16 + 8;
1956                       int sy= mb_y*16 + 8;
1957                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1958                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1959                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1960                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1961                     }
1962                   }
1963                 }
1964                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1965                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1966                     int y;
1967                     for(y=0; y<8; y++){
1968                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1969                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1970                     }
1971                 }
1972                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1973                     int mb_type= pict->mb_type[mb_index];
1974                     uint64_t u,v;
1975                     int y;
1976 #define COLOR(theta, r)\
1977 u= (int)(128 + r*cos(theta*3.141592/180));\
1978 v= (int)(128 + r*sin(theta*3.141592/180));
1979
1980
1981                     u=v=128;
1982                     if(IS_PCM(mb_type)){
1983                         COLOR(120,48)
1984                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1985                         COLOR(30,48)
1986                     }else if(IS_INTRA4x4(mb_type)){
1987                         COLOR(90,48)
1988                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1989 //                        COLOR(120,48)
1990                     }else if(IS_DIRECT(mb_type)){
1991                         COLOR(150,48)
1992                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1993                         COLOR(170,48)
1994                     }else if(IS_GMC(mb_type)){
1995                         COLOR(190,48)
1996                     }else if(IS_SKIP(mb_type)){
1997 //                        COLOR(180,48)
1998                     }else if(!USES_LIST(mb_type, 1)){
1999                         COLOR(240,48)
2000                     }else if(!USES_LIST(mb_type, 0)){
2001                         COLOR(0,48)
2002                     }else{
2003                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2004                         COLOR(300,48)
2005                     }
2006
2007                     u*= 0x0101010101010101ULL;
2008                     v*= 0x0101010101010101ULL;
2009                     for(y=0; y<8; y++){
2010                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2011                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2012                     }
2013
2014                     //segmentation
2015                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2016                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2017                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2018                     }
2019                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2020                         for(y=0; y<16; y++)
2021                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2022                     }
2023                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2024                         int dm= 1 << (mv_sample_log2-2);
2025                         for(i=0; i<4; i++){
2026                             int sx= mb_x*16 + 8*(i&1);
2027                             int sy= mb_y*16 + 8*(i>>1);
2028                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2029                             //FIXME bidir
2030                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2031                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2032                                 for(y=0; y<8; y++)
2033                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2034                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2035                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2036                         }
2037                     }
2038
2039                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2040                         // hmm
2041                     }
2042                 }
2043                 s->mbskip_table[mb_index]=0;
2044             }
2045         }
2046     }
2047 }
2048
2049 #ifdef CONFIG_ENCODERS
2050
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two vertically adjacent samples
 * @return sum of |src[x,y] - ref| over the 16x16 samples
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int total= 0;
    int row_start= 0;
    int row, col;

    for(row= 0; row < 16; row++){
        for(col= 0; col < 16; col++){
            const int diff= src[row_start + col] - ref;

            total+= diff >= 0 ? diff : -diff;
        }
        row_start+= stride;
    }

    return total;
}
2063
2064 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2065     int x, y, w, h;
2066     int acc=0;
2067
2068     w= s->width &~15;
2069     h= s->height&~15;
2070
2071     for(y=0; y<h; y+=16){
2072         for(x=0; x<w; x+=16){
2073             int offset= x + y*stride;
2074             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2075             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2076             int sae = get_sae(src + offset, mean, stride);
2077
2078             acc+= sae + 500 < sad;
2079         }
2080     }
2081     return acc;
2082 }
2083
2084
2085 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2086     AVFrame *pic=NULL;
2087     int64_t pts;
2088     int i;
2089     const int encoding_delay= s->max_b_frames;
2090     int direct=1;
2091
2092     if(pic_arg){
2093         pts= pic_arg->pts;
2094         pic_arg->display_picture_number= s->input_picture_number++;
2095
2096         if(pts != AV_NOPTS_VALUE){
2097             if(s->user_specified_pts != AV_NOPTS_VALUE){
2098                 int64_t time= pts;
2099                 int64_t last= s->user_specified_pts;
2100
2101                 if(time <= last){
2102                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2103                     return -1;
2104                 }
2105             }
2106             s->user_specified_pts= pts;
2107         }else{
2108             if(s->user_specified_pts != AV_NOPTS_VALUE){
2109                 s->user_specified_pts=
2110                 pts= s->user_specified_pts + 1;
2111                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2112             }else{
2113                 pts= pic_arg->display_picture_number;
2114             }
2115         }
2116     }
2117
2118   if(pic_arg){
2119     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2120     if(pic_arg->linesize[0] != s->linesize) direct=0;
2121     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2122     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2123
2124 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2125
2126     if(direct){
2127         i= ff_find_unused_picture(s, 1);
2128
2129         pic= (AVFrame*)&s->picture[i];
2130         pic->reference= 3;
2131
2132         for(i=0; i<4; i++){
2133             pic->data[i]= pic_arg->data[i];
2134             pic->linesize[i]= pic_arg->linesize[i];
2135         }
2136         alloc_picture(s, (Picture*)pic, 1);
2137     }else{
2138         i= ff_find_unused_picture(s, 0);
2139
2140         pic= (AVFrame*)&s->picture[i];
2141         pic->reference= 3;
2142
2143         alloc_picture(s, (Picture*)pic, 0);
2144
2145         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2146            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2147            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2148        // empty
2149         }else{
2150             int h_chroma_shift, v_chroma_shift;
2151             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2152
2153             for(i=0; i<3; i++){
2154                 int src_stride= pic_arg->linesize[i];
2155                 int dst_stride= i ? s->uvlinesize : s->linesize;
2156                 int h_shift= i ? h_chroma_shift : 0;
2157                 int v_shift= i ? v_chroma_shift : 0;
2158                 int w= s->width >>h_shift;
2159                 int h= s->height>>v_shift;
2160                 uint8_t *src= pic_arg->data[i];
2161                 uint8_t *dst= pic->data[i];
2162
2163                 if(!s->avctx->rc_buffer_size)
2164                     dst +=INPLACE_OFFSET;
2165
2166                 if(src_stride==dst_stride)
2167                     memcpy(dst, src, src_stride*h);
2168                 else{
2169                     while(h--){
2170                         memcpy(dst, src, w);
2171                         dst += dst_stride;
2172                         src += src_stride;
2173                     }
2174                 }
2175             }
2176         }
2177     }
2178     copy_picture_attributes(s, pic, pic_arg);
2179     pic->pts= pts; //we set this here to avoid modifiying pic_arg
2180   }
2181
2182     /* shift buffer entries */
2183     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2184         s->input_picture[i-1]= s->input_picture[i];
2185
2186     s->input_picture[encoding_delay]= (Picture*)pic;
2187
2188     return 0;
2189 }
2190
2191 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2192     int x, y, plane;
2193     int score=0;
2194     int64_t score64=0;
2195
2196     for(plane=0; plane<3; plane++){
2197         const int stride= p->linesize[plane];
2198         const int bw= plane ? 1 : 2;
2199         for(y=0; y<s->mb_height*bw; y++){
2200             for(x=0; x<s->mb_width*bw; x++){
2201                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2202                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2203
2204                 switch(s->avctx->frame_skip_exp){
2205                     case 0: score= FFMAX(score, v); break;
2206                     case 1: score+= FFABS(v);break;
2207                     case 2: score+= v*v;break;
2208                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2209                     case 4: score64+= v*v*(int64_t)(v*v);break;
2210                 }
2211             }
2212         }
2213     }
2214
2215     if(score) score64= score;
2216
2217     if(score64 < s->avctx->frame_skip_threshold)
2218         return 1;
2219     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2220         return 1;
2221     return 0;
2222 }
2223
2224 static int estimate_best_b_count(MpegEncContext *s){
2225     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2226     AVCodecContext *c= avcodec_alloc_context();
2227     AVFrame input[FF_MAX_B_FRAMES+2];
2228     const int scale= s->avctx->brd_scale;
2229     int i, j, out_size, p_lambda, b_lambda, lambda2;
2230     int outbuf_size= s->width * s->height; //FIXME
2231     uint8_t *outbuf= av_malloc(outbuf_size);
2232     int64_t best_rd= INT64_MAX;
2233     int best_b_count= -1;
2234
2235     assert(scale>=0 && scale <=3);
2236
2237 //    emms_c();
2238     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2239     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2240     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2241     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2242
2243     c->width = s->width >> scale;
2244     c->height= s->height>> scale;
2245     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2246     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2247     c->mb_decision= s->avctx->mb_decision;
2248     c->me_cmp= s->avctx->me_cmp;
2249     c->mb_cmp= s->avctx->mb_cmp;
2250     c->me_sub_cmp= s->avctx->me_sub_cmp;
2251     c->pix_fmt = PIX_FMT_YUV420P;
2252     c->time_base= s->avctx->time_base;
2253     c->max_b_frames= s->max_b_frames;
2254
2255     if (avcodec_open(c, codec) < 0)
2256         return -1;
2257
2258     for(i=0; i<s->max_b_frames+2; i++){
2259         int ysize= c->width*c->height;
2260         int csize= (c->width/2)*(c->height/2);
2261         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2262
2263         if(pre_input_ptr)
2264             pre_input= *pre_input_ptr;
2265
2266         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2267             pre_input.data[0]+=INPLACE_OFFSET;
2268             pre_input.data[1]+=INPLACE_OFFSET;
2269             pre_input.data[2]+=INPLACE_OFFSET;
2270         }
2271
2272         avcodec_get_frame_defaults(&input[i]);
2273         input[i].data[0]= av_malloc(ysize + 2*csize);
2274         input[i].data[1]= input[i].data[0] + ysize;
2275         input[i].data[2]= input[i].data[1] + csize;
2276         input[i].linesize[0]= c->width;
2277         input[i].linesize[1]=
2278         input[i].linesize[2]= c->width/2;
2279
2280         if(!i || s->input_picture[i-1]){
2281             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2282             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2283             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2284         }
2285     }
2286
2287     for(j=0; j<s->max_b_frames+1; j++){
2288         int64_t rd=0;
2289
2290         if(!s->input_picture[j])
2291             break;
2292
2293         c->error[0]= c->error[1]= c->error[2]= 0;
2294
2295         input[0].pict_type= I_TYPE;
2296         input[0].quality= 1 * FF_QP2LAMBDA;
2297         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2298 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2299
2300         for(i=0; i<s->max_b_frames+1; i++){
2301             int is_p= i % (j+1) == j || i==s->max_b_frames;
2302
2303             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2304             input[i+1].quality= is_p ? p_lambda : b_lambda;
2305             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2306             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2307         }
2308
2309         /* get the delayed frames */
2310         while(out_size){
2311             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2312             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2313         }
2314
2315         rd += c->error[0] + c->error[1] + c->error[2];
2316
2317         if(rd < best_rd){
2318             best_rd= rd;
2319             best_b_count= j;
2320         }
2321     }
2322
2323     av_freep(&outbuf);
2324     avcodec_close(c);
2325     av_freep(&c);
2326
2327     for(i=0; i<s->max_b_frames+2; i++){
2328         av_freep(&input[i].data[0]);
2329     }
2330
2331     return best_b_count;
2332 }
2333
2334 static void select_input_picture(MpegEncContext *s){
2335     int i;
2336
2337     for(i=1; i<MAX_PICTURE_COUNT; i++)
2338         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2339     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2340
2341     /* set next picture type & ordering */
2342     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2343         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2344             s->reordered_input_picture[0]= s->input_picture[0];
2345             s->reordered_input_picture[0]->pict_type= I_TYPE;
2346             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2347         }else{
2348             int b_frames;
2349
2350             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2351                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2352                 //FIXME check that te gop check above is +-1 correct
2353 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2354
2355                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2356                         for(i=0; i<4; i++)
2357                             s->input_picture[0]->data[i]= NULL;
2358                         s->input_picture[0]->type= 0;
2359                     }else{
2360                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2361                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2362
2363                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2364                     }
2365
2366                     emms_c();
2367                     ff_vbv_update(s, 0);
2368
2369                     goto no_output_pic;
2370                 }
2371             }
2372
2373             if(s->flags&CODEC_FLAG_PASS2){
2374                 for(i=0; i<s->max_b_frames+1; i++){
2375                     int pict_num= s->input_picture[0]->display_picture_number + i;
2376
2377                     if(pict_num >= s->rc_context.num_entries)
2378                         break;
2379                     if(!s->input_picture[i]){
2380                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2381                         break;
2382                     }
2383
2384                     s->input_picture[i]->pict_type=
2385                         s->rc_context.entry[pict_num].new_pict_type;
2386                 }
2387             }
2388
2389             if(s->avctx->b_frame_strategy==0){
2390                 b_frames= s->max_b_frames;
2391                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2392             }else if(s->avctx->b_frame_strategy==1){
2393                 for(i=1; i<s->max_b_frames+1; i++){
2394                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2395                         s->input_picture[i]->b_frame_score=
2396                             get_intra_count(s, s->input_picture[i  ]->data[0],
2397                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2398                     }
2399                 }
2400                 for(i=0; i<s->max_b_frames+1; i++){
2401                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2402                 }
2403
2404                 b_frames= FFMAX(0, i-1);
2405
2406                 /* reset scores */
2407                 for(i=0; i<b_frames+1; i++){
2408                     s->input_picture[i]->b_frame_score=0;
2409                 }
2410             }else if(s->avctx->b_frame_strategy==2){
2411                 b_frames= estimate_best_b_count(s);
2412             }else{
2413                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2414                 b_frames=0;
2415             }
2416
2417             emms_c();
2418 //static int b_count=0;
2419 //b_count+= b_frames;
2420 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2421
2422             for(i= b_frames - 1; i>=0; i--){
2423                 int type= s->input_picture[i]->pict_type;
2424                 if(type && type != B_TYPE)
2425                     b_frames= i;
2426             }
2427             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2428                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2429             }
2430
2431             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2432               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2433                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2434               }else{
2435                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2436                     b_frames=0;
2437                 s->input_picture[b_frames]->pict_type= I_TYPE;
2438               }
2439             }
2440
2441             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2442                && b_frames
2443                && s->input_picture[b_frames]->pict_type== I_TYPE)
2444                 b_frames--;
2445
2446             s->reordered_input_picture[0]= s->input_picture[b_frames];
2447             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2448                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2449             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2450             for(i=0; i<b_frames; i++){
2451                 s->reordered_input_picture[i+1]= s->input_picture[i];
2452                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2453                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2454             }
2455         }
2456     }
2457 no_output_pic:
2458     if(s->reordered_input_picture[0]){
2459         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2460
2461         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2462
2463         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2464             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2465
2466             int i= ff_find_unused_picture(s, 0);
2467             Picture *pic= &s->picture[i];
2468
2469             pic->reference              = s->reordered_input_picture[0]->reference;
2470             alloc_picture(s, pic, 0);
2471
2472             /* mark us unused / free shared pic */
2473             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2474                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2475             for(i=0; i<4; i++)
2476                 s->reordered_input_picture[0]->data[i]= NULL;
2477             s->reordered_input_picture[0]->type= 0;
2478
2479             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2480
2481             s->current_picture_ptr= pic;
2482         }else{
2483             // input is not a shared pix -> reuse buffer for current_pix
2484
2485             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2486                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2487
2488             s->current_picture_ptr= s->reordered_input_picture[0];
2489             for(i=0; i<4; i++){
2490                 s->new_picture.data[i]+= INPLACE_OFFSET;
2491             }
2492         }
2493         copy_picture(&s->current_picture, s->current_picture_ptr);
2494
2495         s->picture_number= s->new_picture.display_picture_number;
2496 //printf("dpn:%d\n", s->picture_number);
2497     }else{
2498        memset(&s->new_picture, 0, sizeof(Picture));
2499     }
2500 }
2501
2502 int MPV_encode_picture(AVCodecContext *avctx,
2503                        unsigned char *buf, int buf_size, void *data)
2504 {
2505     MpegEncContext *s = avctx->priv_data;
2506     AVFrame *pic_arg = data;
2507     int i, stuffing_count;
2508
2509     for(i=0; i<avctx->thread_count; i++){
2510         int start_y= s->thread_context[i]->start_mb_y;
2511         int   end_y= s->thread_context[i]->  end_mb_y;
2512         int h= s->mb_height;
2513         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2514         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2515
2516         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2517     }
2518
2519     s->picture_in_gop_number++;
2520
2521     if(load_input_picture(s, pic_arg) < 0)
2522         return -1;
2523
2524     select_input_picture(s);
2525
2526     /* output? */
2527     if(s->new_picture.data[0]){
2528         s->pict_type= s->new_picture.pict_type;
2529 //emms_c();
2530 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2531         MPV_frame_start(s, avctx);
2532 vbv_retry:
2533         if (encode_picture(s, s->picture_number) < 0)
2534             return -1;
2535
2536         avctx->real_pict_num  = s->picture_number;
2537         avctx->header_bits = s->header_bits;
2538         avctx->mv_bits     = s->mv_bits;
2539         avctx->misc_bits   = s->misc_bits;
2540         avctx->i_tex_bits  = s->i_tex_bits;
2541         avctx->p_tex_bits  = s->p_tex_bits;
2542         avctx->i_count     = s->i_count;
2543         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2544         avctx->skip_count  = s->skip_count;
2545
2546         MPV_frame_end(s);
2547
2548         if (s->out_format == FMT_MJPEG)
2549             mjpeg_picture_trailer(s);
2550
2551         if(avctx->rc_buffer_size){
2552             RateControlContext *rcc= &s->rc_context;
2553             int max_size= rcc->buffer_index/3;
2554
2555             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2556                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2557                 if(s->adaptive_quant){
2558                     int i;
2559                     for(i=0; i<s->mb_height*s->mb_stride; i++)
2560                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
2561                 }
2562                 s->mb_skipped = 0;        //done in MPV_frame_start()
2563                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2564                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2565                         s->no_rounding ^= 1;
2566                 }
2567                 if(s->pict_type!=B_TYPE){
2568                     s->time_base= s->last_time_base;
2569                     s->last_non_b_time= s->time - s->pp_time;
2570                 }
2571 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
2572                 for(i=0; i<avctx->thread_count; i++){
2573                     PutBitContext *pb= &s->thread_context[i]->pb;
2574                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2575                 }
2576                 goto vbv_retry;
2577             }
2578
2579             assert(s->avctx->rc_max_rate);
2580         }
2581
2582         if(s->flags&CODEC_FLAG_PASS1)
2583             ff_write_pass1_stats(s);
2584
2585         for(i=0; i<4; i++){
2586             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2587             avctx->error[i] += s->current_picture_ptr->error[i];
2588         }
2589
2590         if(s->flags&CODEC_FLAG_PASS1)
2591             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2592         flush_put_bits(&s->pb);
2593         s->frame_bits  = put_bits_count(&s->pb);
2594
2595         stuffing_count= ff_vbv_update(s, s->frame_bits);
2596         if(stuffing_count){
2597             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2598                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2599                 return -1;
2600             }
2601
2602             switch(s->codec_id){
2603             case CODEC_ID_MPEG1VIDEO:
2604             case CODEC_ID_MPEG2VIDEO:
2605                 while(stuffing_count--){
2606                     put_bits(&s->pb, 8, 0);
2607                 }
2608             break;
2609             case CODEC_ID_MPEG4:
2610                 put_bits(&s->pb, 16, 0);
2611                 put_bits(&s->pb, 16, 0x1C3);
2612                 stuffing_count -= 4;
2613                 while(stuffing_count--){
2614                     put_bits(&s->pb, 8, 0xFF);
2615                 }
2616             break;
2617             default:
2618                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2619             }
2620             flush_put_bits(&s->pb);
2621             s->frame_bits  = put_bits_count(&s->pb);
2622         }
2623
2624         /* update mpeg1/2 vbv_delay for CBR */
2625         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2626            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2627             int vbv_delay;
2628
2629             assert(s->repeat_first_field==0);
2630
2631             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2632             assert(vbv_delay < 0xFFFF);
2633
2634             s->vbv_delay_ptr[0] &= 0xF8;
2635             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2636             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2637             s->vbv_delay_ptr[2] &= 0x07;
2638             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2639         }
2640         s->total_bits += s->frame_bits;
2641         avctx->frame_bits  = s->frame_bits;
2642     }else{
2643         assert((pbBufPtr(&s->pb) == s->pb.buf));
2644         s->frame_bits=0;
2645     }
2646     assert((s->frame_bits&7)==0);
2647
2648     return s->frame_bits/8;
2649 }
2650
2651 #endif //CONFIG_ENCODERS
2652
2653 static inline void gmc1_motion(MpegEncContext *s,
2654                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2655                                uint8_t **ref_picture)
2656 {
2657     uint8_t *ptr;
2658     int offset, src_x, src_y, linesize, uvlinesize;
2659     int motion_x, motion_y;
2660     int emu=0;
2661
2662     motion_x= s->sprite_offset[0][0];
2663     motion_y= s->sprite_offset[0][1];
2664     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2665     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2666     motion_x<<=(3-s->sprite_warping_accuracy);
2667     motion_y<<=(3-s->sprite_warping_accuracy);
2668     src_x = clip(src_x, -16, s->width);
2669     if (src_x == s->width)
2670         motion_x =0;
2671     src_y = clip(src_y, -16, s->height);
2672     if (src_y == s->height)
2673         motion_y =0;
2674
2675     linesize = s->linesize;
2676     uvlinesize = s->uvlinesize;
2677
2678     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2679
2680     if(s->flags&CODEC_FLAG_EMU_EDGE){
2681         if(   (unsigned)src_x >= s->h_edge_pos - 17
2682            || (unsigned)src_y >= s->v_edge_pos - 17){
2683             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2684             ptr= s->edge_emu_buffer;
2685         }
2686     }
2687
2688     if((motion_x|motion_y)&7){
2689         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2690         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2691     }else{
2692         int dxy;
2693
2694         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2695         if (s->no_rounding){
2696             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2697         }else{
2698             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2699         }
2700     }
2701
2702     if(s->flags&CODEC_FLAG_GRAY) return;
2703
2704     motion_x= s->sprite_offset[1][0];
2705     motion_y= s->sprite_offset[1][1];
2706     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2707     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2708     motion_x<<=(3-s->sprite_warping_accuracy);
2709     motion_y<<=(3-s->sprite_warping_accuracy);
2710     src_x = clip(src_x, -8, s->width>>1);
2711     if (src_x == s->width>>1)
2712         motion_x =0;
2713     src_y = clip(src_y, -8, s->height>>1);
2714     if (src_y == s->height>>1)
2715         motion_y =0;
2716
2717     offset = (src_y * uvlinesize) + src_x;
2718     ptr = ref_picture[1] + offset;
2719     if(s->flags&CODEC_FLAG_EMU_EDGE){
2720         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2721            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2722             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2723             ptr= s->edge_emu_buffer;
2724             emu=1;
2725         }
2726     }
2727     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2728
2729     ptr = ref_picture[2] + offset;
2730     if(emu){
2731         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2732         ptr= s->edge_emu_buffer;
2733     }
2734     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2735
2736     return;
2737 }
2738
2739 static inline void gmc_motion(MpegEncContext *s,
2740                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2741                                uint8_t **ref_picture)
2742 {
2743     uint8_t *ptr;
2744     int linesize, uvlinesize;
2745     const int a= s->sprite_warping_accuracy;
2746     int ox, oy;
2747
2748     linesize = s->linesize;
2749     uvlinesize = s->uvlinesize;
2750
2751     ptr = ref_picture[0];
2752
2753     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2754     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2755
2756     s->dsp.gmc(dest_y, ptr, linesize, 16,
2757            ox,
2758            oy,
2759            s->sprite_delta[0][0], s->sprite_delta[0][1],
2760            s->sprite_delta[1][0], s->sprite_delta[1][1],
2761            a+1, (1<<(2*a+1)) - s->no_rounding,
2762            s->h_edge_pos, s->v_edge_pos);
2763     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2764            ox + s->sprite_delta[0][0]*8,
2765            oy + s->sprite_delta[1][0]*8,
2766            s->sprite_delta[0][0], s->sprite_delta[0][1],
2767            s->sprite_delta[1][0], s->sprite_delta[1][1],
2768            a+1, (1<<(2*a+1)) - s->no_rounding,
2769            s->h_edge_pos, s->v_edge_pos);
2770
2771     if(s->flags&CODEC_FLAG_GRAY) return;
2772
2773     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2774     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2775
2776     ptr = ref_picture[1];
2777     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2778            ox,
2779            oy,
2780            s->sprite_delta[0][0], s->sprite_delta[0][1],
2781            s->sprite_delta[1][0], s->sprite_delta[1][1],
2782            a+1, (1<<(2*a+1)) - s->no_rounding,
2783            s->h_edge_pos>>1, s->v_edge_pos>>1);
2784
2785     ptr = ref_picture[2];
2786     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2787            ox,
2788            oy,
2789            s->sprite_delta[0][0], s->sprite_delta[0][1],
2790            s->sprite_delta[1][0], s->sprite_delta[1][1],
2791            a+1, (1<<(2*a+1)) - s->no_rounding,
2792            s->h_edge_pos>>1, s->v_edge_pos>>1);
2793 }
2794
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates
 * the border samples into the parts of the block which lie outside the source.
 * @param buf destination buffer
 * @param src pointer to the sample at (src_x, src_y) in the source buffer
 *            (it may point outside of the source picture)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int row, col;
    int first_row, last_row; /* rows    [first_row, last_row) of the block lie inside the source */
    int first_col, last_col; /* columns [first_col, last_col) of the block lie inside the source */

    /* a block which does not touch the source at all is moved so that it
     * overlaps it by exactly one row / column */
    if(src_y >= h){
        src  += (h-1-src_y)*linesize;
        src_y = h-1;
    }else if(src_y <= -block_h){
        src  += (1-block_h-src_y)*linesize;
        src_y = 1-block_h;
    }
    if(src_x >= w){
        src  += (w-1-src_x);
        src_x = w-1;
    }else if(src_x <= -block_w){
        src  += (1-block_w-src_x);
        src_x = 1-block_w;
    }

    first_row = src_y < 0 ? -src_y : 0;
    first_col = src_x < 0 ? -src_x : 0;
    last_row  = h-src_y < block_h ? h-src_y : block_h;
    last_col  = w-src_x < block_w ? w-src_x : block_w;

    /* copy the samples which really exist */
    for(row=first_row; row<last_row; row++){
        const int line= row*linesize;
        for(col=first_col; col<last_col; col++)
            buf[line + col]= src[line + col];
    }

    /* replicate the first existing row upwards */
    for(row=0; row<first_row; row++){
        for(col=first_col; col<last_col; col++)
            buf[col + row*linesize]= buf[col + first_row*linesize];
    }

    /* replicate the last existing row downwards */
    for(row=last_row; row<block_h; row++){
        for(col=first_col; col<last_col; col++)
            buf[col + row*linesize]= buf[col + (last_row-1)*linesize];
    }

    /* now every row has its middle part, extend it to the left and right */
    for(row=0; row<block_h; row++){
        const int line= row*linesize;

        for(col=0; col<first_col; col++)
            buf[line + col]= buf[line + first_col];

        for(col=last_col; col<block_w; col++)
            buf[line + col]= buf[line + last_col - 1];
    }
}
2865
2866 static inline int hpel_motion(MpegEncContext *s,
2867                                   uint8_t *dest, uint8_t *src,
2868                                   int field_based, int field_select,
2869                                   int src_x, int src_y,
2870                                   int width, int height, int stride,
2871                                   int h_edge_pos, int v_edge_pos,
2872                                   int w, int h, op_pixels_func *pix_op,
2873                                   int motion_x, int motion_y)
2874 {
2875     int dxy;
2876     int emu=0;
2877
2878     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2879     src_x += motion_x >> 1;
2880     src_y += motion_y >> 1;
2881
2882     /* WARNING: do no forget half pels */
2883     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2884     if (src_x == width)
2885         dxy &= ~1;
2886     src_y = clip(src_y, -16, height);
2887     if (src_y == height)
2888         dxy &= ~2;
2889     src += src_y * stride + src_x;
2890
2891     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2892         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2893            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2894             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2895                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2896             src= s->edge_emu_buffer;
2897             emu=1;
2898         }
2899     }
2900     if(field_select)
2901         src += s->linesize;
2902     pix_op[dxy](dest, src, stride, h);
2903     return emu;
2904 }
2905
2906 static inline int hpel_motion_lowres(MpegEncContext *s,
2907                                   uint8_t *dest, uint8_t *src,
2908                                   int field_based, int field_select,
2909                                   int src_x, int src_y,
2910                                   int width, int height, int stride,
2911                                   int h_edge_pos, int v_edge_pos,
2912                                   int w, int h, h264_chroma_mc_func *pix_op,
2913                                   int motion_x, int motion_y)
2914 {
2915     const int lowres= s->avctx->lowres;
2916     const int s_mask= (2<<lowres)-1;
2917     int emu=0;
2918     int sx, sy;
2919
2920     if(s->quarter_sample){
2921         motion_x/=2;
2922         motion_y/=2;
2923     }
2924
2925     sx= motion_x & s_mask;
2926     sy= motion_y & s_mask;
2927     src_x += motion_x >> (lowres+1);
2928     src_y += motion_y >> (lowres+1);
2929
2930     src += src_y * stride + src_x;
2931
2932     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2933        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2934         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2935                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2936         src= s->edge_emu_buffer;
2937         emu=1;
2938     }
2939
2940     sx <<= 2 - lowres;
2941     sy <<= 2 - lowres;
2942     if(field_select)
2943         src += s->linesize;
2944     pix_op[lowres](dest, src, stride, h, sx, sy);
2945     return emu;
2946 }
2947
2948 /* apply one mpeg motion vector to the three components */
2949 static av_always_inline void mpeg_motion(MpegEncContext *s,
2950                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2951                                int field_based, int bottom_field, int field_select,
2952                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2953                                int motion_x, int motion_y, int h)
2954 {
2955     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2956     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2957
2958 #if 0
2959 if(s->quarter_sample)
2960 {
2961     motion_x>>=1;
2962     motion_y>>=1;
2963 }
2964 #endif
2965
2966     v_edge_pos = s->v_edge_pos >> field_based;
2967     linesize   = s->current_picture.linesize[0] << field_based;
2968     uvlinesize = s->current_picture.linesize[1] << field_based;
2969
2970     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2971     src_x = s->mb_x* 16               + (motion_x >> 1);
2972     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2973
2974     if (s->out_format == FMT_H263) {
2975         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2976             mx = (motion_x>>1)|(motion_x&1);
2977             my = motion_y >>1;
2978             uvdxy = ((my & 1) << 1) | (mx & 1);
2979             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2980             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2981         }else{
2982             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2983             uvsrc_x = src_x>>1;
2984             uvsrc_y = src_y>>1;
2985         }
2986     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2987         mx = motion_x / 4;
2988         my = motion_y / 4;
2989         uvdxy = 0;
2990         uvsrc_x = s->mb_x*8 + mx;
2991         uvsrc_y = s->mb_y*8 + my;
2992     } else {
2993         if(s->chroma_y_shift){
2994             mx = motion_x / 2;
2995             my = motion_y / 2;
2996             uvdxy = ((my & 1) << 1) | (mx & 1);
2997             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2998             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2999         } else {
3000             if(s->chroma_x_shift){
3001             //Chroma422
3002                 mx = motion_x / 2;
3003                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
3004                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
3005                 uvsrc_y = src_y;
3006             } else {
3007             //Chroma444
3008                 uvdxy = dxy;
3009                 uvsrc_x = src_x;
3010                 uvsrc_y = src_y;
3011             }
3012         }
3013     }
3014
3015     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3016     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3017     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3018
3019     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
3020        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
3021             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3022                s->codec_id == CODEC_ID_MPEG1VIDEO){
3023                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3024                 return ;
3025             }
3026             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3027                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3028             ptr_y = s->edge_emu_buffer;
3029             if(!(s->flags&CODEC_FLAG_GRAY)){
3030                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3031                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3032                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3033                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3034                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3035                 ptr_cb= uvbuf;
3036                 ptr_cr= uvbuf+16;
3037             }
3038     }
3039
3040     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3041         dest_y += s->linesize;
3042         dest_cb+= s->uvlinesize;
3043         dest_cr+= s->uvlinesize;
3044     }
3045
3046     if(field_select){
3047         ptr_y += s->linesize;
3048         ptr_cb+= s->uvlinesize;
3049         ptr_cr+= s->uvlinesize;
3050     }
3051
3052     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3053
3054     if(!(s->flags&CODEC_FLAG_GRAY)){
3055         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3056         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3057     }
3058 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3059     if(s->out_format == FMT_H261){
3060         ff_h261_loop_filter(s);
3061     }
3062 #endif
3063 }
3064
3065 /* apply one mpeg motion vector to the three components */
3066 static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
3067                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3068                                int field_based, int bottom_field, int field_select,
3069                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3070                                int motion_x, int motion_y, int h)
3071 {
3072     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3073     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3074     const int lowres= s->avctx->lowres;
3075     const int block_s= 8>>lowres;
3076     const int s_mask= (2<<lowres)-1;
3077     const int h_edge_pos = s->h_edge_pos >> lowres;
3078     const int v_edge_pos = s->v_edge_pos >> lowres;
3079     linesize   = s->current_picture.linesize[0] << field_based;
3080     uvlinesize = s->current_picture.linesize[1] << field_based;
3081
3082     if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
3083         motion_x/=2;
3084         motion_y/=2;
3085     }
3086
3087     if(field_based){
3088         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3089     }
3090
3091     sx= motion_x & s_mask;
3092     sy= motion_y & s_mask;
3093     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3094     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3095
3096     if (s->out_format == FMT_H263) {
3097         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3098         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3099         uvsrc_x = src_x>>1;
3100         uvsrc_y = src_y>>1;
3101     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
3102         mx = motion_x / 4;
3103         my = motion_y / 4;
3104         uvsx = (2*mx) & s_mask;
3105         uvsy = (2*my) & s_mask;
3106         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3107         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3108     } else {
3109         mx = motion_x / 2;
3110         my = motion_y / 2;
3111         uvsx = mx & s_mask;
3112         uvsy = my & s_mask;
3113         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3114         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3115     }
3116
3117     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3118     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3119     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3120
3121     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3122        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3123             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3124                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3125             ptr_y = s->edge_emu_buffer;
3126             if(!(s->flags&CODEC_FLAG_GRAY)){
3127                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3128                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3129                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3130                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3131                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3132                 ptr_cb= uvbuf;
3133                 ptr_cr= uvbuf+16;
3134             }
3135     }
3136
3137     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3138         dest_y += s->linesize;
3139         dest_cb+= s->uvlinesize;
3140         dest_cr+= s->uvlinesize;
3141     }
3142
3143     if(field_select){
3144         ptr_y += s->linesize;
3145         ptr_cb+= s->uvlinesize;
3146         ptr_cr+= s->uvlinesize;
3147     }
3148
3149     sx <<= 2 - lowres;
3150     sy <<= 2 - lowres;
3151     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3152
3153     if(!(s->flags&CODEC_FLAG_GRAY)){
3154         uvsx <<= 2 - lowres;
3155         uvsy <<= 2 - lowres;
3156         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3157         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3158     }
3159     //FIXME h261 lowres loop filter
3160 }
3161
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend five 8x8 predictions into one 8x8 block.
 * Each output pixel is the weighted sum of the co-located pixels of the five
 * sources; the five weights of a pixel always add up to 8, the sum is
 * rounded (+4) and shifted right by 3.
 *
 * @param dst    destination block
 * @param src    source blocks: [0] mid, [1] top, [2] left, [3] right, [4] bottom
 * @param stride line stride shared by dst and all sources
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    /* per-pixel weights, first index matches the index into src[] */
    static const uint8_t obmc_weight[5][8][8] = {
        { /* mid */
            {4,5,5,5,5,5,5,4},
            {5,5,5,5,5,5,5,5},
            {5,5,6,6,6,6,5,5},
            {5,5,6,6,6,6,5,5},
            {5,5,6,6,6,6,5,5},
            {5,5,6,6,6,6,5,5},
            {5,5,5,5,5,5,5,5},
            {4,5,5,5,5,5,5,4},
        },
        { /* top */
            {2,2,2,2,2,2,2,2},
            {1,1,2,2,2,2,1,1},
            {1,1,1,1,1,1,1,1},
            {1,1,1,1,1,1,1,1},
            {0,0,0,0,0,0,0,0},
            {0,0,0,0,0,0,0,0},
            {0,0,0,0,0,0,0,0},
            {0,0,0,0,0,0,0,0},
        },
        { /* left */
            {2,1,1,1,0,0,0,0},
            {2,2,1,1,0,0,0,0},
            {2,2,1,1,0,0,0,0},
            {2,2,1,1,0,0,0,0},
            {2,2,1,1,0,0,0,0},
            {2,2,1,1,0,0,0,0},
            {2,2,1,1,0,0,0,0},
            {2,1,1,1,0,0,0,0},
        },
        { /* right */
            {0,0,0,0,1,1,1,2},
            {0,0,0,0,1,1,2,2},
            {0,0,0,0,1,1,2,2},
            {0,0,0,0,1,1,2,2},
            {0,0,0,0,1,1,2,2},
            {0,0,0,0,1,1,2,2},
            {0,0,0,0,1,1,2,2},
            {0,0,0,0,1,1,1,2},
        },
        { /* bottom */
            {0,0,0,0,0,0,0,0},
            {0,0,0,0,0,0,0,0},
            {0,0,0,0,0,0,0,0},
            {0,0,0,0,0,0,0,0},
            {1,1,1,1,1,1,1,1},
            {1,1,1,1,1,1,1,1},
            {1,1,2,2,2,2,1,1},
            {2,2,2,2,2,2,2,2},
        },
    };
    uint8_t * const plane[5] = { src[0], src[1], src[2], src[3], src[4] };
    int row, col, k;

    for(row=0; row<8; row++){
        for(col=0; col<8; col++){
            const int pos = row*stride + col;
            int acc = 4; /* rounding term */

            for(k=0; k<5; k++)
                acc += obmc_weight[k][row][col] * plane[k][pos];
            dst[pos] = acc >> 3;
        }
    }
}
3213
3214 /* obmc for 1 8x8 luma block */
3215 static inline void obmc_motion(MpegEncContext *s,
3216                                uint8_t *dest, uint8_t *src,
3217                                int src_x, int src_y,
3218                                op_pixels_func *pix_op,
3219                                int16_t mv[5][2]/* mid top left right bottom*/)
3220 #define MID    0
3221 {
3222     int i;
3223     uint8_t *ptr[5];
3224
3225     assert(s->quarter_sample==0);
3226
3227     for(i=0; i<5; i++){
3228         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3229             ptr[i]= ptr[MID];
3230         }else{
3231             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3232             hpel_motion(s, ptr[i], src, 0, 0,
3233                         src_x, src_y,
3234                         s->width, s->height, s->linesize,
3235                         s->h_edge_pos, s->v_edge_pos,
3236                         8, 8, pix_op,
3237                         mv[i][0], mv[i][1]);
3238         }
3239     }
3240
3241     put_obmc(dest, ptr, s->linesize);
3242 }
3243
3244 static inline void qpel_motion(MpegEncContext *s,
3245                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3246                                int field_based, int bottom_field, int field_select,
3247                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3248                                qpel_mc_func (*qpix_op)[16],
3249                                int motion_x, int motion_y, int h)
3250 {
3251     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3252     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3253
3254     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3255     src_x = s->mb_x *  16                 + (motion_x >> 2);
3256     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3257
3258     v_edge_pos = s->v_edge_pos >> field_based;
3259     linesize = s->linesize << field_based;
3260     uvlinesize = s->uvlinesize << field_based;
3261
3262     if(field_based){
3263         mx= motion_x/2;
3264         my= motion_y>>1;
3265     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3266         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3267         mx= (motion_x>>1) + rtab[motion_x&7];
3268         my= (motion_y>>1) + rtab[motion_y&7];
3269     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3270         mx= (motion_x>>1)|(motion_x&1);
3271         my= (motion_y>>1)|(motion_y&1);
3272     }else{
3273         mx= motion_x/2;
3274         my= motion_y/2;
3275     }
3276     mx= (mx>>1)|(mx&1);
3277     my= (my>>1)|(my&1);
3278
3279     uvdxy= (mx&1) | ((my&1)<<1);
3280     mx>>=1;
3281     my>>=1;
3282
3283     uvsrc_x = s->mb_x *  8                 + mx;
3284     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3285
3286     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3287     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3288     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3289
3290     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3291        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3292         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3293                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3294         ptr_y= s->edge_emu_buffer;
3295         if(!(s->flags&CODEC_FLAG_GRAY)){
3296             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3297             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3298                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3299             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3300                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3301             ptr_cb= uvbuf;
3302             ptr_cr= uvbuf + 16;
3303         }
3304     }
3305
3306     if(!field_based)
3307         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3308     else{
3309         if(bottom_field){
3310             dest_y += s->linesize;
3311             dest_cb+= s->uvlinesize;
3312             dest_cr+= s->uvlinesize;
3313         }
3314
3315         if(field_select){
3316             ptr_y  += s->linesize;
3317             ptr_cb += s->uvlinesize;
3318             ptr_cr += s->uvlinesize;
3319         }
3320         //damn interlaced mode
3321         //FIXME boundary mirroring is not exactly correct here
3322         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3323         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3324     }
3325     if(!(s->flags&CODEC_FLAG_GRAY)){
3326         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3327         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3328     }
3329 }
3330
3331 inline int ff_h263_round_chroma(int x){
3332     if (x >= 0)
3333         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3334     else {
3335         x = -x;
3336         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3337     }
3338 }
3339
3340 /**
3341  * h263 chorma 4mv motion compensation.
3342  */
3343 static inline void chroma_4mv_motion(MpegEncContext *s,
3344                                      uint8_t *dest_cb, uint8_t *dest_cr,
3345                                      uint8_t **ref_picture,
3346                                      op_pixels_func *pix_op,
3347                                      int mx, int my){
3348     int dxy, emu=0, src_x, src_y, offset;
3349     uint8_t *ptr;
3350
3351     /* In case of 8X8, we construct a single chroma motion vector
3352        with a special rounding */
3353     mx= ff_h263_round_chroma(mx);
3354     my= ff_h263_round_chroma(my);
3355
3356     dxy = ((my & 1) << 1) | (mx & 1);
3357     mx >>= 1;
3358     my >>= 1;
3359
3360     src_x = s->mb_x * 8 + mx;
3361     src_y = s->mb_y * 8 + my;
3362     src_x = clip(src_x, -8, s->width/2);
3363     if (src_x == s->width/2)
3364         dxy &= ~1;
3365     src_y = clip(src_y, -8, s->height/2);
3366     if (src_y == s->height/2)
3367         dxy &= ~2;
3368
3369     offset = (src_y * (s->uvlinesize)) + src_x;
3370     ptr = ref_picture[1] + offset;
3371     if(s->flags&CODEC_FLAG_EMU_EDGE){
3372         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3373            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3374             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3375             ptr= s->edge_emu_buffer;
3376             emu=1;
3377         }
3378     }
3379     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3380
3381     ptr = ref_picture[2] + offset;
3382     if(emu){
3383         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3384         ptr= s->edge_emu_buffer;
3385     }
3386     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3387 }
3388
3389 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3390                                      uint8_t *dest_cb, uint8_t *dest_cr,
3391                                      uint8_t **ref_picture,
3392                                      h264_chroma_mc_func *pix_op,
3393                                      int mx, int my){
3394     const int lowres= s->avctx->lowres;
3395     const int block_s= 8>>lowres;
3396     const int s_mask= (2<<lowres)-1;
3397     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3398     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3399     int emu=0, src_x, src_y, offset, sx, sy;
3400     uint8_t *ptr;
3401
3402     if(s->quarter_sample){
3403         mx/=2;
3404         my/=2;
3405     }
3406
3407     /* In case of 8X8, we construct a single chroma motion vector
3408        with a special rounding */
3409     mx= ff_h263_round_chroma(mx);
3410     my= ff_h263_round_chroma(my);
3411
3412     sx= mx & s_mask;
3413     sy= my & s_mask;
3414     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3415     src_y = s->mb_y*block_s + (my >> (lowres+1));
3416
3417     offset = src_y * s->uvlinesize + src_x;
3418     ptr = ref_picture[1] + offset;
3419     if(s->flags&CODEC_FLAG_EMU_EDGE){
3420         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3421            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3422             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3423             ptr= s->edge_emu_buffer;
3424             emu=1;
3425         }
3426     }
3427     sx <<= 2 - lowres;
3428     sy <<= 2 - lowres;
3429     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3430
3431     ptr = ref_picture[2] + offset;
3432     if(emu){
3433         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3434         ptr= s->edge_emu_buffer;
3435     }
3436     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3437 }
3438
3439 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3440     /* fetch pixels for estimated mv 4 macroblocks ahead
3441      * optimized for 64byte cache lines */
3442     const int shift = s->quarter_sample ? 2 : 1;
3443     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3444     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3445     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3446     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3447     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3448     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3449 }
3450
3451 /**
3452  * motion compensation of a single macroblock
3453  * @param s context
3454  * @param dest_y luma destination pointer
3455  * @param dest_cb chroma cb/u destination pointer
3456  * @param dest_cr chroma cr/v destination pointer
3457  * @param dir direction (0->forward, 1->backward)
3458  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3459  * @param pic_op halfpel motion compensation function (average or put normally)
3460  * @param pic_op qpel motion compensation function (average or put normally)
3461  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3462  */
3463 static inline void MPV_motion(MpegEncContext *s,
3464                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3465                               int dir, uint8_t **ref_picture,
3466                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3467 {
3468     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3469     int mb_x, mb_y, i;
3470     uint8_t *ptr, *dest;
3471
3472     mb_x = s->mb_x;
3473     mb_y = s->mb_y;
3474
3475     prefetch_motion(s, ref_picture, dir);
3476
3477     if(s->obmc && s->pict_type != B_TYPE){
3478         int16_t mv_cache[4][4][2];
3479         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3480         const int mot_stride= s->b8_stride;
3481         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3482
3483         assert(!s->mb_skipped);
3484
3485         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3486         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3487         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3488
3489         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3490             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3491         }else{
3492             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3493         }
3494
3495         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3496             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3497             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3498         }else{
3499             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3500             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3501         }
3502
3503         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3504             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3505             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3506         }else{
3507             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3508             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3509         }
3510
3511         mx = 0;
3512         my = 0;
3513         for(i=0;i<4;i++) {
3514             const int x= (i&1)+1;
3515             const int y= (i>>1)+1;
3516             int16_t mv[5][2]= {
3517                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3518                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3519                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3520                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3521                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3522             //FIXME cleanup
3523             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3524                         ref_picture[0],
3525                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3526                         pix_op[1],
3527                         mv);
3528
3529             mx += mv[0][0];
3530             my += mv[0][1];
3531         }
3532         if(!(s->flags&CODEC_FLAG_GRAY))
3533             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3534
3535         return;
3536     }
3537
3538     switch(s->mv_type) {
3539     case MV_TYPE_16X16:
3540         if(s->mcsel){
3541             if(s->real_sprite_warping_points==1){
3542                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3543                             ref_picture);
3544             }else{
3545                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3546                             ref_picture);
3547             }
3548         }else if(s->quarter_sample){
3549             qpel_motion(s, dest_y, dest_cb, dest_cr,
3550                         0, 0, 0,
3551                         ref_picture, pix_op, qpix_op,
3552                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3553         }else if(s->mspel){
3554             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3555                         ref_picture, pix_op,
3556                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3557         }else
3558         {
3559             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3560                         0, 0, 0,
3561                         ref_picture, pix_op,
3562                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3563         }
3564         break;
3565     case MV_TYPE_8X8:
3566         mx = 0;
3567         my = 0;
3568         if(s->quarter_sample){
3569             for(i=0;i<4;i++) {
3570                 motion_x = s->mv[dir][i][0];
3571                 motion_y = s->mv[dir][i][1];
3572
3573                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3574                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3575                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3576
3577                 /* WARNING: do no forget half pels */
3578                 src_x = clip(src_x, -16, s->width);
3579                 if (src_x == s->width)
3580                     dxy &= ~3;
3581                 src_y = clip(src_y, -16, s->height);
3582                 if (src_y == s->height)
3583                     dxy &= ~12;
3584
3585                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3586                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3587                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3588                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3589                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3590                         ptr= s->edge_emu_buffer;
3591                     }
3592                 }
3593                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3594                 qpix_op[1][dxy](dest, ptr, s->linesize);
3595
3596                 mx += s->mv[dir][i][0]/2;
3597                 my += s->mv[dir][i][1]/2;
3598             }
3599         }else{
3600             for(i=0;i<4;i++) {
3601                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3602                             ref_picture[0], 0, 0,
3603                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3604                             s->width, s->height, s->linesize,
3605                             s->h_edge_pos, s->v_edge_pos,
3606                             8, 8, pix_op[1],
3607                             s->mv[dir][i][0], s->mv[dir][i][1]);
3608
3609                 mx += s->mv[dir][i][0];
3610                 my += s->mv[dir][i][1];
3611             }
3612         }
3613
3614         if(!(s->flags&CODEC_FLAG_GRAY))
3615             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3616         break;
3617     case MV_TYPE_FIELD:
3618         if (s->picture_structure == PICT_FRAME) {
3619             if(s->quarter_sample){
3620                 for(i=0; i<2; i++){
3621                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3622                                 1, i, s->field_select[dir][i],
3623                                 ref_picture, pix_op, qpix_op,
3624                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3625                 }
3626             }else{
3627                 /* top field */
3628                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3629                             1, 0, s->field_select[dir][0],
3630                             ref_picture, pix_op,
3631                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3632                 /* bottom field */
3633                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3634                             1, 1, s->field_select[dir][1],
3635                             ref_picture, pix_op,
3636                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3637             }
3638         } else {
3639             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3640                 ref_picture= s->current_picture_ptr->data;
3641             }
3642
3643             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3644                         0, 0, s->field_select[dir][0],
3645                         ref_picture, pix_op,
3646                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3647         }
3648         break;
3649     case MV_TYPE_16X8:
3650         for(i=0; i<2; i++){
3651             uint8_t ** ref2picture;
3652
3653             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3654                 ref2picture= ref_picture;
3655             }else{
3656                 ref2picture= s->current_picture_ptr->data;
3657             }
3658
3659             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3660                         0, 0, s->field_select[dir][i],
3661                         ref2picture, pix_op,
3662                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3663
3664             dest_y += 16*s->linesize;
3665             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3666             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3667         }
3668         break;
3669     case MV_TYPE_DMV:
3670         if(s->picture_structure == PICT_FRAME){
3671             for(i=0; i<2; i++){
3672                 int j;
3673                 for(j=0; j<2; j++){
3674                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3675                                 1, j, j^i,
3676                                 ref_picture, pix_op,
3677                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3678                 }
3679                 pix_op = s->dsp.avg_pixels_tab;
3680             }
3681         }else{
3682             for(i=0; i<2; i++){
3683                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3684                             0, 0, s->picture_structure != i+1,
3685                             ref_picture, pix_op,
3686                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3687
3688                 // after put we make avg of the same block
3689                 pix_op=s->dsp.avg_pixels_tab;
3690
3691                 //opposite parity is always in the same frame if this is second field
3692                 if(!s->first_field){
3693                     ref_picture = s->current_picture_ptr->data;
3694                 }
3695             }
3696         }
3697     break;
3698     default: assert(0);
3699     }
3700 }
3701
3702 /**
3703  * motion compensation of a single macroblock
3704  * @param s context
3705  * @param dest_y luma destination pointer
3706  * @param dest_cb chroma cb/u destination pointer
3707  * @param dest_cr chroma cr/v destination pointer
3708  * @param dir direction (0->forward, 1->backward)
3709  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3710  * @param pic_op halfpel motion compensation function (average or put normally)
3711  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3712  */
3713 static inline void MPV_motion_lowres(MpegEncContext *s,
3714                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3715                               int dir, uint8_t **ref_picture,
3716                               h264_chroma_mc_func *pix_op)
3717 {
3718     int mx, my;
3719     int mb_x, mb_y, i;
3720     const int lowres= s->avctx->lowres;
3721     const int block_s= 8>>lowres;
3722
3723     mb_x = s->mb_x;
3724     mb_y = s->mb_y;
3725
3726     switch(s->mv_type) {
3727     case MV_TYPE_16X16:
3728         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3729                     0, 0, 0,
3730                     ref_picture, pix_op,
3731                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3732         break;
3733     case MV_TYPE_8X8:
3734         mx = 0;
3735         my = 0;
3736             for(i=0;i<4;i++) {
3737                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3738                             ref_picture[0], 0, 0,
3739                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3740                             s->width, s->height, s->linesize,
3741                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3742                             block_s, block_s, pix_op,
3743                             s->mv[dir][i][0], s->mv[dir][i][1]);
3744
3745                 mx += s->mv[dir][i][0];
3746                 my += s->mv[dir][i][1];
3747             }
3748
3749         if(!(s->flags&CODEC_FLAG_GRAY))
3750             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3751         break;
3752     case MV_TYPE_FIELD:
3753         if (s->picture_structure == PICT_FRAME) {
3754             /* top field */
3755             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3756                         1, 0, s->field_select[dir][0],
3757                         ref_picture, pix_op,
3758                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3759             /* bottom field */
3760             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3761                         1, 1, s->field_select[dir][1],
3762                         ref_picture, pix_op,
3763                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3764         } else {
3765             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3766                 ref_picture= s->current_picture_ptr->data;
3767             }
3768
3769             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3770                         0, 0, s->field_select[dir][0],
3771                         ref_picture, pix_op,
3772                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3773         }
3774         break;
3775     case MV_TYPE_16X8:
3776         for(i=0; i<2; i++){
3777             uint8_t ** ref2picture;
3778
3779             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3780                 ref2picture= ref_picture;
3781             }else{
3782                 ref2picture= s->current_picture_ptr->data;
3783             }
3784
3785             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3786                         0, 0, s->field_select[dir][i],
3787                         ref2picture, pix_op,
3788                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3789
3790             dest_y += 2*block_s*s->linesize;
3791             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3792             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3793         }
3794         break;
3795     case MV_TYPE_DMV:
3796         if(s->picture_structure == PICT_FRAME){
3797             for(i=0; i<2; i++){
3798                 int j;
3799                 for(j=0; j<2; j++){
3800                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3801                                 1, j, j^i,
3802                                 ref_picture, pix_op,
3803                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3804                 }
3805                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3806             }
3807         }else{
3808             for(i=0; i<2; i++){
3809                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3810                             0, 0, s->picture_structure != i+1,
3811                             ref_picture, pix_op,
3812                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3813
3814                 // after put we make avg of the same block
3815                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3816
3817                 //opposite parity is always in the same frame if this is second field
3818                 if(!s->first_field){
3819                     ref_picture = s->current_picture_ptr->data;
3820                 }
3821             }
3822         }
3823     break;
3824     default: assert(0);
3825     }
3826 }
3827
3828 /* put block[] to dest[] */
3829 static inline void put_dct(MpegEncContext *s,
3830                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3831 {
3832     s->dct_unquantize_intra(s, block, i, qscale);
3833     s->dsp.idct_put (dest, line_size, block);
3834 }
3835
3836 /* add block[] to dest[] */
3837 static inline void add_dct(MpegEncContext *s,
3838                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3839 {
3840     if (s->block_last_index[i] >= 0) {
3841         s->dsp.idct_add (dest, line_size, block);
3842     }
3843 }
3844
3845 static inline void add_dequant_dct(MpegEncContext *s,
3846                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3847 {
3848     if (s->block_last_index[i] >= 0) {
3849         s->dct_unquantize_inter(s, block, i, qscale);
3850
3851         s->dsp.idct_add (dest, line_size, block);
3852     }
3853 }
3854
3855 /**
3856  * cleans dc, ac, coded_block for the current non intra MB
3857  */
3858 void ff_clean_intra_table_entries(MpegEncContext *s)
3859 {
3860     int wrap = s->b8_stride;
3861     int xy = s->block_index[0];
3862
3863     s->dc_val[0][xy           ] =
3864     s->dc_val[0][xy + 1       ] =
3865     s->dc_val[0][xy     + wrap] =
3866     s->dc_val[0][xy + 1 + wrap] = 1024;
3867     /* ac pred */
3868     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3869     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3870     if (s->msmpeg4_version>=3) {
3871         s->coded_block[xy           ] =
3872         s->coded_block[xy + 1       ] =
3873         s->coded_block[xy     + wrap] =
3874         s->coded_block[xy + 1 + wrap] = 0;
3875     }
3876     /* chroma */
3877     wrap = s->mb_stride;
3878     xy = s->mb_x + s->mb_y * wrap;
3879     s->dc_val[1][xy] =
3880     s->dc_val[2][xy] = 1024;
3881     /* ac pred */
3882     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3883     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3884
3885     s->mbintra_table[xy]= 0;
3886 }
3887
3888 /* generic function called after a macroblock has been parsed by the
3889    decoder or after it has been encoded by the encoder.
3890
3891    Important variables used:
3892    s->mb_intra : true if intra macroblock
3893    s->mv_dir   : motion vector direction
3894    s->mv_type  : motion vector type
3895    s->mv       : motion vector
3896    s->interlaced_dct : true if interlaced dct used (mpeg2)
3897  */
3898 static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3899 {
3900     int mb_x, mb_y;
3901     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3902 #ifdef HAVE_XVMC
3903     if(s->avctx->xvmc_acceleration){
3904         XVMC_decode_mb(s);//xvmc uses pblocks
3905         return;
3906     }
3907 #endif
3908
3909     mb_x = s->mb_x;
3910     mb_y = s->mb_y;
3911
3912     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3913        /* save DCT coefficients */
3914        int i,j;
3915        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3916        for(i=0; i<6; i++)
3917            for(j=0; j<64; j++)
3918                *dct++ = block[i][s->dsp.idct_permutation[j]];
3919     }
3920
3921     s->current_picture.qscale_table[mb_xy]= s->qscale;
3922
3923     /* update DC predictors for P macroblocks */
3924     if (!s->mb_intra) {
3925         if (s->h263_pred || s->h263_aic) {
3926             if(s->mbintra_table[mb_xy])
3927                 ff_clean_intra_table_entries(s);
3928         } else {
3929             s->last_dc[0] =
3930             s->last_dc[1] =
3931             s->last_dc[2] = 128 << s->intra_dc_precision;
3932         }
3933     }
3934     else if (s->h263_pred || s->h263_aic)
3935         s->mbintra_table[mb_xy]=1;
3936
3937     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
3938         uint8_t *dest_y, *dest_cb, *dest_cr;
3939         int dct_linesize, dct_offset;
3940         op_pixels_func (*op_pix)[4];
3941         qpel_mc_func (*op_qpix)[16];
3942         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3943         const int uvlinesize= s->current_picture.linesize[1];
3944         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3945         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3946
3947         /* avoid copy if macroblock skipped in last frame too */
3948         /* skip only during decoding as we might trash the buffers during encoding a bit */
3949         if(!s->encoding){
3950             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3951             const int age= s->current_picture.age;
3952
3953             assert(age);
3954
3955             if (s->mb_skipped) {
3956                 s->mb_skipped= 0;
3957                 assert(s->pict_type!=I_TYPE);
3958
3959                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3960                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3961
3962                 /* if previous was skipped too, then nothing to do !  */
3963                 if (*mbskip_ptr >= age && s->current_picture.reference){
3964                     return;
3965                 }
3966             } else if(!s->current_picture.reference){
3967                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3968                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3969             } else{
3970                 *mbskip_ptr = 0; /* not skipped */
3971             }
3972         }
3973
3974         dct_linesize = linesize << s->interlaced_dct;
3975         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3976
3977         if(readable){
3978             dest_y=  s->dest[0];
3979             dest_cb= s->dest[1];
3980             dest_cr= s->dest[2];
3981         }else{
3982             dest_y = s->b_scratchpad;
3983             dest_cb= s->b_scratchpad+16*linesize;
3984             dest_cr= s->b_scratchpad+32*linesize;
3985         }
3986
3987         if (!s->mb_intra) {
3988             /* motion handling */
3989             /* decoding or more than one mb_type (MC was already done otherwise) */
3990             if(!s->encoding){
3991                 if(lowres_flag){
3992                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3993
3994                     if (s->mv_dir & MV_DIR_FORWARD) {
3995                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3996                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3997                     }
3998                     if (s->mv_dir & MV_DIR_BACKWARD) {
3999                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
4000                     }
4001                 }else{
4002                     op_qpix= s->me.qpel_put;
4003                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
4004                         op_pix = s->dsp.put_pixels_tab;
4005                     }else{
4006                         op_pix = s->dsp.put_no_rnd_pixels_tab;
4007                     }
4008                     if (s->mv_dir & MV_DIR_FORWARD) {
4009                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4010                         op_pix = s->dsp.avg_pixels_tab;
4011                         op_qpix= s->me.qpel_avg;
4012                     }
4013                     if (s->mv_dir & MV_DIR_BACKWARD) {
4014                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4015                     }
4016                 }
4017             }
4018
4019             /* skip dequant / idct if we are really late ;) */
4020             if(s->hurry_up>1) goto skip_idct;
4021             if(s->avctx->skip_idct){
4022                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4023                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4024                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4025                     goto skip_idct;
4026             }
4027
4028             /* add dct residue */
4029             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4030                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4031                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4032                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4033                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4034                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4035
4036                 if(!(s->flags&CODEC_FLAG_GRAY)){
4037                     if (s->chroma_y_shift){
4038                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4039                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4040                     }else{
4041                         dct_linesize >>= 1;
4042                         dct_offset >>=1;
4043                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4044                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4045                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4046                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4047                     }
4048                 }
4049             } else if(s->codec_id != CODEC_ID_WMV2){
4050                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4051                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4052                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4053                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4054
4055                 if(!(s->flags&CODEC_FLAG_GRAY)){
4056                     if(s->chroma_y_shift){//Chroma420
4057                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4058                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4059                     }else{
4060                         //chroma422
4061                         dct_linesize = uvlinesize << s->interlaced_dct;
4062                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4063
4064                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4065                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4066                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4067                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4068                         if(!s->chroma_x_shift){//Chroma444
4069                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4070                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4071                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4072                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4073                         }
4074                     }
4075                 }//fi gray
4076             }
4077             else{
4078                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4079             }
4080         } else {
4081             /* dct only in intra block */
4082             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4083                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4084                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4085                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4086                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4087
4088                 if(!(s->flags&CODEC_FLAG_GRAY)){
4089                     if(s->chroma_y_shift){
4090                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4091                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4092                     }else{
4093                         dct_offset >>=1;
4094                         dct_linesize >>=1;
4095                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4096                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4097                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4098                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4099                     }
4100                 }
4101             }else{
4102                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4103                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4104                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4105                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4106
4107                 if(!(s->flags&CODEC_FLAG_GRAY)){
4108                     if(s->chroma_y_shift){
4109                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4110                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4111                     }else{
4112
4113                         dct_linesize = uvlinesize << s->interlaced_dct;
4114                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4115
4116                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4117                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4118                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4119                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4120                         if(!s->chroma_x_shift){//Chroma444
4121                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4122                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4123                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4124                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4125                         }
4126                     }
4127                 }//gray
4128             }
4129         }
4130 skip_idct:
4131         if(!readable){
4132             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4133             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4134             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4135         }
4136     }
4137 }
4138
4139 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4140     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4141     else                  MPV_decode_mb_internal(s, block, 0);
4142 }
4143
4144 #ifdef CONFIG_ENCODERS
4145
4146 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4147 {
4148     static const char tab[64]=
4149         {3,2,2,1,1,1,1,1,
4150          1,1,1,1,1,1,1,1,
4151          1,1,1,1,1,1,1,1,
4152          0,0,0,0,0,0,0,0,
4153          0,0,0,0,0,0,0,0,
4154          0,0,0,0,0,0,0,0,
4155          0,0,0,0,0,0,0,0,
4156          0,0,0,0,0,0,0,0};
4157     int score=0;
4158     int run=0;
4159     int i;
4160     DCTELEM *block= s->block[n];
4161     const int last_index= s->block_last_index[n];
4162     int skip_dc;
4163
4164     if(threshold<0){
4165         skip_dc=0;
4166         threshold= -threshold;
4167     }else
4168         skip_dc=1;
4169
4170     /* are all which we could set to zero are allready zero? */
4171     if(last_index<=skip_dc - 1) return;
4172
4173     for(i=0; i<=last_index; i++){
4174         const int j = s->intra_scantable.permutated[i];
4175         const int level = FFABS(block[j]);
4176         if(level==1){
4177             if(skip_dc && i==0) continue;
4178             score+= tab[run];
4179             run=0;
4180         }else if(level>1){
4181             return;
4182         }else{
4183             run++;
4184         }
4185     }
4186     if(score >= threshold) return;
4187     for(i=skip_dc; i<=last_index; i++){
4188         const int j = s->intra_scantable.permutated[i];
4189         block[j]=0;
4190     }
4191     if(block[0]) s->block_last_index[n]= 0;
4192     else         s->block_last_index[n]= -1;
4193 }
4194
4195 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4196 {
4197     int i;
4198     const int maxlevel= s->max_qcoeff;
4199     const int minlevel= s->min_qcoeff;
4200     int overflow=0;
4201
4202     if(s->mb_intra){
4203         i=1; //skip clipping of intra dc
4204     }else
4205         i=0;
4206
4207     for(;i<=last_index; i++){
4208         const int j= s->intra_scantable.permutated[i];
4209         int level = block[j];
4210
4211         if     (level>maxlevel){
4212             level=maxlevel;
4213             overflow++;
4214         }else if(level<minlevel){
4215             level=minlevel;
4216             overflow++;
4217         }
4218
4219         block[j]= level;
4220     }
4221
4222     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4223         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4224 }
4225
4226 #endif //CONFIG_ENCODERS
4227
4228 /**
4229  *
4230  * @param h is the normal height, this will be reduced automatically if needed for the last row
4231  */
4232 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4233     if (s->avctx->draw_horiz_band) {
4234         AVFrame *src;
4235         int offset[4];
4236
4237         if(s->picture_structure != PICT_FRAME){
4238             h <<= 1;
4239             y <<= 1;
4240             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4241         }
4242
4243         h= FFMIN(h, s->avctx->height - y);
4244
4245         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4246             src= (AVFrame*)s->current_picture_ptr;
4247         else if(s->last_picture_ptr)
4248             src= (AVFrame*)s->last_picture_ptr;
4249         else
4250             return;
4251
4252         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4253             offset[0]=
4254             offset[1]=
4255             offset[2]=
4256             offset[3]= 0;
4257         }else{
4258             offset[0]= y * s->linesize;;
4259             offset[1]=
4260             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4261             offset[3]= 0;
4262         }
4263
4264         emms_c();
4265
4266         s->avctx->draw_horiz_band(s->avctx, src, offset,
4267                                   y, s->picture_structure, h);
4268     }
4269 }
4270
4271 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4272     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4273     const int uvlinesize= s->current_picture.linesize[1];
4274     const int mb_size= 4 - s->avctx->lowres;
4275
4276     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4277     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4278     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4279     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4280     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4281     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4282     //block_index is not used by mpeg2, so it is not affected by chroma_format
4283
4284     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4285     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4286     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4287
4288     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4289     {
4290         s->dest[0] += s->mb_y *   linesize << mb_size;
4291         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4292         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4293     }
4294 }
4295
4296 #ifdef CONFIG_ENCODERS
4297
/**
 * Computes a weight for each pixel of an 8x8 block from the pixels of its
 * neighbourhood (up to 3x3, clipped at the block border).
 *
 * With sum, sqr and count being the sum, the sum of squares and the number
 * of the neighbourhood pixels, the weight is
 * 36*ff_sqrt(count*sqr - sum*sum) / count.
 *
 * @param weight 64 output weights, stored row by row
 * @param ptr    top left pixel of the block
 * @param stride distance between two lines of ptr
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int col, row;
//FIXME optimize
    for(row=0; row<8; row++){
        const int top   = FFMAX(row-1, 0);
        const int bottom= FFMIN(8, row+2);

        for(col=0; col<8; col++){
            const int left = FFMAX(col-1, 0);
            const int right= FFMIN(8, col+2);
            int sum=0;
            int sqr=0;
            int count=0;
            int nx, ny;

            for(ny=top; ny<bottom; ny++){
                const uint8_t *line= ptr + ny*stride;

                for(nx=left; nx<right; nx++){
                    const int v= line[nx];

                    sum += v;
                    sqr += v*v;
                    count++;
                }
            }
            weight[col + 8*row]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4320
4321 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4322 {
4323     int16_t weight[8][64];
4324     DCTELEM orig[8][64];
4325     const int mb_x= s->mb_x;
4326     const int mb_y= s->mb_y;
4327     int i;
4328     int skip_dct[8];
4329     int dct_offset   = s->linesize*8; //default for progressive frames
4330     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4331     int wrap_y, wrap_c;
4332
4333     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4334
4335     if(s->adaptive_quant){
4336         const int last_qp= s->qscale;
4337         const int mb_xy= mb_x + mb_y*s->mb_stride;
4338
4339         s->lambda= s->lambda_table[mb_xy];
4340         update_qscale(s);
4341
4342         if(!(s->flags&CODEC_FLAG_QP_RD)){
4343             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4344             s->dquant= s->qscale - last_qp;
4345
4346             if(s->out_format==FMT_H263){
4347                 s->dquant= clip(s->dquant, -2, 2);
4348
4349                 if(s->codec_id==CODEC_ID_MPEG4){
4350                     if(!s->mb_intra){
4351                         if(s->pict_type == B_TYPE){
4352                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4353                                 s->dquant= 0;
4354                         }
4355                         if(s->mv_type==MV_TYPE_8X8)
4356                             s->dquant=0;
4357                     }
4358                 }
4359             }
4360         }
4361         ff_set_qscale(s, last_qp + s->dquant);
4362     }else if(s->flags&CODEC_FLAG_QP_RD)
4363         ff_set_qscale(s, s->qscale + s->dquant);
4364
4365     wrap_y = s->linesize;
4366     wrap_c = s->uvlinesize;
4367     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4368     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4369     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4370
4371     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4372         uint8_t *ebuf= s->edge_emu_buffer + 32;
4373         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4374         ptr_y= ebuf;
4375         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4376         ptr_cb= ebuf+18*wrap_y;
4377         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4378         ptr_cr= ebuf+18*wrap_y+8;
4379     }
4380
4381     if (s->mb_intra) {
4382         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4383             int progressive_score, interlaced_score;
4384
4385             s->interlaced_dct=0;
4386             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4387                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4388
4389             if(progressive_score > 0){
4390                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4391                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4392                 if(progressive_score > interlaced_score){
4393                     s->interlaced_dct=1;
4394
4395                     dct_offset= wrap_y;
4396                     wrap_y<<=1;
4397                     if (s->chroma_format == CHROMA_422)
4398                         wrap_c<<=1;
4399                 }
4400             }
4401         }
4402
4403         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4404         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4405         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4406         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4407
4408         if(s->flags&CODEC_FLAG_GRAY){
4409             skip_dct[4]= 1;
4410             skip_dct[5]= 1;
4411         }else{
4412             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4413             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4414             if(!s->chroma_y_shift){ /* 422 */
4415                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4416                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4417             }
4418         }
4419     }else{
4420         op_pixels_func (*op_pix)[4];
4421         qpel_mc_func (*op_qpix)[16];
4422         uint8_t *dest_y, *dest_cb, *dest_cr;
4423
4424         dest_y  = s->dest[0];
4425         dest_cb = s->dest[1];
4426         dest_cr = s->dest[2];
4427
4428         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4429             op_pix = s->dsp.put_pixels_tab;
4430             op_qpix= s->dsp.put_qpel_pixels_tab;
4431         }else{
4432             op_pix = s->dsp.put_no_rnd_pixels_tab;
4433             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4434         }
4435
4436         if (s->mv_dir & MV_DIR_FORWARD) {
4437             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4438             op_pix = s->dsp.avg_pixels_tab;
4439             op_qpix= s->dsp.avg_qpel_pixels_tab;
4440         }
4441         if (s->mv_dir & MV_DIR_BACKWARD) {
4442             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4443         }
4444
4445         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4446             int progressive_score, interlaced_score;
4447
4448             s->interlaced_dct=0;
4449             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4450                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4451
4452             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4453
4454             if(progressive_score>0){
4455                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4456                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4457
4458                 if(progressive_score > interlaced_score){
4459                     s->interlaced_dct=1;
4460
4461                     dct_offset= wrap_y;
4462                     wrap_y<<=1;
4463                     if (s->chroma_format == CHROMA_422)
4464                         wrap_c<<=1;
4465                 }
4466             }
4467         }
4468
4469         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4470         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4471         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4472         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4473
4474         if(s->flags&CODEC_FLAG_GRAY){
4475             skip_dct[4]= 1;
4476             skip_dct[5]= 1;
4477         }else{
4478             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4479             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4480             if(!s->chroma_y_shift){ /* 422 */
4481                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4482                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4483             }
4484         }
4485         /* pre quantization */
4486         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4487             //FIXME optimize
4488             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4489             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4490             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4491             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4492             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4493             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4494             if(!s->chroma_y_shift){ /* 422 */
4495                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4496                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4497             }
4498         }
4499     }
4500
4501     if(s->avctx->quantizer_noise_shaping){
4502         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4503         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4504         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4505         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4506         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4507         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4508         if(!s->chroma_y_shift){ /* 422 */
4509             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4510             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4511         }
4512         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4513     }
4514
4515     /* DCT & quantize */
4516     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4517     {
4518         for(i=0;i<mb_block_count;i++) {
4519             if(!skip_dct[i]){
4520                 int overflow;
4521                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4522             // FIXME we could decide to change to quantizer instead of clipping
4523             // JS: I don't think that would be a good idea it could lower quality instead
4524             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4525                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4526             }else
4527                 s->block_last_index[i]= -1;
4528         }
4529         if(s->avctx->quantizer_noise_shaping){
4530             for(i=0;i<mb_block_count;i++) {
4531                 if(!skip_dct[i]){
4532                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4533                 }
4534             }
4535         }
4536
4537         if(s->luma_elim_threshold && !s->mb_intra)
4538             for(i=0; i<4; i++)
4539                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4540         if(s->chroma_elim_threshold && !s->mb_intra)
4541             for(i=4; i<mb_block_count; i++)
4542                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4543
4544         if(s->flags & CODEC_FLAG_CBP_RD){
4545             for(i=0;i<mb_block_count;i++) {
4546                 if(s->block_last_index[i] == -1)
4547                     s->coded_score[i]= INT_MAX/256;
4548             }
4549         }
4550     }
4551
4552     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4553         s->block_last_index[4]=
4554         s->block_last_index[5]= 0;
4555         s->block[4][0]=
4556         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4557     }
4558
4559     //non c quantize code returns incorrect block_last_index FIXME
4560     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4561         for(i=0; i<mb_block_count; i++){
4562             int j;
4563             if(s->block_last_index[i]>0){
4564                 for(j=63; j>0; j--){
4565                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4566                 }
4567                 s->block_last_index[i]= j;
4568             }
4569         }
4570     }
4571
4572     /* huffman encode */
4573     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4574     case CODEC_ID_MPEG1VIDEO:
4575     case CODEC_ID_MPEG2VIDEO:
4576         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4577     case CODEC_ID_MPEG4:
4578         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4579     case CODEC_ID_MSMPEG4V2:
4580     case CODEC_ID_MSMPEG4V3:
4581     case CODEC_ID_WMV1:
4582         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4583     case CODEC_ID_WMV2:
4584          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4585 #ifdef CONFIG_H261_ENCODER
4586     case CODEC_ID_H261:
4587         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4588 #endif
4589     case CODEC_ID_H263:
4590     case CODEC_ID_H263P:
4591     case CODEC_ID_FLV1:
4592     case CODEC_ID_RV10:
4593     case CODEC_ID_RV20:
4594         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4595     case CODEC_ID_MJPEG:
4596         mjpeg_encode_mb(s, s->block); break;
4597     default:
4598         assert(0);
4599     }
4600 }
4601
4602 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4603 {
4604     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4605     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4606 }
4607
4608 #endif //CONFIG_ENCODERS
4609
4610 void ff_mpeg_flush(AVCodecContext *avctx){
4611     int i;
4612     MpegEncContext *s = avctx->priv_data;
4613
4614     if(s==NULL || s->picture==NULL)
4615         return;
4616
4617     for(i=0; i<MAX_PICTURE_COUNT; i++){
4618        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4619                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4620         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4621     }
4622     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4623
4624     s->mb_x= s->mb_y= 0;
4625
4626     s->parse_context.state= -1;
4627     s->parse_context.frame_start_found= 0;
4628     s->parse_context.overread= 0;
4629     s->parse_context.overread_index= 0;
4630     s->parse_context.index= 0;
4631     s->parse_context.last_index= 0;
4632     s->bitstream_buffer_size=0;
4633 }
4634
4635 #ifdef CONFIG_ENCODERS
4636 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4637 {
4638     const uint16_t *srcw= (uint16_t*)src;
4639     int words= length>>4;
4640     int bits= length&15;
4641     int i;
4642
4643     if(length==0) return;
4644
4645     if(words < 16){
4646         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4647     }else if(put_bits_count(pb)&7){
4648         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4649     }else{
4650         for(i=0; put_bits_count(pb)&31; i++)
4651             put_bits(pb, 8, src[i]);
4652         flush_put_bits(pb);
4653         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4654         skip_put_bytes(pb, 2*words-i);
4655     }
4656
4657     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4658 }
4659
4660 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4661     int i;
4662
4663     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4664
4665     /* mpeg1 */
4666     d->mb_skip_run= s->mb_skip_run;
4667     for(i=0; i<3; i++)
4668         d->last_dc[i]= s->last_dc[i];
4669
4670     /* statistics */
4671     d->mv_bits= s->mv_bits;
4672     d->i_tex_bits= s->i_tex_bits;
4673     d->p_tex_bits= s->p_tex_bits;
4674     d->i_count= s->i_count;
4675     d->f_count= s->f_count;
4676     d->b_count= s->b_count;
4677     d->skip_count= s->skip_count;
4678     d->misc_bits= s->misc_bits;
4679     d->last_bits= 0;
4680
4681     d->mb_skipped= 0;
4682     d->qscale= s->qscale;
4683     d->dquant= s->dquant;
4684 }
4685
4686 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4687     int i;
4688
4689     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4690     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4691
4692     /* mpeg1 */
4693     d->mb_skip_run= s->mb_skip_run;
4694     for(i=0; i<3; i++)
4695         d->last_dc[i]= s->last_dc[i];
4696
4697     /* statistics */
4698     d->mv_bits= s->mv_bits;
4699     d->i_tex_bits= s->i_tex_bits;
4700     d->p_tex_bits= s->p_tex_bits;
4701     d->i_count= s->i_count;
4702     d->f_count= s->f_count;
4703     d->b_count= s->b_count;
4704     d->skip_count= s->skip_count;
4705     d->misc_bits= s->misc_bits;
4706
4707     d->mb_intra= s->mb_intra;
4708     d->mb_skipped= s->mb_skipped;
4709     d->mv_type= s->mv_type;
4710     d->mv_dir= s->mv_dir;
4711     d->pb= s->pb;
4712     if(s->data_partitioning){
4713         d->pb2= s->pb2;
4714         d->tex_pb= s->tex_pb;
4715     }
4716     d->block= s->block;
4717     for(i=0; i<8; i++)
4718         d->block_last_index[i]= s->block_last_index[i];
4719     d->interlaced_dct= s->interlaced_dct;
4720     d->qscale= s->qscale;
4721 }
4722
4723 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4724                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4725                            int *dmin, int *next_block, int motion_x, int motion_y)
4726 {
4727     int score;
4728     uint8_t *dest_backup[3];
4729
4730     copy_context_before_encode(s, backup, type);
4731
4732     s->block= s->blocks[*next_block];
4733     s->pb= pb[*next_block];
4734     if(s->data_partitioning){
4735         s->pb2   = pb2   [*next_block];
4736         s->tex_pb= tex_pb[*next_block];
4737     }
4738
4739     if(*next_block){
4740         memcpy(dest_backup, s->dest, sizeof(s->dest));
4741         s->dest[0] = s->rd_scratchpad;
4742         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4743         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4744         assert(s->linesize >= 32); //FIXME
4745     }
4746
4747     encode_mb(s, motion_x, motion_y);
4748
4749     score= put_bits_count(&s->pb);
4750     if(s->data_partitioning){
4751         score+= put_bits_count(&s->pb2);
4752         score+= put_bits_count(&s->tex_pb);
4753     }
4754
4755     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4756         MPV_decode_mb(s, s->block);
4757
4758         score *= s->lambda2;
4759         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4760     }
4761
4762     if(*next_block){
4763         memcpy(s->dest, dest_backup, sizeof(s->dest));
4764     }
4765
4766     if(score<*dmin){
4767         *dmin= score;
4768         *next_block^=1;
4769
4770         copy_context_after_encode(best, s, type);
4771     }
4772 }
4773
4774 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4775     uint32_t *sq = ff_squareTbl + 256;
4776     int acc=0;
4777     int x,y;
4778
4779     if(w==16 && h==16)
4780         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4781     else if(w==8 && h==8)
4782         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4783
4784     for(y=0; y<h; y++){
4785         for(x=0; x<w; x++){
4786             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4787         }
4788     }
4789
4790     assert(acc>=0);
4791
4792     return acc;
4793 }
4794
4795 static int sse_mb(MpegEncContext *s){
4796     int w= 16;
4797     int h= 16;
4798
4799     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4800     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4801
4802     if(w==16 && h==16)
4803       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4804         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4805                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4806                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4807       }else{
4808         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4809                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4810                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4811       }
4812     else
4813         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4814                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4815                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4816 }
4817
4818 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4819     MpegEncContext *s= arg;
4820
4821
4822     s->me.pre_pass=1;
4823     s->me.dia_size= s->avctx->pre_dia_size;
4824     s->first_slice_line=1;
4825     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4826         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4827             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4828         }
4829         s->first_slice_line=0;
4830     }
4831
4832     s->me.pre_pass=0;
4833
4834     return 0;
4835 }
4836
4837 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4838     MpegEncContext *s= arg;
4839
4840     ff_check_alignment();
4841
4842     s->me.dia_size= s->avctx->dia_size;
4843     s->first_slice_line=1;
4844     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4845         s->mb_x=0; //for block init below
4846         ff_init_block_index(s);
4847         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4848             s->block_index[0]+=2;
4849             s->block_index[1]+=2;
4850             s->block_index[2]+=2;
4851             s->block_index[3]+=2;
4852
4853             /* compute motion vector & mb_type and store in context */
4854             if(s->pict_type==B_TYPE)
4855                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4856             else
4857                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4858         }
4859         s->first_slice_line=0;
4860     }
4861     return 0;
4862 }
4863
4864 static int mb_var_thread(AVCodecContext *c, void *arg){
4865     MpegEncContext *s= arg;
4866     int mb_x, mb_y;
4867
4868     ff_check_alignment();
4869
4870     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4871         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4872             int xx = mb_x * 16;
4873             int yy = mb_y * 16;
4874             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4875             int varc;
4876             int sum = s->dsp.pix_sum(pix, s->linesize);
4877
4878             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4879
4880             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4881             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4882             s->me.mb_var_sum_temp    += varc;
4883         }
4884     }
4885     return 0;
4886 }
4887
4888 static void write_slice_end(MpegEncContext *s){
4889     if(s->codec_id==CODEC_ID_MPEG4){
4890         if(s->partitioned_frame){
4891             ff_mpeg4_merge_partitions(s);
4892         }
4893
4894         ff_mpeg4_stuffing(&s->pb);
4895     }else if(s->out_format == FMT_MJPEG){
4896         ff_mjpeg_stuffing(&s->pb);
4897     }
4898
4899     align_put_bits(&s->pb);
4900     flush_put_bits(&s->pb);
4901
4902     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4903         s->misc_bits+= get_bits_diff(s);
4904 }
4905
4906 static int encode_thread(AVCodecContext *c, void *arg){
4907     MpegEncContext *s= arg;
4908     int mb_x, mb_y, pdif = 0;
4909     int i, j;
4910     MpegEncContext best_s, backup_s;
4911     uint8_t bit_buf[2][MAX_MB_BYTES];
4912     uint8_t bit_buf2[2][MAX_MB_BYTES];
4913     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4914     PutBitContext pb[2], pb2[2], tex_pb[2];
4915 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4916
4917     ff_check_alignment();
4918
4919     for(i=0; i<2; i++){
4920         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4921         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4922         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4923     }
4924
4925     s->last_bits= put_bits_count(&s->pb);
4926     s->mv_bits=0;
4927     s->misc_bits=0;
4928     s->i_tex_bits=0;
4929     s->p_tex_bits=0;
4930     s->i_count=0;
4931     s->f_count=0;
4932     s->b_count=0;
4933     s->skip_count=0;
4934
4935     for(i=0; i<3; i++){
4936         /* init last dc values */
4937         /* note: quant matrix value (8) is implied here */
4938         s->last_dc[i] = 128 << s->intra_dc_precision;
4939
4940         s->current_picture.error[i] = 0;
4941     }
4942     s->mb_skip_run = 0;
4943     memset(s->last_mv, 0, sizeof(s->last_mv));
4944
4945     s->last_mv_dir = 0;
4946
4947     switch(s->codec_id){
4948     case CODEC_ID_H263:
4949     case CODEC_ID_H263P:
4950     case CODEC_ID_FLV1:
4951         s->gob_index = ff_h263_get_gob_height(s);
4952         break;
4953     case CODEC_ID_MPEG4:
4954         if(s->partitioned_frame)
4955             ff_mpeg4_init_partitions(s);
4956         break;
4957     }
4958
4959     s->resync_mb_x=0;
4960     s->resync_mb_y=0;
4961     s->first_slice_line = 1;
4962     s->ptr_lastgob = s->pb.buf;
4963     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4964 //    printf("row %d at %X\n", s->mb_y, (int)s);
4965         s->mb_x=0;
4966         s->mb_y= mb_y;
4967
4968         ff_set_qscale(s, s->qscale);
4969         ff_init_block_index(s);
4970
4971         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4972             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4973             int mb_type= s->mb_type[xy];
4974 //            int d;
4975             int dmin= INT_MAX;
4976             int dir;
4977
4978             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4979                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4980                 return -1;
4981             }
4982             if(s->data_partitioning){
4983                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4984                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4985                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4986                     return -1;
4987                 }
4988             }
4989
4990             s->mb_x = mb_x;
4991             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4992             ff_update_block_index(s);
4993
4994 #ifdef CONFIG_H261_ENCODER
4995             if(s->codec_id == CODEC_ID_H261){
4996                 ff_h261_reorder_mb_index(s);
4997                 xy= s->mb_y*s->mb_stride + s->mb_x;
4998                 mb_type= s->mb_type[xy];
4999             }
5000 #endif
5001
5002             /* write gob / video packet header  */
5003             if(s->rtp_mode){
5004                 int current_packet_size, is_gob_start;
5005
5006                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5007
5008                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5009
5010                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5011
5012                 switch(s->codec_id){
5013                 case CODEC_ID_H263:
5014                 case CODEC_ID_H263P:
5015                     if(!s->h263_slice_structured)
5016                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5017                     break;
5018                 case CODEC_ID_MPEG2VIDEO:
5019                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5020                 case CODEC_ID_MPEG1VIDEO:
5021                     if(s->mb_skip_run) is_gob_start=0;
5022                     break;
5023                 }
5024
5025                 if(is_gob_start){
5026                     if(s->start_mb_y != mb_y || mb_x!=0){
5027                         write_slice_end(s);
5028
5029                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5030                             ff_mpeg4_init_partitions(s);
5031                         }
5032                     }
5033
5034                     assert((put_bits_count(&s->pb)&7) == 0);
5035                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5036
5037                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5038                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5039                         int d= 100 / s->avctx->error_rate;
5040                         if(r % d == 0){
5041                             current_packet_size=0;
5042 #ifndef ALT_BITSTREAM_WRITER
5043                             s->pb.buf_ptr= s->ptr_lastgob;
5044 #endif
5045                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5046                         }
5047                     }
5048
5049                     if (s->avctx->rtp_callback){
5050                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5051                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5052                     }
5053
5054                     switch(s->codec_id){
5055                     case CODEC_ID_MPEG4:
5056                         ff_mpeg4_encode_video_packet_header(s);
5057                         ff_mpeg4_clean_buffers(s);
5058                     break;
5059                     case CODEC_ID_MPEG1VIDEO:
5060                     case CODEC_ID_MPEG2VIDEO:
5061                         ff_mpeg1_encode_slice_header(s);
5062                         ff_mpeg1_clean_buffers(s);
5063                     break;
5064                     case CODEC_ID_H263:
5065                     case CODEC_ID_H263P:
5066                         h263_encode_gob_header(s, mb_y);
5067                     break;
5068                     }
5069
5070                     if(s->flags&CODEC_FLAG_PASS1){
5071                         int bits= put_bits_count(&s->pb);
5072                         s->misc_bits+= bits - s->last_bits;
5073                         s->last_bits= bits;
5074                     }
5075
5076                     s->ptr_lastgob += current_packet_size;
5077                     s->first_slice_line=1;
5078                     s->resync_mb_x=mb_x;
5079                     s->resync_mb_y=mb_y;
5080                 }
5081             }
5082
5083             if(  (s->resync_mb_x   == s->mb_x)
5084                && s->resync_mb_y+1 == s->mb_y){
5085                 s->first_slice_line=0;
5086             }
5087
5088             s->mb_skipped=0;
5089             s->dquant=0; //only for QP_RD
5090
5091             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5092                 int next_block=0;
5093                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5094
5095                 copy_context_before_encode(&backup_s, s, -1);
5096                 backup_s.pb= s->pb;
5097                 best_s.data_partitioning= s->data_partitioning;
5098                 best_s.partitioned_frame= s->partitioned_frame;
5099                 if(s->data_partitioning){
5100                     backup_s.pb2= s->pb2;
5101                     backup_s.tex_pb= s->tex_pb;
5102                 }
5103
5104                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5105                     s->mv_dir = MV_DIR_FORWARD;
5106                     s->mv_type = MV_TYPE_16X16;
5107                     s->mb_intra= 0;
5108                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5109                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5110                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5111                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5112                 }
5113                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5114                     s->mv_dir = MV_DIR_FORWARD;
5115                     s->mv_type = MV_TYPE_FIELD;
5116                     s->mb_intra= 0;
5117                     for(i=0; i<2; i++){
5118                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5119                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5120                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5121                     }
5122                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5123                                  &dmin, &next_block, 0, 0);
5124                 }
5125                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5126                     s->mv_dir = MV_DIR_FORWARD;
5127                     s->mv_type = MV_TYPE_16X16;
5128                     s->mb_intra= 0;
5129                     s->mv[0][0][0] = 0;
5130                     s->mv[0][0][1] = 0;
5131                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5132                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5133                 }
5134                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5135                     s->mv_dir = MV_DIR_FORWARD;
5136                     s->mv_type = MV_TYPE_8X8;
5137                     s->mb_intra= 0;
5138                     for(i=0; i<4; i++){
5139                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5140                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5141                     }
5142                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5143                                  &dmin, &next_block, 0, 0);
5144                 }
5145                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5146                     s->mv_dir = MV_DIR_FORWARD;
5147                     s->mv_type = MV_TYPE_16X16;
5148                     s->mb_intra= 0;
5149                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5150                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5151                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5152                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5153                 }
5154                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5155                     s->mv_dir = MV_DIR_BACKWARD;
5156                     s->mv_type = MV_TYPE_16X16;
5157                     s->mb_intra= 0;
5158                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5159                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5160                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5161                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5162                 }
5163                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5164                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5165                     s->mv_type = MV_TYPE_16X16;
5166                     s->mb_intra= 0;
5167                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5168                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5169                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5170                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5171                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5172                                  &dmin, &next_block, 0, 0);
5173                 }
5174                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5175                     s->mv_dir = MV_DIR_FORWARD;
5176                     s->mv_type = MV_TYPE_FIELD;
5177                     s->mb_intra= 0;
5178                     for(i=0; i<2; i++){
5179                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5180                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5181                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5182                     }
5183                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5184                                  &dmin, &next_block, 0, 0);
5185                 }
5186                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5187                     s->mv_dir = MV_DIR_BACKWARD;
5188                     s->mv_type = MV_TYPE_FIELD;
5189                     s->mb_intra= 0;
5190                     for(i=0; i<2; i++){
5191                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5192                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5193                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5194                     }
5195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5196                                  &dmin, &next_block, 0, 0);
5197                 }
5198                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5199                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5200                     s->mv_type = MV_TYPE_FIELD;
5201                     s->mb_intra= 0;
5202                     for(dir=0; dir<2; dir++){
5203                         for(i=0; i<2; i++){
5204                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5205                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5206                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5207                         }
5208                     }
5209                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5210                                  &dmin, &next_block, 0, 0);
5211                 }
5212                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5213                     s->mv_dir = 0;
5214                     s->mv_type = MV_TYPE_16X16;
5215                     s->mb_intra= 1;
5216                     s->mv[0][0][0] = 0;
5217                     s->mv[0][0][1] = 0;
5218                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5219                                  &dmin, &next_block, 0, 0);
5220                     if(s->h263_pred || s->h263_aic){
5221                         if(best_s.mb_intra)
5222                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5223                         else
5224                             ff_clean_intra_table_entries(s); //old mode?
5225                     }
5226                 }
5227
5228                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5229                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5230                         const int last_qp= backup_s.qscale;
5231                         int qpi, qp, dc[6];
5232                         DCTELEM ac[6][16];
5233                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5234                         static const int dquant_tab[4]={-1,1,-2,2};
5235
5236                         assert(backup_s.dquant == 0);
5237
5238                         //FIXME intra
5239                         s->mv_dir= best_s.mv_dir;
5240                         s->mv_type = MV_TYPE_16X16;
5241                         s->mb_intra= best_s.mb_intra;
5242                         s->mv[0][0][0] = best_s.mv[0][0][0];
5243                         s->mv[0][0][1] = best_s.mv[0][0][1];
5244                         s->mv[1][0][0] = best_s.mv[1][0][0];
5245                         s->mv[1][0][1] = best_s.mv[1][0][1];
5246
5247                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5248                         for(; qpi<4; qpi++){
5249                             int dquant= dquant_tab[qpi];
5250                             qp= last_qp + dquant;
5251                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5252                                 continue;
5253                             backup_s.dquant= dquant;
5254                             if(s->mb_intra && s->dc_val[0]){
5255                                 for(i=0; i<6; i++){
5256                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5257                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5258                                 }
5259                             }
5260
5261                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5262                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5263                             if(best_s.qscale != qp){
5264                                 if(s->mb_intra && s->dc_val[0]){
5265                                     for(i=0; i<6; i++){
5266                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5267                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5268                                     }
5269                                 }
5270                             }
5271                         }
5272                     }
5273                 }
5274                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5275                     int mx= s->b_direct_mv_table[xy][0];
5276                     int my= s->b_direct_mv_table[xy][1];
5277
5278                     backup_s.dquant = 0;
5279                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5280                     s->mb_intra= 0;
5281                     ff_mpeg4_set_direct_mv(s, mx, my);
5282                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5283                                  &dmin, &next_block, mx, my);
5284                 }
5285                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5286                     backup_s.dquant = 0;
5287                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5288                     s->mb_intra= 0;
5289                     ff_mpeg4_set_direct_mv(s, 0, 0);
5290                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5291                                  &dmin, &next_block, 0, 0);
5292                 }
5293                 s->current_picture.qscale_table[xy]= best_s.qscale;
5294
5295                 copy_context_after_encode(s, &best_s, -1);
5296
5297                 pb_bits_count= put_bits_count(&s->pb);
5298                 flush_put_bits(&s->pb);
5299                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5300                 s->pb= backup_s.pb;
5301
5302                 if(s->data_partitioning){
5303                     pb2_bits_count= put_bits_count(&s->pb2);
5304                     flush_put_bits(&s->pb2);
5305                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5306                     s->pb2= backup_s.pb2;
5307
5308                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5309                     flush_put_bits(&s->tex_pb);
5310                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5311                     s->tex_pb= backup_s.tex_pb;
5312                 }
5313                 s->last_bits= put_bits_count(&s->pb);
5314
5315                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5316                     ff_h263_update_motion_val(s);
5317
5318                 if(next_block==0){ //FIXME 16 vs linesize16
5319                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5320                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5321                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5322                 }
5323
5324                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5325                     MPV_decode_mb(s, s->block);
5326             } else {
5327                 int motion_x, motion_y;
5328                 s->mv_type=MV_TYPE_16X16;
5329                 // only one MB-Type possible
5330
5331                 switch(mb_type){
5332                 case CANDIDATE_MB_TYPE_INTRA:
5333                     s->mv_dir = 0;
5334                     s->mb_intra= 1;
5335                     motion_x= s->mv[0][0][0] = 0;
5336                     motion_y= s->mv[0][0][1] = 0;
5337                     break;
5338                 case CANDIDATE_MB_TYPE_INTER:
5339                     s->mv_dir = MV_DIR_FORWARD;
5340                     s->mb_intra= 0;
5341                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5342                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5343                     break;
5344                 case CANDIDATE_MB_TYPE_INTER_I:
5345                     s->mv_dir = MV_DIR_FORWARD;
5346                     s->mv_type = MV_TYPE_FIELD;
5347                     s->mb_intra= 0;
5348                     for(i=0; i<2; i++){
5349                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5350                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5351                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5352                     }
5353                     motion_x = motion_y = 0;
5354                     break;
5355                 case CANDIDATE_MB_TYPE_INTER4V:
5356                     s->mv_dir = MV_DIR_FORWARD;
5357                     s->mv_type = MV_TYPE_8X8;
5358                     s->mb_intra= 0;
5359                     for(i=0; i<4; i++){
5360                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5361                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5362                     }
5363                     motion_x= motion_y= 0;
5364                     break;
5365                 case CANDIDATE_MB_TYPE_DIRECT:
5366                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5367                     s->mb_intra= 0;
5368                     motion_x=s->b_direct_mv_table[xy][0];
5369                     motion_y=s->b_direct_mv_table[xy][1];
5370                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5371                     break;
5372                 case CANDIDATE_MB_TYPE_DIRECT0:
5373                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5374                     s->mb_intra= 0;
5375                     ff_mpeg4_set_direct_mv(s, 0, 0);
5376                     break;
5377                 case CANDIDATE_MB_TYPE_BIDIR:
5378                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5379                     s->mb_intra= 0;
5380                     motion_x=0;
5381                     motion_y=0;
5382                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5383                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5384                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5385                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5386                     break;
5387                 case CANDIDATE_MB_TYPE_BACKWARD:
5388                     s->mv_dir = MV_DIR_BACKWARD;
5389                     s->mb_intra= 0;
5390                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5391                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5392                     break;
5393                 case CANDIDATE_MB_TYPE_FORWARD:
5394                     s->mv_dir = MV_DIR_FORWARD;
5395                     s->mb_intra= 0;
5396                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5397                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5398 //                    printf(" %d %d ", motion_x, motion_y);
5399                     break;
5400                 case CANDIDATE_MB_TYPE_FORWARD_I:
5401                     s->mv_dir = MV_DIR_FORWARD;
5402                     s->mv_type = MV_TYPE_FIELD;
5403                     s->mb_intra= 0;
5404                     for(i=0; i<2; i++){
5405                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5406                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5407                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5408                     }
5409                     motion_x=motion_y=0;
5410                     break;
5411                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5412                     s->mv_dir = MV_DIR_BACKWARD;
5413                     s->mv_type = MV_TYPE_FIELD;
5414                     s->mb_intra= 0;
5415                     for(i=0; i<2; i++){
5416                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5417                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5418                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5419                     }
5420                     motion_x=motion_y=0;
5421                     break;
5422                 case CANDIDATE_MB_TYPE_BIDIR_I:
5423                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5424                     s->mv_type = MV_TYPE_FIELD;
5425                     s->mb_intra= 0;
5426                     for(dir=0; dir<2; dir++){
5427                         for(i=0; i<2; i++){
5428                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5429                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5430                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5431                         }
5432                     }
5433                     motion_x=motion_y=0;
5434                     break;
5435                 default:
5436                     motion_x=motion_y=0; //gcc warning fix
5437                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5438                 }
5439
5440                 encode_mb(s, motion_x, motion_y);
5441
5442                 // RAL: Update last macroblock type
5443                 s->last_mv_dir = s->mv_dir;
5444
5445                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5446                     ff_h263_update_motion_val(s);
5447
5448                 MPV_decode_mb(s, s->block);
5449             }
5450
5451             /* clean the MV table in IPS frames for direct mode in B frames */
5452             if(s->mb_intra /* && I,P,S_TYPE */){
5453                 s->p_mv_table[xy][0]=0;
5454                 s->p_mv_table[xy][1]=0;
5455             }
5456
5457             if(s->flags&CODEC_FLAG_PSNR){
5458                 int w= 16;
5459                 int h= 16;
5460
5461                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5462                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5463
5464                 s->current_picture.error[0] += sse(
5465                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5466                     s->dest[0], w, h, s->linesize);
5467                 s->current_picture.error[1] += sse(
5468                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5469                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5470                 s->current_picture.error[2] += sse(
5471                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5472                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5473             }
5474             if(s->loop_filter){
5475                 if(s->out_format == FMT_H263)
5476                     ff_h263_loop_filter(s);
5477             }
5478 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5479         }
5480     }
5481
5482     //not beautiful here but we must write it before flushing so it has to be here
5483     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5484         msmpeg4_encode_ext_header(s);
5485
5486     write_slice_end(s);
5487
5488     /* Send the last GOB if RTP */
5489     if (s->avctx->rtp_callback) {
5490         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5491         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5492         /* Call the RTP callback to send the last GOB */
5493         emms_c();
5494         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5495     }
5496
5497     return 0;
5498 }
5499
/**
 * Add src->field to dst->field, then reset src->field to 0.
 *
 * Expects two pointers named dst and src to be in scope at the point of use.
 * The expansion is wrapped in do { } while (0) so that MERGE(x); is a single
 * statement and stays correct under an unbraced if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
5501 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5502     MERGE(me.scene_change_score);
5503     MERGE(me.mc_mb_var_sum_temp);
5504     MERGE(me.mb_var_sum_temp);
5505 }
5506
5507 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5508     int i;
5509
5510     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5511     MERGE(dct_count[1]);
5512     MERGE(mv_bits);
5513     MERGE(i_tex_bits);
5514     MERGE(p_tex_bits);
5515     MERGE(i_count);
5516     MERGE(f_count);
5517     MERGE(b_count);
5518     MERGE(skip_count);
5519     MERGE(misc_bits);
5520     MERGE(error_count);
5521     MERGE(padding_bug_score);
5522     MERGE(current_picture.error[0]);
5523     MERGE(current_picture.error[1]);
5524     MERGE(current_picture.error[2]);
5525
5526     if(dst->avctx->noise_reduction){
5527         for(i=0; i<64; i++){
5528             MERGE(dct_error_sum[0][i]);
5529             MERGE(dct_error_sum[1][i]);
5530         }
5531     }
5532
5533     assert(put_bits_count(&src->pb) % 8 ==0);
5534     assert(put_bits_count(&dst->pb) % 8 ==0);
5535     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5536     flush_put_bits(&dst->pb);
5537 }
5538
5539 static int estimate_qp(MpegEncContext *s, int dry_run){
5540     if (s->next_lambda){
5541         s->current_picture_ptr->quality=
5542         s->current_picture.quality = s->next_lambda;
5543         if(!dry_run) s->next_lambda= 0;
5544     } else if (!s->fixed_qscale) {
5545         s->current_picture_ptr->quality=
5546         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5547         if (s->current_picture.quality < 0)
5548             return -1;
5549     }
5550
5551     if(s->adaptive_quant){
5552         switch(s->codec_id){
5553         case CODEC_ID_MPEG4:
5554             ff_clean_mpeg4_qscales(s);
5555             break;
5556         case CODEC_ID_H263:
5557         case CODEC_ID_H263P:
5558         case CODEC_ID_FLV1:
5559             ff_clean_h263_qscales(s);
5560             break;
5561         }
5562
5563         s->lambda= s->lambda_table[0];
5564         //FIXME broken
5565     }else
5566         s->lambda= s->current_picture.quality;
5567 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5568     update_qscale(s);
5569     return 0;
5570 }
5571
5572 static int encode_picture(MpegEncContext *s, int picture_number)
5573 {
5574     int i;
5575     int bits;
5576
5577     s->picture_number = picture_number;
5578
5579     /* Reset the average MB variance */
5580     s->me.mb_var_sum_temp    =
5581     s->me.mc_mb_var_sum_temp = 0;
5582
5583     /* we need to initialize some time vars before we can encode b-frames */
5584     // RAL: Condition added for MPEG1VIDEO
5585     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5586         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5587
5588     s->me.scene_change_score=0;
5589
5590 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
5591
5592     if(s->pict_type==I_TYPE){
5593         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5594         else                        s->no_rounding=0;
5595     }else if(s->pict_type!=B_TYPE){
5596         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5597             s->no_rounding ^= 1;
5598     }
5599
5600     if(s->flags & CODEC_FLAG_PASS2){
5601         if (estimate_qp(s,1) < 0)
5602             return -1;
5603         ff_get_2pass_fcode(s);
5604     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5605         if(s->pict_type==B_TYPE)
5606             s->lambda= s->last_lambda_for[s->pict_type];
5607         else
5608             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5609         update_qscale(s);
5610     }
5611
5612     s->mb_intra=0; //for the rate distortion & bit compare functions
5613     for(i=1; i<s->avctx->thread_count; i++){
5614         ff_update_duplicate_context(s->thread_context[i], s);
5615     }
5616
5617     ff_init_me(s);
5618
5619     /* Estimate motion for every MB */
5620     if(s->pict_type != I_TYPE){
5621         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5622         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
5623         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5624             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5625                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5626             }
5627         }
5628
5629         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5630     }else /* if(s->pict_type == I_TYPE) */{
5631         /* I-Frame */
5632         for(i=0; i<s->mb_stride*s->mb_height; i++)
5633             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5634
5635         if(!s->fixed_qscale){
5636             /* finding spatial complexity for I-frame rate control */
5637             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5638         }
5639     }
5640     for(i=1; i<s->avctx->thread_count; i++){
5641         merge_context_after_me(s, s->thread_context[i]);
5642     }
5643     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5644     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5645     emms_c();
5646
5647     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5648         s->pict_type= I_TYPE;
5649         for(i=0; i<s->mb_stride*s->mb_height; i++)
5650             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5651 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5652     }
5653
5654     if(!s->umvplus){
5655         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5656             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5657
5658             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5659                 int a,b;
5660                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5661                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5662                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5663             }
5664
5665             ff_fix_long_p_mvs(s);
5666             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5667             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5668                 int j;
5669                 for(i=0; i<2; i++){
5670                     for(j=0; j<2; j++)
5671                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5672                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5673                 }
5674             }
5675         }
5676
5677         if(s->pict_type==B_TYPE){
5678             int a, b;
5679
5680             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5681             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5682             s->f_code = FFMAX(a, b);
5683
5684             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5685             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5686             s->b_code = FFMAX(a, b);
5687
5688             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5689             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5690             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5691             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5692             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5693                 int dir, j;
5694                 for(dir=0; dir<2; dir++){
5695                     for(i=0; i<2; i++){
5696                         for(j=0; j<2; j++){
5697                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5698                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5699                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5700                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5701                         }
5702                     }
5703                 }
5704             }
5705         }
5706     }
5707
5708     if (estimate_qp(s, 0) < 0)
5709         return -1;
5710
5711     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5712         s->qscale= 3; //reduce clipping problems
5713
5714     if (s->out_format == FMT_MJPEG) {
5715         /* for mjpeg, we do include qscale in the matrix */
5716         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5717         for(i=1;i<64;i++){
5718             int j= s->dsp.idct_permutation[i];
5719
5720             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5721         }
5722         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5723                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5724         s->qscale= 8;
5725     }
5726
5727     //FIXME var duplication
5728     s->current_picture_ptr->key_frame=
5729     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5730     s->current_picture_ptr->pict_type=
5731     s->current_picture.pict_type= s->pict_type;
5732
5733     if(s->current_picture.key_frame)
5734         s->picture_in_gop_number=0;
5735
5736     s->last_bits= put_bits_count(&s->pb);
5737     switch(s->out_format) {
5738     case FMT_MJPEG:
5739         mjpeg_picture_header(s);
5740         break;
5741 #ifdef CONFIG_H261_ENCODER
5742     case FMT_H261:
5743         ff_h261_encode_picture_header(s, picture_number);
5744         break;
5745 #endif
5746     case FMT_H263:
5747         if (s->codec_id == CODEC_ID_WMV2)
5748             ff_wmv2_encode_picture_header(s, picture_number);
5749         else if (s->h263_msmpeg4)
5750             msmpeg4_encode_picture_header(s, picture_number);
5751         else if (s->h263_pred)
5752             mpeg4_encode_picture_header(s, picture_number);
5753 #ifdef CONFIG_RV10_ENCODER
5754         else if (s->codec_id == CODEC_ID_RV10)
5755             rv10_encode_picture_header(s, picture_number);
5756 #endif
5757 #ifdef CONFIG_RV20_ENCODER
5758         else if (s->codec_id == CODEC_ID_RV20)
5759             rv20_encode_picture_header(s, picture_number);
5760 #endif
5761         else if (s->codec_id == CODEC_ID_FLV1)
5762             ff_flv_encode_picture_header(s, picture_number);
5763         else
5764             h263_encode_picture_header(s, picture_number);
5765         break;
5766     case FMT_MPEG1:
5767         mpeg1_encode_picture_header(s, picture_number);
5768         break;
5769     case FMT_H264:
5770         break;
5771     default:
5772         assert(0);
5773     }
5774     bits= put_bits_count(&s->pb);
5775     s->header_bits= bits - s->last_bits;
5776
5777     for(i=1; i<s->avctx->thread_count; i++){
5778         update_duplicate_context_after_me(s->thread_context[i], s);
5779     }
5780     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5781     for(i=1; i<s->avctx->thread_count; i++){
5782         merge_context_after_encode(s, s->thread_context[i]);
5783     }
5784     emms_c();
5785     return 0;
5786 }
5787
5788 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5789     const int intra= s->mb_intra;
5790     int i;
5791
5792     s->dct_count[intra]++;
5793
5794     for(i=0; i<64; i++){
5795         int level= block[i];
5796
5797         if(level){
5798             if(level>0){
5799                 s->dct_error_sum[intra][i] += level;
5800                 level -= s->dct_offset[intra][i];
5801                 if(level<0) level=0;
5802             }else{
5803                 s->dct_error_sum[intra][i] -= level;
5804                 level += s->dct_offset[intra][i];
5805                 if(level>0) level=0;
5806             }
5807             block[i]= level;
5808         }
5809     }
5810 }
5811
5812 static int dct_quantize_trellis_c(MpegEncContext *s,
5813                         DCTELEM *block, int n,
5814                         int qscale, int *overflow){
5815     const int *qmat;
5816     const uint8_t *scantable= s->intra_scantable.scantable;
5817     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5818     int max=0;
5819     unsigned int threshold1, threshold2;
5820     int bias=0;
5821     int run_tab[65];
5822     int level_tab[65];
5823     int score_tab[65];
5824     int survivor[65];
5825     int survivor_count;
5826     int last_run=0;
5827     int last_level=0;
5828     int last_score= 0;
5829     int last_i;
5830     int coeff[2][64];
5831     int coeff_count[64];
5832     int qmul, qadd, start_i, last_non_zero, i, dc;
5833     const int esc_length= s->ac_esc_length;
5834     uint8_t * length;
5835     uint8_t * last_length;
5836     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5837
5838     s->dsp.fdct (block);
5839
5840     if(s->dct_error_sum)
5841         s->denoise_dct(s, block);
5842     qmul= qscale*16;
5843     qadd= ((qscale-1)|1)*8;
5844
5845     if (s->mb_intra) {
5846         int q;
5847         if (!s->h263_aic) {
5848             if (n < 4)
5849                 q = s->y_dc_scale;
5850             else
5851                 q = s->c_dc_scale;
5852             q = q << 3;
5853         } else{
5854             /* For AIC we skip quant/dequant of INTRADC */
5855             q = 1 << 3;
5856             qadd=0;
5857         }
5858
5859         /* note: block[0] is assumed to be positive */
5860         block[0] = (block[0] + (q >> 1)) / q;
5861         start_i = 1;
5862         last_non_zero = 0;
5863         qmat = s->q_intra_matrix[qscale];
5864         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5865             bias= 1<<(QMAT_SHIFT-1);
5866         length     = s->intra_ac_vlc_length;
5867         last_length= s->intra_ac_vlc_last_length;
5868     } else {
5869         start_i = 0;
5870         last_non_zero = -1;
5871         qmat = s->q_inter_matrix[qscale];
5872         length     = s->inter_ac_vlc_length;
5873         last_length= s->inter_ac_vlc_last_length;
5874     }
5875     last_i= start_i;
5876
5877     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5878     threshold2= (threshold1<<1);
5879
5880     for(i=63; i>=start_i; i--) {
5881         const int j = scantable[i];
5882         int level = block[j] * qmat[j];
5883
5884         if(((unsigned)(level+threshold1))>threshold2){
5885             last_non_zero = i;
5886             break;
5887         }
5888     }
5889
5890     for(i=start_i; i<=last_non_zero; i++) {
5891         const int j = scantable[i];
5892         int level = block[j] * qmat[j];
5893
5894 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5895 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5896         if(((unsigned)(level+threshold1))>threshold2){
5897             if(level>0){
5898                 level= (bias + level)>>QMAT_SHIFT;
5899                 coeff[0][i]= level;
5900                 coeff[1][i]= level-1;
5901 //                coeff[2][k]= level-2;
5902             }else{
5903                 level= (bias - level)>>QMAT_SHIFT;
5904                 coeff[0][i]= -level;
5905                 coeff[1][i]= -level+1;
5906 //                coeff[2][k]= -level+2;
5907             }
5908             coeff_count[i]= FFMIN(level, 2);
5909             assert(coeff_count[i]);
5910             max |=level;
5911         }else{
5912             coeff[0][i]= (level>>31)|1;
5913             coeff_count[i]= 1;
5914         }
5915     }
5916
5917     *overflow= s->max_qcoeff < max; //overflow might have happened
5918
5919     if(last_non_zero < start_i){
5920         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5921         return last_non_zero;
5922     }
5923
5924     score_tab[start_i]= 0;
5925     survivor[0]= start_i;
5926     survivor_count= 1;
5927
5928     for(i=start_i; i<=last_non_zero; i++){
5929         int level_index, j;
5930         const int dct_coeff= FFABS(block[ scantable[i] ]);
5931         const int zero_distoration= dct_coeff*dct_coeff;
5932         int best_score=256*256*256*120;
5933         for(level_index=0; level_index < coeff_count[i]; level_index++){
5934             int distoration;
5935             int level= coeff[level_index][i];
5936             const int alevel= FFABS(level);
5937             int unquant_coeff;
5938
5939             assert(level);
5940
5941             if(s->out_format == FMT_H263){
5942                 unquant_coeff= alevel*qmul + qadd;
5943             }else{ //MPEG1
5944                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5945                 if(s->mb_intra){
5946                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5947                         unquant_coeff =   (unquant_coeff - 1) | 1;
5948                 }else{
5949                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5950                         unquant_coeff =   (unquant_coeff - 1) | 1;
5951                 }
5952                 unquant_coeff<<= 3;
5953             }
5954
5955             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5956             level+=64;
5957             if((level&(~127)) == 0){
5958                 for(j=survivor_count-1; j>=0; j--){
5959                     int run= i - survivor[j];
5960                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5961                     score += score_tab[i-run];
5962
5963                     if(score < best_score){
5964                         best_score= score;
5965                         run_tab[i+1]= run;
5966                         level_tab[i+1]= level-64;
5967                     }
5968                 }
5969
5970                 if(s->out_format == FMT_H263){
5971                     for(j=survivor_count-1; j>=0; j--){
5972                         int run= i - survivor[j];
5973                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5974                         score += score_tab[i-run];
5975                         if(score < last_score){
5976                             last_score= score;
5977                             last_run= run;
5978                             last_level= level-64;
5979                             last_i= i+1;
5980                         }
5981                     }
5982                 }
5983             }else{
5984                 distoration += esc_length*lambda;
5985                 for(j=survivor_count-1; j>=0; j--){
5986                     int run= i - survivor[j];
5987                     int score= distoration + score_tab[i-run];
5988
5989                     if(score < best_score){
5990                         best_score= score;
5991                         run_tab[i+1]= run;
5992                         level_tab[i+1]= level-64;
5993                     }
5994                 }
5995
5996                 if(s->out_format == FMT_H263){
5997                   for(j=survivor_count-1; j>=0; j--){
5998                         int run= i - survivor[j];
5999                         int score= distoration + score_tab[i-run];
6000                         if(score < last_score){
6001                             last_score= score;
6002                             last_run= run;
6003                             last_level= level-64;
6004                             last_i= i+1;
6005                         }
6006                     }
6007                 }
6008             }
6009         }
6010
6011         score_tab[i+1]= best_score;
6012
6013         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
6014         if(last_non_zero <= 27){
6015             for(; survivor_count; survivor_count--){
6016                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6017                     break;
6018             }
6019         }else{
6020             for(; survivor_count; survivor_count--){
6021                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6022                     break;
6023             }
6024         }
6025
6026         survivor[ survivor_count++ ]= i+1;
6027     }
6028
6029     if(s->out_format != FMT_H263){
6030         last_score= 256*256*256*120;
6031         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6032             int score= score_tab[i];
6033             if(i) score += lambda*2; //FIXME exacter?
6034
6035             if(score < last_score){
6036                 last_score= score;
6037                 last_i= i;
6038                 last_level= level_tab[i];
6039                 last_run= run_tab[i];
6040             }
6041         }
6042     }
6043
6044     s->coded_score[n] = last_score;
6045
6046     dc= FFABS(block[0]);
6047     last_non_zero= last_i - 1;
6048     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6049
6050     if(last_non_zero < start_i)
6051         return last_non_zero;
6052
6053     if(last_non_zero == 0 && start_i == 0){
6054         int best_level= 0;
6055         int best_score= dc * dc;
6056
6057         for(i=0; i<coeff_count[0]; i++){
6058             int level= coeff[i][0];
6059             int alevel= FFABS(level);
6060             int unquant_coeff, score, distortion;
6061
6062             if(s->out_format == FMT_H263){
6063                     unquant_coeff= (alevel*qmul + qadd)>>3;
6064             }else{ //MPEG1
6065                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6066                     unquant_coeff =   (unquant_coeff - 1) | 1;
6067             }
6068             unquant_coeff = (unquant_coeff + 4) >> 3;
6069             unquant_coeff<<= 3 + 3;
6070
6071             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6072             level+=64;
6073             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6074             else                    score= distortion + esc_length*lambda;
6075
6076             if(score < best_score){
6077                 best_score= score;
6078                 best_level= level - 64;
6079             }
6080         }
6081         block[0]= best_level;
6082         s->coded_score[n] = best_score - dc*dc;
6083         if(best_level == 0) return -1;
6084         else                return last_non_zero;
6085     }
6086
6087     i= last_i;
6088     assert(last_level);
6089
6090     block[ perm_scantable[last_non_zero] ]= last_level;
6091     i -= last_run + 1;
6092
6093     for(; i>start_i; i -= run_tab[i] + 1){
6094         block[ perm_scantable[i-1] ]= level_tab[i];
6095     }
6096
6097     return last_non_zero;
6098 }
6099
6100 //#define REFINE_STATS 1
6101 static int16_t basis[64][64];
6102
6103 static void build_basis(uint8_t *perm){
6104     int i, j, x, y;
6105     emms_c();
6106     for(i=0; i<8; i++){
6107         for(j=0; j<8; j++){
6108             for(y=0; y<8; y++){
6109                 for(x=0; x<8; x++){
6110                     double s= 0.25*(1<<BASIS_SHIFT);
6111                     int index= 8*i + j;
6112                     int perm_index= perm[index];
6113                     if(i==0) s*= sqrt(0.5);
6114                     if(j==0) s*= sqrt(0.5);
6115                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6116                 }
6117             }
6118         }
6119     }
6120 }
6121
     /**
      * Refine an already quantized block with a greedy search over +/-1 level changes.
      *
      * rem[] is initialised from orig[] and from the dequantized contents of block[].
      * Each pass of the main loop scores every candidate +/-1 change with
      * s->dsp.try_8x8basis() plus lambda times the change in VLC length, applies the
      * single best-scoring change to block[] and rem[], and stops as soon as no
      * candidate scores below the score obtained for a zero change.
      *
      * @param s       encoder context (reads mb_intra, h263_aic, DC scales, VLC length
      *                tables, block_last_index, lambda2, noise-shaping level, dsp hooks)
      * @param block   quantized levels; modified in place
      * @param weight  per-sample weights; overwritten with values in 16..63
      * @param orig    reference samples used to initialise rem[]
      * @param n       block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
      * @param qscale  quantizer scale (qmul = 2*qscale, qadd = (qscale-1)|1)
      * @return scan-order index of the last nonzero coefficient after refinement
      */
6122 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6123                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6124                         int n, int qscale){
6125     int16_t rem[64];
6126     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6127     const int *qmat;
6128     const uint8_t *scantable= s->intra_scantable.scantable;
6129     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6130 //    unsigned int threshold1, threshold2;
6131 //    int bias=0;
6132     int run_tab[65];
6133     int prev_run=0;
6134     int prev_level=0;
6135     int qmul, qadd, start_i, last_non_zero, i, dc;
6136     uint8_t * length;
6137     uint8_t * last_length;
6138     int lambda;
6139     int rle_index, run, q, sum;
6140 #ifdef REFINE_STATS
6141 static int count=0;
6142 static int after_last=0;
6143 static int to_zero=0;
6144 static int from_zero=0;
6145 static int raise=0;
6146 static int lower=0;
6147 static int messed_sign=0;
6148 #endif
6149
         /* basis[] is built on first use, detected by basis[0][0] still being zero */
6150     if(basis[0][0] == 0)
6151         build_basis(s->dsp.idct_permutation);
6152
6153     qmul= qscale*2;
6154     qadd= (qscale-1)|1;
6155     if (s->mb_intra) {
6156         if (!s->h263_aic) {
6157             if (n < 4)
6158                 q = s->y_dc_scale;
6159             else
6160                 q = s->c_dc_scale;
6161         } else{
6162             /* For AIC we skip quant/dequant of INTRADC */
6163             q = 1;
6164             qadd=0;
6165         }
6166         q <<= RECON_SHIFT-3;
6167         /* note: block[0] is assumed to be positive */
6168         dc= block[0]*q;
6169 //        block[0] = (block[0] + (q >> 1)) / q;
6170         start_i = 1;
             /* qmat is assigned here and in the inter branch but not read again in this function */
6171         qmat = s->q_intra_matrix[qscale];
6172 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6173 //            bias= 1<<(QMAT_SHIFT-1);
6174         length     = s->intra_ac_vlc_length;
6175         last_length= s->intra_ac_vlc_last_length;
6176     } else {
             /* inter: no separate DC handling; q stays unset and is only read when start_i != 0 */
6177         dc= 0;
6178         start_i = 0;
6179         qmat = s->q_inter_matrix[qscale];
6180         length     = s->inter_ac_vlc_length;
6181         last_length= s->inter_ac_vlc_last_length;
6182     }
6183     last_non_zero = s->block_last_index[n];
6184
6185 #ifdef REFINE_STATS
6186 {START_TIMER
6187 #endif
         /* add half a unit at RECON_SHIFT precision (presumably for rounding), then
          * start rem[] as that constant minus the up-shifted reference samples */
6188     dc += (1<<(RECON_SHIFT-1));
6189     for(i=0; i<64; i++){
6190         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
6191     }
6192 #ifdef REFINE_STATS
6193 STOP_TIMER("memset rem[]")}
6194 #endif
         /* remap every weight in place and accumulate the sum of squared weights,
          * which scales lambda below */
6195     sum=0;
6196     for(i=0; i<64; i++){
6197         int one= 36;
6198         int qns=4;
6199         int w;
6200
6201         w= FFABS(weight[i]) + qns*one;
6202         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6203
6204         weight[i] = w;
6205 //        w=weight[i] = (63*qns + (w/2)) / w;
6206
6207         assert(w>0);
6208         assert(w<(1<<6));
6209         sum += w*w;
6210     }
6211     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6212 #ifdef REFINE_STATS
6213 {START_TIMER
6214 #endif
         /* record the zero run preceding each nonzero level in run_tab[] and add the
          * dequantized value of that level, times its basis row, into rem[] */
6215     run=0;
6216     rle_index=0;
6217     for(i=start_i; i<=last_non_zero; i++){
6218         int j= perm_scantable[i];
6219         const int level= block[j];
6220         int coeff;
6221
6222         if(level){
6223             if(level<0) coeff= qmul*level - qadd;
6224             else        coeff= qmul*level + qadd;
6225             run_tab[rle_index++]=run;
6226             run=0;
6227
6228             s->dsp.add_8x8basis(rem, basis[j], coeff);
6229         }else{
6230             run++;
6231         }
6232     }
6233 #ifdef REFINE_STATS
6234 if(last_non_zero>0){
6235 STOP_TIMER("init rem[]")
6236 }
6237 }
6238
6239 {START_TIMER
6240 #endif
         /* greedy loop: one coefficient is changed by +/-1 per pass */
6241     for(;;){
             /* baseline: score of leaving everything unchanged */
6242         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6243         int best_coeff=0;
6244         int best_change=0;
6245         int run2, best_unquant_change=0, analyze_gradient;
6246 #ifdef REFINE_STATS
6247 {START_TIMER
6248 #endif
6249         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6250
6251         if(analyze_gradient){
6252 #ifdef REFINE_STATS
6253 {START_TIMER
6254 #endif
                 /* d1 = fdct of rem[] scaled by the squared weights; its sign is used
                  * below to reject some 0 -> +/-1 candidates */
6255             for(i=0; i<64; i++){
6256                 int w= weight[i];
6257
6258                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6259             }
6260 #ifdef REFINE_STATS
6261 STOP_TIMER("rem*w*w")}
6262 {START_TIMER
6263 #endif
6264             s->dsp.fdct(d1);
6265 #ifdef REFINE_STATS
6266 STOP_TIMER("dct")}
6267 #endif
6268         }
6269
             /* intra only: try DC +/-1; scored by try_8x8basis alone, no VLC length term */
6270         if(start_i){
6271             const int level= block[0];
6272             int change, old_coeff;
6273
6274             assert(s->mb_intra);
6275
6276             old_coeff= q*level;
6277
6278             for(change=-1; change<=1; change+=2){
6279                 int new_level= level + change;
6280                 int score, new_coeff;
6281
6282                 new_coeff= q*new_level;
6283                 if(new_coeff >= 2048 || new_coeff < 0)
6284                     continue;
6285
6286                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6287                 if(score<best_score){
6288                     best_score= score;
6289                     best_coeff= 0;
6290                     best_change= change;
6291                     best_unquant_change= new_coeff - old_coeff;
6292                 }
6293             }
6294         }
6295
6296         run=0;
6297         rle_index=0;
6298         run2= run_tab[rle_index++];
6299         prev_level=0;
6300         prev_run=0;
6301
             /* walk the coefficients in scan order; run counts zeros since the previous
              * nonzero level, run2 counts zeros remaining before the next one */
6302         for(i=start_i; i<64; i++){
6303             int j= perm_scantable[i];
6304             const int level= block[j];
6305             int change, old_coeff;
6306
                 /* below noise-shaping level 3, stop one position past the last nonzero level */
6307             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6308                 break;
6309
6310             if(level){
6311                 if(level<0) old_coeff= qmul*level - qadd;
6312                 else        old_coeff= qmul*level + qadd;
6313                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6314             }else{
6315                 old_coeff=0;
6316                 run2--;
6317                 assert(run2>=0 || i >= last_non_zero );
6318             }
6319
6320             for(change=-1; change<=1; change+=2){
6321                 int new_level= level + change;
6322                 int score, new_coeff, unquant_change;
6323
6324                 score=0;
                     /* below noise-shaping level 2, never increase the magnitude of a level */
6325                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6326                    continue;
6327
6328                 if(new_level){
6329                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6330                     else            new_coeff= qmul*new_level + qadd;
6331                     if(new_coeff >= 2048 || new_coeff <= -2048)
6332                         continue;
6333                     //FIXME check for overflow
6334
6335                     if(level){
                             /* nonzero -> nonzero: same run, only the level's code length changes */
6336                         if(level < 63 && level > -63){
6337                             if(i < last_non_zero)
6338                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6339                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6340                             else
6341                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6342                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6343                         }
6344                     }else{
                             /* zero -> +/-1: a new code is inserted and the run of the
                              * following coefficient (if any) is split */
6345                         assert(FFABS(new_level)==1);
6346
6347                         if(analyze_gradient){
                                 /* skip when d1 is nonzero here and has the same sign as new_level */
6348                             int g= d1[ scantable[i] ];
6349                             if(g && (g^new_level) >= 0)
6350                                 continue;
6351                         }
6352
6353                         if(i < last_non_zero){
6354                             int next_i= i + run2 + 1;
6355                             int next_level= block[ perm_scantable[next_i] ] + 64;
6356
6357                             if(next_level&(~127))
6358                                 next_level= 0;
6359
6360                             if(next_i < last_non_zero)
6361                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6362                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6363                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6364                             else
6365                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6366                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6367                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6368                         }else{
                                 /* new coefficient becomes the last one; the previous last
                                  * coefficient switches from last_length[] to length[] */
6369                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6370                             if(prev_level){
6371                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6372                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6373                             }
6374                         }
6375                     }
6376                 }else{
                         /* +/-1 -> zero: the mirror image of the insertion case above */
6377                     new_coeff=0;
6378                     assert(FFABS(level)==1);
6379
6380                     if(i < last_non_zero){
6381                         int next_i= i + run2 + 1;
6382                         int next_level= block[ perm_scantable[next_i] ] + 64;
6383
6384                         if(next_level&(~127))
6385                             next_level= 0;
6386
6387                         if(next_i < last_non_zero)
6388                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6389                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6390                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6391                         else
6392                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6393                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6394                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6395                     }else{
6396                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6397                         if(prev_level){
6398                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6399                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6400                         }
6401                     }
6402                 }
6403
                     /* score so far is a code-length difference; weight it by lambda and
                      * add the try_8x8basis() result for the dequantized change */
6404                 score *= lambda;
6405
6406                 unquant_change= new_coeff - old_coeff;
6407                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6408
6409                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6410                 if(score<best_score){
6411                     best_score= score;
6412                     best_coeff= i;
6413                     best_change= change;
6414                     best_unquant_change= unquant_change;
6415                 }
6416             }
6417             if(level){
                     /* remember this level (biased by 64, or 0 if out of table range) and its run */
6418                 prev_level= level + 64;
6419                 if(prev_level&(~127))
6420                     prev_level= 0;
6421                 prev_run= run;
6422                 run=0;
6423             }else{
6424                 run++;
6425             }
6426         }
6427 #ifdef REFINE_STATS
6428 STOP_TIMER("iterative step")}
6429 #endif
6430
             /* apply the best change found in this pass, or stop if there was none */
6431         if(best_change){
6432             int j= perm_scantable[ best_coeff ];
6433
6434             block[j] += best_change;
6435
6436             if(best_coeff > last_non_zero){
6437                 last_non_zero= best_coeff;
6438                 assert(block[j]);
6439 #ifdef REFINE_STATS
6440 after_last++;
6441 #endif
6442             }else{
6443 #ifdef REFINE_STATS
6444 if(block[j]){
6445     if(block[j] - best_change){
6446         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6447             raise++;
6448         }else{
6449             lower++;
6450         }
6451     }else{
6452         from_zero++;
6453     }
6454 }else{
6455     to_zero++;
6456 }
6457 #endif
                     /* the changed level may now be zero: step back to the last nonzero one */
6458                 for(; last_non_zero>=start_i; last_non_zero--){
6459                     if(block[perm_scantable[last_non_zero]])
6460                         break;
6461                 }
6462             }
6463 #ifdef REFINE_STATS
6464 count++;
6465 if(256*256*256*64 % count == 0){
6466     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6467 }
6468 #endif
                 /* rebuild run_tab[] for the modified block */
6469             run=0;
6470             rle_index=0;
6471             for(i=start_i; i<=last_non_zero; i++){
6472                 int j= perm_scantable[i];
6473                 const int level= block[j];
6474
6475                  if(level){
6476                      run_tab[rle_index++]=run;
6477                      run=0;
6478                  }else{
6479                      run++;
6480                  }
6481             }
6482
                 /* fold the accepted change into rem[] for the next pass */
6483             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6484         }else{
6485             break;
6486         }
6487     }
6488 #ifdef REFINE_STATS
6489 if(last_non_zero>0){
6490 STOP_TIMER("iterative search")
6491 }
6492 }
6493 #endif
6494
6495     return last_non_zero;
6496 }
6497
6498 static int dct_quantize_c(MpegEncContext *s,
6499                         DCTELEM *block, int n,
6500                         int qscale, int *overflow)
6501 {
6502     int i, j, level, last_non_zero, q, start_i;
6503     const int *qmat;
6504     const uint8_t *scantable= s->intra_scantable.scantable;
6505     int bias;
6506     int max=0;
6507     unsigned int threshold1, threshold2;
6508
6509     s->dsp.fdct (block);
6510
6511     if(s->dct_error_sum)
6512         s->denoise_dct(s, block);
6513
6514     if (s->mb_intra) {
6515         if (!s->h263_aic) {
6516             if (n < 4)
6517                 q = s->y_dc_scale;
6518             else
6519                 q = s->c_dc_scale;
6520             q = q << 3;
6521         } else
6522             /* For AIC we skip quant/dequant of INTRADC */
6523             q = 1 << 3;
6524
6525         /* note: block[0] is assumed to be positive */
6526         block[0] = (block[0] + (q >> 1)) / q;
6527         start_i = 1;
6528         last_non_zero = 0;
6529         qmat = s->q_intra_matrix[qscale];
6530         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6531     } else {
6532         start_i = 0;
6533         last_non_zero = -1;
6534         qmat = s->q_inter_matrix[qscale];
6535         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6536     }
6537     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6538     threshold2= (threshold1<<1);
6539     for(i=63;i>=start_i;i--) {
6540         j = scantable[i];
6541         level = block[j] * qmat[j];
6542
6543         if(((unsigned)(level+threshold1))>threshold2){
6544             last_non_zero = i;
6545             break;
6546         }else{
6547             block[j]=0;
6548         }
6549     }
6550     for(i=start_i; i<=last_non_zero; i++) {
6551         j = scantable[i];
6552         level = block[j] * qmat[j];
6553
6554 //        if(   bias+level >= (1<<QMAT_SHIFT)
6555 //           || bias-level >= (1<<QMAT_SHIFT)){
6556         if(((unsigned)(level+threshold1))>threshold2){
6557             if(level>0){
6558                 level= (bias + level)>>QMAT_SHIFT;
6559                 block[j]= level;
6560             }else{
6561                 level= (bias - level)>>QMAT_SHIFT;
6562                 block[j]= -level;
6563             }
6564             max |=level;
6565         }else{
6566             block[j]=0;
6567         }
6568     }
6569     *overflow= s->max_qcoeff < max; //overflow might have happened
6570
6571     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6572     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6573         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6574
6575     return last_non_zero;
6576 }
6577
6578 #endif //CONFIG_ENCODERS
6579
6580 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6581                                    DCTELEM *block, int n, int qscale)
6582 {
6583     int i, level, nCoeffs;
6584     const uint16_t *quant_matrix;
6585
6586     nCoeffs= s->block_last_index[n];
6587
6588     if (n < 4)
6589         block[0] = block[0] * s->y_dc_scale;
6590     else
6591         block[0] = block[0] * s->c_dc_scale;
6592     /* XXX: only mpeg1 */
6593     quant_matrix = s->intra_matrix;
6594     for(i=1;i<=nCoeffs;i++) {
6595         int j= s->intra_scantable.permutated[i];
6596         level = block[j];
6597         if (level) {
6598             if (level < 0) {
6599                 level = -level;
6600                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6601                 level = (level - 1) | 1;
6602                 level = -level;
6603             } else {
6604                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6605                 level = (level - 1) | 1;
6606             }
6607             block[j] = level;
6608         }
6609     }
6610 }
6611
6612 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6613                                    DCTELEM *block, int n, int qscale)
6614 {
6615     int i, level, nCoeffs;
6616     const uint16_t *quant_matrix;
6617
6618     nCoeffs= s->block_last_index[n];
6619
6620     quant_matrix = s->inter_matrix;
6621     for(i=0; i<=nCoeffs; i++) {
6622         int j= s->intra_scantable.permutated[i];
6623         level = block[j];
6624         if (level) {
6625             if (level < 0) {
6626                 level = -level;
6627                 level = (((level << 1) + 1) * qscale *
6628                          ((int) (quant_matrix[j]))) >> 4;
6629                 level = (level - 1) | 1;
6630                 level = -level;
6631             } else {
6632                 level = (((level << 1) + 1) * qscale *
6633                          ((int) (quant_matrix[j]))) >> 4;
6634                 level = (level - 1) | 1;
6635             }
6636             block[j] = level;
6637         }
6638     }
6639 }
6640
6641 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6642                                    DCTELEM *block, int n, int qscale)
6643 {
6644     int i, level, nCoeffs;
6645     const uint16_t *quant_matrix;
6646
6647     if(s->alternate_scan) nCoeffs= 63;
6648     else nCoeffs= s->block_last_index[n];
6649
6650     if (n < 4)
6651         block[0] = block[0] * s->y_dc_scale;
6652     else
6653         block[0] = block[0] * s->c_dc_scale;
6654     quant_matrix = s->intra_matrix;
6655     for(i=1;i<=nCoeffs;i++) {
6656         int j= s->intra_scantable.permutated[i];
6657         level = block[j];
6658         if (level) {
6659             if (level < 0) {
6660                 level = -level;
6661                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6662                 level = -level;
6663             } else {
6664                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6665             }
6666             block[j] = level;
6667         }
6668     }
6669 }
6670
6671 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6672                                    DCTELEM *block, int n, int qscale)
6673 {
6674     int i, level, nCoeffs;
6675     const uint16_t *quant_matrix;
6676     int sum=-1;
6677
6678     if(s->alternate_scan) nCoeffs= 63;
6679     else nCoeffs= s->block_last_index[n];
6680
6681     if (n < 4)
6682         block[0] = block[0] * s->y_dc_scale;
6683     else
6684         block[0] = block[0] * s->c_dc_scale;
6685     quant_matrix = s->intra_matrix;
6686     for(i=1;i<=nCoeffs;i++) {
6687         int j= s->intra_scantable.permutated[i];
6688         level = block[j];
6689         if (level) {
6690             if (level < 0) {
6691                 level = -level;
6692                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6693                 level = -level;
6694             } else {
6695                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6696             }
6697             block[j] = level;
6698             sum+=level;
6699         }
6700     }
6701     block[63]^=sum&1;
6702 }
6703
6704 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6705                                    DCTELEM *block, int n, int qscale)
6706 {
6707     int i, level, nCoeffs;
6708     const uint16_t *quant_matrix;
6709     int sum=-1;
6710
6711     if(s->alternate_scan) nCoeffs= 63;
6712     else nCoeffs= s->block_last_index[n];
6713
6714     quant_matrix = s->inter_matrix;
6715     for(i=0; i<=nCoeffs; i++) {
6716         int j= s->intra_scantable.permutated[i];
6717         level = block[j];
6718         if (level) {
6719             if (level < 0) {
6720                 level = -level;
6721                 level = (((level << 1) + 1) * qscale *
6722                          ((int) (quant_matrix[j]))) >> 4;
6723                 level = -level;
6724             } else {
6725                 level = (((level << 1) + 1) * qscale *
6726                          ((int) (quant_matrix[j]))) >> 4;
6727             }
6728             block[j] = level;
6729             sum+=level;
6730         }
6731     }
6732     block[63]^=sum&1;
6733 }
6734
6735 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6736                                   DCTELEM *block, int n, int qscale)
6737 {
6738     int i, level, qmul, qadd;
6739     int nCoeffs;
6740
6741     assert(s->block_last_index[n]>=0);
6742
6743     qmul = qscale << 1;
6744
6745     if (!s->h263_aic) {
6746         if (n < 4)
6747             block[0] = block[0] * s->y_dc_scale;
6748         else
6749             block[0] = block[0] * s->c_dc_scale;
6750         qadd = (qscale - 1) | 1;
6751     }else{
6752         qadd = 0;
6753     }
6754     if(s->ac_pred)
6755         nCoeffs=63;
6756     else
6757         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6758
6759     for(i=1; i<=nCoeffs; i++) {
6760         level = block[i];
6761         if (level) {
6762             if (level < 0) {
6763                 level = level * qmul - qadd;
6764             } else {
6765                 level = level * qmul + qadd;
6766             }
6767             block[i] = level;
6768         }
6769     }
6770 }
6771
6772 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6773                                   DCTELEM *block, int n, int qscale)
6774 {
6775     int i, level, qmul, qadd;
6776     int nCoeffs;
6777
6778     assert(s->block_last_index[n]>=0);
6779
6780     qadd = (qscale - 1) | 1;
6781     qmul = qscale << 1;
6782
6783     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6784
6785     for(i=0; i<=nCoeffs; i++) {
6786         level = block[i];
6787         if (level) {
6788             if (level < 0) {
6789                 level = level * qmul - qadd;
6790             } else {
6791                 level = level * qmul + qadd;
6792             }
6793             block[i] = level;
6794         }
6795     }
6796 }
6797
6798 #ifdef CONFIG_ENCODERS
     /*
      * Encoder definitions, compiled only when CONFIG_ENCODERS is defined.
      * Every entry below uses CODEC_TYPE_VIDEO, sizeof(MpegEncContext) and the
      * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions; entries
      * differ only in name string, codec ID, pixel format list and (mpeg4 only)
      * capabilities. Each pix_fmts list ends with -1.
      * NOTE(review): the positional initializers presumably fill AVCodec's
      * name, type, id, private-context size, init, encode and close members in
      * that order -- confirm against the AVCodec declaration in avcodec.h.
      */
6799 AVCodec h263_encoder = {
6800     "h263",
6801     CODEC_TYPE_VIDEO,
6802     CODEC_ID_H263,
6803     sizeof(MpegEncContext),
6804     MPV_encode_init,
6805     MPV_encode_picture,
6806     MPV_encode_end,
6807     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6808 };
6809
6810 AVCodec h263p_encoder = {
6811     "h263p",
6812     CODEC_TYPE_VIDEO,
6813     CODEC_ID_H263P,
6814     sizeof(MpegEncContext),
6815     MPV_encode_init,
6816     MPV_encode_picture,
6817     MPV_encode_end,
6818     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6819 };
6820
6821 AVCodec flv_encoder = {
6822     "flv",
6823     CODEC_TYPE_VIDEO,
6824     CODEC_ID_FLV1,
6825     sizeof(MpegEncContext),
6826     MPV_encode_init,
6827     MPV_encode_picture,
6828     MPV_encode_end,
6829     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6830 };
6831
6832 AVCodec rv10_encoder = {
6833     "rv10",
6834     CODEC_TYPE_VIDEO,
6835     CODEC_ID_RV10,
6836     sizeof(MpegEncContext),
6837     MPV_encode_init,
6838     MPV_encode_picture,
6839     MPV_encode_end,
6840     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6841 };
6842
6843 AVCodec rv20_encoder = {
6844     "rv20",
6845     CODEC_TYPE_VIDEO,
6846     CODEC_ID_RV20,
6847     sizeof(MpegEncContext),
6848     MPV_encode_init,
6849     MPV_encode_picture,
6850     MPV_encode_end,
6851     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6852 };
6853
     /* The only entry in this list that sets .capabilities (CODEC_CAP_DELAY). */
6854 AVCodec mpeg4_encoder = {
6855     "mpeg4",
6856     CODEC_TYPE_VIDEO,
6857     CODEC_ID_MPEG4,
6858     sizeof(MpegEncContext),
6859     MPV_encode_init,
6860     MPV_encode_picture,
6861     MPV_encode_end,
6862     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6863     .capabilities= CODEC_CAP_DELAY,
6864 };
6865
6866 AVCodec msmpeg4v1_encoder = {
6867     "msmpeg4v1",
6868     CODEC_TYPE_VIDEO,
6869     CODEC_ID_MSMPEG4V1,
6870     sizeof(MpegEncContext),
6871     MPV_encode_init,
6872     MPV_encode_picture,
6873     MPV_encode_end,
6874     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6875 };
6876
6877 AVCodec msmpeg4v2_encoder = {
6878     "msmpeg4v2",
6879     CODEC_TYPE_VIDEO,
6880     CODEC_ID_MSMPEG4V2,
6881     sizeof(MpegEncContext),
6882     MPV_encode_init,
6883     MPV_encode_picture,
6884     MPV_encode_end,
6885     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6886 };
6887
     /* Note: the name string is "msmpeg4" (no version suffix) although the
      * variable and codec ID both say v3. */
6888 AVCodec msmpeg4v3_encoder = {
6889     "msmpeg4",
6890     CODEC_TYPE_VIDEO,
6891     CODEC_ID_MSMPEG4V3,
6892     sizeof(MpegEncContext),
6893     MPV_encode_init,
6894     MPV_encode_picture,
6895     MPV_encode_end,
6896     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6897 };
6898
6899 AVCodec wmv1_encoder = {
6900     "wmv1",
6901     CODEC_TYPE_VIDEO,
6902     CODEC_ID_WMV1,
6903     sizeof(MpegEncContext),
6904     MPV_encode_init,
6905     MPV_encode_picture,
6906     MPV_encode_end,
6907     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6908 };
6909
     /* The only entry listing the PIX_FMT_YUVJ420P / PIX_FMT_YUVJ422P formats
      * instead of PIX_FMT_YUV420P. */
6910 AVCodec mjpeg_encoder = {
6911     "mjpeg",
6912     CODEC_TYPE_VIDEO,
6913     CODEC_ID_MJPEG,
6914     sizeof(MpegEncContext),
6915     MPV_encode_init,
6916     MPV_encode_picture,
6917     MPV_encode_end,
6918     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
6919 };
6920
6921 #endif //CONFIG_ENCODERS