2 * The simplest mpeg encoder (well, it was the simplest!)
3 * Copyright (c) 2000,2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * The simplest mpeg encoder (well, it was the simplest!).
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
42 #include "mpegvideo.h"
46 #include "mpegutils.h"
51 #include "aandcttab.h"
53 #include "mpeg4video.h"
55 #include "bytestream.h"
/* Forward declarations for encoder-internal helpers defined later in this file. */
59 static int encode_picture(MpegEncContext *s, int picture_number);
60 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
63 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
/* Shared default tables installed by MPV_encode_defaults(): a motion-vector
 * penalty table indexed by [fcode][mv + MAX_MV] and an fcode lookup table
 * indexed by [mv + MAX_MV]. */
65 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
66 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
/* Generic AVOption table shared by the mpegvideo-based encoders.
 * NOTE(review): the option entries are not visible in this extract. */
68 const AVOption ff_mpv_generic_options[] = {
/**
 * Pre-compute fixed-point quantization multiplier tables for every qscale in
 * [qmin, qmax] from a raw quantization matrix, so that per-coefficient
 * quantization can be done with a multiply + shift instead of a division.
 *
 * @param dsp          provides the selected fdct (chooses the scaling branch)
 *                     and the idct coefficient permutation
 * @param qmat         out: per-qscale 32-bit multiplier tables
 * @param qmat16       out: per-qscale 16-bit multiplier/bias table pairs
 *                     (for the MMX-style quantizer)
 * @param quant_matrix the source quantization matrix (permuted via j)
 * @param bias         quantizer rounding bias (QUANT_BIAS_SHIFT fixed point)
 * @param qmin, qmax   inclusive qscale range to fill
 * @param intra        nonzero for intra matrices (DC coeff handled separately,
 *                     hence the `i = intra` loop start in the overflow check)
 */
73 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
74 uint16_t (*qmat16)[2][64],
75 const uint16_t *quant_matrix,
76 int bias, int qmin, int qmax, int intra)
81 for (qscale = qmin; qscale <= qmax; qscale++) {
/* Branch 1: "islow"/faan DCTs are unscaled, so the multiplier is simply
 * (1 << QMAT_SHIFT) / (qscale * matrix). */
83 if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
84 dsp->fdct == ff_jpeg_fdct_islow_10 ||
85 dsp->fdct == ff_faandct) {
86 for (i = 0; i < 64; i++) {
87 const int j = dsp->idct_permutation[i];
88 /* 16 <= qscale * quant_matrix[i] <= 7905
89 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
90 * 19952 <= x <= 249205026
91 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
92 * 3444240 >= (1 << 36) / (x) >= 275 */
/* NOTE(review): the range comment above mentions ff_aanscales, but this
 * branch's divisor does not use it — the comment appears copied from the
 * ifast branch below; verify against upstream before relying on the bounds. */
94 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
95 (qscale * quant_matrix[j]));
/* Branch 2: the AAN "ifast" DCT leaves ff_aanscales[] scaling in its output,
 * so that factor is folded into the divisor (extra +14 bits of headroom). */
97 } else if (dsp->fdct == ff_fdct_ifast) {
98 for (i = 0; i < 64; i++) {
99 const int j = dsp->idct_permutation[i];
100 /* 16 <= qscale * quant_matrix[i] <= 7905
101 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102 * 19952 <= x <= 249205026
103 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104 * 3444240 >= (1 << 36) / (x) >= 275 */
106 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
107 (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
/* Branch 3 (default): also fill the 16-bit multiplier/bias pair used by the
 * MMX quantizer; the multiplier is clamped away from 0 and 0x8000 so it fits
 * a signed 16-bit multiply. */
110 for (i = 0; i < 64; i++) {
111 const int j = dsp->idct_permutation[i];
112 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113 * Assume x = qscale * quant_matrix[i]
115 * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116 * so 32768 >= (1 << 19) / (x) >= 67 */
117 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118 (qscale * quant_matrix[j]));
119 //qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120 // (qscale * quant_matrix[i]);
121 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122 (qscale * quant_matrix[j]);
124 if (qmat16[qscale][0][i] == 0 ||
125 qmat16[qscale][0][i] == 128 * 256)
126 qmat16[qscale][0][i] = 128 * 256 - 1;
127 qmat16[qscale][1][i] =
128 ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129 qmat16[qscale][0][i]);
/* Overflow guard: find the largest product max*qmat and reduce the shift
 * (warning below) until it fits in an int. */
133 for (i = intra; i < 64; i++) {
135 if (dsp->fdct == ff_fdct_ifast) {
136 max = (8191LL * ff_aanscales[i]) >> 14;
138 while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
144 av_log(NULL, AV_LOG_INFO,
145 "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150 static inline void update_qscale(MpegEncContext *s)
152 s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
153 (FF_LAMBDA_SHIFT + 7);
154 s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
156 s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
/**
 * Write a 64-entry quantization matrix to the bitstream, 8 bits per
 * coefficient, in zigzag scan order.
 * NOTE(review): only the matrix-present path is visible in this extract;
 * upstream also handles a NULL matrix — confirm before editing.
 */
160 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
166 for (i = 0; i < 64; i++) {
167 put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
174 * init s->current_picture.qscale_table from s->lambda_table
176 void ff_init_qscale_tab(MpegEncContext *s)
178 int8_t * const qscale_table = s->current_picture.qscale_table;
/* Convert each macroblock's lambda to a qp (same fixed-point formula as
 * update_qscale()) and store it clipped to [qmin, qmax]. */
181 for (i = 0; i < s->mb_num; i++) {
182 unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
183 int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
184 qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
/* Copy the fields that motion estimation may have changed from the master
 * context into a per-thread duplicate context. */
189 static void update_duplicate_context_after_me(MpegEncContext *dst,
192 #define COPY(a) dst->a= src->a
194 COPY(current_picture);
200 COPY(picture_in_gop_number);
201 COPY(gop_picture_number);
202 COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
203 COPY(progressive_frame); // FIXME don't set in encode_header
204 COPY(partitioned_frame); // FIXME don't set in encode_header
209 * Set the given MpegEncContext to defaults for encoding.
210 * the changed fields will not depend upon the prior state of the MpegEncContext.
212 static void MPV_encode_defaults(MpegEncContext *s)
215 ff_MPV_common_defaults(s);
/* fcode 1 covers the [-16, 15] MV range */
217 for (i = -16; i < 16; i++) {
218 default_fcode_tab[i + MAX_MV] = 1;
/* Install the shared default tables and reset the input picture counters. */
220 s->me.mv_penalty = default_mv_penalty;
221 s->fcode_tab = default_fcode_tab;
223 s->input_picture_number = 0;
224 s->picture_in_gop_number = 0;
/* Select the DCT quantization functions: arch-specific overrides first,
 * then C fallbacks; the trellis quantizer replaces the default when the
 * user enabled trellis quantization (the plain one stays reachable as
 * fast_dct_quantize). */
227 av_cold int ff_dct_encode_init(MpegEncContext *s) {
229 ff_dct_encode_init_x86(s);
231 if (CONFIG_H263_ENCODER)
232 ff_h263dsp_init(&s->h263dsp);
233 if (!s->dct_quantize)
234 s->dct_quantize = ff_dct_quantize_c;
236 s->denoise_dct = denoise_dct_c;
237 s->fast_dct_quantize = s->dct_quantize;
238 if (s->avctx->trellis)
239 s->dct_quantize = dct_quantize_trellis_c;
244 /* init video encoder */
/* Validates user settings, fills the MpegEncContext from the AVCodecContext,
 * performs per-codec setup and allocates encoder-side tables. Returns a
 * negative AVERROR on invalid settings or allocation failure. */
245 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
247 MpegEncContext *s = avctx->priv_data;
250 MPV_encode_defaults(s);
/* --- pixel format validation per codec --- */
252 switch (avctx->codec_id) {
253 case AV_CODEC_ID_MPEG2VIDEO:
254 if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
255 avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
256 av_log(avctx, AV_LOG_ERROR,
257 "only YUV420 and YUV422 are supported\n");
261 case AV_CODEC_ID_MJPEG:
262 case AV_CODEC_ID_AMV:
/* non-J variants are accepted only at unofficial compliance or lower */
263 if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
264 avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
265 avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
266 ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
267 avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
268 avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
269 avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
270 av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
275 if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
276 av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
/* --- derive chroma subsampling format --- */
281 switch (avctx->pix_fmt) {
282 case AV_PIX_FMT_YUVJ444P:
283 case AV_PIX_FMT_YUV444P:
284 s->chroma_format = CHROMA_444;
286 case AV_PIX_FMT_YUVJ422P:
287 case AV_PIX_FMT_YUV422P:
288 s->chroma_format = CHROMA_422;
290 case AV_PIX_FMT_YUVJ420P:
291 case AV_PIX_FMT_YUV420P:
293 s->chroma_format = CHROMA_420;
/* --- copy user settings into the context --- */
297 s->bit_rate = avctx->bit_rate;
298 s->width = avctx->width;
299 s->height = avctx->height;
300 if (avctx->gop_size > 600 &&
301 avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
302 av_log(avctx, AV_LOG_WARNING,
303 "keyframe interval too large!, reducing it from %d to %d\n",
304 avctx->gop_size, 600);
305 avctx->gop_size = 600;
307 s->gop_size = avctx->gop_size;
309 s->flags = avctx->flags;
310 s->flags2 = avctx->flags2;
311 if (avctx->max_b_frames > MAX_B_FRAMES) {
312 av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
313 "is %d.\n", MAX_B_FRAMES);
314 avctx->max_b_frames = MAX_B_FRAMES;
316 s->max_b_frames = avctx->max_b_frames;
317 s->codec_id = avctx->codec->id;
318 s->strict_std_compliance = avctx->strict_std_compliance;
319 s->quarter_sample = (avctx->flags & CODEC_FLAG_QPEL) != 0;
320 s->mpeg_quant = avctx->mpeg_quant;
321 s->rtp_mode = !!avctx->rtp_payload_size;
322 s->intra_dc_precision = avctx->intra_dc_precision;
323 s->user_specified_pts = AV_NOPTS_VALUE;
325 if (s->gop_size <= 1) {
332 s->me_method = avctx->me_method;
335 s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
/* adaptive quant is enabled if any masking option or QP_RD is in use */
337 s->adaptive_quant = (s->avctx->lumi_masking ||
338 s->avctx->dark_masking ||
339 s->avctx->temporal_cplx_masking ||
340 s->avctx->spatial_cplx_masking ||
341 s->avctx->p_masking ||
342 s->avctx->border_masking ||
343 (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
346 s->loop_filter = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
/* --- rate-control sanity checks and VBV buffer auto-sizing --- */
348 if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
349 switch(avctx->codec_id) {
350 case AV_CODEC_ID_MPEG1VIDEO:
351 case AV_CODEC_ID_MPEG2VIDEO:
352 avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
354 case AV_CODEC_ID_MPEG4:
355 case AV_CODEC_ID_MSMPEG4V1:
356 case AV_CODEC_ID_MSMPEG4V2:
357 case AV_CODEC_ID_MSMPEG4V3:
/* piecewise-linear VBV size interpolation between rate breakpoints */
358 if (avctx->rc_max_rate >= 15000000) {
359 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
360 } else if(avctx->rc_max_rate >= 2000000) {
361 avctx->rc_buffer_size = 80 + (avctx->rc_max_rate - 2000000L) * (320- 80) / (15000000 - 2000000);
362 } else if(avctx->rc_max_rate >= 384000) {
363 avctx->rc_buffer_size = 40 + (avctx->rc_max_rate - 384000L) * ( 80- 40) / ( 2000000 - 384000);
365 avctx->rc_buffer_size = 40;
366 avctx->rc_buffer_size *= 16384;
369 if (avctx->rc_buffer_size) {
370 av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
374 if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
375 av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
376 if (avctx->rc_max_rate && !avctx->rc_buffer_size)
380 if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
381 av_log(avctx, AV_LOG_INFO,
382 "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
385 if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
386 av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
390 if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
391 av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
395 if (avctx->rc_max_rate &&
396 avctx->rc_max_rate == avctx->bit_rate &&
397 avctx->rc_max_rate != avctx->rc_min_rate) {
398 av_log(avctx, AV_LOG_INFO,
399 "impossible bitrate constraints, this will fail\n");
402 if (avctx->rc_buffer_size &&
403 avctx->bit_rate * (int64_t)avctx->time_base.num >
404 avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
405 av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
409 if (!s->fixed_qscale &&
410 avctx->bit_rate * av_q2d(avctx->time_base) >
411 avctx->bit_rate_tolerance) {
412 av_log(avctx, AV_LOG_WARNING,
413 "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
414 avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
/* vbv_delay in MPEG-1/2 is a 16-bit 90 kHz counter; too-large buffers
 * overflow it, forcing the VBR sentinel value 0xFFFF */
417 if (s->avctx->rc_max_rate &&
418 s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
419 (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
420 s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
421 90000LL * (avctx->rc_buffer_size - 1) >
422 s->avctx->rc_max_rate * 0xFFFFLL) {
423 av_log(avctx, AV_LOG_INFO,
424 "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
425 "specified vbv buffer is too large for the given bitrate!\n");
/* --- feature/codec compatibility checks --- */
428 if ((s->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
429 s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
430 s->codec_id != AV_CODEC_ID_FLV1) {
431 av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
435 if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
436 av_log(avctx, AV_LOG_ERROR,
437 "OBMC is only supported with simple mb decision\n");
441 if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
442 av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
446 if (s->max_b_frames &&
447 s->codec_id != AV_CODEC_ID_MPEG4 &&
448 s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
449 s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
450 av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
453 if (s->max_b_frames < 0) {
454 av_log(avctx, AV_LOG_ERROR,
455 "max b frames must be 0 or positive for mpegvideo based encoders\n");
459 if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
460 s->codec_id == AV_CODEC_ID_H263 ||
461 s->codec_id == AV_CODEC_ID_H263P) &&
462 (avctx->sample_aspect_ratio.num > 255 ||
463 avctx->sample_aspect_ratio.den > 255)) {
464 av_log(avctx, AV_LOG_WARNING,
465 "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
466 avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
467 av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
468 avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den, 255);
/* --- per-codec resolution limits --- */
471 if ((s->codec_id == AV_CODEC_ID_H263 ||
472 s->codec_id == AV_CODEC_ID_H263P) &&
473 (avctx->width > 2048 ||
474 avctx->height > 1152 )) {
475 av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
478 if ((s->codec_id == AV_CODEC_ID_H263 ||
479 s->codec_id == AV_CODEC_ID_H263P) &&
480 ((avctx->width &3) ||
481 (avctx->height&3) )) {
482 av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
486 if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
487 (avctx->width > 4095 ||
488 avctx->height > 4095 )) {
489 av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
493 if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
494 (avctx->width > 16383 ||
495 avctx->height > 16383 )) {
496 av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
500 if (s->codec_id == AV_CODEC_ID_RV10 &&
502 avctx->height&15 )) {
503 av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
504 return AVERROR(EINVAL);
507 if (s->codec_id == AV_CODEC_ID_RV20 &&
510 av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
511 return AVERROR(EINVAL);
514 if ((s->codec_id == AV_CODEC_ID_WMV1 ||
515 s->codec_id == AV_CODEC_ID_WMV2) &&
517 av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
521 if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
522 s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
523 av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
527 // FIXME mpeg2 uses that too
528 if (s->mpeg_quant && ( s->codec_id != AV_CODEC_ID_MPEG4
529 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
530 av_log(avctx, AV_LOG_ERROR,
531 "mpeg2 style quantization not supported by codec\n");
535 if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
536 av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
540 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
541 s->avctx->mb_decision != FF_MB_DECISION_RD) {
542 av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
546 if (s->avctx->scenechange_threshold < 1000000000 &&
547 (s->flags & CODEC_FLAG_CLOSED_GOP)) {
548 av_log(avctx, AV_LOG_ERROR,
549 "closed gop with scene change detection are not supported yet, "
550 "set threshold to 1000000000\n");
554 if (s->flags & CODEC_FLAG_LOW_DELAY) {
555 if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
556 av_log(avctx, AV_LOG_ERROR,
557 "low delay forcing is only available for mpeg2\n");
560 if (s->max_b_frames != 0) {
561 av_log(avctx, AV_LOG_ERROR,
562 "b frames cannot be used with low delay\n");
567 if (s->q_scale_type == 1) {
568 if (avctx->qmax > 12) {
569 av_log(avctx, AV_LOG_ERROR,
570 "non linear quant only supports qmax <= 12 currently\n");
/* --- threading constraints --- */
575 if (s->avctx->thread_count > 1 &&
576 s->codec_id != AV_CODEC_ID_MPEG4 &&
577 s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
578 s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
579 s->codec_id != AV_CODEC_ID_MJPEG &&
580 (s->codec_id != AV_CODEC_ID_H263P)) {
581 av_log(avctx, AV_LOG_ERROR,
582 "multi threaded encoding not supported by codec\n");
586 if (s->avctx->thread_count < 1) {
587 av_log(avctx, AV_LOG_ERROR,
588 "automatic thread number detection not supported by codec, "
593 if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
596 if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
597 s->h263_slice_structured = 1;
599 if (!avctx->time_base.den || !avctx->time_base.num) {
600 av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
604 i = (INT_MAX / 2 + 128) >> 8;
605 if (avctx->mb_threshold >= i) {
606 av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
611 if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
612 av_log(avctx, AV_LOG_INFO,
613 "notice: b_frame_strategy only affects the first pass\n");
614 avctx->b_frame_strategy = 0;
/* reduce the timebase to lowest terms */
617 i = av_gcd(avctx->time_base.den, avctx->time_base.num);
619 av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
620 avctx->time_base.den /= i;
621 avctx->time_base.num /= i;
/* --- default quantizer rounding bias per codec family --- */
625 if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
626 // (a + x * 3 / 8) / x
627 s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
628 s->inter_quant_bias = 0;
630 s->intra_quant_bias = 0;
632 s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
635 if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
636 av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
637 return AVERROR(EINVAL);
640 if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
641 s->intra_quant_bias = avctx->intra_quant_bias;
642 if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
643 s->inter_quant_bias = avctx->inter_quant_bias;
645 av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
/* MPEG-4 stores the timebase denominator in 16 bits */
647 if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
648 s->avctx->time_base.den > (1 << 16) - 1) {
649 av_log(avctx, AV_LOG_ERROR,
650 "timebase %d/%d not supported by MPEG 4 standard, "
651 "the maximum admitted value for the timebase denominator "
652 "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
656 s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
/* --- per-codec output format and codec-specific flags --- */
658 switch (avctx->codec->id) {
659 case AV_CODEC_ID_MPEG1VIDEO:
660 s->out_format = FMT_MPEG1;
661 s->low_delay = !!(s->flags & CODEC_FLAG_LOW_DELAY);
662 avctx->delay = s->low_delay ? 0 : (s->max_b_frames + 1);
664 case AV_CODEC_ID_MPEG2VIDEO:
665 s->out_format = FMT_MPEG1;
666 s->low_delay = !!(s->flags & CODEC_FLAG_LOW_DELAY);
667 avctx->delay = s->low_delay ? 0 : (s->max_b_frames + 1);
670 case AV_CODEC_ID_MJPEG:
671 case AV_CODEC_ID_AMV:
672 s->out_format = FMT_MJPEG;
673 s->intra_only = 1; /* force intra only for jpeg */
674 if (!CONFIG_MJPEG_ENCODER ||
675 ff_mjpeg_encode_init(s) < 0)
680 case AV_CODEC_ID_H261:
681 if (!CONFIG_H261_ENCODER)
683 if (ff_h261_get_picture_format(s->width, s->height) < 0) {
684 av_log(avctx, AV_LOG_ERROR,
685 "The specified picture size of %dx%d is not valid for the "
686 "H.261 codec.\nValid sizes are 176x144, 352x288\n",
687 s->width, s->height);
690 s->out_format = FMT_H261;
694 case AV_CODEC_ID_H263:
695 if (!CONFIG_H263_ENCODER)
697 if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
698 s->width, s->height) == 8) {
699 av_log(avctx, AV_LOG_ERROR,
700 "The specified picture size of %dx%d is not valid for "
701 "the H.263 codec.\nValid sizes are 128x96, 176x144, "
702 "352x288, 704x576, and 1408x1152. "
703 "Try H.263+.\n", s->width, s->height);
706 s->out_format = FMT_H263;
710 case AV_CODEC_ID_H263P:
711 s->out_format = FMT_H263;
714 s->h263_aic = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
715 s->modified_quant = s->h263_aic;
716 s->loop_filter = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
717 s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
720 /* These are just to be sure */
724 case AV_CODEC_ID_FLV1:
725 s->out_format = FMT_H263;
726 s->h263_flv = 2; /* format = 1; 11-bit codes */
727 s->unrestricted_mv = 1;
728 s->rtp_mode = 0; /* don't allow GOB */
732 case AV_CODEC_ID_RV10:
733 s->out_format = FMT_H263;
737 case AV_CODEC_ID_RV20:
738 s->out_format = FMT_H263;
741 s->modified_quant = 1;
745 s->unrestricted_mv = 0;
747 case AV_CODEC_ID_MPEG4:
748 s->out_format = FMT_H263;
750 s->unrestricted_mv = 1;
751 s->low_delay = s->max_b_frames ? 0 : 1;
752 avctx->delay = s->low_delay ? 0 : (s->max_b_frames + 1);
754 case AV_CODEC_ID_MSMPEG4V2:
755 s->out_format = FMT_H263;
757 s->unrestricted_mv = 1;
758 s->msmpeg4_version = 2;
762 case AV_CODEC_ID_MSMPEG4V3:
763 s->out_format = FMT_H263;
765 s->unrestricted_mv = 1;
766 s->msmpeg4_version = 3;
767 s->flipflop_rounding = 1;
771 case AV_CODEC_ID_WMV1:
772 s->out_format = FMT_H263;
774 s->unrestricted_mv = 1;
775 s->msmpeg4_version = 4;
776 s->flipflop_rounding = 1;
780 case AV_CODEC_ID_WMV2:
781 s->out_format = FMT_H263;
783 s->unrestricted_mv = 1;
784 s->msmpeg4_version = 5;
785 s->flipflop_rounding = 1;
793 avctx->has_b_frames = !s->low_delay;
797 s->progressive_frame =
798 s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
799 CODEC_FLAG_INTERLACED_ME) ||
/* --- common init and encoder-side allocations --- */
803 if (ff_MPV_common_init(s) < 0)
806 s->avctx->coded_frame = s->current_picture.f;
808 if (s->msmpeg4_version) {
809 FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
810 2 * 2 * (MAX_LEVEL + 1) *
811 (MAX_RUN + 1) * 2 * sizeof(int), fail);
813 FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
815 FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix, 64 * 32 * sizeof(int), fail);
816 FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
817 FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix, 64 * 32 * sizeof(int), fail);
818 FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
819 FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
820 FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
821 FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
822 MAX_PICTURE_COUNT * sizeof(Picture *), fail);
823 FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
824 MAX_PICTURE_COUNT * sizeof(Picture *), fail);
826 if (s->avctx->noise_reduction) {
827 FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
828 2 * 64 * sizeof(uint16_t), fail);
831 ff_dct_encode_init(s);
833 if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
834 s->chroma_qscale_table = ff_h263_chroma_qscale_table;
836 s->quant_precision = 5;
838 ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
839 ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
/* per-format encoder sub-init */
841 if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
842 ff_h261_encode_init(s);
843 if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
844 ff_h263_encode_init(s);
845 if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
846 ff_msmpeg4_encode_init(s);
847 if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
848 && s->out_format == FMT_MPEG1)
849 ff_mpeg1_encode_init(s);
/* --- select default quant matrices (idct-permuted), user override last --- */
852 for (i = 0; i < 64; i++) {
853 int j = s->dsp.idct_permutation[i];
854 if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
856 s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
857 s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
858 } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
860 s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
863 s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
864 s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
866 if (s->avctx->intra_matrix)
867 s->intra_matrix[j] = s->avctx->intra_matrix[i];
868 if (s->avctx->inter_matrix)
869 s->inter_matrix[j] = s->avctx->inter_matrix[i];
872 /* precompute matrix */
873 /* for mjpeg, we do include qscale in the matrix */
874 if (s->out_format != FMT_MJPEG) {
875 ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
876 s->intra_matrix, s->intra_quant_bias, avctx->qmin,
878 ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
879 s->inter_matrix, s->inter_quant_bias, avctx->qmin,
883 if (ff_rate_control_init(s) < 0)
886 #if FF_API_ERROR_RATE
887 FF_DISABLE_DEPRECATION_WARNINGS
888 if (avctx->error_rate)
889 s->error_rate = avctx->error_rate;
890 FF_ENABLE_DEPRECATION_WARNINGS;
/* b_frame_strategy == 2: allocate downscaled frames for B-count estimation */
893 if (avctx->b_frame_strategy == 2) {
894 for (i = 0; i < s->max_b_frames + 2; i++) {
895 s->tmp_frames[i] = av_frame_alloc();
896 if (!s->tmp_frames[i])
897 return AVERROR(ENOMEM);
899 s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
900 s->tmp_frames[i]->width = s->width >> avctx->brd_scale;
901 s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
903 ret = av_frame_get_buffer(s->tmp_frames[i], 32);
/* common failure path: tear down everything allocated so far */
911 ff_MPV_encode_end(avctx);
912 return AVERROR_UNKNOWN;
/* Free everything allocated by ff_MPV_encode_init() (also used as its
 * failure path), including rate control, per-codec encoder state and the
 * quant matrix / picture-list allocations. */
915 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
917 MpegEncContext *s = avctx->priv_data;
920 ff_rate_control_uninit(s);
922 ff_MPV_common_end(s);
923 if (CONFIG_MJPEG_ENCODER &&
924 s->out_format == FMT_MJPEG)
925 ff_mjpeg_encode_close(s);
927 av_freep(&avctx->extradata);
929 for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
930 av_frame_free(&s->tmp_frames[i]);
932 ff_free_picture_tables(&s->new_picture);
933 ff_mpeg_unref_picture(s, &s->new_picture);
935 av_freep(&s->avctx->stats_out);
936 av_freep(&s->ac_stats);
/* the chroma matrices may alias the luma ones; only free when distinct,
 * then clear the pointers to avoid a double free below */
938 if(s->q_chroma_intra_matrix != s->q_intra_matrix ) av_freep(&s->q_chroma_intra_matrix);
939 if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
940 s->q_chroma_intra_matrix= NULL;
941 s->q_chroma_intra_matrix16= NULL;
942 av_freep(&s->q_intra_matrix);
943 av_freep(&s->q_inter_matrix);
944 av_freep(&s->q_intra_matrix16);
945 av_freep(&s->q_inter_matrix16);
946 av_freep(&s->input_picture);
947 av_freep(&s->reordered_input_picture);
948 av_freep(&s->dct_offset);
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant
 * reference value (the caller passes the block mean).
 *
 * (Reconstructed: the extracted fragment was truncated — the opening
 * brace, accumulator declarations and the return were missing.)
 *
 * @param src    top-left pixel of the 16x16 block
 * @param ref    constant value each pixel is compared against
 * @param stride distance in bytes between vertically adjacent pixels
 * @return accumulated absolute error of the block
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++) {
            acc += FFABS(src[x + y * stride] - ref);
        }
    }

    return acc;
}
/* Estimate how many 16x16 blocks of src would be better coded as intra:
 * a block counts when its SAE around its own mean (plus a 500 margin) is
 * below its SAD against the reference frame. */
967 static int get_intra_count(MpegEncContext *s, uint8_t *src,
968 uint8_t *ref, int stride)
976 for (y = 0; y < h; y += 16) {
977 for (x = 0; x < w; x += 16) {
978 int offset = x + y * stride;
979 int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
981 int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
982 int sae = get_sae(src + offset, mean, stride);
984 acc += sae + 500 < sad;
/* Take a user-supplied input frame into the encoder's reorder queue:
 * validates/derives its pts, then either references the frame directly
 * (when strides and alignment allow) or copies it into an internal picture,
 * and finally appends it to s->input_picture[] with the encoding delay. */
991 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
995 int i, display_picture_number = 0, ret;
996 const int encoding_delay = s->max_b_frames ? s->max_b_frames :
997 (s->low_delay ? 0 : 1);
1002 display_picture_number = s->input_picture_number++;
/* --- pts validation: must be strictly increasing; guess when absent --- */
1004 if (pts != AV_NOPTS_VALUE) {
1005 if (s->user_specified_pts != AV_NOPTS_VALUE) {
1006 int64_t last = s->user_specified_pts;
1009 av_log(s->avctx, AV_LOG_ERROR,
1010 "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1012 return AVERROR(EINVAL);
1015 if (!s->low_delay && display_picture_number == 1)
1016 s->dts_delta = pts - last;
1018 s->user_specified_pts = pts;
1020 if (s->user_specified_pts != AV_NOPTS_VALUE) {
1021 s->user_specified_pts =
1022 pts = s->user_specified_pts + 1;
1023 av_log(s->avctx, AV_LOG_INFO,
1024 "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1027 pts = display_picture_number;
/* --- decide whether the input frame can be used in place (direct ref):
 * requires matching strides, mb-aligned dimensions and aligned pointers --- */
1033 if (!pic_arg->buf[0])
1035 if (pic_arg->linesize[0] != s->linesize)
1037 if (pic_arg->linesize[1] != s->uvlinesize)
1039 if (pic_arg->linesize[2] != s->uvlinesize)
1041 if ((s->width & 15) || (s->height & 15))
1043 if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1045 if (s->linesize & (STRIDE_ALIGN-1))
1048 av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1049 pic_arg->linesize[1], s->linesize, s->uvlinesize);
/* direct path: reference the user's buffer */
1052 i = ff_find_unused_picture(s, 1);
1056 pic = &s->picture[i];
1059 if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1061 if (ff_alloc_picture(s, pic, 1) < 0) {
/* copy path: allocate an internal picture and copy plane by plane */
1065 i = ff_find_unused_picture(s, 0);
1069 pic = &s->picture[i];
1072 if (ff_alloc_picture(s, pic, 0) < 0) {
1076 if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1077 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1078 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1081 int h_chroma_shift, v_chroma_shift;
1082 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1086 for (i = 0; i < 3; i++) {
1087 int src_stride = pic_arg->linesize[i];
1088 int dst_stride = i ? s->uvlinesize : s->linesize;
1089 int h_shift = i ? h_chroma_shift : 0;
1090 int v_shift = i ? v_chroma_shift : 0;
1091 int w = s->width >> h_shift;
1092 int h = s->height >> v_shift;
1093 uint8_t *src = pic_arg->data[i];
1094 uint8_t *dst = pic->f->data[i];
/* AMV needs whole macroblock rows copied unless edge emulation is on */
1096 if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1097 h = ((s->height + 15)/16*16) >> v_shift;
1100 if (!s->avctx->rc_buffer_size)
1101 dst += INPLACE_OFFSET;
1103 if (src_stride == dst_stride)
1104 memcpy(dst, src, src_stride * h);
1107 uint8_t *dst2 = dst;
1109 memcpy(dst2, src, w);
/* pad non-mb-aligned dimensions by replicating the edges */
1114 if ((s->width & 15) || (s->height & 15)) {
1115 s->dsp.draw_edges(dst, dst_stride,
1124 ret = av_frame_copy_props(pic->f, pic_arg);
1128 pic->f->display_picture_number = display_picture_number;
1129 pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1132 /* shift buffer entries */
1133 for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1134 s->input_picture[i - 1] = s->input_picture[i];
1136 s->input_picture[encoding_delay] = (Picture*) pic;
/* Decide whether frame p may be skipped as a near-duplicate of ref:
 * accumulates a per-8x8-block difference score over all three planes with
 * the user-selected frame_skip_exp metric and compares it against
 * frame_skip_threshold and a lambda-scaled frame_skip_factor. */
1141 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1145 int64_t score64 = 0;
1147 for (plane = 0; plane < 3; plane++) {
1148 const int stride = p->f->linesize[plane];
/* luma is 2x2 blocks of 8 per MB, chroma 1x1 */
1149 const int bw = plane ? 1 : 2;
1150 for (y = 0; y < s->mb_height * bw; y++) {
1151 for (x = 0; x < s->mb_width * bw; x++) {
1152 int off = p->shared ? 0 : 16;
1153 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1154 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1155 int v = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
/* exp selects the accumulation metric: max, L1, L2, L3, L4 */
1157 switch (FFABS(s->avctx->frame_skip_exp)) {
1158 case 0: score = FFMAX(score, v); break;
1159 case 1: score += FFABS(v); break;
1160 case 2: score64 += v * (int64_t)v; break;
1161 case 3: score64 += FFABS(v * (int64_t)v * v); break;
1162 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v); break;
/* negative exp: normalize by taking the corresponding root of the mean */
1171 if (s->avctx->frame_skip_exp < 0)
1172 score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1173 -1.0/s->avctx->frame_skip_exp);
1175 if (score64 < s->avctx->frame_skip_threshold)
1177 if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
/* Encode one frame with a scratch encoder context (used by the B-frame
 * strategy estimation) and report the encode result; the produced packet
 * is freed before returning. */
1182 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1184 AVPacket pkt = { 0 };
1185 int ret, got_output;
1187 av_init_packet(&pkt);
1188 ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1193 av_free_packet(&pkt);
/**
 * Estimate the best number of consecutive B-frames by brute force:
 * downscale the queued input pictures, encode each candidate GOP pattern
 * (j B-frames between references, j = 0..max_b_frames) with a scratch
 * encoder, and keep the pattern with the lowest rate-distortion score.
 * Used when b_frame_strategy == 2.
 *
 * @return the chosen B-frame count (best_b_count), or presumably a
 *         negative value on setup failure (error paths are elided here).
 */
1197 static int estimate_best_b_count(MpegEncContext *s)
1199 AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1200 AVCodecContext *c = avcodec_alloc_context3(NULL);
1201 const int scale = s->avctx->brd_scale;
1202 int i, j, out_size, p_lambda, b_lambda, lambda2;
1203 int64_t best_rd = INT64_MAX;
1204 int best_b_count = -1;
/* brd_scale halves width/height per step; only 0..3 are supported */
1206 av_assert0(scale >= 0 && scale <= 3);
/* reuse the lambdas of the last encoded P/B frame as quality targets */
1209 //s->next_picture_ptr->quality;
1210 p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1211 //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1212 b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1213 if (!b_lambda) // FIXME we should do this somewhere else
1214 b_lambda = p_lambda;
1215 lambda2 = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
/* configure the scratch encoder to mirror the main one, at reduced size */
1218 c->width = s->width >> scale;
1219 c->height = s->height >> scale;
1220 c->flags = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1221 CODEC_FLAG_INPUT_PRESERVED;
1222 c->flags |= s->avctx->flags & CODEC_FLAG_QPEL;
1223 c->mb_decision = s->avctx->mb_decision;
1224 c->me_cmp = s->avctx->me_cmp;
1225 c->mb_cmp = s->avctx->mb_cmp;
1226 c->me_sub_cmp = s->avctx->me_sub_cmp;
1227 c->pix_fmt = AV_PIX_FMT_YUV420P;
1228 c->time_base = s->avctx->time_base;
1229 c->max_b_frames = s->max_b_frames;
1231 if (avcodec_open2(c, codec, NULL) < 0)
/* downscale the reference (i == 0) and the queued inputs into tmp_frames */
1234 for (i = 0; i < s->max_b_frames + 2; i++) {
1235 Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1236 s->next_picture_ptr;
1238 if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1239 pre_input = *pre_input_ptr;
/* non-shared input buffers carry an INPLACE_OFFSET; skip it when reading */
1241 if (!pre_input.shared && i) {
1242 pre_input.f->data[0] += INPLACE_OFFSET;
1243 pre_input.f->data[1] += INPLACE_OFFSET;
1244 pre_input.f->data[2] += INPLACE_OFFSET;
1247 s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1248 pre_input.f->data[0], pre_input.f->linesize[0],
1249 c->width, c->height);
1250 s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1251 pre_input.f->data[1], pre_input.f->linesize[1],
1252 c->width >> 1, c->height >> 1);
1253 s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1254 pre_input.f->data[2], pre_input.f->linesize[2],
1255 c->width >> 1, c->height >> 1);
/* try each candidate B-frame run length j */
1259 for (j = 0; j < s->max_b_frames + 1; j++) {
1262 if (!s->input_picture[j])
1265 c->error[0] = c->error[1] = c->error[2] = 0;
/* frame 0 is always encoded as an intra reference */
1267 s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1268 s->tmp_frames[0]->quality = 1 * FF_QP2LAMBDA;
1270 out_size = encode_frame(c, s->tmp_frames[0]);
1272 //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1274 for (i = 0; i < s->max_b_frames + 1; i++) {
/* every (j+1)-th frame (and the last one) is a P, the rest are Bs */
1275 int is_p = i % (j + 1) == j || i == s->max_b_frames;
1277 s->tmp_frames[i + 1]->pict_type = is_p ?
1278 AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1279 s->tmp_frames[i + 1]->quality = is_p ? p_lambda : b_lambda;
1281 out_size = encode_frame(c, s->tmp_frames[i + 1]);
/* rate term, weighted by lambda2 (extra 3 bits of precision) */
1283 rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1286 /* get the delayed frames */
1288 out_size = encode_frame(c, NULL);
1289 rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
/* distortion term: accumulated SSE reported by the scratch encoder */
1292 rd += c->error[0] + c->error[1] + c->error[2];
1303 return best_b_count;
/**
 * Pick the next picture to encode from the input queue and reorder it into
 * coded order: decide its type (I/P/B), apply frame skipping, choose the
 * number of preceding B-frames per the configured b_frame_strategy, and set
 * up s->new_picture / s->current_picture for the actual encode.
 * NOTE(review): many lines (returns, closing braces, some assignments such
 * as the initial shift of reordered_input_picture) are elided in this view.
 */
1306 static int select_input_picture(MpegEncContext *s)
/* shift the reorder queue down by one slot */
1310 for (i = 1; i < MAX_PICTURE_COUNT; i++)
1311 s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1312 s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1314 /* set next picture type & ordering */
1315 if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
/* optional frame skipping: drop the input if it is close enough to the
 * previous reference (see skip_check earlier in this file) */
1316 if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1317 if (s->picture_in_gop_number < s->gop_size &&
1318 s->next_picture_ptr &&
1319 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1320 // FIXME check that te gop check above is +-1 correct
1321 av_frame_unref(s->input_picture[0]->f);
1323 ff_vbv_update(s, 0);
/* no reference yet (or intra-only mode): encode as an I frame */
1329 if (/*s->picture_in_gop_number >= s->gop_size ||*/
1330 s->next_picture_ptr == NULL || s->intra_only) {
1331 s->reordered_input_picture[0] = s->input_picture[0];
1332 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1333 s->reordered_input_picture[0]->f->coded_picture_number =
1334 s->coded_picture_number++;
/* two-pass mode: take the picture types recorded by the first pass */
1338 if (s->flags & CODEC_FLAG_PASS2) {
1339 for (i = 0; i < s->max_b_frames + 1; i++) {
1340 int pict_num = s->input_picture[0]->f->display_picture_number + i;
1342 if (pict_num >= s->rc_context.num_entries)
1344 if (!s->input_picture[i]) {
1345 s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1349 s->input_picture[i]->f->pict_type =
1350 s->rc_context.entry[pict_num].new_pict_type;
/* strategy 0: always use the maximum available run of B-frames */
1354 if (s->avctx->b_frame_strategy == 0) {
1355 b_frames = s->max_b_frames;
1356 while (b_frames && !s->input_picture[b_frames])
/* strategy 1: score inter-frame differences and stop the B run where the
 * change between neighbours gets too large (b_sensitivity) */
1358 } else if (s->avctx->b_frame_strategy == 1) {
1359 for (i = 1; i < s->max_b_frames + 1; i++) {
1360 if (s->input_picture[i] &&
1361 s->input_picture[i]->b_frame_score == 0) {
1362 s->input_picture[i]->b_frame_score =
1364 s->input_picture[i ]->f->data[0],
1365 s->input_picture[i - 1]->f->data[0],
1369 for (i = 0; i < s->max_b_frames + 1; i++) {
1370 if (s->input_picture[i] == NULL ||
1371 s->input_picture[i]->b_frame_score - 1 >
1372 s->mb_num / s->avctx->b_sensitivity)
1376 b_frames = FFMAX(0, i - 1);
/* reset scores so the next call recomputes them */
1379 for (i = 0; i < b_frames + 1; i++) {
1380 s->input_picture[i]->b_frame_score = 0;
/* strategy 2: brute-force search using a scratch encoder */
1382 } else if (s->avctx->b_frame_strategy == 2) {
1383 b_frames = estimate_best_b_count(s);
1385 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
/* a user/pass-1 forced non-B type inside the run cuts the run short */
1391 for (i = b_frames - 1; i >= 0; i--) {
1392 int type = s->input_picture[i]->f->pict_type;
1393 if (type && type != AV_PICTURE_TYPE_B)
1396 if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1397 b_frames == s->max_b_frames) {
1398 av_log(s->avctx, AV_LOG_ERROR,
1399 "warning, too many b frames in a row\n");
/* GOP boundary handling: possibly shorten the run and force an I frame */
1402 if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1403 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1404 s->gop_size > s->picture_in_gop_number) {
1405 b_frames = s->gop_size - s->picture_in_gop_number - 1;
1407 if (s->flags & CODEC_FLAG_CLOSED_GOP)
1409 s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1413 if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1414 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
/* emit the reference first (coded order), then the B-frames */
1417 s->reordered_input_picture[0] = s->input_picture[b_frames];
1418 if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1419 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1420 s->reordered_input_picture[0]->f->coded_picture_number =
1421 s->coded_picture_number++;
1422 for (i = 0; i < b_frames; i++) {
1423 s->reordered_input_picture[i + 1] = s->input_picture[i];
1424 s->reordered_input_picture[i + 1]->f->pict_type =
1426 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1427 s->coded_picture_number++;
1432 if (s->reordered_input_picture[0]) {
/* reference=3 marks a frame used for prediction; B-frames are not */
1433 s->reordered_input_picture[0]->reference =
1434 s->reordered_input_picture[0]->f->pict_type !=
1435 AV_PICTURE_TYPE_B ? 3 : 0;
1437 ff_mpeg_unref_picture(s, &s->new_picture);
1438 if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1441 if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1442 // input is a shared pix, so we can't modifiy it -> alloc a new
1443 // one & ensure that the shared one is reuseable
1446 int i = ff_find_unused_picture(s, 0);
1449 pic = &s->picture[i];
1451 pic->reference = s->reordered_input_picture[0]->reference;
1452 if (ff_alloc_picture(s, pic, 0) < 0) {
1456 ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1460 /* mark us unused / free shared pic */
1461 av_frame_unref(s->reordered_input_picture[0]->f);
1462 s->reordered_input_picture[0]->shared = 0;
1464 s->current_picture_ptr = pic;
1466 // input is not a shared pix -> reuse buffer for current_pix
1467 s->current_picture_ptr = s->reordered_input_picture[0];
1468 for (i = 0; i < 4; i++) {
1469 s->new_picture.f->data[i] += INPLACE_OFFSET;
1472 ff_mpeg_unref_picture(s, &s->current_picture);
1473 if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1474 s->current_picture_ptr)) < 0)
1477 s->picture_number = s->new_picture.f->display_picture_number;
1479 ff_mpeg_unref_picture(s, &s->new_picture);
/**
 * Per-frame post-encode bookkeeping: pad reference-frame borders for
 * unrestricted motion vectors, remember the last picture type and its
 * lambda for future rate decisions, and expose the coded frame.
 */
1484 static void frame_end(MpegEncContext *s)
/* only reference frames need padded edges (they are read by ME/MC) */
1486 if (s->unrestricted_mv &&
1487 s->current_picture.reference &&
1489 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1490 int hshift = desc->log2_chroma_w;
1491 int vshift = desc->log2_chroma_h;
/* replicate edge pixels around luma and both chroma planes */
1492 s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1493 s->h_edge_pos, s->v_edge_pos,
1494 EDGE_WIDTH, EDGE_WIDTH,
1495 EDGE_TOP | EDGE_BOTTOM);
1496 s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1497 s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1498 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1499 EDGE_TOP | EDGE_BOTTOM);
1500 s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1501 s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1502 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1503 EDGE_TOP | EDGE_BOTTOM);
/* remember per-type quality for the next frame's lambda estimation */
1508 s->last_pict_type = s->pict_type;
1509 s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1510 if (s->pict_type!= AV_PICTURE_TYPE_B)
1511 s->last_non_b_pict_type = s->pict_type;
1513 s->avctx->coded_frame = s->current_picture_ptr->f;
/**
 * Refresh the per-coefficient DCT offsets used for noise reduction from the
 * accumulated error statistics, separately for intra and inter blocks.
 */
1517 static void update_noise_reduction(MpegEncContext *s)
1521 for (intra = 0; intra < 2; intra++) {
/* halve the accumulators periodically so old statistics decay */
1522 if (s->dct_count[intra] > (1 << 16)) {
1523 for (i = 0; i < 64; i++) {
1524 s->dct_error_sum[intra][i] >>= 1;
1526 s->dct_count[intra] >>= 1;
/* offset[i] ~= noise_reduction * count / mean_error, rounded */
1529 for (i = 0; i < 64; i++) {
1530 s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1531 s->dct_count[intra] +
1532 s->dct_error_sum[intra][i] / 2) /
1533 (s->dct_error_sum[intra][i] + 1);
/**
 * Per-frame pre-encode setup: rotate last/next/current reference pictures,
 * adjust data pointers and linesizes for field pictures, select the
 * unquantizer functions for the output format, and update noise reduction
 * tables. Returns 0 on success, negative on reference setup failure
 * (error-return lines are elided in this view).
 */
1538 static int frame_start(MpegEncContext *s)
1542 /* mark & release old frames */
1543 if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1544 s->last_picture_ptr != s->next_picture_ptr &&
1545 s->last_picture_ptr->f->buf[0]) {
1546 ff_mpeg_unref_picture(s, s->last_picture_ptr);
1549 s->current_picture_ptr->f->pict_type = s->pict_type;
1550 s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1552 ff_mpeg_unref_picture(s, &s->current_picture);
1553 if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1554 s->current_picture_ptr)) < 0)
/* non-B frames become references: old next becomes last, current becomes next */
1557 if (s->pict_type != AV_PICTURE_TYPE_B) {
1558 s->last_picture_ptr = s->next_picture_ptr;
1560 s->next_picture_ptr = s->current_picture_ptr;
1563 if (s->last_picture_ptr) {
1564 ff_mpeg_unref_picture(s, &s->last_picture);
1565 if (s->last_picture_ptr->f->buf[0] &&
1566 (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1567 s->last_picture_ptr)) < 0)
1570 if (s->next_picture_ptr) {
1571 ff_mpeg_unref_picture(s, &s->next_picture);
1572 if (s->next_picture_ptr->f->buf[0] &&
1573 (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1574 s->next_picture_ptr)) < 0)
/* field coding: double the linesizes and offset to the bottom field */
1578 if (s->picture_structure!= PICT_FRAME) {
1580 for (i = 0; i < 4; i++) {
1581 if (s->picture_structure == PICT_BOTTOM_FIELD) {
1582 s->current_picture.f->data[i] +=
1583 s->current_picture.f->linesize[i];
1585 s->current_picture.f->linesize[i] *= 2;
1586 s->last_picture.f->linesize[i] *= 2;
1587 s->next_picture.f->linesize[i] *= 2;
/* pick the unquantizers matching the bitstream format */
1591 if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1592 s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1593 s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1594 } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1595 s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1596 s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1598 s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1599 s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1602 if (s->dct_error_sum) {
1603 av_assert2(s->avctx->noise_reduction && s->encoding);
1604 update_noise_reduction(s);
/**
 * Top-level encode entry point for the MPEG-family encoders: loads and
 * reorders the input picture, encodes it (possibly with slice threads),
 * runs VBV rate control with re-encode on overflow, writes stuffing bits,
 * updates mpeg1/2 vbv_delay for CBR, and fills the output packet.
 *
 * @param avctx      codec context
 * @param pkt        output packet (data allocated here)
 * @param pic_arg    input frame, or NULL to flush delayed frames
 * @param got_packet set nonzero when a packet was produced
 * NOTE(review): return statements and some braces are elided in this view.
 */
1610 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1611 const AVFrame *pic_arg, int *got_packet)
1613 MpegEncContext *s = avctx->priv_data;
1614 int i, stuffing_count, ret;
1615 int context_count = s->slice_context_count;
1617 s->picture_in_gop_number++;
1619 if (load_input_picture(s, pic_arg) < 0)
1622 if (select_input_picture(s) < 0) {
/* output? */
1627 if (s->new_picture.f->data[0]) {
/* worst-case packet size: every MB at maximum size plus headroom */
1628 if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1631 s->mb_info_ptr = av_packet_new_side_data(pkt,
1632 AV_PKT_DATA_H263_MB_INFO,
1633 s->mb_width*s->mb_height*12);
1634 s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
/* give each slice thread a proportional share of the packet buffer */
1637 for (i = 0; i < context_count; i++) {
1638 int start_y = s->thread_context[i]->start_mb_y;
1639 int end_y = s->thread_context[i]-> end_mb_y;
1640 int h = s->mb_height;
1641 uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1642 uint8_t *end = pkt->data + (size_t)(((int64_t) pkt->size) * end_y / h);
1644 init_put_bits(&s->thread_context[i]->pb, start, end - start);
1647 s->pict_type = s->new_picture.f->pict_type;
1649 ret = frame_start(s);
1653 if (encode_picture(s, s->picture_number) < 0)
/* export per-frame statistics */
1656 avctx->header_bits = s->header_bits;
1657 avctx->mv_bits = s->mv_bits;
1658 avctx->misc_bits = s->misc_bits;
1659 avctx->i_tex_bits = s->i_tex_bits;
1660 avctx->p_tex_bits = s->p_tex_bits;
1661 avctx->i_count = s->i_count;
1662 // FIXME f/b_count in avctx
1663 avctx->p_count = s->mb_num - s->i_count - s->skip_count;
1664 avctx->skip_count = s->skip_count;
1668 if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1669 ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
/* VBV check: if the frame is too large, raise lambda and re-encode
 * (the loop-back itself is in elided lines) */
1671 if (avctx->rc_buffer_size) {
1672 RateControlContext *rcc = &s->rc_context;
1673 int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1675 if (put_bits_count(&s->pb) > max_size &&
1676 s->lambda < s->avctx->lmax) {
1677 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1678 (s->qscale + 1) / s->qscale);
1679 if (s->adaptive_quant) {
1681 for (i = 0; i < s->mb_height * s->mb_stride; i++)
1682 s->lambda_table[i] =
1683 FFMAX(s->lambda_table[i] + 1,
1684 s->lambda_table[i] * (s->qscale + 1) /
/* undo the per-frame state changes before re-encoding */
1687 s->mb_skipped = 0; // done in frame_start()
1688 // done in encode_picture() so we must undo it
1689 if (s->pict_type == AV_PICTURE_TYPE_P) {
1690 if (s->flipflop_rounding ||
1691 s->codec_id == AV_CODEC_ID_H263P ||
1692 s->codec_id == AV_CODEC_ID_MPEG4)
1693 s->no_rounding ^= 1;
1695 if (s->pict_type != AV_PICTURE_TYPE_B) {
1696 s->time_base = s->last_time_base;
1697 s->last_non_b_time = s->time - s->pp_time;
1699 for (i = 0; i < context_count; i++) {
1700 PutBitContext *pb = &s->thread_context[i]->pb;
1701 init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1706 av_assert0(s->avctx->rc_max_rate);
1709 if (s->flags & CODEC_FLAG_PASS1)
1710 ff_write_pass1_stats(s);
1712 for (i = 0; i < 4; i++) {
1713 s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1714 avctx->error[i] += s->current_picture_ptr->f->error[i];
1717 if (s->flags & CODEC_FLAG_PASS1)
1718 assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1719 avctx->i_tex_bits + avctx->p_tex_bits ==
1720 put_bits_count(&s->pb));
1721 flush_put_bits(&s->pb);
1722 s->frame_bits = put_bits_count(&s->pb);
1724 stuffing_count = ff_vbv_update(s, s->frame_bits);
1725 s->stuffing_bits = 8*stuffing_count;
/* append VBV stuffing; format-specific byte patterns */
1726 if (stuffing_count) {
1727 if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1728 stuffing_count + 50) {
1729 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1733 switch (s->codec_id) {
1734 case AV_CODEC_ID_MPEG1VIDEO:
1735 case AV_CODEC_ID_MPEG2VIDEO:
1736 while (stuffing_count--) {
1737 put_bits(&s->pb, 8, 0);
1740 case AV_CODEC_ID_MPEG4:
/* MPEG-4 stuffing uses a stuffing start code followed by 0xFF bytes */
1741 put_bits(&s->pb, 16, 0);
1742 put_bits(&s->pb, 16, 0x1C3);
1743 stuffing_count -= 4;
1744 while (stuffing_count--) {
1745 put_bits(&s->pb, 8, 0xFF);
1749 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1751 flush_put_bits(&s->pb);
1752 s->frame_bits = put_bits_count(&s->pb);
1755 /* update mpeg1/2 vbv_delay for CBR */
1756 if (s->avctx->rc_max_rate &&
1757 s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1758 s->out_format == FMT_MPEG1 &&
1759 90000LL * (avctx->rc_buffer_size - 1) <=
1760 s->avctx->rc_max_rate * 0xFFFFLL) {
1761 int vbv_delay, min_delay;
1762 double inbits = s->avctx->rc_max_rate *
1763 av_q2d(s->avctx->time_base);
1764 int minbits = s->frame_bits - 8 *
1765 (s->vbv_delay_ptr - s->pb.buf - 1);
1766 double bits = s->rc_context.buffer_index + minbits - inbits;
1769 av_log(s->avctx, AV_LOG_ERROR,
1770 "Internal error, negative bits\n");
1772 assert(s->repeat_first_field == 0);
/* vbv_delay is in 90 kHz units per the MPEG system clock */
1774 vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1775 min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1776 s->avctx->rc_max_rate;
1778 vbv_delay = FFMAX(vbv_delay, min_delay);
1780 av_assert0(vbv_delay < 0xFFFF);
/* patch the 16-bit vbv_delay field in the already-written picture header */
1782 s->vbv_delay_ptr[0] &= 0xF8;
1783 s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1784 s->vbv_delay_ptr[1] = vbv_delay >> 5;
1785 s->vbv_delay_ptr[2] &= 0x07;
1786 s->vbv_delay_ptr[2] |= vbv_delay << 3;
1787 avctx->vbv_delay = vbv_delay * 300;
1789 s->total_bits += s->frame_bits;
1790 avctx->frame_bits = s->frame_bits;
/* timestamps: dts lags pts when B-frames reorder the stream */
1792 pkt->pts = s->current_picture.f->pts;
1793 if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1794 if (!s->current_picture.f->coded_picture_number)
1795 pkt->dts = pkt->pts - s->dts_delta;
1797 pkt->dts = s->reordered_pts;
1798 s->reordered_pts = pkt->pts;
1800 pkt->dts = pkt->pts;
1801 if (s->current_picture.f->key_frame)
1802 pkt->flags |= AV_PKT_FLAG_KEY;
1804 av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1809 /* release non-reference frames */
1810 for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1811 if (!s->picture[i].reference)
1812 ff_mpeg_unref_picture(s, &s->picture[i]);
1815 av_assert1((s->frame_bits & 7) == 0);
1817 pkt->size = s->frame_bits / 8;
1818 *got_packet = !!pkt->size;
/**
 * Zero out block n entirely if it contains only a few small coefficients:
 * each nonzero level-1 coefficient adds a position-dependent weight (tab[]),
 * and if the total stays below the threshold the whole block is cheaper to
 * drop than to code. A negative threshold additionally permits skipping
 * the DC coefficient (skip_dc handling; its setup line is elided here).
 */
1822 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1823 int n, int threshold)
/* weights by scan position: early (low-frequency) coeffs count more */
1825 static const char tab[64] = {
1826 3, 2, 2, 1, 1, 1, 1, 1,
1827 1, 1, 1, 1, 1, 1, 1, 1,
1828 1, 1, 1, 1, 1, 1, 1, 1,
1829 0, 0, 0, 0, 0, 0, 0, 0,
1830 0, 0, 0, 0, 0, 0, 0, 0,
1831 0, 0, 0, 0, 0, 0, 0, 0,
1832 0, 0, 0, 0, 0, 0, 0, 0,
1833 0, 0, 0, 0, 0, 0, 0, 0
1838 int16_t *block = s->block[n];
1839 const int last_index = s->block_last_index[n];
1842 if (threshold < 0) {
1844 threshold = -threshold;
1848 /* Are all we could set to zero already zero? */
1849 if (last_index <= skip_dc - 1)
/* accumulate the weighted score; any level > 1 aborts elimination
 * (the early-return line is elided in this view) */
1852 for (i = 0; i <= last_index; i++) {
1853 const int j = s->intra_scantable.permutated[i];
1854 const int level = FFABS(block[j]);
1856 if (skip_dc && i == 0)
1860 } else if (level > 1) {
1866 if (score >= threshold)
/* below threshold: clear the coefficients and mark the block empty */
1868 for (i = skip_dc; i <= last_index; i++) {
1869 const int j = s->intra_scantable.permutated[i];
1873 s->block_last_index[n] = 0;
1875 s->block_last_index[n] = -1;
/**
 * Clamp quantized coefficients to the codec's representable range
 * [min_qcoeff, max_qcoeff], counting how many were clipped; warn once per
 * block in simple MB-decision mode (RD modes handle overflow themselves).
 * The intra DC coefficient is exempt from clipping.
 */
1878 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1882 const int maxlevel = s->max_qcoeff;
1883 const int minlevel = s->min_qcoeff;
1887 i = 1; // skip clipping of intra dc
1891 for (; i <= last_index; i++) {
1892 const int j = s->intra_scantable.permutated[i];
1893 int level = block[j];
1895 if (level > maxlevel) {
1898 } else if (level < minlevel) {
1906 if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1907 av_log(s->avctx, AV_LOG_INFO,
1908 "warning, clipping %d dct coefficients to %d..%d\n",
1909 overflow, minlevel, maxlevel);
/**
 * Compute a per-pixel visual masking weight for an 8x8 block: for each
 * pixel, measure the local variance over its 3x3 neighbourhood (clamped at
 * the block border) and derive weight = 36*sqrt(count*sqr - sum^2)/count.
 * Used by the quantizer noise shaping path.
 */
1912 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1916 for (y = 0; y < 8; y++) {
1917 for (x = 0; x < 8; x++) {
/* accumulate sum and sum-of-squares over the clamped 3x3 window
 * (the accumulator updates are in elided lines) */
1923 for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1924 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1925 int v = ptr[x2 + y2 * stride];
1931 weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
/**
 * Encode one macroblock: fetch (or motion-compensate) the pixels, choose
 * frame/field DCT, run the forward DCT + quantization (optionally with
 * trellis/noise-shaping refinement), eliminate near-empty blocks, and emit
 * the bitstream via the codec-specific MB encoder.
 *
 * @param motion_x/motion_y  motion vector used by the inter path
 * @param mb_block_height    chroma block height (8 for 420, 16 for 422/444)
 * @param mb_block_count     6 / 8 / 12 blocks for 420 / 422 / 444
 * NOTE(review): many lines (returns, braces, several assignments) are
 * elided in this view; comments only describe what the visible code shows.
 */
1936 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1937 int motion_x, int motion_y,
1938 int mb_block_height,
1942 int16_t weight[12][64];
1943 int16_t orig[12][64];
1944 const int mb_x = s->mb_x;
1945 const int mb_y = s->mb_y;
1948 int dct_offset = s->linesize * 8; // default for progressive frames
1949 int uv_dct_offset = s->uvlinesize * 8;
1950 uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1951 ptrdiff_t wrap_y, wrap_c;
1953 for (i = 0; i < mb_block_count; i++)
1954 skip_dct[i] = s->skipdct;
/* adaptive quantization: pick this MB's lambda/qscale from the tables */
1956 if (s->adaptive_quant) {
1957 const int last_qp = s->qscale;
1958 const int mb_xy = mb_x + mb_y * s->mb_stride;
1960 s->lambda = s->lambda_table[mb_xy];
1963 if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1964 s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1965 s->dquant = s->qscale - last_qp;
1967 if (s->out_format == FMT_H263) {
1968 s->dquant = av_clip(s->dquant, -2, 2);
/* MPEG-4 restricts where dquant may change (B-direct, 8x8 MV) */
1970 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1972 if (s->pict_type == AV_PICTURE_TYPE_B) {
1973 if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1976 if (s->mv_type == MV_TYPE_8X8)
1982 ff_set_qscale(s, last_qp + s->dquant);
1983 } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1984 ff_set_qscale(s, s->qscale + s->dquant);
1986 wrap_y = s->linesize;
1987 wrap_c = s->uvlinesize;
1988 ptr_y = s->new_picture.f->data[0] +
1989 (mb_y * 16 * wrap_y) + mb_x * 16;
1990 ptr_cb = s->new_picture.f->data[1] +
1991 (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1992 ptr_cr = s->new_picture.f->data[2] +
1993 (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
/* MBs hanging over the right/bottom edge: read via the emulated-edge buffer */
1995 if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1996 uint8_t *ebuf = s->edge_emu_buffer + 32;
1997 int cw = (s->width + s->chroma_x_shift) >> s->chroma_x_shift;
1998 int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1999 s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2001 16, 16, mb_x * 16, mb_y * 16,
2002 s->width, s->height);
2004 s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2006 mb_block_width, mb_block_height,
2007 mb_x * mb_block_width, mb_y * mb_block_height,
2009 ptr_cb = ebuf + 18 * wrap_y;
2010 s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2012 mb_block_width, mb_block_height,
2013 mb_x * mb_block_width, mb_y * mb_block_height,
2015 ptr_cr = ebuf + 18 * wrap_y + 16;
/* intra path: frame/field DCT decision on the source pixels */
2019 if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2020 int progressive_score, interlaced_score;
2022 s->interlaced_dct = 0;
2023 progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2025 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2026 NULL, wrap_y, 8) - 400;
2028 if (progressive_score > 0) {
2029 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2030 NULL, wrap_y * 2, 8) +
2031 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2032 NULL, wrap_y * 2, 8);
2033 if (progressive_score > interlaced_score) {
2034 s->interlaced_dct = 1;
/* field DCT: the "lower" blocks are one line below, not 8 lines */
2036 dct_offset = wrap_y;
2037 uv_dct_offset = wrap_c;
2039 if (s->chroma_format == CHROMA_422 ||
2040 s->chroma_format == CHROMA_444)
/* copy source pixels into the DCT blocks (intra) */
2046 s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
2047 s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y);
2048 s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
2049 s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2051 if (s->flags & CODEC_FLAG_GRAY) {
2055 s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2056 s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2057 if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2058 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2059 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2060 } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2061 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2062 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2063 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2064 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2065 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2066 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
/* inter path: motion compensate into s->dest, then diff against source */
2070 op_pixels_func (*op_pix)[4];
2071 qpel_mc_func (*op_qpix)[16];
2072 uint8_t *dest_y, *dest_cb, *dest_cr;
2074 dest_y = s->dest[0];
2075 dest_cb = s->dest[1];
2076 dest_cr = s->dest[2];
2078 if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2079 op_pix = s->hdsp.put_pixels_tab;
2080 op_qpix = s->dsp.put_qpel_pixels_tab;
2082 op_pix = s->hdsp.put_no_rnd_pixels_tab;
2083 op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
/* forward prediction writes first; backward then averages on top */
2086 if (s->mv_dir & MV_DIR_FORWARD) {
2087 ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2088 s->last_picture.f->data,
2090 op_pix = s->hdsp.avg_pixels_tab;
2091 op_qpix = s->dsp.avg_qpel_pixels_tab;
2093 if (s->mv_dir & MV_DIR_BACKWARD) {
2094 ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2095 s->next_picture.f->data,
/* frame/field DCT decision on the prediction residual */
2099 if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2100 int progressive_score, interlaced_score;
2102 s->interlaced_dct = 0;
2103 progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2106 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2107 ptr_y + wrap_y * 8, wrap_y,
2110 if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2111 progressive_score -= 400;
2113 if (progressive_score > 0) {
2114 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2117 s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2121 if (progressive_score > interlaced_score) {
2122 s->interlaced_dct = 1;
2124 dct_offset = wrap_y;
2125 uv_dct_offset = wrap_c;
2127 if (s->chroma_format == CHROMA_422)
/* residual = source - prediction, per 8x8 block */
2133 s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2134 s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2135 s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2136 dest_y + dct_offset, wrap_y);
2137 s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2138 dest_y + dct_offset + 8, wrap_y);
2140 if (s->flags & CODEC_FLAG_GRAY) {
2144 s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2145 s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2146 if (!s->chroma_y_shift) { /* 422 */
2147 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2148 dest_cb + uv_dct_offset, wrap_c);
2149 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2150 dest_cr + uv_dct_offset, wrap_c);
2153 /* pre quantization */
2154 if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2155 2 * s->qscale * s->qscale) {
/* cheap SAD tests mark near-zero residual blocks for DCT skipping */
2157 if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2158 wrap_y, 8) < 20 * s->qscale)
2160 if (s->dsp.sad[1](NULL, ptr_y + 8,
2161 dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2163 if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2164 dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2166 if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2167 dest_y + dct_offset + 8,
2168 wrap_y, 8) < 20 * s->qscale)
2170 if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2171 wrap_c, 8) < 20 * s->qscale)
2173 if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2174 wrap_c, 8) < 20 * s->qscale)
2176 if (!s->chroma_y_shift) { /* 422 */
2177 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2178 dest_cb + uv_dct_offset,
2179 wrap_c, 8) < 20 * s->qscale)
2181 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2182 dest_cr + uv_dct_offset,
2183 wrap_c, 8) < 20 * s->qscale)
/* noise shaping needs visual weights and a copy of the original blocks */
2189 if (s->quantizer_noise_shaping) {
2191 get_visual_weight(weight[0], ptr_y , wrap_y);
2193 get_visual_weight(weight[1], ptr_y + 8, wrap_y);
2195 get_visual_weight(weight[2], ptr_y + dct_offset , wrap_y);
2197 get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2199 get_visual_weight(weight[4], ptr_cb , wrap_c);
2201 get_visual_weight(weight[5], ptr_cr , wrap_c);
2202 if (!s->chroma_y_shift) { /* 422 */
2204 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2207 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2210 memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2213 /* DCT & quantize */
2214 av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2216 for (i = 0; i < mb_block_count; i++) {
2219 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2220 // FIXME we could decide to change to quantizer instead of
2222 // JS: I don't think that would be a good idea it could lower
2223 // quality instead of improve it. Just INTRADC clipping
2224 // deserves changes in quantizer
2226 clip_coeffs(s, s->block[i], s->block_last_index[i]);
2228 s->block_last_index[i] = -1;
2230 if (s->quantizer_noise_shaping) {
2231 for (i = 0; i < mb_block_count; i++) {
2233 s->block_last_index[i] =
2234 dct_quantize_refine(s, s->block[i], weight[i],
2235 orig[i], i, s->qscale);
/* drop blocks whose few small coefficients are not worth coding */
2240 if (s->luma_elim_threshold && !s->mb_intra)
2241 for (i = 0; i < 4; i++)
2242 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2243 if (s->chroma_elim_threshold && !s->mb_intra)
2244 for (i = 4; i < mb_block_count; i++)
2245 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2247 if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2248 for (i = 0; i < mb_block_count; i++) {
2249 if (s->block_last_index[i] == -1)
2250 s->coded_score[i] = INT_MAX / 256;
/* gray-only encoding: force chroma DC to mid-gray (1024 pre-scale) */
2255 if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2256 s->block_last_index[4] =
2257 s->block_last_index[5] = 0;
2259 s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2260 if (!s->chroma_y_shift) { /* 422 / 444 */
2261 for (i=6; i<12; i++) {
2262 s->block_last_index[i] = 0;
2263 s->block[i][0] = s->block[4][0];
2268 // non c quantize code returns incorrect block_last_index FIXME
2269 if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2270 for (i = 0; i < mb_block_count; i++) {
2272 if (s->block_last_index[i] > 0) {
2273 for (j = 63; j > 0; j--) {
2274 if (s->block[i][s->intra_scantable.permutated[j]])
2277 s->block_last_index[i] = j;
2282 /* huffman encode */
2283 switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2284 case AV_CODEC_ID_MPEG1VIDEO:
2285 case AV_CODEC_ID_MPEG2VIDEO:
2286 if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2287 ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2289 case AV_CODEC_ID_MPEG4:
2290 if (CONFIG_MPEG4_ENCODER)
2291 ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2293 case AV_CODEC_ID_MSMPEG4V2:
2294 case AV_CODEC_ID_MSMPEG4V3:
2295 case AV_CODEC_ID_WMV1:
2296 if (CONFIG_MSMPEG4_ENCODER)
2297 ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2299 case AV_CODEC_ID_WMV2:
2300 if (CONFIG_WMV2_ENCODER)
2301 ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2303 case AV_CODEC_ID_H261:
2304 if (CONFIG_H261_ENCODER)
2305 ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2307 case AV_CODEC_ID_H263:
2308 case AV_CODEC_ID_H263P:
2309 case AV_CODEC_ID_FLV1:
2310 case AV_CODEC_ID_RV10:
2311 case AV_CODEC_ID_RV20:
2312 if (CONFIG_H263_ENCODER)
2313 ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2315 case AV_CODEC_ID_MJPEG:
2316 case AV_CODEC_ID_AMV:
2317 if (CONFIG_MJPEG_ENCODER)
2318 ff_mjpeg_encode_mb(s, s->block);
/**
 * Dispatch to encode_mb_internal() with the block geometry matching the
 * chroma subsampling: 420 -> 8x8 chroma, 6 blocks; 422 -> 16x8, 8 blocks;
 * otherwise (444) -> 16x16, 12 blocks.
 */
2325 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2327 if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y, 8, 8, 6);
2328 else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2329 else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
/**
 * Snapshot the encoder state that an MB encode may modify, from s into d,
 * so a candidate encode can be tried and rolled back (see encode_mb_hq).
 * Counterpart of copy_context_after_encode().
 */
2332 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2335 memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2338 d->mb_skip_run= s->mb_skip_run;
2340 d->last_dc[i] = s->last_dc[i];
2343 d->mv_bits= s->mv_bits;
2344 d->i_tex_bits= s->i_tex_bits;
2345 d->p_tex_bits= s->p_tex_bits;
2346 d->i_count= s->i_count;
2347 d->f_count= s->f_count;
2348 d->b_count= s->b_count;
2349 d->skip_count= s->skip_count;
2350 d->misc_bits= s->misc_bits;
2354 d->qscale= s->qscale;
2355 d->dquant= s->dquant;
2357 d->esc3_level_length= s->esc3_level_length;
/**
 * Copy the post-encode state from s into d after a candidate MB encode was
 * accepted: everything copy_context_before_encode() saves, plus the encode
 * results (MVs, intra/skip flags, block_last_index, interlaced_dct, ...).
 */
2360 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2363 memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2364 memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2367 d->mb_skip_run= s->mb_skip_run;
2369 d->last_dc[i] = s->last_dc[i];
2372 d->mv_bits= s->mv_bits;
2373 d->i_tex_bits= s->i_tex_bits;
2374 d->p_tex_bits= s->p_tex_bits;
2375 d->i_count= s->i_count;
2376 d->f_count= s->f_count;
2377 d->b_count= s->b_count;
2378 d->skip_count= s->skip_count;
2379 d->misc_bits= s->misc_bits;
2381 d->mb_intra= s->mb_intra;
2382 d->mb_skipped= s->mb_skipped;
2383 d->mv_type= s->mv_type;
2384 d->mv_dir= s->mv_dir;
2386 if(s->data_partitioning){
2388 d->tex_pb= s->tex_pb;
2392 d->block_last_index[i]= s->block_last_index[i];
2393 d->interlaced_dct= s->interlaced_dct;
2394 d->qscale= s->qscale;
2396 d->esc3_level_length= s->esc3_level_length;
/**
 * Try encoding one MB with the given mode (type, MVs) into one of two
 * ping-pong bit buffers, score it (bit count, and full RD with SSE when
 * mb_decision == FF_MB_DECISION_RD), and if it beats *dmin keep it by
 * copying state into *best. Pixel output goes to a scratchpad so the real
 * destination is untouched until a winner is chosen.
 * NOTE(review): the score-comparison/accept lines are elided in this view.
 */
2399 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2400 PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2401 int *dmin, int *next_block, int motion_x, int motion_y)
2404 uint8_t *dest_backup[3];
/* restore pre-encode state, then point output at the trial buffers */
2406 copy_context_before_encode(s, backup, type);
2408 s->block= s->blocks[*next_block];
2409 s->pb= pb[*next_block];
2410 if(s->data_partitioning){
2411 s->pb2 = pb2 [*next_block];
2412 s->tex_pb= tex_pb[*next_block];
/* redirect reconstruction into the RD scratchpad */
2416 memcpy(dest_backup, s->dest, sizeof(s->dest));
2417 s->dest[0] = s->rd_scratchpad;
2418 s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2419 s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2420 av_assert0(s->linesize >= 32); //FIXME
2423 encode_mb(s, motion_x, motion_y);
/* base score: bits spent across all active bitstreams */
2425 score= put_bits_count(&s->pb);
2426 if(s->data_partitioning){
2427 score+= put_bits_count(&s->pb2);
2428 score+= put_bits_count(&s->tex_pb);
/* full RD: decode the MB and add lambda-weighted SSE distortion */
2431 if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2432 ff_MPV_decode_mb(s, s->block);
2434 score *= s->lambda2;
2435 score += sse_mb(s) << FF_LAMBDA_SHIFT;
2439 memcpy(s->dest, dest_backup, sizeof(s->dest));
2446 copy_context_after_encode(best, s, type);
/* Sum of squared errors between two w x h blocks with the given stride.
 * Fast DSP paths for the common 16x16 and 8x8 sizes; otherwise a scalar
 * loop using the squared-difference lookup table (sq is biased by 256 so
 * negative differences index correctly).
 * NOTE(review): the acc accumulator declaration, the x/y loops and the
 * final return are in lines missing from this excerpt. */
2450 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2451 uint32_t *sq = ff_square_tab + 256;
2456 return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2457 else if(w==8 && h==8)
2458 return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2462 acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
/* Distortion of the current macroblock: compares the source
 * (s->new_picture) against the reconstruction (s->dest) for Y, U and V.
 * Full 16x16 MBs use the DSP sse[] (or nsse[] when mb_cmp==FF_CMP_NSSE,
 * a noise-preserving metric that takes the context s); edge MBs clipped
 * by the frame border fall back to the generic sse() with the clipped
 * w/h.  Chroma is assumed 4:2:0 here (8x8 planes, w>>1/h>>1). */
2471 static int sse_mb(MpegEncContext *s){
/* clip MB size at the right/bottom frame border */
2475 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2476 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2479 if(s->avctx->mb_cmp == FF_CMP_NSSE){
2480 return s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2481 +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2482 +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2484 return s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2485 +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2486 +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
/* border MB: clipped sizes, generic scalar path */
2489 return sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2490 +sse(s, s->new_picture.f->data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2491 +sse(s, s->new_picture.f->data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
/* avctx->execute() worker: coarse pre-pass motion estimation over this
 * slice context's MB rows.  Iterates bottom-up / right-to-left (the
 * reverse of the main ME pass) using the pre_dia_size search diameter. */
2494 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2495 MpegEncContext *s= *(void**)arg;
2499 s->me.dia_size= s->avctx->pre_dia_size;
2500 s->first_slice_line=1;
2501 for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2502 for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2503 ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2505 s->first_slice_line=0;
/* avctx->execute() worker: full motion estimation for this slice
 * context's MB rows.  Picks B- or P-frame estimation per picture type;
 * results (MVs and candidate mb_type) are stored in the context tables
 * for the later encode pass. */
2513 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2514 MpegEncContext *s= *(void**)arg;
2516 ff_check_alignment();
2518 s->me.dia_size= s->avctx->dia_size;
2519 s->first_slice_line=1;
2520 for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2521 s->mb_x=0; //for block init below
2522 ff_init_block_index(s);
2523 for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
/* advance the 4 luma block indices by one MB (2 blocks each) */
2524 s->block_index[0]+=2;
2525 s->block_index[1]+=2;
2526 s->block_index[2]+=2;
2527 s->block_index[3]+=2;
2529 /* compute motion vector & mb_type and store in context */
2530 if(s->pict_type==AV_PICTURE_TYPE_B)
2531 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2533 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2535 s->first_slice_line=0;
/* avctx->execute() worker: per-MB spatial complexity for rate control.
 * For each 16x16 luma block computes the mean (sum>>8) and variance
 * (pix_norm1 - mean^2, rounded), stores them in the current picture's
 * mb_mean/mb_var tables and accumulates mb_var_sum_temp for later
 * merging across slice contexts. */
2540 static int mb_var_thread(AVCodecContext *c, void *arg){
2541 MpegEncContext *s= *(void**)arg;
2544 ff_check_alignment();
2546 for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2547 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2550 uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2552 int sum = s->dsp.pix_sum(pix, s->linesize);
/* variance = E[x^2] - E[x]^2 over 256 pixels, with rounding bias */
2554 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2556 s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2557 s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2558 s->me.mb_var_sum_temp += varc;
/* Finalize the bitstream at the end of a slice: merge MPEG-4 data
 * partitions and write stuffing, or MJPEG stuffing, then byte-align and
 * flush the PutBitContext.  In pass-1 rate-control mode the alignment /
 * stuffing bits are charged to misc_bits (except for partitioned frames,
 * where accounting is handled elsewhere). */
2564 static void write_slice_end(MpegEncContext *s){
2565 if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2566 if(s->partitioned_frame){
2567 ff_mpeg4_merge_partitions(s);
2570 ff_mpeg4_stuffing(&s->pb);
2571 }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2572 ff_mjpeg_encode_stuffing(s);
2575 avpriv_align_put_bits(&s->pb);
2576 flush_put_bits(&s->pb);
2578 if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2579 s->misc_bits+= get_bits_diff(s);
/* Fill in one 12-byte AV_CODEC_FLAG2-style mb_info record (used for
 * H.263 RFC 2190 style RTP payload headers): bit offset of the MB in the
 * stream, qscale, GOB number, MB address within the GOB, and the H.263
 * motion-vector predictors (hmv1/vmv1; 4MV fields are left zero).
 * The record is written at the end of the mb_info side-data buffer. */
2582 static void write_mb_info(MpegEncContext *s)
2584 uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2585 int offset = put_bits_count(&s->pb);
2586 int mba = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2587 int gobn = s->mb_y / s->gob_index;
2589 if (CONFIG_H263_ENCODER)
2590 ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2591 bytestream_put_le32(&ptr, offset);
2592 bytestream_put_byte(&ptr, s->qscale);
2593 bytestream_put_byte(&ptr, gobn);
2594 bytestream_put_le16(&ptr, mba);
2595 bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2596 bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2597 /* 4MV not implemented */
2598 bytestream_put_byte(&ptr, 0); /* hmv2 */
2599 bytestream_put_byte(&ptr, 0); /* vmv2 */
2602 static void update_mb_info(MpegEncContext *s, int startcode)
2606 if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2607 s->mb_info_size += 12;
2608 s->prev_mb_info = s->last_mb_info;
2611 s->prev_mb_info = put_bits_count(&s->pb)/8;
2612 /* This might have incremented mb_info_size above, and we return without
2613 * actually writing any info into that slot yet. But in that case,
2614 * this will be called again at the start of the after writing the
2615 * start code, actually writing the mb info. */
2619 s->last_mb_info = put_bits_count(&s->pb)/8;
2620 if (!s->mb_info_size)
2621 s->mb_info_size += 12;
/* avctx->execute() worker: encode every macroblock in this slice
 * context's row range [start_mb_y, end_mb_y).
 *
 * Two encoding paths per MB:
 *  - multi-candidate (more than one bit set in mb_type, or QP-RD):
 *    each candidate (INTER, INTER_I, SKIPPED, INTER4V, FORWARD,
 *    BACKWARD, BIDIR, field variants, DIRECT, INTRA, and optionally
 *    +-1/+-2 qscale variants) is trial-encoded into double-buffered
 *    scratch PutBitContexts via encode_mb_hq(); the cheapest is then
 *    copied into the real bitstream.
 *  - single-candidate: the MB type is set up directly from the ME
 *    tables and encoded once with encode_mb().
 * Also handles resync markers / GOB / slice headers (RTP payload
 * splitting, error_rate testing), mb_info side data, optional PSNR
 * error accumulation, and the H.263 loop filter.
 * NOTE(review): this excerpt is missing many lines (embedded original
 * numbering jumps) — several loop headers, braces and small statements
 * are not visible; comments below only describe visible code. */
2625 static int encode_thread(AVCodecContext *c, void *arg){
2626 MpegEncContext *s= *(void**)arg;
2627 int mb_x, mb_y, pdif = 0;
2628 int chr_h= 16>>s->chroma_y_shift;
2630 MpegEncContext best_s, backup_s;
/* double-buffered scratch bitstreams for trial encodes (main, partition
 * header, partition texture) */
2631 uint8_t bit_buf[2][MAX_MB_BYTES];
2632 uint8_t bit_buf2[2][MAX_MB_BYTES];
2633 uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2634 PutBitContext pb[2], pb2[2], tex_pb[2];
2636 ff_check_alignment();
2639 init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES);
2640 init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES);
2641 init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2644 s->last_bits= put_bits_count(&s->pb);
2655 /* init last dc values */
2656 /* note: quant matrix value (8) is implied here */
2657 s->last_dc[i] = 128 << s->intra_dc_precision;
2659 s->current_picture.f->error[i] = 0;
2661 if(s->codec_id==AV_CODEC_ID_AMV){
/* AMV uses fixed MJPEG-style quant tables; bias DC predictors to match */
2662 s->last_dc[0] = 128*8/13;
2663 s->last_dc[1] = 128*8/14;
2664 s->last_dc[2] = 128*8/14;
2667 memset(s->last_mv, 0, sizeof(s->last_mv));
2671 switch(s->codec_id){
2672 case AV_CODEC_ID_H263:
2673 case AV_CODEC_ID_H263P:
2674 case AV_CODEC_ID_FLV1:
2675 if (CONFIG_H263_ENCODER)
2676 s->gob_index = ff_h263_get_gob_height(s);
2678 case AV_CODEC_ID_MPEG4:
2679 if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2680 ff_mpeg4_init_partitions(s);
2686 s->first_slice_line = 1;
2687 s->ptr_lastgob = s->pb.buf;
2688 for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2692 ff_set_qscale(s, s->qscale);
2693 ff_init_block_index(s);
2695 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2696 int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2697 int mb_type= s->mb_type[xy];
/* bail out if the output buffer cannot hold a worst-case MB */
2702 if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2703 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2706 if(s->data_partitioning){
2707 if( s->pb2 .buf_end - s->pb2 .buf - (put_bits_count(&s-> pb2)>>3) < MAX_MB_BYTES
2708 || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2709 av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2715 s->mb_y = mb_y; // moved into loop, can get changed by H.261
2716 ff_update_block_index(s);
2718 if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
/* H.261 reorders MBs; refresh xy/mb_type after the reorder */
2719 ff_h261_reorder_mb_index(s);
2720 xy= s->mb_y*s->mb_stride + s->mb_x;
2721 mb_type= s->mb_type[xy];
2724 /* write gob / video packet header */
2726 int current_packet_size, is_gob_start;
2728 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
/* start a new packet when the RTP payload target is reached */
2730 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2732 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2734 switch(s->codec_id){
2735 case AV_CODEC_ID_H263:
2736 case AV_CODEC_ID_H263P:
2737 if(!s->h263_slice_structured)
2738 if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2740 case AV_CODEC_ID_MPEG2VIDEO:
2741 if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2742 case AV_CODEC_ID_MPEG1VIDEO:
2743 if(s->mb_skip_run) is_gob_start=0;
2745 case AV_CODEC_ID_MJPEG:
2746 if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2751 if(s->start_mb_y != mb_y || mb_x!=0){
2754 if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2755 ff_mpeg4_init_partitions(s);
2759 av_assert2((put_bits_count(&s->pb)&7) == 0);
2760 current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
/* error_rate debug option: pseudo-randomly corrupt/drop packets */
2762 if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2763 int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2764 int d = 100 / s->error_rate;
2766 current_packet_size=0;
2767 s->pb.buf_ptr= s->ptr_lastgob;
2768 assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2772 if (s->avctx->rtp_callback){
2773 int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2774 s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2776 update_mb_info(s, 1);
/* per-codec resync header */
2778 switch(s->codec_id){
2779 case AV_CODEC_ID_MPEG4:
2780 if (CONFIG_MPEG4_ENCODER) {
2781 ff_mpeg4_encode_video_packet_header(s);
2782 ff_mpeg4_clean_buffers(s);
2785 case AV_CODEC_ID_MPEG1VIDEO:
2786 case AV_CODEC_ID_MPEG2VIDEO:
2787 if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2788 ff_mpeg1_encode_slice_header(s);
2789 ff_mpeg1_clean_buffers(s);
2792 case AV_CODEC_ID_H263:
2793 case AV_CODEC_ID_H263P:
2794 if (CONFIG_H263_ENCODER)
2795 ff_h263_encode_gob_header(s, mb_y);
2799 if(s->flags&CODEC_FLAG_PASS1){
2800 int bits= put_bits_count(&s->pb);
2801 s->misc_bits+= bits - s->last_bits;
2805 s->ptr_lastgob += current_packet_size;
2806 s->first_slice_line=1;
2807 s->resync_mb_x=mb_x;
2808 s->resync_mb_y=mb_y;
2812 if( (s->resync_mb_x == s->mb_x)
2813 && s->resync_mb_y+1 == s->mb_y){
2814 s->first_slice_line=0;
2818 s->dquant=0; //only for QP_RD
2820 update_mb_info(s, 0);
2822 if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2824 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2826 copy_context_before_encode(&backup_s, s, -1);
2828 best_s.data_partitioning= s->data_partitioning;
2829 best_s.partitioned_frame= s->partitioned_frame;
2830 if(s->data_partitioning){
2831 backup_s.pb2= s->pb2;
2832 backup_s.tex_pb= s->tex_pb;
/* --- trial-encode each candidate MB type --- */
2835 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2836 s->mv_dir = MV_DIR_FORWARD;
2837 s->mv_type = MV_TYPE_16X16;
2839 s->mv[0][0][0] = s->p_mv_table[xy][0];
2840 s->mv[0][0][1] = s->p_mv_table[xy][1];
2841 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2842 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2844 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2845 s->mv_dir = MV_DIR_FORWARD;
2846 s->mv_type = MV_TYPE_FIELD;
2849 j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2850 s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2851 s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2853 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2854 &dmin, &next_block, 0, 0);
2856 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2857 s->mv_dir = MV_DIR_FORWARD;
2858 s->mv_type = MV_TYPE_16X16;
2862 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2863 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2865 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2866 s->mv_dir = MV_DIR_FORWARD;
2867 s->mv_type = MV_TYPE_8X8;
2870 s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2871 s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2873 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2874 &dmin, &next_block, 0, 0);
2876 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2877 s->mv_dir = MV_DIR_FORWARD;
2878 s->mv_type = MV_TYPE_16X16;
2880 s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2881 s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2882 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2883 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2885 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2886 s->mv_dir = MV_DIR_BACKWARD;
2887 s->mv_type = MV_TYPE_16X16;
2889 s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2890 s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2891 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2892 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2894 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2895 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2896 s->mv_type = MV_TYPE_16X16;
2898 s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2899 s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2900 s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2901 s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2902 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2903 &dmin, &next_block, 0, 0);
2905 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2906 s->mv_dir = MV_DIR_FORWARD;
2907 s->mv_type = MV_TYPE_FIELD;
2910 j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2911 s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2912 s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2914 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2915 &dmin, &next_block, 0, 0);
2917 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2918 s->mv_dir = MV_DIR_BACKWARD;
2919 s->mv_type = MV_TYPE_FIELD;
2922 j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2923 s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2924 s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2926 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2927 &dmin, &next_block, 0, 0);
2929 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2930 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2931 s->mv_type = MV_TYPE_FIELD;
2933 for(dir=0; dir<2; dir++){
2935 j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2936 s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2937 s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2940 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2941 &dmin, &next_block, 0, 0);
2943 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2945 s->mv_type = MV_TYPE_16X16;
2949 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2950 &dmin, &next_block, 0, 0);
2951 if(s->h263_pred || s->h263_aic){
2953 s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2955 ff_clean_intra_table_entries(s); //old mode?
/* --- optional QP-RD: retry the best mode at nearby qscales --- */
2959 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2960 if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2961 const int last_qp= backup_s.qscale;
2964 const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2965 static const int dquant_tab[4]={-1,1,-2,2};
2966 int storecoefs = s->mb_intra && s->dc_val[0];
2968 av_assert2(backup_s.dquant == 0);
2971 s->mv_dir= best_s.mv_dir;
2972 s->mv_type = MV_TYPE_16X16;
2973 s->mb_intra= best_s.mb_intra;
2974 s->mv[0][0][0] = best_s.mv[0][0][0];
2975 s->mv[0][0][1] = best_s.mv[0][0][1];
2976 s->mv[1][0][0] = best_s.mv[1][0][0];
2977 s->mv[1][0][1] = best_s.mv[1][0][1];
/* B-frames only try dquant +-1 (qpi starts at 2) */
2979 qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2980 for(; qpi<4; qpi++){
2981 int dquant= dquant_tab[qpi];
2982 qp= last_qp + dquant;
2983 if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2985 backup_s.dquant= dquant;
/* save DC/AC predictors so a losing trial can restore them */
2988 dc[i]= s->dc_val[0][ s->block_index[i] ];
2989 memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2993 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2994 &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2995 if(best_s.qscale != qp){
2998 s->dc_val[0][ s->block_index[i] ]= dc[i];
2999 memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3006 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3007 int mx= s->b_direct_mv_table[xy][0];
3008 int my= s->b_direct_mv_table[xy][1];
3010 backup_s.dquant = 0;
3011 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3013 ff_mpeg4_set_direct_mv(s, mx, my);
3014 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3015 &dmin, &next_block, mx, my);
3017 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3018 backup_s.dquant = 0;
3019 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3021 ff_mpeg4_set_direct_mv(s, 0, 0);
3022 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3023 &dmin, &next_block, 0, 0);
/* optional skip-RD: retry the best inter mode as a coded/skip trade */
3025 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3028 coded |= s->block_last_index[i];
3031 memcpy(s->mv, best_s.mv, sizeof(s->mv));
3032 if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3033 mx=my=0; //FIXME find the one we actually used
3034 ff_mpeg4_set_direct_mv(s, mx, my);
3035 }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3043 s->mv_dir= best_s.mv_dir;
3044 s->mv_type = best_s.mv_type;
3046 /* s->mv[0][0][0] = best_s.mv[0][0][0];
3047 s->mv[0][0][1] = best_s.mv[0][0][1];
3048 s->mv[1][0][0] = best_s.mv[1][0][0];
3049 s->mv[1][0][1] = best_s.mv[1][0][1];*/
3052 encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3053 &dmin, &next_block, mx, my);
3058 s->current_picture.qscale_table[xy] = best_s.qscale;
/* commit the winning trial: restore its context and copy its bits from
 * the scratch buffer into the real bitstream(s) */
3060 copy_context_after_encode(s, &best_s, -1);
3062 pb_bits_count= put_bits_count(&s->pb);
3063 flush_put_bits(&s->pb);
3064 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3067 if(s->data_partitioning){
3068 pb2_bits_count= put_bits_count(&s->pb2);
3069 flush_put_bits(&s->pb2);
3070 avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3071 s->pb2= backup_s.pb2;
3073 tex_pb_bits_count= put_bits_count(&s->tex_pb);
3074 flush_put_bits(&s->tex_pb);
3075 avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3076 s->tex_pb= backup_s.tex_pb;
3078 s->last_bits= put_bits_count(&s->pb);
3080 if (CONFIG_H263_ENCODER &&
3081 s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3082 ff_h263_update_motion_val(s);
3084 if(next_block==0){ //FIXME 16 vs linesize16
/* winning reconstruction still sits in the RD scratchpad; copy it out */
3085 s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad , s->linesize ,16);
3086 s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize , s->uvlinesize, 8);
3087 s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3090 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3091 ff_MPV_decode_mb(s, s->block);
/* --- single-candidate path: set up the one possible type and encode --- */
3093 int motion_x = 0, motion_y = 0;
3094 s->mv_type=MV_TYPE_16X16;
3095 // only one MB-Type possible
3098 case CANDIDATE_MB_TYPE_INTRA:
3101 motion_x= s->mv[0][0][0] = 0;
3102 motion_y= s->mv[0][0][1] = 0;
3104 case CANDIDATE_MB_TYPE_INTER:
3105 s->mv_dir = MV_DIR_FORWARD;
3107 motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3108 motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3110 case CANDIDATE_MB_TYPE_INTER_I:
3111 s->mv_dir = MV_DIR_FORWARD;
3112 s->mv_type = MV_TYPE_FIELD;
3115 j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3116 s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3117 s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3120 case CANDIDATE_MB_TYPE_INTER4V:
3121 s->mv_dir = MV_DIR_FORWARD;
3122 s->mv_type = MV_TYPE_8X8;
3125 s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3126 s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3129 case CANDIDATE_MB_TYPE_DIRECT:
3130 if (CONFIG_MPEG4_ENCODER) {
3131 s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3133 motion_x=s->b_direct_mv_table[xy][0];
3134 motion_y=s->b_direct_mv_table[xy][1];
3135 ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3138 case CANDIDATE_MB_TYPE_DIRECT0:
3139 if (CONFIG_MPEG4_ENCODER) {
3140 s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3142 ff_mpeg4_set_direct_mv(s, 0, 0);
3145 case CANDIDATE_MB_TYPE_BIDIR:
3146 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3148 s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3149 s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3150 s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3151 s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3153 case CANDIDATE_MB_TYPE_BACKWARD:
3154 s->mv_dir = MV_DIR_BACKWARD;
3156 motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3157 motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3159 case CANDIDATE_MB_TYPE_FORWARD:
3160 s->mv_dir = MV_DIR_FORWARD;
3162 motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3163 motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3165 case CANDIDATE_MB_TYPE_FORWARD_I:
3166 s->mv_dir = MV_DIR_FORWARD;
3167 s->mv_type = MV_TYPE_FIELD;
3170 j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3171 s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3172 s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3175 case CANDIDATE_MB_TYPE_BACKWARD_I:
3176 s->mv_dir = MV_DIR_BACKWARD;
3177 s->mv_type = MV_TYPE_FIELD;
3180 j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3181 s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3182 s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3185 case CANDIDATE_MB_TYPE_BIDIR_I:
3186 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3187 s->mv_type = MV_TYPE_FIELD;
3189 for(dir=0; dir<2; dir++){
3191 j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3192 s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3193 s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3198 av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3201 encode_mb(s, motion_x, motion_y);
3203 // RAL: Update last macroblock type
3204 s->last_mv_dir = s->mv_dir;
3206 if (CONFIG_H263_ENCODER &&
3207 s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3208 ff_h263_update_motion_val(s);
3210 ff_MPV_decode_mb(s, s->block);
3213 /* clean the MV table in IPS frames for direct mode in B frames */
3214 if(s->mb_intra /* && I,P,S_TYPE */){
3215 s->p_mv_table[xy][0]=0;
3216 s->p_mv_table[xy][1]=0;
/* accumulate per-plane SSE for the PSNR report */
3219 if(s->flags&CODEC_FLAG_PSNR){
3223 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3224 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3226 s->current_picture.f->error[0] += sse(
3227 s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3228 s->dest[0], w, h, s->linesize);
3229 s->current_picture.f->error[1] += sse(
3230 s, s->new_picture.f->data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*chr_h,
3231 s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3232 s->current_picture.f->error[2] += sse(
3233 s, s->new_picture.f->data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*chr_h,
3234 s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3237 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3238 ff_h263_loop_filter(s);
3240 av_dlog(s->avctx, "MB %d %d bits\n",
3241 s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3245 //not beautiful here but we must write it before flushing so it has to be here
3246 if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3247 ff_msmpeg4_encode_ext_header(s);
3251 /* Send the last GOB if RTP */
3252 if (s->avctx->rtp_callback) {
3253 int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3254 pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3255 /* Call the RTP callback to send the last GOB */
3257 s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
/* MERGE adds a slice-context field into the master context and zeroes
 * the source, so repeated merges do not double-count. */
3263 #define MERGE(field) dst->field += src->field; src->field=0
/* Merge motion-estimation statistics from a slice context into the
 * master context after the ME pass. */
3264 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3265 MERGE(me.scene_change_score);
3266 MERGE(me.mc_mb_var_sum_temp);
3267 MERGE(me.mb_var_sum_temp);
/* Merge per-slice encode results into the master context: DCT/noise
 * statistics, error-concealment counters, PSNR error sums and, finally,
 * the slice's bitstream, which is appended byte-aligned onto dst->pb. */
3270 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3273 MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3274 MERGE(dct_count[1]);
3283 MERGE(er.error_count);
3284 MERGE(padding_bug_score);
3285 MERGE(current_picture.f->error[0]);
3286 MERGE(current_picture.f->error[1]);
3287 MERGE(current_picture.f->error[2]);
3289 if(dst->avctx->noise_reduction){
3290 for(i=0; i<64; i++){
3291 MERGE(dct_error_sum[0][i]);
3292 MERGE(dct_error_sum[1][i]);
/* slice bitstreams are byte-aligned by write_slice_end(); concatenate */
3296 assert(put_bits_count(&src->pb) % 8 ==0);
3297 assert(put_bits_count(&dst->pb) % 8 ==0);
3298 avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3299 flush_put_bits(&dst->pb);
/* Pick the picture quality/qscale: either a pending next_lambda, or the
 * rate-control estimate (unless fixed qscale).  With adaptive quant the
 * per-MB qscale tables are cleaned per-codec and initialized, and lambda
 * is taken from the table; otherwise from the picture quality.
 * dry_run avoids consuming next_lambda so a later real run still sees it.
 * Returns negative on rate-control failure (visible via the quality<0
 * check; the return statement itself is in a missing line). */
3302 static int estimate_qp(MpegEncContext *s, int dry_run){
3303 if (s->next_lambda){
3304 s->current_picture_ptr->f->quality =
3305 s->current_picture.f->quality = s->next_lambda;
3306 if(!dry_run) s->next_lambda= 0;
3307 } else if (!s->fixed_qscale) {
3308 s->current_picture_ptr->f->quality =
3309 s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3310 if (s->current_picture.f->quality < 0)
3314 if(s->adaptive_quant){
3315 switch(s->codec_id){
3316 case AV_CODEC_ID_MPEG4:
3317 if (CONFIG_MPEG4_ENCODER)
3318 ff_clean_mpeg4_qscales(s);
3320 case AV_CODEC_ID_H263:
3321 case AV_CODEC_ID_H263P:
3322 case AV_CODEC_ID_FLV1:
3323 if (CONFIG_H263_ENCODER)
3324 ff_clean_h263_qscales(s);
3327 ff_init_qscale_tab(s);
3330 s->lambda= s->lambda_table[0];
3333 s->lambda = s->current_picture.f->quality;
3338 /* must be called before writing the header */
/* Update temporal distances used for B-frame MV scaling:
 * s->time from the picture pts, pb_time (current-B to previous P)
 * for B pictures, and pp_time (P to P) otherwise. */
3339 static void set_frame_distances(MpegEncContext * s){
3340 av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3341 s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3343 if(s->pict_type==AV_PICTURE_TYPE_B){
3344 s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3345 assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3347 s->pp_time= s->time - s->last_non_b_time;
3348 s->last_non_b_time= s->time;
3349 assert(s->picture_number==0 || s->pp_time > 0);
3353 static int encode_picture(MpegEncContext *s, int picture_number)
3357 int context_count = s->slice_context_count;
3359 s->picture_number = picture_number;
3361 /* Reset the average MB variance */
3362 s->me.mb_var_sum_temp =
3363 s->me.mc_mb_var_sum_temp = 0;
3365 /* we need to initialize some time vars before we can encode b-frames */
3366 // RAL: Condition added for MPEG1VIDEO
3367 if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3368 set_frame_distances(s);
3369 if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3370 ff_set_mpeg4_time(s);
3372 s->me.scene_change_score=0;
3374 // s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3376 if(s->pict_type==AV_PICTURE_TYPE_I){
3377 if(s->msmpeg4_version >= 3) s->no_rounding=1;
3378 else s->no_rounding=0;
3379 }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3380 if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3381 s->no_rounding ^= 1;
3384 if(s->flags & CODEC_FLAG_PASS2){
3385 if (estimate_qp(s,1) < 0)
3387 ff_get_2pass_fcode(s);
3388 }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3389 if(s->pict_type==AV_PICTURE_TYPE_B)
3390 s->lambda= s->last_lambda_for[s->pict_type];
3392 s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3396 if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3397 if(s->q_chroma_intra_matrix != s->q_intra_matrix ) av_freep(&s->q_chroma_intra_matrix);
3398 if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3399 s->q_chroma_intra_matrix = s->q_intra_matrix;
3400 s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3403 s->mb_intra=0; //for the rate distortion & bit compare functions
3404 for(i=1; i<context_count; i++){
3405 ret = ff_update_duplicate_context(s->thread_context[i], s);
3413 /* Estimate motion for every MB */
3414 if(s->pict_type != AV_PICTURE_TYPE_I){
3415 s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3416 s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3417 if (s->pict_type != AV_PICTURE_TYPE_B) {
3418 if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3419 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3423 s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3424 }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3426 for(i=0; i<s->mb_stride*s->mb_height; i++)
3427 s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3429 if(!s->fixed_qscale){
3430 /* finding spatial complexity for I-frame rate control */
3431 s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3434 for(i=1; i<context_count; i++){
3435 merge_context_after_me(s, s->thread_context[i]);
3437 s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3438 s->current_picture. mb_var_sum= s->current_picture_ptr-> mb_var_sum= s->me. mb_var_sum_temp;
3441 if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3442 s->pict_type= AV_PICTURE_TYPE_I;
3443 for(i=0; i<s->mb_stride*s->mb_height; i++)
3444 s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3445 if(s->msmpeg4_version >= 3)
3447 av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3448 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3452 if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3453 s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3455 if(s->flags & CODEC_FLAG_INTERLACED_ME){
3457 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3458 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3459 s->f_code= FFMAX3(s->f_code, a, b);
3462 ff_fix_long_p_mvs(s);
3463 ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3464 if(s->flags & CODEC_FLAG_INTERLACED_ME){
3468 ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3469 s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3474 if(s->pict_type==AV_PICTURE_TYPE_B){
3477 a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3478 b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3479 s->f_code = FFMAX(a, b);
3481 a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3482 b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3483 s->b_code = FFMAX(a, b);
3485 ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3486 ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3487 ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3488 ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3489 if(s->flags & CODEC_FLAG_INTERLACED_ME){
3491 for(dir=0; dir<2; dir++){
3494 int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3495 : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3496 ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3497 s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3505 if (estimate_qp(s, 0) < 0)
3508 if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3509 s->qscale= 3; //reduce clipping problems
3511 if (s->out_format == FMT_MJPEG) {
3512 const uint16_t * luma_matrix = ff_mpeg1_default_intra_matrix;
3513 const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3515 if (s->avctx->intra_matrix) {
3517 luma_matrix = s->avctx->intra_matrix;
3519 if (s->avctx->chroma_intra_matrix)
3520 chroma_matrix = s->avctx->chroma_intra_matrix;
3522 /* for mjpeg, we do include qscale in the matrix */
3524 int j= s->dsp.idct_permutation[i];
3526 s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3527 s-> intra_matrix[j] = av_clip_uint8(( luma_matrix[i] * s->qscale) >> 3);
3529 s->y_dc_scale_table=
3530 s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3531 s->chroma_intra_matrix[0] =
3532 s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3533 ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3534 s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3535 ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3536 s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3539 if(s->codec_id == AV_CODEC_ID_AMV){
3540 static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3541 static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3543 int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3545 s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3546 s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3548 s->y_dc_scale_table= y;
3549 s->c_dc_scale_table= c;
3550 s->intra_matrix[0] = 13;
3551 s->chroma_intra_matrix[0] = 14;
3552 ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3553 s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3554 ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3555 s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3559 //FIXME var duplication
3560 s->current_picture_ptr->f->key_frame =
3561 s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3562 s->current_picture_ptr->f->pict_type =
3563 s->current_picture.f->pict_type = s->pict_type;
3565 if (s->current_picture.f->key_frame)
3566 s->picture_in_gop_number=0;
3568 s->mb_x = s->mb_y = 0;
3569 s->last_bits= put_bits_count(&s->pb);
3570 switch(s->out_format) {
3572 if (CONFIG_MJPEG_ENCODER)
3573 ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3574 s->intra_matrix, s->chroma_intra_matrix);
3577 if (CONFIG_H261_ENCODER)
3578 ff_h261_encode_picture_header(s, picture_number);
3581 if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3582 ff_wmv2_encode_picture_header(s, picture_number);
3583 else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3584 ff_msmpeg4_encode_picture_header(s, picture_number);
3585 else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3586 ff_mpeg4_encode_picture_header(s, picture_number);
3587 else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3588 ff_rv10_encode_picture_header(s, picture_number);
3589 else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3590 ff_rv20_encode_picture_header(s, picture_number);
3591 else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3592 ff_flv_encode_picture_header(s, picture_number);
3593 else if (CONFIG_H263_ENCODER)
3594 ff_h263_encode_picture_header(s, picture_number);
3597 if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3598 ff_mpeg1_encode_picture_header(s, picture_number);
3603 bits= put_bits_count(&s->pb);
3604 s->header_bits= bits - s->last_bits;
3606 for(i=1; i<context_count; i++){
3607 update_duplicate_context_after_me(s->thread_context[i], s);
3609 s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3610 for(i=1; i<context_count; i++){
3611 merge_context_after_encode(s, s->thread_context[i]);
/*
 * Noise-shaping "denoise" pass applied to a DCT block before quantization.
 * Each coefficient is biased towards zero by a learned per-position offset
 * (s->dct_offset) while the running magnitude statistics used to derive
 * those offsets are accumulated in s->dct_error_sum / s->dct_count.
 * Intra and inter macroblocks keep separate statistics (indexed by `intra`).
 */
3617 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3618     const int intra= s->mb_intra;
3621     s->dct_count[intra]++;
3623     for(i=0; i<64; i++){
3624         int level= block[i];
/* positive coefficient: record magnitude, shrink by offset, clamp at 0 so the sign never flips */
3628             s->dct_error_sum[intra][i] += level;
3629             level -= s->dct_offset[intra][i];
3630             if(level<0) level=0;
/* negative coefficient: mirror of the branch above (error sum tracks |level|) */
3632             s->dct_error_sum[intra][i] -= level;
3633             level += s->dct_offset[intra][i];
3634             if(level>0) level=0;
/*
 * Trellis (rate-distortion optimal) quantization of one 8x8 block.
 *
 * Runs the forward DCT, computes up to two candidate quantized levels per
 * coefficient, then performs a dynamic-programming search over run/level
 * combinations that minimizes  distortion + lambda * bits, using the
 * encoder's AC VLC length tables for the rate term.
 *
 * @param block    spatial-domain block on input; quantized coefficients
 *                 (in permuted scan order) on output
 * @param n        block index; n < 4 selects the luma quant matrix,
 *                 otherwise the chroma matrix (intra case)
 * @param qscale   quantizer scale for this block
 * @param overflow set non-zero if some coefficient exceeded s->max_qcoeff
 * @return index of the last non-zero coefficient (may be -1 when the
 *         block quantizes to nothing worth coding)
 */
3641 static int dct_quantize_trellis_c(MpegEncContext *s,
3642 int16_t *block, int n,
3643 int qscale, int *overflow){
3645 const uint8_t *scantable= s->intra_scantable.scantable;
3646 const uint8_t *perm_scantable= s->intra_scantable.permutated;
3648 unsigned int threshold1, threshold2;
3660 int coeff_count[64];
3661 int qmul, qadd, start_i, last_non_zero, i, dc;
3662 const int esc_length= s->ac_esc_length;
3664 uint8_t * last_length;
3665 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3667 s->dsp.fdct (block);
3669 if(s->dct_error_sum)
3670 s->denoise_dct(s, block);
3672 qadd= ((qscale-1)|1)*8;
3683 /* For AIC we skip quant/dequant of INTRADC */
3688 /* note: block[0] is assumed to be positive */
3689 block[0] = (block[0] + (q >> 1)) / q;
/* intra: pick luma/chroma quant matrix and intra AC VLC length tables */
3692 qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3693 if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3694 bias= 1<<(QMAT_SHIFT-1);
3695 length = s->intra_ac_vlc_length;
3696 last_length= s->intra_ac_vlc_last_length;
/* inter: single inter matrix and inter AC VLC length tables */
3700 qmat = s->q_inter_matrix[qscale];
3701 length = s->inter_ac_vlc_length;
3702 last_length= s->inter_ac_vlc_last_length;
3706 threshold1= (1<<QMAT_SHIFT) - bias - 1;
3707 threshold2= (threshold1<<1);
/* scan downwards to find the last coefficient that survives quantization */
3709 for(i=63; i>=start_i; i--) {
3710 const int j = scantable[i];
3711 int level = block[j] * qmat[j];
3713 if(((unsigned)(level+threshold1))>threshold2){
/* build up to two candidate levels (level, level-1) per coefficient */
3719 for(i=start_i; i<=last_non_zero; i++) {
3720 const int j = scantable[i];
3721 int level = block[j] * qmat[j];
3723 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
3724 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
3725 if(((unsigned)(level+threshold1))>threshold2){
3727 level= (bias + level)>>QMAT_SHIFT;
3729 coeff[1][i]= level-1;
3730 // coeff[2][k]= level-2;
3732 level= (bias - level)>>QMAT_SHIFT;
3733 coeff[0][i]= -level;
3734 coeff[1][i]= -level+1;
3735 // coeff[2][k]= -level+2;
3737 coeff_count[i]= FFMIN(level, 2);
3738 av_assert2(coeff_count[i]);
/* sub-threshold coefficient: only candidate is +/-1 with matching sign */
3741 coeff[0][i]= (level>>31)|1;
3746 *overflow= s->max_qcoeff < max; //overflow might have happened
3748 if(last_non_zero < start_i){
3749 memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3750 return last_non_zero;
/* dynamic programming over scan positions; survivor[] holds the live states */
3753 score_tab[start_i]= 0;
3754 survivor[0]= start_i;
3757 for(i=start_i; i<=last_non_zero; i++){
3758 int level_index, j, zero_distortion;
3759 int dct_coeff= FFABS(block[ scantable[i] ]);
3760 int best_score=256*256*256*120;
/* ifast FDCT leaves AAN scaling in the coefficients; undo it for distortion */
3762 if (s->dsp.fdct == ff_fdct_ifast)
3763 dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3764 zero_distortion= dct_coeff*dct_coeff;
3766 for(level_index=0; level_index < coeff_count[i]; level_index++){
3768 int level= coeff[level_index][i];
3769 const int alevel= FFABS(level);
/* reconstruct the dequantized value the decoder would see, per out_format */
3774 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3775 unquant_coeff= alevel*qmul + qadd;
3777 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3779 unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3;
3780 unquant_coeff = (unquant_coeff - 1) | 1;
3782 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3783 unquant_coeff = (unquant_coeff - 1) | 1;
3788 distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
/* level fits the VLC table: rate from length[]/last_length[] */
3790 if((level&(~127)) == 0){
3791 for(j=survivor_count-1; j>=0; j--){
3792 int run= i - survivor[j];
3793 int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3794 score += score_tab[i-run];
3796 if(score < best_score){
3799 level_tab[i+1]= level-64;
3803 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3804 for(j=survivor_count-1; j>=0; j--){
3805 int run= i - survivor[j];
3806 int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3807 score += score_tab[i-run];
3808 if(score < last_score){
3811 last_level= level-64;
/* level outside the VLC table: charge the escape-code length instead */
3817 distortion += esc_length*lambda;
3818 for(j=survivor_count-1; j>=0; j--){
3819 int run= i - survivor[j];
3820 int score= distortion + score_tab[i-run];
3822 if(score < best_score){
3825 level_tab[i+1]= level-64;
3829 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3830 for(j=survivor_count-1; j>=0; j--){
3831 int run= i - survivor[j];
3832 int score= distortion + score_tab[i-run];
3833 if(score < last_score){
3836 last_level= level-64;
3844 score_tab[i+1]= best_score;
/* prune survivors that can no longer beat the current best path */
3846 //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3847 if(last_non_zero <= 27){
3848 for(; survivor_count; survivor_count--){
3849 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3853 for(; survivor_count; survivor_count--){
3854 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3859 survivor[ survivor_count++ ]= i+1;
/* for non-H.263/H.261 formats, choose the best truncation point afterwards */
3862 if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3863 last_score= 256*256*256*120;
3864 for(i= survivor[0]; i<=last_non_zero + 1; i++){
3865 int score= score_tab[i];
3866 if(i) score += lambda*2; //FIXME exacter?
3868 if(score < last_score){
3871 last_level= level_tab[i];
3872 last_run= run_tab[i];
3877 s->coded_score[n] = last_score;
3879 dc= FFABS(block[0]);
3880 last_non_zero= last_i - 1;
3881 memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3883 if(last_non_zero < start_i)
3884 return last_non_zero;
/* special case: only the first coefficient survived — re-decide it alone */
3886 if(last_non_zero == 0 && start_i == 0){
3888 int best_score= dc * dc;
3890 for(i=0; i<coeff_count[0]; i++){
3891 int level= coeff[i][0];
3892 int alevel= FFABS(level);
3893 int unquant_coeff, score, distortion;
3895 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3896 unquant_coeff= (alevel*qmul + qadd)>>3;
3898 unquant_coeff = ((( alevel << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3899 unquant_coeff = (unquant_coeff - 1) | 1;
3901 unquant_coeff = (unquant_coeff + 4) >> 3;
3902 unquant_coeff<<= 3 + 3;
3904 distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3906 if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3907 else score= distortion + esc_length*lambda;
3909 if(score < best_score){
3911 best_level= level - 64;
3914 block[0]= best_level;
3915 s->coded_score[n] = best_score - dc*dc;
3916 if(best_level == 0) return -1;
3917 else return last_non_zero;
/* back-track the winning path and write levels into the permuted block */
3921 av_assert2(last_level);
3923 block[ perm_scantable[last_non_zero] ]= last_level;
3926 for(; i>start_i; i -= run_tab[i] + 1){
3927 block[ perm_scantable[i-1] ]= level_tab[i];
3930 return last_non_zero;
3933 //#define REFINE_STATS 1
/* 8x8 DCT basis functions in BASIS_SHIFT fixed point, stored per permuted
 * coefficient index; lazily filled in by build_basis() and consumed by
 * dct_quantize_refine(). basis[0][0]==0 doubles as the "uninitialized" flag. */
3934 static int16_t basis[64][64];
/*
 * Fill the `basis` table with the (IDCT-permuted) 2-D DCT basis vectors,
 * applying the usual 1/sqrt(2) normalization for the DC row/column.
 * @param perm  the IDCT coefficient permutation (s->dsp.idct_permutation)
 */
3936 static void build_basis(uint8_t *perm){
3943 double s= 0.25*(1<<BASIS_SHIFT);
3945 int perm_index= perm[index];
3946 if(i==0) s*= sqrt(0.5);
3947 if(j==0) s*= sqrt(0.5);
3948 basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
/*
 * Iteratively refine an already-quantized block (quantizer noise shaping).
 * Reconstructs the residual `rem` between the dequantized block and the
 * original pixels, then repeatedly tries +/-1 changes to individual
 * coefficients, accepting the change that most reduces
 * weighted-distortion + lambda * rate (rate deltas come from the AC VLC
 * length tables; distortion deltas from dsp.try_8x8basis/add_8x8basis on
 * the precomputed DCT basis table).
 *
 * @param block  quantized coefficients, updated in place
 * @param weight per-coefficient perceptual weights
 * @param orig   original (unquantized) spatial-domain samples
 * @return index of the last non-zero coefficient after refinement
 */
3955 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3956 int16_t *block, int16_t *weight, int16_t *orig,
3959 LOCAL_ALIGNED_16(int16_t, d1, [64]);
3960 const uint8_t *scantable= s->intra_scantable.scantable;
3961 const uint8_t *perm_scantable= s->intra_scantable.permutated;
3962 // unsigned int threshold1, threshold2;
3967 int qmul, qadd, start_i, last_non_zero, i, dc;
3969 uint8_t * last_length;
3971 int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
/* REFINE_STATS counters (debug builds only) */
3974 static int after_last=0;
3975 static int to_zero=0;
3976 static int from_zero=0;
3979 static int messed_sign=0;
/* lazily build the DCT basis table on first use */
3982 if(basis[0][0] == 0)
3983 build_basis(s->dsp.idct_permutation);
3994 /* For AIC we skip quant/dequant of INTRADC */
3998 q <<= RECON_SHIFT-3;
3999 /* note: block[0] is assumed to be positive */
4001 // block[0] = (block[0] + (q >> 1)) / q;
4003 // if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4004 // bias= 1<<(QMAT_SHIFT-1);
4005 length = s->intra_ac_vlc_length;
4006 last_length= s->intra_ac_vlc_last_length;
4010 length = s->inter_ac_vlc_length;
4011 last_length= s->inter_ac_vlc_last_length;
4013 last_non_zero = s->block_last_index[n];
/* initialize rem[] with the DC-compensated negated original */
4018 dc += (1<<(RECON_SHIFT-1));
4019 for(i=0; i<64; i++){
4020 rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig dirrectly instead of copying to rem[]
4023 STOP_TIMER("memset rem[]")}
/* map perceptual weights into the 16..63 range used by try_8x8basis */
4026 for(i=0; i<64; i++){
4031 w= FFABS(weight[i]) + qns*one;
4032 w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4035 // w=weight[i] = (63*qns + (w/2)) / w;
4038 av_assert2(w<(1<<6));
4041 lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
/* add the dequantized value of each coded coefficient into rem[] and
 * record the run-length structure in run_tab[] */
4047 for(i=start_i; i<=last_non_zero; i++){
4048 int j= perm_scantable[i];
4049 const int level= block[j];
4053 if(level<0) coeff= qmul*level - qadd;
4054 else coeff= qmul*level + qadd;
4055 run_tab[rle_index++]=run;
4058 s->dsp.add_8x8basis(rem, basis[j], coeff);
4064 if(last_non_zero>0){
4065 STOP_TIMER("init rem[]")
/* main refinement loop: baseline score is the current residual energy */
4072 int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4075 int run2, best_unquant_change=0, analyze_gradient;
4079 analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
/* optional gradient d1[] = weighted residual, used to skip hopeless +/-1 flips */
4081 if(analyze_gradient){
4085 for(i=0; i<64; i++){
4088 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4091 STOP_TIMER("rem*w*w")}
/* try adjusting the intra DC coefficient by +/-1 */
4101 const int level= block[0];
4102 int change, old_coeff;
4104 av_assert2(s->mb_intra);
4108 for(change=-1; change<=1; change+=2){
4109 int new_level= level + change;
4110 int score, new_coeff;
4112 new_coeff= q*new_level;
4113 if(new_coeff >= 2048 || new_coeff < 0)
4116 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4117 if(score<best_score){
4120 best_change= change;
4121 best_unquant_change= new_coeff - old_coeff;
4128 run2= run_tab[rle_index++];
/* try adjusting each AC coefficient by +/-1 */
4132 for(i=start_i; i<64; i++){
4133 int j= perm_scantable[i];
4134 const int level= block[j];
4135 int change, old_coeff;
4137 if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4141 if(level<0) old_coeff= qmul*level - qadd;
4142 else old_coeff= qmul*level + qadd;
4143 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4147 av_assert2(run2>=0 || i >= last_non_zero );
4150 for(change=-1; change<=1; change+=2){
4151 int new_level= level + change;
4152 int score, new_coeff, unquant_change;
4155 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4159 if(new_level<0) new_coeff= qmul*new_level - qadd;
4160 else new_coeff= qmul*new_level + qadd;
4161 if(new_coeff >= 2048 || new_coeff <= -2048)
4163 //FIXME check for overflow
/* rate delta when the coefficient stays non-zero: VLC length difference */
4166 if(level < 63 && level > -63){
4167 if(i < last_non_zero)
4168 score += length[UNI_AC_ENC_INDEX(run, new_level+64)]
4169 - length[UNI_AC_ENC_INDEX(run, level+64)];
4171 score += last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4172 - last_length[UNI_AC_ENC_INDEX(run, level+64)];
/* coefficient goes from zero to +/-1: runs around it split */
4175 av_assert2(FFABS(new_level)==1);
4177 if(analyze_gradient){
4178 int g= d1[ scantable[i] ];
4179 if(g && (g^new_level) >= 0)
4183 if(i < last_non_zero){
4184 int next_i= i + run2 + 1;
4185 int next_level= block[ perm_scantable[next_i] ] + 64;
4187 if(next_level&(~127))
4190 if(next_i < last_non_zero)
4191 score += length[UNI_AC_ENC_INDEX(run, 65)]
4192 + length[UNI_AC_ENC_INDEX(run2, next_level)]
4193 - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4195 score += length[UNI_AC_ENC_INDEX(run, 65)]
4196 + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4197 - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4199 score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4201 score += length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4202 - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
/* coefficient goes from +/-1 to zero: runs around it merge */
4208 av_assert2(FFABS(level)==1);
4210 if(i < last_non_zero){
4211 int next_i= i + run2 + 1;
4212 int next_level= block[ perm_scantable[next_i] ] + 64;
4214 if(next_level&(~127))
4217 if(next_i < last_non_zero)
4218 score += length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4219 - length[UNI_AC_ENC_INDEX(run2, next_level)]
4220 - length[UNI_AC_ENC_INDEX(run, 65)];
4222 score += last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4223 - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4224 - length[UNI_AC_ENC_INDEX(run, 65)];
4226 score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4228 score += last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4229 - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4236 unquant_change= new_coeff - old_coeff;
4237 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
/* add distortion delta; keep the best (most negative) total change */
4239 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4240 if(score<best_score){
4243 best_change= change;
4244 best_unquant_change= unquant_change;
4248 prev_level= level + 64;
4249 if(prev_level&(~127))
4258 STOP_TIMER("iterative step")}
/* apply the winning +/-1 change and update bookkeeping */
4262 int j= perm_scantable[ best_coeff ];
4264 block[j] += best_change;
4266 if(best_coeff > last_non_zero){
4267 last_non_zero= best_coeff;
4268 av_assert2(block[j]);
4275 if(block[j] - best_change){
4276 if(FFABS(block[j]) > FFABS(block[j] - best_change)){
/* coefficient became zero: shrink last_non_zero back to the new tail */
4288 for(; last_non_zero>=start_i; last_non_zero--){
4289 if(block[perm_scantable[last_non_zero]])
4295 if(256*256*256*64 % count == 0){
4296 av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
/* rebuild run_tab[] and fold the accepted change back into rem[] */
4301 for(i=start_i; i<=last_non_zero; i++){
4302 int j= perm_scantable[i];
4303 const int level= block[j];
4306 run_tab[rle_index++]=run;
4313 s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4319 if(last_non_zero>0){
4320 STOP_TIMER("iterative search")
4325 return last_non_zero;
/*
 * Plain (non-trellis) C quantization of one 8x8 block.
 * Runs the forward DCT, optionally the denoise pass, then quantizes each
 * coefficient with the precomputed quant matrix and rounding bias, and
 * finally permutes the non-zero coefficients into IDCT order.
 *
 * @param n        block index; n < 4 selects the luma intra matrix
 * @param qscale   quantizer scale for this block
 * @param overflow set non-zero if some coefficient exceeded s->max_qcoeff
 * @return index of the last non-zero coefficient
 */
4328 int ff_dct_quantize_c(MpegEncContext *s,
4329 int16_t *block, int n,
4330 int qscale, int *overflow)
4332 int i, j, level, last_non_zero, q, start_i;
4334 const uint8_t *scantable= s->intra_scantable.scantable;
4337 unsigned int threshold1, threshold2;
4339 s->dsp.fdct (block);
4341 if(s->dct_error_sum)
4342 s->denoise_dct(s, block);
4352 /* For AIC we skip quant/dequant of INTRADC */
4355 /* note: block[0] is assumed to be positive */
4356 block[0] = (block[0] + (q >> 1)) / q;
/* select quant matrix and rounding bias for intra vs inter blocks */
4359 qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4360 bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4364 qmat = s->q_inter_matrix[qscale];
4365 bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4367 threshold1= (1<<QMAT_SHIFT) - bias - 1;
4368 threshold2= (threshold1<<1);
/* scan downwards for the last coefficient that survives quantization */
4369 for(i=63;i>=start_i;i--) {
4371 level = block[j] * qmat[j];
4373 if(((unsigned)(level+threshold1))>threshold2){
/* quantize the surviving range; below-threshold coefficients become 0 */
4380 for(i=start_i; i<=last_non_zero; i++) {
4382 level = block[j] * qmat[j];
4384 // if( bias+level >= (1<<QMAT_SHIFT)
4385 // || bias-level >= (1<<QMAT_SHIFT)){
4386 if(((unsigned)(level+threshold1))>threshold2){
4388 level= (bias + level)>>QMAT_SHIFT;
4391 level= (bias - level)>>QMAT_SHIFT;
4399 *overflow= s->max_qcoeff < max; //overflow might have happened
4401 /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4402 if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4403 ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4405 return last_non_zero;
/* Shorthand for AVOption tables below: field offset into the private
 * context and the common video-encoding option flags. */
4408 #define OFFSET(x) offsetof(MpegEncContext, x)
4409 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private AVOptions of the H.263 encoder. */
4410 static const AVOption h263_options[] = {
4411 { "obmc", "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4412 { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4413 { "mb_info", "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
/* AVClass exposing h263_options through the generic AVOption API. */
4418 static const AVClass h263_class = {
4419 .class_name = "H.263 encoder",
4420 .item_name = av_default_item_name,
4421 .option = h263_options,
4422 .version = LIBAVUTIL_VERSION_INT,
/* H.263 (H.263-1996) encoder registration, backed by the generic
 * MPV encode entry points. */
4425 AVCodec ff_h263_encoder = {
4427 .long_name = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4428 .type = AVMEDIA_TYPE_VIDEO,
4429 .id = AV_CODEC_ID_H263,
4430 .priv_data_size = sizeof(MpegEncContext),
4431 .init = ff_MPV_encode_init,
4432 .encode2 = ff_MPV_encode_picture,
4433 .close = ff_MPV_encode_end,
4434 .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4435 .priv_class = &h263_class,
/* Private AVOptions of the H.263+ encoder (superset of plain H.263:
 * adds unlimited MVs and alternative inter VLC). */
4438 static const AVOption h263p_options[] = {
4439 { "umv", "Use unlimited motion vectors.", OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4440 { "aiv", "Use alternative inter VLC.", OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4441 { "obmc", "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4442 { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
/* AVClass exposing h263p_options through the generic AVOption API. */
4446 static const AVClass h263p_class = {
4447 .class_name = "H.263p encoder",
4448 .item_name = av_default_item_name,
4449 .option = h263p_options,
4450 .version = LIBAVUTIL_VERSION_INT,
/* H.263+ (H.263-1998 / H.263 version 2) encoder registration; supports
 * slice-threaded encoding via CODEC_CAP_SLICE_THREADS. */
4453 AVCodec ff_h263p_encoder = {
4455 .long_name = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4456 .type = AVMEDIA_TYPE_VIDEO,
4457 .id = AV_CODEC_ID_H263P,
4458 .priv_data_size = sizeof(MpegEncContext),
4459 .init = ff_MPV_encode_init,
4460 .encode2 = ff_MPV_encode_picture,
4461 .close = ff_MPV_encode_end,
4462 .capabilities = CODEC_CAP_SLICE_THREADS,
4463 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4464 .priv_class = &h263p_class,
/* Generic MPV AVClass (declares msmpeg4v2_class with the shared options). */
4467 FF_MPV_GENERIC_CLASS(msmpeg4v2)
/* MS-MPEG4 v2 encoder registration. */
4469 AVCodec ff_msmpeg4v2_encoder = {
4470 .name = "msmpeg4v2",
4471 .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4472 .type = AVMEDIA_TYPE_VIDEO,
4473 .id = AV_CODEC_ID_MSMPEG4V2,
4474 .priv_data_size = sizeof(MpegEncContext),
4475 .init = ff_MPV_encode_init,
4476 .encode2 = ff_MPV_encode_picture,
4477 .close = ff_MPV_encode_end,
4478 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4479 .priv_class = &msmpeg4v2_class,
/* Generic MPV AVClass (declares msmpeg4v3_class with the shared options). */
4482 FF_MPV_GENERIC_CLASS(msmpeg4v3)
/* MS-MPEG4 v3 encoder registration. */
4484 AVCodec ff_msmpeg4v3_encoder = {
4486 .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4487 .type = AVMEDIA_TYPE_VIDEO,
4488 .id = AV_CODEC_ID_MSMPEG4V3,
4489 .priv_data_size = sizeof(MpegEncContext),
4490 .init = ff_MPV_encode_init,
4491 .encode2 = ff_MPV_encode_picture,
4492 .close = ff_MPV_encode_end,
4493 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4494 .priv_class = &msmpeg4v3_class,
/* Generic MPV AVClass (declares wmv1_class with the shared options). */
4497 FF_MPV_GENERIC_CLASS(wmv1)
4499 AVCodec ff_wmv1_encoder = {
4501 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4502 .type = AVMEDIA_TYPE_VIDEO,
4503 .id = AV_CODEC_ID_WMV1,
4504 .priv_data_size = sizeof(MpegEncContext),
4505 .init = ff_MPV_encode_init,
4506 .encode2 = ff_MPV_encode_picture,
4507 .close = ff_MPV_encode_end,
4508 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4509 .priv_class = &wmv1_class,