1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "avcodec.h"
38 #include "dct.h"
39 #include "dsputil.h"
40 #include "mpeg12.h"
41 #include "mpegvideo.h"
42 #include "h261.h"
43 #include "h263.h"
44 #include "mathops.h"
45 #include "mjpegenc.h"
46 #include "msmpeg4.h"
47 #include "faandct.h"
48 #include "thread.h"
49 #include "aandcttab.h"
50 #include "flv.h"
51 #include "mpeg4video.h"
52 #include "internal.h"
53 #include "bytestream.h"
54 #include <limits.h>
55 #include "sp5x.h"
56
57 static int encode_picture(MpegEncContext *s, int picture_number);
58 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
59 static int sse_mb(MpegEncContext *s);
60 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
61 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
62
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* We can safely assume that
87                  *   16 <= qscale * quant_matrix[i] <= 7905,
88                  * so (1 << QMAT_SHIFT) / (qscale * quant_matrix[i]) stays
89                  * well within the range of an int. (The ff_aanscales factor
90                  * only applies to the ifast FDCT branch below.) */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
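/* Editor's note: an illustrative usage sketch, not part of the original file.
 * ff_MPV_encode_init() below precomputes the per-qscale tables with calls
 * essentially like these (bias and matrices depend on the chosen codec):
 *
 *     ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 *                       s->intra_matrix, s->intra_quant_bias,
 *                       avctx->qmin, 31, 1);   // intra = 1
 *     ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 *                       s->inter_matrix, s->inter_quant_bias,
 *                       avctx->qmin, 31, 0);   // intra = 0
 */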
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
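/* Editor's note: a worked example of the mapping above, for illustration only.
 * With FF_LAMBDA_SHIFT = 7 (so FF_LAMBDA_SCALE = 128) and FF_QP2LAMBDA = 118,
 * lambda = 1180 (roughly QP 10) gives
 *
 *     qscale = (1180 * 139 + 128 * 64) >> (7 + 7)
 *            = (164020 + 8192) >> 14
 *            = 10
 *
 * i.e. qscale is approximately lambda / FF_QP2LAMBDA, clipped to [qmin, qmax].
 */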
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void update_duplicate_context_after_me(MpegEncContext *dst,
188                                               MpegEncContext *src)
189 {
190 #define COPY(a) dst->a= src->a
191     COPY(pict_type);
192     COPY(current_picture);
193     COPY(f_code);
194     COPY(b_code);
195     COPY(qscale);
196     COPY(lambda);
197     COPY(lambda2);
198     COPY(picture_in_gop_number);
199     COPY(gop_picture_number);
200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
201     COPY(progressive_frame);    // FIXME don't set in encode_header
202     COPY(partitioned_frame);    // FIXME don't set in encode_header
203 #undef COPY
204 }
205
206 /**
207  * Set the given MpegEncContext to defaults for encoding.
208  * The changed fields will not depend upon the prior state of the MpegEncContext.
209  */
210 static void MPV_encode_defaults(MpegEncContext *s)
211 {
212     int i;
213     ff_MPV_common_defaults(s);
214
215     for (i = -16; i < 16; i++) {
216         default_fcode_tab[i + MAX_MV] = 1;
217     }
218     s->me.mv_penalty = default_mv_penalty;
219     s->fcode_tab     = default_fcode_tab;
220
221     s->input_picture_number  = 0;
222     s->picture_in_gop_number = 0;
223 }
224
225 av_cold int ff_dct_encode_init(MpegEncContext *s) {
226     if (ARCH_X86)
227         ff_dct_encode_init_x86(s);
228
229     if (CONFIG_H263_ENCODER)
230         ff_h263dsp_init(&s->h263dsp);
231     if (!s->dct_quantize)
232         s->dct_quantize = ff_dct_quantize_c;
233     if (!s->denoise_dct)
234         s->denoise_dct  = denoise_dct_c;
235     s->fast_dct_quantize = s->dct_quantize;
236     if (s->avctx->trellis)
237         s->dct_quantize  = dct_quantize_trellis_c;
238
239     return 0;
240 }
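/* Editor's note: an illustrative sketch, not part of the original file.
 * The rest of the encoder only calls through the function pointers set up
 * above, so the trellis option simply changes which quantizer runs, e.g.:
 *
 *     int overflow;
 *     int last_index = s->dct_quantize(s, block, n, qscale, &overflow);
 *     // dct_quantize_trellis_c() if avctx->trellis was set, otherwise
 *     // ff_dct_quantize_c() or an arch-specific version installed earlier
 */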
241
242 /* init video encoder */
243 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
244 {
245     MpegEncContext *s = avctx->priv_data;
246     int i, ret;
247
248     MPV_encode_defaults(s);
249
250     switch (avctx->codec_id) {
251     case AV_CODEC_ID_MPEG2VIDEO:
252         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
253             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
254             av_log(avctx, AV_LOG_ERROR,
255                    "only YUV420 and YUV422 are supported\n");
256             return -1;
257         }
258         break;
259     case AV_CODEC_ID_MJPEG:
260     case AV_CODEC_ID_AMV:
261         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
262             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
263             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
264             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
265               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
266               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
267              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
268             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
269             return -1;
270         }
271         break;
272     default:
273         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
274             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
275             return -1;
276         }
277     }
278
279     switch (avctx->pix_fmt) {
280     case AV_PIX_FMT_YUVJ444P:
281     case AV_PIX_FMT_YUV444P:
282         s->chroma_format = CHROMA_444;
283         break;
284     case AV_PIX_FMT_YUVJ422P:
285     case AV_PIX_FMT_YUV422P:
286         s->chroma_format = CHROMA_422;
287         break;
288     case AV_PIX_FMT_YUVJ420P:
289     case AV_PIX_FMT_YUV420P:
290     default:
291         s->chroma_format = CHROMA_420;
292         break;
293     }
294
295     s->bit_rate = avctx->bit_rate;
296     s->width    = avctx->width;
297     s->height   = avctx->height;
298     if (avctx->gop_size > 600 &&
299         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
300         av_log(avctx, AV_LOG_WARNING,
301                "keyframe interval too large, reducing it from %d to %d\n",
302                avctx->gop_size, 600);
303         avctx->gop_size = 600;
304     }
305     s->gop_size     = avctx->gop_size;
306     s->avctx        = avctx;
307     s->flags        = avctx->flags;
308     s->flags2       = avctx->flags2;
309     if (avctx->max_b_frames > MAX_B_FRAMES) {
310         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
311                "is %d.\n", MAX_B_FRAMES);
312         avctx->max_b_frames = MAX_B_FRAMES;
313     }
314     s->max_b_frames = avctx->max_b_frames;
315     s->codec_id     = avctx->codec->id;
316     s->strict_std_compliance = avctx->strict_std_compliance;
317     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
318     s->mpeg_quant         = avctx->mpeg_quant;
319     s->rtp_mode           = !!avctx->rtp_payload_size;
320     s->intra_dc_precision = avctx->intra_dc_precision;
321     s->user_specified_pts = AV_NOPTS_VALUE;
322
323     if (s->gop_size <= 1) {
324         s->intra_only = 1;
325         s->gop_size   = 12;
326     } else {
327         s->intra_only = 0;
328     }
329
330     s->me_method = avctx->me_method;
331
332     /* Fixed QSCALE */
333     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
334
335     s->adaptive_quant = (s->avctx->lumi_masking ||
336                          s->avctx->dark_masking ||
337                          s->avctx->temporal_cplx_masking ||
338                          s->avctx->spatial_cplx_masking  ||
339                          s->avctx->p_masking      ||
340                          s->avctx->border_masking ||
341                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
342                         !s->fixed_qscale;
343
344     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
345
346     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
347         switch(avctx->codec_id) {
348         case AV_CODEC_ID_MPEG1VIDEO:
349         case AV_CODEC_ID_MPEG2VIDEO:
350             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
351             break;
352         case AV_CODEC_ID_MPEG4:
353         case AV_CODEC_ID_MSMPEG4V1:
354         case AV_CODEC_ID_MSMPEG4V2:
355         case AV_CODEC_ID_MSMPEG4V3:
356             if       (avctx->rc_max_rate >= 15000000) {
357                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
358             } else if(avctx->rc_max_rate >=  2000000) {
359                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
360             } else if(avctx->rc_max_rate >=   384000) {
361                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
362             } else
363                 avctx->rc_buffer_size = 40;
364             avctx->rc_buffer_size *= 16384;
365             break;
366         }
367         if (avctx->rc_buffer_size) {
368             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
369         }
370     }
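    /* Editor's note: a worked example of the heuristic above, for illustration.
     * For MPEG-1/2 with rc_max_rate = 9800000 and no buffer size given:
     *
     *     rc_buffer_size = FFMAX(9800000, 15000000) * 112L / 15000000 * 16384
     *                    = 112 * 16384 = 1835008 bits  (~224 kbyte)
     *
     * i.e. rates below 15 Mb/s are floored to the VBV size of a 15 Mb/s stream. */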
371
372     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
373         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
374         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
375             return -1;
376     }
377
378     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
379         av_log(avctx, AV_LOG_INFO,
380                "Warning: setting min_rate > 0 with min_rate != max_rate is not recommended!\n");
381     }
382
383     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
384         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
385         return -1;
386     }
387
388     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
389         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
390         return -1;
391     }
392
393     if (avctx->rc_max_rate &&
394         avctx->rc_max_rate == avctx->bit_rate &&
395         avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "impossible bitrate constraints, this will fail\n");
398     }
399
400     if (avctx->rc_buffer_size &&
401         avctx->bit_rate * (int64_t)avctx->time_base.num >
402             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
403         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
404         return -1;
405     }
406
407     if (!s->fixed_qscale &&
408         avctx->bit_rate * av_q2d(avctx->time_base) >
409             avctx->bit_rate_tolerance) {
410         av_log(avctx, AV_LOG_ERROR,
411                "bitrate tolerance %d too small for bitrate %d\n", avctx->bit_rate_tolerance, avctx->bit_rate);
412         return -1;
413     }
414
415     if (s->avctx->rc_max_rate &&
416         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
417         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
418          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
419         90000LL * (avctx->rc_buffer_size - 1) >
420             s->avctx->rc_max_rate * 0xFFFFLL) {
421         av_log(avctx, AV_LOG_INFO,
422                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
423                "specified vbv buffer is too large for the given bitrate!\n");
424     }
425
426     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
427         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
428         s->codec_id != AV_CODEC_ID_FLV1) {
429         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
430         return -1;
431     }
432
433     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
434         av_log(avctx, AV_LOG_ERROR,
435                "OBMC is only supported with simple mb decision\n");
436         return -1;
437     }
438
439     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
440         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
441         return -1;
442     }
443
444     if (s->max_b_frames                    &&
445         s->codec_id != AV_CODEC_ID_MPEG4      &&
446         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
447         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
448         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
449         return -1;
450     }
451     if (s->max_b_frames < 0) {
452         av_log(avctx, AV_LOG_ERROR,
453                "max b frames must be 0 or positive for mpegvideo based encoders\n");
454         return -1;
455     }
456
457     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
458          s->codec_id == AV_CODEC_ID_H263  ||
459          s->codec_id == AV_CODEC_ID_H263P) &&
460         (avctx->sample_aspect_ratio.num > 255 ||
461          avctx->sample_aspect_ratio.den > 255)) {
462         av_log(avctx, AV_LOG_WARNING,
463                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
464                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
465         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
466                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
467     }
468
469     if ((s->codec_id == AV_CODEC_ID_H263  ||
470          s->codec_id == AV_CODEC_ID_H263P) &&
471         (avctx->width  > 2048 ||
472          avctx->height > 1152 )) {
473         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
474         return -1;
475     }
476     if ((s->codec_id == AV_CODEC_ID_H263  ||
477          s->codec_id == AV_CODEC_ID_H263P) &&
478         ((avctx->width &3) ||
479          (avctx->height&3) )) {
480         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
481         return -1;
482     }
483
484     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
485         (avctx->width  > 4095 ||
486          avctx->height > 4095 )) {
487         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
488         return -1;
489     }
490
491     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
492         (avctx->width  > 16383 ||
493          avctx->height > 16383 )) {
494         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
495         return -1;
496     }
497
498     if (s->codec_id == AV_CODEC_ID_RV10 &&
499         (avctx->width &15 ||
500          avctx->height&15 )) {
501         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
502         return AVERROR(EINVAL);
503     }
504
505     if (s->codec_id == AV_CODEC_ID_RV20 &&
506         (avctx->width &3 ||
507          avctx->height&3 )) {
508         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
509         return AVERROR(EINVAL);
510     }
511
512     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
513          s->codec_id == AV_CODEC_ID_WMV2) &&
514          avctx->width & 1) {
515          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
516          return -1;
517     }
518
519     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
520         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
521         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
522         return -1;
523     }
524
525     // FIXME mpeg2 uses that too
526     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
527                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
528         av_log(avctx, AV_LOG_ERROR,
529                "mpeg2 style quantization not supported by codec\n");
530         return -1;
531     }
532
533     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
534         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
535         return -1;
536     }
537
538     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
539         s->avctx->mb_decision != FF_MB_DECISION_RD) {
540         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
541         return -1;
542     }
543
544     if (s->avctx->scenechange_threshold < 1000000000 &&
545         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
546         av_log(avctx, AV_LOG_ERROR,
547                "closed GOP with scene change detection is not supported yet, "
548                "set threshold to 1000000000\n");
549         return -1;
550     }
551
552     if (s->flags & CODEC_FLAG_LOW_DELAY) {
553         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
554             av_log(avctx, AV_LOG_ERROR,
555                   "low delay forcing is only available for mpeg2\n");
556             return -1;
557         }
558         if (s->max_b_frames != 0) {
559             av_log(avctx, AV_LOG_ERROR,
560                    "b frames cannot be used with low delay\n");
561             return -1;
562         }
563     }
564
565     if (s->q_scale_type == 1) {
566         if (avctx->qmax > 12) {
567             av_log(avctx, AV_LOG_ERROR,
568                    "non linear quant only supports qmax <= 12 currently\n");
569             return -1;
570         }
571     }
572
573     if (s->avctx->thread_count > 1         &&
574         s->codec_id != AV_CODEC_ID_MPEG4      &&
575         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
576         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
577         s->codec_id != AV_CODEC_ID_MJPEG      &&
578         (s->codec_id != AV_CODEC_ID_H263P)) {
579         av_log(avctx, AV_LOG_ERROR,
580                "multi threaded encoding not supported by codec\n");
581         return -1;
582     }
583
584     if (s->avctx->thread_count < 1) {
585         av_log(avctx, AV_LOG_ERROR,
586                "automatic thread number detection not supported by codec, "
587                "patch welcome\n");
588         return -1;
589     }
590
591     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
592         s->rtp_mode = 1;
593
594     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
595         s->h263_slice_structured = 1;
596
597     if (!avctx->time_base.den || !avctx->time_base.num) {
598         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
599         return -1;
600     }
601
602     i = (INT_MAX / 2 + 128) >> 8;
603     if (avctx->mb_threshold >= i) {
604         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
605                i - 1);
606         return -1;
607     }
608
609     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
610         av_log(avctx, AV_LOG_INFO,
611                "notice: b_frame_strategy only affects the first pass\n");
612         avctx->b_frame_strategy = 0;
613     }
614
615     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
616     if (i > 1) {
617         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
618         avctx->time_base.den /= i;
619         avctx->time_base.num /= i;
620         //return -1;
621     }
622
623     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
624         // (a + x * 3 / 8) / x
625         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
626         s->inter_quant_bias = 0;
627     } else {
628         s->intra_quant_bias = 0;
629         // (a - x / 4) / x
630         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
631     }
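    /* Editor's note (illustration): with QUANT_BIAS_SHIFT assumed to be 8,
     * the values above are fixed-point fractions of one quantization step:
     *     intra:   3 << (8 - 3)  =  96  ->  +96/256 = +3/8  ("(a + x * 3 / 8) / x")
     *     inter: -(1 << (8 - 2)) = -64  ->  -64/256 = -1/4  ("(a - x / 4) / x")
     */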
632
633     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
634         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < qmin <= qmax\n");
635         return AVERROR(EINVAL);
636     }
637
638     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
639         s->intra_quant_bias = avctx->intra_quant_bias;
640     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
641         s->inter_quant_bias = avctx->inter_quant_bias;
642
643     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
644
645     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
646         s->avctx->time_base.den > (1 << 16) - 1) {
647         av_log(avctx, AV_LOG_ERROR,
648                "timebase %d/%d not supported by MPEG 4 standard, "
649                "the maximum admitted value for the timebase denominator "
650                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
651                (1 << 16) - 1);
652         return -1;
653     }
654     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
655
656     switch (avctx->codec->id) {
657     case AV_CODEC_ID_MPEG1VIDEO:
658         s->out_format = FMT_MPEG1;
659         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
660         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
661         break;
662     case AV_CODEC_ID_MPEG2VIDEO:
663         s->out_format = FMT_MPEG1;
664         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
665         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
666         s->rtp_mode   = 1;
667         break;
668     case AV_CODEC_ID_MJPEG:
669     case AV_CODEC_ID_AMV:
670         s->out_format = FMT_MJPEG;
671         s->intra_only = 1; /* force intra only for jpeg */
672         if (!CONFIG_MJPEG_ENCODER ||
673             ff_mjpeg_encode_init(s) < 0)
674             return -1;
675         avctx->delay = 0;
676         s->low_delay = 1;
677         break;
678     case AV_CODEC_ID_H261:
679         if (!CONFIG_H261_ENCODER)
680             return -1;
681         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
682             av_log(avctx, AV_LOG_ERROR,
683                    "The specified picture size of %dx%d is not valid for the "
684                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
685                     s->width, s->height);
686             return -1;
687         }
688         s->out_format = FMT_H261;
689         avctx->delay  = 0;
690         s->low_delay  = 1;
691         break;
692     case AV_CODEC_ID_H263:
693         if (!CONFIG_H263_ENCODER)
694             return -1;
695         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
696                              s->width, s->height) == 8) {
697             av_log(avctx, AV_LOG_ERROR,
698                    "The specified picture size of %dx%d is not valid for "
699                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
700                    "352x288, 704x576, and 1408x1152. "
701                    "Try H.263+.\n", s->width, s->height);
702             return -1;
703         }
704         s->out_format = FMT_H263;
705         avctx->delay  = 0;
706         s->low_delay  = 1;
707         break;
708     case AV_CODEC_ID_H263P:
709         s->out_format = FMT_H263;
710         s->h263_plus  = 1;
711         /* Fx */
712         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
713         s->modified_quant  = s->h263_aic;
714         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
715         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
716
717         /* /Fx */
718         /* These are just to be sure */
719         avctx->delay = 0;
720         s->low_delay = 1;
721         break;
722     case AV_CODEC_ID_FLV1:
723         s->out_format      = FMT_H263;
724         s->h263_flv        = 2; /* format = 1; 11-bit codes */
725         s->unrestricted_mv = 1;
726         s->rtp_mode  = 0; /* don't allow GOB */
727         avctx->delay = 0;
728         s->low_delay = 1;
729         break;
730     case AV_CODEC_ID_RV10:
731         s->out_format = FMT_H263;
732         avctx->delay  = 0;
733         s->low_delay  = 1;
734         break;
735     case AV_CODEC_ID_RV20:
736         s->out_format      = FMT_H263;
737         avctx->delay       = 0;
738         s->low_delay       = 1;
739         s->modified_quant  = 1;
740         s->h263_aic        = 1;
741         s->h263_plus       = 1;
742         s->loop_filter     = 1;
743         s->unrestricted_mv = 0;
744         break;
745     case AV_CODEC_ID_MPEG4:
746         s->out_format      = FMT_H263;
747         s->h263_pred       = 1;
748         s->unrestricted_mv = 1;
749         s->low_delay       = s->max_b_frames ? 0 : 1;
750         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
751         break;
752     case AV_CODEC_ID_MSMPEG4V2:
753         s->out_format      = FMT_H263;
754         s->h263_pred       = 1;
755         s->unrestricted_mv = 1;
756         s->msmpeg4_version = 2;
757         avctx->delay       = 0;
758         s->low_delay       = 1;
759         break;
760     case AV_CODEC_ID_MSMPEG4V3:
761         s->out_format        = FMT_H263;
762         s->h263_pred         = 1;
763         s->unrestricted_mv   = 1;
764         s->msmpeg4_version   = 3;
765         s->flipflop_rounding = 1;
766         avctx->delay         = 0;
767         s->low_delay         = 1;
768         break;
769     case AV_CODEC_ID_WMV1:
770         s->out_format        = FMT_H263;
771         s->h263_pred         = 1;
772         s->unrestricted_mv   = 1;
773         s->msmpeg4_version   = 4;
774         s->flipflop_rounding = 1;
775         avctx->delay         = 0;
776         s->low_delay         = 1;
777         break;
778     case AV_CODEC_ID_WMV2:
779         s->out_format        = FMT_H263;
780         s->h263_pred         = 1;
781         s->unrestricted_mv   = 1;
782         s->msmpeg4_version   = 5;
783         s->flipflop_rounding = 1;
784         avctx->delay         = 0;
785         s->low_delay         = 1;
786         break;
787     default:
788         return -1;
789     }
790
791     avctx->has_b_frames = !s->low_delay;
792
793     s->encoding = 1;
794
795     s->progressive_frame    =
796     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
797                                                 CODEC_FLAG_INTERLACED_ME) ||
798                                 s->alternate_scan);
799
800     /* init */
801     if (ff_MPV_common_init(s) < 0)
802         return -1;
803
804     s->avctx->coded_frame = &s->current_picture.f;
805
806     if (s->msmpeg4_version) {
807         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
808                           2 * 2 * (MAX_LEVEL + 1) *
809                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
810     }
811     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
812
813     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
814     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
819     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
820                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
821     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
822                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
823
824     if (s->avctx->noise_reduction) {
825         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
826                           2 * 64 * sizeof(uint16_t), fail);
827     }
828
829     ff_dct_encode_init(s);
830
831     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
832         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
833
834     s->quant_precision = 5;
835
836     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
837     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
838
839     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
840         ff_h261_encode_init(s);
841     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
842         ff_h263_encode_init(s);
843     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
844         ff_msmpeg4_encode_init(s);
845     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
846         && s->out_format == FMT_MPEG1)
847         ff_mpeg1_encode_init(s);
848
849     /* init q matrix */
850     for (i = 0; i < 64; i++) {
851         int j = s->dsp.idct_permutation[i];
852         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
853             s->mpeg_quant) {
854             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
855             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
856         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
857             s->intra_matrix[j] =
858             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
859         } else {
860             /* mpeg1/2 */
861             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
862             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
863         }
864         if (s->avctx->intra_matrix)
865             s->intra_matrix[j] = s->avctx->intra_matrix[i];
866         if (s->avctx->inter_matrix)
867             s->inter_matrix[j] = s->avctx->inter_matrix[i];
868     }
869
870     /* precompute matrix */
871     /* for mjpeg, we do include qscale in the matrix */
872     if (s->out_format != FMT_MJPEG) {
873         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
874                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
875                           31, 1);
876         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
877                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
878                           31, 0);
879     }
880
881     if (ff_rate_control_init(s) < 0)
882         return -1;
883
884 #if FF_API_ERROR_RATE
885     FF_DISABLE_DEPRECATION_WARNINGS
886     if (avctx->error_rate)
887         s->error_rate = avctx->error_rate;
888     FF_ENABLE_DEPRECATION_WARNINGS;
889 #endif
890
891     if (avctx->b_frame_strategy == 2) {
892         for (i = 0; i < s->max_b_frames + 2; i++) {
893             s->tmp_frames[i] = av_frame_alloc();
894             if (!s->tmp_frames[i])
895                 return AVERROR(ENOMEM);
896
897             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
898             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
899             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
900
901             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
902             if (ret < 0)
903                 return ret;
904         }
905     }
906
907     return 0;
908 fail:
909     ff_MPV_encode_end(avctx);
910     return AVERROR_UNKNOWN;
911 }
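/* Editor's note: an illustrative sketch of how this init function is reached,
 * not part of the original file. Applications open one of the mpegvideo-based
 * encoders through the public libavcodec API; error handling is omitted:
 *
 *     AVCodec *codec    = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
 *     AVCodecContext *c = avcodec_alloc_context3(codec);
 *     c->width          = 640;
 *     c->height         = 480;
 *     c->time_base      = (AVRational){ 1, 25 };
 *     c->pix_fmt        = AV_PIX_FMT_YUV420P;
 *     c->bit_rate       = 800000;
 *     c->gop_size       = 12;
 *     c->max_b_frames   = 2;
 *     if (avcodec_open2(c, codec, NULL) < 0)  // ends up in ff_MPV_encode_init()
 *         exit(1);
 */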
912
913 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
914 {
915     MpegEncContext *s = avctx->priv_data;
916     int i;
917
918     ff_rate_control_uninit(s);
919
920     ff_MPV_common_end(s);
921     if (CONFIG_MJPEG_ENCODER &&
922         s->out_format == FMT_MJPEG)
923         ff_mjpeg_encode_close(s);
924
925     av_freep(&avctx->extradata);
926
927     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
928         av_frame_free(&s->tmp_frames[i]);
929
930     ff_free_picture_tables(&s->new_picture);
931     ff_mpeg_unref_picture(s, &s->new_picture);
932
933     av_freep(&s->avctx->stats_out);
934     av_freep(&s->ac_stats);
935
936     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
937     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
938     s->q_chroma_intra_matrix=   NULL;
939     s->q_chroma_intra_matrix16= NULL;
940     av_freep(&s->q_intra_matrix);
941     av_freep(&s->q_inter_matrix);
942     av_freep(&s->q_intra_matrix16);
943     av_freep(&s->q_inter_matrix16);
944     av_freep(&s->input_picture);
945     av_freep(&s->reordered_input_picture);
946     av_freep(&s->dct_offset);
947
948     return 0;
949 }
950
951 static int get_sae(uint8_t *src, int ref, int stride)
952 {
953     int x,y;
954     int acc = 0;
955
956     for (y = 0; y < 16; y++) {
957         for (x = 0; x < 16; x++) {
958             acc += FFABS(src[x + y * stride] - ref);
959         }
960     }
961
962     return acc;
963 }
964
965 static int get_intra_count(MpegEncContext *s, uint8_t *src,
966                            uint8_t *ref, int stride)
967 {
968     int x, y, w, h;
969     int acc = 0;
970
971     w = s->width  & ~15;
972     h = s->height & ~15;
973
974     for (y = 0; y < h; y += 16) {
975         for (x = 0; x < w; x += 16) {
976             int offset = x + y * stride;
977             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
978                                      16);
979             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
980             int sae  = get_sae(src + offset, mean, stride);
981
982             acc += sae + 500 < sad;
983         }
984     }
985     return acc;
986 }
987
988
989 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
990 {
991     Picture *pic = NULL;
992     int64_t pts;
993     int i, display_picture_number = 0, ret;
994     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
995                                                  (s->low_delay ? 0 : 1);
996     int direct = 1;
997
998     if (pic_arg) {
999         pts = pic_arg->pts;
1000         display_picture_number = s->input_picture_number++;
1001
1002         if (pts != AV_NOPTS_VALUE) {
1003             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1004                 int64_t last = s->user_specified_pts;
1005
1006                 if (pts <= last) {
1007                     av_log(s->avctx, AV_LOG_ERROR,
1008                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1009                            pts, last);
1010                     return AVERROR(EINVAL);
1011                 }
1012
1013                 if (!s->low_delay && display_picture_number == 1)
1014                     s->dts_delta = pts - last;
1015             }
1016             s->user_specified_pts = pts;
1017         } else {
1018             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1019                 s->user_specified_pts =
1020                 pts = s->user_specified_pts + 1;
1021                 av_log(s->avctx, AV_LOG_INFO,
1022                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1023                        pts);
1024             } else {
1025                 pts = display_picture_number;
1026             }
1027         }
1028     }
1029
1030     if (pic_arg) {
1031         if (!pic_arg->buf[0])
1032             direct = 0;
1033         if (pic_arg->linesize[0] != s->linesize)
1034             direct = 0;
1035         if (pic_arg->linesize[1] != s->uvlinesize)
1036             direct = 0;
1037         if (pic_arg->linesize[2] != s->uvlinesize)
1038             direct = 0;
1039         if ((s->width & 15) || (s->height & 15))
1040             direct = 0;
1041
1042         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1043                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1044
1045         if (direct) {
1046             i = ff_find_unused_picture(s, 1);
1047             if (i < 0)
1048                 return i;
1049
1050             pic = &s->picture[i];
1051             pic->reference = 3;
1052
1053             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1054                 return ret;
1055             if (ff_alloc_picture(s, pic, 1) < 0) {
1056                 return -1;
1057             }
1058         } else {
1059             i = ff_find_unused_picture(s, 0);
1060             if (i < 0)
1061                 return i;
1062
1063             pic = &s->picture[i];
1064             pic->reference = 3;
1065
1066             if (ff_alloc_picture(s, pic, 0) < 0) {
1067                 return -1;
1068             }
1069
1070             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1071                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1072                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1073                 // empty
1074             } else {
1075                 int h_chroma_shift, v_chroma_shift;
1076                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1077                                                  &h_chroma_shift,
1078                                                  &v_chroma_shift);
1079
1080                 for (i = 0; i < 3; i++) {
1081                     int src_stride = pic_arg->linesize[i];
1082                     int dst_stride = i ? s->uvlinesize : s->linesize;
1083                     int h_shift = i ? h_chroma_shift : 0;
1084                     int v_shift = i ? v_chroma_shift : 0;
1085                     int w = s->width  >> h_shift;
1086                     int h = s->height >> v_shift;
1087                     uint8_t *src = pic_arg->data[i];
1088                     uint8_t *dst = pic->f.data[i];
1089
1090                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1091                         h = ((s->height + 15)/16*16) >> v_shift;
1092                     }
1093
1094                     if (!s->avctx->rc_buffer_size)
1095                         dst += INPLACE_OFFSET;
1096
1097                     if (src_stride == dst_stride)
1098                         memcpy(dst, src, src_stride * h);
1099                     else {
1100                         int h2 = h;
1101                         uint8_t *dst2 = dst;
1102                         while (h2--) {
1103                             memcpy(dst2, src, w);
1104                             dst2 += dst_stride;
1105                             src += src_stride;
1106                         }
1107                     }
1108                     if ((s->width & 15) || (s->height & 15)) {
1109                         s->dsp.draw_edges(dst, dst_stride,
1110                                           w, h,
1111                                           16>>h_shift,
1112                                           16>>v_shift,
1113                                           EDGE_BOTTOM);
1114                     }
1115                 }
1116             }
1117         }
1118         ret = av_frame_copy_props(&pic->f, pic_arg);
1119         if (ret < 0)
1120             return ret;
1121
1122         pic->f.display_picture_number = display_picture_number;
1123         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
1124     }
1125
1126     /* shift buffer entries */
1127     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1128         s->input_picture[i - 1] = s->input_picture[i];
1129
1130     s->input_picture[encoding_delay] = (Picture*) pic;
1131
1132     return 0;
1133 }
1134
1135 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1136 {
1137     int x, y, plane;
1138     int score = 0;
1139     int64_t score64 = 0;
1140
1141     for (plane = 0; plane < 3; plane++) {
1142         const int stride = p->f.linesize[plane];
1143         const int bw = plane ? 1 : 2;
1144         for (y = 0; y < s->mb_height * bw; y++) {
1145             for (x = 0; x < s->mb_width * bw; x++) {
1146                 int off = p->shared ? 0 : 16;
1147                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1148                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1149                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1150
1151                 switch (FFABS(s->avctx->frame_skip_exp)) {
1152                 case 0: score    =  FFMAX(score, v);          break;
1153                 case 1: score   += FFABS(v);                  break;
1154                 case 2: score64 += v * (int64_t)v;                       break;
1155                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1156                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1157                 }
1158             }
1159         }
1160     }
1161     emms_c();
1162
1163     if (score)
1164         score64 = score;
1165     if (s->avctx->frame_skip_exp < 0)
1166         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1167                       -1.0/s->avctx->frame_skip_exp);
1168
1169     if (score64 < s->avctx->frame_skip_threshold)
1170         return 1;
1171     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1172         return 1;
1173     return 0;
1174 }
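/* Editor's note (illustration): the per-8x8 comparison results v above are
 * folded into a single score depending on |frame_skip_exp|:
 *     0: max(v)    1: sum |v|    2: sum v^2    3: sum |v|^3    4: sum v^4
 * and a negative exponent additionally averages the sum over the macroblocks
 * and takes the |frame_skip_exp|-th root before the thresholds are applied. */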
1175
1176 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1177 {
1178     AVPacket pkt = { 0 };
1179     int ret, got_output;
1180
1181     av_init_packet(&pkt);
1182     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1183     if (ret < 0)
1184         return ret;
1185
1186     ret = pkt.size;
1187     av_free_packet(&pkt);
1188     return ret;
1189 }
1190
1191 static int estimate_best_b_count(MpegEncContext *s)
1192 {
1193     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1194     AVCodecContext *c = avcodec_alloc_context3(NULL);
1195     const int scale = s->avctx->brd_scale;
1196     int i, j, out_size, p_lambda, b_lambda, lambda2;
1197     int64_t best_rd  = INT64_MAX;
1198     int best_b_count = -1;
1199
1200     av_assert0(scale >= 0 && scale <= 3);
1201
1202     //emms_c();
1203     //s->next_picture_ptr->quality;
1204     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1205     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1206     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1207     if (!b_lambda) // FIXME we should do this somewhere else
1208         b_lambda = p_lambda;
1209     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1210                FF_LAMBDA_SHIFT;
1211
1212     c->width        = s->width  >> scale;
1213     c->height       = s->height >> scale;
1214     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1215                       CODEC_FLAG_INPUT_PRESERVED;
1216     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1217     c->mb_decision  = s->avctx->mb_decision;
1218     c->me_cmp       = s->avctx->me_cmp;
1219     c->mb_cmp       = s->avctx->mb_cmp;
1220     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1221     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1222     c->time_base    = s->avctx->time_base;
1223     c->max_b_frames = s->max_b_frames;
1224
1225     if (avcodec_open2(c, codec, NULL) < 0)
1226         return -1;
1227
1228     for (i = 0; i < s->max_b_frames + 2; i++) {
1229         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1230                                                 s->next_picture_ptr;
1231
1232         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1233             pre_input = *pre_input_ptr;
1234
1235             if (!pre_input.shared && i) {
1236                 pre_input.f.data[0] += INPLACE_OFFSET;
1237                 pre_input.f.data[1] += INPLACE_OFFSET;
1238                 pre_input.f.data[2] += INPLACE_OFFSET;
1239             }
1240
1241             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1242                                  pre_input.f.data[0], pre_input.f.linesize[0],
1243                                  c->width,      c->height);
1244             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1245                                  pre_input.f.data[1], pre_input.f.linesize[1],
1246                                  c->width >> 1, c->height >> 1);
1247             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1248                                  pre_input.f.data[2], pre_input.f.linesize[2],
1249                                  c->width >> 1, c->height >> 1);
1250         }
1251     }
1252
1253     for (j = 0; j < s->max_b_frames + 1; j++) {
1254         int64_t rd = 0;
1255
1256         if (!s->input_picture[j])
1257             break;
1258
1259         c->error[0] = c->error[1] = c->error[2] = 0;
1260
1261         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1262         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1263
1264         out_size = encode_frame(c, s->tmp_frames[0]);
1265
1266         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1267
1268         for (i = 0; i < s->max_b_frames + 1; i++) {
1269             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1270
1271             s->tmp_frames[i + 1]->pict_type = is_p ?
1272                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1273             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1274
1275             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1276
1277             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1278         }
1279
1280         /* get the delayed frames */
1281         while (out_size) {
1282             out_size = encode_frame(c, NULL);
1283             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1284         }
1285
1286         rd += c->error[0] + c->error[1] + c->error[2];
1287
1288         if (rd < best_rd) {
1289             best_rd = rd;
1290             best_b_count = j;
1291         }
1292     }
1293
1294     avcodec_close(c);
1295     av_freep(&c);
1296
1297     return best_b_count;
1298 }
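/* Editor's note: an illustrative summary, not part of the original file.
 * With b_frame_strategy == 2 the candidate B-frame counts are evaluated by
 * actually encoding the downscaled input pictures; e.g. for max_b_frames == 2
 * the loop above tries the coding patterns
 *
 *     j = 0:   I  P  P  P
 *     j = 1:   I  B  P  P
 *     j = 2:   I  B  B  P
 *
 * where rd sums out_size * lambda2 plus the SSE reported in c->error[], and
 * the j with the smallest rd is returned as the B-frame count to use. */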
1299
1300 static int select_input_picture(MpegEncContext *s)
1301 {
1302     int i, ret;
1303
1304     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1305         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1306     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1307
1308     /* set next picture type & ordering */
1309     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1310         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1311             if (s->picture_in_gop_number < s->gop_size &&
1312                 s->next_picture_ptr &&
1313                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1314                 // FIXME check that the GOP check above is +-1 correct
1315                 av_frame_unref(&s->input_picture[0]->f);
1316
1317                 ff_vbv_update(s, 0);
1318
1319                 goto no_output_pic;
1320             }
1321         }
1322
1323         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1324             s->next_picture_ptr == NULL || s->intra_only) {
1325             s->reordered_input_picture[0] = s->input_picture[0];
1326             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1327             s->reordered_input_picture[0]->f.coded_picture_number =
1328                 s->coded_picture_number++;
1329         } else {
1330             int b_frames;
1331
1332             if (s->flags & CODEC_FLAG_PASS2) {
1333                 for (i = 0; i < s->max_b_frames + 1; i++) {
1334                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1335
1336                     if (pict_num >= s->rc_context.num_entries)
1337                         break;
1338                     if (!s->input_picture[i]) {
1339                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1340                         break;
1341                     }
1342
1343                     s->input_picture[i]->f.pict_type =
1344                         s->rc_context.entry[pict_num].new_pict_type;
1345                 }
1346             }
1347
1348             if (s->avctx->b_frame_strategy == 0) {
1349                 b_frames = s->max_b_frames;
1350                 while (b_frames && !s->input_picture[b_frames])
1351                     b_frames--;
1352             } else if (s->avctx->b_frame_strategy == 1) {
1353                 for (i = 1; i < s->max_b_frames + 1; i++) {
1354                     if (s->input_picture[i] &&
1355                         s->input_picture[i]->b_frame_score == 0) {
1356                         s->input_picture[i]->b_frame_score =
1357                             get_intra_count(s,
1358                                             s->input_picture[i    ]->f.data[0],
1359                                             s->input_picture[i - 1]->f.data[0],
1360                                             s->linesize) + 1;
1361                     }
1362                 }
1363                 for (i = 0; i < s->max_b_frames + 1; i++) {
1364                     if (s->input_picture[i] == NULL ||
1365                         s->input_picture[i]->b_frame_score - 1 >
1366                             s->mb_num / s->avctx->b_sensitivity)
1367                         break;
1368                 }
1369
1370                 b_frames = FFMAX(0, i - 1);
1371
1372                 /* reset scores */
1373                 for (i = 0; i < b_frames + 1; i++) {
1374                     s->input_picture[i]->b_frame_score = 0;
1375                 }
1376             } else if (s->avctx->b_frame_strategy == 2) {
1377                 b_frames = estimate_best_b_count(s);
1378             } else {
1379                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1380                 b_frames = 0;
1381             }
1382
1383             emms_c();
1384
1385             for (i = b_frames - 1; i >= 0; i--) {
1386                 int type = s->input_picture[i]->f.pict_type;
1387                 if (type && type != AV_PICTURE_TYPE_B)
1388                     b_frames = i;
1389             }
1390             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1391                 b_frames == s->max_b_frames) {
1392                 av_log(s->avctx, AV_LOG_ERROR,
1393                        "warning, too many b frames in a row\n");
1394             }
1395
1396             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1397                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1398                     s->gop_size > s->picture_in_gop_number) {
1399                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1400                 } else {
1401                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1402                         b_frames = 0;
1403                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1404                 }
1405             }
1406
1407             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1408                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1409                 b_frames--;
1410
1411             s->reordered_input_picture[0] = s->input_picture[b_frames];
1412             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1413                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1414             s->reordered_input_picture[0]->f.coded_picture_number =
1415                 s->coded_picture_number++;
1416             for (i = 0; i < b_frames; i++) {
1417                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1418                 s->reordered_input_picture[i + 1]->f.pict_type =
1419                     AV_PICTURE_TYPE_B;
1420                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1421                     s->coded_picture_number++;
1422             }
1423         }
1424     }
1425 no_output_pic:
1426     if (s->reordered_input_picture[0]) {
1427         s->reordered_input_picture[0]->reference =
1428            s->reordered_input_picture[0]->f.pict_type !=
1429                AV_PICTURE_TYPE_B ? 3 : 0;
1430
1431         ff_mpeg_unref_picture(s, &s->new_picture);
1432         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1433             return ret;
1434
1435         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1436             // input is a shared pix, so we can't modify it -> allocate a new
1437             // one & ensure that the shared one is reusable
1438
1439             Picture *pic;
1440             int i = ff_find_unused_picture(s, 0);
1441             if (i < 0)
1442                 return i;
1443             pic = &s->picture[i];
1444
1445             pic->reference = s->reordered_input_picture[0]->reference;
1446             if (ff_alloc_picture(s, pic, 0) < 0) {
1447                 return -1;
1448             }
1449
1450             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1451             if (ret < 0)
1452                 return ret;
1453
1454             /* mark us unused / free shared pic */
1455             av_frame_unref(&s->reordered_input_picture[0]->f);
1456             s->reordered_input_picture[0]->shared = 0;
1457
1458             s->current_picture_ptr = pic;
1459         } else {
1460             // input is not a shared pix -> reuse buffer for current_pix
1461             s->current_picture_ptr = s->reordered_input_picture[0];
1462             for (i = 0; i < 4; i++) {
1463                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1464             }
1465         }
1466         ff_mpeg_unref_picture(s, &s->current_picture);
1467         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1468                                        s->current_picture_ptr)) < 0)
1469             return ret;
1470
1471         s->picture_number = s->new_picture.f.display_picture_number;
1472     } else {
1473         ff_mpeg_unref_picture(s, &s->new_picture);
1474     }
1475     return 0;
1476 }
1477
1478 static void frame_end(MpegEncContext *s)
1479 {
1480     if (s->unrestricted_mv &&
1481         s->current_picture.reference &&
1482         !s->intra_only) {
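             /* Pad the reconstructed reference frame by EDGE_WIDTH pixels on
              * each border, so that motion vectors pointing outside the
              * picture (unrestricted MV) still read valid data. */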
1483         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1484         int hshift = desc->log2_chroma_w;
1485         int vshift = desc->log2_chroma_h;
1486         s->dsp.draw_edges(s->current_picture.f.data[0], s->current_picture.f.linesize[0],
1487                           s->h_edge_pos, s->v_edge_pos,
1488                           EDGE_WIDTH, EDGE_WIDTH,
1489                           EDGE_TOP | EDGE_BOTTOM);
1490         s->dsp.draw_edges(s->current_picture.f.data[1], s->current_picture.f.linesize[1],
1491                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1492                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1493                           EDGE_TOP | EDGE_BOTTOM);
1494         s->dsp.draw_edges(s->current_picture.f.data[2], s->current_picture.f.linesize[2],
1495                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1496                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1497                           EDGE_TOP | EDGE_BOTTOM);
1498     }
1499
1500     emms_c();
1501
1502     s->last_pict_type                 = s->pict_type;
1503     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1504     if (s->pict_type!= AV_PICTURE_TYPE_B)
1505         s->last_non_b_pict_type = s->pict_type;
1506
1507     s->avctx->coded_frame = &s->current_picture_ptr->f;
1508
1509 }
1510
1511 static void update_noise_reduction(MpegEncContext *s)
1512 {
1513     int intra, i;
1514
1515     for (intra = 0; intra < 2; intra++) {
1516         if (s->dct_count[intra] > (1 << 16)) {
1517             for (i = 0; i < 64; i++) {
1518                 s->dct_error_sum[intra][i] >>= 1;
1519             }
1520             s->dct_count[intra] >>= 1;
1521         }
1522
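             /* Recompute the denoising offsets that denoise_dct_c() subtracts
              * from the DCT coefficients before quantization:
              * offset[i] ~= noise_reduction * count / error_sum[i], so
              * coefficients with a small accumulated error_sum get the
              * largest shrinkage.  The halving above keeps the statistics
              * weighted towards recent frames. */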
1523         for (i = 0; i < 64; i++) {
1524             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1525                                        s->dct_count[intra] +
1526                                        s->dct_error_sum[intra][i] / 2) /
1527                                       (s->dct_error_sum[intra][i] + 1);
1528         }
1529     }
1530 }
1531
1532 static int frame_start(MpegEncContext *s)
1533 {
1534     int ret;
1535
1536     /* mark & release old frames */
1537     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1538         s->last_picture_ptr != s->next_picture_ptr &&
1539         s->last_picture_ptr->f.buf[0]) {
1540         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1541     }
1542
1543     s->current_picture_ptr->f.pict_type = s->pict_type;
1544     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1545
1546     ff_mpeg_unref_picture(s, &s->current_picture);
1547     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1548                                    s->current_picture_ptr)) < 0)
1549         return ret;
1550
1551     if (s->pict_type != AV_PICTURE_TYPE_B) {
1552         s->last_picture_ptr = s->next_picture_ptr;
1553         if (!s->droppable)
1554             s->next_picture_ptr = s->current_picture_ptr;
1555     }
1556
1557     if (s->last_picture_ptr) {
1558         ff_mpeg_unref_picture(s, &s->last_picture);
1559         if (s->last_picture_ptr->f.buf[0] &&
1560             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1561                                        s->last_picture_ptr)) < 0)
1562             return ret;
1563     }
1564     if (s->next_picture_ptr) {
1565         ff_mpeg_unref_picture(s, &s->next_picture);
1566         if (s->next_picture_ptr->f.buf[0] &&
1567             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1568                                        s->next_picture_ptr)) < 0)
1569             return ret;
1570     }
1571
1572     if (s->picture_structure!= PICT_FRAME) {
1573         int i;
1574         for (i = 0; i < 4; i++) {
1575             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1576                 s->current_picture.f.data[i] +=
1577                     s->current_picture.f.linesize[i];
1578             }
1579             s->current_picture.f.linesize[i] *= 2;
1580             s->last_picture.f.linesize[i]    *= 2;
1581             s->next_picture.f.linesize[i]    *= 2;
1582         }
1583     }
1584
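         /* The encoder also reconstructs reference frames, so select the
          * dequantizer that matches the syntax being produced. */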
1585     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1586         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1587         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1588     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1589         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1590         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1591     } else {
1592         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1593         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1594     }
1595
1596     if (s->dct_error_sum) {
1597         av_assert2(s->avctx->noise_reduction && s->encoding);
1598         update_noise_reduction(s);
1599     }
1600
1601     return 0;
1602 }
1603
1604 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1605                           AVFrame *pic_arg, int *got_packet)
1606 {
1607     MpegEncContext *s = avctx->priv_data;
1608     int i, stuffing_count, ret;
1609     int context_count = s->slice_context_count;
1610
1611     s->picture_in_gop_number++;
1612
1613     if (load_input_picture(s, pic_arg) < 0)
1614         return -1;
1615
1616     if (select_input_picture(s) < 0) {
1617         return -1;
1618     }
1619
1620     /* output? */
1621     if (s->new_picture.f.data[0]) {
1622         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1623             return ret;
1624         if (s->mb_info) {
1625             s->mb_info_ptr = av_packet_new_side_data(pkt,
1626                                  AV_PKT_DATA_H263_MB_INFO,
1627                                  s->mb_width*s->mb_height*12);
1628             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1629         }
1630
1631         for (i = 0; i < context_count; i++) {
1632             int start_y = s->thread_context[i]->start_mb_y;
1633             int   end_y = s->thread_context[i]->  end_mb_y;
1634             int h       = s->mb_height;
1635             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1636             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1637
1638             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1639         }
1640
1641         s->pict_type = s->new_picture.f.pict_type;
1642         //emms_c();
1643         ret = frame_start(s);
1644         if (ret < 0)
1645             return ret;
1646 vbv_retry:
1647         if (encode_picture(s, s->picture_number) < 0)
1648             return -1;
1649
1650         avctx->header_bits = s->header_bits;
1651         avctx->mv_bits     = s->mv_bits;
1652         avctx->misc_bits   = s->misc_bits;
1653         avctx->i_tex_bits  = s->i_tex_bits;
1654         avctx->p_tex_bits  = s->p_tex_bits;
1655         avctx->i_count     = s->i_count;
1656         // FIXME f/b_count in avctx
1657         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1658         avctx->skip_count  = s->skip_count;
1659
1660         frame_end(s);
1661
1662         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1663             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1664
1665         if (avctx->rc_buffer_size) {
1666             RateControlContext *rcc = &s->rc_context;
1667             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
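                 /* If this frame overflows what the VBV buffer currently
                  * allows and lambda can still be raised, increase lambda
                  * (and the per-MB lambda table), undo the per-frame state
                  * changed by encode_picture(), reset the bit writers and
                  * re-encode the frame from vbv_retry. */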
1668
1669             if (put_bits_count(&s->pb) > max_size &&
1670                 s->lambda < s->avctx->lmax) {
1671                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1672                                        (s->qscale + 1) / s->qscale);
1673                 if (s->adaptive_quant) {
1674                     int i;
1675                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1676                         s->lambda_table[i] =
1677                             FFMAX(s->lambda_table[i] + 1,
1678                                   s->lambda_table[i] * (s->qscale + 1) /
1679                                   s->qscale);
1680                 }
1681                 s->mb_skipped = 0;        // done in frame_start()
1682                 // the following was done in encode_picture(), so we must undo it
1683                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1684                     if (s->flipflop_rounding          ||
1685                         s->codec_id == AV_CODEC_ID_H263P ||
1686                         s->codec_id == AV_CODEC_ID_MPEG4)
1687                         s->no_rounding ^= 1;
1688                 }
1689                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1690                     s->time_base       = s->last_time_base;
1691                     s->last_non_b_time = s->time - s->pp_time;
1692                 }
1693                 for (i = 0; i < context_count; i++) {
1694                     PutBitContext *pb = &s->thread_context[i]->pb;
1695                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1696                 }
1697                 goto vbv_retry;
1698             }
1699
1700             assert(s->avctx->rc_max_rate);
1701         }
1702
1703         if (s->flags & CODEC_FLAG_PASS1)
1704             ff_write_pass1_stats(s);
1705
1706         for (i = 0; i < 4; i++) {
1707             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1708             avctx->error[i] += s->current_picture_ptr->f.error[i];
1709         }
1710
1711         if (s->flags & CODEC_FLAG_PASS1)
1712             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1713                    avctx->i_tex_bits + avctx->p_tex_bits ==
1714                        put_bits_count(&s->pb));
1715         flush_put_bits(&s->pb);
1716         s->frame_bits  = put_bits_count(&s->pb);
1717
1718         stuffing_count = ff_vbv_update(s, s->frame_bits);
1719         s->stuffing_bits = 8*stuffing_count;
1720         if (stuffing_count) {
1721             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1722                     stuffing_count + 50) {
1723                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1724                 return -1;
1725             }
1726
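                 /* Append stuffing_count padding bytes so the stream matches
                  * the rate assumed by the VBV model: MPEG-1/2 can simply
                  * emit zero bytes, MPEG-4 writes a 00 00 01 C3 start code
                  * followed by 0xFF bytes. */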
1727             switch (s->codec_id) {
1728             case AV_CODEC_ID_MPEG1VIDEO:
1729             case AV_CODEC_ID_MPEG2VIDEO:
1730                 while (stuffing_count--) {
1731                     put_bits(&s->pb, 8, 0);
1732                 }
1733                 break;
1734             case AV_CODEC_ID_MPEG4:
1735                 put_bits(&s->pb, 16, 0);
1736                 put_bits(&s->pb, 16, 0x1C3);
1737                 stuffing_count -= 4;
1738                 while (stuffing_count--) {
1739                     put_bits(&s->pb, 8, 0xFF);
1740                 }
1741                 break;
1742             default:
1743                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1744             }
1745             flush_put_bits(&s->pb);
1746             s->frame_bits  = put_bits_count(&s->pb);
1747         }
1748
1749         /* update mpeg1/2 vbv_delay for CBR */
1750         if (s->avctx->rc_max_rate                          &&
1751             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1752             s->out_format == FMT_MPEG1                     &&
1753             90000LL * (avctx->rc_buffer_size - 1) <=
1754                 s->avctx->rc_max_rate * 0xFFFFLL) {
1755             int vbv_delay, min_delay;
1756             double inbits  = s->avctx->rc_max_rate *
1757                              av_q2d(s->avctx->time_base);
1758             int    minbits = s->frame_bits - 8 *
1759                              (s->vbv_delay_ptr - s->pb.buf - 1);
1760             double bits    = s->rc_context.buffer_index + minbits - inbits;
1761
1762             if (bits < 0)
1763                 av_log(s->avctx, AV_LOG_ERROR,
1764                        "Internal error, negative bits\n");
1765
1766             assert(s->repeat_first_field == 0);
1767
1768             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1769             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1770                         s->avctx->rc_max_rate;
1771
1772             vbv_delay = FFMAX(vbv_delay, min_delay);
1773
1774             av_assert0(vbv_delay < 0xFFFF);
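                 /* The 16-bit vbv_delay field straddles three bytes of the
                  * already written picture header: the top 3 bits go into the
                  * low bits of byte 0, the middle 8 bits into byte 1 and the
                  * low 5 bits into the high bits of byte 2.  avctx->vbv_delay
                  * is reported in 27 MHz units, hence the * 300. */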
1775
1776             s->vbv_delay_ptr[0] &= 0xF8;
1777             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1778             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1779             s->vbv_delay_ptr[2] &= 0x07;
1780             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1781             avctx->vbv_delay     = vbv_delay * 300;
1782         }
1783         s->total_bits     += s->frame_bits;
1784         avctx->frame_bits  = s->frame_bits;
1785
1786         pkt->pts = s->current_picture.f.pts;
1787         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1788             if (!s->current_picture.f.coded_picture_number)
1789                 pkt->dts = pkt->pts - s->dts_delta;
1790             else
1791                 pkt->dts = s->reordered_pts;
1792             s->reordered_pts = pkt->pts;
1793         } else
1794             pkt->dts = pkt->pts;
1795         if (s->current_picture.f.key_frame)
1796             pkt->flags |= AV_PKT_FLAG_KEY;
1797         if (s->mb_info)
1798             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1799     } else {
1800         s->frame_bits = 0;
1801     }
1802
1803     /* release non-reference frames */
1804     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1805         if (!s->picture[i].reference)
1806             ff_mpeg_unref_picture(s, &s->picture[i]);
1807     }
1808
1809     assert((s->frame_bits & 7) == 0);
1810
1811     pkt->size = s->frame_bits / 8;
1812     *got_packet = !!pkt->size;
1813     return 0;
1814 }
1815
1816 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1817                                                 int n, int threshold)
1818 {
1819     static const char tab[64] = {
1820         3, 2, 2, 1, 1, 1, 1, 1,
1821         1, 1, 1, 1, 1, 1, 1, 1,
1822         1, 1, 1, 1, 1, 1, 1, 1,
1823         0, 0, 0, 0, 0, 0, 0, 0,
1824         0, 0, 0, 0, 0, 0, 0, 0,
1825         0, 0, 0, 0, 0, 0, 0, 0,
1826         0, 0, 0, 0, 0, 0, 0, 0,
1827         0, 0, 0, 0, 0, 0, 0, 0
1828     };
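         /* Cheap "is this block worth coding?" heuristic: only blocks whose
          * nonzero coefficients are all +-1 are candidates; each such
          * coefficient is scored by tab[] according to the zero run before
          * it, and if the total score stays below the threshold the whole
          * block (optionally keeping the DC) is zeroed. */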
1829     int score = 0;
1830     int run = 0;
1831     int i;
1832     int16_t *block = s->block[n];
1833     const int last_index = s->block_last_index[n];
1834     int skip_dc;
1835
1836     if (threshold < 0) {
1837         skip_dc = 0;
1838         threshold = -threshold;
1839     } else
1840         skip_dc = 1;
1841
1842     /* Are all the coefficients we could set to zero already zero? */
1843     if (last_index <= skip_dc - 1)
1844         return;
1845
1846     for (i = 0; i <= last_index; i++) {
1847         const int j = s->intra_scantable.permutated[i];
1848         const int level = FFABS(block[j]);
1849         if (level == 1) {
1850             if (skip_dc && i == 0)
1851                 continue;
1852             score += tab[run];
1853             run = 0;
1854         } else if (level > 1) {
1855             return;
1856         } else {
1857             run++;
1858         }
1859     }
1860     if (score >= threshold)
1861         return;
1862     for (i = skip_dc; i <= last_index; i++) {
1863         const int j = s->intra_scantable.permutated[i];
1864         block[j] = 0;
1865     }
1866     if (block[0])
1867         s->block_last_index[n] = 0;
1868     else
1869         s->block_last_index[n] = -1;
1870 }
1871
1872 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1873                                int last_index)
1874 {
1875     int i;
1876     const int maxlevel = s->max_qcoeff;
1877     const int minlevel = s->min_qcoeff;
1878     int overflow = 0;
1879
1880     if (s->mb_intra) {
1881         i = 1; // skip clipping of intra dc
1882     } else
1883         i = 0;
1884
1885     for (; i <= last_index; i++) {
1886         const int j = s->intra_scantable.permutated[i];
1887         int level = block[j];
1888
1889         if (level > maxlevel) {
1890             level = maxlevel;
1891             overflow++;
1892         } else if (level < minlevel) {
1893             level = minlevel;
1894             overflow++;
1895         }
1896
1897         block[j] = level;
1898     }
1899
1900     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1901         av_log(s->avctx, AV_LOG_INFO,
1902                "warning, clipping %d dct coefficients to %d..%d\n",
1903                overflow, minlevel, maxlevel);
1904 }
1905
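     /* Per-pixel activity measure for the noise-shaping quantizer: for each
      * pixel, 36 * the standard deviation of its 3x3 neighbourhood (clipped
      * to the 8x8 block), used as a perceptual weight by
      * dct_quantize_refine(). */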
1906 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1907 {
1908     int x, y;
1909     // FIXME optimize
1910     for (y = 0; y < 8; y++) {
1911         for (x = 0; x < 8; x++) {
1912             int x2, y2;
1913             int sum = 0;
1914             int sqr = 0;
1915             int count = 0;
1916
1917             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1918                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1919                     int v = ptr[x2 + y2 * stride];
1920                     sum += v;
1921                     sqr += v * v;
1922                     count++;
1923                 }
1924             }
1925             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1926         }
1927     }
1928 }
1929
1930 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1931                                                 int motion_x, int motion_y,
1932                                                 int mb_block_height,
1933                                                 int mb_block_width,
1934                                                 int mb_block_count)
1935 {
1936     int16_t weight[12][64];
1937     int16_t orig[12][64];
1938     const int mb_x = s->mb_x;
1939     const int mb_y = s->mb_y;
1940     int i;
1941     int skip_dct[12];
1942     int dct_offset = s->linesize * 8; // default for progressive frames
1943     int uv_dct_offset = s->uvlinesize * 8;
1944     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1945     ptrdiff_t wrap_y, wrap_c;
1946
1947     for (i = 0; i < mb_block_count; i++)
1948         skip_dct[i] = s->skipdct;
1949
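         /* With adaptive quantization the per-MB lambda comes from
          * lambda_table[]; unless QP_RD is active, qscale is taken from the
          * picture's qscale_table and the delta to the previous MB is
          * clipped to +-2 for FMT_H263-style syntax and forced to 0 where
          * MPEG-4 cannot signal it (odd deltas or direct mode in B-frames,
          * 8x8 MVs). */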
1950     if (s->adaptive_quant) {
1951         const int last_qp = s->qscale;
1952         const int mb_xy = mb_x + mb_y * s->mb_stride;
1953
1954         s->lambda = s->lambda_table[mb_xy];
1955         update_qscale(s);
1956
1957         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1958             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1959             s->dquant = s->qscale - last_qp;
1960
1961             if (s->out_format == FMT_H263) {
1962                 s->dquant = av_clip(s->dquant, -2, 2);
1963
1964                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1965                     if (!s->mb_intra) {
1966                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1967                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1968                                 s->dquant = 0;
1969                         }
1970                         if (s->mv_type == MV_TYPE_8X8)
1971                             s->dquant = 0;
1972                     }
1973                 }
1974             }
1975         }
1976         ff_set_qscale(s, last_qp + s->dquant);
1977     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1978         ff_set_qscale(s, s->qscale + s->dquant);
1979
1980     wrap_y = s->linesize;
1981     wrap_c = s->uvlinesize;
1982     ptr_y  = s->new_picture.f.data[0] +
1983              (mb_y * 16 * wrap_y)              + mb_x * 16;
1984     ptr_cb = s->new_picture.f.data[1] +
1985              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1986     ptr_cr = s->new_picture.f.data[2] +
1987              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1988
1989     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1990         uint8_t *ebuf = s->edge_emu_buffer + 32;
1991         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1992         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1993         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1994                                  wrap_y, wrap_y,
1995                                  16, 16, mb_x * 16, mb_y * 16,
1996                                  s->width, s->height);
1997         ptr_y = ebuf;
1998         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1999                                  wrap_c, wrap_c,
2000                                  mb_block_width, mb_block_height,
2001                                  mb_x * mb_block_width, mb_y * mb_block_height,
2002                                  cw, ch);
2003         ptr_cb = ebuf + 18 * wrap_y;
2004         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2005                                  wrap_c, wrap_c,
2006                                  mb_block_width, mb_block_height,
2007                                  mb_x * mb_block_width, mb_y * mb_block_height,
2008                                  cw, ch);
2009         ptr_cr = ebuf + 18 * wrap_y + 16;
2010     }
2011
2012     if (s->mb_intra) {
2013         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2014             int progressive_score, interlaced_score;
2015
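                 /* Compare the cost of frame vs. field DCT ordering on the
                  * luma via ildct_cmp; the -400 bias favours progressive
                  * coding.  If the field ordering wins, interlaced_dct is set
                  * and the offsets/strides are changed so that each 8x8
                  * block takes every second line. */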
2016             s->interlaced_dct = 0;
2017             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2018                                                     NULL, wrap_y, 8) +
2019                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2020                                                     NULL, wrap_y, 8) - 400;
2021
2022             if (progressive_score > 0) {
2023                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2024                                                        NULL, wrap_y * 2, 8) +
2025                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2026                                                        NULL, wrap_y * 2, 8);
2027                 if (progressive_score > interlaced_score) {
2028                     s->interlaced_dct = 1;
2029
2030                     dct_offset = wrap_y;
2031                     uv_dct_offset = wrap_c;
2032                     wrap_y <<= 1;
2033                     if (s->chroma_format == CHROMA_422 ||
2034                         s->chroma_format == CHROMA_444)
2035                         wrap_c <<= 1;
2036                 }
2037             }
2038         }
2039
2040         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2041         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2042         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2043         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2044
2045         if (s->flags & CODEC_FLAG_GRAY) {
2046             skip_dct[4] = 1;
2047             skip_dct[5] = 1;
2048         } else {
2049             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2050             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2051             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2052                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2053                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2054             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2055                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2056                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2057                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2058                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2059                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2060                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2061             }
2062         }
2063     } else {
2064         op_pixels_func (*op_pix)[4];
2065         qpel_mc_func (*op_qpix)[16];
2066         uint8_t *dest_y, *dest_cb, *dest_cr;
2067
2068         dest_y  = s->dest[0];
2069         dest_cb = s->dest[1];
2070         dest_cr = s->dest[2];
2071
2072         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2073             op_pix  = s->hdsp.put_pixels_tab;
2074             op_qpix = s->dsp.put_qpel_pixels_tab;
2075         } else {
2076             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2077             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
2078         }
2079
2080         if (s->mv_dir & MV_DIR_FORWARD) {
2081             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2082                           s->last_picture.f.data,
2083                           op_pix, op_qpix);
2084             op_pix  = s->hdsp.avg_pixels_tab;
2085             op_qpix = s->dsp.avg_qpel_pixels_tab;
2086         }
2087         if (s->mv_dir & MV_DIR_BACKWARD) {
2088             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2089                           s->next_picture.f.data,
2090                           op_pix, op_qpix);
2091         }
2092
2093         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2094             int progressive_score, interlaced_score;
2095
2096             s->interlaced_dct = 0;
2097             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2098                                                     ptr_y,              wrap_y,
2099                                                     8) +
2100                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2101                                                     ptr_y + wrap_y * 8, wrap_y,
2102                                                     8) - 400;
2103
2104             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2105                 progressive_score -= 400;
2106
2107             if (progressive_score > 0) {
2108                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2109                                                        ptr_y,
2110                                                        wrap_y * 2, 8) +
2111                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2112                                                        ptr_y + wrap_y,
2113                                                        wrap_y * 2, 8);
2114
2115                 if (progressive_score > interlaced_score) {
2116                     s->interlaced_dct = 1;
2117
2118                     dct_offset = wrap_y;
2119                     uv_dct_offset = wrap_c;
2120                     wrap_y <<= 1;
2121                     if (s->chroma_format == CHROMA_422)
2122                         wrap_c <<= 1;
2123                 }
2124             }
2125         }
2126
2127         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2128         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2129         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2130                            dest_y + dct_offset, wrap_y);
2131         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2132                            dest_y + dct_offset + 8, wrap_y);
2133
2134         if (s->flags & CODEC_FLAG_GRAY) {
2135             skip_dct[4] = 1;
2136             skip_dct[5] = 1;
2137         } else {
2138             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2139             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2140             if (!s->chroma_y_shift) { /* 422 */
2141                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2142                                    dest_cb + uv_dct_offset, wrap_c);
2143                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2144                                    dest_cr + uv_dct_offset, wrap_c);
2145             }
2146         }
2147         /* pre quantization */
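             /* If the motion-compensated variance of this MB is small, skip
              * the DCT of individual 8x8 blocks whose SAD against the
              * prediction is below 20 * qscale; they are then treated as
              * having no coefficients. */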
2148         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2149                 2 * s->qscale * s->qscale) {
2150             // FIXME optimize
2151             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2152                               wrap_y, 8) < 20 * s->qscale)
2153                 skip_dct[0] = 1;
2154             if (s->dsp.sad[1](NULL, ptr_y + 8,
2155                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2156                 skip_dct[1] = 1;
2157             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2158                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2159                 skip_dct[2] = 1;
2160             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2161                               dest_y + dct_offset + 8,
2162                               wrap_y, 8) < 20 * s->qscale)
2163                 skip_dct[3] = 1;
2164             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2165                               wrap_c, 8) < 20 * s->qscale)
2166                 skip_dct[4] = 1;
2167             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2168                               wrap_c, 8) < 20 * s->qscale)
2169                 skip_dct[5] = 1;
2170             if (!s->chroma_y_shift) { /* 422 */
2171                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2172                                   dest_cb + uv_dct_offset,
2173                                   wrap_c, 8) < 20 * s->qscale)
2174                     skip_dct[6] = 1;
2175                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2176                                   dest_cr + uv_dct_offset,
2177                                   wrap_c, 8) < 20 * s->qscale)
2178                     skip_dct[7] = 1;
2179             }
2180         }
2181     }
2182
2183     if (s->quantizer_noise_shaping) {
2184         if (!skip_dct[0])
2185             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2186         if (!skip_dct[1])
2187             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2188         if (!skip_dct[2])
2189             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2190         if (!skip_dct[3])
2191             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2192         if (!skip_dct[4])
2193             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2194         if (!skip_dct[5])
2195             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2196         if (!s->chroma_y_shift) { /* 422 */
2197             if (!skip_dct[6])
2198                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2199                                   wrap_c);
2200             if (!skip_dct[7])
2201                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2202                                   wrap_c);
2203         }
2204         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2205     }
2206
2207     /* DCT & quantize */
2208     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2209     {
2210         for (i = 0; i < mb_block_count; i++) {
2211             if (!skip_dct[i]) {
2212                 int overflow;
2213                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2214                 // FIXME we could decide to change the quantizer instead of
2215                 // clipping
2216                 // JS: I don't think that would be a good idea; it could lower
2217                 //     quality instead of improving it. Only INTRADC clipping
2218                 //     deserves a change of quantizer
2219                 if (overflow)
2220                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2221             } else
2222                 s->block_last_index[i] = -1;
2223         }
2224         if (s->quantizer_noise_shaping) {
2225             for (i = 0; i < mb_block_count; i++) {
2226                 if (!skip_dct[i]) {
2227                     s->block_last_index[i] =
2228                         dct_quantize_refine(s, s->block[i], weight[i],
2229                                             orig[i], i, s->qscale);
2230                 }
2231             }
2232         }
2233
2234         if (s->luma_elim_threshold && !s->mb_intra)
2235             for (i = 0; i < 4; i++)
2236                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2237         if (s->chroma_elim_threshold && !s->mb_intra)
2238             for (i = 4; i < mb_block_count; i++)
2239                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2240
2241         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2242             for (i = 0; i < mb_block_count; i++) {
2243                 if (s->block_last_index[i] == -1)
2244                     s->coded_score[i] = INT_MAX / 256;
2245             }
2246         }
2247     }
2248
2249     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2250         s->block_last_index[4] =
2251         s->block_last_index[5] = 0;
2252         s->block[4][0] =
2253         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2254         if (!s->chroma_y_shift) { /* 422 / 444 */
2255             for (i=6; i<12; i++) {
2256                 s->block_last_index[i] = 0;
2257                 s->block[i][0] = s->block[4][0];
2258             }
2259         }
2260     }
2261
2262     // FIXME: the non-C quantize code returns an incorrect block_last_index
2263     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2264         for (i = 0; i < mb_block_count; i++) {
2265             int j;
2266             if (s->block_last_index[i] > 0) {
2267                 for (j = 63; j > 0; j--) {
2268                     if (s->block[i][s->intra_scantable.permutated[j]])
2269                         break;
2270                 }
2271                 s->block_last_index[i] = j;
2272             }
2273         }
2274     }
2275
2276     /* huffman encode */
2277     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2278     case AV_CODEC_ID_MPEG1VIDEO:
2279     case AV_CODEC_ID_MPEG2VIDEO:
2280         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2281             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2282         break;
2283     case AV_CODEC_ID_MPEG4:
2284         if (CONFIG_MPEG4_ENCODER)
2285             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2286         break;
2287     case AV_CODEC_ID_MSMPEG4V2:
2288     case AV_CODEC_ID_MSMPEG4V3:
2289     case AV_CODEC_ID_WMV1:
2290         if (CONFIG_MSMPEG4_ENCODER)
2291             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2292         break;
2293     case AV_CODEC_ID_WMV2:
2294         if (CONFIG_WMV2_ENCODER)
2295             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2296         break;
2297     case AV_CODEC_ID_H261:
2298         if (CONFIG_H261_ENCODER)
2299             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2300         break;
2301     case AV_CODEC_ID_H263:
2302     case AV_CODEC_ID_H263P:
2303     case AV_CODEC_ID_FLV1:
2304     case AV_CODEC_ID_RV10:
2305     case AV_CODEC_ID_RV20:
2306         if (CONFIG_H263_ENCODER)
2307             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2308         break;
2309     case AV_CODEC_ID_MJPEG:
2310     case AV_CODEC_ID_AMV:
2311         if (CONFIG_MJPEG_ENCODER)
2312             ff_mjpeg_encode_mb(s, s->block);
2313         break;
2314     default:
2315         av_assert1(0);
2316     }
2317 }
2318
2319 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2320 {
2321     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2322     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2323     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2324 }
2325
2326 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2327     int i;
2328
2329     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2330
2331     /* mpeg1 */
2332     d->mb_skip_run= s->mb_skip_run;
2333     for(i=0; i<3; i++)
2334         d->last_dc[i] = s->last_dc[i];
2335
2336     /* statistics */
2337     d->mv_bits= s->mv_bits;
2338     d->i_tex_bits= s->i_tex_bits;
2339     d->p_tex_bits= s->p_tex_bits;
2340     d->i_count= s->i_count;
2341     d->f_count= s->f_count;
2342     d->b_count= s->b_count;
2343     d->skip_count= s->skip_count;
2344     d->misc_bits= s->misc_bits;
2345     d->last_bits= 0;
2346
2347     d->mb_skipped= 0;
2348     d->qscale= s->qscale;
2349     d->dquant= s->dquant;
2350
2351     d->esc3_level_length= s->esc3_level_length;
2352 }
2353
2354 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2355     int i;
2356
2357     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2358     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2359
2360     /* mpeg1 */
2361     d->mb_skip_run= s->mb_skip_run;
2362     for(i=0; i<3; i++)
2363         d->last_dc[i] = s->last_dc[i];
2364
2365     /* statistics */
2366     d->mv_bits= s->mv_bits;
2367     d->i_tex_bits= s->i_tex_bits;
2368     d->p_tex_bits= s->p_tex_bits;
2369     d->i_count= s->i_count;
2370     d->f_count= s->f_count;
2371     d->b_count= s->b_count;
2372     d->skip_count= s->skip_count;
2373     d->misc_bits= s->misc_bits;
2374
2375     d->mb_intra= s->mb_intra;
2376     d->mb_skipped= s->mb_skipped;
2377     d->mv_type= s->mv_type;
2378     d->mv_dir= s->mv_dir;
2379     d->pb= s->pb;
2380     if(s->data_partitioning){
2381         d->pb2= s->pb2;
2382         d->tex_pb= s->tex_pb;
2383     }
2384     d->block= s->block;
2385     for(i=0; i<8; i++)
2386         d->block_last_index[i]= s->block_last_index[i];
2387     d->interlaced_dct= s->interlaced_dct;
2388     d->qscale= s->qscale;
2389
2390     d->esc3_level_length= s->esc3_level_length;
2391 }
2392
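     /* Trial encode of one candidate macroblock mode for the RD/bit-count
      * based mode decision: the MB is encoded into one of two ping-pong
      * bit buffers and block sets, scored by its bit count (scaled by
      * lambda2 and adding the SSE distortion when mb_decision is
      * FF_MB_DECISION_RD), and if it beats *dmin the winning context is
      * copied into *best. */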
2393 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2394                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2395                            int *dmin, int *next_block, int motion_x, int motion_y)
2396 {
2397     int score;
2398     uint8_t *dest_backup[3];
2399
2400     copy_context_before_encode(s, backup, type);
2401
2402     s->block= s->blocks[*next_block];
2403     s->pb= pb[*next_block];
2404     if(s->data_partitioning){
2405         s->pb2   = pb2   [*next_block];
2406         s->tex_pb= tex_pb[*next_block];
2407     }
2408
2409     if(*next_block){
2410         memcpy(dest_backup, s->dest, sizeof(s->dest));
2411         s->dest[0] = s->rd_scratchpad;
2412         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2413         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2414         assert(s->linesize >= 32); //FIXME
2415     }
2416
2417     encode_mb(s, motion_x, motion_y);
2418
2419     score= put_bits_count(&s->pb);
2420     if(s->data_partitioning){
2421         score+= put_bits_count(&s->pb2);
2422         score+= put_bits_count(&s->tex_pb);
2423     }
2424
2425     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2426         ff_MPV_decode_mb(s, s->block);
2427
2428         score *= s->lambda2;
2429         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2430     }
2431
2432     if(*next_block){
2433         memcpy(s->dest, dest_backup, sizeof(s->dest));
2434     }
2435
2436     if(score<*dmin){
2437         *dmin= score;
2438         *next_block^=1;
2439
2440         copy_context_after_encode(best, s, type);
2441     }
2442 }
2443
2444 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2445     uint32_t *sq = ff_squareTbl + 256;
2446     int acc=0;
2447     int x,y;
2448
2449     if(w==16 && h==16)
2450         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2451     else if(w==8 && h==8)
2452         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2453
2454     for(y=0; y<h; y++){
2455         for(x=0; x<w; x++){
2456             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2457         }
2458     }
2459
2460     av_assert2(acc>=0);
2461
2462     return acc;
2463 }
2464
2465 static int sse_mb(MpegEncContext *s){
2466     int w= 16;
2467     int h= 16;
2468
2469     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2470     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2471
2472     if(w==16 && h==16)
2473       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2474         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2475                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2476                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2477       }else{
2478         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2479                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2480                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2481       }
2482     else
2483         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2484                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2485                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2486 }
2487
2488 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2489     MpegEncContext *s= *(void**)arg;
2490
2491
2492     s->me.pre_pass=1;
2493     s->me.dia_size= s->avctx->pre_dia_size;
2494     s->first_slice_line=1;
2495     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2496         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2497             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2498         }
2499         s->first_slice_line=0;
2500     }
2501
2502     s->me.pre_pass=0;
2503
2504     return 0;
2505 }
2506
2507 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2508     MpegEncContext *s= *(void**)arg;
2509
2510     ff_check_alignment();
2511
2512     s->me.dia_size= s->avctx->dia_size;
2513     s->first_slice_line=1;
2514     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2515         s->mb_x=0; //for block init below
2516         ff_init_block_index(s);
2517         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2518             s->block_index[0]+=2;
2519             s->block_index[1]+=2;
2520             s->block_index[2]+=2;
2521             s->block_index[3]+=2;
2522
2523             /* compute motion vector & mb_type and store in context */
2524             if(s->pict_type==AV_PICTURE_TYPE_B)
2525                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2526             else
2527                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2528         }
2529         s->first_slice_line=0;
2530     }
2531     return 0;
2532 }
2533
2534 static int mb_var_thread(AVCodecContext *c, void *arg){
2535     MpegEncContext *s= *(void**)arg;
2536     int mb_x, mb_y;
2537
2538     ff_check_alignment();
2539
2540     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2541         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2542             int xx = mb_x * 16;
2543             int yy = mb_y * 16;
2544             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2545             int varc;
2546             int sum = s->dsp.pix_sum(pix, s->linesize);
2547
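                 /* Spatial complexity of the 16x16 luma block:
                  * varc ~= (sum(v^2) - sum(v)^2 / 256) / 256, i.e. the pixel
                  * variance plus small bias/rounding constants; used later
                  * by the rate control / adaptive quantization. */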
2548             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2549
2550             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2551             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2552             s->me.mb_var_sum_temp    += varc;
2553         }
2554     }
2555     return 0;
2556 }
2557
2558 static void write_slice_end(MpegEncContext *s){
2559     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2560         if(s->partitioned_frame){
2561             ff_mpeg4_merge_partitions(s);
2562         }
2563
2564         ff_mpeg4_stuffing(&s->pb);
2565     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2566         ff_mjpeg_encode_stuffing(s);
2567     }
2568
2569     avpriv_align_put_bits(&s->pb);
2570     flush_put_bits(&s->pb);
2571
2572     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2573         s->misc_bits+= get_bits_diff(s);
2574 }
2575
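     /* One 12-byte AV_PKT_DATA_H263_MB_INFO entry: little-endian 32-bit bit
      * offset of the macroblock within the packet, quantizer, GOB number,
      * little-endian 16-bit macroblock address within the GOB, the
      * predicted MV (hmv1/vmv1), and zeros for the unimplemented second MV
      * pair. */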
2576 static void write_mb_info(MpegEncContext *s)
2577 {
2578     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2579     int offset = put_bits_count(&s->pb);
2580     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2581     int gobn = s->mb_y / s->gob_index;
2582     int pred_x, pred_y;
2583     if (CONFIG_H263_ENCODER)
2584         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2585     bytestream_put_le32(&ptr, offset);
2586     bytestream_put_byte(&ptr, s->qscale);
2587     bytestream_put_byte(&ptr, gobn);
2588     bytestream_put_le16(&ptr, mba);
2589     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2590     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2591     /* 4MV not implemented */
2592     bytestream_put_byte(&ptr, 0); /* hmv2 */
2593     bytestream_put_byte(&ptr, 0); /* vmv2 */
2594 }
2595
2596 static void update_mb_info(MpegEncContext *s, int startcode)
2597 {
2598     if (!s->mb_info)
2599         return;
2600     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2601         s->mb_info_size += 12;
2602         s->prev_mb_info = s->last_mb_info;
2603     }
2604     if (startcode) {
2605         s->prev_mb_info = put_bits_count(&s->pb)/8;
2606         /* This might have incremented mb_info_size above, and we return without
2607          * actually writing any info into that slot yet. But in that case,
2608          * this will be called again after the start code has been written,
2609          * and the mb info will actually be written then. */
2610         return;
2611     }
2612
2613     s->last_mb_info = put_bits_count(&s->pb)/8;
2614     if (!s->mb_info_size)
2615         s->mb_info_size += 12;
2616     write_mb_info(s);
2617 }
2618
2619 static int encode_thread(AVCodecContext *c, void *arg){
2620     MpegEncContext *s= *(void**)arg;
2621     int mb_x, mb_y, pdif = 0;
2622     int chr_h= 16>>s->chroma_y_shift;
2623     int i, j;
2624     MpegEncContext best_s, backup_s;
2625     uint8_t bit_buf[2][MAX_MB_BYTES];
2626     uint8_t bit_buf2[2][MAX_MB_BYTES];
2627     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2628     PutBitContext pb[2], pb2[2], tex_pb[2];
2629
2630     ff_check_alignment();
2631
2632     for(i=0; i<2; i++){
2633         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2634         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2635         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2636     }
2637
2638     s->last_bits= put_bits_count(&s->pb);
2639     s->mv_bits=0;
2640     s->misc_bits=0;
2641     s->i_tex_bits=0;
2642     s->p_tex_bits=0;
2643     s->i_count=0;
2644     s->f_count=0;
2645     s->b_count=0;
2646     s->skip_count=0;
2647
2648     for(i=0; i<3; i++){
2649         /* init last dc values */
2650         /* note: quant matrix value (8) is implied here */
2651         s->last_dc[i] = 128 << s->intra_dc_precision;
2652
2653         s->current_picture.f.error[i] = 0;
2654     }
2655     if(s->codec_id==AV_CODEC_ID_AMV){
2656         s->last_dc[0] = 128*8/13;
2657         s->last_dc[1] = 128*8/14;
2658         s->last_dc[2] = 128*8/14;
2659     }
2660     s->mb_skip_run = 0;
2661     memset(s->last_mv, 0, sizeof(s->last_mv));
2662
2663     s->last_mv_dir = 0;
2664
2665     switch(s->codec_id){
2666     case AV_CODEC_ID_H263:
2667     case AV_CODEC_ID_H263P:
2668     case AV_CODEC_ID_FLV1:
2669         if (CONFIG_H263_ENCODER)
2670             s->gob_index = ff_h263_get_gob_height(s);
2671         break;
2672     case AV_CODEC_ID_MPEG4:
2673         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2674             ff_mpeg4_init_partitions(s);
2675         break;
2676     }
2677
2678     s->resync_mb_x=0;
2679     s->resync_mb_y=0;
2680     s->first_slice_line = 1;
2681     s->ptr_lastgob = s->pb.buf;
2682     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2683         s->mb_x=0;
2684         s->mb_y= mb_y;
2685
2686         ff_set_qscale(s, s->qscale);
2687         ff_init_block_index(s);
2688
2689         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2690             int xy= mb_y*s->mb_stride + mb_x; // not const: H.261 may adjust this below
2691             int mb_type= s->mb_type[xy];
2692 //            int d;
2693             int dmin= INT_MAX;
2694             int dir;
2695
2696             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2697                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2698                 return -1;
2699             }
2700             if(s->data_partitioning){
2701                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2702                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2703                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2704                     return -1;
2705                 }
2706             }
2707
2708             s->mb_x = mb_x;
2709             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2710             ff_update_block_index(s);
2711
2712             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2713                 ff_h261_reorder_mb_index(s);
2714                 xy= s->mb_y*s->mb_stride + s->mb_x;
2715                 mb_type= s->mb_type[xy];
2716             }
2717
2718             /* write gob / video packet header  */
2719             if(s->rtp_mode){
2720                 int current_packet_size, is_gob_start;
2721
2722                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2723
2724                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2725
2726                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2727
2728                 switch(s->codec_id){
2729                 case AV_CODEC_ID_H263:
2730                 case AV_CODEC_ID_H263P:
2731                     if(!s->h263_slice_structured)
2732                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2733                     break;
2734                 case AV_CODEC_ID_MPEG2VIDEO:
2735                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
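                         /* fall through: MPEG-2 also needs the skip-run check below */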
2736                 case AV_CODEC_ID_MPEG1VIDEO:
2737                     if(s->mb_skip_run) is_gob_start=0;
2738                     break;
2739                 case AV_CODEC_ID_MJPEG:
2740                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2741                     break;
2742                 }
2743
2744                 if(is_gob_start){
2745                     if(s->start_mb_y != mb_y || mb_x!=0){
2746                         write_slice_end(s);
2747
2748                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2749                             ff_mpeg4_init_partitions(s);
2750                         }
2751                     }
2752
2753                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2754                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2755
2756                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
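                             /* error_rate simulates transmission errors:
                              * roughly one in every (100 / error_rate)
                              * packets is dropped by rewinding the bitstream
                              * writer to the start of the last GOB. */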
2757                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2758                         int d = 100 / s->error_rate;
2759                         if(r % d == 0){
2760                             current_packet_size=0;
2761                             s->pb.buf_ptr= s->ptr_lastgob;
2762                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2763                         }
2764                     }
2765
2766                     if (s->avctx->rtp_callback){
2767                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2768                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2769                     }
2770                     update_mb_info(s, 1);
2771
2772                     switch(s->codec_id){
2773                     case AV_CODEC_ID_MPEG4:
2774                         if (CONFIG_MPEG4_ENCODER) {
2775                             ff_mpeg4_encode_video_packet_header(s);
2776                             ff_mpeg4_clean_buffers(s);
2777                         }
2778                     break;
2779                     case AV_CODEC_ID_MPEG1VIDEO:
2780                     case AV_CODEC_ID_MPEG2VIDEO:
2781                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2782                             ff_mpeg1_encode_slice_header(s);
2783                             ff_mpeg1_clean_buffers(s);
2784                         }
2785                     break;
2786                     case AV_CODEC_ID_H263:
2787                     case AV_CODEC_ID_H263P:
2788                         if (CONFIG_H263_ENCODER)
2789                             ff_h263_encode_gob_header(s, mb_y);
2790                     break;
2791                     }
2792
2793                     if(s->flags&CODEC_FLAG_PASS1){
2794                         int bits= put_bits_count(&s->pb);
2795                         s->misc_bits+= bits - s->last_bits;
2796                         s->last_bits= bits;
2797                     }
2798
2799                     s->ptr_lastgob += current_packet_size;
2800                     s->first_slice_line=1;
2801                     s->resync_mb_x=mb_x;
2802                     s->resync_mb_y=mb_y;
2803                 }
2804             }
2805
2806             if(  (s->resync_mb_x   == s->mb_x)
2807                && s->resync_mb_y+1 == s->mb_y){
2808                 s->first_slice_line=0;
2809             }
2810
2811             s->mb_skipped=0;
2812             s->dquant=0; //only for QP_RD
2813
2814             update_mb_info(s, 0);
2815
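            /* More than one candidate MB type (or QP_RD): each candidate is encoded
             * into a scratch PutBitContext via encode_mb_hq(), the rate-distortion
             * cheapest result is tracked in best_s/dmin, and the winning context and
             * bitstream are copied back below. */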
2816             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2817                 int next_block=0;
2818                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2819
2820                 copy_context_before_encode(&backup_s, s, -1);
2821                 backup_s.pb= s->pb;
2822                 best_s.data_partitioning= s->data_partitioning;
2823                 best_s.partitioned_frame= s->partitioned_frame;
2824                 if(s->data_partitioning){
2825                     backup_s.pb2= s->pb2;
2826                     backup_s.tex_pb= s->tex_pb;
2827                 }
2828
2829                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2830                     s->mv_dir = MV_DIR_FORWARD;
2831                     s->mv_type = MV_TYPE_16X16;
2832                     s->mb_intra= 0;
2833                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2834                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2835                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2836                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2837                 }
2838                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2839                     s->mv_dir = MV_DIR_FORWARD;
2840                     s->mv_type = MV_TYPE_FIELD;
2841                     s->mb_intra= 0;
2842                     for(i=0; i<2; i++){
2843                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2844                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2845                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2846                     }
2847                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2848                                  &dmin, &next_block, 0, 0);
2849                 }
2850                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2851                     s->mv_dir = MV_DIR_FORWARD;
2852                     s->mv_type = MV_TYPE_16X16;
2853                     s->mb_intra= 0;
2854                     s->mv[0][0][0] = 0;
2855                     s->mv[0][0][1] = 0;
2856                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2857                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2858                 }
2859                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2860                     s->mv_dir = MV_DIR_FORWARD;
2861                     s->mv_type = MV_TYPE_8X8;
2862                     s->mb_intra= 0;
2863                     for(i=0; i<4; i++){
2864                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2865                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2866                     }
2867                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2868                                  &dmin, &next_block, 0, 0);
2869                 }
2870                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2871                     s->mv_dir = MV_DIR_FORWARD;
2872                     s->mv_type = MV_TYPE_16X16;
2873                     s->mb_intra= 0;
2874                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2875                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2876                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2877                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2878                 }
2879                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2880                     s->mv_dir = MV_DIR_BACKWARD;
2881                     s->mv_type = MV_TYPE_16X16;
2882                     s->mb_intra= 0;
2883                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2884                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2885                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2886                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2887                 }
2888                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2889                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2890                     s->mv_type = MV_TYPE_16X16;
2891                     s->mb_intra= 0;
2892                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2893                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2894                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2895                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2896                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2897                                  &dmin, &next_block, 0, 0);
2898                 }
2899                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2900                     s->mv_dir = MV_DIR_FORWARD;
2901                     s->mv_type = MV_TYPE_FIELD;
2902                     s->mb_intra= 0;
2903                     for(i=0; i<2; i++){
2904                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2905                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2906                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2907                     }
2908                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2909                                  &dmin, &next_block, 0, 0);
2910                 }
2911                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2912                     s->mv_dir = MV_DIR_BACKWARD;
2913                     s->mv_type = MV_TYPE_FIELD;
2914                     s->mb_intra= 0;
2915                     for(i=0; i<2; i++){
2916                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2917                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2918                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2919                     }
2920                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2921                                  &dmin, &next_block, 0, 0);
2922                 }
2923                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2924                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2925                     s->mv_type = MV_TYPE_FIELD;
2926                     s->mb_intra= 0;
2927                     for(dir=0; dir<2; dir++){
2928                         for(i=0; i<2; i++){
2929                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2930                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2931                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2932                         }
2933                     }
2934                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2935                                  &dmin, &next_block, 0, 0);
2936                 }
2937                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2938                     s->mv_dir = 0;
2939                     s->mv_type = MV_TYPE_16X16;
2940                     s->mb_intra= 1;
2941                     s->mv[0][0][0] = 0;
2942                     s->mv[0][0][1] = 0;
2943                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2944                                  &dmin, &next_block, 0, 0);
2945                     if(s->h263_pred || s->h263_aic){
2946                         if(best_s.mb_intra)
2947                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2948                         else
2949                             ff_clean_intra_table_entries(s); //old mode?
2950                     }
2951                 }
2952
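                /* QP_RD: re-encode the best mode found so far with the qscale offsets
                 * from dquant_tab (only +/-2 are tried for B pictures), restoring the
                 * saved DC/AC prediction values whenever a trial qscale is rejected. */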
2953                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2954                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2955                         const int last_qp= backup_s.qscale;
2956                         int qpi, qp, dc[6];
2957                         int16_t ac[6][16];
2958                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2959                         static const int dquant_tab[4]={-1,1,-2,2};
2960                         int storecoefs = s->mb_intra && s->dc_val[0];
2961
2962                         av_assert2(backup_s.dquant == 0);
2963
2964                         //FIXME intra
2965                         s->mv_dir= best_s.mv_dir;
2966                         s->mv_type = MV_TYPE_16X16;
2967                         s->mb_intra= best_s.mb_intra;
2968                         s->mv[0][0][0] = best_s.mv[0][0][0];
2969                         s->mv[0][0][1] = best_s.mv[0][0][1];
2970                         s->mv[1][0][0] = best_s.mv[1][0][0];
2971                         s->mv[1][0][1] = best_s.mv[1][0][1];
2972
2973                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2974                         for(; qpi<4; qpi++){
2975                             int dquant= dquant_tab[qpi];
2976                             qp= last_qp + dquant;
2977                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2978                                 continue;
2979                             backup_s.dquant= dquant;
2980                             if(storecoefs){
2981                                 for(i=0; i<6; i++){
2982                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2983                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2984                                 }
2985                             }
2986
2987                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2988                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2989                             if(best_s.qscale != qp){
2990                                 if(storecoefs){
2991                                     for(i=0; i<6; i++){
2992                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2993                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2994                                     }
2995                                 }
2996                             }
2997                         }
2998                     }
2999                 }
3000                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3001                     int mx= s->b_direct_mv_table[xy][0];
3002                     int my= s->b_direct_mv_table[xy][1];
3003
3004                     backup_s.dquant = 0;
3005                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3006                     s->mb_intra= 0;
3007                     ff_mpeg4_set_direct_mv(s, mx, my);
3008                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3009                                  &dmin, &next_block, mx, my);
3010                 }
3011                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3012                     backup_s.dquant = 0;
3013                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3014                     s->mb_intra= 0;
3015                     ff_mpeg4_set_direct_mv(s, 0, 0);
3016                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3017                                  &dmin, &next_block, 0, 0);
3018                 }
3019                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3020                     int coded=0;
3021                     for(i=0; i<6; i++)
3022                         coded |= s->block_last_index[i];
3023                     if(coded){
3024                         int mx,my;
3025                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3026                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3027                             mx=my=0; //FIXME find the one we actually used
3028                             ff_mpeg4_set_direct_mv(s, mx, my);
3029                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3030                             mx= s->mv[1][0][0];
3031                             my= s->mv[1][0][1];
3032                         }else{
3033                             mx= s->mv[0][0][0];
3034                             my= s->mv[0][0][1];
3035                         }
3036
3037                         s->mv_dir= best_s.mv_dir;
3038                         s->mv_type = best_s.mv_type;
3039                         s->mb_intra= 0;
3040 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3041                         s->mv[0][0][1] = best_s.mv[0][0][1];
3042                         s->mv[1][0][0] = best_s.mv[1][0][0];
3043                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3044                         backup_s.dquant= 0;
3045                         s->skipdct=1;
3046                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3047                                         &dmin, &next_block, mx, my);
3048                         s->skipdct=0;
3049                     }
3050                 }
3051
3052                 s->current_picture.qscale_table[xy] = best_s.qscale;
3053
3054                 copy_context_after_encode(s, &best_s, -1);
3055
3056                 pb_bits_count= put_bits_count(&s->pb);
3057                 flush_put_bits(&s->pb);
3058                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3059                 s->pb= backup_s.pb;
3060
3061                 if(s->data_partitioning){
3062                     pb2_bits_count= put_bits_count(&s->pb2);
3063                     flush_put_bits(&s->pb2);
3064                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3065                     s->pb2= backup_s.pb2;
3066
3067                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3068                     flush_put_bits(&s->tex_pb);
3069                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3070                     s->tex_pb= backup_s.tex_pb;
3071                 }
3072                 s->last_bits= put_bits_count(&s->pb);
3073
3074                 if (CONFIG_H263_ENCODER &&
3075                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3076                     ff_h263_update_motion_val(s);
3077
3078                 if(next_block==0){ //FIXME 16 vs linesize16
3079                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3080                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3081                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3082                 }
3083
3084                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3085                     ff_MPV_decode_mb(s, s->block);
3086             } else {
3087                 int motion_x = 0, motion_y = 0;
3088                 s->mv_type=MV_TYPE_16X16;
3089                 // only one MB-Type possible
3090
3091                 switch(mb_type){
3092                 case CANDIDATE_MB_TYPE_INTRA:
3093                     s->mv_dir = 0;
3094                     s->mb_intra= 1;
3095                     motion_x= s->mv[0][0][0] = 0;
3096                     motion_y= s->mv[0][0][1] = 0;
3097                     break;
3098                 case CANDIDATE_MB_TYPE_INTER:
3099                     s->mv_dir = MV_DIR_FORWARD;
3100                     s->mb_intra= 0;
3101                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3102                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3103                     break;
3104                 case CANDIDATE_MB_TYPE_INTER_I:
3105                     s->mv_dir = MV_DIR_FORWARD;
3106                     s->mv_type = MV_TYPE_FIELD;
3107                     s->mb_intra= 0;
3108                     for(i=0; i<2; i++){
3109                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3110                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3111                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3112                     }
3113                     break;
3114                 case CANDIDATE_MB_TYPE_INTER4V:
3115                     s->mv_dir = MV_DIR_FORWARD;
3116                     s->mv_type = MV_TYPE_8X8;
3117                     s->mb_intra= 0;
3118                     for(i=0; i<4; i++){
3119                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3120                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3121                     }
3122                     break;
3123                 case CANDIDATE_MB_TYPE_DIRECT:
3124                     if (CONFIG_MPEG4_ENCODER) {
3125                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3126                         s->mb_intra= 0;
3127                         motion_x=s->b_direct_mv_table[xy][0];
3128                         motion_y=s->b_direct_mv_table[xy][1];
3129                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3130                     }
3131                     break;
3132                 case CANDIDATE_MB_TYPE_DIRECT0:
3133                     if (CONFIG_MPEG4_ENCODER) {
3134                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3135                         s->mb_intra= 0;
3136                         ff_mpeg4_set_direct_mv(s, 0, 0);
3137                     }
3138                     break;
3139                 case CANDIDATE_MB_TYPE_BIDIR:
3140                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3141                     s->mb_intra= 0;
3142                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3143                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3144                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3145                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3146                     break;
3147                 case CANDIDATE_MB_TYPE_BACKWARD:
3148                     s->mv_dir = MV_DIR_BACKWARD;
3149                     s->mb_intra= 0;
3150                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3151                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3152                     break;
3153                 case CANDIDATE_MB_TYPE_FORWARD:
3154                     s->mv_dir = MV_DIR_FORWARD;
3155                     s->mb_intra= 0;
3156                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3157                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3158                     break;
3159                 case CANDIDATE_MB_TYPE_FORWARD_I:
3160                     s->mv_dir = MV_DIR_FORWARD;
3161                     s->mv_type = MV_TYPE_FIELD;
3162                     s->mb_intra= 0;
3163                     for(i=0; i<2; i++){
3164                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3165                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3166                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3167                     }
3168                     break;
3169                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3170                     s->mv_dir = MV_DIR_BACKWARD;
3171                     s->mv_type = MV_TYPE_FIELD;
3172                     s->mb_intra= 0;
3173                     for(i=0; i<2; i++){
3174                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3175                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3176                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3177                     }
3178                     break;
3179                 case CANDIDATE_MB_TYPE_BIDIR_I:
3180                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3181                     s->mv_type = MV_TYPE_FIELD;
3182                     s->mb_intra= 0;
3183                     for(dir=0; dir<2; dir++){
3184                         for(i=0; i<2; i++){
3185                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3186                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3187                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3188                         }
3189                     }
3190                     break;
3191                 default:
3192                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3193                 }
3194
3195                 encode_mb(s, motion_x, motion_y);
3196
3197                 // RAL: Update last macroblock type
3198                 s->last_mv_dir = s->mv_dir;
3199
3200                 if (CONFIG_H263_ENCODER &&
3201                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3202                     ff_h263_update_motion_val(s);
3203
3204                 ff_MPV_decode_mb(s, s->block);
3205             }
3206
3207             /* clean the MV table in I-, P- and S-frames for direct mode in B-frames */
3208             if(s->mb_intra /* && I,P,S_TYPE */){
3209                 s->p_mv_table[xy][0]=0;
3210                 s->p_mv_table[xy][1]=0;
3211             }
3212
3213             if(s->flags&CODEC_FLAG_PSNR){
3214                 int w= 16;
3215                 int h= 16;
3216
3217                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3218                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3219
3220                 s->current_picture.f.error[0] += sse(
3221                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3222                     s->dest[0], w, h, s->linesize);
3223                 s->current_picture.f.error[1] += sse(
3224                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3225                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3226                 s->current_picture.f.error[2] += sse(
3227                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3228                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3229             }
3230             if(s->loop_filter){
3231                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3232                     ff_h263_loop_filter(s);
3233             }
3234             av_dlog(s->avctx, "MB %d %d bits\n",
3235                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3236         }
3237     }
3238
3239     //not beautiful, but this must be written before the flush, so it has to be here
3240     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3241         ff_msmpeg4_encode_ext_header(s);
3242
3243     write_slice_end(s);
3244
3245     /* Send the last GOB if RTP */
3246     if (s->avctx->rtp_callback) {
3247         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3248         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3249         /* Call the RTP callback to send the last GOB */
3250         emms_c();
3251         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3252     }
3253
3254     return 0;
3255 }
3256
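/* Merge the per-slice-thread statistics back into the main context. MERGE() adds the
 * source field into the destination and zeroes the source, so repeated merges do not
 * double-count; merge_context_after_encode() additionally appends the slice thread's
 * bitstream to the main one. */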
3257 #define MERGE(field) dst->field += src->field; src->field=0
3258 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3259     MERGE(me.scene_change_score);
3260     MERGE(me.mc_mb_var_sum_temp);
3261     MERGE(me.mb_var_sum_temp);
3262 }
3263
3264 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3265     int i;
3266
3267     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3268     MERGE(dct_count[1]);
3269     MERGE(mv_bits);
3270     MERGE(i_tex_bits);
3271     MERGE(p_tex_bits);
3272     MERGE(i_count);
3273     MERGE(f_count);
3274     MERGE(b_count);
3275     MERGE(skip_count);
3276     MERGE(misc_bits);
3277     MERGE(er.error_count);
3278     MERGE(padding_bug_score);
3279     MERGE(current_picture.f.error[0]);
3280     MERGE(current_picture.f.error[1]);
3281     MERGE(current_picture.f.error[2]);
3282
3283     if(dst->avctx->noise_reduction){
3284         for(i=0; i<64; i++){
3285             MERGE(dct_error_sum[0][i]);
3286             MERGE(dct_error_sum[1][i]);
3287         }
3288     }
3289
3290     assert(put_bits_count(&src->pb) % 8 ==0);
3291     assert(put_bits_count(&dst->pb) % 8 ==0);
3292     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3293     flush_put_bits(&dst->pb);
3294 }
3295
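/* Pick the frame-level quality: an explicitly scheduled next_lambda wins, otherwise the
 * rate controller is queried (unless the quantizer is fixed). With adaptive quantization
 * the codec-specific qscale cleanup runs and lambda is taken from the per-MB table. */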
3296 static int estimate_qp(MpegEncContext *s, int dry_run){
3297     if (s->next_lambda){
3298         s->current_picture_ptr->f.quality =
3299         s->current_picture.f.quality = s->next_lambda;
3300         if(!dry_run) s->next_lambda= 0;
3301     } else if (!s->fixed_qscale) {
3302         s->current_picture_ptr->f.quality =
3303         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3304         if (s->current_picture.f.quality < 0)
3305             return -1;
3306     }
3307
3308     if(s->adaptive_quant){
3309         switch(s->codec_id){
3310         case AV_CODEC_ID_MPEG4:
3311             if (CONFIG_MPEG4_ENCODER)
3312                 ff_clean_mpeg4_qscales(s);
3313             break;
3314         case AV_CODEC_ID_H263:
3315         case AV_CODEC_ID_H263P:
3316         case AV_CODEC_ID_FLV1:
3317             if (CONFIG_H263_ENCODER)
3318                 ff_clean_h263_qscales(s);
3319             break;
3320         default:
3321             ff_init_qscale_tab(s);
3322         }
3323
3324         s->lambda= s->lambda_table[0];
3325         //FIXME broken
3326     }else
3327         s->lambda = s->current_picture.f.quality;
3328     update_qscale(s);
3329     return 0;
3330 }
3331
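/* Derive the frame distances from pts: pp_time is the distance between the two
 * reference frames surrounding the current position, pb_time the distance from the
 * past reference to the current B frame. As an illustration, for display order
 * P0 B1 P2 (pts 0,1,2 with time_base.num == 1): encoding P2 gives pp_time = 2, and
 * encoding B1 afterwards gives pb_time = 2 - (2 - 1) = 1. */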
3332 /* must be called before writing the header */
3333 static void set_frame_distances(MpegEncContext * s){
3334     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3335     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3336
3337     if(s->pict_type==AV_PICTURE_TYPE_B){
3338         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3339         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3340     }else{
3341         s->pp_time= s->time - s->last_non_b_time;
3342         s->last_non_b_time= s->time;
3343         assert(s->picture_number==0 || s->pp_time > 0);
3344     }
3345 }
3346
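/* Per-picture encoding driver: set the frame distances, run (pre-)motion estimation in
 * the slice threads, possibly promote the picture to an I frame on a scene change, pick
 * f_code/b_code and clamp overlong MVs, estimate the quantizer, set up the quantization
 * matrices for MJPEG/AMV, write the picture header and finally run encode_thread() in
 * every slice context, merging the per-thread results afterwards. */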
3347 static int encode_picture(MpegEncContext *s, int picture_number)
3348 {
3349     int i, ret;
3350     int bits;
3351     int context_count = s->slice_context_count;
3352
3353     s->picture_number = picture_number;
3354
3355     /* Reset the average MB variance */
3356     s->me.mb_var_sum_temp    =
3357     s->me.mc_mb_var_sum_temp = 0;
3358
3359     /* we need to initialize some time vars before we can encode b-frames */
3360     // RAL: Condition added for MPEG1VIDEO
3361     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3362         set_frame_distances(s);
3363     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3364         ff_set_mpeg4_time(s);
3365
3366     s->me.scene_change_score=0;
3367
3368 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3369
3370     if(s->pict_type==AV_PICTURE_TYPE_I){
3371         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3372         else                        s->no_rounding=0;
3373     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3374         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3375             s->no_rounding ^= 1;
3376     }
3377
3378     if(s->flags & CODEC_FLAG_PASS2){
3379         if (estimate_qp(s,1) < 0)
3380             return -1;
3381         ff_get_2pass_fcode(s);
3382     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3383         if(s->pict_type==AV_PICTURE_TYPE_B)
3384             s->lambda= s->last_lambda_for[s->pict_type];
3385         else
3386             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3387         update_qscale(s);
3388     }
3389
3390     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3391         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3392         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3393         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3394         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3395     }
3396
3397     s->mb_intra=0; //for the rate distortion & bit compare functions
3398     for(i=1; i<context_count; i++){
3399         ret = ff_update_duplicate_context(s->thread_context[i], s);
3400         if (ret < 0)
3401             return ret;
3402     }
3403
3404     if(ff_init_me(s)<0)
3405         return -1;
3406
3407     /* Estimate motion for every MB */
3408     if(s->pict_type != AV_PICTURE_TYPE_I){
3409         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3410         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3411         if (s->pict_type != AV_PICTURE_TYPE_B) {
3412             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3413                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3414             }
3415         }
3416
3417         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3418     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3419         /* I-Frame */
3420         for(i=0; i<s->mb_stride*s->mb_height; i++)
3421             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3422
3423         if(!s->fixed_qscale){
3424             /* finding spatial complexity for I-frame rate control */
3425             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3426         }
3427     }
3428     for(i=1; i<context_count; i++){
3429         merge_context_after_me(s, s->thread_context[i]);
3430     }
3431     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3432     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3433     emms_c();
3434
3435     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3436         s->pict_type= AV_PICTURE_TYPE_I;
3437         for(i=0; i<s->mb_stride*s->mb_height; i++)
3438             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3439         if(s->msmpeg4_version >= 3)
3440             s->no_rounding=1;
3441         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3442                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3443     }
3444
3445     if(!s->umvplus){
3446         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3447             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3448
3449             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3450                 int a,b;
3451                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3452                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3453                 s->f_code= FFMAX3(s->f_code, a, b);
3454             }
3455
3456             ff_fix_long_p_mvs(s);
3457             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3458             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3459                 int j;
3460                 for(i=0; i<2; i++){
3461                     for(j=0; j<2; j++)
3462                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3463                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3464                 }
3465             }
3466         }
3467
3468         if(s->pict_type==AV_PICTURE_TYPE_B){
3469             int a, b;
3470
3471             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3472             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3473             s->f_code = FFMAX(a, b);
3474
3475             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3476             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3477             s->b_code = FFMAX(a, b);
3478
3479             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3480             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3481             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3482             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3483             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3484                 int dir, j;
3485                 for(dir=0; dir<2; dir++){
3486                     for(i=0; i<2; i++){
3487                         for(j=0; j<2; j++){
3488                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3489                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3490                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3491                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3492                         }
3493                     }
3494                 }
3495             }
3496         }
3497     }
3498
3499     if (estimate_qp(s, 0) < 0)
3500         return -1;
3501
3502     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3503         s->qscale= 3; //reduce clipping problems
3504
3505     if (s->out_format == FMT_MJPEG) {
3506         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3507         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3508
3509         if (s->avctx->intra_matrix) {
3510             chroma_matrix =
3511             luma_matrix = s->avctx->intra_matrix;
3512         }
3513
3514         /* for mjpeg, we do include qscale in the matrix */
3515         for(i=1;i<64;i++){
3516             int j= s->dsp.idct_permutation[i];
3517
3518             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3519             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3520         }
3521         s->y_dc_scale_table=
3522         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3523         s->chroma_intra_matrix[0] =
3524         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3525         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3526                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3527         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3528                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3529         s->qscale= 8;
3530     }
3531     if(s->codec_id == AV_CODEC_ID_AMV){
3532         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3533         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3534         for(i=1;i<64;i++){
3535             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3536
3537             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3538             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3539         }
3540         s->y_dc_scale_table= y;
3541         s->c_dc_scale_table= c;
3542         s->intra_matrix[0] = 13;
3543         s->chroma_intra_matrix[0] = 14;
3544         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3545                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3546         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3547                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3548         s->qscale= 8;
3549     }
3550
3551     //FIXME var duplication
3552     s->current_picture_ptr->f.key_frame =
3553     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3554     s->current_picture_ptr->f.pict_type =
3555     s->current_picture.f.pict_type = s->pict_type;
3556
3557     if (s->current_picture.f.key_frame)
3558         s->picture_in_gop_number=0;
3559
3560     s->mb_x = s->mb_y = 0;
3561     s->last_bits= put_bits_count(&s->pb);
3562     switch(s->out_format) {
3563     case FMT_MJPEG:
3564         if (CONFIG_MJPEG_ENCODER)
3565             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3566                                            s->intra_matrix, s->chroma_intra_matrix);
3567         break;
3568     case FMT_H261:
3569         if (CONFIG_H261_ENCODER)
3570             ff_h261_encode_picture_header(s, picture_number);
3571         break;
3572     case FMT_H263:
3573         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3574             ff_wmv2_encode_picture_header(s, picture_number);
3575         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3576             ff_msmpeg4_encode_picture_header(s, picture_number);
3577         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3578             ff_mpeg4_encode_picture_header(s, picture_number);
3579         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3580             ff_rv10_encode_picture_header(s, picture_number);
3581         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3582             ff_rv20_encode_picture_header(s, picture_number);
3583         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3584             ff_flv_encode_picture_header(s, picture_number);
3585         else if (CONFIG_H263_ENCODER)
3586             ff_h263_encode_picture_header(s, picture_number);
3587         break;
3588     case FMT_MPEG1:
3589         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3590             ff_mpeg1_encode_picture_header(s, picture_number);
3591         break;
3592     default:
3593         av_assert0(0);
3594     }
3595     bits= put_bits_count(&s->pb);
3596     s->header_bits= bits - s->last_bits;
3597
3598     for(i=1; i<context_count; i++){
3599         update_duplicate_context_after_me(s->thread_context[i], s);
3600     }
3601     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3602     for(i=1; i<context_count; i++){
3603         merge_context_after_encode(s, s->thread_context[i]);
3604     }
3605     emms_c();
3606     return 0;
3607 }
3608
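/* DCT-domain noise reduction: accumulate the magnitude of every coefficient in
 * dct_error_sum and shrink the coefficient towards zero by the adaptive per-position
 * offset dct_offset, never letting it change sign. */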
3609 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3610     const int intra= s->mb_intra;
3611     int i;
3612
3613     s->dct_count[intra]++;
3614
3615     for(i=0; i<64; i++){
3616         int level= block[i];
3617
3618         if(level){
3619             if(level>0){
3620                 s->dct_error_sum[intra][i] += level;
3621                 level -= s->dct_offset[intra][i];
3622                 if(level<0) level=0;
3623             }else{
3624                 s->dct_error_sum[intra][i] -= level;
3625                 level += s->dct_offset[intra][i];
3626                 if(level>0) level=0;
3627             }
3628             block[i]= level;
3629         }
3630     }
3631 }
3632
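/* Rate-distortion (trellis) quantization: the block is walked in scan order keeping,
 * for each coefficient, up to two candidate levels (coeff[0]/coeff[1]); survivor[]
 * lists the positions a run may start from and score_tab[] accumulates
 * distortion + lambda * bits, so the cheapest path (including the cheapest place to
 * stop) determines the output levels and the last nonzero coefficient. */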
3633 static int dct_quantize_trellis_c(MpegEncContext *s,
3634                                   int16_t *block, int n,
3635                                   int qscale, int *overflow){
3636     const int *qmat;
3637     const uint8_t *scantable= s->intra_scantable.scantable;
3638     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3639     int max=0;
3640     unsigned int threshold1, threshold2;
3641     int bias=0;
3642     int run_tab[65];
3643     int level_tab[65];
3644     int score_tab[65];
3645     int survivor[65];
3646     int survivor_count;
3647     int last_run=0;
3648     int last_level=0;
3649     int last_score= 0;
3650     int last_i;
3651     int coeff[2][64];
3652     int coeff_count[64];
3653     int qmul, qadd, start_i, last_non_zero, i, dc;
3654     const int esc_length= s->ac_esc_length;
3655     uint8_t * length;
3656     uint8_t * last_length;
3657     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3658
3659     s->dsp.fdct (block);
3660
3661     if(s->dct_error_sum)
3662         s->denoise_dct(s, block);
3663     qmul= qscale*16;
3664     qadd= ((qscale-1)|1)*8;
3665
3666     if (s->mb_intra) {
3667         int q;
3668         if (!s->h263_aic) {
3669             if (n < 4)
3670                 q = s->y_dc_scale;
3671             else
3672                 q = s->c_dc_scale;
3673             q = q << 3;
3674         } else{
3675             /* For AIC we skip quant/dequant of INTRADC */
3676             q = 1 << 3;
3677             qadd=0;
3678         }
3679
3680         /* note: block[0] is assumed to be positive */
3681         block[0] = (block[0] + (q >> 1)) / q;
3682         start_i = 1;
3683         last_non_zero = 0;
3684         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3685         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3686             bias= 1<<(QMAT_SHIFT-1);
3687         length     = s->intra_ac_vlc_length;
3688         last_length= s->intra_ac_vlc_last_length;
3689     } else {
3690         start_i = 0;
3691         last_non_zero = -1;
3692         qmat = s->q_inter_matrix[qscale];
3693         length     = s->inter_ac_vlc_length;
3694         last_length= s->inter_ac_vlc_last_length;
3695     }
3696     last_i= start_i;
3697
3698     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3699     threshold2= (threshold1<<1);
3700
3701     for(i=63; i>=start_i; i--) {
3702         const int j = scantable[i];
3703         int level = block[j] * qmat[j];
3704
3705         if(((unsigned)(level+threshold1))>threshold2){
3706             last_non_zero = i;
3707             break;
3708         }
3709     }
3710
3711     for(i=start_i; i<=last_non_zero; i++) {
3712         const int j = scantable[i];
3713         int level = block[j] * qmat[j];
3714
3715 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3716 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3717         if(((unsigned)(level+threshold1))>threshold2){
3718             if(level>0){
3719                 level= (bias + level)>>QMAT_SHIFT;
3720                 coeff[0][i]= level;
3721                 coeff[1][i]= level-1;
3722 //                coeff[2][k]= level-2;
3723             }else{
3724                 level= (bias - level)>>QMAT_SHIFT;
3725                 coeff[0][i]= -level;
3726                 coeff[1][i]= -level+1;
3727 //                coeff[2][k]= -level+2;
3728             }
3729             coeff_count[i]= FFMIN(level, 2);
3730             av_assert2(coeff_count[i]);
3731             max |=level;
3732         }else{
3733             coeff[0][i]= (level>>31)|1;
3734             coeff_count[i]= 1;
3735         }
3736     }
3737
3738     *overflow= s->max_qcoeff < max; //overflow might have happened
3739
3740     if(last_non_zero < start_i){
3741         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3742         return last_non_zero;
3743     }
3744
3745     score_tab[start_i]= 0;
3746     survivor[0]= start_i;
3747     survivor_count= 1;
3748
3749     for(i=start_i; i<=last_non_zero; i++){
3750         int level_index, j, zero_distortion;
3751         int dct_coeff= FFABS(block[ scantable[i] ]);
3752         int best_score=256*256*256*120;
3753
3754         if (s->dsp.fdct == ff_fdct_ifast)
3755             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3756         zero_distortion= dct_coeff*dct_coeff;
3757
3758         for(level_index=0; level_index < coeff_count[i]; level_index++){
3759             int distortion;
3760             int level= coeff[level_index][i];
3761             const int alevel= FFABS(level);
3762             int unquant_coeff;
3763
3764             av_assert2(level);
3765
3766             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3767                 unquant_coeff= alevel*qmul + qadd;
3768             }else{ //MPEG1
3769                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3770                 if(s->mb_intra){
3771                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3772                         unquant_coeff =   (unquant_coeff - 1) | 1;
3773                 }else{
3774                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3775                         unquant_coeff =   (unquant_coeff - 1) | 1;
3776                 }
3777                 unquant_coeff<<= 3;
3778             }
3779
3780             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3781             level+=64;
3782             if((level&(~127)) == 0){
3783                 for(j=survivor_count-1; j>=0; j--){
3784                     int run= i - survivor[j];
3785                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3786                     score += score_tab[i-run];
3787
3788                     if(score < best_score){
3789                         best_score= score;
3790                         run_tab[i+1]= run;
3791                         level_tab[i+1]= level-64;
3792                     }
3793                 }
3794
3795                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3796                     for(j=survivor_count-1; j>=0; j--){
3797                         int run= i - survivor[j];
3798                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3799                         score += score_tab[i-run];
3800                         if(score < last_score){
3801                             last_score= score;
3802                             last_run= run;
3803                             last_level= level-64;
3804                             last_i= i+1;
3805                         }
3806                     }
3807                 }
3808             }else{
3809                 distortion += esc_length*lambda;
3810                 for(j=survivor_count-1; j>=0; j--){
3811                     int run= i - survivor[j];
3812                     int score= distortion + score_tab[i-run];
3813
3814                     if(score < best_score){
3815                         best_score= score;
3816                         run_tab[i+1]= run;
3817                         level_tab[i+1]= level-64;
3818                     }
3819                 }
3820
3821                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3822                   for(j=survivor_count-1; j>=0; j--){
3823                         int run= i - survivor[j];
3824                         int score= distortion + score_tab[i-run];
3825                         if(score < last_score){
3826                             last_score= score;
3827                             last_run= run;
3828                             last_level= level-64;
3829                             last_i= i+1;
3830                         }
3831                     }
3832                 }
3833             }
3834         }
3835
3836         score_tab[i+1]= best_score;
3837
3838                 //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3839         if(last_non_zero <= 27){
3840             for(; survivor_count; survivor_count--){
3841                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3842                     break;
3843             }
3844         }else{
3845             for(; survivor_count; survivor_count--){
3846                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3847                     break;
3848             }
3849         }
3850
3851         survivor[ survivor_count++ ]= i+1;
3852     }
3853
3854     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3855         last_score= 256*256*256*120;
3856         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3857             int score= score_tab[i];
3858             if(i) score += lambda*2; //FIXME be more exact?
3859
3860             if(score < last_score){
3861                 last_score= score;
3862                 last_i= i;
3863                 last_level= level_tab[i];
3864                 last_run= run_tab[i];
3865             }
3866         }
3867     }
3868
3869     s->coded_score[n] = last_score;
3870
3871     dc= FFABS(block[0]);
3872     last_non_zero= last_i - 1;
3873     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3874
3875     if(last_non_zero < start_i)
3876         return last_non_zero;
3877
3878     if(last_non_zero == 0 && start_i == 0){
3879         int best_level= 0;
3880         int best_score= dc * dc;
3881
3882         for(i=0; i<coeff_count[0]; i++){
3883             int level= coeff[i][0];
3884             int alevel= FFABS(level);
3885             int unquant_coeff, score, distortion;
3886
3887             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3888                     unquant_coeff= (alevel*qmul + qadd)>>3;
3889             }else{ //MPEG1
3890                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3891                     unquant_coeff =   (unquant_coeff - 1) | 1;
3892             }
3893             unquant_coeff = (unquant_coeff + 4) >> 3;
3894             unquant_coeff<<= 3 + 3;
3895
3896             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3897             level+=64;
3898             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3899             else                    score= distortion + esc_length*lambda;
3900
3901             if(score < best_score){
3902                 best_score= score;
3903                 best_level= level - 64;
3904             }
3905         }
3906         block[0]= best_level;
3907         s->coded_score[n] = best_score - dc*dc;
3908         if(best_level == 0) return -1;
3909         else                return last_non_zero;
3910     }
3911
3912     i= last_i;
3913     av_assert2(last_level);
3914
3915     block[ perm_scantable[last_non_zero] ]= last_level;
3916     i -= last_run + 1;
3917
3918     for(; i>start_i; i -= run_tab[i] + 1){
3919         block[ perm_scantable[i-1] ]= level_tab[i];
3920     }
3921
3922     return last_non_zero;
3923 }
3924
3925 //#define REFINE_STATS 1
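/* basis[][] caches the 64 8x8 DCT basis patterns (in the encoder's coefficient
 * permutation, scaled by BASIS_SHIFT) so that dct_quantize_refine() can apply a
 * single-coefficient change directly to the spatial-domain error via add_8x8basis()
 * instead of recomputing a full IDCT. */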
3926 static int16_t basis[64][64];
3927
3928 static void build_basis(uint8_t *perm){
3929     int i, j, x, y;
3930     emms_c();
3931     for(i=0; i<8; i++){
3932         for(j=0; j<8; j++){
3933             for(y=0; y<8; y++){
3934                 for(x=0; x<8; x++){
3935                     double s= 0.25*(1<<BASIS_SHIFT);
3936                     int index= 8*i + j;
3937                     int perm_index= perm[index];
3938                     if(i==0) s*= sqrt(0.5);
3939                     if(j==0) s*= sqrt(0.5);
3940                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3941                 }
3942             }
3943         }
3944     }
3945 }
3946
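/* Quantization refinement (quantizer_noise_shaping): rem[] holds the spatial-domain
 * reconstruction error of the currently quantized block, weight[] a per-coefficient
 * masking weight. The loop below repeatedly tries +/-1 changes to individual
 * coefficients, scoring each candidate against rem[] with try_8x8basis() (plus the
 * resulting change in VLC bit cost), and applies the most beneficial change until no
 * further change helps; analyze_gradient uses an fdct of the weighted error to steer
 * the search. */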
3947 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3948                         int16_t *block, int16_t *weight, int16_t *orig,
3949                         int n, int qscale){
3950     int16_t rem[64];
3951     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3952     const uint8_t *scantable= s->intra_scantable.scantable;
3953     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3954 //    unsigned int threshold1, threshold2;
3955 //    int bias=0;
3956     int run_tab[65];
3957     int prev_run=0;
3958     int prev_level=0;
3959     int qmul, qadd, start_i, last_non_zero, i, dc;
3960     uint8_t * length;
3961     uint8_t * last_length;
3962     int lambda;
3963     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3964 #ifdef REFINE_STATS
3965 static int count=0;
3966 static int after_last=0;
3967 static int to_zero=0;
3968 static int from_zero=0;
3969 static int raise=0;
3970 static int lower=0;
3971 static int messed_sign=0;
3972 #endif
3973
3974     if(basis[0][0] == 0)
3975         build_basis(s->dsp.idct_permutation);
3976
3977     qmul= qscale*2;
3978     qadd= (qscale-1)|1;
3979     if (s->mb_intra) {
3980         if (!s->h263_aic) {
3981             if (n < 4)
3982                 q = s->y_dc_scale;
3983             else
3984                 q = s->c_dc_scale;
3985         } else{
3986             /* For AIC we skip quant/dequant of INTRADC */
3987             q = 1;
3988             qadd=0;
3989         }
3990         q <<= RECON_SHIFT-3;
3991         /* note: block[0] is assumed to be positive */
3992         dc= block[0]*q;
3993 //        block[0] = (block[0] + (q >> 1)) / q;
3994         start_i = 1;
3995 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3996 //            bias= 1<<(QMAT_SHIFT-1);
3997         length     = s->intra_ac_vlc_length;
3998         last_length= s->intra_ac_vlc_last_length;
3999     } else {
4000         dc= 0;
4001         start_i = 0;
4002         length     = s->inter_ac_vlc_length;
4003         last_length= s->inter_ac_vlc_last_length;
4004     }
4005     last_non_zero = s->block_last_index[n];
4006
4007 #ifdef REFINE_STATS
4008 {START_TIMER
4009 #endif
4010     dc += (1<<(RECON_SHIFT-1));
4011     for(i=0; i<64; i++){
4012         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
4013     }
4014 #ifdef REFINE_STATS
4015 STOP_TIMER("memset rem[]")}
4016 #endif
4017     sum=0;
4018     for(i=0; i<64; i++){
4019         int one= 36;
4020         int qns=4;
4021         int w;
4022
4023         w= FFABS(weight[i]) + qns*one;
4024         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4025
4026         weight[i] = w;
4027 //        w=weight[i] = (63*qns + (w/2)) / w;
4028
4029         av_assert2(w>0);
4030         av_assert2(w<(1<<6));
4031         sum += w*w;
4032     }
4033     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4034 #ifdef REFINE_STATS
4035 {START_TIMER
4036 #endif
4037     run=0;
4038     rle_index=0;
4039     for(i=start_i; i<=last_non_zero; i++){
4040         int j= perm_scantable[i];
4041         const int level= block[j];
4042         int coeff;
4043
4044         if(level){
4045             if(level<0) coeff= qmul*level - qadd;
4046             else        coeff= qmul*level + qadd;
4047             run_tab[rle_index++]=run;
4048             run=0;
4049
4050             s->dsp.add_8x8basis(rem, basis[j], coeff);
4051         }else{
4052             run++;
4053         }
4054     }
4055 #ifdef REFINE_STATS
4056 if(last_non_zero>0){
4057 STOP_TIMER("init rem[]")
4058 }
4059 }
4060
4061 {START_TIMER
4062 #endif
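    /* Each pass below evaluates every possible +-1 change of a single
     * coefficient; best_score starts as the cost of leaving the block
     * unchanged, and the loop stops once no candidate improves on it. */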
4063     for(;;){
4064         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4065         int best_coeff=0;
4066         int best_change=0;
4067         int run2, best_unquant_change=0, analyze_gradient;
4068 #ifdef REFINE_STATS
4069 {START_TIMER
4070 #endif
4071         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4072
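        /* When analyzing the gradient, d1[] receives the DCT of the weighted
         * residual; a zero coefficient is only raised if its new sign opposes
         * this gradient, pruning changes that cannot reduce the error. */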
4073         if(analyze_gradient){
4074 #ifdef REFINE_STATS
4075 {START_TIMER
4076 #endif
4077             for(i=0; i<64; i++){
4078                 int w= weight[i];
4079
4080                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4081             }
4082 #ifdef REFINE_STATS
4083 STOP_TIMER("rem*w*w")}
4084 {START_TIMER
4085 #endif
4086             s->dsp.fdct(d1);
4087 #ifdef REFINE_STATS
4088 STOP_TIMER("dct")}
4089 #endif
4090         }
4091
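        /* The intra DC coefficient is handled separately: it is dequantized
         * with the plain DC scale q (no qadd), must stay within 0..2047 after
         * dequantization, and only its distortion change is scored here. */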
4092         if(start_i){
4093             const int level= block[0];
4094             int change, old_coeff;
4095
4096             av_assert2(s->mb_intra);
4097
4098             old_coeff= q*level;
4099
4100             for(change=-1; change<=1; change+=2){
4101                 int new_level= level + change;
4102                 int score, new_coeff;
4103
4104                 new_coeff= q*new_level;
4105                 if(new_coeff >= 2048 || new_coeff < 0)
4106                     continue;
4107
4108                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4109                 if(score<best_score){
4110                     best_score= score;
4111                     best_coeff= 0;
4112                     best_change= change;
4113                     best_unquant_change= new_coeff - old_coeff;
4114                 }
4115             }
4116         }
4117
4118         run=0;
4119         rle_index=0;
4120         run2= run_tab[rle_index++];
4121         prev_level=0;
4122         prev_run=0;
4123
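        /* For every scan position (up to last_non_zero + 1 unless aggressive
         * noise shaping is enabled) try changing the coefficient by +-1.
         * The rate term is the change in VLC length, looked up via
         * length[]/last_length[] indexed by (run, level + 64); the distortion
         * term is the weighted error change from try_8x8basis(). */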
4124         for(i=start_i; i<64; i++){
4125             int j= perm_scantable[i];
4126             const int level= block[j];
4127             int change, old_coeff;
4128
4129             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4130                 break;
4131
4132             if(level){
4133                 if(level<0) old_coeff= qmul*level - qadd;
4134                 else        old_coeff= qmul*level + qadd;
4135                 run2= run_tab[rle_index++]; //FIXME: may read past the last run_tab entry
4136             }else{
4137                 old_coeff=0;
4138                 run2--;
4139                 av_assert2(run2>=0 || i >= last_non_zero );
4140             }
4141
4142             for(change=-1; change<=1; change+=2){
4143                 int new_level= level + change;
4144                 int score, new_coeff, unquant_change;
4145
4146                 score=0;
4147                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4148                    continue;
4149
4150                 if(new_level){
4151                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4152                     else            new_coeff= qmul*new_level + qadd;
4153                     if(new_coeff >= 2048 || new_coeff <= -2048)
4154                         continue;
4155                     //FIXME check for overflow
4156
4157                     if(level){
4158                         if(level < 63 && level > -63){
4159                             if(i < last_non_zero)
4160                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4161                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4162                             else
4163                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4164                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4165                         }
4166                     }else{
4167                         av_assert2(FFABS(new_level)==1);
4168
4169                         if(analyze_gradient){
4170                             int g= d1[ scantable[i] ];
4171                             if(g && (g^new_level) >= 0)
4172                                 continue;
4173                         }
4174
4175                         if(i < last_non_zero){
4176                             int next_i= i + run2 + 1;
4177                             int next_level= block[ perm_scantable[next_i] ] + 64;
4178
4179                             if(next_level&(~127))
4180                                 next_level= 0;
4181
4182                             if(next_i < last_non_zero)
4183                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4184                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4185                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4186                             else
4187                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4188                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4189                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4190                         }else{
4191                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4192                             if(prev_level){
4193                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4194                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4195                             }
4196                         }
4197                     }
4198                 }else{
4199                     new_coeff=0;
4200                     av_assert2(FFABS(level)==1);
4201
4202                     if(i < last_non_zero){
4203                         int next_i= i + run2 + 1;
4204                         int next_level= block[ perm_scantable[next_i] ] + 64;
4205
4206                         if(next_level&(~127))
4207                             next_level= 0;
4208
4209                         if(next_i < last_non_zero)
4210                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4211                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4212                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4213                         else
4214                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4215                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4216                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4217                     }else{
4218                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4219                         if(prev_level){
4220                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4221                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4222                         }
4223                     }
4224                 }
4225
4226                 score *= lambda;
4227
4228                 unquant_change= new_coeff - old_coeff;
4229                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4230
4231                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4232                 if(score<best_score){
4233                     best_score= score;
4234                     best_coeff= i;
4235                     best_change= change;
4236                     best_unquant_change= unquant_change;
4237                 }
4238             }
4239             if(level){
4240                 prev_level= level + 64;
4241                 if(prev_level&(~127))
4242                     prev_level= 0;
4243                 prev_run= run;
4244                 run=0;
4245             }else{
4246                 run++;
4247             }
4248         }
4249 #ifdef REFINE_STATS
4250 STOP_TIMER("iterative step")}
4251 #endif
4252
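        /* Apply the best +-1 change found in this pass: update the block,
         * extend or shrink last_non_zero, rebuild run_tab[] and fold the
         * change into rem[]; if nothing improved the score, we are done. */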
4253         if(best_change){
4254             int j= perm_scantable[ best_coeff ];
4255
4256             block[j] += best_change;
4257
4258             if(best_coeff > last_non_zero){
4259                 last_non_zero= best_coeff;
4260                 av_assert2(block[j]);
4261 #ifdef REFINE_STATS
4262 after_last++;
4263 #endif
4264             }else{
4265 #ifdef REFINE_STATS
4266 if(block[j]){
4267     if(block[j] - best_change){
4268         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4269             raise++;
4270         }else{
4271             lower++;
4272         }
4273     }else{
4274         from_zero++;
4275     }
4276 }else{
4277     to_zero++;
4278 }
4279 #endif
4280                 for(; last_non_zero>=start_i; last_non_zero--){
4281                     if(block[perm_scantable[last_non_zero]])
4282                         break;
4283                 }
4284             }
4285 #ifdef REFINE_STATS
4286 count++;
4287 if(256*256*256*64 % count == 0){
4288     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4289 }
4290 #endif
4291             run=0;
4292             rle_index=0;
4293             for(i=start_i; i<=last_non_zero; i++){
4294                 int j= perm_scantable[i];
4295                 const int level= block[j];
4296
4297                  if(level){
4298                      run_tab[rle_index++]=run;
4299                      run=0;
4300                  }else{
4301                      run++;
4302                  }
4303             }
4304
4305             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4306         }else{
4307             break;
4308         }
4309     }
4310 #ifdef REFINE_STATS
4311 if(last_non_zero>0){
4312 STOP_TIMER("iterative search")
4313 }
4314 }
4315 #endif
4316
4317     return last_non_zero;
4318 }
4319
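/*
 * Generic C quantizer: forward DCT, optional DCT-domain denoising, separate
 * handling of the intra DC coefficient, then dead-zone quantization of the
 * AC coefficients as (|block[j]| * qmat[j] + bias) >> QMAT_SHIFT.
 * *overflow is set if any level exceeds max_qcoeff, and the block is
 * permuted into the IDCT's coefficient order (if needed) before returning
 * the index of the last nonzero coefficient.
 */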
4320 int ff_dct_quantize_c(MpegEncContext *s,
4321                         int16_t *block, int n,
4322                         int qscale, int *overflow)
4323 {
4324     int i, j, level, last_non_zero, q, start_i;
4325     const int *qmat;
4326     const uint8_t *scantable= s->intra_scantable.scantable;
4327     int bias;
4328     int max=0;
4329     unsigned int threshold1, threshold2;
4330
4331     s->dsp.fdct (block);
4332
4333     if(s->dct_error_sum)
4334         s->denoise_dct(s, block);
4335
4336     if (s->mb_intra) {
4337         if (!s->h263_aic) {
4338             if (n < 4)
4339                 q = s->y_dc_scale;
4340             else
4341                 q = s->c_dc_scale;
4342             q = q << 3;
4343         } else
4344             /* For AIC we skip quant/dequant of INTRADC */
4345             q = 1 << 3;
4346
4347         /* note: block[0] is assumed to be positive */
4348         block[0] = (block[0] + (q >> 1)) / q;
4349         start_i = 1;
4350         last_non_zero = 0;
4351         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4352         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4353     } else {
4354         start_i = 0;
4355         last_non_zero = -1;
4356         qmat = s->q_inter_matrix[qscale];
4357         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4358     }
4359     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4360     threshold2= (threshold1<<1);
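    /* threshold1 == (1<<QMAT_SHIFT) - bias - 1, so the unsigned comparison
     * (unsigned)(level + threshold1) > threshold2 below is a branch-free test
     * for |level| > threshold1, i.e. for coefficients that quantize to a
     * nonzero value. */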
4361     for(i=63;i>=start_i;i--) {
4362         j = scantable[i];
4363         level = block[j] * qmat[j];
4364
4365         if(((unsigned)(level+threshold1))>threshold2){
4366             last_non_zero = i;
4367             break;
4368         }else{
4369             block[j]=0;
4370         }
4371     }
4372     for(i=start_i; i<=last_non_zero; i++) {
4373         j = scantable[i];
4374         level = block[j] * qmat[j];
4375
4376 //        if(   bias+level >= (1<<QMAT_SHIFT)
4377 //           || bias-level >= (1<<QMAT_SHIFT)){
4378         if(((unsigned)(level+threshold1))>threshold2){
4379             if(level>0){
4380                 level= (bias + level)>>QMAT_SHIFT;
4381                 block[j]= level;
4382             }else{
4383                 level= (bias - level)>>QMAT_SHIFT;
4384                 block[j]= -level;
4385             }
4386             max |=level;
4387         }else{
4388             block[j]=0;
4389         }
4390     }
4391     *overflow= s->max_qcoeff < max; //overflow might have happened
4392
4393     /* we need this permutation so that the IDCT is corrected; only the nonzero elements are permuted */
4394     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4395         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4396
4397     return last_non_zero;
4398 }
4399
4400 #define OFFSET(x) offsetof(MpegEncContext, x)
4401 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4402 static const AVOption h263_options[] = {
4403     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4404     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4405     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4406     FF_MPV_COMMON_OPTS
4407     { NULL },
4408 };
4409
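/* Example (not part of this file; assumes the standard ffmpeg CLI, which
 * exposes private encoder options by name, and an H.263-legal frame size):
 *   ffmpeg -i input.avi -c:v h263 -obmc 1 -structured_slices 1 output.avi
 */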
4410 static const AVClass h263_class = {
4411     .class_name = "H.263 encoder",
4412     .item_name  = av_default_item_name,
4413     .option     = h263_options,
4414     .version    = LIBAVUTIL_VERSION_INT,
4415 };
4416
4417 AVCodec ff_h263_encoder = {
4418     .name           = "h263",
4419     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4420     .type           = AVMEDIA_TYPE_VIDEO,
4421     .id             = AV_CODEC_ID_H263,
4422     .priv_data_size = sizeof(MpegEncContext),
4423     .init           = ff_MPV_encode_init,
4424     .encode2        = ff_MPV_encode_picture,
4425     .close          = ff_MPV_encode_end,
4426     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4427     .priv_class     = &h263_class,
4428 };
4429
4430 static const AVOption h263p_options[] = {
4431     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4432     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4433     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4434     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4435     FF_MPV_COMMON_OPTS
4436     { NULL },
4437 };
4438 static const AVClass h263p_class = {
4439     .class_name = "H.263p encoder",
4440     .item_name  = av_default_item_name,
4441     .option     = h263p_options,
4442     .version    = LIBAVUTIL_VERSION_INT,
4443 };
4444
4445 AVCodec ff_h263p_encoder = {
4446     .name           = "h263p",
4447     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4448     .type           = AVMEDIA_TYPE_VIDEO,
4449     .id             = AV_CODEC_ID_H263P,
4450     .priv_data_size = sizeof(MpegEncContext),
4451     .init           = ff_MPV_encode_init,
4452     .encode2        = ff_MPV_encode_picture,
4453     .close          = ff_MPV_encode_end,
4454     .capabilities   = CODEC_CAP_SLICE_THREADS,
4455     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4456     .priv_class     = &h263p_class,
4457 };
4458
4459 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4460
4461 AVCodec ff_msmpeg4v2_encoder = {
4462     .name           = "msmpeg4v2",
4463     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4464     .type           = AVMEDIA_TYPE_VIDEO,
4465     .id             = AV_CODEC_ID_MSMPEG4V2,
4466     .priv_data_size = sizeof(MpegEncContext),
4467     .init           = ff_MPV_encode_init,
4468     .encode2        = ff_MPV_encode_picture,
4469     .close          = ff_MPV_encode_end,
4470     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4471     .priv_class     = &msmpeg4v2_class,
4472 };
4473
4474 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4475
4476 AVCodec ff_msmpeg4v3_encoder = {
4477     .name           = "msmpeg4",
4478     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4479     .type           = AVMEDIA_TYPE_VIDEO,
4480     .id             = AV_CODEC_ID_MSMPEG4V3,
4481     .priv_data_size = sizeof(MpegEncContext),
4482     .init           = ff_MPV_encode_init,
4483     .encode2        = ff_MPV_encode_picture,
4484     .close          = ff_MPV_encode_end,
4485     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4486     .priv_class     = &msmpeg4v3_class,
4487 };
4488
4489 FF_MPV_GENERIC_CLASS(wmv1)
4490
4491 AVCodec ff_wmv1_encoder = {
4492     .name           = "wmv1",
4493     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4494     .type           = AVMEDIA_TYPE_VIDEO,
4495     .id             = AV_CODEC_ID_WMV1,
4496     .priv_data_size = sizeof(MpegEncContext),
4497     .init           = ff_MPV_encode_init,
4498     .encode2        = ff_MPV_encode_picture,
4499     .close          = ff_MPV_encode_end,
4500     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4501     .priv_class     = &wmv1_class,
4502 };