1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "avcodec.h"
38 #include "dct.h"
39 #include "dsputil.h"
40 #include "mpeg12.h"
41 #include "mpegvideo.h"
42 #include "h261.h"
43 #include "h263.h"
44 #include "mathops.h"
45 #include "mjpegenc.h"
46 #include "msmpeg4.h"
47 #include "faandct.h"
48 #include "thread.h"
49 #include "aandcttab.h"
50 #include "flv.h"
51 #include "mpeg4video.h"
52 #include "internal.h"
53 #include "bytestream.h"
54 #include <limits.h>
55 #include "sp5x.h"
56
57 static int encode_picture(MpegEncContext *s, int picture_number);
58 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
59 static int sse_mb(MpegEncContext *s);
60 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
61 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
62
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
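/*
 * ff_convert_matrix() precomputes fixed-point reciprocals of
 * qscale * quant_matrix so the quantizers can replace a per-coefficient
 * division with a multiply and a shift. Roughly (a sketch of how qmat is
 * consumed by ff_dct_quantize_c / dct_quantize_trellis_c):
 *
 *     level = (block[i] * qmat[qscale][i]) >> QMAT_SHIFT;
 *     // i.e. level ~= block[i] / (qscale * quant_matrix[i])
 *
 * qmat16 holds a 16-bit variant (with a rounding bias in qmat16[..][1][..])
 * for the 16-bit/SIMD quantizer path, which uses QMAT_SHIFT_MMX instead.
 */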
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = qscale * quant_matrix[i]
88                  * (no ff_aanscales factor is applied in this branch)
89                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x)
90                  *                           >= (1 << QMAT_SHIFT) / 7905 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x)
114                  *                           >= (1 << QMAT_SHIFT) / 7905 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
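    /* lambda -> qscale: 139 / 2^(FF_LAMBDA_SHIFT + 7) is roughly
     * 1 / FF_QP2LAMBDA (~1/118), so this computes qscale ~= lambda / FF_QP2LAMBDA,
     * rounded and then clipped to the user qmin/qmax range; the same conversion
     * is used per macroblock in ff_init_qscale_tab(). lambda2 below is roughly
     * lambda^2 / FF_LAMBDA_SCALE. */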
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void update_duplicate_context_after_me(MpegEncContext *dst,
188                                               MpegEncContext *src)
189 {
190 #define COPY(a) dst->a= src->a
191     COPY(pict_type);
192     COPY(current_picture);
193     COPY(f_code);
194     COPY(b_code);
195     COPY(qscale);
196     COPY(lambda);
197     COPY(lambda2);
198     COPY(picture_in_gop_number);
199     COPY(gop_picture_number);
200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
201     COPY(progressive_frame);    // FIXME don't set in encode_header
202     COPY(partitioned_frame);    // FIXME don't set in encode_header
203 #undef COPY
204 }
205
206 /**
207  * Set the given MpegEncContext to defaults for encoding.
208  * The changed fields will not depend upon the prior state of the MpegEncContext.
209  */
210 static void MPV_encode_defaults(MpegEncContext *s)
211 {
212     int i;
213     ff_MPV_common_defaults(s);
214
215     for (i = -16; i < 16; i++) {
216         default_fcode_tab[i + MAX_MV] = 1;
217     }
218     s->me.mv_penalty = default_mv_penalty;
219     s->fcode_tab     = default_fcode_tab;
220
221     s->input_picture_number  = 0;
222     s->picture_in_gop_number = 0;
223 }
224
225 av_cold int ff_dct_encode_init(MpegEncContext *s) {
226     if (ARCH_X86)
227         ff_dct_encode_init_x86(s);
228
229     ff_h263dsp_init(&s->h263dsp);
230     if (!s->dct_quantize)
231         s->dct_quantize = ff_dct_quantize_c;
232     if (!s->denoise_dct)
233         s->denoise_dct  = denoise_dct_c;
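    /* Remember the plain (non-trellis) quantizer as fast_dct_quantize before
     * optionally switching dct_quantize to the trellis search below. */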
234     s->fast_dct_quantize = s->dct_quantize;
235     if (s->avctx->trellis)
236         s->dct_quantize  = dct_quantize_trellis_c;
237
238     return 0;
239 }
240
241 /* init video encoder */
242 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
243 {
244     MpegEncContext *s = avctx->priv_data;
245     int i, ret;
246
247     MPV_encode_defaults(s);
248
249     switch (avctx->codec_id) {
250     case AV_CODEC_ID_MPEG2VIDEO:
251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
253             av_log(avctx, AV_LOG_ERROR,
254                    "only YUV420 and YUV422 are supported\n");
255             return -1;
256         }
257         break;
258     case AV_CODEC_ID_MJPEG:
259     case AV_CODEC_ID_AMV:
260         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
261             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
262             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
263             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
264               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
265               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
266              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
267             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
268             return -1;
269         }
270         break;
271     default:
272         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
273             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
274             return -1;
275         }
276     }
277
278     switch (avctx->pix_fmt) {
279     case AV_PIX_FMT_YUVJ444P:
280     case AV_PIX_FMT_YUV444P:
281         s->chroma_format = CHROMA_444;
282         break;
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_WARNING,
300                "keyframe interval too large!, reducing it from %d to %d\n",
301                avctx->gop_size, 600);
302         avctx->gop_size = 600;
303     }
304     s->gop_size     = avctx->gop_size;
305     s->avctx        = avctx;
306     s->flags        = avctx->flags;
307     s->flags2       = avctx->flags2;
308     if (avctx->max_b_frames > MAX_B_FRAMES) {
309         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
310                "is %d.\n", MAX_B_FRAMES);
311         avctx->max_b_frames = MAX_B_FRAMES;
312     }
313     s->max_b_frames = avctx->max_b_frames;
314     s->codec_id     = avctx->codec->id;
315     s->strict_std_compliance = avctx->strict_std_compliance;
316     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
317     s->mpeg_quant         = avctx->mpeg_quant;
318     s->rtp_mode           = !!avctx->rtp_payload_size;
319     s->intra_dc_precision = avctx->intra_dc_precision;
320     s->user_specified_pts = AV_NOPTS_VALUE;
321
322     if (s->gop_size <= 1) {
323         s->intra_only = 1;
324         s->gop_size   = 12;
325     } else {
326         s->intra_only = 0;
327     }
328
329     s->me_method = avctx->me_method;
330
331     /* Fixed QSCALE */
332     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
333
334     s->adaptive_quant = (s->avctx->lumi_masking ||
335                          s->avctx->dark_masking ||
336                          s->avctx->temporal_cplx_masking ||
337                          s->avctx->spatial_cplx_masking  ||
338                          s->avctx->p_masking      ||
339                          s->avctx->border_masking ||
340                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
341                         !s->fixed_qscale;
342
343     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
344
345     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
346         switch(avctx->codec_id) {
347         case AV_CODEC_ID_MPEG1VIDEO:
348         case AV_CODEC_ID_MPEG2VIDEO:
349             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
350             break;
351         case AV_CODEC_ID_MPEG4:
352         case AV_CODEC_ID_MSMPEG4V1:
353         case AV_CODEC_ID_MSMPEG4V2:
354         case AV_CODEC_ID_MSMPEG4V3:
355             if       (avctx->rc_max_rate >= 15000000) {
356                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
357             } else if(avctx->rc_max_rate >=  2000000) {
358                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
359             } else if(avctx->rc_max_rate >=   384000) {
360                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
361             } else
362                 avctx->rc_buffer_size = 40;
363             avctx->rc_buffer_size *= 16384;
364             break;
365         }
366         if (avctx->rc_buffer_size) {
367             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
368         }
369     }
370
371     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
372         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
373         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
374             return -1;
375     }
376
377     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
378         av_log(avctx, AV_LOG_INFO,
379                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
380     }
381
382     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
383         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
384         return -1;
385     }
386
387     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
388         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
389         return -1;
390     }
391
392     if (avctx->rc_max_rate &&
393         avctx->rc_max_rate == avctx->bit_rate &&
394         avctx->rc_max_rate != avctx->rc_min_rate) {
395         av_log(avctx, AV_LOG_INFO,
396                "impossible bitrate constraints, this will fail\n");
397     }
398
399     if (avctx->rc_buffer_size &&
400         avctx->bit_rate * (int64_t)avctx->time_base.num >
401             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
402         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
403         return -1;
404     }
405
406     if (!s->fixed_qscale &&
407         avctx->bit_rate * av_q2d(avctx->time_base) >
408             avctx->bit_rate_tolerance) {
409         av_log(avctx, AV_LOG_ERROR,
410                "bitrate tolerance %d too small for bitrate %d\n", avctx->bit_rate_tolerance, avctx->bit_rate);
411         return -1;
412     }
413
414     if (s->avctx->rc_max_rate &&
415         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
416         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
417          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
418         90000LL * (avctx->rc_buffer_size - 1) >
419             s->avctx->rc_max_rate * 0xFFFFLL) {
420         av_log(avctx, AV_LOG_INFO,
421                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
422                "specified vbv buffer is too large for the given bitrate!\n");
423     }
424
425     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
426         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
427         s->codec_id != AV_CODEC_ID_FLV1) {
428         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
429         return -1;
430     }
431
432     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
433         av_log(avctx, AV_LOG_ERROR,
434                "OBMC is only supported with simple mb decision\n");
435         return -1;
436     }
437
438     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
439         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
440         return -1;
441     }
442
443     if (s->max_b_frames                    &&
444         s->codec_id != AV_CODEC_ID_MPEG4      &&
445         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
446         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
447         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
448         return -1;
449     }
450     if (s->max_b_frames < 0) {
451         av_log(avctx, AV_LOG_ERROR,
452                "max b frames must be 0 or positive for mpegvideo based encoders\n");
453         return -1;
454     }
455
456     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
457          s->codec_id == AV_CODEC_ID_H263  ||
458          s->codec_id == AV_CODEC_ID_H263P) &&
459         (avctx->sample_aspect_ratio.num > 255 ||
460          avctx->sample_aspect_ratio.den > 255)) {
461         av_log(avctx, AV_LOG_WARNING,
462                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
463                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
464         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
465                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
466     }
467
468     if ((s->codec_id == AV_CODEC_ID_H263  ||
469          s->codec_id == AV_CODEC_ID_H263P) &&
470         (avctx->width  > 2048 ||
471          avctx->height > 1152 )) {
472         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
473         return -1;
474     }
475     if ((s->codec_id == AV_CODEC_ID_H263  ||
476          s->codec_id == AV_CODEC_ID_H263P) &&
477         ((avctx->width &3) ||
478          (avctx->height&3) )) {
479         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
480         return -1;
481     }
482
483     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
484         (avctx->width  > 4095 ||
485          avctx->height > 4095 )) {
486         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
487         return -1;
488     }
489
490     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
491         (avctx->width  > 16383 ||
492          avctx->height > 16383 )) {
493         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
494         return -1;
495     }
496
497     if (s->codec_id == AV_CODEC_ID_RV10 &&
498         (avctx->width &15 ||
499          avctx->height&15 )) {
500         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
501         return AVERROR(EINVAL);
502     }
503
504     if (s->codec_id == AV_CODEC_ID_RV20 &&
505         (avctx->width &3 ||
506          avctx->height&3 )) {
507         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
508         return AVERROR(EINVAL);
509     }
510
511     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
512          s->codec_id == AV_CODEC_ID_WMV2) &&
513          avctx->width & 1) {
514          av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 2\n");
515          return -1;
516     }
517
518     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
519         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
520         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
521         return -1;
522     }
523
524     // FIXME mpeg2 uses that too
525     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
526                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
527         av_log(avctx, AV_LOG_ERROR,
528                "mpeg2 style quantization not supported by codec\n");
529         return -1;
530     }
531
532     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
533         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
534         return -1;
535     }
536
537     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
538         s->avctx->mb_decision != FF_MB_DECISION_RD) {
539         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
540         return -1;
541     }
542
543     if (s->avctx->scenechange_threshold < 1000000000 &&
544         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
545         av_log(avctx, AV_LOG_ERROR,
546                "closed gop with scene change detection are not supported yet, "
547                "set threshold to 1000000000\n");
548         return -1;
549     }
550
551     if (s->flags & CODEC_FLAG_LOW_DELAY) {
552         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
553             av_log(avctx, AV_LOG_ERROR,
554                   "low delay forcing is only available for mpeg2\n");
555             return -1;
556         }
557         if (s->max_b_frames != 0) {
558             av_log(avctx, AV_LOG_ERROR,
559                    "b frames cannot be used with low delay\n");
560             return -1;
561         }
562     }
563
564     if (s->q_scale_type == 1) {
565         if (avctx->qmax > 12) {
566             av_log(avctx, AV_LOG_ERROR,
567                    "non linear quant only supports qmax <= 12 currently\n");
568             return -1;
569         }
570     }
571
572     if (s->avctx->thread_count > 1         &&
573         s->codec_id != AV_CODEC_ID_MPEG4      &&
574         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
575         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
576         s->codec_id != AV_CODEC_ID_MJPEG      &&
577         (s->codec_id != AV_CODEC_ID_H263P)) {
578         av_log(avctx, AV_LOG_ERROR,
579                "multi threaded encoding not supported by codec\n");
580         return -1;
581     }
582
583     if (s->avctx->thread_count < 1) {
584         av_log(avctx, AV_LOG_ERROR,
585                "automatic thread number detection not supported by codec, "
586                "patch welcome\n");
587         return -1;
588     }
589
590     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
591         s->rtp_mode = 1;
592
593     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
594         s->h263_slice_structured = 1;
595
596     if (!avctx->time_base.den || !avctx->time_base.num) {
597         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
598         return -1;
599     }
600
601     i = (INT_MAX / 2 + 128) >> 8;
602     if (avctx->mb_threshold >= i) {
603         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
604                i - 1);
605         return -1;
606     }
607
608     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
609         av_log(avctx, AV_LOG_INFO,
610                "notice: b_frame_strategy only affects the first pass\n");
611         avctx->b_frame_strategy = 0;
612     }
613
614     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
615     if (i > 1) {
616         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
617         avctx->time_base.den /= i;
618         avctx->time_base.num /= i;
619         //return -1;
620     }
621
622     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
623         // (a + x * 3 / 8) / x
624         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
625         s->inter_quant_bias = 0;
626     } else {
627         s->intra_quant_bias = 0;
628         // (a - x / 4) / x
629         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
630     }
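    /* The biases are fractions of one quantizer step in QUANT_BIAS_SHIFT fixed
     * point: +3/8 of a step for intra and -1/4 of a step for inter, matching
     * the "(a + x * 3 / 8) / x" and "(a - x / 4) / x" comments above. They are
     * applied as a rounding offset inside the quantizers. */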
631
632     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
633         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < min <= max\n");
634         return AVERROR(EINVAL);
635     }
636
637     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
638         s->intra_quant_bias = avctx->intra_quant_bias;
639     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
640         s->inter_quant_bias = avctx->inter_quant_bias;
641
642     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
643
644     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
645         s->avctx->time_base.den > (1 << 16) - 1) {
646         av_log(avctx, AV_LOG_ERROR,
647                "timebase %d/%d not supported by MPEG 4 standard, "
648                "the maximum admitted value for the timebase denominator "
649                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
650                (1 << 16) - 1);
651         return -1;
652     }
653     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
654
655     switch (avctx->codec->id) {
656     case AV_CODEC_ID_MPEG1VIDEO:
657         s->out_format = FMT_MPEG1;
658         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
659         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
660         break;
661     case AV_CODEC_ID_MPEG2VIDEO:
662         s->out_format = FMT_MPEG1;
663         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
664         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
665         s->rtp_mode   = 1;
666         break;
667     case AV_CODEC_ID_MJPEG:
668     case AV_CODEC_ID_AMV:
669         s->out_format = FMT_MJPEG;
670         s->intra_only = 1; /* force intra only for jpeg */
671         if (!CONFIG_MJPEG_ENCODER ||
672             ff_mjpeg_encode_init(s) < 0)
673             return -1;
674         avctx->delay = 0;
675         s->low_delay = 1;
676         break;
677     case AV_CODEC_ID_H261:
678         if (!CONFIG_H261_ENCODER)
679             return -1;
680         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
681             av_log(avctx, AV_LOG_ERROR,
682                    "The specified picture size of %dx%d is not valid for the "
683                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
684                     s->width, s->height);
685             return -1;
686         }
687         s->out_format = FMT_H261;
688         avctx->delay  = 0;
689         s->low_delay  = 1;
690         break;
691     case AV_CODEC_ID_H263:
692         if (!CONFIG_H263_ENCODER)
693             return -1;
694         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
695                              s->width, s->height) == 8) {
696             av_log(avctx, AV_LOG_ERROR,
697                    "The specified picture size of %dx%d is not valid for "
698                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
699                    "352x288, 704x576, and 1408x1152. "
700                    "Try H.263+.\n", s->width, s->height);
701             return -1;
702         }
703         s->out_format = FMT_H263;
704         avctx->delay  = 0;
705         s->low_delay  = 1;
706         break;
707     case AV_CODEC_ID_H263P:
708         s->out_format = FMT_H263;
709         s->h263_plus  = 1;
710         /* Fx */
711         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
712         s->modified_quant  = s->h263_aic;
713         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
714         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
715
716         /* /Fx */
717         /* These are just to be sure */
718         avctx->delay = 0;
719         s->low_delay = 1;
720         break;
721     case AV_CODEC_ID_FLV1:
722         s->out_format      = FMT_H263;
723         s->h263_flv        = 2; /* format = 1; 11-bit codes */
724         s->unrestricted_mv = 1;
725         s->rtp_mode  = 0; /* don't allow GOB */
726         avctx->delay = 0;
727         s->low_delay = 1;
728         break;
729     case AV_CODEC_ID_RV10:
730         s->out_format = FMT_H263;
731         avctx->delay  = 0;
732         s->low_delay  = 1;
733         break;
734     case AV_CODEC_ID_RV20:
735         s->out_format      = FMT_H263;
736         avctx->delay       = 0;
737         s->low_delay       = 1;
738         s->modified_quant  = 1;
739         s->h263_aic        = 1;
740         s->h263_plus       = 1;
741         s->loop_filter     = 1;
742         s->unrestricted_mv = 0;
743         break;
744     case AV_CODEC_ID_MPEG4:
745         s->out_format      = FMT_H263;
746         s->h263_pred       = 1;
747         s->unrestricted_mv = 1;
748         s->low_delay       = s->max_b_frames ? 0 : 1;
749         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
750         break;
751     case AV_CODEC_ID_MSMPEG4V2:
752         s->out_format      = FMT_H263;
753         s->h263_pred       = 1;
754         s->unrestricted_mv = 1;
755         s->msmpeg4_version = 2;
756         avctx->delay       = 0;
757         s->low_delay       = 1;
758         break;
759     case AV_CODEC_ID_MSMPEG4V3:
760         s->out_format        = FMT_H263;
761         s->h263_pred         = 1;
762         s->unrestricted_mv   = 1;
763         s->msmpeg4_version   = 3;
764         s->flipflop_rounding = 1;
765         avctx->delay         = 0;
766         s->low_delay         = 1;
767         break;
768     case AV_CODEC_ID_WMV1:
769         s->out_format        = FMT_H263;
770         s->h263_pred         = 1;
771         s->unrestricted_mv   = 1;
772         s->msmpeg4_version   = 4;
773         s->flipflop_rounding = 1;
774         avctx->delay         = 0;
775         s->low_delay         = 1;
776         break;
777     case AV_CODEC_ID_WMV2:
778         s->out_format        = FMT_H263;
779         s->h263_pred         = 1;
780         s->unrestricted_mv   = 1;
781         s->msmpeg4_version   = 5;
782         s->flipflop_rounding = 1;
783         avctx->delay         = 0;
784         s->low_delay         = 1;
785         break;
786     default:
787         return -1;
788     }
789
790     avctx->has_b_frames = !s->low_delay;
791
792     s->encoding = 1;
793
794     s->progressive_frame    =
795     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
796                                                 CODEC_FLAG_INTERLACED_ME) ||
797                                 s->alternate_scan);
798
799     /* init */
800     if (ff_MPV_common_init(s) < 0)
801         return -1;
802
803     s->avctx->coded_frame = &s->current_picture.f;
804
805     if (s->msmpeg4_version) {
806         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
807                           2 * 2 * (MAX_LEVEL + 1) *
808                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
809     }
810     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
811
812     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
813     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
814     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
818     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
819                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
820     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
821                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
822
823     if (s->avctx->noise_reduction) {
824         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
825                           2 * 64 * sizeof(uint16_t), fail);
826     }
827
828     ff_dct_encode_init(s);
829
830     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
831         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
832
833     s->quant_precision = 5;
834
835     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
836     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
837
838     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
839         ff_h261_encode_init(s);
840     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
841         ff_h263_encode_init(s);
842     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
843         ff_msmpeg4_encode_init(s);
844     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
845         && s->out_format == FMT_MPEG1)
846         ff_mpeg1_encode_init(s);
847
848     /* init q matrix */
849     for (i = 0; i < 64; i++) {
850         int j = s->dsp.idct_permutation[i];
851         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
852             s->mpeg_quant) {
853             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
854             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
855         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
856             s->intra_matrix[j] =
857             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
858         } else {
859             /* mpeg1/2 */
860             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
861             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
862         }
863         if (s->avctx->intra_matrix)
864             s->intra_matrix[j] = s->avctx->intra_matrix[i];
865         if (s->avctx->inter_matrix)
866             s->inter_matrix[j] = s->avctx->inter_matrix[i];
867     }
868
869     /* precompute matrix */
870     /* for mjpeg, we do include qscale in the matrix */
871     if (s->out_format != FMT_MJPEG) {
872         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
873                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
874                           31, 1);
875         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
876                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
877                           31, 0);
878     }
879
880     if (ff_rate_control_init(s) < 0)
881         return -1;
882
883 #if FF_API_ERROR_RATE
884     FF_DISABLE_DEPRECATION_WARNINGS
885     if (avctx->error_rate)
886         s->error_rate = avctx->error_rate;
887     FF_ENABLE_DEPRECATION_WARNINGS;
888 #endif
889
890     if (avctx->b_frame_strategy == 2) {
891         for (i = 0; i < s->max_b_frames + 2; i++) {
892             s->tmp_frames[i] = av_frame_alloc();
893             if (!s->tmp_frames[i])
894                 return AVERROR(ENOMEM);
895
896             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
897             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
898             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
899
900             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
901             if (ret < 0)
902                 return ret;
903         }
904     }
905
906     return 0;
907 fail:
908     ff_MPV_encode_end(avctx);
909     return AVERROR_UNKNOWN;
910 }
911
912 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
913 {
914     MpegEncContext *s = avctx->priv_data;
915     int i;
916
917     ff_rate_control_uninit(s);
918
919     ff_MPV_common_end(s);
920     if (CONFIG_MJPEG_ENCODER &&
921         s->out_format == FMT_MJPEG)
922         ff_mjpeg_encode_close(s);
923
924     av_freep(&avctx->extradata);
925
926     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
927         av_frame_free(&s->tmp_frames[i]);
928
929     ff_free_picture_tables(&s->new_picture);
930     ff_mpeg_unref_picture(s, &s->new_picture);
931
932     av_freep(&s->avctx->stats_out);
933     av_freep(&s->ac_stats);
934
935     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
936     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
937     s->q_chroma_intra_matrix=   NULL;
938     s->q_chroma_intra_matrix16= NULL;
939     av_freep(&s->q_intra_matrix);
940     av_freep(&s->q_inter_matrix);
941     av_freep(&s->q_intra_matrix16);
942     av_freep(&s->q_inter_matrix16);
943     av_freep(&s->input_picture);
944     av_freep(&s->reordered_input_picture);
945     av_freep(&s->dct_offset);
946
947     return 0;
948 }
949
950 static int get_sae(uint8_t *src, int ref, int stride)
951 {
952     int x,y;
953     int acc = 0;
954
955     for (y = 0; y < 16; y++) {
956         for (x = 0; x < 16; x++) {
957             acc += FFABS(src[x + y * stride] - ref);
958         }
959     }
960
961     return acc;
962 }
963
964 static int get_intra_count(MpegEncContext *s, uint8_t *src,
965                            uint8_t *ref, int stride)
966 {
967     int x, y, w, h;
968     int acc = 0;
969
970     w = s->width  & ~15;
971     h = s->height & ~15;
972
973     for (y = 0; y < h; y += 16) {
974         for (x = 0; x < w; x += 16) {
975             int offset = x + y * stride;
976             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
977                                      16);
978             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
979             int sae  = get_sae(src + offset, mean, stride);
980
981             acc += sae + 500 < sad;
982         }
983     }
984     return acc;
985 }
986
987
988 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
989 {
990     Picture *pic = NULL;
991     int64_t pts;
992     int i, display_picture_number = 0, ret;
993     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
994                                                  (s->low_delay ? 0 : 1);
995     int direct = 1;
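    /* direct == 1 means the caller's frame can be referenced as-is (it has its
     * own buffer and matches our line sizes); otherwise it is copied into an
     * internally allocated Picture below. */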
996
997     if (pic_arg) {
998         pts = pic_arg->pts;
999         display_picture_number = s->input_picture_number++;
1000
1001         if (pts != AV_NOPTS_VALUE) {
1002             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1003                 int64_t last = s->user_specified_pts;
1004
1005                 if (pts <= last) {
1006                     av_log(s->avctx, AV_LOG_ERROR,
1007                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1008                            pts, last);
1009                     return AVERROR(EINVAL);
1010                 }
1011
1012                 if (!s->low_delay && display_picture_number == 1)
1013                     s->dts_delta = pts - last;
1014             }
1015             s->user_specified_pts = pts;
1016         } else {
1017             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1018                 s->user_specified_pts =
1019                 pts = s->user_specified_pts + 1;
1020                 av_log(s->avctx, AV_LOG_INFO,
1021                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1022                        pts);
1023             } else {
1024                 pts = display_picture_number;
1025             }
1026         }
1027     }
1028
1029     if (pic_arg) {
1030         if (!pic_arg->buf[0])
1031             direct = 0;
1032         if (pic_arg->linesize[0] != s->linesize)
1033             direct = 0;
1034         if (pic_arg->linesize[1] != s->uvlinesize)
1035             direct = 0;
1036         if (pic_arg->linesize[2] != s->uvlinesize)
1037             direct = 0;
1038
1039         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1040                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1041
1042         if (direct) {
1043             i = ff_find_unused_picture(s, 1);
1044             if (i < 0)
1045                 return i;
1046
1047             pic = &s->picture[i];
1048             pic->reference = 3;
1049
1050             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1051                 return ret;
1052             if (ff_alloc_picture(s, pic, 1) < 0) {
1053                 return -1;
1054             }
1055         } else {
1056             i = ff_find_unused_picture(s, 0);
1057             if (i < 0)
1058                 return i;
1059
1060             pic = &s->picture[i];
1061             pic->reference = 3;
1062
1063             if (ff_alloc_picture(s, pic, 0) < 0) {
1064                 return -1;
1065             }
1066
1067             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1068                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1069                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1070                 // empty
1071             } else {
1072                 int h_chroma_shift, v_chroma_shift;
1073                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1074                                                  &h_chroma_shift,
1075                                                  &v_chroma_shift);
1076
1077                 for (i = 0; i < 3; i++) {
1078                     int src_stride = pic_arg->linesize[i];
1079                     int dst_stride = i ? s->uvlinesize : s->linesize;
1080                     int h_shift = i ? h_chroma_shift : 0;
1081                     int v_shift = i ? v_chroma_shift : 0;
1082                     int w = s->width  >> h_shift;
1083                     int h = s->height >> v_shift;
1084                     uint8_t *src = pic_arg->data[i];
1085                     uint8_t *dst = pic->f.data[i];
1086
1087                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1088                         h = ((s->height + 15)/16*16) >> v_shift;
1089                     }
1090
1091                     if (!s->avctx->rc_buffer_size)
1092                         dst += INPLACE_OFFSET;
1093
1094                     if (src_stride == dst_stride)
1095                         memcpy(dst, src, src_stride * h);
1096                     else {
1097                         int h2 = h;
1098                         uint8_t *dst2 = dst;
1099                         while (h2--) {
1100                             memcpy(dst2, src, w);
1101                             dst2 += dst_stride;
1102                             src += src_stride;
1103                         }
1104                     }
1105                     if ((s->width & 15) || (s->height & 15)) {
1106                         s->dsp.draw_edges(dst, dst_stride,
1107                                           w, h,
1108                                           16>>h_shift,
1109                                           16>>v_shift,
1110                                           EDGE_BOTTOM);
1111                     }
1112                 }
1113             }
1114         }
1115         ret = av_frame_copy_props(&pic->f, pic_arg);
1116         if (ret < 0)
1117             return ret;
1118
1119         pic->f.display_picture_number = display_picture_number;
1120         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
1121     }
1122
1123     /* shift buffer entries */
1124     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1125         s->input_picture[i - 1] = s->input_picture[i];
1126
1127     s->input_picture[encoding_delay] = (Picture*) pic;
1128
1129     return 0;
1130 }
1131
1132 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1133 {
1134     int x, y, plane;
1135     int score = 0;
1136     int64_t score64 = 0;
1137
1138     for (plane = 0; plane < 3; plane++) {
1139         const int stride = p->f.linesize[plane];
1140         const int bw = plane ? 1 : 2;
1141         for (y = 0; y < s->mb_height * bw; y++) {
1142             for (x = 0; x < s->mb_width * bw; x++) {
1143                 int off = p->shared ? 0 : 16;
1144                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1145                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1146                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1147
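                /* frame_skip_exp selects the error norm per 8x8 block:
                 * 0 = maximum, 1 = sum of absolute differences, 2 = sum of
                 * squares, 3/4 = higher powers; negative values additionally
                 * normalize the result per macroblock via the pow() below. */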
1148                 switch (FFABS(s->avctx->frame_skip_exp)) {
1149                 case 0: score    =  FFMAX(score, v);          break;
1150                 case 1: score   += FFABS(v);                  break;
1151                 case 2: score64 += v * (int64_t)v;                       break;
1152                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1153                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1154                 }
1155             }
1156         }
1157     }
1158     emms_c();
1159
1160     if (score)
1161         score64 = score;
1162     if (s->avctx->frame_skip_exp < 0)
1163         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1164                       -1.0/s->avctx->frame_skip_exp);
1165
1166     if (score64 < s->avctx->frame_skip_threshold)
1167         return 1;
1168     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1169         return 1;
1170     return 0;
1171 }
1172
1173 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1174 {
1175     AVPacket pkt = { 0 };
1176     int ret, got_output;
1177
1178     av_init_packet(&pkt);
1179     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1180     if (ret < 0)
1181         return ret;
1182
1183     ret = pkt.size;
1184     av_free_packet(&pkt);
1185     return ret;
1186 }
1187
1188 static int estimate_best_b_count(MpegEncContext *s)
1189 {
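    /* Brute-force B-frame decision: encode the queued input pictures, downscaled
     * by brd_scale, once for every candidate B-frame run length j, and pick the
     * j with the lowest combined cost (scaled output size weighted by lambda2
     * plus the encoder's reported reconstruction error). */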
1190     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1191     AVCodecContext *c = avcodec_alloc_context3(NULL);
1192     const int scale = s->avctx->brd_scale;
1193     int i, j, out_size, p_lambda, b_lambda, lambda2;
1194     int64_t best_rd  = INT64_MAX;
1195     int best_b_count = -1;
1196
1197     av_assert0(scale >= 0 && scale <= 3);
1198
1199     //emms_c();
1200     //s->next_picture_ptr->quality;
1201     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1202     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1203     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1204     if (!b_lambda) // FIXME we should do this somewhere else
1205         b_lambda = p_lambda;
1206     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1207                FF_LAMBDA_SHIFT;
1208
1209     c->width        = s->width  >> scale;
1210     c->height       = s->height >> scale;
1211     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1212                       CODEC_FLAG_INPUT_PRESERVED;
1213     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1214     c->mb_decision  = s->avctx->mb_decision;
1215     c->me_cmp       = s->avctx->me_cmp;
1216     c->mb_cmp       = s->avctx->mb_cmp;
1217     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1218     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1219     c->time_base    = s->avctx->time_base;
1220     c->max_b_frames = s->max_b_frames;
1221
1222     if (avcodec_open2(c, codec, NULL) < 0)
1223         return -1;
1224
1225     for (i = 0; i < s->max_b_frames + 2; i++) {
1226         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1227                                                 s->next_picture_ptr;
1228
1229         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1230             pre_input = *pre_input_ptr;
1231
1232             if (!pre_input.shared && i) {
1233                 pre_input.f.data[0] += INPLACE_OFFSET;
1234                 pre_input.f.data[1] += INPLACE_OFFSET;
1235                 pre_input.f.data[2] += INPLACE_OFFSET;
1236             }
1237
1238             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1239                                  pre_input.f.data[0], pre_input.f.linesize[0],
1240                                  c->width,      c->height);
1241             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1242                                  pre_input.f.data[1], pre_input.f.linesize[1],
1243                                  c->width >> 1, c->height >> 1);
1244             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1245                                  pre_input.f.data[2], pre_input.f.linesize[2],
1246                                  c->width >> 1, c->height >> 1);
1247         }
1248     }
1249
1250     for (j = 0; j < s->max_b_frames + 1; j++) {
1251         int64_t rd = 0;
1252
1253         if (!s->input_picture[j])
1254             break;
1255
1256         c->error[0] = c->error[1] = c->error[2] = 0;
1257
1258         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1259         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1260
1261         out_size = encode_frame(c, s->tmp_frames[0]);
1262
1263         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1264
1265         for (i = 0; i < s->max_b_frames + 1; i++) {
1266             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1267
1268             s->tmp_frames[i + 1]->pict_type = is_p ?
1269                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1270             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1271
1272             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1273
1274             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1275         }
1276
1277         /* get the delayed frames */
1278         while (out_size) {
1279             out_size = encode_frame(c, NULL);
1280             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1281         }
1282
1283         rd += c->error[0] + c->error[1] + c->error[2];
1284
1285         if (rd < best_rd) {
1286             best_rd = rd;
1287             best_b_count = j;
1288         }
1289     }
1290
1291     avcodec_close(c);
1292     av_freep(&c);
1293
1294     return best_b_count;
1295 }
1296
1297 static int select_input_picture(MpegEncContext *s)
1298 {
1299     int i, ret;
1300
1301     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1302         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1303     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1304
1305     /* set next picture type & ordering */
1306     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1307         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1308             if (s->picture_in_gop_number < s->gop_size &&
1309                 s->next_picture_ptr &&
1310                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1311                 // FIXME check that the GOP check above is +-1 correct
1312                 av_frame_unref(&s->input_picture[0]->f);
1313
1314                 ff_vbv_update(s, 0);
1315
1316                 goto no_output_pic;
1317             }
1318         }
1319
1320         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1321             s->next_picture_ptr == NULL || s->intra_only) {
1322             s->reordered_input_picture[0] = s->input_picture[0];
1323             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1324             s->reordered_input_picture[0]->f.coded_picture_number =
1325                 s->coded_picture_number++;
1326         } else {
1327             int b_frames;
1328
1329             if (s->flags & CODEC_FLAG_PASS2) {
1330                 for (i = 0; i < s->max_b_frames + 1; i++) {
1331                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1332
1333                     if (pict_num >= s->rc_context.num_entries)
1334                         break;
1335                     if (!s->input_picture[i]) {
1336                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1337                         break;
1338                     }
1339
1340                     s->input_picture[i]->f.pict_type =
1341                         s->rc_context.entry[pict_num].new_pict_type;
1342                 }
1343             }
1344
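            /* b_frame_strategy: 0 = always use max_b_frames, 1 = rank candidate
             * frames with the get_intra_count() heuristic, 2 = brute-force search
             * via estimate_best_b_count(). */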
1345             if (s->avctx->b_frame_strategy == 0) {
1346                 b_frames = s->max_b_frames;
1347                 while (b_frames && !s->input_picture[b_frames])
1348                     b_frames--;
1349             } else if (s->avctx->b_frame_strategy == 1) {
1350                 for (i = 1; i < s->max_b_frames + 1; i++) {
1351                     if (s->input_picture[i] &&
1352                         s->input_picture[i]->b_frame_score == 0) {
1353                         s->input_picture[i]->b_frame_score =
1354                             get_intra_count(s,
1355                                             s->input_picture[i    ]->f.data[0],
1356                                             s->input_picture[i - 1]->f.data[0],
1357                                             s->linesize) + 1;
1358                     }
1359                 }
1360                 for (i = 0; i < s->max_b_frames + 1; i++) {
1361                     if (s->input_picture[i] == NULL ||
1362                         s->input_picture[i]->b_frame_score - 1 >
1363                             s->mb_num / s->avctx->b_sensitivity)
1364                         break;
1365                 }
1366
1367                 b_frames = FFMAX(0, i - 1);
1368
1369                 /* reset scores */
1370                 for (i = 0; i < b_frames + 1; i++) {
1371                     s->input_picture[i]->b_frame_score = 0;
1372                 }
1373             } else if (s->avctx->b_frame_strategy == 2) {
1374                 b_frames = estimate_best_b_count(s);
1375             } else {
1376                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1377                 b_frames = 0;
1378             }
1379
1380             emms_c();
1381
1382             for (i = b_frames - 1; i >= 0; i--) {
1383                 int type = s->input_picture[i]->f.pict_type;
1384                 if (type && type != AV_PICTURE_TYPE_B)
1385                     b_frames = i;
1386             }
1387             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1388                 b_frames == s->max_b_frames) {
1389                 av_log(s->avctx, AV_LOG_ERROR,
1390                        "warning, too many b frames in a row\n");
1391             }
1392
1393             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1394                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1395                     s->gop_size > s->picture_in_gop_number) {
1396                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1397                 } else {
1398                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1399                         b_frames = 0;
1400                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1401                 }
1402             }
1403
1404             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1405                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1406                 b_frames--;
1407
1408             s->reordered_input_picture[0] = s->input_picture[b_frames];
1409             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1410                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1411             s->reordered_input_picture[0]->f.coded_picture_number =
1412                 s->coded_picture_number++;
1413             for (i = 0; i < b_frames; i++) {
1414                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1415                 s->reordered_input_picture[i + 1]->f.pict_type =
1416                     AV_PICTURE_TYPE_B;
1417                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1418                     s->coded_picture_number++;
1419             }
1420         }
1421     }
1422 no_output_pic:
1423     if (s->reordered_input_picture[0]) {
1424         s->reordered_input_picture[0]->reference =
1425            s->reordered_input_picture[0]->f.pict_type !=
1426                AV_PICTURE_TYPE_B ? 3 : 0;
1427
1428         ff_mpeg_unref_picture(s, &s->new_picture);
1429         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1430             return ret;
1431
1432         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1433             // input is a shared pix, so we can't modify it -> alloc a new
1434             // one & ensure that the shared one is reusable
1435
1436             Picture *pic;
1437             int i = ff_find_unused_picture(s, 0);
1438             if (i < 0)
1439                 return i;
1440             pic = &s->picture[i];
1441
1442             pic->reference = s->reordered_input_picture[0]->reference;
1443             if (ff_alloc_picture(s, pic, 0) < 0) {
1444                 return -1;
1445             }
1446
1447             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1448             if (ret < 0)
1449                 return ret;
1450
1451             /* mark us unused / free shared pic */
1452             av_frame_unref(&s->reordered_input_picture[0]->f);
1453             s->reordered_input_picture[0]->shared = 0;
1454
1455             s->current_picture_ptr = pic;
1456         } else {
1457             // input is not a shared pix -> reuse buffer for current_pix
1458             s->current_picture_ptr = s->reordered_input_picture[0];
1459             for (i = 0; i < 4; i++) {
1460                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1461             }
1462         }
1463         ff_mpeg_unref_picture(s, &s->current_picture);
1464         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1465                                        s->current_picture_ptr)) < 0)
1466             return ret;
1467
1468         s->picture_number = s->new_picture.f.display_picture_number;
1469     } else {
1470         ff_mpeg_unref_picture(s, &s->new_picture);
1471     }
1472     return 0;
1473 }
1474
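/* Finish encoding the current frame: pad the reconstructed reference picture
 * with replicated edge pixels (needed for unrestricted motion vectors) and
 * remember the picture type and lambda for the next frame's rate control. */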
1475 static void frame_end(MpegEncContext *s)
1476 {
1477     if (s->unrestricted_mv &&
1478         s->current_picture.reference &&
1479         !s->intra_only) {
1480         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1481         int hshift = desc->log2_chroma_w;
1482         int vshift = desc->log2_chroma_h;
1483         s->dsp.draw_edges(s->current_picture.f.data[0], s->current_picture.f.linesize[0],
1484                           s->h_edge_pos, s->v_edge_pos,
1485                           EDGE_WIDTH, EDGE_WIDTH,
1486                           EDGE_TOP | EDGE_BOTTOM);
1487         s->dsp.draw_edges(s->current_picture.f.data[1], s->current_picture.f.linesize[1],
1488                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1489                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1490                           EDGE_TOP | EDGE_BOTTOM);
1491         s->dsp.draw_edges(s->current_picture.f.data[2], s->current_picture.f.linesize[2],
1492                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1493                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1494                           EDGE_TOP | EDGE_BOTTOM);
1495     }
1496
1497     emms_c();
1498
1499     s->last_pict_type                 = s->pict_type;
1500     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1501     if (s->pict_type != AV_PICTURE_TYPE_B)
1502         s->last_non_b_pict_type = s->pict_type;
1503
1504     s->avctx->coded_frame = &s->current_picture_ptr->f;
1505
1506 }
1507
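/* Recompute the per-coefficient DCT offsets used for noise reduction.
 * Roughly offset[i] = noise_reduction * dct_count / dct_error_sum[i], so the
 * offset shrinks for coefficients whose accumulated quantization error is
 * large; the counters are halved periodically to keep a running average. */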
1508 static void update_noise_reduction(MpegEncContext *s)
1509 {
1510     int intra, i;
1511
1512     for (intra = 0; intra < 2; intra++) {
1513         if (s->dct_count[intra] > (1 << 16)) {
1514             for (i = 0; i < 64; i++) {
1515                 s->dct_error_sum[intra][i] >>= 1;
1516             }
1517             s->dct_count[intra] >>= 1;
1518         }
1519
1520         for (i = 0; i < 64; i++) {
1521             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1522                                        s->dct_count[intra] +
1523                                        s->dct_error_sum[intra][i] / 2) /
1524                                       (s->dct_error_sum[intra][i] + 1);
1525         }
1526     }
1527 }
1528
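/* Prepare encoding of one frame: rotate the last/next/current reference
 * pictures, double the linesizes for field pictures, and pick the
 * dct_unquantize functions matching the output format. */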
1529 static int frame_start(MpegEncContext *s)
1530 {
1531     int ret;
1532
1533     /* mark & release old frames */
1534     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1535         s->last_picture_ptr != s->next_picture_ptr &&
1536         s->last_picture_ptr->f.buf[0]) {
1537         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1538     }
1539
1540     s->current_picture_ptr->f.pict_type = s->pict_type;
1541     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1542
1543     ff_mpeg_unref_picture(s, &s->current_picture);
1544     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1545                                    s->current_picture_ptr)) < 0)
1546         return ret;
1547
1548     if (s->pict_type != AV_PICTURE_TYPE_B) {
1549         s->last_picture_ptr = s->next_picture_ptr;
1550         if (!s->droppable)
1551             s->next_picture_ptr = s->current_picture_ptr;
1552     }
1553
1554     if (s->last_picture_ptr) {
1555         ff_mpeg_unref_picture(s, &s->last_picture);
1556         if (s->last_picture_ptr->f.buf[0] &&
1557             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1558                                        s->last_picture_ptr)) < 0)
1559             return ret;
1560     }
1561     if (s->next_picture_ptr) {
1562         ff_mpeg_unref_picture(s, &s->next_picture);
1563         if (s->next_picture_ptr->f.buf[0] &&
1564             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1565                                        s->next_picture_ptr)) < 0)
1566             return ret;
1567     }
1568
1569     if (s->picture_structure != PICT_FRAME) {
1570         int i;
1571         for (i = 0; i < 4; i++) {
1572             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1573                 s->current_picture.f.data[i] +=
1574                     s->current_picture.f.linesize[i];
1575             }
1576             s->current_picture.f.linesize[i] *= 2;
1577             s->last_picture.f.linesize[i]    *= 2;
1578             s->next_picture.f.linesize[i]    *= 2;
1579         }
1580     }
1581
1582     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1583         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1584         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1585     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1586         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1587         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1588     } else {
1589         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1590         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1591     }
1592
1593     if (s->dct_error_sum) {
1594         av_assert2(s->avctx->noise_reduction && s->encoding);
1595         update_noise_reduction(s);
1596     }
1597
1598     return 0;
1599 }
1600
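/* Main encoding entry point: buffer the input frame, select the next frame
 * to code (reordering it behind its B-frames if needed), encode it and emit
 * the packet; retries with a larger lambda if the VBV buffer would overflow. */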
1601 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1602                           AVFrame *pic_arg, int *got_packet)
1603 {
1604     MpegEncContext *s = avctx->priv_data;
1605     int i, stuffing_count, ret;
1606     int context_count = s->slice_context_count;
1607
1608     s->picture_in_gop_number++;
1609
1610     if (load_input_picture(s, pic_arg) < 0)
1611         return -1;
1612
1613     if (select_input_picture(s) < 0) {
1614         return -1;
1615     }
1616
1617     /* output? */
1618     if (s->new_picture.f.data[0]) {
1619         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1620             return ret;
1621         if (s->mb_info) {
1622             s->mb_info_ptr = av_packet_new_side_data(pkt,
1623                                  AV_PKT_DATA_H263_MB_INFO,
1624                                  s->mb_width*s->mb_height*12);
1625             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1626         }
1627
1628         for (i = 0; i < context_count; i++) {
1629             int start_y = s->thread_context[i]->start_mb_y;
1630             int   end_y = s->thread_context[i]->  end_mb_y;
1631             int h       = s->mb_height;
1632             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1633             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1634
1635             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1636         }
1637
1638         s->pict_type = s->new_picture.f.pict_type;
1639         //emms_c();
1640         ret = frame_start(s);
1641         if (ret < 0)
1642             return ret;
1643 vbv_retry:
1644         if (encode_picture(s, s->picture_number) < 0)
1645             return -1;
1646
1647         avctx->header_bits = s->header_bits;
1648         avctx->mv_bits     = s->mv_bits;
1649         avctx->misc_bits   = s->misc_bits;
1650         avctx->i_tex_bits  = s->i_tex_bits;
1651         avctx->p_tex_bits  = s->p_tex_bits;
1652         avctx->i_count     = s->i_count;
1653         // FIXME f/b_count in avctx
1654         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1655         avctx->skip_count  = s->skip_count;
1656
1657         frame_end(s);
1658
1659         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1660             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1661
1662         if (avctx->rc_buffer_size) {
1663             RateControlContext *rcc = &s->rc_context;
1664             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1665
1666             if (put_bits_count(&s->pb) > max_size &&
1667                 s->lambda < s->avctx->lmax) {
1668                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1669                                        (s->qscale + 1) / s->qscale);
1670                 if (s->adaptive_quant) {
1671                     int i;
1672                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1673                         s->lambda_table[i] =
1674                             FFMAX(s->lambda_table[i] + 1,
1675                                   s->lambda_table[i] * (s->qscale + 1) /
1676                                   s->qscale);
1677                 }
1678                 s->mb_skipped = 0;        // normally done in frame_start()
1679                 // the no_rounding flip is done in encode_picture(), so undo it before retrying
1680                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1681                     if (s->flipflop_rounding          ||
1682                         s->codec_id == AV_CODEC_ID_H263P ||
1683                         s->codec_id == AV_CODEC_ID_MPEG4)
1684                         s->no_rounding ^= 1;
1685                 }
1686                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1687                     s->time_base       = s->last_time_base;
1688                     s->last_non_b_time = s->time - s->pp_time;
1689                 }
1690                 for (i = 0; i < context_count; i++) {
1691                     PutBitContext *pb = &s->thread_context[i]->pb;
1692                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1693                 }
1694                 goto vbv_retry;
1695             }
1696
1697             assert(s->avctx->rc_max_rate);
1698         }
1699
1700         if (s->flags & CODEC_FLAG_PASS1)
1701             ff_write_pass1_stats(s);
1702
1703         for (i = 0; i < 4; i++) {
1704             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1705             avctx->error[i] += s->current_picture_ptr->f.error[i];
1706         }
1707
1708         if (s->flags & CODEC_FLAG_PASS1)
1709             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1710                    avctx->i_tex_bits + avctx->p_tex_bits ==
1711                        put_bits_count(&s->pb));
1712         flush_put_bits(&s->pb);
1713         s->frame_bits  = put_bits_count(&s->pb);
1714
1715         stuffing_count = ff_vbv_update(s, s->frame_bits);
1716         s->stuffing_bits = 8*stuffing_count;
1717         if (stuffing_count) {
1718             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1719                     stuffing_count + 50) {
1720                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1721                 return -1;
1722             }
1723
1724             switch (s->codec_id) {
1725             case AV_CODEC_ID_MPEG1VIDEO:
1726             case AV_CODEC_ID_MPEG2VIDEO:
1727                 while (stuffing_count--) {
1728                     put_bits(&s->pb, 8, 0);
1729                 }
1730             break;
1731             case AV_CODEC_ID_MPEG4:
1732                 put_bits(&s->pb, 16, 0);
1733                 put_bits(&s->pb, 16, 0x1C3);
1734                 stuffing_count -= 4;
1735                 while (stuffing_count--) {
1736                     put_bits(&s->pb, 8, 0xFF);
1737                 }
1738             break;
1739             default:
1740                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1741             }
1742             flush_put_bits(&s->pb);
1743             s->frame_bits  = put_bits_count(&s->pb);
1744         }
1745
1746         /* update mpeg1/2 vbv_delay for CBR */
1747         if (s->avctx->rc_max_rate                          &&
1748             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1749             s->out_format == FMT_MPEG1                     &&
1750             90000LL * (avctx->rc_buffer_size - 1) <=
1751                 s->avctx->rc_max_rate * 0xFFFFLL) {
1752             int vbv_delay, min_delay;
1753             double inbits  = s->avctx->rc_max_rate *
1754                              av_q2d(s->avctx->time_base);
1755             int    minbits = s->frame_bits - 8 *
1756                              (s->vbv_delay_ptr - s->pb.buf - 1);
1757             double bits    = s->rc_context.buffer_index + minbits - inbits;
1758
1759             if (bits < 0)
1760                 av_log(s->avctx, AV_LOG_ERROR,
1761                        "Internal error, negative bits\n");
1762
1763             assert(s->repeat_first_field == 0);
1764
1765             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1766             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1767                         s->avctx->rc_max_rate;
1768
1769             vbv_delay = FFMAX(vbv_delay, min_delay);
1770
1771             av_assert0(vbv_delay < 0xFFFF);
1772
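            /* Patch the 16-bit vbv_delay back into the already written picture
             * header; it spans three bytes: the low 3 bits of vbv_delay_ptr[0],
             * all 8 bits of vbv_delay_ptr[1] and the top 5 bits of vbv_delay_ptr[2]. */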
1773             s->vbv_delay_ptr[0] &= 0xF8;
1774             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1775             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1776             s->vbv_delay_ptr[2] &= 0x07;
1777             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1778             avctx->vbv_delay     = vbv_delay * 300;
1779         }
1780         s->total_bits     += s->frame_bits;
1781         avctx->frame_bits  = s->frame_bits;
1782
1783         pkt->pts = s->current_picture.f.pts;
1784         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1785             if (!s->current_picture.f.coded_picture_number)
1786                 pkt->dts = pkt->pts - s->dts_delta;
1787             else
1788                 pkt->dts = s->reordered_pts;
1789             s->reordered_pts = pkt->pts;
1790         } else
1791             pkt->dts = pkt->pts;
1792         if (s->current_picture.f.key_frame)
1793             pkt->flags |= AV_PKT_FLAG_KEY;
1794         if (s->mb_info)
1795             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1796     } else {
1797         s->frame_bits = 0;
1798     }
1799
1800     /* release non-reference frames */
1801     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1802         if (!s->picture[i].reference)
1803             ff_mpeg_unref_picture(s, &s->picture[i]);
1804     }
1805
1806     assert((s->frame_bits & 7) == 0);
1807
1808     pkt->size = s->frame_bits / 8;
1809     *got_packet = !!pkt->size;
1810     return 0;
1811 }
1812
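/* Zero out a block that only contains a few scattered +-1 coefficients when
 * their run-length weighted score is below the threshold; with a negative
 * threshold the DC coefficient may be eliminated as well. */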
1813 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1814                                                 int n, int threshold)
1815 {
1816     static const char tab[64] = {
1817         3, 2, 2, 1, 1, 1, 1, 1,
1818         1, 1, 1, 1, 1, 1, 1, 1,
1819         1, 1, 1, 1, 1, 1, 1, 1,
1820         0, 0, 0, 0, 0, 0, 0, 0,
1821         0, 0, 0, 0, 0, 0, 0, 0,
1822         0, 0, 0, 0, 0, 0, 0, 0,
1823         0, 0, 0, 0, 0, 0, 0, 0,
1824         0, 0, 0, 0, 0, 0, 0, 0
1825     };
1826     int score = 0;
1827     int run = 0;
1828     int i;
1829     int16_t *block = s->block[n];
1830     const int last_index = s->block_last_index[n];
1831     int skip_dc;
1832
1833     if (threshold < 0) {
1834         skip_dc = 0;
1835         threshold = -threshold;
1836     } else
1837         skip_dc = 1;
1838
1839     /* Is everything we could set to zero already zero? */
1840     if (last_index <= skip_dc - 1)
1841         return;
1842
1843     for (i = 0; i <= last_index; i++) {
1844         const int j = s->intra_scantable.permutated[i];
1845         const int level = FFABS(block[j]);
1846         if (level == 1) {
1847             if (skip_dc && i == 0)
1848                 continue;
1849             score += tab[run];
1850             run = 0;
1851         } else if (level > 1) {
1852             return;
1853         } else {
1854             run++;
1855         }
1856     }
1857     if (score >= threshold)
1858         return;
1859     for (i = skip_dc; i <= last_index; i++) {
1860         const int j = s->intra_scantable.permutated[i];
1861         block[j] = 0;
1862     }
1863     if (block[0])
1864         s->block_last_index[n] = 0;
1865     else
1866         s->block_last_index[n] = -1;
1867 }
1868
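/* Clamp the quantized coefficients to the [min_qcoeff, max_qcoeff] range that
 * the target syntax can represent; the intra DC coefficient is left alone. */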
1869 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1870                                int last_index)
1871 {
1872     int i;
1873     const int maxlevel = s->max_qcoeff;
1874     const int minlevel = s->min_qcoeff;
1875     int overflow = 0;
1876
1877     if (s->mb_intra) {
1878         i = 1; // skip clipping of intra dc
1879     } else
1880         i = 0;
1881
1882     for (; i <= last_index; i++) {
1883         const int j = s->intra_scantable.permutated[i];
1884         int level = block[j];
1885
1886         if (level > maxlevel) {
1887             level = maxlevel;
1888             overflow++;
1889         } else if (level < minlevel) {
1890             level = minlevel;
1891             overflow++;
1892         }
1893
1894         block[j] = level;
1895     }
1896
1897     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1898         av_log(s->avctx, AV_LOG_INFO,
1899                "warning, clipping %d dct coefficients to %d..%d\n",
1900                overflow, minlevel, maxlevel);
1901 }
1902
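/* Compute a per-pixel weight proportional to the standard deviation of the
 * (up to) 3x3 neighbourhood; used by the noise-shaping dct_quantize_refine(). */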
1903 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1904 {
1905     int x, y;
1906     // FIXME optimize
1907     for (y = 0; y < 8; y++) {
1908         for (x = 0; x < 8; x++) {
1909             int x2, y2;
1910             int sum = 0;
1911             int sqr = 0;
1912             int count = 0;
1913
1914             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1915                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1916                     int v = ptr[x2 + y2 * stride];
1917                     sum += v;
1918                     sqr += v * v;
1919                     count++;
1920                 }
1921             }
1922             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1923         }
1924     }
1925 }
1926
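/* Encode one macroblock: grab the source (and, for inter MBs, the motion
 * compensated prediction), forward-DCT and quantize each block, optionally
 * apply noise shaping and coefficient elimination, then write the
 * codec-specific bitstream for the macroblock. */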
1927 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1928                                                 int motion_x, int motion_y,
1929                                                 int mb_block_height,
1930                                                 int mb_block_width,
1931                                                 int mb_block_count)
1932 {
1933     int16_t weight[12][64];
1934     int16_t orig[12][64];
1935     const int mb_x = s->mb_x;
1936     const int mb_y = s->mb_y;
1937     int i;
1938     int skip_dct[12];
1939     int dct_offset = s->linesize * 8; // default for progressive frames
1940     int uv_dct_offset = s->uvlinesize * 8;
1941     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1942     ptrdiff_t wrap_y, wrap_c;
1943
1944     for (i = 0; i < mb_block_count; i++)
1945         skip_dct[i] = s->skipdct;
1946
1947     if (s->adaptive_quant) {
1948         const int last_qp = s->qscale;
1949         const int mb_xy = mb_x + mb_y * s->mb_stride;
1950
1951         s->lambda = s->lambda_table[mb_xy];
1952         update_qscale(s);
1953
1954         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1955             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1956             s->dquant = s->qscale - last_qp;
1957
1958             if (s->out_format == FMT_H263) {
1959                 s->dquant = av_clip(s->dquant, -2, 2);
1960
1961                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1962                     if (!s->mb_intra) {
1963                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1964                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1965                                 s->dquant = 0;
1966                         }
1967                         if (s->mv_type == MV_TYPE_8X8)
1968                             s->dquant = 0;
1969                     }
1970                 }
1971             }
1972         }
1973         ff_set_qscale(s, last_qp + s->dquant);
1974     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1975         ff_set_qscale(s, s->qscale + s->dquant);
1976
1977     wrap_y = s->linesize;
1978     wrap_c = s->uvlinesize;
1979     ptr_y  = s->new_picture.f.data[0] +
1980              (mb_y * 16 * wrap_y)              + mb_x * 16;
1981     ptr_cb = s->new_picture.f.data[1] +
1982              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1983     ptr_cr = s->new_picture.f.data[2] +
1984              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1985
1986     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1987         uint8_t *ebuf = s->edge_emu_buffer + 32;
1988         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1989         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1990         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1991                                  wrap_y, wrap_y,
1992                                  16, 16, mb_x * 16, mb_y * 16,
1993                                  s->width, s->height);
1994         ptr_y = ebuf;
1995         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1996                                  wrap_c, wrap_c,
1997                                  mb_block_width, mb_block_height,
1998                                  mb_x * mb_block_width, mb_y * mb_block_height,
1999                                  cw, ch);
2000         ptr_cb = ebuf + 18 * wrap_y;
2001         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2002                                  wrap_c, wrap_c,
2003                                  mb_block_width, mb_block_height,
2004                                  mb_x * mb_block_width, mb_y * mb_block_height,
2005                                  cw, ch);
2006         ptr_cr = ebuf + 18 * wrap_y + 16;
2007     }
2008
2009     if (s->mb_intra) {
2010         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2011             int progressive_score, interlaced_score;
2012
2013             s->interlaced_dct = 0;
2014             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2015                                                     NULL, wrap_y, 8) +
2016                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2017                                                     NULL, wrap_y, 8) - 400;
2018
2019             if (progressive_score > 0) {
2020                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2021                                                        NULL, wrap_y * 2, 8) +
2022                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2023                                                        NULL, wrap_y * 2, 8);
2024                 if (progressive_score > interlaced_score) {
2025                     s->interlaced_dct = 1;
2026
2027                     dct_offset = wrap_y;
2028                     uv_dct_offset = wrap_c;
2029                     wrap_y <<= 1;
2030                     if (s->chroma_format == CHROMA_422 ||
2031                         s->chroma_format == CHROMA_444)
2032                         wrap_c <<= 1;
2033                 }
2034             }
2035         }
2036
2037         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2038         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2039         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2040         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2041
2042         if (s->flags & CODEC_FLAG_GRAY) {
2043             skip_dct[4] = 1;
2044             skip_dct[5] = 1;
2045         } else {
2046             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2047             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2048             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2049                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2050                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2051             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2052                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2053                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2054                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2055                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2056                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2057                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2058             }
2059         }
2060     } else {
2061         op_pixels_func (*op_pix)[4];
2062         qpel_mc_func (*op_qpix)[16];
2063         uint8_t *dest_y, *dest_cb, *dest_cr;
2064
2065         dest_y  = s->dest[0];
2066         dest_cb = s->dest[1];
2067         dest_cr = s->dest[2];
2068
2069         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2070             op_pix  = s->hdsp.put_pixels_tab;
2071             op_qpix = s->dsp.put_qpel_pixels_tab;
2072         } else {
2073             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2074             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
2075         }
2076
2077         if (s->mv_dir & MV_DIR_FORWARD) {
2078             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2079                           s->last_picture.f.data,
2080                           op_pix, op_qpix);
2081             op_pix  = s->hdsp.avg_pixels_tab;
2082             op_qpix = s->dsp.avg_qpel_pixels_tab;
2083         }
2084         if (s->mv_dir & MV_DIR_BACKWARD) {
2085             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2086                           s->next_picture.f.data,
2087                           op_pix, op_qpix);
2088         }
2089
2090         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2091             int progressive_score, interlaced_score;
2092
2093             s->interlaced_dct = 0;
2094             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2095                                                     ptr_y,              wrap_y,
2096                                                     8) +
2097                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2098                                                     ptr_y + wrap_y * 8, wrap_y,
2099                                                     8) - 400;
2100
2101             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2102                 progressive_score -= 400;
2103
2104             if (progressive_score > 0) {
2105                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2106                                                        ptr_y,
2107                                                        wrap_y * 2, 8) +
2108                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2109                                                        ptr_y + wrap_y,
2110                                                        wrap_y * 2, 8);
2111
2112                 if (progressive_score > interlaced_score) {
2113                     s->interlaced_dct = 1;
2114
2115                     dct_offset = wrap_y;
2116                     uv_dct_offset = wrap_c;
2117                     wrap_y <<= 1;
2118                     if (s->chroma_format == CHROMA_422)
2119                         wrap_c <<= 1;
2120                 }
2121             }
2122         }
2123
2124         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2125         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2126         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2127                            dest_y + dct_offset, wrap_y);
2128         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2129                            dest_y + dct_offset + 8, wrap_y);
2130
2131         if (s->flags & CODEC_FLAG_GRAY) {
2132             skip_dct[4] = 1;
2133             skip_dct[5] = 1;
2134         } else {
2135             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2136             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2137             if (!s->chroma_y_shift) { /* 422 */
2138                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2139                                    dest_cb + uv_dct_offset, wrap_c);
2140                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2141                                    dest_cr + uv_dct_offset, wrap_c);
2142             }
2143         }
2144         /* pre quantization */
2145         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2146                 2 * s->qscale * s->qscale) {
2147             // FIXME optimize
2148             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2149                               wrap_y, 8) < 20 * s->qscale)
2150                 skip_dct[0] = 1;
2151             if (s->dsp.sad[1](NULL, ptr_y + 8,
2152                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2153                 skip_dct[1] = 1;
2154             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2155                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2156                 skip_dct[2] = 1;
2157             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2158                               dest_y + dct_offset + 8,
2159                               wrap_y, 8) < 20 * s->qscale)
2160                 skip_dct[3] = 1;
2161             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2162                               wrap_c, 8) < 20 * s->qscale)
2163                 skip_dct[4] = 1;
2164             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2165                               wrap_c, 8) < 20 * s->qscale)
2166                 skip_dct[5] = 1;
2167             if (!s->chroma_y_shift) { /* 422 */
2168                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2169                                   dest_cb + uv_dct_offset,
2170                                   wrap_c, 8) < 20 * s->qscale)
2171                     skip_dct[6] = 1;
2172                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2173                                   dest_cr + uv_dct_offset,
2174                                   wrap_c, 8) < 20 * s->qscale)
2175                     skip_dct[7] = 1;
2176             }
2177         }
2178     }
2179
2180     if (s->quantizer_noise_shaping) {
2181         if (!skip_dct[0])
2182             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2183         if (!skip_dct[1])
2184             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2185         if (!skip_dct[2])
2186             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2187         if (!skip_dct[3])
2188             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2189         if (!skip_dct[4])
2190             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2191         if (!skip_dct[5])
2192             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2193         if (!s->chroma_y_shift) { /* 422 */
2194             if (!skip_dct[6])
2195                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2196                                   wrap_c);
2197             if (!skip_dct[7])
2198                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2199                                   wrap_c);
2200         }
2201         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2202     }
2203
2204     /* DCT & quantize */
2205     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2206     {
2207         for (i = 0; i < mb_block_count; i++) {
2208             if (!skip_dct[i]) {
2209                 int overflow;
2210                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2211                 // FIXME we could decide to change the quantizer instead of
2212                 // clipping
2213                 // JS: I don't think that would be a good idea, it could lower
2214                 //     quality instead of improving it. Just INTRADC clipping
2215                 //     deserves a change of the quantizer
2216                 if (overflow)
2217                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2218             } else
2219                 s->block_last_index[i] = -1;
2220         }
2221         if (s->quantizer_noise_shaping) {
2222             for (i = 0; i < mb_block_count; i++) {
2223                 if (!skip_dct[i]) {
2224                     s->block_last_index[i] =
2225                         dct_quantize_refine(s, s->block[i], weight[i],
2226                                             orig[i], i, s->qscale);
2227                 }
2228             }
2229         }
2230
2231         if (s->luma_elim_threshold && !s->mb_intra)
2232             for (i = 0; i < 4; i++)
2233                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2234         if (s->chroma_elim_threshold && !s->mb_intra)
2235             for (i = 4; i < mb_block_count; i++)
2236                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2237
2238         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2239             for (i = 0; i < mb_block_count; i++) {
2240                 if (s->block_last_index[i] == -1)
2241                     s->coded_score[i] = INT_MAX / 256;
2242             }
2243         }
2244     }
2245
2246     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2247         s->block_last_index[4] =
2248         s->block_last_index[5] = 0;
2249         s->block[4][0] =
2250         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2251         if (!s->chroma_y_shift) { /* 422 / 444 */
2252             for (i=6; i<12; i++) {
2253                 s->block_last_index[i] = 0;
2254                 s->block[i][0] = s->block[4][0];
2255             }
2256         }
2257     }
2258
2259     // FIXME: the non-C quantize code returns an incorrect block_last_index
2260     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2261         for (i = 0; i < mb_block_count; i++) {
2262             int j;
2263             if (s->block_last_index[i] > 0) {
2264                 for (j = 63; j > 0; j--) {
2265                     if (s->block[i][s->intra_scantable.permutated[j]])
2266                         break;
2267                 }
2268                 s->block_last_index[i] = j;
2269             }
2270         }
2271     }
2272
2273     /* huffman encode */
2274     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2275     case AV_CODEC_ID_MPEG1VIDEO:
2276     case AV_CODEC_ID_MPEG2VIDEO:
2277         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2278             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2279         break;
2280     case AV_CODEC_ID_MPEG4:
2281         if (CONFIG_MPEG4_ENCODER)
2282             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2283         break;
2284     case AV_CODEC_ID_MSMPEG4V2:
2285     case AV_CODEC_ID_MSMPEG4V3:
2286     case AV_CODEC_ID_WMV1:
2287         if (CONFIG_MSMPEG4_ENCODER)
2288             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2289         break;
2290     case AV_CODEC_ID_WMV2:
2291         if (CONFIG_WMV2_ENCODER)
2292             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2293         break;
2294     case AV_CODEC_ID_H261:
2295         if (CONFIG_H261_ENCODER)
2296             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2297         break;
2298     case AV_CODEC_ID_H263:
2299     case AV_CODEC_ID_H263P:
2300     case AV_CODEC_ID_FLV1:
2301     case AV_CODEC_ID_RV10:
2302     case AV_CODEC_ID_RV20:
2303         if (CONFIG_H263_ENCODER)
2304             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2305         break;
2306     case AV_CODEC_ID_MJPEG:
2307     case AV_CODEC_ID_AMV:
2308         if (CONFIG_MJPEG_ENCODER)
2309             ff_mjpeg_encode_mb(s, s->block);
2310         break;
2311     default:
2312         av_assert1(0);
2313     }
2314 }
2315
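/* Dispatch on the chroma format: 4:2:0 macroblocks carry 6 blocks,
 * 4:2:2 carry 8 and 4:4:4 carry 12. */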
2316 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2317 {
2318     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2319     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2320     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2321 }
2322
2323 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2324     int i;
2325
2326     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2327
2328     /* mpeg1 */
2329     d->mb_skip_run= s->mb_skip_run;
2330     for(i=0; i<3; i++)
2331         d->last_dc[i] = s->last_dc[i];
2332
2333     /* statistics */
2334     d->mv_bits= s->mv_bits;
2335     d->i_tex_bits= s->i_tex_bits;
2336     d->p_tex_bits= s->p_tex_bits;
2337     d->i_count= s->i_count;
2338     d->f_count= s->f_count;
2339     d->b_count= s->b_count;
2340     d->skip_count= s->skip_count;
2341     d->misc_bits= s->misc_bits;
2342     d->last_bits= 0;
2343
2344     d->mb_skipped= 0;
2345     d->qscale= s->qscale;
2346     d->dquant= s->dquant;
2347
2348     d->esc3_level_length= s->esc3_level_length;
2349 }
2350
2351 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2352     int i;
2353
2354     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2355     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2356
2357     /* mpeg1 */
2358     d->mb_skip_run= s->mb_skip_run;
2359     for(i=0; i<3; i++)
2360         d->last_dc[i] = s->last_dc[i];
2361
2362     /* statistics */
2363     d->mv_bits= s->mv_bits;
2364     d->i_tex_bits= s->i_tex_bits;
2365     d->p_tex_bits= s->p_tex_bits;
2366     d->i_count= s->i_count;
2367     d->f_count= s->f_count;
2368     d->b_count= s->b_count;
2369     d->skip_count= s->skip_count;
2370     d->misc_bits= s->misc_bits;
2371
2372     d->mb_intra= s->mb_intra;
2373     d->mb_skipped= s->mb_skipped;
2374     d->mv_type= s->mv_type;
2375     d->mv_dir= s->mv_dir;
2376     d->pb= s->pb;
2377     if(s->data_partitioning){
2378         d->pb2= s->pb2;
2379         d->tex_pb= s->tex_pb;
2380     }
2381     d->block= s->block;
2382     for(i=0; i<8; i++)
2383         d->block_last_index[i]= s->block_last_index[i];
2384     d->interlaced_dct= s->interlaced_dct;
2385     d->qscale= s->qscale;
2386
2387     d->esc3_level_length= s->esc3_level_length;
2388 }
2389
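/* Trial-encode one macroblock coding mode into a scratch PutBitContext,
 * score it by bit count (scaled by lambda2 and adding the reconstruction SSE
 * when mb_decision is FF_MB_DECISION_RD) and keep it if it beats *dmin. */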
2390 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2391                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2392                            int *dmin, int *next_block, int motion_x, int motion_y)
2393 {
2394     int score;
2395     uint8_t *dest_backup[3];
2396
2397     copy_context_before_encode(s, backup, type);
2398
2399     s->block= s->blocks[*next_block];
2400     s->pb= pb[*next_block];
2401     if(s->data_partitioning){
2402         s->pb2   = pb2   [*next_block];
2403         s->tex_pb= tex_pb[*next_block];
2404     }
2405
2406     if(*next_block){
2407         memcpy(dest_backup, s->dest, sizeof(s->dest));
2408         s->dest[0] = s->rd_scratchpad;
2409         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2410         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2411         assert(s->linesize >= 32); //FIXME
2412     }
2413
2414     encode_mb(s, motion_x, motion_y);
2415
2416     score= put_bits_count(&s->pb);
2417     if(s->data_partitioning){
2418         score+= put_bits_count(&s->pb2);
2419         score+= put_bits_count(&s->tex_pb);
2420     }
2421
2422     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2423         ff_MPV_decode_mb(s, s->block);
2424
2425         score *= s->lambda2;
2426         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2427     }
2428
2429     if(*next_block){
2430         memcpy(s->dest, dest_backup, sizeof(s->dest));
2431     }
2432
2433     if(score<*dmin){
2434         *dmin= score;
2435         *next_block^=1;
2436
2437         copy_context_after_encode(best, s, type);
2438     }
2439 }
2440
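/* Sum of squared errors between two blocks of the given size; the common
 * 16x16 and 8x8 cases use the optimized dsp routines. */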
2441 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2442     uint32_t *sq = ff_squareTbl + 256;
2443     int acc=0;
2444     int x,y;
2445
2446     if(w==16 && h==16)
2447         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2448     else if(w==8 && h==8)
2449         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2450
2451     for(y=0; y<h; y++){
2452         for(x=0; x<w; x++){
2453             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2454         }
2455     }
2456
2457     av_assert2(acc>=0);
2458
2459     return acc;
2460 }
2461
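/* Distortion (SSE, or NSSE when selected by mb_cmp) between the source
 * macroblock and its reconstruction in s->dest[], handling partial
 * macroblocks at the right and bottom borders. */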
2462 static int sse_mb(MpegEncContext *s){
2463     int w= 16;
2464     int h= 16;
2465
2466     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2467     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2468
2469     if(w==16 && h==16)
2470       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2471         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2472                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2473                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2474       }else{
2475         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2476                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2477                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2478       }
2479     else
2480         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2481                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2482                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2483 }
2484
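/* Motion-estimation pre-pass over this slice context, scanned in reverse
 * macroblock order and using the pre_dia_size search diamond. */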
2485 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2486     MpegEncContext *s= *(void**)arg;
2487
2488
2489     s->me.pre_pass=1;
2490     s->me.dia_size= s->avctx->pre_dia_size;
2491     s->first_slice_line=1;
2492     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2493         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2494             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2495         }
2496         s->first_slice_line=0;
2497     }
2498
2499     s->me.pre_pass=0;
2500
2501     return 0;
2502 }
2503
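/* Per-slice motion estimation: compute a motion vector and macroblock type
 * for every MB, using the B- or P-frame search depending on pict_type. */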
2504 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2505     MpegEncContext *s= *(void**)arg;
2506
2507     ff_check_alignment();
2508
2509     s->me.dia_size= s->avctx->dia_size;
2510     s->first_slice_line=1;
2511     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2512         s->mb_x=0; //for block init below
2513         ff_init_block_index(s);
2514         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2515             s->block_index[0]+=2;
2516             s->block_index[1]+=2;
2517             s->block_index[2]+=2;
2518             s->block_index[3]+=2;
2519
2520             /* compute motion vector & mb_type and store in context */
2521             if(s->pict_type==AV_PICTURE_TYPE_B)
2522                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2523             else
2524                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2525         }
2526         s->first_slice_line=0;
2527     }
2528     return 0;
2529 }
2530
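/* Compute the luma variance and mean of every macroblock in this slice;
 * the results feed rate control and adaptive quantization. */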
2531 static int mb_var_thread(AVCodecContext *c, void *arg){
2532     MpegEncContext *s= *(void**)arg;
2533     int mb_x, mb_y;
2534
2535     ff_check_alignment();
2536
2537     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2538         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2539             int xx = mb_x * 16;
2540             int yy = mb_y * 16;
2541             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2542             int varc;
2543             int sum = s->dsp.pix_sum(pix, s->linesize);
2544
2545             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2546
2547             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2548             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2549             s->me.mb_var_sum_temp    += varc;
2550         }
2551     }
2552     return 0;
2553 }
2554
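/* Terminate the current slice: merge MPEG-4 data partitions and write the
 * codec-specific stuffing, then byte-align and flush the bitstream writer. */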
2555 static void write_slice_end(MpegEncContext *s){
2556     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2557         if(s->partitioned_frame){
2558             ff_mpeg4_merge_partitions(s);
2559         }
2560
2561         ff_mpeg4_stuffing(&s->pb);
2562     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2563         ff_mjpeg_encode_stuffing(s);
2564     }
2565
2566     avpriv_align_put_bits(&s->pb);
2567     flush_put_bits(&s->pb);
2568
2569     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2570         s->misc_bits+= get_bits_diff(s);
2571 }
2572
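/* Fill the most recently reserved 12-byte AV_PKT_DATA_H263_MB_INFO slot with
 * the bit offset, qscale, GOB number, MB address and predicted MV of the
 * current macroblock. */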
2573 static void write_mb_info(MpegEncContext *s)
2574 {
2575     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2576     int offset = put_bits_count(&s->pb);
2577     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2578     int gobn = s->mb_y / s->gob_index;
2579     int pred_x, pred_y;
2580     if (CONFIG_H263_ENCODER)
2581         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2582     bytestream_put_le32(&ptr, offset);
2583     bytestream_put_byte(&ptr, s->qscale);
2584     bytestream_put_byte(&ptr, gobn);
2585     bytestream_put_le16(&ptr, mba);
2586     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2587     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2588     /* 4MV not implemented */
2589     bytestream_put_byte(&ptr, 0); /* hmv2 */
2590     bytestream_put_byte(&ptr, 0); /* vmv2 */
2591 }
2592
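/* Reserve a new 12-byte MB-info slot once s->mb_info bytes of bitstream have
 * been written since the previous entry; when called for a startcode, only
 * the position is recorded and the slot is filled on the next call. */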
2593 static void update_mb_info(MpegEncContext *s, int startcode)
2594 {
2595     if (!s->mb_info)
2596         return;
2597     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2598         s->mb_info_size += 12;
2599         s->prev_mb_info = s->last_mb_info;
2600     }
2601     if (startcode) {
2602         s->prev_mb_info = put_bits_count(&s->pb)/8;
2603         /* This might have incremented mb_info_size above, and we return without
2604          * actually writing any info into that slot yet. But in that case,
2605          * this function will be called again after the start code has been
2606          * written, and the mb info will be written then. */
2607         return;
2608     }
2609
2610     s->last_mb_info = put_bits_count(&s->pb)/8;
2611     if (!s->mb_info_size)
2612         s->mb_info_size += 12;
2613     write_mb_info(s);
2614 }
2615
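/* Encode all macroblocks of one slice context: writes resync/GOB headers,
 * performs the per-MB mode decision (including the rate-distortion trials
 * below) and emits the bitstream. */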
2616 static int encode_thread(AVCodecContext *c, void *arg){
2617     MpegEncContext *s= *(void**)arg;
2618     int mb_x, mb_y, pdif = 0;
2619     int chr_h= 16>>s->chroma_y_shift;
2620     int i, j;
2621     MpegEncContext best_s, backup_s;
2622     uint8_t bit_buf[2][MAX_MB_BYTES];
2623     uint8_t bit_buf2[2][MAX_MB_BYTES];
2624     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2625     PutBitContext pb[2], pb2[2], tex_pb[2];
2626
2627     ff_check_alignment();
2628
2629     for(i=0; i<2; i++){
2630         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2631         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2632         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2633     }
2634
2635     s->last_bits= put_bits_count(&s->pb);
2636     s->mv_bits=0;
2637     s->misc_bits=0;
2638     s->i_tex_bits=0;
2639     s->p_tex_bits=0;
2640     s->i_count=0;
2641     s->f_count=0;
2642     s->b_count=0;
2643     s->skip_count=0;
2644
2645     for(i=0; i<3; i++){
2646         /* init last dc values */
2647         /* note: quant matrix value (8) is implied here */
2648         s->last_dc[i] = 128 << s->intra_dc_precision;
2649
2650         s->current_picture.f.error[i] = 0;
2651     }
2652     if(s->codec_id==AV_CODEC_ID_AMV){
2653         s->last_dc[0] = 128*8/13;
2654         s->last_dc[1] = 128*8/14;
2655         s->last_dc[2] = 128*8/14;
2656     }
2657     s->mb_skip_run = 0;
2658     memset(s->last_mv, 0, sizeof(s->last_mv));
2659
2660     s->last_mv_dir = 0;
2661
2662     switch(s->codec_id){
2663     case AV_CODEC_ID_H263:
2664     case AV_CODEC_ID_H263P:
2665     case AV_CODEC_ID_FLV1:
2666         if (CONFIG_H263_ENCODER)
2667             s->gob_index = ff_h263_get_gob_height(s);
2668         break;
2669     case AV_CODEC_ID_MPEG4:
2670         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2671             ff_mpeg4_init_partitions(s);
2672         break;
2673     }
2674
2675     s->resync_mb_x=0;
2676     s->resync_mb_y=0;
2677     s->first_slice_line = 1;
2678     s->ptr_lastgob = s->pb.buf;
2679     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2680         s->mb_x=0;
2681         s->mb_y= mb_y;
2682
2683         ff_set_qscale(s, s->qscale);
2684         ff_init_block_index(s);
2685
2686         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2687             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2688             int mb_type= s->mb_type[xy];
2689 //            int d;
2690             int dmin= INT_MAX;
2691             int dir;
2692
2693             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2694                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2695                 return -1;
2696             }
2697             if(s->data_partitioning){
2698                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2699                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2700                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2701                     return -1;
2702                 }
2703             }
2704
2705             s->mb_x = mb_x;
2706             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2707             ff_update_block_index(s);
2708
2709             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2710                 ff_h261_reorder_mb_index(s);
2711                 xy= s->mb_y*s->mb_stride + s->mb_x;
2712                 mb_type= s->mb_type[xy];
2713             }
2714
2715             /* write gob / video packet header  */
2716             if(s->rtp_mode){
2717                 int current_packet_size, is_gob_start;
2718
2719                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2720
2721                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2722
2723                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2724
2725                 switch(s->codec_id){
2726                 case AV_CODEC_ID_H263:
2727                 case AV_CODEC_ID_H263P:
2728                     if(!s->h263_slice_structured)
2729                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2730                     break;
2731                 case AV_CODEC_ID_MPEG2VIDEO:
2732                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2733                 case AV_CODEC_ID_MPEG1VIDEO:
2734                     if(s->mb_skip_run) is_gob_start=0;
2735                     break;
2736                 case AV_CODEC_ID_MJPEG:
2737                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2738                     break;
2739                 }
2740
2741                 if(is_gob_start){
2742                     if(s->start_mb_y != mb_y || mb_x!=0){
2743                         write_slice_end(s);
2744
2745                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2746                             ff_mpeg4_init_partitions(s);
2747                         }
2748                     }
2749
2750                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2751                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2752
2753                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2754                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2755                         int d = 100 / s->error_rate;
2756                         if(r % d == 0){
2757                             current_packet_size=0;
2758                             s->pb.buf_ptr= s->ptr_lastgob;
2759                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2760                         }
2761                     }
2762
2763                     if (s->avctx->rtp_callback){
2764                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2765                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2766                     }
2767                     update_mb_info(s, 1);
2768
2769                     switch(s->codec_id){
2770                     case AV_CODEC_ID_MPEG4:
2771                         if (CONFIG_MPEG4_ENCODER) {
2772                             ff_mpeg4_encode_video_packet_header(s);
2773                             ff_mpeg4_clean_buffers(s);
2774                         }
2775                     break;
2776                     case AV_CODEC_ID_MPEG1VIDEO:
2777                     case AV_CODEC_ID_MPEG2VIDEO:
2778                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2779                             ff_mpeg1_encode_slice_header(s);
2780                             ff_mpeg1_clean_buffers(s);
2781                         }
2782                     break;
2783                     case AV_CODEC_ID_H263:
2784                     case AV_CODEC_ID_H263P:
2785                         if (CONFIG_H263_ENCODER)
2786                             ff_h263_encode_gob_header(s, mb_y);
2787                     break;
2788                     }
2789
2790                     if(s->flags&CODEC_FLAG_PASS1){
2791                         int bits= put_bits_count(&s->pb);
2792                         s->misc_bits+= bits - s->last_bits;
2793                         s->last_bits= bits;
2794                     }
2795
2796                     s->ptr_lastgob += current_packet_size;
2797                     s->first_slice_line=1;
2798                     s->resync_mb_x=mb_x;
2799                     s->resync_mb_y=mb_y;
2800                 }
2801             }
2802
2803             if(  (s->resync_mb_x   == s->mb_x)
2804                && s->resync_mb_y+1 == s->mb_y){
2805                 s->first_slice_line=0;
2806             }
2807
2808             s->mb_skipped=0;
2809             s->dquant=0; //only for QP_RD
2810
2811             update_mb_info(s, 0);
2812
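            /* Rate-distortion mode decision: each candidate MB type below is encoded into scratch
               bitstreams by encode_mb_hq(), which keeps the cheapest variant in best_s/dmin; the
               winning stream is copied back into s->pb once all candidates have been tried. */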
2813             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2814                 int next_block=0;
2815                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2816
2817                 copy_context_before_encode(&backup_s, s, -1);
2818                 backup_s.pb= s->pb;
2819                 best_s.data_partitioning= s->data_partitioning;
2820                 best_s.partitioned_frame= s->partitioned_frame;
2821                 if(s->data_partitioning){
2822                     backup_s.pb2= s->pb2;
2823                     backup_s.tex_pb= s->tex_pb;
2824                 }
2825
2826                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2827                     s->mv_dir = MV_DIR_FORWARD;
2828                     s->mv_type = MV_TYPE_16X16;
2829                     s->mb_intra= 0;
2830                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2831                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2832                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2833                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2834                 }
2835                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2836                     s->mv_dir = MV_DIR_FORWARD;
2837                     s->mv_type = MV_TYPE_FIELD;
2838                     s->mb_intra= 0;
2839                     for(i=0; i<2; i++){
2840                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2841                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2842                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2843                     }
2844                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2845                                  &dmin, &next_block, 0, 0);
2846                 }
2847                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2848                     s->mv_dir = MV_DIR_FORWARD;
2849                     s->mv_type = MV_TYPE_16X16;
2850                     s->mb_intra= 0;
2851                     s->mv[0][0][0] = 0;
2852                     s->mv[0][0][1] = 0;
2853                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2854                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2855                 }
2856                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2857                     s->mv_dir = MV_DIR_FORWARD;
2858                     s->mv_type = MV_TYPE_8X8;
2859                     s->mb_intra= 0;
2860                     for(i=0; i<4; i++){
2861                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2862                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2863                     }
2864                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2865                                  &dmin, &next_block, 0, 0);
2866                 }
2867                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2868                     s->mv_dir = MV_DIR_FORWARD;
2869                     s->mv_type = MV_TYPE_16X16;
2870                     s->mb_intra= 0;
2871                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2872                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2873                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2874                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2875                 }
2876                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2877                     s->mv_dir = MV_DIR_BACKWARD;
2878                     s->mv_type = MV_TYPE_16X16;
2879                     s->mb_intra= 0;
2880                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2881                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2882                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2883                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2884                 }
2885                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2886                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2887                     s->mv_type = MV_TYPE_16X16;
2888                     s->mb_intra= 0;
2889                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2890                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2891                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2892                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2893                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2894                                  &dmin, &next_block, 0, 0);
2895                 }
2896                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2897                     s->mv_dir = MV_DIR_FORWARD;
2898                     s->mv_type = MV_TYPE_FIELD;
2899                     s->mb_intra= 0;
2900                     for(i=0; i<2; i++){
2901                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2902                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2903                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2904                     }
2905                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2906                                  &dmin, &next_block, 0, 0);
2907                 }
2908                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2909                     s->mv_dir = MV_DIR_BACKWARD;
2910                     s->mv_type = MV_TYPE_FIELD;
2911                     s->mb_intra= 0;
2912                     for(i=0; i<2; i++){
2913                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2914                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2915                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2916                     }
2917                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2918                                  &dmin, &next_block, 0, 0);
2919                 }
2920                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2921                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2922                     s->mv_type = MV_TYPE_FIELD;
2923                     s->mb_intra= 0;
2924                     for(dir=0; dir<2; dir++){
2925                         for(i=0; i<2; i++){
2926                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2927                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2928                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2929                         }
2930                     }
2931                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2932                                  &dmin, &next_block, 0, 0);
2933                 }
2934                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2935                     s->mv_dir = 0;
2936                     s->mv_type = MV_TYPE_16X16;
2937                     s->mb_intra= 1;
2938                     s->mv[0][0][0] = 0;
2939                     s->mv[0][0][1] = 0;
2940                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2941                                  &dmin, &next_block, 0, 0);
2942                     if(s->h263_pred || s->h263_aic){
2943                         if(best_s.mb_intra)
2944                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2945                         else
2946                             ff_clean_intra_table_entries(s); //old mode?
2947                     }
2948                 }
2949
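                /* QP_RD: for the best 16x16 mode found above, additionally try small qscale changes
                   (+-1 and +-2, only +-2 for B-frames) within qmin..qmax and keep the cheapest;
                   intra DC/AC prediction state is saved and restored when a trial is rejected. */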
2950                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2951                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2952                         const int last_qp= backup_s.qscale;
2953                         int qpi, qp, dc[6];
2954                         int16_t ac[6][16];
2955                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2956                         static const int dquant_tab[4]={-1,1,-2,2};
2957                         int storecoefs = s->mb_intra && s->dc_val[0];
2958
2959                         av_assert2(backup_s.dquant == 0);
2960
2961                         //FIXME intra
2962                         s->mv_dir= best_s.mv_dir;
2963                         s->mv_type = MV_TYPE_16X16;
2964                         s->mb_intra= best_s.mb_intra;
2965                         s->mv[0][0][0] = best_s.mv[0][0][0];
2966                         s->mv[0][0][1] = best_s.mv[0][0][1];
2967                         s->mv[1][0][0] = best_s.mv[1][0][0];
2968                         s->mv[1][0][1] = best_s.mv[1][0][1];
2969
2970                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2971                         for(; qpi<4; qpi++){
2972                             int dquant= dquant_tab[qpi];
2973                             qp= last_qp + dquant;
2974                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2975                                 continue;
2976                             backup_s.dquant= dquant;
2977                             if(storecoefs){
2978                                 for(i=0; i<6; i++){
2979                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2980                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2981                                 }
2982                             }
2983
2984                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2985                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2986                             if(best_s.qscale != qp){
2987                                 if(storecoefs){
2988                                     for(i=0; i<6; i++){
2989                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2990                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2991                                     }
2992                                 }
2993                             }
2994                         }
2995                     }
2996                 }
2997                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2998                     int mx= s->b_direct_mv_table[xy][0];
2999                     int my= s->b_direct_mv_table[xy][1];
3000
3001                     backup_s.dquant = 0;
3002                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3003                     s->mb_intra= 0;
3004                     ff_mpeg4_set_direct_mv(s, mx, my);
3005                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3006                                  &dmin, &next_block, mx, my);
3007                 }
3008                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3009                     backup_s.dquant = 0;
3010                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3011                     s->mb_intra= 0;
3012                     ff_mpeg4_set_direct_mv(s, 0, 0);
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, 0, 0);
3015                 }
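                /* SKIP_RD: if residual coefficients were coded, re-encode using the best mode's motion
                   with the DCT skipped (skipdct=1) and keep that variant if it is cheaper. */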
3016                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3017                     int coded=0;
3018                     for(i=0; i<6; i++)
3019                         coded |= s->block_last_index[i];
3020                     if(coded){
3021                         int mx,my;
3022                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3023                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3024                             mx=my=0; //FIXME find the one we actually used
3025                             ff_mpeg4_set_direct_mv(s, mx, my);
3026                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3027                             mx= s->mv[1][0][0];
3028                             my= s->mv[1][0][1];
3029                         }else{
3030                             mx= s->mv[0][0][0];
3031                             my= s->mv[0][0][1];
3032                         }
3033
3034                         s->mv_dir= best_s.mv_dir;
3035                         s->mv_type = best_s.mv_type;
3036                         s->mb_intra= 0;
3037 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3038                         s->mv[0][0][1] = best_s.mv[0][0][1];
3039                         s->mv[1][0][0] = best_s.mv[1][0][0];
3040                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3041                         backup_s.dquant= 0;
3042                         s->skipdct=1;
3043                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3044                                         &dmin, &next_block, mx, my);
3045                         s->skipdct=0;
3046                     }
3047                 }
3048
3049                 s->current_picture.qscale_table[xy] = best_s.qscale;
3050
3051                 copy_context_after_encode(s, &best_s, -1);
3052
3053                 pb_bits_count= put_bits_count(&s->pb);
3054                 flush_put_bits(&s->pb);
3055                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3056                 s->pb= backup_s.pb;
3057
3058                 if(s->data_partitioning){
3059                     pb2_bits_count= put_bits_count(&s->pb2);
3060                     flush_put_bits(&s->pb2);
3061                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3062                     s->pb2= backup_s.pb2;
3063
3064                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3065                     flush_put_bits(&s->tex_pb);
3066                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3067                     s->tex_pb= backup_s.tex_pb;
3068                 }
3069                 s->last_bits= put_bits_count(&s->pb);
3070
3071                 if (CONFIG_H263_ENCODER &&
3072                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3073                     ff_h263_update_motion_val(s);
3074
3075                 if(next_block==0){ //FIXME 16 vs linesize16
3076                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3077                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3078                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3079                 }
3080
3081                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3082                     ff_MPV_decode_mb(s, s->block);
3083             } else {
3084                 int motion_x = 0, motion_y = 0;
3085                 s->mv_type=MV_TYPE_16X16;
3086                 // only one MB-Type possible
3087
3088                 switch(mb_type){
3089                 case CANDIDATE_MB_TYPE_INTRA:
3090                     s->mv_dir = 0;
3091                     s->mb_intra= 1;
3092                     motion_x= s->mv[0][0][0] = 0;
3093                     motion_y= s->mv[0][0][1] = 0;
3094                     break;
3095                 case CANDIDATE_MB_TYPE_INTER:
3096                     s->mv_dir = MV_DIR_FORWARD;
3097                     s->mb_intra= 0;
3098                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3099                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3100                     break;
3101                 case CANDIDATE_MB_TYPE_INTER_I:
3102                     s->mv_dir = MV_DIR_FORWARD;
3103                     s->mv_type = MV_TYPE_FIELD;
3104                     s->mb_intra= 0;
3105                     for(i=0; i<2; i++){
3106                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3107                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3108                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3109                     }
3110                     break;
3111                 case CANDIDATE_MB_TYPE_INTER4V:
3112                     s->mv_dir = MV_DIR_FORWARD;
3113                     s->mv_type = MV_TYPE_8X8;
3114                     s->mb_intra= 0;
3115                     for(i=0; i<4; i++){
3116                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3117                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3118                     }
3119                     break;
3120                 case CANDIDATE_MB_TYPE_DIRECT:
3121                     if (CONFIG_MPEG4_ENCODER) {
3122                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3123                         s->mb_intra= 0;
3124                         motion_x=s->b_direct_mv_table[xy][0];
3125                         motion_y=s->b_direct_mv_table[xy][1];
3126                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3127                     }
3128                     break;
3129                 case CANDIDATE_MB_TYPE_DIRECT0:
3130                     if (CONFIG_MPEG4_ENCODER) {
3131                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3132                         s->mb_intra= 0;
3133                         ff_mpeg4_set_direct_mv(s, 0, 0);
3134                     }
3135                     break;
3136                 case CANDIDATE_MB_TYPE_BIDIR:
3137                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3138                     s->mb_intra= 0;
3139                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3140                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3141                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3142                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3143                     break;
3144                 case CANDIDATE_MB_TYPE_BACKWARD:
3145                     s->mv_dir = MV_DIR_BACKWARD;
3146                     s->mb_intra= 0;
3147                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3148                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3149                     break;
3150                 case CANDIDATE_MB_TYPE_FORWARD:
3151                     s->mv_dir = MV_DIR_FORWARD;
3152                     s->mb_intra= 0;
3153                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3154                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3155                     break;
3156                 case CANDIDATE_MB_TYPE_FORWARD_I:
3157                     s->mv_dir = MV_DIR_FORWARD;
3158                     s->mv_type = MV_TYPE_FIELD;
3159                     s->mb_intra= 0;
3160                     for(i=0; i<2; i++){
3161                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3162                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3163                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3164                     }
3165                     break;
3166                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3167                     s->mv_dir = MV_DIR_BACKWARD;
3168                     s->mv_type = MV_TYPE_FIELD;
3169                     s->mb_intra= 0;
3170                     for(i=0; i<2; i++){
3171                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3172                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3173                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3174                     }
3175                     break;
3176                 case CANDIDATE_MB_TYPE_BIDIR_I:
3177                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3178                     s->mv_type = MV_TYPE_FIELD;
3179                     s->mb_intra= 0;
3180                     for(dir=0; dir<2; dir++){
3181                         for(i=0; i<2; i++){
3182                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3183                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3184                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3185                         }
3186                     }
3187                     break;
3188                 default:
3189                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3190                 }
3191
3192                 encode_mb(s, motion_x, motion_y);
3193
3194                 // RAL: Update last macroblock type
3195                 s->last_mv_dir = s->mv_dir;
3196
3197                 if (CONFIG_H263_ENCODER &&
3198                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3199                     ff_h263_update_motion_val(s);
3200
3201                 ff_MPV_decode_mb(s, s->block);
3202             }
3203
3204             /* For intra MBs in I/P/S frames, clean the MV table entry so direct mode in B-frames sees zero vectors */
3205             if(s->mb_intra /* && I,P,S_TYPE */){
3206                 s->p_mv_table[xy][0]=0;
3207                 s->p_mv_table[xy][1]=0;
3208             }
3209
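            /* Accumulate per-plane squared error against the source frame for PSNR reporting,
               clipping the block size at the right and bottom picture borders. */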
3210             if(s->flags&CODEC_FLAG_PSNR){
3211                 int w= 16;
3212                 int h= 16;
3213
3214                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3215                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3216
3217                 s->current_picture.f.error[0] += sse(
3218                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3219                     s->dest[0], w, h, s->linesize);
3220                 s->current_picture.f.error[1] += sse(
3221                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3222                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3223                 s->current_picture.f.error[2] += sse(
3224                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3225                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3226             }
3227             if(s->loop_filter){
3228                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3229                     ff_h263_loop_filter(s);
3230             }
3231             av_dlog(s->avctx, "MB %d %d bits\n",
3232                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3233         }
3234     }
3235
3236     // Not pretty, but the extension header must be written before the final flush, so it has to be here
3237     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3238         ff_msmpeg4_encode_ext_header(s);
3239
3240     write_slice_end(s);
3241
3242     /* Send the last GOB if RTP */
3243     if (s->avctx->rtp_callback) {
3244         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3245         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3246         /* Call the RTP callback to send the last GOB */
3247         emms_c();
3248         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3249     }
3250
3251     return 0;
3252 }
3253
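/* Merge per-slice-thread statistics back into the main context: each field is added to dst and cleared in src. */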
3254 #define MERGE(field) dst->field += src->field; src->field=0
3255 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3256     MERGE(me.scene_change_score);
3257     MERGE(me.mc_mb_var_sum_temp);
3258     MERGE(me.mb_var_sum_temp);
3259 }
3260
3261 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3262     int i;
3263
3264     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3265     MERGE(dct_count[1]);
3266     MERGE(mv_bits);
3267     MERGE(i_tex_bits);
3268     MERGE(p_tex_bits);
3269     MERGE(i_count);
3270     MERGE(f_count);
3271     MERGE(b_count);
3272     MERGE(skip_count);
3273     MERGE(misc_bits);
3274     MERGE(er.error_count);
3275     MERGE(padding_bug_score);
3276     MERGE(current_picture.f.error[0]);
3277     MERGE(current_picture.f.error[1]);
3278     MERGE(current_picture.f.error[2]);
3279
3280     if(dst->avctx->noise_reduction){
3281         for(i=0; i<64; i++){
3282             MERGE(dct_error_sum[0][i]);
3283             MERGE(dct_error_sum[1][i]);
3284         }
3285     }
3286
3287     assert(put_bits_count(&src->pb) % 8 ==0);
3288     assert(put_bits_count(&dst->pb) % 8 ==0);
3289     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3290     flush_put_bits(&dst->pb);
3291 }
3292
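/* Pick the picture quality: use a scheduled next_lambda if one is set, otherwise ask the rate controller
 * (unless a fixed qscale is forced); with adaptive quantization the per-MB qscale tables are then cleaned
 * up according to the codec in use. */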
3293 static int estimate_qp(MpegEncContext *s, int dry_run){
3294     if (s->next_lambda){
3295         s->current_picture_ptr->f.quality =
3296         s->current_picture.f.quality = s->next_lambda;
3297         if(!dry_run) s->next_lambda= 0;
3298     } else if (!s->fixed_qscale) {
3299         s->current_picture_ptr->f.quality =
3300         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3301         if (s->current_picture.f.quality < 0)
3302             return -1;
3303     }
3304
3305     if(s->adaptive_quant){
3306         switch(s->codec_id){
3307         case AV_CODEC_ID_MPEG4:
3308             if (CONFIG_MPEG4_ENCODER)
3309                 ff_clean_mpeg4_qscales(s);
3310             break;
3311         case AV_CODEC_ID_H263:
3312         case AV_CODEC_ID_H263P:
3313         case AV_CODEC_ID_FLV1:
3314             if (CONFIG_H263_ENCODER)
3315                 ff_clean_h263_qscales(s);
3316             break;
3317         default:
3318             ff_init_qscale_tab(s);
3319         }
3320
3321         s->lambda= s->lambda_table[0];
3322         //FIXME broken
3323     }else
3324         s->lambda = s->current_picture.f.quality;
3325     update_qscale(s);
3326     return 0;
3327 }
3328
3329 /* must be called before writing the header */
3330 static void set_frame_distances(MpegEncContext * s){
3331     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3332     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3333
3334     if(s->pict_type==AV_PICTURE_TYPE_B){
3335         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3336         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3337     }else{
3338         s->pp_time= s->time - s->last_non_b_time;
3339         s->last_non_b_time= s->time;
3340         assert(s->picture_number==0 || s->pp_time > 0);
3341     }
3342 }
3343
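/* Encode one picture: run motion estimation across the slice threads, handle scene-change promotion to an
 * I-frame, pick f_code/b_code and clamp over-long motion vectors, estimate the quantizer, set up the
 * quantization matrices for MJPEG/AMV, write the picture header, then encode all slices in parallel and
 * merge their contexts. */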
3344 static int encode_picture(MpegEncContext *s, int picture_number)
3345 {
3346     int i, ret;
3347     int bits;
3348     int context_count = s->slice_context_count;
3349
3350     s->picture_number = picture_number;
3351
3352     /* Reset the average MB variance */
3353     s->me.mb_var_sum_temp    =
3354     s->me.mc_mb_var_sum_temp = 0;
3355
3356     /* we need to initialize some time vars before we can encode b-frames */
3357     // RAL: Condition added for MPEG1VIDEO
3358     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3359         set_frame_distances(s);
3360     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3361         ff_set_mpeg4_time(s);
3362
3363     s->me.scene_change_score=0;
3364
3365 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3366
3367     if(s->pict_type==AV_PICTURE_TYPE_I){
3368         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3369         else                        s->no_rounding=0;
3370     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3371         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3372             s->no_rounding ^= 1;
3373     }
3374
3375     if(s->flags & CODEC_FLAG_PASS2){
3376         if (estimate_qp(s,1) < 0)
3377             return -1;
3378         ff_get_2pass_fcode(s);
3379     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3380         if(s->pict_type==AV_PICTURE_TYPE_B)
3381             s->lambda= s->last_lambda_for[s->pict_type];
3382         else
3383             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3384         update_qscale(s);
3385     }
3386
3387     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3388         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3389         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3390         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3391         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3392     }
3393
3394     s->mb_intra=0; //for the rate distortion & bit compare functions
3395     for(i=1; i<context_count; i++){
3396         ret = ff_update_duplicate_context(s->thread_context[i], s);
3397         if (ret < 0)
3398             return ret;
3399     }
3400
3401     if(ff_init_me(s)<0)
3402         return -1;
3403
3404     /* Estimate motion for every MB */
3405     if(s->pict_type != AV_PICTURE_TYPE_I){
3406         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3407         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3408         if (s->pict_type != AV_PICTURE_TYPE_B) {
3409             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3410                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3411             }
3412         }
3413
3414         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3415     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3416         /* I-Frame */
3417         for(i=0; i<s->mb_stride*s->mb_height; i++)
3418             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3419
3420         if(!s->fixed_qscale){
3421             /* finding spatial complexity for I-frame rate control */
3422             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3423         }
3424     }
3425     for(i=1; i<context_count; i++){
3426         merge_context_after_me(s, s->thread_context[i]);
3427     }
3428     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3429     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3430     emms_c();
3431
3432     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3433         s->pict_type= AV_PICTURE_TYPE_I;
3434         for(i=0; i<s->mb_stride*s->mb_height; i++)
3435             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3436         if(s->msmpeg4_version >= 3)
3437             s->no_rounding=1;
3438         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3439                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3440     }
3441
3442     if(!s->umvplus){
3443         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3444             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3445
3446             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3447                 int a,b;
3448                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3449                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3450                 s->f_code= FFMAX3(s->f_code, a, b);
3451             }
3452
3453             ff_fix_long_p_mvs(s);
3454             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3455             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3456                 int j;
3457                 for(i=0; i<2; i++){
3458                     for(j=0; j<2; j++)
3459                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3460                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3461                 }
3462             }
3463         }
3464
3465         if(s->pict_type==AV_PICTURE_TYPE_B){
3466             int a, b;
3467
3468             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3469             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3470             s->f_code = FFMAX(a, b);
3471
3472             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3473             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3474             s->b_code = FFMAX(a, b);
3475
3476             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3477             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3478             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3479             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3480             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3481                 int dir, j;
3482                 for(dir=0; dir<2; dir++){
3483                     for(i=0; i<2; i++){
3484                         for(j=0; j<2; j++){
3485                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3486                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3487                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3488                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3489                         }
3490                     }
3491                 }
3492             }
3493         }
3494     }
3495
3496     if (estimate_qp(s, 0) < 0)
3497         return -1;
3498
3499     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3500         s->qscale= 3; //reduce clipping problems
3501
3502     if (s->out_format == FMT_MJPEG) {
3503         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3504         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3505
3506         if (s->avctx->intra_matrix) {
3507             chroma_matrix =
3508             luma_matrix = s->avctx->intra_matrix;
3509         }
3510
3511         /* for MJPEG, we do include qscale in the matrix */
3512         for(i=1;i<64;i++){
3513             int j= s->dsp.idct_permutation[i];
3514
3515             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3516             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3517         }
3518         s->y_dc_scale_table=
3519         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3520         s->chroma_intra_matrix[0] =
3521         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3522         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3523                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3524         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3525                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3526         s->qscale= 8;
3527     }
3528     if(s->codec_id == AV_CODEC_ID_AMV){
3529         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3530         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3531         for(i=1;i<64;i++){
3532             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3533
3534             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3535             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3536         }
3537         s->y_dc_scale_table= y;
3538         s->c_dc_scale_table= c;
3539         s->intra_matrix[0] = 13;
3540         s->chroma_intra_matrix[0] = 14;
3541         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3542                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3543         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3544                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3545         s->qscale= 8;
3546     }
3547
3548     //FIXME var duplication
3549     s->current_picture_ptr->f.key_frame =
3550     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3551     s->current_picture_ptr->f.pict_type =
3552     s->current_picture.f.pict_type = s->pict_type;
3553
3554     if (s->current_picture.f.key_frame)
3555         s->picture_in_gop_number=0;
3556
3557     s->mb_x = s->mb_y = 0;
3558     s->last_bits= put_bits_count(&s->pb);
3559     switch(s->out_format) {
3560     case FMT_MJPEG:
3561         if (CONFIG_MJPEG_ENCODER)
3562             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3563                                            s->intra_matrix, s->chroma_intra_matrix);
3564         break;
3565     case FMT_H261:
3566         if (CONFIG_H261_ENCODER)
3567             ff_h261_encode_picture_header(s, picture_number);
3568         break;
3569     case FMT_H263:
3570         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3571             ff_wmv2_encode_picture_header(s, picture_number);
3572         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3573             ff_msmpeg4_encode_picture_header(s, picture_number);
3574         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3575             ff_mpeg4_encode_picture_header(s, picture_number);
3576         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3577             ff_rv10_encode_picture_header(s, picture_number);
3578         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3579             ff_rv20_encode_picture_header(s, picture_number);
3580         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3581             ff_flv_encode_picture_header(s, picture_number);
3582         else if (CONFIG_H263_ENCODER)
3583             ff_h263_encode_picture_header(s, picture_number);
3584         break;
3585     case FMT_MPEG1:
3586         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3587             ff_mpeg1_encode_picture_header(s, picture_number);
3588         break;
3589     default:
3590         av_assert0(0);
3591     }
3592     bits= put_bits_count(&s->pb);
3593     s->header_bits= bits - s->last_bits;
3594
3595     for(i=1; i<context_count; i++){
3596         update_duplicate_context_after_me(s->thread_context[i], s);
3597     }
3598     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3599     for(i=1; i<context_count; i++){
3600         merge_context_after_encode(s, s->thread_context[i]);
3601     }
3602     emms_c();
3603     return 0;
3604 }
3605
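/* Noise reduction: accumulate the magnitude of each DCT coefficient position in dct_error_sum and pull
 * coefficients towards zero by the adaptive dct_offset, clamping at zero so the sign never flips. */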
3606 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3607     const int intra= s->mb_intra;
3608     int i;
3609
3610     s->dct_count[intra]++;
3611
3612     for(i=0; i<64; i++){
3613         int level= block[i];
3614
3615         if(level){
3616             if(level>0){
3617                 s->dct_error_sum[intra][i] += level;
3618                 level -= s->dct_offset[intra][i];
3619                 if(level<0) level=0;
3620             }else{
3621                 s->dct_error_sum[intra][i] -= level;
3622                 level += s->dct_offset[intra][i];
3623                 if(level>0) level=0;
3624             }
3625             block[i]= level;
3626         }
3627     }
3628 }
3629
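/* Trellis (rate-distortion optimal) quantization: walk the coefficients in scan order, evaluate candidate
 * levels for each position by dynamic programming over run lengths while keeping a pruned list of survivor
 * positions, and write the run/level sequence of the cheapest path back into the block. */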
3630 static int dct_quantize_trellis_c(MpegEncContext *s,
3631                                   int16_t *block, int n,
3632                                   int qscale, int *overflow){
3633     const int *qmat;
3634     const uint8_t *scantable= s->intra_scantable.scantable;
3635     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3636     int max=0;
3637     unsigned int threshold1, threshold2;
3638     int bias=0;
3639     int run_tab[65];
3640     int level_tab[65];
3641     int score_tab[65];
3642     int survivor[65];
3643     int survivor_count;
3644     int last_run=0;
3645     int last_level=0;
3646     int last_score= 0;
3647     int last_i;
3648     int coeff[2][64];
3649     int coeff_count[64];
3650     int qmul, qadd, start_i, last_non_zero, i, dc;
3651     const int esc_length= s->ac_esc_length;
3652     uint8_t * length;
3653     uint8_t * last_length;
3654     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3655
3656     s->dsp.fdct (block);
3657
3658     if(s->dct_error_sum)
3659         s->denoise_dct(s, block);
3660     qmul= qscale*16;
3661     qadd= ((qscale-1)|1)*8;
3662
3663     if (s->mb_intra) {
3664         int q;
3665         if (!s->h263_aic) {
3666             if (n < 4)
3667                 q = s->y_dc_scale;
3668             else
3669                 q = s->c_dc_scale;
3670             q = q << 3;
3671         } else{
3672             /* For AIC we skip quant/dequant of INTRADC */
3673             q = 1 << 3;
3674             qadd=0;
3675         }
3676
3677         /* note: block[0] is assumed to be positive */
3678         block[0] = (block[0] + (q >> 1)) / q;
3679         start_i = 1;
3680         last_non_zero = 0;
3681         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3682         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3683             bias= 1<<(QMAT_SHIFT-1);
3684         length     = s->intra_ac_vlc_length;
3685         last_length= s->intra_ac_vlc_last_length;
3686     } else {
3687         start_i = 0;
3688         last_non_zero = -1;
3689         qmat = s->q_inter_matrix[qscale];
3690         length     = s->inter_ac_vlc_length;
3691         last_length= s->inter_ac_vlc_last_length;
3692     }
3693     last_i= start_i;
3694
3695     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3696     threshold2= (threshold1<<1);
3697
3698     for(i=63; i>=start_i; i--) {
3699         const int j = scantable[i];
3700         int level = block[j] * qmat[j];
3701
3702         if(((unsigned)(level+threshold1))>threshold2){
3703             last_non_zero = i;
3704             break;
3705         }
3706     }
3707
3708     for(i=start_i; i<=last_non_zero; i++) {
3709         const int j = scantable[i];
3710         int level = block[j] * qmat[j];
3711
3712 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3713 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3714         if(((unsigned)(level+threshold1))>threshold2){
3715             if(level>0){
3716                 level= (bias + level)>>QMAT_SHIFT;
3717                 coeff[0][i]= level;
3718                 coeff[1][i]= level-1;
3719 //                coeff[2][k]= level-2;
3720             }else{
3721                 level= (bias - level)>>QMAT_SHIFT;
3722                 coeff[0][i]= -level;
3723                 coeff[1][i]= -level+1;
3724 //                coeff[2][k]= -level+2;
3725             }
3726             coeff_count[i]= FFMIN(level, 2);
3727             av_assert2(coeff_count[i]);
3728             max |=level;
3729         }else{
3730             coeff[0][i]= (level>>31)|1;
3731             coeff_count[i]= 1;
3732         }
3733     }
3734
3735     *overflow= s->max_qcoeff < max; //overflow might have happened
3736
3737     if(last_non_zero < start_i){
3738         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3739         return last_non_zero;
3740     }
3741
3742     score_tab[start_i]= 0;
3743     survivor[0]= start_i;
3744     survivor_count= 1;
3745
3746     for(i=start_i; i<=last_non_zero; i++){
3747         int level_index, j, zero_distortion;
3748         int dct_coeff= FFABS(block[ scantable[i] ]);
3749         int best_score=256*256*256*120;
3750
3751         if (s->dsp.fdct == ff_fdct_ifast)
3752             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3753         zero_distortion= dct_coeff*dct_coeff;
3754
3755         for(level_index=0; level_index < coeff_count[i]; level_index++){
3756             int distortion;
3757             int level= coeff[level_index][i];
3758             const int alevel= FFABS(level);
3759             int unquant_coeff;
3760
3761             av_assert2(level);
3762
3763             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3764                 unquant_coeff= alevel*qmul + qadd;
3765             }else{ //MPEG1
3766                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3767                 if(s->mb_intra){
3768                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3769                         unquant_coeff =   (unquant_coeff - 1) | 1;
3770                 }else{
3771                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3772                         unquant_coeff =   (unquant_coeff - 1) | 1;
3773                 }
3774                 unquant_coeff<<= 3;
3775             }
3776
3777             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3778             level+=64;
3779             if((level&(~127)) == 0){
3780                 for(j=survivor_count-1; j>=0; j--){
3781                     int run= i - survivor[j];
3782                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3783                     score += score_tab[i-run];
3784
3785                     if(score < best_score){
3786                         best_score= score;
3787                         run_tab[i+1]= run;
3788                         level_tab[i+1]= level-64;
3789                     }
3790                 }
3791
3792                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3793                     for(j=survivor_count-1; j>=0; j--){
3794                         int run= i - survivor[j];
3795                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3796                         score += score_tab[i-run];
3797                         if(score < last_score){
3798                             last_score= score;
3799                             last_run= run;
3800                             last_level= level-64;
3801                             last_i= i+1;
3802                         }
3803                     }
3804                 }
3805             }else{
3806                 distortion += esc_length*lambda;
3807                 for(j=survivor_count-1; j>=0; j--){
3808                     int run= i - survivor[j];
3809                     int score= distortion + score_tab[i-run];
3810
3811                     if(score < best_score){
3812                         best_score= score;
3813                         run_tab[i+1]= run;
3814                         level_tab[i+1]= level-64;
3815                     }
3816                 }
3817
3818                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3819                   for(j=survivor_count-1; j>=0; j--){
3820                         int run= i - survivor[j];
3821                         int score= distortion + score_tab[i-run];
3822                         if(score < last_score){
3823                             last_score= score;
3824                             last_run= run;
3825                             last_level= level-64;
3826                             last_i= i+1;
3827                         }
3828                     }
3829                 }
3830             }
3831         }
3832
3833         score_tab[i+1]= best_score;
3834
3835         //Note: MPEG-4 has a VLC code that is 1 bit shorter than another one with a shorter run and the same level
3836         if(last_non_zero <= 27){
3837             for(; survivor_count; survivor_count--){
3838                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3839                     break;
3840             }
3841         }else{
3842             for(; survivor_count; survivor_count--){
3843                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3844                     break;
3845             }
3846         }
3847
3848         survivor[ survivor_count++ ]= i+1;
3849     }
3850
3851     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3852         last_score= 256*256*256*120;
3853         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3854             int score= score_tab[i];
3855             if(i) score += lambda*2; //FIXME be more exact?
3856
3857             if(score < last_score){
3858                 last_score= score;
3859                 last_i= i;
3860                 last_level= level_tab[i];
3861                 last_run= run_tab[i];
3862             }
3863         }
3864     }
3865
3866     s->coded_score[n] = last_score;
3867
3868     dc= FFABS(block[0]);
3869     last_non_zero= last_i - 1;
3870     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3871
3872     if(last_non_zero < start_i)
3873         return last_non_zero;
3874
3875     if(last_non_zero == 0 && start_i == 0){
3876         int best_level= 0;
3877         int best_score= dc * dc;
3878
3879         for(i=0; i<coeff_count[0]; i++){
3880             int level= coeff[i][0];
3881             int alevel= FFABS(level);
3882             int unquant_coeff, score, distortion;
3883
3884             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3885                     unquant_coeff= (alevel*qmul + qadd)>>3;
3886             }else{ //MPEG1
3887                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3888                     unquant_coeff =   (unquant_coeff - 1) | 1;
3889             }
3890             unquant_coeff = (unquant_coeff + 4) >> 3;
3891             unquant_coeff<<= 3 + 3;
3892
3893             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3894             level+=64;
3895             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3896             else                    score= distortion + esc_length*lambda;
3897
3898             if(score < best_score){
3899                 best_score= score;
3900                 best_level= level - 64;
3901             }
3902         }
3903         block[0]= best_level;
3904         s->coded_score[n] = best_score - dc*dc;
3905         if(best_level == 0) return -1;
3906         else                return last_non_zero;
3907     }
3908
3909     i= last_i;
3910     av_assert2(last_level);
3911
3912     block[ perm_scantable[last_non_zero] ]= last_level;
3913     i -= last_run + 1;
3914
3915     for(; i>start_i; i -= run_tab[i] + 1){
3916         block[ perm_scantable[i-1] ]= level_tab[i];
3917     }
3918
3919     return last_non_zero;
3920 }
3921
3922 //#define REFINE_STATS 1
3923 static int16_t basis[64][64];
3924
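/* Precompute the 64 8x8 DCT basis patterns (scaled cosine products, with the DC row/column weighted by
 * sqrt(0.5)) in the encoder's coefficient permutation; dct_quantize_refine() uses them to apply
 * single-coefficient changes directly in the spatial domain. */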
3925 static void build_basis(uint8_t *perm){
3926     int i, j, x, y;
3927     emms_c();
3928     for(i=0; i<8; i++){
3929         for(j=0; j<8; j++){
3930             for(y=0; y<8; y++){
3931                 for(x=0; x<8; x++){
3932                     double s= 0.25*(1<<BASIS_SHIFT);
3933                     int index= 8*i + j;
3934                     int perm_index= perm[index];
3935                     if(i==0) s*= sqrt(0.5);
3936                     if(j==0) s*= sqrt(0.5);
3937                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3938                 }
3939             }
3940         }
3941     }
3942 }
3943
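/* Iterative quantization refinement: starting from the already-quantized block, repeatedly try changing
 * individual levels by +-1, score each change by the weighted spatial-domain error (rem[]) plus the VLC
 * rate cost, and keep the most beneficial change. */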
3944 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3945                         int16_t *block, int16_t *weight, int16_t *orig,
3946                         int n, int qscale){
3947     int16_t rem[64];
3948     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3949     const uint8_t *scantable= s->intra_scantable.scantable;
3950     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3951 //    unsigned int threshold1, threshold2;
3952 //    int bias=0;
3953     int run_tab[65];
3954     int prev_run=0;
3955     int prev_level=0;
3956     int qmul, qadd, start_i, last_non_zero, i, dc;
3957     uint8_t * length;
3958     uint8_t * last_length;
3959     int lambda;
3960     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3961 #ifdef REFINE_STATS
3962 static int count=0;
3963 static int after_last=0;
3964 static int to_zero=0;
3965 static int from_zero=0;
3966 static int raise=0;
3967 static int lower=0;
3968 static int messed_sign=0;
3969 #endif
3970
3971     if(basis[0][0] == 0)
3972         build_basis(s->dsp.idct_permutation);
3973
3974     qmul= qscale*2;
3975     qadd= (qscale-1)|1;
3976     if (s->mb_intra) {
3977         if (!s->h263_aic) {
3978             if (n < 4)
3979                 q = s->y_dc_scale;
3980             else
3981                 q = s->c_dc_scale;
3982         } else{
3983             /* For AIC we skip quant/dequant of INTRADC */
3984             q = 1;
3985             qadd=0;
3986         }
3987         q <<= RECON_SHIFT-3;
3988         /* note: block[0] is assumed to be positive */
3989         dc= block[0]*q;
3990 //        block[0] = (block[0] + (q >> 1)) / q;
3991         start_i = 1;
3992 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3993 //            bias= 1<<(QMAT_SHIFT-1);
3994         length     = s->intra_ac_vlc_length;
3995         last_length= s->intra_ac_vlc_last_length;
3996     } else {
3997         dc= 0;
3998         start_i = 0;
3999         length     = s->inter_ac_vlc_length;
4000         last_length= s->inter_ac_vlc_last_length;
4001     }
4002     last_non_zero = s->block_last_index[n];
4003
4004 #ifdef REFINE_STATS
4005 {START_TIMER
4006 #endif
4007     dc += (1<<(RECON_SHIFT-1));
4008     for(i=0; i<64; i++){
4009         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
4010     }
4011 #ifdef REFINE_STATS
4012 STOP_TIMER("memset rem[]")}
4013 #endif
4014     sum=0;
4015     for(i=0; i<64; i++){
4016         int one= 36;
4017         int qns=4;
4018         int w;
4019
4020         w= FFABS(weight[i]) + qns*one;
4021         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4022
4023         weight[i] = w;
4024 //        w=weight[i] = (63*qns + (w/2)) / w;
4025
4026         av_assert2(w>0);
4027         av_assert2(w<(1<<6));
4028         sum += w*w;
4029     }
4030     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4031 #ifdef REFINE_STATS
4032 {START_TIMER
4033 #endif
4034     run=0;
4035     rle_index=0;
4036     for(i=start_i; i<=last_non_zero; i++){
4037         int j= perm_scantable[i];
4038         const int level= block[j];
4039         int coeff;
4040
4041         if(level){
4042             if(level<0) coeff= qmul*level - qadd;
4043             else        coeff= qmul*level + qadd;
4044             run_tab[rle_index++]=run;
4045             run=0;
4046
4047             s->dsp.add_8x8basis(rem, basis[j], coeff);
4048         }else{
4049             run++;
4050         }
4051     }
4052 #ifdef REFINE_STATS
4053 if(last_non_zero>0){
4054 STOP_TIMER("init rem[]")
4055 }
4056 }
4057
4058 {START_TIMER
4059 #endif
4060     for(;;){
4061         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4062         int best_coeff=0;
4063         int best_change=0;
4064         int run2, best_unquant_change=0, analyze_gradient;
4065 #ifdef REFINE_STATS
4066 {START_TIMER
4067 #endif
4068         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4069
4070         if(analyze_gradient){
4071 #ifdef REFINE_STATS
4072 {START_TIMER
4073 #endif
4074             for(i=0; i<64; i++){
4075                 int w= weight[i];
4076
4077                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4078             }
4079 #ifdef REFINE_STATS
4080 STOP_TIMER("rem*w*w")}
4081 {START_TIMER
4082 #endif
4083             s->dsp.fdct(d1);
4084 #ifdef REFINE_STATS
4085 STOP_TIMER("dct")}
4086 #endif
4087         }
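        /* d1 now holds the DCT of the weighted residual; its signs are used
         * below as a gradient check to reject +-1 changes that would push the
         * reconstruction further away from the source. */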
4088
4089         if(start_i){
4090             const int level= block[0];
4091             int change, old_coeff;
4092
4093             av_assert2(s->mb_intra);
4094
4095             old_coeff= q*level;
4096
4097             for(change=-1; change<=1; change+=2){
4098                 int new_level= level + change;
4099                 int score, new_coeff;
4100
4101                 new_coeff= q*new_level;
4102                 if(new_coeff >= 2048 || new_coeff < 0)
4103                     continue;
4104
4105                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4106                 if(score<best_score){
4107                     best_score= score;
4108                     best_coeff= 0;
4109                     best_change= change;
4110                     best_unquant_change= new_coeff - old_coeff;
4111                 }
4112             }
4113         }
4114
4115         run=0;
4116         rle_index=0;
4117         run2= run_tab[rle_index++];
4118         prev_level=0;
4119         prev_run=0;
4120
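        /* Scan all coefficient positions and, for each, evaluate changing the
         * level by +-1: the VLC length tables give the rate delta and
         * try_8x8basis() on rem[] gives the distortion delta. The cheapest
         * improving change (if any) is remembered in best_*. */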
4121         for(i=start_i; i<64; i++){
4122             int j= perm_scantable[i];
4123             const int level= block[j];
4124             int change, old_coeff;
4125
4126             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4127                 break;
4128
4129             if(level){
4130                 if(level<0) old_coeff= qmul*level - qadd;
4131                 else        old_coeff= qmul*level + qadd;
4132                 run2= run_tab[rle_index++]; //FIXME: may read one past the last stored run
4133             }else{
4134                 old_coeff=0;
4135                 run2--;
4136                 av_assert2(run2>=0 || i >= last_non_zero );
4137             }
4138
4139             for(change=-1; change<=1; change+=2){
4140                 int new_level= level + change;
4141                 int score, new_coeff, unquant_change;
4142
4143                 score=0;
4144                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4145                    continue;
4146
4147                 if(new_level){
4148                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4149                     else            new_coeff= qmul*new_level + qadd;
4150                     if(new_coeff >= 2048 || new_coeff <= -2048)
4151                         continue;
4152                     //FIXME check for overflow
4153
4154                     if(level){
4155                         if(level < 63 && level > -63){
4156                             if(i < last_non_zero)
4157                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4158                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4159                             else
4160                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4161                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4162                         }
4163                     }else{
4164                         av_assert2(FFABS(new_level)==1);
4165
4166                         if(analyze_gradient){
4167                             int g= d1[ scantable[i] ];
4168                             if(g && (g^new_level) >= 0)
4169                                 continue;
4170                         }
4171
4172                         if(i < last_non_zero){
4173                             int next_i= i + run2 + 1;
4174                             int next_level= block[ perm_scantable[next_i] ] + 64;
4175
4176                             if(next_level&(~127))
4177                                 next_level= 0;
4178
4179                             if(next_i < last_non_zero)
4180                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4181                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4182                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4183                             else
4184                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4185                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4186                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4187                         }else{
4188                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4189                             if(prev_level){
4190                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4191                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4192                             }
4193                         }
4194                     }
4195                 }else{
4196                     new_coeff=0;
4197                     av_assert2(FFABS(level)==1);
4198
4199                     if(i < last_non_zero){
4200                         int next_i= i + run2 + 1;
4201                         int next_level= block[ perm_scantable[next_i] ] + 64;
4202
4203                         if(next_level&(~127))
4204                             next_level= 0;
4205
4206                         if(next_i < last_non_zero)
4207                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4208                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4209                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4210                         else
4211                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4212                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4213                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4214                     }else{
4215                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4216                         if(prev_level){
4217                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4218                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4219                         }
4220                     }
4221                 }
4222
4223                 score *= lambda;
4224
4225                 unquant_change= new_coeff - old_coeff;
4226                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4227
4228                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4229                 if(score<best_score){
4230                     best_score= score;
4231                     best_coeff= i;
4232                     best_change= change;
4233                     best_unquant_change= unquant_change;
4234                 }
4235             }
4236             if(level){
4237                 prev_level= level + 64;
4238                 if(prev_level&(~127))
4239                     prev_level= 0;
4240                 prev_run= run;
4241                 run=0;
4242             }else{
4243                 run++;
4244             }
4245         }
4246 #ifdef REFINE_STATS
4247 STOP_TIMER("iterative step")}
4248 #endif
4249
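        /* Apply the best change found in this pass; if nothing improved the
         * score, the refinement has converged and the loop terminates. The
         * run-length table and last_non_zero are kept in sync with block[]. */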
4250         if(best_change){
4251             int j= perm_scantable[ best_coeff ];
4252
4253             block[j] += best_change;
4254
4255             if(best_coeff > last_non_zero){
4256                 last_non_zero= best_coeff;
4257                 av_assert2(block[j]);
4258 #ifdef REFINE_STATS
4259 after_last++;
4260 #endif
4261             }else{
4262 #ifdef REFINE_STATS
4263 if(block[j]){
4264     if(block[j] - best_change){
4265         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4266             raise++;
4267         }else{
4268             lower++;
4269         }
4270     }else{
4271         from_zero++;
4272     }
4273 }else{
4274     to_zero++;
4275 }
4276 #endif
4277                 for(; last_non_zero>=start_i; last_non_zero--){
4278                     if(block[perm_scantable[last_non_zero]])
4279                         break;
4280                 }
4281             }
4282 #ifdef REFINE_STATS
4283 count++;
4284 if(256*256*256*64 % count == 0){
4285     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4286 }
4287 #endif
4288             run=0;
4289             rle_index=0;
4290             for(i=start_i; i<=last_non_zero; i++){
4291                 int j= perm_scantable[i];
4292                 const int level= block[j];
4293
4294                 if(level){
4295                     run_tab[rle_index++]=run;
4296                     run=0;
4297                 }else{
4298                     run++;
4299                 }
4300             }
4301
4302             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4303         }else{
4304             break;
4305         }
4306     }
4307 #ifdef REFINE_STATS
4308 if(last_non_zero>0){
4309 STOP_TIMER("iterative search")
4310 }
4311 }
4312 #endif
4313
4314     return last_non_zero;
4315 }
4316
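/**
 * Forward DCT and quantization of one 8x8 block. Returns the scan-order index
 * of the last nonzero coefficient; *overflow is set when some level exceeds
 * s->max_qcoeff, so the caller can clip the coefficients afterwards. This is
 * the default s->dct_quantize implementation; when trellis quantization is
 * requested, the encoder switches to dct_quantize_trellis_c instead.
 */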
4317 int ff_dct_quantize_c(MpegEncContext *s,
4318                         int16_t *block, int n,
4319                         int qscale, int *overflow)
4320 {
4321     int i, j, level, last_non_zero, q, start_i;
4322     const int *qmat;
4323     const uint8_t *scantable= s->intra_scantable.scantable;
4324     int bias;
4325     int max=0;
4326     unsigned int threshold1, threshold2;
4327
4328     s->dsp.fdct (block);
4329
4330     if(s->dct_error_sum)
4331         s->denoise_dct(s, block);
4332
4333     if (s->mb_intra) {
4334         if (!s->h263_aic) {
4335             if (n < 4)
4336                 q = s->y_dc_scale;
4337             else
4338                 q = s->c_dc_scale;
4339             q = q << 3;
4340         } else
4341             /* For AIC we skip quant/dequant of INTRADC */
4342             q = 1 << 3;
4343
4344         /* note: block[0] is assumed to be positive */
4345         block[0] = (block[0] + (q >> 1)) / q;
4346         start_i = 1;
4347         last_non_zero = 0;
4348         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4349         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4350     } else {
4351         start_i = 0;
4352         last_non_zero = -1;
4353         qmat = s->q_inter_matrix[qscale];
4354         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4355     }
4356     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4357     threshold2= (threshold1<<1);
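    /* (unsigned)(level + threshold1) > threshold2 below is a branch-free test
     * for level < -threshold1 || level > threshold1; anything inside that
     * dead zone quantizes to zero. */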
4358     for(i=63;i>=start_i;i--) {
4359         j = scantable[i];
4360         level = block[j] * qmat[j];
4361
4362         if(((unsigned)(level+threshold1))>threshold2){
4363             last_non_zero = i;
4364             break;
4365         }else{
4366             block[j]=0;
4367         }
4368     }
4369     for(i=start_i; i<=last_non_zero; i++) {
4370         j = scantable[i];
4371         level = block[j] * qmat[j];
4372
4373 //        if(   bias+level >= (1<<QMAT_SHIFT)
4374 //           || bias-level >= (1<<QMAT_SHIFT)){
4375         if(((unsigned)(level+threshold1))>threshold2){
4376             if(level>0){
4377                 level= (bias + level)>>QMAT_SHIFT;
4378                 block[j]= level;
4379             }else{
4380                 level= (bias - level)>>QMAT_SHIFT;
4381                 block[j]= -level;
4382             }
4383             max |=level;
4384         }else{
4385             block[j]=0;
4386         }
4387     }
4388     *overflow= s->max_qcoeff < max; //overflow might have happened
4389
4390     /* permute the block so it matches the IDCT permutation; only the nonzero elements need to be permuted */
4391     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4392         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4393
4394     return last_non_zero;
4395 }
4396
4397 #define OFFSET(x) offsetof(MpegEncContext, x)
4398 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4399 static const AVOption h263_options[] = {
4400     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4401     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4402     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4403     FF_MPV_COMMON_OPTS
4404     { NULL },
4405 };
4406
4407 static const AVClass h263_class = {
4408     .class_name = "H.263 encoder",
4409     .item_name  = av_default_item_name,
4410     .option     = h263_options,
4411     .version    = LIBAVUTIL_VERSION_INT,
4412 };
4413
4414 AVCodec ff_h263_encoder = {
4415     .name           = "h263",
4416     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4417     .type           = AVMEDIA_TYPE_VIDEO,
4418     .id             = AV_CODEC_ID_H263,
4419     .priv_data_size = sizeof(MpegEncContext),
4420     .init           = ff_MPV_encode_init,
4421     .encode2        = ff_MPV_encode_picture,
4422     .close          = ff_MPV_encode_end,
4423     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4424     .priv_class     = &h263_class,
4425 };
4426
4427 static const AVOption h263p_options[] = {
4428     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4429     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4430     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4431     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4432     FF_MPV_COMMON_OPTS
4433     { NULL },
4434 };
4435 static const AVClass h263p_class = {
4436     .class_name = "H.263p encoder",
4437     .item_name  = av_default_item_name,
4438     .option     = h263p_options,
4439     .version    = LIBAVUTIL_VERSION_INT,
4440 };
4441
4442 AVCodec ff_h263p_encoder = {
4443     .name           = "h263p",
4444     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4445     .type           = AVMEDIA_TYPE_VIDEO,
4446     .id             = AV_CODEC_ID_H263P,
4447     .priv_data_size = sizeof(MpegEncContext),
4448     .init           = ff_MPV_encode_init,
4449     .encode2        = ff_MPV_encode_picture,
4450     .close          = ff_MPV_encode_end,
4451     .capabilities   = CODEC_CAP_SLICE_THREADS,
4452     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4453     .priv_class     = &h263p_class,
4454 };
4455
4456 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4457
4458 AVCodec ff_msmpeg4v2_encoder = {
4459     .name           = "msmpeg4v2",
4460     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4461     .type           = AVMEDIA_TYPE_VIDEO,
4462     .id             = AV_CODEC_ID_MSMPEG4V2,
4463     .priv_data_size = sizeof(MpegEncContext),
4464     .init           = ff_MPV_encode_init,
4465     .encode2        = ff_MPV_encode_picture,
4466     .close          = ff_MPV_encode_end,
4467     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4468     .priv_class     = &msmpeg4v2_class,
4469 };
4470
4471 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4472
4473 AVCodec ff_msmpeg4v3_encoder = {
4474     .name           = "msmpeg4",
4475     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4476     .type           = AVMEDIA_TYPE_VIDEO,
4477     .id             = AV_CODEC_ID_MSMPEG4V3,
4478     .priv_data_size = sizeof(MpegEncContext),
4479     .init           = ff_MPV_encode_init,
4480     .encode2        = ff_MPV_encode_picture,
4481     .close          = ff_MPV_encode_end,
4482     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4483     .priv_class     = &msmpeg4v3_class,
4484 };
4485
4486 FF_MPV_GENERIC_CLASS(wmv1)
4487
4488 AVCodec ff_wmv1_encoder = {
4489     .name           = "wmv1",
4490     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4491     .type           = AVMEDIA_TYPE_VIDEO,
4492     .id             = AV_CODEC_ID_WMV1,
4493     .priv_data_size = sizeof(MpegEncContext),
4494     .init           = ff_MPV_encode_init,
4495     .encode2        = ff_MPV_encode_picture,
4496     .close          = ff_MPV_encode_end,
4497     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4498     .priv_class     = &wmv1_class,
4499 };