git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "avcodec.h"
  38 #include "dct.h"
  39 #include "dsputil.h"
  40 #include "mpeg12.h"
  41 #include "mpegvideo.h"
  42 #include "h261.h"
  43 #include "h263.h"
  44 #include "mathops.h"
  45 #include "mjpegenc.h"
  46 #include "msmpeg4.h"
  47 #include "faandct.h"
  48 #include "thread.h"
  49 #include "aandcttab.h"
  50 #include "flv.h"
  51 #include "mpeg4video.h"
  52 #include "internal.h"
  53 #include "bytestream.h"
  54 #include <limits.h>
  55 #include "sp5x.h"
  56
  57 static int encode_picture(MpegEncContext *s, int picture_number);
  58 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  59 static int sse_mb(MpegEncContext *s);
  60 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  61 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  62
  63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  65
  66 const AVOption ff_mpv_generic_options[] = {
  67     FF_MPV_COMMON_OPTS
  68     { NULL },
  69 };
  70
  71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  72                        uint16_t (*qmat16)[2][64],
  73                        const uint16_t *quant_matrix,
  74                        int bias, int qmin, int qmax, int intra)
  75 {
  76     int qscale;
  77     int shift = 0;
  78
  79     for (qscale = qmin; qscale <= qmax; qscale++) {
  80         int i;
  81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  83             dsp->fdct == ff_faandct) {
  84             for (i = 0; i < 64; i++) {
  85                 const int j = dsp->idct_permutation[i];
  86                 /* 16 <= qscale * quant_matrix[i] <= 7905
  87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  88                  *             19952 <=              x  <= 249205026
  89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  90                  *           3444240 >= (1 << 36) / (x) >= 275 */
  91
  92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  93                                         (qscale * quant_matrix[j]));
  94             }
  95         } else if (dsp->fdct == ff_fdct_ifast) {
  96             for (i = 0; i < 64; i++) {
  97                 const int j = dsp->idct_permutation[i];
  98                 /* 16 <= qscale * quant_matrix[i] <= 7905
  99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 100                  *             19952 <=              x  <= 249205026
 101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 102                  *           3444240 >= (1 << 36) / (x) >= 275 */
 103
 104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 105                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 106             }
 107         } else {
 108             for (i = 0; i < 64; i++) {
 109                 const int j = dsp->idct_permutation[i];
 110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 111                  * Assume x = qscale * quant_matrix[i]
 112                  * So             16 <=              x  <= 7905
 113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 114                  * so          32768 >= (1 << 19) / (x) >= 67 */
 115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 116                                         (qscale * quant_matrix[j]));
 117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 118                 //                    (qscale * quant_matrix[i]);
 119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 120                                        (qscale * quant_matrix[j]);
 121
 122                 if (qmat16[qscale][0][i] == 0 ||
 123                     qmat16[qscale][0][i] == 128 * 256)
 124                     qmat16[qscale][0][i] = 128 * 256 - 1;
 125                 qmat16[qscale][1][i] =
 126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 127                                 qmat16[qscale][0][i]);
 128             }
 129         }
 130
 131         for (i = intra; i < 64; i++) {
 132             int64_t max = 8191;
 133             if (dsp->fdct == ff_fdct_ifast) {
 134                 max = (8191LL * ff_aanscales[i]) >> 14;
 135             }
 136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 137                 shift++;
 138             }
 139         }
 140     }
 141     if (shift) {
 142         av_log(NULL, AV_LOG_INFO,
 143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 144                QMAT_SHIFT - shift);
 145     }
 146 }
 147
 148 static inline void update_qscale(MpegEncContext *s)
 149 {
 150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 151                 (FF_LAMBDA_SHIFT + 7);
 152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 153
 154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 155                  FF_LAMBDA_SHIFT;
 156 }
 157
 158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 159 {
 160     int i;
 161
 162     if (matrix) {
 163         put_bits(pb, 1, 1);
 164         for (i = 0; i < 64; i++) {
 165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 166         }
 167     } else
 168         put_bits(pb, 1, 0);
 169 }
 170
 171 /**
 172  * init s->current_picture.qscale_table from s->lambda_table
 173  */
 174 void ff_init_qscale_tab(MpegEncContext *s)
 175 {
 176     int8_t * const qscale_table = s->current_picture.qscale_table;
 177     int i;
 178
 179     for (i = 0; i < s->mb_num; i++) {
 180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 183                                                   s->avctx->qmax);
 184     }
 185 }
 186
 187 static void update_duplicate_context_after_me(MpegEncContext *dst,
 188                                               MpegEncContext *src)
 189 {
 190 #define COPY(a) dst->a= src->a
 191     COPY(pict_type);
 192     COPY(current_picture);
 193     COPY(f_code);
 194     COPY(b_code);
 195     COPY(qscale);
 196     COPY(lambda);
 197     COPY(lambda2);
 198     COPY(picture_in_gop_number);
 199     COPY(gop_picture_number);
 200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 201     COPY(progressive_frame);    // FIXME don't set in encode_header
 202     COPY(partitioned_frame);    // FIXME don't set in encode_header
 203 #undef COPY
 204 }
 205
 206 /**
 207  * Set the given MpegEncContext to defaults for encoding.
 208  * the changed fields will not depend upon the prior state of the MpegEncContext.
 209  */
 210 static void MPV_encode_defaults(MpegEncContext *s)
 211 {
 212     int i;
 213     ff_MPV_common_defaults(s);
 214
 215     for (i = -16; i < 16; i++) {
 216         default_fcode_tab[i + MAX_MV] = 1;
 217     }
 218     s->me.mv_penalty = default_mv_penalty;
 219     s->fcode_tab     = default_fcode_tab;
 220
 221     s->input_picture_number  = 0;
 222     s->picture_in_gop_number = 0;
 223 }
 224
 225 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 226     if (ARCH_X86)
 227         ff_dct_encode_init_x86(s);
 228
 229     ff_h263dsp_init(&s->h263dsp);
 230     if (!s->dct_quantize)
 231         s->dct_quantize = ff_dct_quantize_c;
 232     if (!s->denoise_dct)
 233         s->denoise_dct  = denoise_dct_c;
 234     s->fast_dct_quantize = s->dct_quantize;
 235     if (s->avctx->trellis)
 236         s->dct_quantize  = dct_quantize_trellis_c;
 237
 238     return 0;
 239 }
 240
 241 /* init video encoder */
 242 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 243 {
 244     MpegEncContext *s = avctx->priv_data;
 245     int i, ret;
 246
 247     MPV_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259     case AV_CODEC_ID_AMV:
 260         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 261             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 262             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
 263             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 264               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
 265               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
 266              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 267             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 268             return -1;
 269         }
 270         break;
 271     default:
 272         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 273             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 274             return -1;
 275         }
 276     }
 277
 278     switch (avctx->pix_fmt) {
 279     case AV_PIX_FMT_YUVJ444P:
 280     case AV_PIX_FMT_YUV444P:
 281         s->chroma_format = CHROMA_444;
 282         break;
 283     case AV_PIX_FMT_YUVJ422P:
 284     case AV_PIX_FMT_YUV422P:
 285         s->chroma_format = CHROMA_422;
 286         break;
 287     case AV_PIX_FMT_YUVJ420P:
 288     case AV_PIX_FMT_YUV420P:
 289     default:
 290         s->chroma_format = CHROMA_420;
 291         break;
 292     }
 293
 294     s->bit_rate = avctx->bit_rate;
 295     s->width    = avctx->width;
 296     s->height   = avctx->height;
 297     if (avctx->gop_size > 600 &&
 298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 299         av_log(avctx, AV_LOG_WARNING,
 300                "keyframe interval too large!, reducing it from %d to %d\n",
 301                avctx->gop_size, 600);
 302         avctx->gop_size = 600;
 303     }
 304     s->gop_size     = avctx->gop_size;
 305     s->avctx        = avctx;
 306     s->flags        = avctx->flags;
 307     s->flags2       = avctx->flags2;
 308     if (avctx->max_b_frames > MAX_B_FRAMES) {
 309         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 310                "is %d.\n", MAX_B_FRAMES);
 311         avctx->max_b_frames = MAX_B_FRAMES;
 312     }
 313     s->max_b_frames = avctx->max_b_frames;
 314     s->codec_id     = avctx->codec->id;
 315     s->strict_std_compliance = avctx->strict_std_compliance;
 316     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 317     s->mpeg_quant         = avctx->mpeg_quant;
 318     s->rtp_mode           = !!avctx->rtp_payload_size;
 319     s->intra_dc_precision = avctx->intra_dc_precision;
 320     s->user_specified_pts = AV_NOPTS_VALUE;
 321
 322     if (s->gop_size <= 1) {
 323         s->intra_only = 1;
 324         s->gop_size   = 12;
 325     } else {
 326         s->intra_only = 0;
 327     }
 328
 329     s->me_method = avctx->me_method;
 330
 331     /* Fixed QSCALE */
 332     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 333
 334     s->adaptive_quant = (s->avctx->lumi_masking ||
 335                          s->avctx->dark_masking ||
 336                          s->avctx->temporal_cplx_masking ||
 337                          s->avctx->spatial_cplx_masking  ||
 338                          s->avctx->p_masking      ||
 339                          s->avctx->border_masking ||
 340                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 341                         !s->fixed_qscale;
 342
 343     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 344
 345     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 346         switch(avctx->codec_id) {
 347         case AV_CODEC_ID_MPEG1VIDEO:
 348         case AV_CODEC_ID_MPEG2VIDEO:
 349             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 350             break;
 351         case AV_CODEC_ID_MPEG4:
 352         case AV_CODEC_ID_MSMPEG4V1:
 353         case AV_CODEC_ID_MSMPEG4V2:
 354         case AV_CODEC_ID_MSMPEG4V3:
 355             if       (avctx->rc_max_rate >= 15000000) {
 356                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 357             } else if(avctx->rc_max_rate >=  2000000) {
 358                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 359             } else if(avctx->rc_max_rate >=   384000) {
 360                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 361             } else
 362                 avctx->rc_buffer_size = 40;
 363             avctx->rc_buffer_size *= 16384;
 364             break;
 365         }
 366         if (avctx->rc_buffer_size) {
 367             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 368         }
 369     }
 370
 371     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 372         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 373         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
 374             return -1;
 375     }
 376
 377     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 378         av_log(avctx, AV_LOG_INFO,
 379                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 380     }
 381
 382     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 383         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 384         return -1;
 385     }
 386
 387     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 388         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 389         return -1;
 390     }
 391
 392     if (avctx->rc_max_rate &&
 393         avctx->rc_max_rate == avctx->bit_rate &&
 394         avctx->rc_max_rate != avctx->rc_min_rate) {
 395         av_log(avctx, AV_LOG_INFO,
 396                "impossible bitrate constraints, this will fail\n");
 397     }
 398
 399     if (avctx->rc_buffer_size &&
 400         avctx->bit_rate * (int64_t)avctx->time_base.num >
 401             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 402         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 403         return -1;
 404     }
 405
 406     if (!s->fixed_qscale &&
 407         avctx->bit_rate * av_q2d(avctx->time_base) >
 408             avctx->bit_rate_tolerance) {
 409         av_log(avctx, AV_LOG_ERROR,
 410                "bitrate tolerance %d too small for bitrate %d\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 411         return -1;
 412     }
 413
 414     if (s->avctx->rc_max_rate &&
 415         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 416         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 417          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 418         90000LL * (avctx->rc_buffer_size - 1) >
 419             s->avctx->rc_max_rate * 0xFFFFLL) {
 420         av_log(avctx, AV_LOG_INFO,
 421                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 422                "specified vbv buffer is too large for the given bitrate!\n");
 423     }
 424
 425     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 426         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 427         s->codec_id != AV_CODEC_ID_FLV1) {
 428         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 429         return -1;
 430     }
 431
 432     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 433         av_log(avctx, AV_LOG_ERROR,
 434                "OBMC is only supported with simple mb decision\n");
 435         return -1;
 436     }
 437
 438     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 439         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 440         return -1;
 441     }
 442
 443     if (s->max_b_frames                    &&
 444         s->codec_id != AV_CODEC_ID_MPEG4      &&
 445         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 446         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 447         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 448         return -1;
 449     }
 450     if (s->max_b_frames < 0) {
 451         av_log(avctx, AV_LOG_ERROR,
 452                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 453         return -1;
 454     }
 455
 456     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 457          s->codec_id == AV_CODEC_ID_H263  ||
 458          s->codec_id == AV_CODEC_ID_H263P) &&
 459         (avctx->sample_aspect_ratio.num > 255 ||
 460          avctx->sample_aspect_ratio.den > 255)) {
 461         av_log(avctx, AV_LOG_WARNING,
 462                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 463                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 464         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 465                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 466     }
 467
 468     if ((s->codec_id == AV_CODEC_ID_H263  ||
 469          s->codec_id == AV_CODEC_ID_H263P) &&
 470         (avctx->width  > 2048 ||
 471          avctx->height > 1152 )) {
 472         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 473         return -1;
 474     }
 475     if ((s->codec_id == AV_CODEC_ID_H263  ||
 476          s->codec_id == AV_CODEC_ID_H263P) &&
 477         ((avctx->width &3) ||
 478          (avctx->height&3) )) {
 479         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 480         return -1;
 481     }
 482
 483     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 484         (avctx->width  > 4095 ||
 485          avctx->height > 4095 )) {
 486         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 487         return -1;
 488     }
 489
 490     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 491         (avctx->width  > 16383 ||
 492          avctx->height > 16383 )) {
 493         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 494         return -1;
 495     }
 496
 497     if (s->codec_id == AV_CODEC_ID_RV10 &&
 498         (avctx->width &15 ||
 499          avctx->height&15 )) {
 500         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 501         return AVERROR(EINVAL);
 502     }
 503
 504     if (s->codec_id == AV_CODEC_ID_RV20 &&
 505         (avctx->width &3 ||
 506          avctx->height&3 )) {
 507         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 508         return AVERROR(EINVAL);
 509     }
 510
 511     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 512          s->codec_id == AV_CODEC_ID_WMV2) &&
 513          avctx->width & 1) {
 514          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 515          return -1;
 516     }
 517
 518     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 519         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 520         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 521         return -1;
 522     }
 523
 524     // FIXME mpeg2 uses that too
 525     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 526                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 527         av_log(avctx, AV_LOG_ERROR,
 528                "mpeg2 style quantization not supported by codec\n");
 529         return -1;
 530     }
 531
 532     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 533         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 534         return -1;
 535     }
 536
 537     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 538         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 539         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 540         return -1;
 541     }
 542
 543     if (s->avctx->scenechange_threshold < 1000000000 &&
 544         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 545         av_log(avctx, AV_LOG_ERROR,
 546                "closed gop with scene change detection are not supported yet, "
 547                "set threshold to 1000000000\n");
 548         return -1;
 549     }
 550
 551     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 552         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 553             av_log(avctx, AV_LOG_ERROR,
 554                   "low delay forcing is only available for mpeg2\n");
 555             return -1;
 556         }
 557         if (s->max_b_frames != 0) {
 558             av_log(avctx, AV_LOG_ERROR,
 559                    "b frames cannot be used with low delay\n");
 560             return -1;
 561         }
 562     }
 563
 564     if (s->q_scale_type == 1) {
 565         if (avctx->qmax > 12) {
 566             av_log(avctx, AV_LOG_ERROR,
 567                    "non linear quant only supports qmax <= 12 currently\n");
 568             return -1;
 569         }
 570     }
 571
 572     if (s->avctx->thread_count > 1         &&
 573         s->codec_id != AV_CODEC_ID_MPEG4      &&
 574         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 575         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 576         s->codec_id != AV_CODEC_ID_MJPEG      &&
 577         (s->codec_id != AV_CODEC_ID_H263P)) {
 578         av_log(avctx, AV_LOG_ERROR,
 579                "multi threaded encoding not supported by codec\n");
 580         return -1;
 581     }
 582
 583     if (s->avctx->thread_count < 1) {
 584         av_log(avctx, AV_LOG_ERROR,
 585                "automatic thread number detection not supported by codec, "
 586                "patch welcome\n");
 587         return -1;
 588     }
 589
 590     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 591         s->rtp_mode = 1;
 592
 593     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 594         s->h263_slice_structured = 1;
 595
 596     if (!avctx->time_base.den || !avctx->time_base.num) {
 597         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 598         return -1;
 599     }
 600
 601     i = (INT_MAX / 2 + 128) >> 8;
 602     if (avctx->mb_threshold >= i) {
 603         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 604                i - 1);
 605         return -1;
 606     }
 607
 608     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 609         av_log(avctx, AV_LOG_INFO,
 610                "notice: b_frame_strategy only affects the first pass\n");
 611         avctx->b_frame_strategy = 0;
 612     }
 613
 614     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 615     if (i > 1) {
 616         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 617         avctx->time_base.den /= i;
 618         avctx->time_base.num /= i;
 619         //return -1;
 620     }
 621
 622     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 623         // (a + x * 3 / 8) / x
 624         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 625         s->inter_quant_bias = 0;
 626     } else {
 627         s->intra_quant_bias = 0;
 628         // (a - x / 4) / x
 629         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 630     }
 631
 632     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 633         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 634         return AVERROR(EINVAL);
 635     }
 636
 637     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 638         s->intra_quant_bias = avctx->intra_quant_bias;
 639     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 640         s->inter_quant_bias = avctx->inter_quant_bias;
 641
 642     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 643
 644     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 645         s->avctx->time_base.den > (1 << 16) - 1) {
 646         av_log(avctx, AV_LOG_ERROR,
 647                "timebase %d/%d not supported by MPEG 4 standard, "
 648                "the maximum admitted value for the timebase denominator "
 649                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 650                (1 << 16) - 1);
 651         return -1;
 652     }
 653     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 654
 655     switch (avctx->codec->id) {
 656     case AV_CODEC_ID_MPEG1VIDEO:
 657         s->out_format = FMT_MPEG1;
 658         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 659         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 660         break;
 661     case AV_CODEC_ID_MPEG2VIDEO:
 662         s->out_format = FMT_MPEG1;
 663         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 664         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 665         s->rtp_mode   = 1;
 666         break;
 667     case AV_CODEC_ID_MJPEG:
 668     case AV_CODEC_ID_AMV:
 669         s->out_format = FMT_MJPEG;
 670         s->intra_only = 1; /* force intra only for jpeg */
 671         if (!CONFIG_MJPEG_ENCODER ||
 672             ff_mjpeg_encode_init(s) < 0)
 673             return -1;
 674         avctx->delay = 0;
 675         s->low_delay = 1;
 676         break;
 677     case AV_CODEC_ID_H261:
 678         if (!CONFIG_H261_ENCODER)
 679             return -1;
 680         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 681             av_log(avctx, AV_LOG_ERROR,
 682                    "The specified picture size of %dx%d is not valid for the "
 683                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 684                     s->width, s->height);
 685             return -1;
 686         }
 687         s->out_format = FMT_H261;
 688         avctx->delay  = 0;
 689         s->low_delay  = 1;
 690         break;
 691     case AV_CODEC_ID_H263:
 692         if (!CONFIG_H263_ENCODER)
 693             return -1;
 694         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 695                              s->width, s->height) == 8) {
 696             av_log(avctx, AV_LOG_ERROR,
 697                    "The specified picture size of %dx%d is not valid for "
 698                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 699                    "352x288, 704x576, and 1408x1152. "
 700                    "Try H.263+.\n", s->width, s->height);
 701             return -1;
 702         }
 703         s->out_format = FMT_H263;
 704         avctx->delay  = 0;
 705         s->low_delay  = 1;
 706         break;
 707     case AV_CODEC_ID_H263P:
 708         s->out_format = FMT_H263;
 709         s->h263_plus  = 1;
 710         /* Fx */
 711         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 712         s->modified_quant  = s->h263_aic;
 713         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 714         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 715
 716         /* /Fx */
 717         /* These are just to be sure */
 718         avctx->delay = 0;
 719         s->low_delay = 1;
 720         break;
 721     case AV_CODEC_ID_FLV1:
 722         s->out_format      = FMT_H263;
 723         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 724         s->unrestricted_mv = 1;
 725         s->rtp_mode  = 0; /* don't allow GOB */
 726         avctx->delay = 0;
 727         s->low_delay = 1;
 728         break;
 729     case AV_CODEC_ID_RV10:
 730         s->out_format = FMT_H263;
 731         avctx->delay  = 0;
 732         s->low_delay  = 1;
 733         break;
 734     case AV_CODEC_ID_RV20:
 735         s->out_format      = FMT_H263;
 736         avctx->delay       = 0;
 737         s->low_delay       = 1;
 738         s->modified_quant  = 1;
 739         s->h263_aic        = 1;
 740         s->h263_plus       = 1;
 741         s->loop_filter     = 1;
 742         s->unrestricted_mv = 0;
 743         break;
 744     case AV_CODEC_ID_MPEG4:
 745         s->out_format      = FMT_H263;
 746         s->h263_pred       = 1;
 747         s->unrestricted_mv = 1;
 748         s->low_delay       = s->max_b_frames ? 0 : 1;
 749         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 750         break;
 751     case AV_CODEC_ID_MSMPEG4V2:
 752         s->out_format      = FMT_H263;
 753         s->h263_pred       = 1;
 754         s->unrestricted_mv = 1;
 755         s->msmpeg4_version = 2;
 756         avctx->delay       = 0;
 757         s->low_delay       = 1;
 758         break;
 759     case AV_CODEC_ID_MSMPEG4V3:
 760         s->out_format        = FMT_H263;
 761         s->h263_pred         = 1;
 762         s->unrestricted_mv   = 1;
 763         s->msmpeg4_version   = 3;
 764         s->flipflop_rounding = 1;
 765         avctx->delay         = 0;
 766         s->low_delay         = 1;
 767         break;
 768     case AV_CODEC_ID_WMV1:
 769         s->out_format        = FMT_H263;
 770         s->h263_pred         = 1;
 771         s->unrestricted_mv   = 1;
 772         s->msmpeg4_version   = 4;
 773         s->flipflop_rounding = 1;
 774         avctx->delay         = 0;
 775         s->low_delay         = 1;
 776         break;
 777     case AV_CODEC_ID_WMV2:
 778         s->out_format        = FMT_H263;
 779         s->h263_pred         = 1;
 780         s->unrestricted_mv   = 1;
 781         s->msmpeg4_version   = 5;
 782         s->flipflop_rounding = 1;
 783         avctx->delay         = 0;
 784         s->low_delay         = 1;
 785         break;
 786     default:
 787         return -1;
 788     }
 789
 790     avctx->has_b_frames = !s->low_delay;
 791
 792     s->encoding = 1;
 793
 794     s->progressive_frame    =
 795     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 796                                                 CODEC_FLAG_INTERLACED_ME) ||
 797                                 s->alternate_scan);
 798
 799     /* init */
 800     if (ff_MPV_common_init(s) < 0)
 801         return -1;
 802
 803     s->avctx->coded_frame = &s->current_picture.f;
 804
 805     if (s->msmpeg4_version) {
 806         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 807                           2 * 2 * (MAX_LEVEL + 1) *
 808                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 809     }
 810     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 811
 812     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 813     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 814     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 818     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 819                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 820     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 821                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 822
 823     if (s->avctx->noise_reduction) {
 824         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 825                           2 * 64 * sizeof(uint16_t), fail);
 826     }
 827
 828     ff_dct_encode_init(s);
 829
 830     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 831         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 832
 833     s->quant_precision = 5;
 834
 835     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 836     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 837
 838     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 839         ff_h261_encode_init(s);
 840     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 841         ff_h263_encode_init(s);
 842     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 843         ff_msmpeg4_encode_init(s);
 844     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 845         && s->out_format == FMT_MPEG1)
 846         ff_mpeg1_encode_init(s);
 847
 848     /* init q matrix */
 849     for (i = 0; i < 64; i++) {
 850         int j = s->dsp.idct_permutation[i];
 851         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 852             s->mpeg_quant) {
 853             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 854             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 855         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 856             s->intra_matrix[j] =
 857             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 858         } else {
 859             /* mpeg1/2 */
 860             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 861             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 862         }
 863         if (s->avctx->intra_matrix)
 864             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 865         if (s->avctx->inter_matrix)
 866             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 867     }
 868
 869     /* precompute matrix */
 870     /* for mjpeg, we do include qscale in the matrix */
 871     if (s->out_format != FMT_MJPEG) {
 872         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 873                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 874                           31, 1);
 875         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 876                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 877                           31, 0);
 878     }
 879
 880     if (ff_rate_control_init(s) < 0)
 881         return -1;
 882
 883 #if FF_API_ERROR_RATE
 884     FF_DISABLE_DEPRECATION_WARNINGS
 885     if (avctx->error_rate)
 886         s->error_rate = avctx->error_rate;
 887     FF_ENABLE_DEPRECATION_WARNINGS;
 888 #endif
 889
 890     if (avctx->b_frame_strategy == 2) {
 891         for (i = 0; i < s->max_b_frames + 2; i++) {
 892             s->tmp_frames[i] = av_frame_alloc();
 893             if (!s->tmp_frames[i])
 894                 return AVERROR(ENOMEM);
 895
 896             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 897             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 898             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 899
 900             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 901             if (ret < 0)
 902                 return ret;
 903         }
 904     }
 905
 906     return 0;
 907 fail:
 908     ff_MPV_encode_end(avctx);
 909     return AVERROR_UNKNOWN;
 910 }
 911
 912 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 913 {
 914     MpegEncContext *s = avctx->priv_data;
 915     int i;
 916
 917     ff_rate_control_uninit(s);
 918
 919     ff_MPV_common_end(s);
 920     if (CONFIG_MJPEG_ENCODER &&
 921         s->out_format == FMT_MJPEG)
 922         ff_mjpeg_encode_close(s);
 923
 924     av_freep(&avctx->extradata);
 925
 926     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 927         av_frame_free(&s->tmp_frames[i]);
 928
 929     ff_free_picture_tables(&s->new_picture);
 930     ff_mpeg_unref_picture(s, &s->new_picture);
 931
 932     av_freep(&s->avctx->stats_out);
 933     av_freep(&s->ac_stats);
 934
 935     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 936     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
 937     s->q_chroma_intra_matrix=   NULL;
 938     s->q_chroma_intra_matrix16= NULL;
 939     av_freep(&s->q_intra_matrix);
 940     av_freep(&s->q_inter_matrix);
 941     av_freep(&s->q_intra_matrix16);
 942     av_freep(&s->q_inter_matrix16);
 943     av_freep(&s->input_picture);
 944     av_freep(&s->reordered_input_picture);
 945     av_freep(&s->dct_offset);
 946
 947     return 0;
 948 }
 949
 950 static int get_sae(uint8_t *src, int ref, int stride)
 951 {
 952     int x,y;
 953     int acc = 0;
 954
 955     for (y = 0; y < 16; y++) {
 956         for (x = 0; x < 16; x++) {
 957             acc += FFABS(src[x + y * stride] - ref);
 958         }
 959     }
 960
 961     return acc;
 962 }
 963
 964 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 965                            uint8_t *ref, int stride)
 966 {
 967     int x, y, w, h;
 968     int acc = 0;
 969
 970     w = s->width  & ~15;
 971     h = s->height & ~15;
 972
 973     for (y = 0; y < h; y += 16) {
 974         for (x = 0; x < w; x += 16) {
 975             int offset = x + y * stride;
 976             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 977                                      16);
 978             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 979             int sae  = get_sae(src + offset, mean, stride);
 980
 981             acc += sae + 500 < sad;
 982         }
 983     }
 984     return acc;
 985 }
 986
 987
 988 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 989 {
 990     Picture *pic = NULL;
 991     int64_t pts;
 992     int i, display_picture_number = 0, ret;
 993     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 994                                                  (s->low_delay ? 0 : 1);
 995     int direct = 1;
 996
 997     if (pic_arg) {
 998         pts = pic_arg->pts;
 999         display_picture_number = s->input_picture_number++;
1000
1001         if (pts != AV_NOPTS_VALUE) {
1002             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1003                 int64_t last = s->user_specified_pts;
1004
1005                 if (pts <= last) {
1006                     av_log(s->avctx, AV_LOG_ERROR,
1007                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1008                            pts, last);
1009                     return AVERROR(EINVAL);
1010                 }
1011
1012                 if (!s->low_delay && display_picture_number == 1)
1013                     s->dts_delta = pts - last;
1014             }
1015             s->user_specified_pts = pts;
1016         } else {
1017             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1018                 s->user_specified_pts =
1019                 pts = s->user_specified_pts + 1;
1020                 av_log(s->avctx, AV_LOG_INFO,
1021                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1022                        pts);
1023             } else {
1024                 pts = display_picture_number;
1025             }
1026         }
1027     }
1028
1029     if (pic_arg) {
1030         if (!pic_arg->buf[0])
1031             direct = 0;
1032         if (pic_arg->linesize[0] != s->linesize)
1033             direct = 0;
1034         if (pic_arg->linesize[1] != s->uvlinesize)
1035             direct = 0;
1036         if (pic_arg->linesize[2] != s->uvlinesize)
1037             direct = 0;
1038         if ((s->width & 15) || (s->height & 15))
1039             direct = 0;
1040
1041         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1042                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1043
1044         if (direct) {
1045             i = ff_find_unused_picture(s, 1);
1046             if (i < 0)
1047                 return i;
1048
1049             pic = &s->picture[i];
1050             pic->reference = 3;
1051
1052             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1053                 return ret;
1054             if (ff_alloc_picture(s, pic, 1) < 0) {
1055                 return -1;
1056             }
1057         } else {
1058             i = ff_find_unused_picture(s, 0);
1059             if (i < 0)
1060                 return i;
1061
1062             pic = &s->picture[i];
1063             pic->reference = 3;
1064
1065             if (ff_alloc_picture(s, pic, 0) < 0) {
1066                 return -1;
1067             }
1068
1069             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1070                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1071                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1072                 // empty
1073             } else {
1074                 int h_chroma_shift, v_chroma_shift;
1075                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1076                                                  &h_chroma_shift,
1077                                                  &v_chroma_shift);
1078
1079                 for (i = 0; i < 3; i++) {
1080                     int src_stride = pic_arg->linesize[i];
1081                     int dst_stride = i ? s->uvlinesize : s->linesize;
1082                     int h_shift = i ? h_chroma_shift : 0;
1083                     int v_shift = i ? v_chroma_shift : 0;
1084                     int w = s->width  >> h_shift;
1085                     int h = s->height >> v_shift;
1086                     uint8_t *src = pic_arg->data[i];
1087                     uint8_t *dst = pic->f.data[i];
1088
1089                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1090                         h = ((s->height + 15)/16*16) >> v_shift;
1091                     }
1092
1093                     if (!s->avctx->rc_buffer_size)
1094                         dst += INPLACE_OFFSET;
1095
1096                     if (src_stride == dst_stride)
1097                         memcpy(dst, src, src_stride * h);
1098                     else {
1099                         int h2 = h;
1100                         uint8_t *dst2 = dst;
1101                         while (h2--) {
1102                             memcpy(dst2, src, w);
1103                             dst2 += dst_stride;
1104                             src += src_stride;
1105                         }
1106                     }
1107                     if ((s->width & 15) || (s->height & 15)) {
1108                         s->dsp.draw_edges(dst, dst_stride,
1109                                           w, h,
1110                                           16>>h_shift,
1111                                           16>>v_shift,
1112                                           EDGE_BOTTOM);
1113                     }
1114                 }
1115             }
1116         }
1117         ret = av_frame_copy_props(&pic->f, pic_arg);
1118         if (ret < 0)
1119             return ret;
1120
1121         pic->f.display_picture_number = display_picture_number;
1122         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
1123     }
1124
1125     /* shift buffer entries */
1126     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1127         s->input_picture[i - 1] = s->input_picture[i];
1128
1129     s->input_picture[encoding_delay] = (Picture*) pic;
1130
1131     return 0;
1132 }
1133
1134 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1135 {
1136     int x, y, plane;
1137     int score = 0;
1138     int64_t score64 = 0;
1139
1140     for (plane = 0; plane < 3; plane++) {
1141         const int stride = p->f.linesize[plane];
1142         const int bw = plane ? 1 : 2;
1143         for (y = 0; y < s->mb_height * bw; y++) {
1144             for (x = 0; x < s->mb_width * bw; x++) {
1145                 int off = p->shared ? 0 : 16;
1146                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1147                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1148                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1149
1150                 switch (FFABS(s->avctx->frame_skip_exp)) {
1151                 case 0: score    =  FFMAX(score, v);          break;
1152                 case 1: score   += FFABS(v);                  break;
1153                 case 2: score64 += v * (int64_t)v;                       break;
1154                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1155                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1156                 }
1157             }
1158         }
1159     }
1160     emms_c();
1161
1162     if (score)
1163         score64 = score;
1164     if (s->avctx->frame_skip_exp < 0)
1165         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1166                       -1.0/s->avctx->frame_skip_exp);
1167
1168     if (score64 < s->avctx->frame_skip_threshold)
1169         return 1;
1170     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1171         return 1;
1172     return 0;
1173 }
1174
1175 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1176 {
1177     AVPacket pkt = { 0 };
1178     int ret, got_output;
1179
1180     av_init_packet(&pkt);
1181     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1182     if (ret < 0)
1183         return ret;
1184
1185     ret = pkt.size;
1186     av_free_packet(&pkt);
1187     return ret;
1188 }
1189
1190 static int estimate_best_b_count(MpegEncContext *s)
1191 {
1192     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1193     AVCodecContext *c = avcodec_alloc_context3(NULL);
1194     const int scale = s->avctx->brd_scale;
1195     int i, j, out_size, p_lambda, b_lambda, lambda2;
1196     int64_t best_rd  = INT64_MAX;
1197     int best_b_count = -1;
1198
1199     av_assert0(scale >= 0 && scale <= 3);
1200
1201     //emms_c();
1202     //s->next_picture_ptr->quality;
1203     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1204     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1205     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1206     if (!b_lambda) // FIXME we should do this somewhere else
1207         b_lambda = p_lambda;
1208     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1209                FF_LAMBDA_SHIFT;
1210
1211     c->width        = s->width  >> scale;
1212     c->height       = s->height >> scale;
1213     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1214                       CODEC_FLAG_INPUT_PRESERVED;
1215     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1216     c->mb_decision  = s->avctx->mb_decision;
1217     c->me_cmp       = s->avctx->me_cmp;
1218     c->mb_cmp       = s->avctx->mb_cmp;
1219     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1220     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1221     c->time_base    = s->avctx->time_base;
1222     c->max_b_frames = s->max_b_frames;
1223
1224     if (avcodec_open2(c, codec, NULL) < 0)
1225         return -1;
1226
1227     for (i = 0; i < s->max_b_frames + 2; i++) {
1228         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1229                                                 s->next_picture_ptr;
1230
1231         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1232             pre_input = *pre_input_ptr;
1233
1234             if (!pre_input.shared && i) {
1235                 pre_input.f.data[0] += INPLACE_OFFSET;
1236                 pre_input.f.data[1] += INPLACE_OFFSET;
1237                 pre_input.f.data[2] += INPLACE_OFFSET;
1238             }
1239
1240             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1241                                  pre_input.f.data[0], pre_input.f.linesize[0],
1242                                  c->width,      c->height);
1243             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1244                                  pre_input.f.data[1], pre_input.f.linesize[1],
1245                                  c->width >> 1, c->height >> 1);
1246             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1247                                  pre_input.f.data[2], pre_input.f.linesize[2],
1248                                  c->width >> 1, c->height >> 1);
1249         }
1250     }
1251
1252     for (j = 0; j < s->max_b_frames + 1; j++) {
1253         int64_t rd = 0;
1254
1255         if (!s->input_picture[j])
1256             break;
1257
1258         c->error[0] = c->error[1] = c->error[2] = 0;
1259
1260         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1261         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1262
1263         out_size = encode_frame(c, s->tmp_frames[0]);
1264
1265         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1266
1267         for (i = 0; i < s->max_b_frames + 1; i++) {
1268             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1269
1270             s->tmp_frames[i + 1]->pict_type = is_p ?
1271                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1272             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1273
1274             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1275
1276             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1277         }
1278
1279         /* get the delayed frames */
1280         while (out_size) {
1281             out_size = encode_frame(c, NULL);
1282             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1283         }
1284
1285         rd += c->error[0] + c->error[1] + c->error[2];
1286
1287         if (rd < best_rd) {
1288             best_rd = rd;
1289             best_b_count = j;
1290         }
1291     }
1292
1293     avcodec_close(c);
1294     av_freep(&c);
1295
1296     return best_b_count;
1297 }
1298
1299 static int select_input_picture(MpegEncContext *s)
1300 {
1301     int i, ret;
1302
1303     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1304         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1305     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1306
1307     /* set next picture type & ordering */
1308     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1309         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1310             if (s->picture_in_gop_number < s->gop_size &&
1311                 s->next_picture_ptr &&
1312                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1313                 // FIXME check that te gop check above is +-1 correct
1314                 av_frame_unref(&s->input_picture[0]->f);
1315
1316                 ff_vbv_update(s, 0);
1317
1318                 goto no_output_pic;
1319             }
1320         }
1321
1322         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1323             s->next_picture_ptr == NULL || s->intra_only) {
1324             s->reordered_input_picture[0] = s->input_picture[0];
1325             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1326             s->reordered_input_picture[0]->f.coded_picture_number =
1327                 s->coded_picture_number++;
1328         } else {
1329             int b_frames;
1330
1331             if (s->flags & CODEC_FLAG_PASS2) {
1332                 for (i = 0; i < s->max_b_frames + 1; i++) {
1333                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1334
1335                     if (pict_num >= s->rc_context.num_entries)
1336                         break;
1337                     if (!s->input_picture[i]) {
1338                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1339                         break;
1340                     }
1341
1342                     s->input_picture[i]->f.pict_type =
1343                         s->rc_context.entry[pict_num].new_pict_type;
1344                 }
1345             }
1346
1347             if (s->avctx->b_frame_strategy == 0) {
1348                 b_frames = s->max_b_frames;
1349                 while (b_frames && !s->input_picture[b_frames])
1350                     b_frames--;
1351             } else if (s->avctx->b_frame_strategy == 1) {
1352                 for (i = 1; i < s->max_b_frames + 1; i++) {
1353                     if (s->input_picture[i] &&
1354                         s->input_picture[i]->b_frame_score == 0) {
1355                         s->input_picture[i]->b_frame_score =
1356                             get_intra_count(s,
1357                                             s->input_picture[i    ]->f.data[0],
1358                                             s->input_picture[i - 1]->f.data[0],
1359                                             s->linesize) + 1;
1360                     }
1361                 }
1362                 for (i = 0; i < s->max_b_frames + 1; i++) {
1363                     if (s->input_picture[i] == NULL ||
1364                         s->input_picture[i]->b_frame_score - 1 >
1365                             s->mb_num / s->avctx->b_sensitivity)
1366                         break;
1367                 }
1368
1369                 b_frames = FFMAX(0, i - 1);
1370
1371                 /* reset scores */
1372                 for (i = 0; i < b_frames + 1; i++) {
1373                     s->input_picture[i]->b_frame_score = 0;
1374                 }
1375             } else if (s->avctx->b_frame_strategy == 2) {
1376                 b_frames = estimate_best_b_count(s);
1377             } else {
1378                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1379                 b_frames = 0;
1380             }
1381
1382             emms_c();
1383
1384             for (i = b_frames - 1; i >= 0; i--) {
1385                 int type = s->input_picture[i]->f.pict_type;
1386                 if (type && type != AV_PICTURE_TYPE_B)
1387                     b_frames = i;
1388             }
1389             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1390                 b_frames == s->max_b_frames) {
1391                 av_log(s->avctx, AV_LOG_ERROR,
1392                        "warning, too many b frames in a row\n");
1393             }
1394
1395             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1396                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1397                     s->gop_size > s->picture_in_gop_number) {
1398                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1399                 } else {
1400                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1401                         b_frames = 0;
1402                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1403                 }
1404             }
1405
1406             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1407                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1408                 b_frames--;
1409
1410             s->reordered_input_picture[0] = s->input_picture[b_frames];
1411             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1412                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1413             s->reordered_input_picture[0]->f.coded_picture_number =
1414                 s->coded_picture_number++;
1415             for (i = 0; i < b_frames; i++) {
1416                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1417                 s->reordered_input_picture[i + 1]->f.pict_type =
1418                     AV_PICTURE_TYPE_B;
1419                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1420                     s->coded_picture_number++;
1421             }
1422         }
1423     }
1424 no_output_pic:
1425     if (s->reordered_input_picture[0]) {
1426         s->reordered_input_picture[0]->reference =
1427            s->reordered_input_picture[0]->f.pict_type !=
1428                AV_PICTURE_TYPE_B ? 3 : 0;
1429
1430         ff_mpeg_unref_picture(s, &s->new_picture);
1431         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1432             return ret;
1433
1434         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1435             // input is a shared pix, so we can't modifiy it -> alloc a new
1436             // one & ensure that the shared one is reuseable
1437
1438             Picture *pic;
1439             int i = ff_find_unused_picture(s, 0);
1440             if (i < 0)
1441                 return i;
1442             pic = &s->picture[i];
1443
1444             pic->reference = s->reordered_input_picture[0]->reference;
1445             if (ff_alloc_picture(s, pic, 0) < 0) {
1446                 return -1;
1447             }
1448
1449             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1450             if (ret < 0)
1451                 return ret;
1452
1453             /* mark us unused / free shared pic */
1454             av_frame_unref(&s->reordered_input_picture[0]->f);
1455             s->reordered_input_picture[0]->shared = 0;
1456
1457             s->current_picture_ptr = pic;
1458         } else {
1459             // input is not a shared pix -> reuse buffer for current_pix
1460             s->current_picture_ptr = s->reordered_input_picture[0];
1461             for (i = 0; i < 4; i++) {
1462                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1463             }
1464         }
1465         ff_mpeg_unref_picture(s, &s->current_picture);
1466         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1467                                        s->current_picture_ptr)) < 0)
1468             return ret;
1469
1470         s->picture_number = s->new_picture.f.display_picture_number;
1471     } else {
1472         ff_mpeg_unref_picture(s, &s->new_picture);
1473     }
1474     return 0;
1475 }
1476
1477 static void frame_end(MpegEncContext *s)
1478 {
1479     if (s->unrestricted_mv &&
1480         s->current_picture.reference &&
1481         !s->intra_only) {
1482         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1483         int hshift = desc->log2_chroma_w;
1484         int vshift = desc->log2_chroma_h;
1485         s->dsp.draw_edges(s->current_picture.f.data[0], s->current_picture.f.linesize[0],
1486                           s->h_edge_pos, s->v_edge_pos,
1487                           EDGE_WIDTH, EDGE_WIDTH,
1488                           EDGE_TOP | EDGE_BOTTOM);
1489         s->dsp.draw_edges(s->current_picture.f.data[1], s->current_picture.f.linesize[1],
1490                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1491                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1492                           EDGE_TOP | EDGE_BOTTOM);
1493         s->dsp.draw_edges(s->current_picture.f.data[2], s->current_picture.f.linesize[2],
1494                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1495                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1496                           EDGE_TOP | EDGE_BOTTOM);
1497     }
1498
1499     emms_c();
1500
1501     s->last_pict_type                 = s->pict_type;
1502     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1503     if (s->pict_type!= AV_PICTURE_TYPE_B)
1504         s->last_non_b_pict_type = s->pict_type;
1505
1506     s->avctx->coded_frame = &s->current_picture_ptr->f;
1507
1508 }
1509
1510 static void update_noise_reduction(MpegEncContext *s)
1511 {
1512     int intra, i;
1513
1514     for (intra = 0; intra < 2; intra++) {
1515         if (s->dct_count[intra] > (1 << 16)) {
1516             for (i = 0; i < 64; i++) {
1517                 s->dct_error_sum[intra][i] >>= 1;
1518             }
1519             s->dct_count[intra] >>= 1;
1520         }
1521
1522         for (i = 0; i < 64; i++) {
1523             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1524                                        s->dct_count[intra] +
1525                                        s->dct_error_sum[intra][i] / 2) /
1526                                       (s->dct_error_sum[intra][i] + 1);
1527         }
1528     }
1529 }
1530
1531 static int frame_start(MpegEncContext *s)
1532 {
1533     int ret;
1534
1535     /* mark & release old frames */
1536     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1537         s->last_picture_ptr != s->next_picture_ptr &&
1538         s->last_picture_ptr->f.buf[0]) {
1539         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1540     }
1541
1542     s->current_picture_ptr->f.pict_type = s->pict_type;
1543     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1544
1545     ff_mpeg_unref_picture(s, &s->current_picture);
1546     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1547                                    s->current_picture_ptr)) < 0)
1548         return ret;
1549
1550     if (s->pict_type != AV_PICTURE_TYPE_B) {
1551         s->last_picture_ptr = s->next_picture_ptr;
1552         if (!s->droppable)
1553             s->next_picture_ptr = s->current_picture_ptr;
1554     }
1555
1556     if (s->last_picture_ptr) {
1557         ff_mpeg_unref_picture(s, &s->last_picture);
1558         if (s->last_picture_ptr->f.buf[0] &&
1559             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1560                                        s->last_picture_ptr)) < 0)
1561             return ret;
1562     }
1563     if (s->next_picture_ptr) {
1564         ff_mpeg_unref_picture(s, &s->next_picture);
1565         if (s->next_picture_ptr->f.buf[0] &&
1566             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1567                                        s->next_picture_ptr)) < 0)
1568             return ret;
1569     }
1570
1571     if (s->picture_structure!= PICT_FRAME) {
1572         int i;
1573         for (i = 0; i < 4; i++) {
1574             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1575                 s->current_picture.f.data[i] +=
1576                     s->current_picture.f.linesize[i];
1577             }
1578             s->current_picture.f.linesize[i] *= 2;
1579             s->last_picture.f.linesize[i]    *= 2;
1580             s->next_picture.f.linesize[i]    *= 2;
1581         }
1582     }
1583
1584     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1585         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1586         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1587     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1588         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1589         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1590     } else {
1591         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1592         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1593     }
1594
1595     if (s->dct_error_sum) {
1596         av_assert2(s->avctx->noise_reduction && s->encoding);
1597         update_noise_reduction(s);
1598     }
1599
1600     return 0;
1601 }
1602
1603 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1604                           AVFrame *pic_arg, int *got_packet)
1605 {
1606     MpegEncContext *s = avctx->priv_data;
1607     int i, stuffing_count, ret;
1608     int context_count = s->slice_context_count;
1609
1610     s->picture_in_gop_number++;
1611
1612     if (load_input_picture(s, pic_arg) < 0)
1613         return -1;
1614
1615     if (select_input_picture(s) < 0) {
1616         return -1;
1617     }
1618
1619     /* output? */
1620     if (s->new_picture.f.data[0]) {
1621         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1622             return ret;
1623         if (s->mb_info) {
1624             s->mb_info_ptr = av_packet_new_side_data(pkt,
1625                                  AV_PKT_DATA_H263_MB_INFO,
1626                                  s->mb_width*s->mb_height*12);
1627             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1628         }
1629
1630         for (i = 0; i < context_count; i++) {
1631             int start_y = s->thread_context[i]->start_mb_y;
1632             int   end_y = s->thread_context[i]->  end_mb_y;
1633             int h       = s->mb_height;
1634             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1635             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1636
1637             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1638         }
1639
1640         s->pict_type = s->new_picture.f.pict_type;
1641         //emms_c();
1642         ret = frame_start(s);
1643         if (ret < 0)
1644             return ret;
1645 vbv_retry:
1646         if (encode_picture(s, s->picture_number) < 0)
1647             return -1;
1648
1649         avctx->header_bits = s->header_bits;
1650         avctx->mv_bits     = s->mv_bits;
1651         avctx->misc_bits   = s->misc_bits;
1652         avctx->i_tex_bits  = s->i_tex_bits;
1653         avctx->p_tex_bits  = s->p_tex_bits;
1654         avctx->i_count     = s->i_count;
1655         // FIXME f/b_count in avctx
1656         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1657         avctx->skip_count  = s->skip_count;
1658
1659         frame_end(s);
1660
1661         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1662             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1663
1664         if (avctx->rc_buffer_size) {
1665             RateControlContext *rcc = &s->rc_context;
1666             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1667
1668             if (put_bits_count(&s->pb) > max_size &&
1669                 s->lambda < s->avctx->lmax) {
1670                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1671                                        (s->qscale + 1) / s->qscale);
1672                 if (s->adaptive_quant) {
1673                     int i;
1674                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1675                         s->lambda_table[i] =
1676                             FFMAX(s->lambda_table[i] + 1,
1677                                   s->lambda_table[i] * (s->qscale + 1) /
1678                                   s->qscale);
1679                 }
1680                 s->mb_skipped = 0;        // done in frame_start()
1681                 // done in encode_picture() so we must undo it
1682                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1683                     if (s->flipflop_rounding          ||
1684                         s->codec_id == AV_CODEC_ID_H263P ||
1685                         s->codec_id == AV_CODEC_ID_MPEG4)
1686                         s->no_rounding ^= 1;
1687                 }
1688                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1689                     s->time_base       = s->last_time_base;
1690                     s->last_non_b_time = s->time - s->pp_time;
1691                 }
1692                 for (i = 0; i < context_count; i++) {
1693                     PutBitContext *pb = &s->thread_context[i]->pb;
1694                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1695                 }
1696                 goto vbv_retry;
1697             }
1698
1699             assert(s->avctx->rc_max_rate);
1700         }
1701
1702         if (s->flags & CODEC_FLAG_PASS1)
1703             ff_write_pass1_stats(s);
1704
1705         for (i = 0; i < 4; i++) {
1706             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1707             avctx->error[i] += s->current_picture_ptr->f.error[i];
1708         }
1709
1710         if (s->flags & CODEC_FLAG_PASS1)
1711             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1712                    avctx->i_tex_bits + avctx->p_tex_bits ==
1713                        put_bits_count(&s->pb));
1714         flush_put_bits(&s->pb);
1715         s->frame_bits  = put_bits_count(&s->pb);
1716
1717         stuffing_count = ff_vbv_update(s, s->frame_bits);
1718         s->stuffing_bits = 8*stuffing_count;
1719         if (stuffing_count) {
1720             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1721                     stuffing_count + 50) {
1722                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1723                 return -1;
1724             }
1725
1726             switch (s->codec_id) {
1727             case AV_CODEC_ID_MPEG1VIDEO:
1728             case AV_CODEC_ID_MPEG2VIDEO:
1729                 while (stuffing_count--) {
1730                     put_bits(&s->pb, 8, 0);
1731                 }
1732             break;
1733             case AV_CODEC_ID_MPEG4:
1734                 put_bits(&s->pb, 16, 0);
1735                 put_bits(&s->pb, 16, 0x1C3);
1736                 stuffing_count -= 4;
1737                 while (stuffing_count--) {
1738                     put_bits(&s->pb, 8, 0xFF);
1739                 }
1740             break;
1741             default:
1742                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1743             }
1744             flush_put_bits(&s->pb);
1745             s->frame_bits  = put_bits_count(&s->pb);
1746         }
1747
1748         /* update mpeg1/2 vbv_delay for CBR */
1749         if (s->avctx->rc_max_rate                          &&
1750             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1751             s->out_format == FMT_MPEG1                     &&
1752             90000LL * (avctx->rc_buffer_size - 1) <=
1753                 s->avctx->rc_max_rate * 0xFFFFLL) {
1754             int vbv_delay, min_delay;
1755             double inbits  = s->avctx->rc_max_rate *
1756                              av_q2d(s->avctx->time_base);
1757             int    minbits = s->frame_bits - 8 *
1758                              (s->vbv_delay_ptr - s->pb.buf - 1);
1759             double bits    = s->rc_context.buffer_index + minbits - inbits;
1760
1761             if (bits < 0)
1762                 av_log(s->avctx, AV_LOG_ERROR,
1763                        "Internal error, negative bits\n");
1764
1765             assert(s->repeat_first_field == 0);
1766
1767             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1768             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1769                         s->avctx->rc_max_rate;
1770
1771             vbv_delay = FFMAX(vbv_delay, min_delay);
1772
1773             av_assert0(vbv_delay < 0xFFFF);
1774
1775             s->vbv_delay_ptr[0] &= 0xF8;
1776             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1777             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1778             s->vbv_delay_ptr[2] &= 0x07;
1779             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1780             avctx->vbv_delay     = vbv_delay * 300;
1781         }
1782         s->total_bits     += s->frame_bits;
1783         avctx->frame_bits  = s->frame_bits;
1784
1785         pkt->pts = s->current_picture.f.pts;
1786         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1787             if (!s->current_picture.f.coded_picture_number)
1788                 pkt->dts = pkt->pts - s->dts_delta;
1789             else
1790                 pkt->dts = s->reordered_pts;
1791             s->reordered_pts = pkt->pts;
1792         } else
1793             pkt->dts = pkt->pts;
1794         if (s->current_picture.f.key_frame)
1795             pkt->flags |= AV_PKT_FLAG_KEY;
1796         if (s->mb_info)
1797             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1798     } else {
1799         s->frame_bits = 0;
1800     }
1801
1802     /* release non-reference frames */
1803     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1804         if (!s->picture[i].reference)
1805             ff_mpeg_unref_picture(s, &s->picture[i]);
1806     }
1807
1808     assert((s->frame_bits & 7) == 0);
1809
1810     pkt->size = s->frame_bits / 8;
1811     *got_packet = !!pkt->size;
1812     return 0;
1813 }
1814
1815 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1816                                                 int n, int threshold)
1817 {
1818     static const char tab[64] = {
1819         3, 2, 2, 1, 1, 1, 1, 1,
1820         1, 1, 1, 1, 1, 1, 1, 1,
1821         1, 1, 1, 1, 1, 1, 1, 1,
1822         0, 0, 0, 0, 0, 0, 0, 0,
1823         0, 0, 0, 0, 0, 0, 0, 0,
1824         0, 0, 0, 0, 0, 0, 0, 0,
1825         0, 0, 0, 0, 0, 0, 0, 0,
1826         0, 0, 0, 0, 0, 0, 0, 0
1827     };
1828     int score = 0;
1829     int run = 0;
1830     int i;
1831     int16_t *block = s->block[n];
1832     const int last_index = s->block_last_index[n];
1833     int skip_dc;
1834
1835     if (threshold < 0) {
1836         skip_dc = 0;
1837         threshold = -threshold;
1838     } else
1839         skip_dc = 1;
1840
1841     /* Are all we could set to zero already zero? */
1842     if (last_index <= skip_dc - 1)
1843         return;
1844
1845     for (i = 0; i <= last_index; i++) {
1846         const int j = s->intra_scantable.permutated[i];
1847         const int level = FFABS(block[j]);
1848         if (level == 1) {
1849             if (skip_dc && i == 0)
1850                 continue;
1851             score += tab[run];
1852             run = 0;
1853         } else if (level > 1) {
1854             return;
1855         } else {
1856             run++;
1857         }
1858     }
1859     if (score >= threshold)
1860         return;
1861     for (i = skip_dc; i <= last_index; i++) {
1862         const int j = s->intra_scantable.permutated[i];
1863         block[j] = 0;
1864     }
1865     if (block[0])
1866         s->block_last_index[n] = 0;
1867     else
1868         s->block_last_index[n] = -1;
1869 }
1870
1871 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1872                                int last_index)
1873 {
1874     int i;
1875     const int maxlevel = s->max_qcoeff;
1876     const int minlevel = s->min_qcoeff;
1877     int overflow = 0;
1878
1879     if (s->mb_intra) {
1880         i = 1; // skip clipping of intra dc
1881     } else
1882         i = 0;
1883
1884     for (; i <= last_index; i++) {
1885         const int j = s->intra_scantable.permutated[i];
1886         int level = block[j];
1887
1888         if (level > maxlevel) {
1889             level = maxlevel;
1890             overflow++;
1891         } else if (level < minlevel) {
1892             level = minlevel;
1893             overflow++;
1894         }
1895
1896         block[j] = level;
1897     }
1898
1899     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1900         av_log(s->avctx, AV_LOG_INFO,
1901                "warning, clipping %d dct coefficients to %d..%d\n",
1902                overflow, minlevel, maxlevel);
1903 }
1904
/**
 * Compute a weight for each pixel of an 8x8 block.
 *
 * For every pixel the samples of its neighbourhood (up to 3x3, truncated at
 * the block border) are accumulated, and the weight is
 * 36 * sqrt(count * sum(v^2) - sum(v)^2) / count, i.e. 36 times the
 * standard deviation of the neighbourhood.
 *
 * @param weight output, 64 values in raster order
 * @param ptr    top left sample of the 8x8 block
 * @param stride distance in bytes between two lines of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1928
1929 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1930                                                 int motion_x, int motion_y,
1931                                                 int mb_block_height,
1932                                                 int mb_block_width,
1933                                                 int mb_block_count)
1934 {
1935     int16_t weight[12][64];
1936     int16_t orig[12][64];
1937     const int mb_x = s->mb_x;
1938     const int mb_y = s->mb_y;
1939     int i;
1940     int skip_dct[12];
1941     int dct_offset = s->linesize * 8; // default for progressive frames
1942     int uv_dct_offset = s->uvlinesize * 8;
1943     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1944     ptrdiff_t wrap_y, wrap_c;
1945
1946     for (i = 0; i < mb_block_count; i++)
1947         skip_dct[i] = s->skipdct;
1948
1949     if (s->adaptive_quant) {
1950         const int last_qp = s->qscale;
1951         const int mb_xy = mb_x + mb_y * s->mb_stride;
1952
1953         s->lambda = s->lambda_table[mb_xy];
1954         update_qscale(s);
1955
1956         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1957             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1958             s->dquant = s->qscale - last_qp;
1959
1960             if (s->out_format == FMT_H263) {
1961                 s->dquant = av_clip(s->dquant, -2, 2);
1962
1963                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1964                     if (!s->mb_intra) {
1965                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1966                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1967                                 s->dquant = 0;
1968                         }
1969                         if (s->mv_type == MV_TYPE_8X8)
1970                             s->dquant = 0;
1971                     }
1972                 }
1973             }
1974         }
1975         ff_set_qscale(s, last_qp + s->dquant);
1976     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1977         ff_set_qscale(s, s->qscale + s->dquant);
1978
1979     wrap_y = s->linesize;
1980     wrap_c = s->uvlinesize;
1981     ptr_y  = s->new_picture.f.data[0] +
1982              (mb_y * 16 * wrap_y)              + mb_x * 16;
1983     ptr_cb = s->new_picture.f.data[1] +
1984              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1985     ptr_cr = s->new_picture.f.data[2] +
1986              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1987
1988     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1989         uint8_t *ebuf = s->edge_emu_buffer + 32;
1990         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1991         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1992         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1993                                  wrap_y, wrap_y,
1994                                  16, 16, mb_x * 16, mb_y * 16,
1995                                  s->width, s->height);
1996         ptr_y = ebuf;
1997         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1998                                  wrap_c, wrap_c,
1999                                  mb_block_width, mb_block_height,
2000                                  mb_x * mb_block_width, mb_y * mb_block_height,
2001                                  cw, ch);
2002         ptr_cb = ebuf + 18 * wrap_y;
2003         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2004                                  wrap_c, wrap_c,
2005                                  mb_block_width, mb_block_height,
2006                                  mb_x * mb_block_width, mb_y * mb_block_height,
2007                                  cw, ch);
2008         ptr_cr = ebuf + 18 * wrap_y + 16;
2009     }
2010
2011     if (s->mb_intra) {
2012         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2013             int progressive_score, interlaced_score;
2014
2015             s->interlaced_dct = 0;
2016             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2017                                                     NULL, wrap_y, 8) +
2018                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2019                                                     NULL, wrap_y, 8) - 400;
2020
2021             if (progressive_score > 0) {
2022                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2023                                                        NULL, wrap_y * 2, 8) +
2024                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2025                                                        NULL, wrap_y * 2, 8);
2026                 if (progressive_score > interlaced_score) {
2027                     s->interlaced_dct = 1;
2028
2029                     dct_offset = wrap_y;
2030                     uv_dct_offset = wrap_c;
2031                     wrap_y <<= 1;
2032                     if (s->chroma_format == CHROMA_422 ||
2033                         s->chroma_format == CHROMA_444)
2034                         wrap_c <<= 1;
2035                 }
2036             }
2037         }
2038
2039         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2040         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2041         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2042         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2043
2044         if (s->flags & CODEC_FLAG_GRAY) {
2045             skip_dct[4] = 1;
2046             skip_dct[5] = 1;
2047         } else {
2048             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2049             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2050             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2051                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2052                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2053             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2054                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2055                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2056                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2057                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2058                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2059                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2060             }
2061         }
2062     } else {
2063         op_pixels_func (*op_pix)[4];
2064         qpel_mc_func (*op_qpix)[16];
2065         uint8_t *dest_y, *dest_cb, *dest_cr;
2066
2067         dest_y  = s->dest[0];
2068         dest_cb = s->dest[1];
2069         dest_cr = s->dest[2];
2070
2071         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2072             op_pix  = s->hdsp.put_pixels_tab;
2073             op_qpix = s->dsp.put_qpel_pixels_tab;
2074         } else {
2075             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2076             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
2077         }
2078
2079         if (s->mv_dir & MV_DIR_FORWARD) {
2080             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2081                           s->last_picture.f.data,
2082                           op_pix, op_qpix);
2083             op_pix  = s->hdsp.avg_pixels_tab;
2084             op_qpix = s->dsp.avg_qpel_pixels_tab;
2085         }
2086         if (s->mv_dir & MV_DIR_BACKWARD) {
2087             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2088                           s->next_picture.f.data,
2089                           op_pix, op_qpix);
2090         }
2091
2092         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2093             int progressive_score, interlaced_score;
2094
2095             s->interlaced_dct = 0;
2096             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2097                                                     ptr_y,              wrap_y,
2098                                                     8) +
2099                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2100                                                     ptr_y + wrap_y * 8, wrap_y,
2101                                                     8) - 400;
2102
2103             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2104                 progressive_score -= 400;
2105
2106             if (progressive_score > 0) {
2107                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2108                                                        ptr_y,
2109                                                        wrap_y * 2, 8) +
2110                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2111                                                        ptr_y + wrap_y,
2112                                                        wrap_y * 2, 8);
2113
2114                 if (progressive_score > interlaced_score) {
2115                     s->interlaced_dct = 1;
2116
2117                     dct_offset = wrap_y;
2118                     uv_dct_offset = wrap_c;
2119                     wrap_y <<= 1;
2120                     if (s->chroma_format == CHROMA_422)
2121                         wrap_c <<= 1;
2122                 }
2123             }
2124         }
2125
2126         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2127         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2128         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2129                            dest_y + dct_offset, wrap_y);
2130         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2131                            dest_y + dct_offset + 8, wrap_y);
2132
2133         if (s->flags & CODEC_FLAG_GRAY) {
2134             skip_dct[4] = 1;
2135             skip_dct[5] = 1;
2136         } else {
2137             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2138             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2139             if (!s->chroma_y_shift) { /* 422 */
2140                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2141                                    dest_cb + uv_dct_offset, wrap_c);
2142                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2143                                    dest_cr + uv_dct_offset, wrap_c);
2144             }
2145         }
2146         /* pre quantization */
2147         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2148                 2 * s->qscale * s->qscale) {
2149             // FIXME optimize
2150             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2151                               wrap_y, 8) < 20 * s->qscale)
2152                 skip_dct[0] = 1;
2153             if (s->dsp.sad[1](NULL, ptr_y + 8,
2154                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2155                 skip_dct[1] = 1;
2156             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2157                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2158                 skip_dct[2] = 1;
2159             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2160                               dest_y + dct_offset + 8,
2161                               wrap_y, 8) < 20 * s->qscale)
2162                 skip_dct[3] = 1;
2163             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2164                               wrap_c, 8) < 20 * s->qscale)
2165                 skip_dct[4] = 1;
2166             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2167                               wrap_c, 8) < 20 * s->qscale)
2168                 skip_dct[5] = 1;
2169             if (!s->chroma_y_shift) { /* 422 */
2170                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2171                                   dest_cb + uv_dct_offset,
2172                                   wrap_c, 8) < 20 * s->qscale)
2173                     skip_dct[6] = 1;
2174                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2175                                   dest_cr + uv_dct_offset,
2176                                   wrap_c, 8) < 20 * s->qscale)
2177                     skip_dct[7] = 1;
2178             }
2179         }
2180     }
2181
2182     if (s->quantizer_noise_shaping) {
2183         if (!skip_dct[0])
2184             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2185         if (!skip_dct[1])
2186             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2187         if (!skip_dct[2])
2188             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2189         if (!skip_dct[3])
2190             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2191         if (!skip_dct[4])
2192             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2193         if (!skip_dct[5])
2194             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2195         if (!s->chroma_y_shift) { /* 422 */
2196             if (!skip_dct[6])
2197                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2198                                   wrap_c);
2199             if (!skip_dct[7])
2200                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2201                                   wrap_c);
2202         }
2203         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2204     }
2205
2206     /* DCT & quantize */
2207     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2208     {
2209         for (i = 0; i < mb_block_count; i++) {
2210             if (!skip_dct[i]) {
2211                 int overflow;
2212                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2213                 // FIXME we could decide to change to quantizer instead of
2214                 // clipping
2215                 // JS: I don't think that would be a good idea it could lower
2216                 //     quality instead of improve it. Just INTRADC clipping
2217                 //     deserves changes in quantizer
2218                 if (overflow)
2219                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2220             } else
2221                 s->block_last_index[i] = -1;
2222         }
2223         if (s->quantizer_noise_shaping) {
2224             for (i = 0; i < mb_block_count; i++) {
2225                 if (!skip_dct[i]) {
2226                     s->block_last_index[i] =
2227                         dct_quantize_refine(s, s->block[i], weight[i],
2228                                             orig[i], i, s->qscale);
2229                 }
2230             }
2231         }
2232
2233         if (s->luma_elim_threshold && !s->mb_intra)
2234             for (i = 0; i < 4; i++)
2235                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2236         if (s->chroma_elim_threshold && !s->mb_intra)
2237             for (i = 4; i < mb_block_count; i++)
2238                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2239
2240         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2241             for (i = 0; i < mb_block_count; i++) {
2242                 if (s->block_last_index[i] == -1)
2243                     s->coded_score[i] = INT_MAX / 256;
2244             }
2245         }
2246     }
2247
2248     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2249         s->block_last_index[4] =
2250         s->block_last_index[5] = 0;
2251         s->block[4][0] =
2252         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2253         if (!s->chroma_y_shift) { /* 422 / 444 */
2254             for (i=6; i<12; i++) {
2255                 s->block_last_index[i] = 0;
2256                 s->block[i][0] = s->block[4][0];
2257             }
2258         }
2259     }
2260
2261     // non c quantize code returns incorrect block_last_index FIXME
2262     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2263         for (i = 0; i < mb_block_count; i++) {
2264             int j;
2265             if (s->block_last_index[i] > 0) {
2266                 for (j = 63; j > 0; j--) {
2267                     if (s->block[i][s->intra_scantable.permutated[j]])
2268                         break;
2269                 }
2270                 s->block_last_index[i] = j;
2271             }
2272         }
2273     }
2274
2275     /* huffman encode */
2276     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2277     case AV_CODEC_ID_MPEG1VIDEO:
2278     case AV_CODEC_ID_MPEG2VIDEO:
2279         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2280             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2281         break;
2282     case AV_CODEC_ID_MPEG4:
2283         if (CONFIG_MPEG4_ENCODER)
2284             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2285         break;
2286     case AV_CODEC_ID_MSMPEG4V2:
2287     case AV_CODEC_ID_MSMPEG4V3:
2288     case AV_CODEC_ID_WMV1:
2289         if (CONFIG_MSMPEG4_ENCODER)
2290             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2291         break;
2292     case AV_CODEC_ID_WMV2:
2293         if (CONFIG_WMV2_ENCODER)
2294             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2295         break;
2296     case AV_CODEC_ID_H261:
2297         if (CONFIG_H261_ENCODER)
2298             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2299         break;
2300     case AV_CODEC_ID_H263:
2301     case AV_CODEC_ID_H263P:
2302     case AV_CODEC_ID_FLV1:
2303     case AV_CODEC_ID_RV10:
2304     case AV_CODEC_ID_RV20:
2305         if (CONFIG_H263_ENCODER)
2306             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2307         break;
2308     case AV_CODEC_ID_MJPEG:
2309     case AV_CODEC_ID_AMV:
2310         if (CONFIG_MJPEG_ENCODER)
2311             ff_mjpeg_encode_mb(s, s->block);
2312         break;
2313     default:
2314         av_assert1(0);
2315     }
2316 }
2317
2318 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2319 {
2320     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2321     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2322     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2323 }
2324
2325 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2326     int i;
2327
2328     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2329
2330     /* mpeg1 */
2331     d->mb_skip_run= s->mb_skip_run;
2332     for(i=0; i<3; i++)
2333         d->last_dc[i] = s->last_dc[i];
2334
2335     /* statistics */
2336     d->mv_bits= s->mv_bits;
2337     d->i_tex_bits= s->i_tex_bits;
2338     d->p_tex_bits= s->p_tex_bits;
2339     d->i_count= s->i_count;
2340     d->f_count= s->f_count;
2341     d->b_count= s->b_count;
2342     d->skip_count= s->skip_count;
2343     d->misc_bits= s->misc_bits;
2344     d->last_bits= 0;
2345
2346     d->mb_skipped= 0;
2347     d->qscale= s->qscale;
2348     d->dquant= s->dquant;
2349
2350     d->esc3_level_length= s->esc3_level_length;
2351 }
2352
2353 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2354     int i;
2355
2356     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2357     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2358
2359     /* mpeg1 */
2360     d->mb_skip_run= s->mb_skip_run;
2361     for(i=0; i<3; i++)
2362         d->last_dc[i] = s->last_dc[i];
2363
2364     /* statistics */
2365     d->mv_bits= s->mv_bits;
2366     d->i_tex_bits= s->i_tex_bits;
2367     d->p_tex_bits= s->p_tex_bits;
2368     d->i_count= s->i_count;
2369     d->f_count= s->f_count;
2370     d->b_count= s->b_count;
2371     d->skip_count= s->skip_count;
2372     d->misc_bits= s->misc_bits;
2373
2374     d->mb_intra= s->mb_intra;
2375     d->mb_skipped= s->mb_skipped;
2376     d->mv_type= s->mv_type;
2377     d->mv_dir= s->mv_dir;
2378     d->pb= s->pb;
2379     if(s->data_partitioning){
2380         d->pb2= s->pb2;
2381         d->tex_pb= s->tex_pb;
2382     }
2383     d->block= s->block;
2384     for(i=0; i<8; i++)
2385         d->block_last_index[i]= s->block_last_index[i];
2386     d->interlaced_dct= s->interlaced_dct;
2387     d->qscale= s->qscale;
2388
2389     d->esc3_level_length= s->esc3_level_length;
2390 }
2391
2392 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2393                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2394                            int *dmin, int *next_block, int motion_x, int motion_y)
2395 {
2396     int score;
2397     uint8_t *dest_backup[3];
2398
2399     copy_context_before_encode(s, backup, type);
2400
2401     s->block= s->blocks[*next_block];
2402     s->pb= pb[*next_block];
2403     if(s->data_partitioning){
2404         s->pb2   = pb2   [*next_block];
2405         s->tex_pb= tex_pb[*next_block];
2406     }
2407
2408     if(*next_block){
2409         memcpy(dest_backup, s->dest, sizeof(s->dest));
2410         s->dest[0] = s->rd_scratchpad;
2411         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2412         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2413         assert(s->linesize >= 32); //FIXME
2414     }
2415
2416     encode_mb(s, motion_x, motion_y);
2417
2418     score= put_bits_count(&s->pb);
2419     if(s->data_partitioning){
2420         score+= put_bits_count(&s->pb2);
2421         score+= put_bits_count(&s->tex_pb);
2422     }
2423
2424     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2425         ff_MPV_decode_mb(s, s->block);
2426
2427         score *= s->lambda2;
2428         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2429     }
2430
2431     if(*next_block){
2432         memcpy(s->dest, dest_backup, sizeof(s->dest));
2433     }
2434
2435     if(score<*dmin){
2436         *dmin= score;
2437         *next_block^=1;
2438
2439         copy_context_after_encode(best, s, type);
2440     }
2441 }
2442
2443 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2444     uint32_t *sq = ff_squareTbl + 256;
2445     int acc=0;
2446     int x,y;
2447
2448     if(w==16 && h==16)
2449         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2450     else if(w==8 && h==8)
2451         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2452
2453     for(y=0; y<h; y++){
2454         for(x=0; x<w; x++){
2455             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2456         }
2457     }
2458
2459     av_assert2(acc>=0);
2460
2461     return acc;
2462 }
2463
2464 static int sse_mb(MpegEncContext *s){
2465     int w= 16;
2466     int h= 16;
2467
2468     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2469     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2470
2471     if(w==16 && h==16)
2472       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2473         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2474                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2475                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2476       }else{
2477         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2478                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2479                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2480       }
2481     else
2482         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2483                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2484                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2485 }
2486
2487 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2488     MpegEncContext *s= *(void**)arg;
2489
2490
2491     s->me.pre_pass=1;
2492     s->me.dia_size= s->avctx->pre_dia_size;
2493     s->first_slice_line=1;
2494     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2495         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2496             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2497         }
2498         s->first_slice_line=0;
2499     }
2500
2501     s->me.pre_pass=0;
2502
2503     return 0;
2504 }
2505
2506 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2507     MpegEncContext *s= *(void**)arg;
2508
2509     ff_check_alignment();
2510
2511     s->me.dia_size= s->avctx->dia_size;
2512     s->first_slice_line=1;
2513     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2514         s->mb_x=0; //for block init below
2515         ff_init_block_index(s);
2516         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2517             s->block_index[0]+=2;
2518             s->block_index[1]+=2;
2519             s->block_index[2]+=2;
2520             s->block_index[3]+=2;
2521
2522             /* compute motion vector & mb_type and store in context */
2523             if(s->pict_type==AV_PICTURE_TYPE_B)
2524                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2525             else
2526                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2527         }
2528         s->first_slice_line=0;
2529     }
2530     return 0;
2531 }
2532
2533 static int mb_var_thread(AVCodecContext *c, void *arg){
2534     MpegEncContext *s= *(void**)arg;
2535     int mb_x, mb_y;
2536
2537     ff_check_alignment();
2538
2539     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2540         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2541             int xx = mb_x * 16;
2542             int yy = mb_y * 16;
2543             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2544             int varc;
2545             int sum = s->dsp.pix_sum(pix, s->linesize);
2546
2547             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2548
2549             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2550             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2551             s->me.mb_var_sum_temp    += varc;
2552         }
2553     }
2554     return 0;
2555 }
2556
2557 static void write_slice_end(MpegEncContext *s){
2558     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2559         if(s->partitioned_frame){
2560             ff_mpeg4_merge_partitions(s);
2561         }
2562
2563         ff_mpeg4_stuffing(&s->pb);
2564     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2565         ff_mjpeg_encode_stuffing(s);
2566     }
2567
2568     avpriv_align_put_bits(&s->pb);
2569     flush_put_bits(&s->pb);
2570
2571     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2572         s->misc_bits+= get_bits_diff(s);
2573 }
2574
2575 static void write_mb_info(MpegEncContext *s)
2576 {
2577     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2578     int offset = put_bits_count(&s->pb);
2579     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2580     int gobn = s->mb_y / s->gob_index;
2581     int pred_x, pred_y;
2582     if (CONFIG_H263_ENCODER)
2583         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2584     bytestream_put_le32(&ptr, offset);
2585     bytestream_put_byte(&ptr, s->qscale);
2586     bytestream_put_byte(&ptr, gobn);
2587     bytestream_put_le16(&ptr, mba);
2588     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2589     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2590     /* 4MV not implemented */
2591     bytestream_put_byte(&ptr, 0); /* hmv2 */
2592     bytestream_put_byte(&ptr, 0); /* vmv2 */
2593 }
2594
2595 static void update_mb_info(MpegEncContext *s, int startcode)
2596 {
2597     if (!s->mb_info)
2598         return;
2599     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2600         s->mb_info_size += 12;
2601         s->prev_mb_info = s->last_mb_info;
2602     }
2603     if (startcode) {
2604         s->prev_mb_info = put_bits_count(&s->pb)/8;
2605         /* This might have incremented mb_info_size above, and we return without
2606          * actually writing any info into that slot yet. But in that case,
2607          * this will be called again at the start of the after writing the
2608          * start code, actually writing the mb info. */
2609         return;
2610     }
2611
2612     s->last_mb_info = put_bits_count(&s->pb)/8;
2613     if (!s->mb_info_size)
2614         s->mb_info_size += 12;
2615     write_mb_info(s);
2616 }
2617
2618 static int encode_thread(AVCodecContext *c, void *arg){
2619     MpegEncContext *s= *(void**)arg;
2620     int mb_x, mb_y, pdif = 0;
2621     int chr_h= 16>>s->chroma_y_shift;
2622     int i, j;
2623     MpegEncContext best_s, backup_s;
2624     uint8_t bit_buf[2][MAX_MB_BYTES];
2625     uint8_t bit_buf2[2][MAX_MB_BYTES];
2626     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2627     PutBitContext pb[2], pb2[2], tex_pb[2];
2628
2629     ff_check_alignment();
2630
2631     for(i=0; i<2; i++){
2632         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2633         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2634         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2635     }
2636
2637     s->last_bits= put_bits_count(&s->pb);
2638     s->mv_bits=0;
2639     s->misc_bits=0;
2640     s->i_tex_bits=0;
2641     s->p_tex_bits=0;
2642     s->i_count=0;
2643     s->f_count=0;
2644     s->b_count=0;
2645     s->skip_count=0;
2646
2647     for(i=0; i<3; i++){
2648         /* init last dc values */
2649         /* note: quant matrix value (8) is implied here */
2650         s->last_dc[i] = 128 << s->intra_dc_precision;
2651
2652         s->current_picture.f.error[i] = 0;
2653     }
2654     if(s->codec_id==AV_CODEC_ID_AMV){
2655         s->last_dc[0] = 128*8/13;
2656         s->last_dc[1] = 128*8/14;
2657         s->last_dc[2] = 128*8/14;
2658     }
2659     s->mb_skip_run = 0;
2660     memset(s->last_mv, 0, sizeof(s->last_mv));
2661
2662     s->last_mv_dir = 0;
2663
2664     switch(s->codec_id){
2665     case AV_CODEC_ID_H263:
2666     case AV_CODEC_ID_H263P:
2667     case AV_CODEC_ID_FLV1:
2668         if (CONFIG_H263_ENCODER)
2669             s->gob_index = ff_h263_get_gob_height(s);
2670         break;
2671     case AV_CODEC_ID_MPEG4:
2672         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2673             ff_mpeg4_init_partitions(s);
2674         break;
2675     }
2676
2677     s->resync_mb_x=0;
2678     s->resync_mb_y=0;
2679     s->first_slice_line = 1;
2680     s->ptr_lastgob = s->pb.buf;
2681     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2682         s->mb_x=0;
2683         s->mb_y= mb_y;
2684
2685         ff_set_qscale(s, s->qscale);
2686         ff_init_block_index(s);
2687
2688         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2689             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2690             int mb_type= s->mb_type[xy];
2691 //            int d;
2692             int dmin= INT_MAX;
2693             int dir;
2694
2695             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2696                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2697                 return -1;
2698             }
2699             if(s->data_partitioning){
2700                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2701                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2702                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2703                     return -1;
2704                 }
2705             }
2706
2707             s->mb_x = mb_x;
2708             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2709             ff_update_block_index(s);
2710
2711             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2712                 ff_h261_reorder_mb_index(s);
2713                 xy= s->mb_y*s->mb_stride + s->mb_x;
2714                 mb_type= s->mb_type[xy];
2715             }
2716
2717             /* write gob / video packet header  */
2718             if(s->rtp_mode){
2719                 int current_packet_size, is_gob_start;
2720
2721                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2722
2723                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2724
2725                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2726
2727                 switch(s->codec_id){
2728                 case AV_CODEC_ID_H263:
2729                 case AV_CODEC_ID_H263P:
2730                     if(!s->h263_slice_structured)
2731                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2732                     break;
2733                 case AV_CODEC_ID_MPEG2VIDEO:
2734                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2735                 case AV_CODEC_ID_MPEG1VIDEO:
2736                     if(s->mb_skip_run) is_gob_start=0;
2737                     break;
2738                 case AV_CODEC_ID_MJPEG:
2739                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2740                     break;
2741                 }
2742
2743                 if(is_gob_start){
2744                     if(s->start_mb_y != mb_y || mb_x!=0){
2745                         write_slice_end(s);
2746
2747                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2748                             ff_mpeg4_init_partitions(s);
2749                         }
2750                     }
2751
2752                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2753                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2754
2755                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2756                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2757                         int d = 100 / s->error_rate;
2758                         if(r % d == 0){
2759                             current_packet_size=0;
2760                             s->pb.buf_ptr= s->ptr_lastgob;
2761                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2762                         }
2763                     }
2764
2765                     if (s->avctx->rtp_callback){
2766                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2767                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2768                     }
2769                     update_mb_info(s, 1);
2770
2771                     switch(s->codec_id){
2772                     case AV_CODEC_ID_MPEG4:
2773                         if (CONFIG_MPEG4_ENCODER) {
2774                             ff_mpeg4_encode_video_packet_header(s);
2775                             ff_mpeg4_clean_buffers(s);
2776                         }
2777                     break;
2778                     case AV_CODEC_ID_MPEG1VIDEO:
2779                     case AV_CODEC_ID_MPEG2VIDEO:
2780                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2781                             ff_mpeg1_encode_slice_header(s);
2782                             ff_mpeg1_clean_buffers(s);
2783                         }
2784                     break;
2785                     case AV_CODEC_ID_H263:
2786                     case AV_CODEC_ID_H263P:
2787                         if (CONFIG_H263_ENCODER)
2788                             ff_h263_encode_gob_header(s, mb_y);
2789                     break;
2790                     }
2791
2792                     if(s->flags&CODEC_FLAG_PASS1){
2793                         int bits= put_bits_count(&s->pb);
2794                         s->misc_bits+= bits - s->last_bits;
2795                         s->last_bits= bits;
2796                     }
2797
2798                     s->ptr_lastgob += current_packet_size;
2799                     s->first_slice_line=1;
2800                     s->resync_mb_x=mb_x;
2801                     s->resync_mb_y=mb_y;
2802                 }
2803             }
2804
2805             if(  (s->resync_mb_x   == s->mb_x)
2806                && s->resync_mb_y+1 == s->mb_y){
2807                 s->first_slice_line=0;
2808             }
2809
2810             s->mb_skipped=0;
2811             s->dquant=0; //only for QP_RD
2812
2813             update_mb_info(s, 0);
2814
2815             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2816                 int next_block=0;
2817                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2818
2819                 copy_context_before_encode(&backup_s, s, -1);
2820                 backup_s.pb= s->pb;
2821                 best_s.data_partitioning= s->data_partitioning;
2822                 best_s.partitioned_frame= s->partitioned_frame;
2823                 if(s->data_partitioning){
2824                     backup_s.pb2= s->pb2;
2825                     backup_s.tex_pb= s->tex_pb;
2826                 }
2827
2828                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2829                     s->mv_dir = MV_DIR_FORWARD;
2830                     s->mv_type = MV_TYPE_16X16;
2831                     s->mb_intra= 0;
2832                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2833                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2834                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2835                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2836                 }
2837                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2838                     s->mv_dir = MV_DIR_FORWARD;
2839                     s->mv_type = MV_TYPE_FIELD;
2840                     s->mb_intra= 0;
2841                     for(i=0; i<2; i++){
2842                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2843                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2844                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2845                     }
2846                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2847                                  &dmin, &next_block, 0, 0);
2848                 }
2849                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2850                     s->mv_dir = MV_DIR_FORWARD;
2851                     s->mv_type = MV_TYPE_16X16;
2852                     s->mb_intra= 0;
2853                     s->mv[0][0][0] = 0;
2854                     s->mv[0][0][1] = 0;
2855                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2856                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2857                 }
2858                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2859                     s->mv_dir = MV_DIR_FORWARD;
2860                     s->mv_type = MV_TYPE_8X8;
2861                     s->mb_intra= 0;
2862                     for(i=0; i<4; i++){
2863                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2864                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2865                     }
2866                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2867                                  &dmin, &next_block, 0, 0);
2868                 }
2869                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2870                     s->mv_dir = MV_DIR_FORWARD;
2871                     s->mv_type = MV_TYPE_16X16;
2872                     s->mb_intra= 0;
2873                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2874                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2875                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2876                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2877                 }
2878                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2879                     s->mv_dir = MV_DIR_BACKWARD;
2880                     s->mv_type = MV_TYPE_16X16;
2881                     s->mb_intra= 0;
2882                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2883                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2884                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2885                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2886                 }
2887                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2888                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2889                     s->mv_type = MV_TYPE_16X16;
2890                     s->mb_intra= 0;
2891                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2892                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2893                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2894                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2896                                  &dmin, &next_block, 0, 0);
2897                 }
2898                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2899                     s->mv_dir = MV_DIR_FORWARD;
2900                     s->mv_type = MV_TYPE_FIELD;
2901                     s->mb_intra= 0;
2902                     for(i=0; i<2; i++){
2903                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2904                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2905                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2906                     }
2907                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2908                                  &dmin, &next_block, 0, 0);
2909                 }
2910                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2911                     s->mv_dir = MV_DIR_BACKWARD;
2912                     s->mv_type = MV_TYPE_FIELD;
2913                     s->mb_intra= 0;
2914                     for(i=0; i<2; i++){
2915                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2916                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2917                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2918                     }
2919                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2920                                  &dmin, &next_block, 0, 0);
2921                 }
2922                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2923                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2924                     s->mv_type = MV_TYPE_FIELD;
2925                     s->mb_intra= 0;
2926                     for(dir=0; dir<2; dir++){
2927                         for(i=0; i<2; i++){
2928                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2929                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2930                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2931                         }
2932                     }
2933                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2934                                  &dmin, &next_block, 0, 0);
2935                 }
2936                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2937                     s->mv_dir = 0;
2938                     s->mv_type = MV_TYPE_16X16;
2939                     s->mb_intra= 1;
2940                     s->mv[0][0][0] = 0;
2941                     s->mv[0][0][1] = 0;
2942                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2943                                  &dmin, &next_block, 0, 0);
2944                     if(s->h263_pred || s->h263_aic){
2945                         if(best_s.mb_intra)
2946                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2947                         else
2948                             ff_clean_intra_table_entries(s); //old mode?
2949                     }
2950                 }
2951
2952                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2953                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2954                         const int last_qp= backup_s.qscale;
2955                         int qpi, qp, dc[6];
2956                         int16_t ac[6][16];
2957                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2958                         static const int dquant_tab[4]={-1,1,-2,2};
2959                         int storecoefs = s->mb_intra && s->dc_val[0];
2960
2961                         av_assert2(backup_s.dquant == 0);
2962
2963                         //FIXME intra
2964                         s->mv_dir= best_s.mv_dir;
2965                         s->mv_type = MV_TYPE_16X16;
2966                         s->mb_intra= best_s.mb_intra;
2967                         s->mv[0][0][0] = best_s.mv[0][0][0];
2968                         s->mv[0][0][1] = best_s.mv[0][0][1];
2969                         s->mv[1][0][0] = best_s.mv[1][0][0];
2970                         s->mv[1][0][1] = best_s.mv[1][0][1];
2971
2972                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2973                         for(; qpi<4; qpi++){
2974                             int dquant= dquant_tab[qpi];
2975                             qp= last_qp + dquant;
2976                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2977                                 continue;
2978                             backup_s.dquant= dquant;
2979                             if(storecoefs){
2980                                 for(i=0; i<6; i++){
2981                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2982                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2983                                 }
2984                             }
2985
2986                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2987                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2988                             if(best_s.qscale != qp){
2989                                 if(storecoefs){
2990                                     for(i=0; i<6; i++){
2991                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2992                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2993                                     }
2994                                 }
2995                             }
2996                         }
2997                     }
2998                 }
2999                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3000                     int mx= s->b_direct_mv_table[xy][0];
3001                     int my= s->b_direct_mv_table[xy][1];
3002
3003                     backup_s.dquant = 0;
3004                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3005                     s->mb_intra= 0;
3006                     ff_mpeg4_set_direct_mv(s, mx, my);
3007                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3008                                  &dmin, &next_block, mx, my);
3009                 }
3010                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3011                     backup_s.dquant = 0;
3012                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3013                     s->mb_intra= 0;
3014                     ff_mpeg4_set_direct_mv(s, 0, 0);
3015                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3016                                  &dmin, &next_block, 0, 0);
3017                 }
3018                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3019                     int coded=0;
3020                     for(i=0; i<6; i++)
3021                         coded |= s->block_last_index[i];
3022                     if(coded){
3023                         int mx,my;
3024                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3025                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3026                             mx=my=0; //FIXME find the one we actually used
3027                             ff_mpeg4_set_direct_mv(s, mx, my);
3028                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3029                             mx= s->mv[1][0][0];
3030                             my= s->mv[1][0][1];
3031                         }else{
3032                             mx= s->mv[0][0][0];
3033                             my= s->mv[0][0][1];
3034                         }
3035
3036                         s->mv_dir= best_s.mv_dir;
3037                         s->mv_type = best_s.mv_type;
3038                         s->mb_intra= 0;
3039 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3040                         s->mv[0][0][1] = best_s.mv[0][0][1];
3041                         s->mv[1][0][0] = best_s.mv[1][0][0];
3042                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3043                         backup_s.dquant= 0;
3044                         s->skipdct=1;
3045                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3046                                         &dmin, &next_block, mx, my);
3047                         s->skipdct=0;
3048                     }
3049                 }
3050
3051                 s->current_picture.qscale_table[xy] = best_s.qscale;
3052
3053                 copy_context_after_encode(s, &best_s, -1);
3054
3055                 pb_bits_count= put_bits_count(&s->pb);
3056                 flush_put_bits(&s->pb);
3057                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3058                 s->pb= backup_s.pb;
3059
3060                 if(s->data_partitioning){
3061                     pb2_bits_count= put_bits_count(&s->pb2);
3062                     flush_put_bits(&s->pb2);
3063                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3064                     s->pb2= backup_s.pb2;
3065
3066                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3067                     flush_put_bits(&s->tex_pb);
3068                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3069                     s->tex_pb= backup_s.tex_pb;
3070                 }
3071                 s->last_bits= put_bits_count(&s->pb);
3072
3073                 if (CONFIG_H263_ENCODER &&
3074                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3075                     ff_h263_update_motion_val(s);
3076
3077                 if(next_block==0){ //FIXME 16 vs linesize16
3078                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3079                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3080                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3081                 }
3082
3083                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3084                     ff_MPV_decode_mb(s, s->block);
3085             } else {
3086                 int motion_x = 0, motion_y = 0;
3087                 s->mv_type=MV_TYPE_16X16;
3088                 // only one MB-Type possible
3089
3090                 switch(mb_type){
3091                 case CANDIDATE_MB_TYPE_INTRA:
3092                     s->mv_dir = 0;
3093                     s->mb_intra= 1;
3094                     motion_x= s->mv[0][0][0] = 0;
3095                     motion_y= s->mv[0][0][1] = 0;
3096                     break;
3097                 case CANDIDATE_MB_TYPE_INTER:
3098                     s->mv_dir = MV_DIR_FORWARD;
3099                     s->mb_intra= 0;
3100                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3101                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3102                     break;
3103                 case CANDIDATE_MB_TYPE_INTER_I:
3104                     s->mv_dir = MV_DIR_FORWARD;
3105                     s->mv_type = MV_TYPE_FIELD;
3106                     s->mb_intra= 0;
3107                     for(i=0; i<2; i++){
3108                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3109                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3110                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3111                     }
3112                     break;
3113                 case CANDIDATE_MB_TYPE_INTER4V:
3114                     s->mv_dir = MV_DIR_FORWARD;
3115                     s->mv_type = MV_TYPE_8X8;
3116                     s->mb_intra= 0;
3117                     for(i=0; i<4; i++){
3118                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3119                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3120                     }
3121                     break;
3122                 case CANDIDATE_MB_TYPE_DIRECT:
3123                     if (CONFIG_MPEG4_ENCODER) {
3124                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3125                         s->mb_intra= 0;
3126                         motion_x=s->b_direct_mv_table[xy][0];
3127                         motion_y=s->b_direct_mv_table[xy][1];
3128                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3129                     }
3130                     break;
3131                 case CANDIDATE_MB_TYPE_DIRECT0:
3132                     if (CONFIG_MPEG4_ENCODER) {
3133                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3134                         s->mb_intra= 0;
3135                         ff_mpeg4_set_direct_mv(s, 0, 0);
3136                     }
3137                     break;
3138                 case CANDIDATE_MB_TYPE_BIDIR:
3139                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3140                     s->mb_intra= 0;
3141                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3142                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3143                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3144                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3145                     break;
3146                 case CANDIDATE_MB_TYPE_BACKWARD:
3147                     s->mv_dir = MV_DIR_BACKWARD;
3148                     s->mb_intra= 0;
3149                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3150                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3151                     break;
3152                 case CANDIDATE_MB_TYPE_FORWARD:
3153                     s->mv_dir = MV_DIR_FORWARD;
3154                     s->mb_intra= 0;
3155                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3156                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3157                     break;
3158                 case CANDIDATE_MB_TYPE_FORWARD_I:
3159                     s->mv_dir = MV_DIR_FORWARD;
3160                     s->mv_type = MV_TYPE_FIELD;
3161                     s->mb_intra= 0;
3162                     for(i=0; i<2; i++){
3163                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3164                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3165                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3166                     }
3167                     break;
3168                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3169                     s->mv_dir = MV_DIR_BACKWARD;
3170                     s->mv_type = MV_TYPE_FIELD;
3171                     s->mb_intra= 0;
3172                     for(i=0; i<2; i++){
3173                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3174                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3175                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3176                     }
3177                     break;
3178                 case CANDIDATE_MB_TYPE_BIDIR_I:
3179                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3180                     s->mv_type = MV_TYPE_FIELD;
3181                     s->mb_intra= 0;
3182                     for(dir=0; dir<2; dir++){
3183                         for(i=0; i<2; i++){
3184                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3185                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3186                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3187                         }
3188                     }
3189                     break;
3190                 default:
3191                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3192                 }
3193
3194                 encode_mb(s, motion_x, motion_y);
3195
3196                 // RAL: Update last macroblock type
3197                 s->last_mv_dir = s->mv_dir;
3198
3199                 if (CONFIG_H263_ENCODER &&
3200                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3201                     ff_h263_update_motion_val(s);
3202
3203                 ff_MPV_decode_mb(s, s->block);
3204             }
3205
3206             /* clean the MV table in IPS frames for direct mode in B frames */
3207             if(s->mb_intra /* && I,P,S_TYPE */){
3208                 s->p_mv_table[xy][0]=0;
3209                 s->p_mv_table[xy][1]=0;
3210             }
3211
3212             if(s->flags&CODEC_FLAG_PSNR){
3213                 int w= 16;
3214                 int h= 16;
3215
3216                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3217                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3218
3219                 s->current_picture.f.error[0] += sse(
3220                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3221                     s->dest[0], w, h, s->linesize);
3222                 s->current_picture.f.error[1] += sse(
3223                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3224                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3225                 s->current_picture.f.error[2] += sse(
3226                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3227                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3228             }
3229             if(s->loop_filter){
3230                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3231                     ff_h263_loop_filter(s);
3232             }
3233             av_dlog(s->avctx, "MB %d %d bits\n",
3234                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3235         }
3236     }
3237
3238     //not beautiful here but we must write it before flushing so it has to be here
3239     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3240         ff_msmpeg4_encode_ext_header(s);
3241
3242     write_slice_end(s);
3243
3244     /* Send the last GOB if RTP */
3245     if (s->avctx->rtp_callback) {
3246         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3247         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3248         /* Call the RTP callback to send the last GOB */
3249         emms_c();
3250         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3251     }
3252
3253     return 0;
3254 }
3255
/**
 * Add src->field into dst->field and then reset src->field to zero.
 *
 * Pointers named dst and src must be in scope where the macro is used.
 * The two assignments are wrapped in do { } while (0) so that they stay
 * together as a single statement when the macro is the body of an unbraced
 * if/else/for; use it as "MERGE(x);".
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3257 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3258     MERGE(me.scene_change_score);
3259     MERGE(me.mc_mb_var_sum_temp);
3260     MERGE(me.mb_var_sum_temp);
3261 }
3262
3263 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3264     int i;
3265
3266     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3267     MERGE(dct_count[1]);
3268     MERGE(mv_bits);
3269     MERGE(i_tex_bits);
3270     MERGE(p_tex_bits);
3271     MERGE(i_count);
3272     MERGE(f_count);
3273     MERGE(b_count);
3274     MERGE(skip_count);
3275     MERGE(misc_bits);
3276     MERGE(er.error_count);
3277     MERGE(padding_bug_score);
3278     MERGE(current_picture.f.error[0]);
3279     MERGE(current_picture.f.error[1]);
3280     MERGE(current_picture.f.error[2]);
3281
3282     if(dst->avctx->noise_reduction){
3283         for(i=0; i<64; i++){
3284             MERGE(dct_error_sum[0][i]);
3285             MERGE(dct_error_sum[1][i]);
3286         }
3287     }
3288
3289     assert(put_bits_count(&src->pb) % 8 ==0);
3290     assert(put_bits_count(&dst->pb) % 8 ==0);
3291     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3292     flush_put_bits(&dst->pb);
3293 }
3294
3295 static int estimate_qp(MpegEncContext *s, int dry_run){
3296     if (s->next_lambda){
3297         s->current_picture_ptr->f.quality =
3298         s->current_picture.f.quality = s->next_lambda;
3299         if(!dry_run) s->next_lambda= 0;
3300     } else if (!s->fixed_qscale) {
3301         s->current_picture_ptr->f.quality =
3302         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3303         if (s->current_picture.f.quality < 0)
3304             return -1;
3305     }
3306
3307     if(s->adaptive_quant){
3308         switch(s->codec_id){
3309         case AV_CODEC_ID_MPEG4:
3310             if (CONFIG_MPEG4_ENCODER)
3311                 ff_clean_mpeg4_qscales(s);
3312             break;
3313         case AV_CODEC_ID_H263:
3314         case AV_CODEC_ID_H263P:
3315         case AV_CODEC_ID_FLV1:
3316             if (CONFIG_H263_ENCODER)
3317                 ff_clean_h263_qscales(s);
3318             break;
3319         default:
3320             ff_init_qscale_tab(s);
3321         }
3322
3323         s->lambda= s->lambda_table[0];
3324         //FIXME broken
3325     }else
3326         s->lambda = s->current_picture.f.quality;
3327     update_qscale(s);
3328     return 0;
3329 }
3330
3331 /* must be called before writing the header */
3332 static void set_frame_distances(MpegEncContext * s){
3333     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3334     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3335
3336     if(s->pict_type==AV_PICTURE_TYPE_B){
3337         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3338         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3339     }else{
3340         s->pp_time= s->time - s->last_non_b_time;
3341         s->last_non_b_time= s->time;
3342         assert(s->picture_number==0 || s->pp_time > 0);
3343     }
3344 }
3345
/**
 * Encode one picture: set up rounding and the quantiser, run the motion
 * estimation (or intra variance) stage, pick f_code/b_code, write the
 * format-specific picture header and finally run encode_thread over all
 * slice contexts and merge their results back into s.
 *
 * The values returned by the s->avctx->execute() calls are not inspected
 * in this function.
 *
 * @param s              encoder context of the picture being encoded
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers that take it
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3346 static int encode_picture(MpegEncContext *s, int picture_number)
3347 {
3348     int i, ret;
3349     int bits;
3350     int context_count = s->slice_context_count;
3351
3352     s->picture_number = picture_number;
3353
3354     /* Reset the average MB variance */
3355     s->me.mb_var_sum_temp    =
3356     s->me.mc_mb_var_sum_temp = 0;
3357
3358     /* we need to initialize some time vars before we can encode b-frames */
3359     // RAL: Condition added for MPEG1VIDEO
3360     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3361         set_frame_distances(s);
3362     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3363         ff_set_mpeg4_time(s);
3364
3365     s->me.scene_change_score=0;
3366
3367 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3368
         /* Rounding control: an I picture resets no_rounding (to 1 only for
          * msmpeg4_version >= 3); any other non-B picture toggles it when
          * flipflop_rounding is set or the codec is H.263+ or MPEG-4. */
3369     if(s->pict_type==AV_PICTURE_TYPE_I){
3370         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3371         else                        s->no_rounding=0;
3372     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3373         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3374             s->no_rounding ^= 1;
3375     }
3376
         /* Second pass: dry-run estimate_qp() followed by
          * ff_get_2pass_fcode(). Otherwise, unless CODEC_FLAG_QSCALE is set,
          * seed lambda from s->last_lambda_for[] and refresh the qscale. */
3377     if(s->flags & CODEC_FLAG_PASS2){
3378         if (estimate_qp(s,1) < 0)
3379             return -1;
3380         ff_get_2pass_fcode(s);
3381     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3382         if(s->pict_type==AV_PICTURE_TYPE_B)
3383             s->lambda= s->last_lambda_for[s->pict_type];
3384         else
3385             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3386         update_qscale(s);
3387     }
3388
         /* For codecs other than AMV and MJPEG the chroma intra quantiser
          * pointers alias the luma ones; a chroma table that is a distinct
          * pointer is freed before the aliasing. */
3389     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3390         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3391         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3392         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3393         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3394     }
3395
3396     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* ff_update_duplicate_context() is called for every additional
          * slice context, passing s; a negative result aborts the picture. */
3397     for(i=1; i<context_count; i++){
3398         ret = ff_update_duplicate_context(s->thread_context[i], s);
3399         if (ret < 0)
3400             return ret;
3401     }
3402
3403     if(ff_init_me(s)<0)
3404         return -1;
3405
3406     /* Estimate motion for every MB */
3407     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation / 256, rounded */
3408         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3409         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3410         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: when pre_me is 2, or pre_me is set and the
                  * last non-B picture type was I */
3411             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3412                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3413             }
3414         }
3415
3416         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3417     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3418         /* I-Frame */
3419         for(i=0; i<s->mb_stride*s->mb_height; i++)
3420             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3421
3422         if(!s->fixed_qscale){
3423             /* finding spatial complexity for I-frame rate control */
3424             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3425         }
3426     }
         /* collect the per-context ME accumulators in s and publish the
          * variance sums on both picture structures */
3427     for(i=1; i<context_count; i++){
3428         merge_context_after_me(s, s->thread_context[i]);
3429     }
3430     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3431     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3432     emms_c();
3433
         /* A P picture whose scene change score exceeds the threshold is
          * re-typed as an I picture and every macroblock is forced to intra. */
3434     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3435         s->pict_type= AV_PICTURE_TYPE_I;
3436         for(i=0; i<s->mb_stride*s->mb_height; i++)
3437             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3438         if(s->msmpeg4_version >= 3)
3439             s->no_rounding=1;
3440         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3441                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3442     }
3443
         /* Unless umvplus is set: choose f_code (and b_code for B pictures)
          * from the motion vector tables, then run ff_fix_long_mvs() on each
          * table with the chosen code. */
3444     if(!s->umvplus){
3445         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3446             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3447
3448             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3449                 int a,b;
3450                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3451                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3452                 s->f_code= FFMAX3(s->f_code, a, b);
3453             }
3454
3455             ff_fix_long_p_mvs(s);
3456             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3457             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3458                 int j;
3459                 for(i=0; i<2; i++){
3460                     for(j=0; j<2; j++)
3461                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3462                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3463                 }
3464             }
3465         }
3466
3467         if(s->pict_type==AV_PICTURE_TYPE_B){
3468             int a, b;
3469
                 /* f_code covers the forward tables, b_code the backward ones */
3470             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3471             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3472             s->f_code = FFMAX(a, b);
3473
3474             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3475             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3476             s->b_code = FFMAX(a, b);
3477
3478             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3479             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3480             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3481             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3482             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3483                 int dir, j;
3484                 for(dir=0; dir<2; dir++){
3485                     for(i=0; i<2; i++){
3486                         for(j=0; j<2; j++){
3487                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3488                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3489                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3490                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3491                         }
3492                     }
3493                 }
3494             }
3495         }
3496     }
3497
         /* final (non dry-run) quantiser decision for this picture */
3498     if (estimate_qp(s, 0) < 0)
3499         return -1;
3500
3501     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3502         s->qscale= 3; //reduce clipping problems
3503
3504     if (s->out_format == FMT_MJPEG) {
3505         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3506         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3507
             /* a user supplied intra matrix replaces the default for both planes */
3508         if (s->avctx->intra_matrix) {
3509             chroma_matrix =
3510             luma_matrix = s->avctx->intra_matrix;
3511         }
3512
3513         /* for mjpeg, we do include qscale in the matrix */
3514         for(i=1;i<64;i++){
3515             int j= s->dsp.idct_permutation[i];
3516
3517             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3518             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3519         }
3520         s->y_dc_scale_table=
3521         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3522         s->chroma_intra_matrix[0] =
3523         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3524         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3525                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3526         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3527                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3528         s->qscale= 8; /* qscale is already folded into the matrices above */
3529     }
3530     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
              * matrices taken from sp5x_quant_table rows 5*2+0 and 5*2+1 */
3531         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3532         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3533         for(i=1;i<64;i++){
3534             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3535
3536             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3537             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3538         }
3539         s->y_dc_scale_table= y;
3540         s->c_dc_scale_table= c;
3541         s->intra_matrix[0] = 13;
3542         s->chroma_intra_matrix[0] = 14;
3543         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3544                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3545         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3546                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3547         s->qscale= 8;
3548     }
3549
3550     //FIXME var duplication
3551     s->current_picture_ptr->f.key_frame =
3552     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3553     s->current_picture_ptr->f.pict_type =
3554     s->current_picture.f.pict_type = s->pict_type;
3555
3556     if (s->current_picture.f.key_frame)
3557         s->picture_in_gop_number=0;
3558
         /* write the picture header for the output format; the number of
          * bits it took is stored in header_bits below */
3559     s->mb_x = s->mb_y = 0;
3560     s->last_bits= put_bits_count(&s->pb);
3561     switch(s->out_format) {
3562     case FMT_MJPEG:
3563         if (CONFIG_MJPEG_ENCODER)
3564             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3565                                            s->intra_matrix, s->chroma_intra_matrix);
3566         break;
3567     case FMT_H261:
3568         if (CONFIG_H261_ENCODER)
3569             ff_h261_encode_picture_header(s, picture_number);
3570         break;
3571     case FMT_H263:
             /* first matching writer wins; plain H.263 is the fallback */
3572         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3573             ff_wmv2_encode_picture_header(s, picture_number);
3574         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3575             ff_msmpeg4_encode_picture_header(s, picture_number);
3576         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3577             ff_mpeg4_encode_picture_header(s, picture_number);
3578         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3579             ff_rv10_encode_picture_header(s, picture_number);
3580         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3581             ff_rv20_encode_picture_header(s, picture_number);
3582         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3583             ff_flv_encode_picture_header(s, picture_number);
3584         else if (CONFIG_H263_ENCODER)
3585             ff_h263_encode_picture_header(s, picture_number);
3586         break;
3587     case FMT_MPEG1:
3588         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3589             ff_mpeg1_encode_picture_header(s, picture_number);
3590         break;
3591     default:
3592         av_assert0(0);
3593     }
3594     bits= put_bits_count(&s->pb);
3595     s->header_bits= bits - s->last_bits;
3596
         /* macroblock stage: update_duplicate_context_after_me() for the
          * additional contexts, encode_thread on all contexts, then merge
          * the additional contexts' results back into s */
3597     for(i=1; i<context_count; i++){
3598         update_duplicate_context_after_me(s->thread_context[i], s);
3599     }
3600     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3601     for(i=1; i<context_count; i++){
3602         merge_context_after_encode(s, s->thread_context[i]);
3603     }
3604     emms_c();
3605     return 0;
3606 }
3607
3608 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3609     const int intra= s->mb_intra;
3610     int i;
3611
3612     s->dct_count[intra]++;
3613
3614     for(i=0; i<64; i++){
3615         int level= block[i];
3616
3617         if(level){
3618             if(level>0){
3619                 s->dct_error_sum[intra][i] += level;
3620                 level -= s->dct_offset[intra][i];
3621                 if(level<0) level=0;
3622             }else{
3623                 s->dct_error_sum[intra][i] -= level;
3624                 level += s->dct_offset[intra][i];
3625                 if(level>0) level=0;
3626             }
3627             block[i]= level;
3628         }
3629     }
3630 }
3631
3632 static int dct_quantize_trellis_c(MpegEncContext *s,
3633                                   int16_t *block, int n,
3634                                   int qscale, int *overflow){
3635     const int *qmat;
3636     const uint8_t *scantable= s->intra_scantable.scantable;
3637     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3638     int max=0;
3639     unsigned int threshold1, threshold2;
3640     int bias=0;
3641     int run_tab[65];
3642     int level_tab[65];
3643     int score_tab[65];
3644     int survivor[65];
3645     int survivor_count;
3646     int last_run=0;
3647     int last_level=0;
3648     int last_score= 0;
3649     int last_i;
3650     int coeff[2][64];
3651     int coeff_count[64];
3652     int qmul, qadd, start_i, last_non_zero, i, dc;
3653     const int esc_length= s->ac_esc_length;
3654     uint8_t * length;
3655     uint8_t * last_length;
3656     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3657
3658     s->dsp.fdct (block);
3659
3660     if(s->dct_error_sum)
3661         s->denoise_dct(s, block);
3662     qmul= qscale*16;
3663     qadd= ((qscale-1)|1)*8;
3664
3665     if (s->mb_intra) {
3666         int q;
3667         if (!s->h263_aic) {
3668             if (n < 4)
3669                 q = s->y_dc_scale;
3670             else
3671                 q = s->c_dc_scale;
3672             q = q << 3;
3673         } else{
3674             /* For AIC we skip quant/dequant of INTRADC */
3675             q = 1 << 3;
3676             qadd=0;
3677         }
3678
3679         /* note: block[0] is assumed to be positive */
3680         block[0] = (block[0] + (q >> 1)) / q;
3681         start_i = 1;
3682         last_non_zero = 0;
3683         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3684         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3685             bias= 1<<(QMAT_SHIFT-1);
3686         length     = s->intra_ac_vlc_length;
3687         last_length= s->intra_ac_vlc_last_length;
3688     } else {
3689         start_i = 0;
3690         last_non_zero = -1;
3691         qmat = s->q_inter_matrix[qscale];
3692         length     = s->inter_ac_vlc_length;
3693         last_length= s->inter_ac_vlc_last_length;
3694     }
3695     last_i= start_i;
3696
3697     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3698     threshold2= (threshold1<<1);
3699
3700     for(i=63; i>=start_i; i--) {
3701         const int j = scantable[i];
3702         int level = block[j] * qmat[j];
3703
3704         if(((unsigned)(level+threshold1))>threshold2){
3705             last_non_zero = i;
3706             break;
3707         }
3708     }
3709
3710     for(i=start_i; i<=last_non_zero; i++) {
3711         const int j = scantable[i];
3712         int level = block[j] * qmat[j];
3713
3714 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3715 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3716         if(((unsigned)(level+threshold1))>threshold2){
3717             if(level>0){
3718                 level= (bias + level)>>QMAT_SHIFT;
3719                 coeff[0][i]= level;
3720                 coeff[1][i]= level-1;
3721 //                coeff[2][k]= level-2;
3722             }else{
3723                 level= (bias - level)>>QMAT_SHIFT;
3724                 coeff[0][i]= -level;
3725                 coeff[1][i]= -level+1;
3726 //                coeff[2][k]= -level+2;
3727             }
3728             coeff_count[i]= FFMIN(level, 2);
3729             av_assert2(coeff_count[i]);
3730             max |=level;
3731         }else{
3732             coeff[0][i]= (level>>31)|1;
3733             coeff_count[i]= 1;
3734         }
3735     }
3736
3737     *overflow= s->max_qcoeff < max; //overflow might have happened
3738
3739     if(last_non_zero < start_i){
3740         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3741         return last_non_zero;
3742     }
3743
3744     score_tab[start_i]= 0;
3745     survivor[0]= start_i;
3746     survivor_count= 1;
3747
3748     for(i=start_i; i<=last_non_zero; i++){
3749         int level_index, j, zero_distortion;
3750         int dct_coeff= FFABS(block[ scantable[i] ]);
3751         int best_score=256*256*256*120;
3752
3753         if (s->dsp.fdct == ff_fdct_ifast)
3754             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3755         zero_distortion= dct_coeff*dct_coeff;
3756
3757         for(level_index=0; level_index < coeff_count[i]; level_index++){
3758             int distortion;
3759             int level= coeff[level_index][i];
3760             const int alevel= FFABS(level);
3761             int unquant_coeff;
3762
3763             av_assert2(level);
3764
3765             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3766                 unquant_coeff= alevel*qmul + qadd;
3767             }else{ //MPEG1
3768                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3769                 if(s->mb_intra){
3770                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3771                         unquant_coeff =   (unquant_coeff - 1) | 1;
3772                 }else{
3773                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3774                         unquant_coeff =   (unquant_coeff - 1) | 1;
3775                 }
3776                 unquant_coeff<<= 3;
3777             }
3778
3779             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3780             level+=64;
3781             if((level&(~127)) == 0){
3782                 for(j=survivor_count-1; j>=0; j--){
3783                     int run= i - survivor[j];
3784                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3785                     score += score_tab[i-run];
3786
3787                     if(score < best_score){
3788                         best_score= score;
3789                         run_tab[i+1]= run;
3790                         level_tab[i+1]= level-64;
3791                     }
3792                 }
3793
3794                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3795                     for(j=survivor_count-1; j>=0; j--){
3796                         int run= i - survivor[j];
3797                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3798                         score += score_tab[i-run];
3799                         if(score < last_score){
3800                             last_score= score;
3801                             last_run= run;
3802                             last_level= level-64;
3803                             last_i= i+1;
3804                         }
3805                     }
3806                 }
3807             }else{
3808                 distortion += esc_length*lambda;
3809                 for(j=survivor_count-1; j>=0; j--){
3810                     int run= i - survivor[j];
3811                     int score= distortion + score_tab[i-run];
3812
3813                     if(score < best_score){
3814                         best_score= score;
3815                         run_tab[i+1]= run;
3816                         level_tab[i+1]= level-64;
3817                     }
3818                 }
3819
3820                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3821                   for(j=survivor_count-1; j>=0; j--){
3822                         int run= i - survivor[j];
3823                         int score= distortion + score_tab[i-run];
3824                         if(score < last_score){
3825                             last_score= score;
3826                             last_run= run;
3827                             last_level= level-64;
3828                             last_i= i+1;
3829                         }
3830                     }
3831                 }
3832             }
3833         }
3834
3835         score_tab[i+1]= best_score;
3836
3837         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3838         if(last_non_zero <= 27){
3839             for(; survivor_count; survivor_count--){
3840                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3841                     break;
3842             }
3843         }else{
3844             for(; survivor_count; survivor_count--){
3845                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3846                     break;
3847             }
3848         }
3849
3850         survivor[ survivor_count++ ]= i+1;
3851     }
3852
3853     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3854         last_score= 256*256*256*120;
3855         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3856             int score= score_tab[i];
3857             if(i) score += lambda*2; //FIXME exacter?
3858
3859             if(score < last_score){
3860                 last_score= score;
3861                 last_i= i;
3862                 last_level= level_tab[i];
3863                 last_run= run_tab[i];
3864             }
3865         }
3866     }
3867
3868     s->coded_score[n] = last_score;
3869
3870     dc= FFABS(block[0]);
3871     last_non_zero= last_i - 1;
3872     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3873
3874     if(last_non_zero < start_i)
3875         return last_non_zero;
3876
3877     if(last_non_zero == 0 && start_i == 0){
3878         int best_level= 0;
3879         int best_score= dc * dc;
3880
3881         for(i=0; i<coeff_count[0]; i++){
3882             int level= coeff[i][0];
3883             int alevel= FFABS(level);
3884             int unquant_coeff, score, distortion;
3885
3886             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3887                     unquant_coeff= (alevel*qmul + qadd)>>3;
3888             }else{ //MPEG1
3889                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3890                     unquant_coeff =   (unquant_coeff - 1) | 1;
3891             }
3892             unquant_coeff = (unquant_coeff + 4) >> 3;
3893             unquant_coeff<<= 3 + 3;
3894
3895             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3896             level+=64;
3897             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3898             else                    score= distortion + esc_length*lambda;
3899
3900             if(score < best_score){
3901                 best_score= score;
3902                 best_level= level - 64;
3903             }
3904         }
3905         block[0]= best_level;
3906         s->coded_score[n] = best_score - dc*dc;
3907         if(best_level == 0) return -1;
3908         else                return last_non_zero;
3909     }
3910
3911     i= last_i;
3912     av_assert2(last_level);
3913
3914     block[ perm_scantable[last_non_zero] ]= last_level;
3915     i -= last_run + 1;
3916
3917     for(; i>start_i; i -= run_tab[i] + 1){
3918         block[ perm_scantable[i-1] ]= level_tab[i];
3919     }
3920
3921     return last_non_zero;
3922 }
3923
3924 //#define REFINE_STATS 1
3925 static int16_t basis[64][64];
3926
3927 static void build_basis(uint8_t *perm){
3928     int i, j, x, y;
3929     emms_c();
3930     for(i=0; i<8; i++){
3931         for(j=0; j<8; j++){
3932             for(y=0; y<8; y++){
3933                 for(x=0; x<8; x++){
3934                     double s= 0.25*(1<<BASIS_SHIFT);
3935                     int index= 8*i + j;
3936                     int perm_index= perm[index];
3937                     if(i==0) s*= sqrt(0.5);
3938                     if(j==0) s*= sqrt(0.5);
3939                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3940                 }
3941             }
3942         }
3943     }
3944 }
3945
/**
 * Greedily refine the already quantized levels of one block.
 *
 * A residual (rem[]) is set up from orig[] and from the basis functions of
 * all currently nonzero levels.  Then, in a loop, every single-coefficient
 * change of +1 or -1 is scored as
 *     lambda * (difference in VLC code length) + dsp.try_8x8basis(...)
 * and the best candidate is applied.  The loop ends as soon as no candidate
 * scores lower than leaving the block unchanged.
 *
 * Only the qmul*level +/- qadd reconstruction is modelled here, independent
 * of s->out_format.
 *
 * @param s       encoder context; mb_intra, h263_aic, y_dc_scale/c_dc_scale,
 *                block_last_index[n], lambda2, quantizer_noise_shaping, the
 *                AC VLC length tables and the dsp callbacks are read
 * @param block   quantized levels, modified in place
 * @param weight  per-sample weights; overwritten with remapped values
 * @param orig    original samples the reconstruction is compared against
 *                (presumably spatial domain - TODO confirm against caller)
 * @param n       block number; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale  quantizer scale; qmul = 2*qscale, qadd = (qscale-1)|1
 * @return scan position of the last nonzero level after refinement
 */
3946 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3947                         int16_t *block, int16_t *weight, int16_t *orig,
3948                         int n, int qscale){
     /* residual: starts as dc - (orig << RECON_SHIFT) and is updated through
      * dsp.add_8x8basis() for every level that is present or changed */
3949     int16_t rem[64];
3950     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3951     const uint8_t *scantable= s->intra_scantable.scantable;
3952     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3953 //    unsigned int threshold1, threshold2;
3954 //    int bias=0;
     /* run_tab[k] = number of zero levels preceding the k-th nonzero level */
3955     int run_tab[65];
3956     int prev_run=0;
3957     int prev_level=0;
3958     int qmul, qadd, start_i, last_non_zero, i, dc;
3959     uint8_t * length;
3960     uint8_t * last_length;
3961     int lambda;
3962     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3963 #ifdef REFINE_STATS
3964 static int count=0;
3965 static int after_last=0;
3966 static int to_zero=0;
3967 static int from_zero=0;
3968 static int raise=0;
3969 static int lower=0;
3970 static int messed_sign=0;
3971 #endif
3972
     /* basis[0][0] == 0 is used as the "table not built yet" marker */
3973     if(basis[0][0] == 0)
3974         build_basis(s->dsp.idct_permutation);
3975
     /* a nonzero level L is reconstructed below as qmul*L + qadd (L > 0)
      * or qmul*L - qadd (L < 0) */
3976     qmul= qscale*2;
3977     qadd= (qscale-1)|1;
3978     if (s->mb_intra) {
3979         if (!s->h263_aic) {
3980             if (n < 4)
3981                 q = s->y_dc_scale;
3982             else
3983                 q = s->c_dc_scale;
3984         } else{
3985             /* For AIC we skip quant/dequant of INTRADC */
3986             q = 1;
3987             qadd=0;
3988         }
3989         q <<= RECON_SHIFT-3;
3990         /* note: block[0] is assumed to be positive */
3991         dc= block[0]*q;
3992 //        block[0] = (block[0] + (q >> 1)) / q;
         /* the DC level is handled separately, the AC scan starts at 1 */
3993         start_i = 1;
3994 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3995 //            bias= 1<<(QMAT_SHIFT-1);
3996         length     = s->intra_ac_vlc_length;
3997         last_length= s->intra_ac_vlc_last_length;
3998     } else {
3999         dc= 0;
4000         start_i = 0;
4001         length     = s->inter_ac_vlc_length;
4002         last_length= s->inter_ac_vlc_last_length;
4003     }
4004     last_non_zero = s->block_last_index[n];
4005
4006 #ifdef REFINE_STATS
4007 {START_TIMER
4008 #endif
     /* half of 1<<RECON_SHIFT is added to dc (presumably a rounding offset),
      * then every rem[] entry starts as dc minus the scaled original */
4009     dc += (1<<(RECON_SHIFT-1));
4010     for(i=0; i<64; i++){
4011         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4012     }
4013 #ifdef REFINE_STATS
4014 STOP_TIMER("memset rem[]")}
4015 #endif
     /* remap every weight in place: with w' = |weight| + qns*one the new
      * weight is 15 + (48*qns*one + w'/2)/w'; sum collects the squares */
4016     sum=0;
4017     for(i=0; i<64; i++){
4018         int one= 36;
4019         int qns=4;
4020         int w;
4021
4022         w= FFABS(weight[i]) + qns*one;
4023         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4024
4025         weight[i] = w;
4026 //        w=weight[i] = (63*qns + (w/2)) / w;
4027
4028         av_assert2(w>0);
4029         av_assert2(w<(1<<6));
4030         sum += w*w;
4031     }
     /* rate multiplier: s->lambda2 scaled by the summed squared weights */
4032     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4033 #ifdef REFINE_STATS
4034 {START_TIMER
4035 #endif
     /* initial pass over the coded levels: fill run_tab[] and add the
      * reconstructed value of every nonzero level into rem[] */
4036     run=0;
4037     rle_index=0;
4038     for(i=start_i; i<=last_non_zero; i++){
4039         int j= perm_scantable[i];
4040         const int level= block[j];
4041         int coeff;
4042
4043         if(level){
4044             if(level<0) coeff= qmul*level - qadd;
4045             else        coeff= qmul*level + qadd;
4046             run_tab[rle_index++]=run;
4047             run=0;
4048
4049             s->dsp.add_8x8basis(rem, basis[j], coeff);
4050         }else{
4051             run++;
4052         }
4053     }
4054 #ifdef REFINE_STATS
4055 if(last_non_zero>0){
4056 STOP_TIMER("init rem[]")
4057 }
4058 }
4059
4060 {START_TIMER
4061 #endif
     /* greedy search: one coefficient is changed by +/-1 per iteration */
4062     for(;;){
         /* reference score: try_8x8basis() with a change of 0 */
4063         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4064         int best_coeff=0;
4065         int best_change=0;
4066         int run2, best_unquant_change=0, analyze_gradient;
4067 #ifdef REFINE_STATS
4068 {START_TIMER
4069 #endif
4070         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4071
         /* d1 = fdct of the residual weighted by w*w; it is used further
          * down to skip 0 -> +/-1 candidates whose sign equals the sign of
          * d1 at that position */
4072         if(analyze_gradient){
4073 #ifdef REFINE_STATS
4074 {START_TIMER
4075 #endif
4076             for(i=0; i<64; i++){
4077                 int w= weight[i];
4078
4079                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4080             }
4081 #ifdef REFINE_STATS
4082 STOP_TIMER("rem*w*w")}
4083 {START_TIMER
4084 #endif
4085             s->dsp.fdct(d1);
4086 #ifdef REFINE_STATS
4087 STOP_TIMER("dct")}
4088 #endif
4089         }
4090
         /* intra blocks only (start_i == 1): try the DC level +/-1; no rate
          * term is added, and candidates whose reconstructed value leaves
          * [0, 2048) are rejected.
          * NOTE(review): the candidate is scored with basis[0] but later
          * applied with basis[perm_scantable[0]]; these agree only if
          * perm_scantable[0] == 0 - confirm */
4091         if(start_i){
4092             const int level= block[0];
4093             int change, old_coeff;
4094
4095             av_assert2(s->mb_intra);
4096
4097             old_coeff= q*level;
4098
4099             for(change=-1; change<=1; change+=2){
4100                 int new_level= level + change;
4101                 int score, new_coeff;
4102
4103                 new_coeff= q*new_level;
4104                 if(new_coeff >= 2048 || new_coeff < 0)
4105                     continue;
4106
4107                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4108                 if(score<best_score){
4109                     best_score= score;
4110                     best_coeff= 0;
4111                     best_change= change;
4112                     best_unquant_change= new_coeff - old_coeff;
4113                 }
4114             }
4115         }
4116
         /* run  = zeros seen since the previous nonzero level
          * run2 = zeros remaining until the next nonzero level
          * NOTE(review): run_tab[] is read here even when no entry was
          * written (block without nonzero AC levels) - confirm harmless */
4117         run=0;
4118         rle_index=0;
4119         run2= run_tab[rle_index++];
4120         prev_level=0;
4121         prev_run=0;
4122
4123         for(i=start_i; i<64; i++){
4124             int j= perm_scantable[i];
4125             const int level= block[j];
4126             int change, old_coeff;
4127
             /* below noise shaping level 3 only positions up to one past
              * the current last nonzero level are examined */
4128             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4129                 break;
4130
4131             if(level){
4132                 if(level<0) old_coeff= qmul*level - qadd;
4133                 else        old_coeff= qmul*level + qadd;
4134                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4135             }else{
4136                 old_coeff=0;
4137                 run2--;
4138                 av_assert2(run2>=0 || i >= last_non_zero );
4139             }
4140
4141             for(change=-1; change<=1; change+=2){
4142                 int new_level= level + change;
4143                 int score, new_coeff, unquant_change;
4144
4145                 score=0;
                 /* below noise shaping level 2 magnitudes may only shrink */
4146                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4147                    continue;
4148
4149                 if(new_level){
4150                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4151                     else            new_coeff= qmul*new_level + qadd;
                     /* reconstructed value must stay inside (-2048, 2048) */
4152                     if(new_coeff >= 2048 || new_coeff <= -2048)
4153                         continue;
4154                     //FIXME check for overflow
4155
4156                     if(level){
                         /* nonzero -> nonzero: same run, different level;
                          * the rate difference is only accounted for while
                          * |level| < 63 */
4157                         if(level < 63 && level > -63){
4158                             if(i < last_non_zero)
4159                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4160                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4161                             else
4162                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4163                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4164                         }
4165                     }else{
                         /* zero -> +/-1: a new code is inserted */
4166                         av_assert2(FFABS(new_level)==1);
4167
4168                         if(analyze_gradient){
4169                             int g= d1[ scantable[i] ];
                             /* (g^new_level) >= 0 means both have the same sign */
4170                             if(g && (g^new_level) >= 0)
4171                                 continue;
4172                         }
4173
4174                         if(i < last_non_zero){
                             /* the following level's run is split:
                              * (run+run2+1, next) becomes (run, 1) + (run2, next) */
4175                             int next_i= i + run2 + 1;
4176                             int next_level= block[ perm_scantable[next_i] ] + 64;
4177
4178                             if(next_level&(~127))
4179                                 next_level= 0;
4180
4181                             if(next_i < last_non_zero)
4182                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4183                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4184                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4185                             else
4186                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4187                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4188                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4189                         }else{
                             /* new level becomes the last one; the previous
                              * level switches from the "last" table to the
                              * regular table */
4190                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4191                             if(prev_level){
4192                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4193                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4194                             }
4195                         }
4196                     }
4197                 }else{
                     /* +/-1 -> zero: a code is removed */
4198                     new_coeff=0;
4199                     av_assert2(FFABS(level)==1);
4200
4201                     if(i < last_non_zero){
                         /* the runs around the removed level are merged */
4202                         int next_i= i + run2 + 1;
4203                         int next_level= block[ perm_scantable[next_i] ] + 64;
4204
4205                         if(next_level&(~127))
4206                             next_level= 0;
4207
4208                         if(next_i < last_non_zero)
4209                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4210                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4211                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4212                         else
4213                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4214                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4215                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4216                     }else{
                         /* the last level disappears; the previous level
                          * switches from the regular to the "last" table */
4217                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4218                         if(prev_level){
4219                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4220                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4221                         }
4222                     }
4223                 }
4224
                 /* convert the length difference into a cost, then add
                  * the value returned by try_8x8basis() for this change */
4225                 score *= lambda;
4226
4227                 unquant_change= new_coeff - old_coeff;
4228                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4229
4230                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4231                 if(score<best_score){
4232                     best_score= score;
4233                     best_coeff= i;
4234                     best_change= change;
4235                     best_unquant_change= unquant_change;
4236                 }
4237             }
             /* remember the most recent nonzero level (biased by 64, or 0
              * if outside 0..127) and its run for the "last level" cases */
4238             if(level){
4239                 prev_level= level + 64;
4240                 if(prev_level&(~127))
4241                     prev_level= 0;
4242                 prev_run= run;
4243                 run=0;
4244             }else{
4245                 run++;
4246             }
4247         }
4248 #ifdef REFINE_STATS
4249 STOP_TIMER("iterative step")}
4250 #endif
4251
         /* best_change == 0 means no candidate beat the reference score */
4252         if(best_change){
4253             int j= perm_scantable[ best_coeff ];
4254
4255             block[j] += best_change;
4256
4257             if(best_coeff > last_non_zero){
4258                 last_non_zero= best_coeff;
4259                 av_assert2(block[j]);
4260 #ifdef REFINE_STATS
4261 after_last++;
4262 #endif
4263             }else{
4264 #ifdef REFINE_STATS
4265 if(block[j]){
4266     if(block[j] - best_change){
4267         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4268             raise++;
4269         }else{
4270             lower++;
4271         }
4272     }else{
4273         from_zero++;
4274     }
4275 }else{
4276     to_zero++;
4277 }
4278 #endif
                 /* the changed level may have become zero: move
                  * last_non_zero back to the last remaining nonzero level */
4279                 for(; last_non_zero>=start_i; last_non_zero--){
4280                     if(block[perm_scantable[last_non_zero]])
4281                         break;
4282                 }
4283             }
4284 #ifdef REFINE_STATS
4285 count++;
4286 if(256*256*256*64 % count == 0){
4287     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4288 }
4289 #endif
             /* rebuild run_tab[] for the modified block */
4290             run=0;
4291             rle_index=0;
4292             for(i=start_i; i<=last_non_zero; i++){
4293                 int j= perm_scantable[i];
4294                 const int level= block[j];
4295
4296                  if(level){
4297                      run_tab[rle_index++]=run;
4298                      run=0;
4299                  }else{
4300                      run++;
4301                  }
4302             }
4303
             /* j is again perm_scantable[best_coeff]; the loop above only
              * shadowed it inside its own body */
4304             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4305         }else{
4306             break;
4307         }
4308     }
4309 #ifdef REFINE_STATS
4310 if(last_non_zero>0){
4311 STOP_TIMER("iterative search")
4312 }
4313 }
4314 #endif
4315
4316     return last_non_zero;
4317 }
4318
4319 int ff_dct_quantize_c(MpegEncContext *s,
4320                         int16_t *block, int n,
4321                         int qscale, int *overflow)
4322 {
4323     int i, j, level, last_non_zero, q, start_i;
4324     const int *qmat;
4325     const uint8_t *scantable= s->intra_scantable.scantable;
4326     int bias;
4327     int max=0;
4328     unsigned int threshold1, threshold2;
4329
4330     s->dsp.fdct (block);
4331
4332     if(s->dct_error_sum)
4333         s->denoise_dct(s, block);
4334
4335     if (s->mb_intra) {
4336         if (!s->h263_aic) {
4337             if (n < 4)
4338                 q = s->y_dc_scale;
4339             else
4340                 q = s->c_dc_scale;
4341             q = q << 3;
4342         } else
4343             /* For AIC we skip quant/dequant of INTRADC */
4344             q = 1 << 3;
4345
4346         /* note: block[0] is assumed to be positive */
4347         block[0] = (block[0] + (q >> 1)) / q;
4348         start_i = 1;
4349         last_non_zero = 0;
4350         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4351         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4352     } else {
4353         start_i = 0;
4354         last_non_zero = -1;
4355         qmat = s->q_inter_matrix[qscale];
4356         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4357     }
4358     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4359     threshold2= (threshold1<<1);
4360     for(i=63;i>=start_i;i--) {
4361         j = scantable[i];
4362         level = block[j] * qmat[j];
4363
4364         if(((unsigned)(level+threshold1))>threshold2){
4365             last_non_zero = i;
4366             break;
4367         }else{
4368             block[j]=0;
4369         }
4370     }
4371     for(i=start_i; i<=last_non_zero; i++) {
4372         j = scantable[i];
4373         level = block[j] * qmat[j];
4374
4375 //        if(   bias+level >= (1<<QMAT_SHIFT)
4376 //           || bias-level >= (1<<QMAT_SHIFT)){
4377         if(((unsigned)(level+threshold1))>threshold2){
4378             if(level>0){
4379                 level= (bias + level)>>QMAT_SHIFT;
4380                 block[j]= level;
4381             }else{
4382                 level= (bias - level)>>QMAT_SHIFT;
4383                 block[j]= -level;
4384             }
4385             max |=level;
4386         }else{
4387             block[j]=0;
4388         }
4389     }
4390     *overflow= s->max_qcoeff < max; //overflow might have happened
4391
4392     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4393     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4394         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4395
4396     return last_non_zero;
4397 }
4398
4399 #define OFFSET(x) offsetof(MpegEncContext, x)
4400 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4401 static const AVOption h263_options[] = {
4402     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4403     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4404     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4405     FF_MPV_COMMON_OPTS
4406     { NULL },
4407 };
4408
4409 static const AVClass h263_class = {
4410     .class_name = "H.263 encoder",
4411     .item_name  = av_default_item_name,
4412     .option     = h263_options,
4413     .version    = LIBAVUTIL_VERSION_INT,
4414 };
4415
4416 AVCodec ff_h263_encoder = {
4417     .name           = "h263",
4418     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4419     .type           = AVMEDIA_TYPE_VIDEO,
4420     .id             = AV_CODEC_ID_H263,
4421     .priv_data_size = sizeof(MpegEncContext),
4422     .init           = ff_MPV_encode_init,
4423     .encode2        = ff_MPV_encode_picture,
4424     .close          = ff_MPV_encode_end,
4425     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4426     .priv_class     = &h263_class,
4427 };
4428
4429 static const AVOption h263p_options[] = {
4430     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4431     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4432     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4433     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4434     FF_MPV_COMMON_OPTS
4435     { NULL },
4436 };
4437 static const AVClass h263p_class = {
4438     .class_name = "H.263p encoder",
4439     .item_name  = av_default_item_name,
4440     .option     = h263p_options,
4441     .version    = LIBAVUTIL_VERSION_INT,
4442 };
4443
4444 AVCodec ff_h263p_encoder = {
4445     .name           = "h263p",
4446     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4447     .type           = AVMEDIA_TYPE_VIDEO,
4448     .id             = AV_CODEC_ID_H263P,
4449     .priv_data_size = sizeof(MpegEncContext),
4450     .init           = ff_MPV_encode_init,
4451     .encode2        = ff_MPV_encode_picture,
4452     .close          = ff_MPV_encode_end,
4453     .capabilities   = CODEC_CAP_SLICE_THREADS,
4454     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4455     .priv_class     = &h263p_class,
4456 };
4457
4458 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4459
4460 AVCodec ff_msmpeg4v2_encoder = {
4461     .name           = "msmpeg4v2",
4462     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4463     .type           = AVMEDIA_TYPE_VIDEO,
4464     .id             = AV_CODEC_ID_MSMPEG4V2,
4465     .priv_data_size = sizeof(MpegEncContext),
4466     .init           = ff_MPV_encode_init,
4467     .encode2        = ff_MPV_encode_picture,
4468     .close          = ff_MPV_encode_end,
4469     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4470     .priv_class     = &msmpeg4v2_class,
4471 };
4472
4473 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4474
4475 AVCodec ff_msmpeg4v3_encoder = {
4476     .name           = "msmpeg4",
4477     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4478     .type           = AVMEDIA_TYPE_VIDEO,
4479     .id             = AV_CODEC_ID_MSMPEG4V3,
4480     .priv_data_size = sizeof(MpegEncContext),
4481     .init           = ff_MPV_encode_init,
4482     .encode2        = ff_MPV_encode_picture,
4483     .close          = ff_MPV_encode_end,
4484     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4485     .priv_class     = &msmpeg4v3_class,
4486 };
4487
4488 FF_MPV_GENERIC_CLASS(wmv1)
4489
4490 AVCodec ff_wmv1_encoder = {
4491     .name           = "wmv1",
4492     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4493     .type           = AVMEDIA_TYPE_VIDEO,
4494     .id             = AV_CODEC_ID_WMV1,
4495     .priv_data_size = sizeof(MpegEncContext),
4496     .init           = ff_MPV_encode_init,
4497     .encode2        = ff_MPV_encode_picture,
4498     .close          = ff_MPV_encode_end,
4499     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4500     .priv_class     = &wmv1_class,
4501 };