libavcodec/mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * Fixes for non-linear quantizers with large QPs and for VBV with a restrictive qmin, sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/pixdesc.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/thread.h"
43 #include "avcodec.h"
44 #include "dct.h"
45 #include "idctdsp.h"
46 #include "mpeg12.h"
47 #include "mpegvideo.h"
48 #include "mpegvideodata.h"
49 #include "h261.h"
50 #include "h263.h"
51 #include "h263data.h"
52 #include "mjpegenc_common.h"
53 #include "mathops.h"
54 #include "mpegutils.h"
55 #include "mjpegenc.h"
56 #include "speedhqenc.h"
57 #include "msmpeg4.h"
58 #include "pixblockdsp.h"
59 #include "qpeldsp.h"
60 #include "faandct.h"
61 #include "thread.h"
62 #include "aandcttab.h"
63 #include "flv.h"
64 #include "mpeg4video.h"
65 #include "internal.h"
66 #include "bytestream.h"
67 #include "wmv2.h"
68 #include "rv10.h"
69 #include "packet_internal.h"
70 #include <limits.h>
71 #include "sp5x.h"
72
73 #define QUANT_BIAS_SHIFT 8
74
75 #define QMAT_SHIFT_MMX 16
76 #define QMAT_SHIFT 21
77
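/* Rough sketch of how these shifts are used (see ff_convert_matrix() below):
 * for the accurate DCTs,
 *   qmat[qscale][i]      ~= 2^(QMAT_SHIFT+1)     / (qscale2 * quant_matrix[i])
 *   qmat16[qscale][0][i] ~= 2^(QMAT_SHIFT_MMX+1) / (qscale2 * quant_matrix[i])
 * i.e. the tables hold fixed-point reciprocals of the quantizer step, so the
 * quantizers can replace a per-coefficient division with a multiply + shift;
 * the ff_fdct_ifast branch additionally folds the AAN post-scale factors in.
 * Example (pure arithmetic): qscale2 = 2, quant_matrix[i] = 16
 *   -> den = 32, qmat = (2 << 21) / 32 = 131072.
 * The narrower qmat16 tables (with the companion bias term in [1]) are the
 * ones intended for the 16-bit SIMD quantizer, hence the _MMX name. */
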
78 static int encode_picture(MpegEncContext *s, int picture_number);
79 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
80 static int sse_mb(MpegEncContext *s);
81 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
82 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
83
84 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
85 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
86
87 const AVOption ff_mpv_generic_options[] = {
88     FF_MPV_COMMON_OPTS
89 #if FF_API_MPEGVIDEO_OPTS
90     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
91     FF_MPV_DEPRECATED_A53_CC_OPT
92     FF_MPV_DEPRECATED_MATRIX_OPT
93     FF_MPV_DEPRECATED_BFRAME_OPTS
94 #endif
95     { NULL },
96 };
97
98 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
99                        uint16_t (*qmat16)[2][64],
100                        const uint16_t *quant_matrix,
101                        int bias, int qmin, int qmax, int intra)
102 {
103     FDCTDSPContext *fdsp = &s->fdsp;
104     int qscale;
105     int shift = 0;
106
107     for (qscale = qmin; qscale <= qmax; qscale++) {
108         int i;
109         int qscale2;
110
111         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
112         else                 qscale2 = qscale << 1;
113
114         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
115 #if CONFIG_FAANDCT
116             fdsp->fdct == ff_faandct            ||
117 #endif /* CONFIG_FAANDCT */
118             fdsp->fdct == ff_jpeg_fdct_islow_10) {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
122                 /* 16 <= qscale * quant_matrix[i] <= 7905
123                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
124                  *             19952 <=              x  <= 249205026
125                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
126                  *           3444240 >= (1 << 36) / (x) >= 275 */
127
128                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
129             }
130         } else if (fdsp->fdct == ff_fdct_ifast) {
131             for (i = 0; i < 64; i++) {
132                 const int j = s->idsp.idct_permutation[i];
133                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
134                 /* 16 <= qscale * quant_matrix[i] <= 7905
135                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
136                  *             19952 <=              x  <= 249205026
137                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
138                  *           3444240 >= (1 << 36) / (x) >= 275 */
139
140                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
141             }
142         } else {
143             for (i = 0; i < 64; i++) {
144                 const int j = s->idsp.idct_permutation[i];
145                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
146                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
147                  * Assume x = qscale * quant_matrix[i]
148                  * So             16 <=              x  <= 7905
149                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
150                  * so          32768 >= (1 << 19) / (x) >= 67 */
151                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
152                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
153                 //                    (qscale * quant_matrix[i]);
154                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
155
156                 if (qmat16[qscale][0][i] == 0 ||
157                     qmat16[qscale][0][i] == 128 * 256)
158                     qmat16[qscale][0][i] = 128 * 256 - 1;
159                 qmat16[qscale][1][i] =
160                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
161                                 qmat16[qscale][0][i]);
162             }
163         }
164
165         for (i = intra; i < 64; i++) {
166             int64_t max = 8191;
167             if (fdsp->fdct == ff_fdct_ifast) {
168                 max = (8191LL * ff_aanscales[i]) >> 14;
169             }
170             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
171                 shift++;
172             }
173         }
174     }
175     if (shift) {
176         av_log(s->avctx, AV_LOG_INFO,
177                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
178                QMAT_SHIFT - shift);
179     }
180 }
181
182 static inline void update_qscale(MpegEncContext *s)
183 {
184     if (s->q_scale_type == 1 && 0) {
185         int i;
186         int bestdiff=INT_MAX;
187         int best = 1;
188
189         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
190             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
191             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
192                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
193                 continue;
194             if (diff < bestdiff) {
195                 bestdiff = diff;
196                 best = i;
197             }
198         }
199         s->qscale = best;
200     } else {
201         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
202                     (FF_LAMBDA_SHIFT + 7);
203         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
204     }
205
206     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
207                  FF_LAMBDA_SHIFT;
208 }
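/* For reference: with FF_LAMBDA_SHIFT = 7 and FF_LAMBDA_SCALE = 128, the linear
 * branch above is roughly qscale ~= lambda * 139 / 16384 ~= lambda / 118, i.e.
 * it inverts the usual lambda = qscale * FF_QP2LAMBDA relation with rounding,
 * then clips to the configured qmin/qmax.
 * Example: lambda = 4 * 118 = 472 -> (472 * 139 + 128 * 64) >> 14 = 73800 >> 14 = 4.
 * The non-linear MPEG-2 branch is currently disabled by the "&& 0" above. */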
209
210 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
211 {
212     int i;
213
214     if (matrix) {
215         put_bits(pb, 1, 1);
216         for (i = 0; i < 64; i++) {
217             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
218         }
219     } else
220         put_bits(pb, 1, 0);
221 }
222
223 /**
224  * init s->current_picture.qscale_table from s->lambda_table
225  */
226 void ff_init_qscale_tab(MpegEncContext *s)
227 {
228     int8_t * const qscale_table = s->current_picture.qscale_table;
229     int i;
230
231     for (i = 0; i < s->mb_num; i++) {
232         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
233         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
234         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
235                                                   s->avctx->qmax);
236     }
237 }
238
239 static void update_duplicate_context_after_me(MpegEncContext *dst,
240                                               MpegEncContext *src)
241 {
242 #define COPY(a) dst->a= src->a
243     COPY(pict_type);
244     COPY(current_picture);
245     COPY(f_code);
246     COPY(b_code);
247     COPY(qscale);
248     COPY(lambda);
249     COPY(lambda2);
250     COPY(picture_in_gop_number);
251     COPY(gop_picture_number);
252     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
253     COPY(progressive_frame);    // FIXME don't set in encode_header
254     COPY(partitioned_frame);    // FIXME don't set in encode_header
255 #undef COPY
256 }
257
258 static void mpv_encode_init_static(void)
259 {
260     for (int i = -16; i < 16; i++)
261         default_fcode_tab[i + MAX_MV] = 1;
262 }
263
264 /**
265  * Set the given MpegEncContext to defaults for encoding.
266  * The changed fields will not depend upon the prior state of the MpegEncContext.
267  */
268 static void mpv_encode_defaults(MpegEncContext *s)
269 {
270     static AVOnce init_static_once = AV_ONCE_INIT;
271
272     ff_mpv_common_defaults(s);
273
274     ff_thread_once(&init_static_once, mpv_encode_init_static);
275
276     s->me.mv_penalty = default_mv_penalty;
277     s->fcode_tab     = default_fcode_tab;
278
279     s->input_picture_number  = 0;
280     s->picture_in_gop_number = 0;
281 }
282
283 av_cold int ff_dct_encode_init(MpegEncContext *s)
284 {
285     if (ARCH_X86)
286         ff_dct_encode_init_x86(s);
287
288     if (CONFIG_H263_ENCODER)
289         ff_h263dsp_init(&s->h263dsp);
290     if (!s->dct_quantize)
291         s->dct_quantize = ff_dct_quantize_c;
292     if (!s->denoise_dct)
293         s->denoise_dct  = denoise_dct_c;
294     s->fast_dct_quantize = s->dct_quantize;
295     if (s->avctx->trellis)
296         s->dct_quantize  = dct_quantize_trellis_c;
297
298     return 0;
299 }
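/* Note: when trellis quantization is requested, dct_quantize is redirected to
 * the trellis implementation, while the plain quantizer stays reachable as
 * fast_dct_quantize for callers that do not need the full search. */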
300
301 /* init video encoder */
302 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
303 {
304     MpegEncContext *s = avctx->priv_data;
305     AVCPBProperties *cpb_props;
306     int i, ret;
307
308     mpv_encode_defaults(s);
309
310     switch (avctx->pix_fmt) {
311     case AV_PIX_FMT_YUVJ444P:
312     case AV_PIX_FMT_YUV444P:
313         s->chroma_format = CHROMA_444;
314         break;
315     case AV_PIX_FMT_YUVJ422P:
316     case AV_PIX_FMT_YUV422P:
317         s->chroma_format = CHROMA_422;
318         break;
319     case AV_PIX_FMT_YUVJ420P:
320     case AV_PIX_FMT_YUV420P:
321     default:
322         s->chroma_format = CHROMA_420;
323         break;
324     }
325
326     avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
327
328     s->bit_rate = avctx->bit_rate;
329     s->width    = avctx->width;
330     s->height   = avctx->height;
331     if (avctx->gop_size > 600 &&
332         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
333         av_log(avctx, AV_LOG_WARNING,
334                "keyframe interval too large, reducing it from %d to %d\n",
335                avctx->gop_size, 600);
336         avctx->gop_size = 600;
337     }
338     s->gop_size     = avctx->gop_size;
339     s->avctx        = avctx;
340     if (avctx->max_b_frames > MAX_B_FRAMES) {
341         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
342                "is %d.\n", MAX_B_FRAMES);
343         avctx->max_b_frames = MAX_B_FRAMES;
344     }
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347     s->strict_std_compliance = avctx->strict_std_compliance;
348     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
349     s->rtp_mode           = !!s->rtp_payload_size;
350     s->intra_dc_precision = avctx->intra_dc_precision;
351
352     // work around differences in how applications specify dc precision
353     if (s->intra_dc_precision < 0) {
354         s->intra_dc_precision += 8;
355     } else if (s->intra_dc_precision >= 8)
356         s->intra_dc_precision -= 8;
357
358     if (s->intra_dc_precision < 0) {
359         av_log(avctx, AV_LOG_ERROR,
360                 "intra dc precision must be positive; note that some applications use"
361                 " 0 and others use 8 as the base (both meaning 8 bit), the value must not be smaller than that\n");
362         return AVERROR(EINVAL);
363     }
364
365     if (avctx->codec_id == AV_CODEC_ID_AMV || (avctx->active_thread_type & FF_THREAD_SLICE))
366         s->huffman = 0;
367
368     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
369         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
370         return AVERROR(EINVAL);
371     }
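    /* After the adjustment above, intra_dc_precision is the MPEG-2 style value
     * 0..3 (meaning 8..11 bit DC), whether the application passed 0-based or
     * 8-based numbers; codecs other than MPEG-2 only accept 0 here. */
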
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     /* Fixed QSCALE */
382     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
383
384     s->adaptive_quant = (avctx->lumi_masking ||
385                          avctx->dark_masking ||
386                          avctx->temporal_cplx_masking ||
387                          avctx->spatial_cplx_masking  ||
388                          avctx->p_masking      ||
389                          s->border_masking ||
390                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
391                         !s->fixed_qscale;
392
393     s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
394
395     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
396         switch(avctx->codec_id) {
397         case AV_CODEC_ID_MPEG1VIDEO:
398         case AV_CODEC_ID_MPEG2VIDEO:
399             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
400             break;
401         case AV_CODEC_ID_MPEG4:
402         case AV_CODEC_ID_MSMPEG4V1:
403         case AV_CODEC_ID_MSMPEG4V2:
404         case AV_CODEC_ID_MSMPEG4V3:
405             if       (avctx->rc_max_rate >= 15000000) {
406                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
407             } else if(avctx->rc_max_rate >=  2000000) {
408                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
409             } else if(avctx->rc_max_rate >=   384000) {
410                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
411             } else
412                 avctx->rc_buffer_size = 40;
413             avctx->rc_buffer_size *= 16384;
414             break;
415         }
416         if (avctx->rc_buffer_size) {
417             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
418         }
419     }
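    /* Worked example of the MPEG-1/2 default above: rc_max_rate = 15 Mbit/s gives
     * 15000000 * 112 / 15000000 * 16384 = 112 * 16384 = 1835008 bits, which the
     * log above reports as 1835008 / 8192 = 224 kbyte. */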
420
421     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
422         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
423         return AVERROR(EINVAL);
424     }
425
426     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
427         av_log(avctx, AV_LOG_INFO,
428                "Warning: min_rate > 0 with min_rate != max_rate is not recommended!\n");
429     }
430
431     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
432         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
433         return AVERROR(EINVAL);
434     }
435
436     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
437         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
438         return AVERROR(EINVAL);
439     }
440
441     if (avctx->rc_max_rate &&
442         avctx->rc_max_rate == avctx->bit_rate &&
443         avctx->rc_max_rate != avctx->rc_min_rate) {
444         av_log(avctx, AV_LOG_INFO,
445                "impossible bitrate constraints, this will fail\n");
446     }
447
448     if (avctx->rc_buffer_size &&
449         avctx->bit_rate * (int64_t)avctx->time_base.num >
450             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
451         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
452         return AVERROR(EINVAL);
453     }
454
455     if (!s->fixed_qscale &&
456         avctx->bit_rate * av_q2d(avctx->time_base) >
457             avctx->bit_rate_tolerance) {
458         av_log(avctx, AV_LOG_WARNING,
459                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
460         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
461     }
462
463     if (avctx->rc_max_rate &&
464         avctx->rc_min_rate == avctx->rc_max_rate &&
465         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
466          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
467         90000LL * (avctx->rc_buffer_size - 1) >
468             avctx->rc_max_rate * 0xFFFFLL) {
469         av_log(avctx, AV_LOG_INFO,
470                "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the "
471                "specified vbv buffer is too large for the given bitrate!\n");
472     }
473
474     if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
475         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
476         s->codec_id != AV_CODEC_ID_FLV1) {
477         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
478         return AVERROR(EINVAL);
479     }
480
481     if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
482         av_log(avctx, AV_LOG_ERROR,
483                "OBMC is only supported with simple mb decision\n");
484         return AVERROR(EINVAL);
485     }
486
487     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
489         return AVERROR(EINVAL);
490     }
491
492     if (s->max_b_frames                    &&
493         s->codec_id != AV_CODEC_ID_MPEG4      &&
494         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
495         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
496         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
497         return AVERROR(EINVAL);
498     }
499     if (s->max_b_frames < 0) {
500         av_log(avctx, AV_LOG_ERROR,
501                "max_b_frames must be 0 or positive for mpegvideo-based encoders\n");
502         return AVERROR(EINVAL);
503     }
504
505     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
506          s->codec_id == AV_CODEC_ID_H263  ||
507          s->codec_id == AV_CODEC_ID_H263P) &&
508         (avctx->sample_aspect_ratio.num > 255 ||
509          avctx->sample_aspect_ratio.den > 255)) {
510         av_log(avctx, AV_LOG_WARNING,
511                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
512                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
513         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
514                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
515     }
516
517     if ((s->codec_id == AV_CODEC_ID_H263  ||
518          s->codec_id == AV_CODEC_ID_H263P) &&
519         (avctx->width  > 2048 ||
520          avctx->height > 1152 )) {
521         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
522         return AVERROR(EINVAL);
523     }
524     if ((s->codec_id == AV_CODEC_ID_H263  ||
525          s->codec_id == AV_CODEC_ID_H263P ||
526          s->codec_id == AV_CODEC_ID_RV20) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
530         return AVERROR(EINVAL);
531     }
532
533     if (s->codec_id == AV_CODEC_ID_RV10 &&
534         (avctx->width &15 ||
535          avctx->height&15 )) {
536         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
537         return AVERROR(EINVAL);
538     }
539
540     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
541          s->codec_id == AV_CODEC_ID_WMV2) &&
542          avctx->width & 1) {
543         av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 2\n");
544         return AVERROR(EINVAL);
545     }
546
547     if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
548         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
549         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
550         return AVERROR(EINVAL);
551     }
552
553     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
554         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
555         return AVERROR(EINVAL);
556     }
557
558     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
559         avctx->mb_decision != FF_MB_DECISION_RD) {
560         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
561         return AVERROR(EINVAL);
562     }
563
564     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
565             (s->codec_id == AV_CODEC_ID_AMV ||
566              s->codec_id == AV_CODEC_ID_MJPEG)) {
567         // Used to produce garbage with MJPEG.
568         av_log(avctx, AV_LOG_ERROR,
569                "QP RD is no longer compatible with MJPEG or AMV\n");
570         return AVERROR(EINVAL);
571     }
572
573     if (s->scenechange_threshold < 1000000000 &&
574         (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "closed GOP with scene change detection is not supported yet, "
577                "set the threshold to 1000000000\n");
578         return AVERROR_PATCHWELCOME;
579     }
580
581     if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
582         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
583             s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "low delay forcing is only available for mpeg2, "
586                    "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
587             return AVERROR(EINVAL);
588         }
589         if (s->max_b_frames != 0) {
590             av_log(avctx, AV_LOG_ERROR,
591                    "B-frames cannot be used with low delay\n");
592             return AVERROR(EINVAL);
593         }
594     }
595
596     if (s->q_scale_type == 1) {
597         if (avctx->qmax > 28) {
598             av_log(avctx, AV_LOG_ERROR,
599                    "non-linear quant currently only supports qmax <= 28\n");
600             return AVERROR_PATCHWELCOME;
601         }
602     }
603
604     if (avctx->slices > 1 &&
605         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
606         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
607         return AVERROR(EINVAL);
608     }
609
610     if (avctx->thread_count > 1         &&
611         s->codec_id != AV_CODEC_ID_MPEG4      &&
612         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
613         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
614         s->codec_id != AV_CODEC_ID_MJPEG      &&
615         (s->codec_id != AV_CODEC_ID_H263P)) {
616         av_log(avctx, AV_LOG_ERROR,
617                "multi-threaded encoding not supported by codec\n");
618         return AVERROR_PATCHWELCOME;
619     }
620
621     if (avctx->thread_count < 1) {
622         av_log(avctx, AV_LOG_ERROR,
623                "automatic thread number detection not supported by codec, "
624                "patch welcome\n");
625         return AVERROR_PATCHWELCOME;
626     }
627
628     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
629         av_log(avctx, AV_LOG_INFO,
630                "notice: b_frame_strategy only affects the first pass\n");
631         s->b_frame_strategy = 0;
632     }
633
634     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
635     if (i > 1) {
636         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
637         avctx->time_base.den /= i;
638         avctx->time_base.num /= i;
639         //return -1;
640     }
641
642     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
643         // (a + x * 3 / 8) / x
644         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
645         s->inter_quant_bias = 0;
646     } else {
647         s->intra_quant_bias = 0;
648         // (a - x / 4) / x
649         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
650     }
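    /* With QUANT_BIAS_SHIFT = 8 the bias is expressed in 1/256ths of a
     * quantization step: 3 << 5 = 96 = 0.375 * 256 (round up by 3/8 for intra in
     * the codecs above) and -(1 << 6) = -64 = -0.25 * 256 (round towards zero by
     * 1/4 for inter), matching the (a + x*3/8)/x and (a - x/4)/x comments. */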
651
652     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
653         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < qmin <= qmax\n");
654         return AVERROR(EINVAL);
655     }
656
657     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
658
659     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
660         avctx->time_base.den > (1 << 16) - 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "timebase %d/%d not supported by the MPEG-4 standard, "
663                "the maximum admitted value for the timebase denominator "
664                "is %d\n", avctx->time_base.num, avctx->time_base.den,
665                (1 << 16) - 1);
666         return AVERROR(EINVAL);
667     }
668     s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
669
670     switch (avctx->codec->id) {
671     case AV_CODEC_ID_MPEG1VIDEO:
672         s->out_format = FMT_MPEG1;
673         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
674         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
675         break;
676     case AV_CODEC_ID_MPEG2VIDEO:
677         s->out_format = FMT_MPEG1;
678         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
679         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
680         s->rtp_mode   = 1;
681         break;
682 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
683     case AV_CODEC_ID_MJPEG:
684     case AV_CODEC_ID_AMV:
685         s->out_format = FMT_MJPEG;
686         s->intra_only = 1; /* force intra only for jpeg */
687         if ((ret = ff_mjpeg_encode_init(s)) < 0)
688             return ret;
689         avctx->delay = 0;
690         s->low_delay = 1;
691         break;
692 #endif
693     case AV_CODEC_ID_SPEEDHQ:
694         s->out_format = FMT_SPEEDHQ;
695         s->intra_only = 1; /* force intra only for SHQ */
696         if (!CONFIG_SPEEDHQ_ENCODER)
697             return AVERROR_ENCODER_NOT_FOUND;
698         if ((ret = ff_speedhq_encode_init(s)) < 0)
699             return ret;
700         avctx->delay = 0;
701         s->low_delay = 1;
702         break;
703     case AV_CODEC_ID_H261:
704         if (!CONFIG_H261_ENCODER)
705             return AVERROR_ENCODER_NOT_FOUND;
706         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
707             av_log(avctx, AV_LOG_ERROR,
708                    "The specified picture size of %dx%d is not valid for the "
709                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
710                     s->width, s->height);
711             return AVERROR(EINVAL);
712         }
713         s->out_format = FMT_H261;
714         avctx->delay  = 0;
715         s->low_delay  = 1;
716         s->rtp_mode   = 0; /* Sliced encoding not supported */
717         break;
718     case AV_CODEC_ID_H263:
719         if (!CONFIG_H263_ENCODER)
720             return AVERROR_ENCODER_NOT_FOUND;
721         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
722                              s->width, s->height) == 8) {
723             av_log(avctx, AV_LOG_ERROR,
724                    "The specified picture size of %dx%d is not valid for "
725                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
726                    "352x288, 704x576, and 1408x1152. "
727                    "Try H.263+.\n", s->width, s->height);
728             return AVERROR(EINVAL);
729         }
730         s->out_format = FMT_H263;
731         avctx->delay  = 0;
732         s->low_delay  = 1;
733         break;
734     case AV_CODEC_ID_H263P:
735         s->out_format = FMT_H263;
736         s->h263_plus  = 1;
737         /* Fx */
738         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
739         s->modified_quant  = s->h263_aic;
740         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
741         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
742
743         /* /Fx */
744         /* These are just to be sure */
745         avctx->delay = 0;
746         s->low_delay = 1;
747         break;
748     case AV_CODEC_ID_FLV1:
749         s->out_format      = FMT_H263;
750         s->h263_flv        = 2; /* format = 1; 11-bit codes */
751         s->unrestricted_mv = 1;
752         s->rtp_mode  = 0; /* don't allow GOB */
753         avctx->delay = 0;
754         s->low_delay = 1;
755         break;
756     case AV_CODEC_ID_RV10:
757         s->out_format = FMT_H263;
758         avctx->delay  = 0;
759         s->low_delay  = 1;
760         break;
761     case AV_CODEC_ID_RV20:
762         s->out_format      = FMT_H263;
763         avctx->delay       = 0;
764         s->low_delay       = 1;
765         s->modified_quant  = 1;
766         s->h263_aic        = 1;
767         s->h263_plus       = 1;
768         s->loop_filter     = 1;
769         s->unrestricted_mv = 0;
770         break;
771     case AV_CODEC_ID_MPEG4:
772         s->out_format      = FMT_H263;
773         s->h263_pred       = 1;
774         s->unrestricted_mv = 1;
775         s->low_delay       = s->max_b_frames ? 0 : 1;
776         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
777         break;
778     case AV_CODEC_ID_MSMPEG4V2:
779         s->out_format      = FMT_H263;
780         s->h263_pred       = 1;
781         s->unrestricted_mv = 1;
782         s->msmpeg4_version = 2;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         break;
786     case AV_CODEC_ID_MSMPEG4V3:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 3;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV1:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 4;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     case AV_CODEC_ID_WMV2:
805         s->out_format        = FMT_H263;
806         s->h263_pred         = 1;
807         s->unrestricted_mv   = 1;
808         s->msmpeg4_version   = 5;
809         s->flipflop_rounding = 1;
810         avctx->delay         = 0;
811         s->low_delay         = 1;
812         break;
813     default:
814         return AVERROR(EINVAL);
815     }
816
817     avctx->has_b_frames = !s->low_delay;
818
819     s->encoding = 1;
820
821     s->progressive_frame    =
822     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
823                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
824                                 s->alternate_scan);
825
826     /* init */
827     ff_mpv_idct_init(s);
828     if ((ret = ff_mpv_common_init(s)) < 0)
829         return ret;
830
831     ff_fdctdsp_init(&s->fdsp, avctx);
832     ff_me_cmp_init(&s->mecc, avctx);
833     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
834     ff_pixblockdsp_init(&s->pdsp, avctx);
835     ff_qpeldsp_init(&s->qdsp);
836
837     if (s->msmpeg4_version) {
838         int ac_stats_size = 2 * 2 * (MAX_LEVEL + 1) *  (MAX_RUN + 1) * 2 * sizeof(int);
839         if (!(s->ac_stats = av_mallocz(ac_stats_size)))
840             return AVERROR(ENOMEM);
841     }
842
843     if (!(avctx->stats_out = av_mallocz(256))               ||
844         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
845         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
846         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
847         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
848         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
849         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
850         !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
851         !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
852         return AVERROR(ENOMEM);
853
854     if (s->noise_reduction) {
855         if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
856             return AVERROR(ENOMEM);
857     }
858
859     ff_dct_encode_init(s);
860
861     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
862         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
863
864     if (s->slice_context_count > 1) {
865         s->rtp_mode = 1;
866
867         if (avctx->codec_id == AV_CODEC_ID_H263P)
868             s->h263_slice_structured = 1;
869     }
870
871     s->quant_precision = 5;
872
873     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
874     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
875
876     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
877         ff_h261_encode_init(s);
878     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
879         ff_h263_encode_init(s);
880     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
881         ff_msmpeg4_encode_init(s);
882     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
883         && s->out_format == FMT_MPEG1)
884         ff_mpeg1_encode_init(s);
885
886     /* init q matrix */
887     for (i = 0; i < 64; i++) {
888         int j = s->idsp.idct_permutation[i];
889         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
890             s->mpeg_quant) {
891             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
892             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
893         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
894             s->intra_matrix[j] =
895             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
896         } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
897             s->intra_matrix[j] =
898             s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
899         } else {
900             /* MPEG-1/2 */
901             s->chroma_intra_matrix[j] =
902             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
903             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
904         }
905         if (avctx->intra_matrix)
906             s->intra_matrix[j] = avctx->intra_matrix[i];
907         if (avctx->inter_matrix)
908             s->inter_matrix[j] = avctx->inter_matrix[i];
909     }
910
911     /* precompute matrix */
912     /* for mjpeg, we do include qscale in the matrix */
913     if (s->out_format != FMT_MJPEG) {
914         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
915                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
916                           31, 1);
917         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
918                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
919                           31, 0);
920     }
921
922     if ((ret = ff_rate_control_init(s)) < 0)
923         return ret;
924
925     if (s->b_frame_strategy == 2) {
926         for (i = 0; i < s->max_b_frames + 2; i++) {
927             s->tmp_frames[i] = av_frame_alloc();
928             if (!s->tmp_frames[i])
929                 return AVERROR(ENOMEM);
930
931             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
932             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
933             s->tmp_frames[i]->height = s->height >> s->brd_scale;
934
935             ret = av_frame_get_buffer(s->tmp_frames[i], 0);
936             if (ret < 0)
937                 return ret;
938         }
939     }
940
941     cpb_props = ff_add_cpb_side_data(avctx);
942     if (!cpb_props)
943         return AVERROR(ENOMEM);
944     cpb_props->max_bitrate = avctx->rc_max_rate;
945     cpb_props->min_bitrate = avctx->rc_min_rate;
946     cpb_props->avg_bitrate = avctx->bit_rate;
947     cpb_props->buffer_size = avctx->rc_buffer_size;
948
949     return 0;
950 }
951
952 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
953 {
954     MpegEncContext *s = avctx->priv_data;
955     int i;
956
957     ff_rate_control_uninit(s);
958
959     ff_mpv_common_end(s);
960     if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
961         s->out_format == FMT_MJPEG)
962         ff_mjpeg_encode_close(s);
963
964     av_freep(&avctx->extradata);
965
966     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
967         av_frame_free(&s->tmp_frames[i]);
968
969     ff_free_picture_tables(&s->new_picture);
970     ff_mpeg_unref_picture(avctx, &s->new_picture);
971
972     av_freep(&avctx->stats_out);
973     av_freep(&s->ac_stats);
974
975     if (s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
976     if (s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
977     s->q_chroma_intra_matrix   = NULL;
978     s->q_chroma_intra_matrix16 = NULL;
979     av_freep(&s->q_intra_matrix);
980     av_freep(&s->q_inter_matrix);
981     av_freep(&s->q_intra_matrix16);
982     av_freep(&s->q_inter_matrix16);
983     av_freep(&s->input_picture);
984     av_freep(&s->reordered_input_picture);
985     av_freep(&s->dct_offset);
986
987     return 0;
988 }
989
990 static int get_sae(uint8_t *src, int ref, int stride)
991 {
992     int x,y;
993     int acc = 0;
994
995     for (y = 0; y < 16; y++) {
996         for (x = 0; x < 16; x++) {
997             acc += FFABS(src[x + y * stride] - ref);
998         }
999     }
1000
1001     return acc;
1002 }
1003
1004 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1005                            uint8_t *ref, int stride)
1006 {
1007     int x, y, w, h;
1008     int acc = 0;
1009
1010     w = s->width  & ~15;
1011     h = s->height & ~15;
1012
1013     for (y = 0; y < h; y += 16) {
1014         for (x = 0; x < w; x += 16) {
1015             int offset = x + y * stride;
1016             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1017                                       stride, 16);
1018             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1019             int sae  = get_sae(src + offset, mean, stride);
1020
1021             acc += sae + 500 < sad;
1022         }
1023     }
1024     return acc;
1025 }
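/* Rough idea of get_intra_count(): for each 16x16 block, sae measures how well
 * the block is described by its own mean (a cheap stand-in for the cost of
 * intra coding) while sad measures the difference against the reference frame
 * (inter cost); the return value counts macroblocks that look clearly cheaper
 * as intra.  b_frame_strategy == 1 uses this to score B-frame candidates. */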
1026
1027 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1028 {
1029     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1030                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1031                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1032                             &s->linesize, &s->uvlinesize);
1033 }
1034
1035 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1036 {
1037     Picture *pic = NULL;
1038     int64_t pts;
1039     int i, display_picture_number = 0, ret;
1040     int encoding_delay = s->max_b_frames ? s->max_b_frames
1041                                          : (s->low_delay ? 0 : 1);
1042     int flush_offset = 1;
1043     int direct = 1;
1044
1045     if (pic_arg) {
1046         pts = pic_arg->pts;
1047         display_picture_number = s->input_picture_number++;
1048
1049         if (pts != AV_NOPTS_VALUE) {
1050             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1051                 int64_t last = s->user_specified_pts;
1052
1053                 if (pts <= last) {
1054                     av_log(s->avctx, AV_LOG_ERROR,
1055                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1056                            pts, last);
1057                     return AVERROR(EINVAL);
1058                 }
1059
1060                 if (!s->low_delay && display_picture_number == 1)
1061                     s->dts_delta = pts - last;
1062             }
1063             s->user_specified_pts = pts;
1064         } else {
1065             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1066                 s->user_specified_pts =
1067                 pts = s->user_specified_pts + 1;
1068                 av_log(s->avctx, AV_LOG_INFO,
1069                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1070                        pts);
1071             } else {
1072                 pts = display_picture_number;
1073             }
1074         }
1075
1076         if (!pic_arg->buf[0] ||
1077             pic_arg->linesize[0] != s->linesize ||
1078             pic_arg->linesize[1] != s->uvlinesize ||
1079             pic_arg->linesize[2] != s->uvlinesize)
1080             direct = 0;
1081         if ((s->width & 15) || (s->height & 15))
1082             direct = 0;
1083         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1084             direct = 0;
1085         if (s->linesize & (STRIDE_ALIGN-1))
1086             direct = 0;
1087
1088         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1089                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1090
1091         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1092         if (i < 0)
1093             return i;
1094
1095         pic = &s->picture[i];
1096         pic->reference = 3;
1097
1098         if (direct) {
1099             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1100                 return ret;
1101         }
1102         ret = alloc_picture(s, pic, direct);
1103         if (ret < 0)
1104             return ret;
1105
1106         if (!direct) {
1107             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1108                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1109                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1110                 // empty
1111             } else {
1112                 int h_chroma_shift, v_chroma_shift;
1113                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1114                                                  &h_chroma_shift,
1115                                                  &v_chroma_shift);
1116
1117                 for (i = 0; i < 3; i++) {
1118                     int src_stride = pic_arg->linesize[i];
1119                     int dst_stride = i ? s->uvlinesize : s->linesize;
1120                     int h_shift = i ? h_chroma_shift : 0;
1121                     int v_shift = i ? v_chroma_shift : 0;
1122                     int w = s->width  >> h_shift;
1123                     int h = s->height >> v_shift;
1124                     uint8_t *src = pic_arg->data[i];
1125                     uint8_t *dst = pic->f->data[i];
1126                     int vpad = 16;
1127
1128                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1129                         && !s->progressive_sequence
1130                         && FFALIGN(s->height, 32) - s->height > 16)
1131                         vpad = 32;
1132
1133                     if (!s->avctx->rc_buffer_size)
1134                         dst += INPLACE_OFFSET;
1135
1136                     if (src_stride == dst_stride)
1137                         memcpy(dst, src, src_stride * h);
1138                     else {
1139                         int h2 = h;
1140                         uint8_t *dst2 = dst;
1141                         while (h2--) {
1142                             memcpy(dst2, src, w);
1143                             dst2 += dst_stride;
1144                             src += src_stride;
1145                         }
1146                     }
1147                     if ((s->width & 15) || (s->height & (vpad-1))) {
1148                         s->mpvencdsp.draw_edges(dst, dst_stride,
1149                                                 w, h,
1150                                                 16 >> h_shift,
1151                                                 vpad >> v_shift,
1152                                                 EDGE_BOTTOM);
1153                     }
1154                 }
1155                 emms_c();
1156             }
1157         }
1158         ret = av_frame_copy_props(pic->f, pic_arg);
1159         if (ret < 0)
1160             return ret;
1161
1162         pic->f->display_picture_number = display_picture_number;
1163         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1164     } else {
1165         /* Flushing: When we have not received enough input frames,
1166          * ensure s->input_picture[0] contains the first picture */
1167         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1168             if (s->input_picture[flush_offset])
1169                 break;
1170
1171         if (flush_offset <= 1)
1172             flush_offset = 1;
1173         else
1174             encoding_delay = encoding_delay - flush_offset + 1;
1175     }
1176
1177     /* shift buffer entries */
1178     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1179         s->input_picture[i - flush_offset] = s->input_picture[i];
1180
1181     s->input_picture[encoding_delay] = (Picture*) pic;
1182
1183     return 0;
1184 }
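/* Summary of load_input_picture(): the incoming frame ends up at
 * input_picture[encoding_delay] while the queue is shifted down, so that
 * input_picture[0] is the frame whose turn it is to be coded once enough
 * pictures have been buffered for B-frame reordering.  "direct" means the
 * user's buffers could be referenced as-is; otherwise the data was copied
 * (and bottom edges padded) above. */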
1185
1186 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1187 {
1188     int x, y, plane;
1189     int score = 0;
1190     int64_t score64 = 0;
1191
1192     for (plane = 0; plane < 3; plane++) {
1193         const int stride = p->f->linesize[plane];
1194         const int bw = plane ? 1 : 2;
1195         for (y = 0; y < s->mb_height * bw; y++) {
1196             for (x = 0; x < s->mb_width * bw; x++) {
1197                 int off = p->shared ? 0 : 16;
1198                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1199                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1200                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1201
1202                 switch (FFABS(s->frame_skip_exp)) {
1203                 case 0: score    =  FFMAX(score, v);          break;
1204                 case 1: score   += FFABS(v);                  break;
1205                 case 2: score64 += v * (int64_t)v;                       break;
1206                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1207                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1208                 }
1209             }
1210         }
1211     }
1212     emms_c();
1213
1214     if (score)
1215         score64 = score;
1216     if (s->frame_skip_exp < 0)
1217         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1218                       -1.0/s->frame_skip_exp);
1219
1220     if (score64 < s->frame_skip_threshold)
1221         return 1;
1222     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1223         return 1;
1224     return 0;
1225 }
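/* frame_skip_exp selects the error norm accumulated above (0 = maximum,
 * 1 = sum of absolute values, 2 = sum of squares, ...); a negative value uses
 * the same accumulation but normalizes by the macroblock count and takes the
 * corresponding root.  The frame is considered skippable when the score stays
 * below frame_skip_threshold or below a lambda-scaled limit derived from
 * frame_skip_factor. */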
1226
1227 static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
1228 {
1229     int ret;
1230     int size = 0;
1231
1232     ret = avcodec_send_frame(c, frame);
1233     if (ret < 0)
1234         return ret;
1235
1236     do {
1237         ret = avcodec_receive_packet(c, pkt);
1238         if (ret >= 0) {
1239             size += pkt->size;
1240             av_packet_unref(pkt);
1241         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1242             return ret;
1243     } while (ret >= 0);
1244
1245     return size;
1246 }
1247
1248 static int estimate_best_b_count(MpegEncContext *s)
1249 {
1250     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1251     AVPacket *pkt;
1252     const int scale = s->brd_scale;
1253     int width  = s->width  >> scale;
1254     int height = s->height >> scale;
1255     int i, j, out_size, p_lambda, b_lambda, lambda2;
1256     int64_t best_rd  = INT64_MAX;
1257     int best_b_count = -1;
1258     int ret = 0;
1259
1260     av_assert0(scale >= 0 && scale <= 3);
1261
1262     pkt = av_packet_alloc();
1263     if (!pkt)
1264         return AVERROR(ENOMEM);
1265
1266     //emms_c();
1267     //s->next_picture_ptr->quality;
1268     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1269     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1270     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1271     if (!b_lambda) // FIXME we should do this somewhere else
1272         b_lambda = p_lambda;
1273     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1274                FF_LAMBDA_SHIFT;
1275
1276     for (i = 0; i < s->max_b_frames + 2; i++) {
1277         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1278                                                 s->next_picture_ptr;
1279         uint8_t *data[4];
1280
1281         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1282             pre_input = *pre_input_ptr;
1283             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1284
1285             if (!pre_input.shared && i) {
1286                 data[0] += INPLACE_OFFSET;
1287                 data[1] += INPLACE_OFFSET;
1288                 data[2] += INPLACE_OFFSET;
1289             }
1290
1291             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1292                                        s->tmp_frames[i]->linesize[0],
1293                                        data[0],
1294                                        pre_input.f->linesize[0],
1295                                        width, height);
1296             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1297                                        s->tmp_frames[i]->linesize[1],
1298                                        data[1],
1299                                        pre_input.f->linesize[1],
1300                                        width >> 1, height >> 1);
1301             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1302                                        s->tmp_frames[i]->linesize[2],
1303                                        data[2],
1304                                        pre_input.f->linesize[2],
1305                                        width >> 1, height >> 1);
1306         }
1307     }
1308
1309     for (j = 0; j < s->max_b_frames + 1; j++) {
1310         AVCodecContext *c;
1311         int64_t rd = 0;
1312
1313         if (!s->input_picture[j])
1314             break;
1315
1316         c = avcodec_alloc_context3(NULL);
1317         if (!c) {
1318             ret = AVERROR(ENOMEM);
1319             goto fail;
1320         }
1321
1322         c->width        = width;
1323         c->height       = height;
1324         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1325         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1326         c->mb_decision  = s->avctx->mb_decision;
1327         c->me_cmp       = s->avctx->me_cmp;
1328         c->mb_cmp       = s->avctx->mb_cmp;
1329         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1330         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1331         c->time_base    = s->avctx->time_base;
1332         c->max_b_frames = s->max_b_frames;
1333
1334         ret = avcodec_open2(c, codec, NULL);
1335         if (ret < 0)
1336             goto fail;
1337
1338
1339         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1340         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1341
1342         out_size = encode_frame(c, s->tmp_frames[0], pkt);
1343         if (out_size < 0) {
1344             ret = out_size;
1345             goto fail;
1346         }
1347
1348         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1349
1350         for (i = 0; i < s->max_b_frames + 1; i++) {
1351             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1352
1353             s->tmp_frames[i + 1]->pict_type = is_p ?
1354                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1355             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1356
1357             out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1358             if (out_size < 0) {
1359                 ret = out_size;
1360                 goto fail;
1361             }
1362
1363             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1364         }
1365
1366         /* get the delayed frames */
1367         out_size = encode_frame(c, NULL, pkt);
1368         if (out_size < 0) {
1369             ret = out_size;
1370             goto fail;
1371         }
1372         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1373
1374         rd += c->error[0] + c->error[1] + c->error[2];
1375
1376         if (rd < best_rd) {
1377             best_rd = rd;
1378             best_b_count = j;
1379         }
1380
1381 fail:
1382         avcodec_free_context(&c);
1383         av_packet_unref(pkt);
1384         if (ret < 0) {
1385             best_b_count = ret;
1386             break;
1387         }
1388     }
1389
1390     av_packet_free(&pkt);
1391
1392     return best_b_count;
1393 }
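/* b_frame_strategy == 2 in a nutshell: the queued input frames are downscaled
 * by brd_scale and re-encoded with a scratch low-resolution encoder instance,
 * once for every possible run of consecutive B-frames (j = 0 .. max_b_frames);
 * the candidate with the lowest rate-distortion cost (bits weighted by lambda2
 * plus the reconstruction error reported via AV_CODEC_FLAG_PSNR) wins. */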
1394
1395 static int select_input_picture(MpegEncContext *s)
1396 {
1397     int i, ret;
1398
1399     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1400         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1401     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1402
1403     /* set next picture type & ordering */
1404     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1405         if (s->frame_skip_threshold || s->frame_skip_factor) {
1406             if (s->picture_in_gop_number < s->gop_size &&
1407                 s->next_picture_ptr &&
1408                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1409                 // FIXME check that the gop check above is +-1 correct
1410                 av_frame_unref(s->input_picture[0]->f);
1411
1412                 ff_vbv_update(s, 0);
1413
1414                 goto no_output_pic;
1415             }
1416         }
1417
1418         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1419             !s->next_picture_ptr || s->intra_only) {
1420             s->reordered_input_picture[0] = s->input_picture[0];
1421             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1422             s->reordered_input_picture[0]->f->coded_picture_number =
1423                 s->coded_picture_number++;
1424         } else {
1425             int b_frames = 0;
1426
1427             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1428                 for (i = 0; i < s->max_b_frames + 1; i++) {
1429                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1430
1431                     if (pict_num >= s->rc_context.num_entries)
1432                         break;
1433                     if (!s->input_picture[i]) {
1434                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1435                         break;
1436                     }
1437
1438                     s->input_picture[i]->f->pict_type =
1439                         s->rc_context.entry[pict_num].new_pict_type;
1440                 }
1441             }
1442
1443             if (s->b_frame_strategy == 0) {
1444                 b_frames = s->max_b_frames;
1445                 while (b_frames && !s->input_picture[b_frames])
1446                     b_frames--;
1447             } else if (s->b_frame_strategy == 1) {
1448                 for (i = 1; i < s->max_b_frames + 1; i++) {
1449                     if (s->input_picture[i] &&
1450                         s->input_picture[i]->b_frame_score == 0) {
1451                         s->input_picture[i]->b_frame_score =
1452                             get_intra_count(s,
1453                                             s->input_picture[i    ]->f->data[0],
1454                                             s->input_picture[i - 1]->f->data[0],
1455                                             s->linesize) + 1;
1456                     }
1457                 }
1458                 for (i = 0; i < s->max_b_frames + 1; i++) {
1459                     if (!s->input_picture[i] ||
1460                         s->input_picture[i]->b_frame_score - 1 >
1461                             s->mb_num / s->b_sensitivity)
1462                         break;
1463                 }
1464
1465                 b_frames = FFMAX(0, i - 1);
1466
1467                 /* reset scores */
1468                 for (i = 0; i < b_frames + 1; i++) {
1469                     s->input_picture[i]->b_frame_score = 0;
1470                 }
1471             } else if (s->b_frame_strategy == 2) {
1472                 b_frames = estimate_best_b_count(s);
1473                 if (b_frames < 0)
1474                     return b_frames;
1475             }
1476
1477             emms_c();
1478
1479             for (i = b_frames - 1; i >= 0; i--) {
1480                 int type = s->input_picture[i]->f->pict_type;
1481                 if (type && type != AV_PICTURE_TYPE_B)
1482                     b_frames = i;
1483             }
1484             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1485                 b_frames == s->max_b_frames) {
1486                 av_log(s->avctx, AV_LOG_ERROR,
1487                        "warning, too many B-frames in a row\n");
1488             }
1489
1490             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1491                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1492                     s->gop_size > s->picture_in_gop_number) {
1493                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1494                 } else {
1495                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1496                         b_frames = 0;
1497                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1498                 }
1499             }
1500
1501             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1502                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1503                 b_frames--;
1504
1505             s->reordered_input_picture[0] = s->input_picture[b_frames];
1506             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1507                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1508             s->reordered_input_picture[0]->f->coded_picture_number =
1509                 s->coded_picture_number++;
1510             for (i = 0; i < b_frames; i++) {
1511                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1512                 s->reordered_input_picture[i + 1]->f->pict_type =
1513                     AV_PICTURE_TYPE_B;
1514                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1515                     s->coded_picture_number++;
1516             }
1517         }
1518     }
1519 no_output_pic:
1520     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1521
1522     if (s->reordered_input_picture[0]) {
1523         s->reordered_input_picture[0]->reference =
1524            s->reordered_input_picture[0]->f->pict_type !=
1525                AV_PICTURE_TYPE_B ? 3 : 0;
1526
1527         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1528             return ret;
1529
1530         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1531             // the input is a shared picture, so we can't modify it -> allocate a new
1532             // one and ensure that the shared one stays reusable
1533
1534             Picture *pic;
1535             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1536             if (i < 0)
1537                 return i;
1538             pic = &s->picture[i];
1539
1540             pic->reference = s->reordered_input_picture[0]->reference;
1541             if (alloc_picture(s, pic, 0) < 0) {
1542                 return -1;
1543             }
1544
1545             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1546             if (ret < 0)
1547                 return ret;
1548
1549             /* mark the input picture unused / release the shared frame */
1550             av_frame_unref(s->reordered_input_picture[0]->f);
1551             s->reordered_input_picture[0]->shared = 0;
1552
1553             s->current_picture_ptr = pic;
1554         } else {
1555             // input is not a shared pix -> reuse buffer for current_pix
1556             s->current_picture_ptr = s->reordered_input_picture[0];
1557             for (i = 0; i < 4; i++) {
1558                 if (s->new_picture.f->data[i])
1559                     s->new_picture.f->data[i] += INPLACE_OFFSET;
1560             }
1561         }
1562         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1563         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1564                                        s->current_picture_ptr)) < 0)
1565             return ret;
1566
1567         s->picture_number = s->new_picture.f->display_picture_number;
1568     }
1569     return 0;
1570 }
1571
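/* Post-encode bookkeeping: pad the borders of the reconstructed reference
 * picture with draw_edges() (required so that motion vectors may point
 * outside the picture), and remember the picture type and lambda for the
 * next frame. */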
1572 static void frame_end(MpegEncContext *s)
1573 {
1574     if (s->unrestricted_mv &&
1575         s->current_picture.reference &&
1576         !s->intra_only) {
1577         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1578         int hshift = desc->log2_chroma_w;
1579         int vshift = desc->log2_chroma_h;
1580         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1581                                 s->current_picture.f->linesize[0],
1582                                 s->h_edge_pos, s->v_edge_pos,
1583                                 EDGE_WIDTH, EDGE_WIDTH,
1584                                 EDGE_TOP | EDGE_BOTTOM);
1585         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1586                                 s->current_picture.f->linesize[1],
1587                                 s->h_edge_pos >> hshift,
1588                                 s->v_edge_pos >> vshift,
1589                                 EDGE_WIDTH >> hshift,
1590                                 EDGE_WIDTH >> vshift,
1591                                 EDGE_TOP | EDGE_BOTTOM);
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1593                                 s->current_picture.f->linesize[2],
1594                                 s->h_edge_pos >> hshift,
1595                                 s->v_edge_pos >> vshift,
1596                                 EDGE_WIDTH >> hshift,
1597                                 EDGE_WIDTH >> vshift,
1598                                 EDGE_TOP | EDGE_BOTTOM);
1599     }
1600
1601     emms_c();
1602
1603     s->last_pict_type                 = s->pict_type;
1604     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1605     if (s->pict_type!= AV_PICTURE_TYPE_B)
1606         s->last_non_b_pict_type = s->pict_type;
1607 }
1608
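/* Rescale the accumulated DCT error statistics (halving them once the sample
 * count exceeds 2^16) and recompute the per-coefficient dct_offset[] bias
 * used by the noise-reduction quantizer. */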
1609 static void update_noise_reduction(MpegEncContext *s)
1610 {
1611     int intra, i;
1612
1613     for (intra = 0; intra < 2; intra++) {
1614         if (s->dct_count[intra] > (1 << 16)) {
1615             for (i = 0; i < 64; i++) {
1616                 s->dct_error_sum[intra][i] >>= 1;
1617             }
1618             s->dct_count[intra] >>= 1;
1619         }
1620
1621         for (i = 0; i < 64; i++) {
1622             s->dct_offset[intra][i] = (s->noise_reduction *
1623                                        s->dct_count[intra] +
1624                                        s->dct_error_sum[intra][i] / 2) /
1625                                       (s->dct_error_sum[intra][i] + 1);
1626         }
1627     }
1628 }
1629
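/* Per-frame setup: rotate the last/next/current picture references, adjust
 * data pointers and line sizes for field pictures, select the dct_unquantize
 * functions matching the output format, and refresh the noise-reduction
 * tables if enabled. */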
1630 static int frame_start(MpegEncContext *s)
1631 {
1632     int ret;
1633
1634     /* mark & release old frames */
1635     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1636         s->last_picture_ptr != s->next_picture_ptr &&
1637         s->last_picture_ptr->f->buf[0]) {
1638         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1639     }
1640
1641     s->current_picture_ptr->f->pict_type = s->pict_type;
1642     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1643
1644     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1645     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1646                                    s->current_picture_ptr)) < 0)
1647         return ret;
1648
1649     if (s->pict_type != AV_PICTURE_TYPE_B) {
1650         s->last_picture_ptr = s->next_picture_ptr;
1651         if (!s->droppable)
1652             s->next_picture_ptr = s->current_picture_ptr;
1653     }
1654
1655     if (s->last_picture_ptr) {
1656         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1657         if (s->last_picture_ptr->f->buf[0] &&
1658             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1659                                        s->last_picture_ptr)) < 0)
1660             return ret;
1661     }
1662     if (s->next_picture_ptr) {
1663         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1664         if (s->next_picture_ptr->f->buf[0] &&
1665             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1666                                        s->next_picture_ptr)) < 0)
1667             return ret;
1668     }
1669
1670     if (s->picture_structure!= PICT_FRAME) {
1671         int i;
1672         for (i = 0; i < 4; i++) {
1673             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1674                 s->current_picture.f->data[i] +=
1675                     s->current_picture.f->linesize[i];
1676             }
1677             s->current_picture.f->linesize[i] *= 2;
1678             s->last_picture.f->linesize[i]    *= 2;
1679             s->next_picture.f->linesize[i]    *= 2;
1680         }
1681     }
1682
1683     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1684         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1685         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1686     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1687         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1688         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1689     } else {
1690         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1691         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1692     }
1693
1694     if (s->dct_error_sum) {
1695         av_assert2(s->noise_reduction && s->encoding);
1696         update_noise_reduction(s);
1697     }
1698
1699     return 0;
1700 }
1701
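/* Main per-frame entry point of the encoder: queue and reorder the input,
 * encode the selected picture (retrying with a larger lambda if the VBV
 * buffer would be violated), append any required stuffing, and fill in the
 * MPEG-1/2 vbv_delay as well as the packet timestamps and side data. */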
1702 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1703                           const AVFrame *pic_arg, int *got_packet)
1704 {
1705     MpegEncContext *s = avctx->priv_data;
1706     int i, stuffing_count, ret;
1707     int context_count = s->slice_context_count;
1708
1709     s->vbv_ignore_qmax = 0;
1710
1711     s->picture_in_gop_number++;
1712
1713     if (load_input_picture(s, pic_arg) < 0)
1714         return -1;
1715
1716     if (select_input_picture(s) < 0) {
1717         return -1;
1718     }
1719
1720     /* output? */
1721     if (s->new_picture.f->data[0]) {
1722         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1723         int pkt_size = growing_buffer
1724                        ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1725                        : s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1726         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1727             return ret;
1728         if (s->mb_info) {
1729             s->mb_info_ptr = av_packet_new_side_data(pkt,
1730                                  AV_PKT_DATA_H263_MB_INFO,
1731                                  s->mb_width*s->mb_height*12);
1732             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1733         }
1734
1735         for (i = 0; i < context_count; i++) {
1736             int start_y = s->thread_context[i]->start_mb_y;
1737             int   end_y = s->thread_context[i]->  end_mb_y;
1738             int h       = s->mb_height;
1739             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1740             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1741
1742             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1743         }
1744
1745         s->pict_type = s->new_picture.f->pict_type;
1746         //emms_c();
1747         ret = frame_start(s);
1748         if (ret < 0)
1749             return ret;
1750 vbv_retry:
1751         ret = encode_picture(s, s->picture_number);
1752         if (growing_buffer) {
1753             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1754             pkt->data = s->pb.buf;
1755             pkt->size = avctx->internal->byte_buffer_size;
1756         }
1757         if (ret < 0)
1758             return -1;
1759
1760         frame_end(s);
1761
1762         if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1763             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1764
1765         if (avctx->rc_buffer_size) {
1766             RateControlContext *rcc = &s->rc_context;
1767             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1768             int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1769             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1770
1771             if (put_bits_count(&s->pb) > max_size &&
1772                 s->lambda < s->lmax) {
1773                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1774                                        (s->qscale + 1) / s->qscale);
1775                 if (s->adaptive_quant) {
1776                     int i;
1777                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1778                         s->lambda_table[i] =
1779                             FFMAX(s->lambda_table[i] + min_step,
1780                                   s->lambda_table[i] * (s->qscale + 1) /
1781                                   s->qscale);
1782                 }
1783                 s->mb_skipped = 0;        // done in frame_start()
1784                 // this is also done in encode_picture(), so we must undo it before re-encoding
1785                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1786                     if (s->flipflop_rounding          ||
1787                         s->codec_id == AV_CODEC_ID_H263P ||
1788                         s->codec_id == AV_CODEC_ID_MPEG4)
1789                         s->no_rounding ^= 1;
1790                 }
1791                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1792                     s->time_base       = s->last_time_base;
1793                     s->last_non_b_time = s->time - s->pp_time;
1794                 }
1795                 for (i = 0; i < context_count; i++) {
1796                     PutBitContext *pb = &s->thread_context[i]->pb;
1797                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1798                 }
1799                 s->vbv_ignore_qmax = 1;
1800                 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1801                 goto vbv_retry;
1802             }
1803
1804             av_assert0(avctx->rc_max_rate);
1805         }
1806
1807         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1808             ff_write_pass1_stats(s);
1809
1810         for (i = 0; i < 4; i++) {
1811             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1812             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1813         }
1814         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1815                                        s->current_picture_ptr->encoding_error,
1816                                        (avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1817                                        s->pict_type);
1818
1819         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1820             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1821                                              s->misc_bits + s->i_tex_bits +
1822                                              s->p_tex_bits);
1823         flush_put_bits(&s->pb);
1824         s->frame_bits  = put_bits_count(&s->pb);
1825
1826         stuffing_count = ff_vbv_update(s, s->frame_bits);
1827         s->stuffing_bits = 8*stuffing_count;
1828         if (stuffing_count) {
1829             if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1830                 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1831                 return -1;
1832             }
1833
1834             switch (s->codec_id) {
1835             case AV_CODEC_ID_MPEG1VIDEO:
1836             case AV_CODEC_ID_MPEG2VIDEO:
1837                 while (stuffing_count--) {
1838                     put_bits(&s->pb, 8, 0);
1839                 }
1840                 break;
1841             case AV_CODEC_ID_MPEG4:
1842                 put_bits(&s->pb, 16, 0);
1843                 put_bits(&s->pb, 16, 0x1C3);
1844                 stuffing_count -= 4;
1845                 while (stuffing_count--) {
1846                     put_bits(&s->pb, 8, 0xFF);
1847                 }
1848                 break;
1849             default:
1850                 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1851             }
1852             flush_put_bits(&s->pb);
1853             s->frame_bits  = put_bits_count(&s->pb);
1854         }
1855
1856         /* update MPEG-1/2 vbv_delay for CBR */
1857         if (avctx->rc_max_rate                          &&
1858             avctx->rc_min_rate == avctx->rc_max_rate &&
1859             s->out_format == FMT_MPEG1                     &&
1860             90000LL * (avctx->rc_buffer_size - 1) <=
1861                 avctx->rc_max_rate * 0xFFFFLL) {
1862             AVCPBProperties *props;
1863             size_t props_size;
1864
1865             int vbv_delay, min_delay;
1866             double inbits  = avctx->rc_max_rate *
1867                              av_q2d(avctx->time_base);
1868             int    minbits = s->frame_bits - 8 *
1869                              (s->vbv_delay_ptr - s->pb.buf - 1);
1870             double bits    = s->rc_context.buffer_index + minbits - inbits;
1871
1872             if (bits < 0)
1873                 av_log(avctx, AV_LOG_ERROR,
1874                        "Internal error, negative bits\n");
1875
1876             av_assert1(s->repeat_first_field == 0);
1877
1878             vbv_delay = bits * 90000 / avctx->rc_max_rate;
1879             min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1880                         avctx->rc_max_rate;
1881
1882             vbv_delay = FFMAX(vbv_delay, min_delay);
1883
1884             av_assert0(vbv_delay < 0xFFFF);
1885
1886             s->vbv_delay_ptr[0] &= 0xF8;
1887             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1888             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1889             s->vbv_delay_ptr[2] &= 0x07;
1890             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1891
1892             props = av_cpb_properties_alloc(&props_size);
1893             if (!props)
1894                 return AVERROR(ENOMEM);
1895             props->vbv_delay = vbv_delay * 300;
1896
1897             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1898                                           (uint8_t*)props, props_size);
1899             if (ret < 0) {
1900                 av_freep(&props);
1901                 return ret;
1902             }
1903         }
1904         s->total_bits     += s->frame_bits;
1905
1906         pkt->pts = s->current_picture.f->pts;
1907         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1908             if (!s->current_picture.f->coded_picture_number)
1909                 pkt->dts = pkt->pts - s->dts_delta;
1910             else
1911                 pkt->dts = s->reordered_pts;
1912             s->reordered_pts = pkt->pts;
1913         } else
1914             pkt->dts = pkt->pts;
1915         if (s->current_picture.f->key_frame)
1916             pkt->flags |= AV_PKT_FLAG_KEY;
1917         if (s->mb_info)
1918             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1919     } else {
1920         s->frame_bits = 0;
1921     }
1922
1923     /* release non-reference frames */
1924     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1925         if (!s->picture[i].reference)
1926             ff_mpeg_unref_picture(avctx, &s->picture[i]);
1927     }
1928
1929     av_assert1((s->frame_bits & 7) == 0);
1930
1931     pkt->size = s->frame_bits / 8;
1932     *got_packet = !!pkt->size;
1933     return 0;
1934 }
1935
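/* Zero out a block that contains only a few isolated +-1 coefficients: if
 * the run-length-weighted score of those coefficients stays below the given
 * threshold, coding them is not considered worth the bits. A negative
 * threshold additionally allows the DC coefficient to be eliminated. */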
1936 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1937                                                 int n, int threshold)
1938 {
1939     static const char tab[64] = {
1940         3, 2, 2, 1, 1, 1, 1, 1,
1941         1, 1, 1, 1, 1, 1, 1, 1,
1942         1, 1, 1, 1, 1, 1, 1, 1,
1943         0, 0, 0, 0, 0, 0, 0, 0,
1944         0, 0, 0, 0, 0, 0, 0, 0,
1945         0, 0, 0, 0, 0, 0, 0, 0,
1946         0, 0, 0, 0, 0, 0, 0, 0,
1947         0, 0, 0, 0, 0, 0, 0, 0
1948     };
1949     int score = 0;
1950     int run = 0;
1951     int i;
1952     int16_t *block = s->block[n];
1953     const int last_index = s->block_last_index[n];
1954     int skip_dc;
1955
1956     if (threshold < 0) {
1957         skip_dc = 0;
1958         threshold = -threshold;
1959     } else
1960         skip_dc = 1;
1961
1962     /* Is everything we could set to zero already zero? */
1963     if (last_index <= skip_dc - 1)
1964         return;
1965
1966     for (i = 0; i <= last_index; i++) {
1967         const int j = s->intra_scantable.permutated[i];
1968         const int level = FFABS(block[j]);
1969         if (level == 1) {
1970             if (skip_dc && i == 0)
1971                 continue;
1972             score += tab[run];
1973             run = 0;
1974         } else if (level > 1) {
1975             return;
1976         } else {
1977             run++;
1978         }
1979     }
1980     if (score >= threshold)
1981         return;
1982     for (i = skip_dc; i <= last_index; i++) {
1983         const int j = s->intra_scantable.permutated[i];
1984         block[j] = 0;
1985     }
1986     if (block[0])
1987         s->block_last_index[n] = 0;
1988     else
1989         s->block_last_index[n] = -1;
1990 }
1991
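/* Clamp the quantized coefficients to the [min_qcoeff, max_qcoeff] range
 * representable by the target syntax, counting how many had to be clipped. */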
1992 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1993                                int last_index)
1994 {
1995     int i;
1996     const int maxlevel = s->max_qcoeff;
1997     const int minlevel = s->min_qcoeff;
1998     int overflow = 0;
1999
2000     if (s->mb_intra) {
2001         i = 1; // skip clipping of intra dc
2002     } else
2003         i = 0;
2004
2005     for (; i <= last_index; i++) {
2006         const int j = s->intra_scantable.permutated[i];
2007         int level = block[j];
2008
2009         if (level > maxlevel) {
2010             level = maxlevel;
2011             overflow++;
2012         } else if (level < minlevel) {
2013             level = minlevel;
2014             overflow++;
2015         }
2016
2017         block[j] = level;
2018     }
2019
2020     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2021         av_log(s->avctx, AV_LOG_INFO,
2022                "warning, clipping %d dct coefficients to %d..%d\n",
2023                overflow, minlevel, maxlevel);
2024 }
2025
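/* Compute a perceptual weight for each pixel of an 8x8 block, proportional
 * to the local standard deviation over a 3x3 neighbourhood; the weights are
 * consumed by dct_quantize_refine() when noise shaping is enabled. */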
2026 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2027 {
2028     int x, y;
2029     // FIXME optimize
2030     for (y = 0; y < 8; y++) {
2031         for (x = 0; x < 8; x++) {
2032             int x2, y2;
2033             int sum = 0;
2034             int sqr = 0;
2035             int count = 0;
2036
2037             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2038                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2039                     int v = ptr[x2 + y2 * stride];
2040                     sum += v;
2041                     sqr += v * v;
2042                     count++;
2043                 }
2044             }
2045             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2046         }
2047     }
2048 }
2049
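/* Encode a single macroblock: fetch the source pixels (with edge emulation
 * at picture borders), form the motion-compensated prediction for inter
 * blocks, decide between progressive and interlaced DCT, transform and
 * quantize each block (with optional coefficient elimination and noise
 * shaping), and finally emit the macroblock through the codec-specific
 * bitstream writer. */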
2050 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2051                                                 int motion_x, int motion_y,
2052                                                 int mb_block_height,
2053                                                 int mb_block_width,
2054                                                 int mb_block_count)
2055 {
2056     int16_t weight[12][64];
2057     int16_t orig[12][64];
2058     const int mb_x = s->mb_x;
2059     const int mb_y = s->mb_y;
2060     int i;
2061     int skip_dct[12];
2062     int dct_offset = s->linesize * 8; // default for progressive frames
2063     int uv_dct_offset = s->uvlinesize * 8;
2064     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2065     ptrdiff_t wrap_y, wrap_c;
2066
2067     for (i = 0; i < mb_block_count; i++)
2068         skip_dct[i] = s->skipdct;
2069
2070     if (s->adaptive_quant) {
2071         const int last_qp = s->qscale;
2072         const int mb_xy = mb_x + mb_y * s->mb_stride;
2073
2074         s->lambda = s->lambda_table[mb_xy];
2075         update_qscale(s);
2076
2077         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2078             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2079             s->dquant = s->qscale - last_qp;
2080
2081             if (s->out_format == FMT_H263) {
2082                 s->dquant = av_clip(s->dquant, -2, 2);
2083
2084                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2085                     if (!s->mb_intra) {
2086                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2087                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2088                                 s->dquant = 0;
2089                         }
2090                         if (s->mv_type == MV_TYPE_8X8)
2091                             s->dquant = 0;
2092                     }
2093                 }
2094             }
2095         }
2096         ff_set_qscale(s, last_qp + s->dquant);
2097     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2098         ff_set_qscale(s, s->qscale + s->dquant);
2099
2100     wrap_y = s->linesize;
2101     wrap_c = s->uvlinesize;
2102     ptr_y  = s->new_picture.f->data[0] +
2103              (mb_y * 16 * wrap_y)              + mb_x * 16;
2104     ptr_cb = s->new_picture.f->data[1] +
2105              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2106     ptr_cr = s->new_picture.f->data[2] +
2107              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2108
2109     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2110         uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2111         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2112         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2113         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2114                                  wrap_y, wrap_y,
2115                                  16, 16, mb_x * 16, mb_y * 16,
2116                                  s->width, s->height);
2117         ptr_y = ebuf;
2118         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2119                                  wrap_c, wrap_c,
2120                                  mb_block_width, mb_block_height,
2121                                  mb_x * mb_block_width, mb_y * mb_block_height,
2122                                  cw, ch);
2123         ptr_cb = ebuf + 16 * wrap_y;
2124         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2125                                  wrap_c, wrap_c,
2126                                  mb_block_width, mb_block_height,
2127                                  mb_x * mb_block_width, mb_y * mb_block_height,
2128                                  cw, ch);
2129         ptr_cr = ebuf + 16 * wrap_y + 16;
2130     }
2131
2132     if (s->mb_intra) {
2133         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2134             int progressive_score, interlaced_score;
2135
2136             s->interlaced_dct = 0;
2137             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2138                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2139                                                      NULL, wrap_y, 8) - 400;
2140
2141             if (progressive_score > 0) {
2142                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2143                                                         NULL, wrap_y * 2, 8) +
2144                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2145                                                         NULL, wrap_y * 2, 8);
2146                 if (progressive_score > interlaced_score) {
2147                     s->interlaced_dct = 1;
2148
2149                     dct_offset = wrap_y;
2150                     uv_dct_offset = wrap_c;
2151                     wrap_y <<= 1;
2152                     if (s->chroma_format == CHROMA_422 ||
2153                         s->chroma_format == CHROMA_444)
2154                         wrap_c <<= 1;
2155                 }
2156             }
2157         }
2158
2159         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2160         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2161         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2162         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2163
2164         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2165             skip_dct[4] = 1;
2166             skip_dct[5] = 1;
2167         } else {
2168             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2169             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2170             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2171                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2172                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2173             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2174                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2175                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2176                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2177                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2178                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2179                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2180             }
2181         }
2182     } else {
2183         op_pixels_func (*op_pix)[4];
2184         qpel_mc_func (*op_qpix)[16];
2185         uint8_t *dest_y, *dest_cb, *dest_cr;
2186
2187         dest_y  = s->dest[0];
2188         dest_cb = s->dest[1];
2189         dest_cr = s->dest[2];
2190
2191         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2192             op_pix  = s->hdsp.put_pixels_tab;
2193             op_qpix = s->qdsp.put_qpel_pixels_tab;
2194         } else {
2195             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2196             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2197         }
2198
2199         if (s->mv_dir & MV_DIR_FORWARD) {
2200             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2201                           s->last_picture.f->data,
2202                           op_pix, op_qpix);
2203             op_pix  = s->hdsp.avg_pixels_tab;
2204             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2205         }
2206         if (s->mv_dir & MV_DIR_BACKWARD) {
2207             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2208                           s->next_picture.f->data,
2209                           op_pix, op_qpix);
2210         }
2211
2212         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2213             int progressive_score, interlaced_score;
2214
2215             s->interlaced_dct = 0;
2216             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2217                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2218                                                      ptr_y + wrap_y * 8,
2219                                                      wrap_y, 8) - 400;
2220
2221             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2222                 progressive_score -= 400;
2223
2224             if (progressive_score > 0) {
2225                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2226                                                         wrap_y * 2, 8) +
2227                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2228                                                         ptr_y + wrap_y,
2229                                                         wrap_y * 2, 8);
2230
2231                 if (progressive_score > interlaced_score) {
2232                     s->interlaced_dct = 1;
2233
2234                     dct_offset = wrap_y;
2235                     uv_dct_offset = wrap_c;
2236                     wrap_y <<= 1;
2237                     if (s->chroma_format == CHROMA_422)
2238                         wrap_c <<= 1;
2239                 }
2240             }
2241         }
2242
2243         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2244         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2245         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2246                             dest_y + dct_offset, wrap_y);
2247         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2248                             dest_y + dct_offset + 8, wrap_y);
2249
2250         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2251             skip_dct[4] = 1;
2252             skip_dct[5] = 1;
2253         } else {
2254             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2255             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2256             if (!s->chroma_y_shift) { /* 422 */
2257                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2258                                     dest_cb + uv_dct_offset, wrap_c);
2259                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2260                                     dest_cr + uv_dct_offset, wrap_c);
2261             }
2262         }
2263         /* pre quantization */
2264         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2265                 2 * s->qscale * s->qscale) {
2266             // FIXME optimize
2267             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[0] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2270                 skip_dct[1] = 1;
2271             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2272                                wrap_y, 8) < 20 * s->qscale)
2273                 skip_dct[2] = 1;
2274             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2275                                wrap_y, 8) < 20 * s->qscale)
2276                 skip_dct[3] = 1;
2277             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2278                 skip_dct[4] = 1;
2279             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2280                 skip_dct[5] = 1;
2281             if (!s->chroma_y_shift) { /* 422 */
2282                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2283                                    dest_cb + uv_dct_offset,
2284                                    wrap_c, 8) < 20 * s->qscale)
2285                     skip_dct[6] = 1;
2286                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2287                                    dest_cr + uv_dct_offset,
2288                                    wrap_c, 8) < 20 * s->qscale)
2289                     skip_dct[7] = 1;
2290             }
2291         }
2292     }
2293
2294     if (s->quantizer_noise_shaping) {
2295         if (!skip_dct[0])
2296             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2297         if (!skip_dct[1])
2298             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2299         if (!skip_dct[2])
2300             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2301         if (!skip_dct[3])
2302             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2303         if (!skip_dct[4])
2304             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2305         if (!skip_dct[5])
2306             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2307         if (!s->chroma_y_shift) { /* 422 */
2308             if (!skip_dct[6])
2309                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2310                                   wrap_c);
2311             if (!skip_dct[7])
2312                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2313                                   wrap_c);
2314         }
2315         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2316     }
2317
2318     /* DCT & quantize */
2319     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2320     {
2321         for (i = 0; i < mb_block_count; i++) {
2322             if (!skip_dct[i]) {
2323                 int overflow;
2324                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2325                 // FIXME: we could decide to change the quantizer instead of
2326                 // clipping.
2327                 // JS: I don't think that would be a good idea; it could lower
2328                 //     quality instead of improving it. Only INTRADC clipping
2329                 //     deserves changes in the quantizer.
2330                 if (overflow)
2331                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2332             } else
2333                 s->block_last_index[i] = -1;
2334         }
2335         if (s->quantizer_noise_shaping) {
2336             for (i = 0; i < mb_block_count; i++) {
2337                 if (!skip_dct[i]) {
2338                     s->block_last_index[i] =
2339                         dct_quantize_refine(s, s->block[i], weight[i],
2340                                             orig[i], i, s->qscale);
2341                 }
2342             }
2343         }
2344
2345         if (s->luma_elim_threshold && !s->mb_intra)
2346             for (i = 0; i < 4; i++)
2347                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2348         if (s->chroma_elim_threshold && !s->mb_intra)
2349             for (i = 4; i < mb_block_count; i++)
2350                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2351
2352         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2353             for (i = 0; i < mb_block_count; i++) {
2354                 if (s->block_last_index[i] == -1)
2355                     s->coded_score[i] = INT_MAX / 256;
2356             }
2357         }
2358     }
2359
2360     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2361         s->block_last_index[4] =
2362         s->block_last_index[5] = 0;
2363         s->block[4][0] =
2364         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2365         if (!s->chroma_y_shift) { /* 422 / 444 */
2366             for (i=6; i<12; i++) {
2367                 s->block_last_index[i] = 0;
2368                 s->block[i][0] = s->block[4][0];
2369             }
2370         }
2371     }
2372
2373     // FIXME: the non-C quantize code returns an incorrect block_last_index
2374     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2375         for (i = 0; i < mb_block_count; i++) {
2376             int j;
2377             if (s->block_last_index[i] > 0) {
2378                 for (j = 63; j > 0; j--) {
2379                     if (s->block[i][s->intra_scantable.permutated[j]])
2380                         break;
2381                 }
2382                 s->block_last_index[i] = j;
2383             }
2384         }
2385     }
2386
2387     /* huffman encode */
2388     switch(s->codec_id){ //FIXME: a function pointer could be slightly faster
2389     case AV_CODEC_ID_MPEG1VIDEO:
2390     case AV_CODEC_ID_MPEG2VIDEO:
2391         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2392             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2393         break;
2394     case AV_CODEC_ID_MPEG4:
2395         if (CONFIG_MPEG4_ENCODER)
2396             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2397         break;
2398     case AV_CODEC_ID_MSMPEG4V2:
2399     case AV_CODEC_ID_MSMPEG4V3:
2400     case AV_CODEC_ID_WMV1:
2401         if (CONFIG_MSMPEG4_ENCODER)
2402             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2403         break;
2404     case AV_CODEC_ID_WMV2:
2405         if (CONFIG_WMV2_ENCODER)
2406             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2407         break;
2408     case AV_CODEC_ID_H261:
2409         if (CONFIG_H261_ENCODER)
2410             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_H263:
2413     case AV_CODEC_ID_H263P:
2414     case AV_CODEC_ID_FLV1:
2415     case AV_CODEC_ID_RV10:
2416     case AV_CODEC_ID_RV20:
2417         if (CONFIG_H263_ENCODER)
2418             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2419         break;
2420 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2421     case AV_CODEC_ID_MJPEG:
2422     case AV_CODEC_ID_AMV:
2423         ff_mjpeg_encode_mb(s, s->block);
2424         break;
2425 #endif
2426     case AV_CODEC_ID_SPEEDHQ:
2427         if (CONFIG_SPEEDHQ_ENCODER)
2428             ff_speedhq_encode_mb(s, s->block);
2429         break;
2430     default:
2431         av_assert1(0);
2432     }
2433 }
2434
2435 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2436 {
2437     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2438     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2439     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2440 }
2441
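/* Save/restore the small amount of encoder state that a trial macroblock
 * encode may modify, so that several candidate modes can be coded and only
 * the best one kept (see encode_mb_hq() below). */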
2442 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2443     int i;
2444
2445     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2446
2447     /* MPEG-1 */
2448     d->mb_skip_run= s->mb_skip_run;
2449     for(i=0; i<3; i++)
2450         d->last_dc[i] = s->last_dc[i];
2451
2452     /* statistics */
2453     d->mv_bits= s->mv_bits;
2454     d->i_tex_bits= s->i_tex_bits;
2455     d->p_tex_bits= s->p_tex_bits;
2456     d->i_count= s->i_count;
2457     d->f_count= s->f_count;
2458     d->b_count= s->b_count;
2459     d->skip_count= s->skip_count;
2460     d->misc_bits= s->misc_bits;
2461     d->last_bits= 0;
2462
2463     d->mb_skipped= 0;
2464     d->qscale= s->qscale;
2465     d->dquant= s->dquant;
2466
2467     d->esc3_level_length= s->esc3_level_length;
2468 }
2469
2470 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2471     int i;
2472
2473     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2474     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2475
2476     /* MPEG-1 */
2477     d->mb_skip_run= s->mb_skip_run;
2478     for(i=0; i<3; i++)
2479         d->last_dc[i] = s->last_dc[i];
2480
2481     /* statistics */
2482     d->mv_bits= s->mv_bits;
2483     d->i_tex_bits= s->i_tex_bits;
2484     d->p_tex_bits= s->p_tex_bits;
2485     d->i_count= s->i_count;
2486     d->f_count= s->f_count;
2487     d->b_count= s->b_count;
2488     d->skip_count= s->skip_count;
2489     d->misc_bits= s->misc_bits;
2490
2491     d->mb_intra= s->mb_intra;
2492     d->mb_skipped= s->mb_skipped;
2493     d->mv_type= s->mv_type;
2494     d->mv_dir= s->mv_dir;
2495     d->pb= s->pb;
2496     if(s->data_partitioning){
2497         d->pb2= s->pb2;
2498         d->tex_pb= s->tex_pb;
2499     }
2500     d->block= s->block;
2501     for(i=0; i<8; i++)
2502         d->block_last_index[i]= s->block_last_index[i];
2503     d->interlaced_dct= s->interlaced_dct;
2504     d->qscale= s->qscale;
2505
2506     d->esc3_level_length= s->esc3_level_length;
2507 }
2508
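/* Trial-encode one macroblock candidate into a scratch PutBitContext (and,
 * in RD mode, reconstruct it into a scratch buffer), compute its cost as
 * bits, or bits * lambda2 plus SSE for full rate-distortion decisions, and
 * keep it if it beats the best candidate found so far. */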
2509 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2510                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2511                            int *dmin, int *next_block, int motion_x, int motion_y)
2512 {
2513     int score;
2514     uint8_t *dest_backup[3];
2515
2516     copy_context_before_encode(s, backup, type);
2517
2518     s->block= s->blocks[*next_block];
2519     s->pb= pb[*next_block];
2520     if(s->data_partitioning){
2521         s->pb2   = pb2   [*next_block];
2522         s->tex_pb= tex_pb[*next_block];
2523     }
2524
2525     if(*next_block){
2526         memcpy(dest_backup, s->dest, sizeof(s->dest));
2527         s->dest[0] = s->sc.rd_scratchpad;
2528         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2529         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2530         av_assert0(s->linesize >= 32); //FIXME
2531     }
2532
2533     encode_mb(s, motion_x, motion_y);
2534
2535     score= put_bits_count(&s->pb);
2536     if(s->data_partitioning){
2537         score+= put_bits_count(&s->pb2);
2538         score+= put_bits_count(&s->tex_pb);
2539     }
2540
2541     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2542         ff_mpv_reconstruct_mb(s, s->block);
2543
2544         score *= s->lambda2;
2545         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2546     }
2547
2548     if(*next_block){
2549         memcpy(s->dest, dest_backup, sizeof(s->dest));
2550     }
2551
2552     if(score<*dmin){
2553         *dmin= score;
2554         *next_block^=1;
2555
2556         copy_context_after_encode(best, s, type);
2557     }
2558 }
2559
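/* Sum of squared errors between source and reconstruction; sse_mb() below
 * applies it (or the NSSE comparison) to the current macroblock, handling
 * partial macroblocks at the right and bottom picture borders. */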
2560 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2561     const uint32_t *sq = ff_square_tab + 256;
2562     int acc=0;
2563     int x,y;
2564
2565     if(w==16 && h==16)
2566         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2567     else if(w==8 && h==8)
2568         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2569
2570     for(y=0; y<h; y++){
2571         for(x=0; x<w; x++){
2572             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2573         }
2574     }
2575
2576     av_assert2(acc>=0);
2577
2578     return acc;
2579 }
2580
2581 static int sse_mb(MpegEncContext *s){
2582     int w= 16;
2583     int h= 16;
2584
2585     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2586     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2587
2588     if(w==16 && h==16)
2589       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2590         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2591                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2592                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2593       }else{
2594         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2595                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2596                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2597       }
2598     else
2599         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2600                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2601                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2602 }
2603
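/* Slice-threaded motion estimation passes: a coarse pre-pass over the rows
 * in reverse order, the full per-macroblock estimation for P- and B-frames,
 * and the macroblock variance/mean statistics gathered by mb_var_thread(). */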
2604 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2605     MpegEncContext *s= *(void**)arg;
2606
2607
2608     s->me.pre_pass=1;
2609     s->me.dia_size= s->avctx->pre_dia_size;
2610     s->first_slice_line=1;
2611     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2612         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2613             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2614         }
2615         s->first_slice_line=0;
2616     }
2617
2618     s->me.pre_pass=0;
2619
2620     return 0;
2621 }
2622
2623 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2624     MpegEncContext *s= *(void**)arg;
2625
2626     s->me.dia_size= s->avctx->dia_size;
2627     s->first_slice_line=1;
2628     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2629         s->mb_x=0; //for block init below
2630         ff_init_block_index(s);
2631         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2632             s->block_index[0]+=2;
2633             s->block_index[1]+=2;
2634             s->block_index[2]+=2;
2635             s->block_index[3]+=2;
2636
2637             /* compute motion vector & mb_type and store in context */
2638             if(s->pict_type==AV_PICTURE_TYPE_B)
2639                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2640             else
2641                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2642         }
2643         s->first_slice_line=0;
2644     }
2645     return 0;
2646 }
2647
2648 static int mb_var_thread(AVCodecContext *c, void *arg){
2649     MpegEncContext *s= *(void**)arg;
2650     int mb_x, mb_y;
2651
2652     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2653         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2654             int xx = mb_x * 16;
2655             int yy = mb_y * 16;
2656             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2657             int varc;
2658             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2659
2660             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2661                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2662
2663             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2664             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2665             s->me.mb_var_sum_temp    += varc;
2666         }
2667     }
2668     return 0;
2669 }
2670
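/* Finish the current slice: merge MPEG-4 data partitions if used, write
 * MPEG-4 or MJPEG stuffing or end the SpeedHQ slice, and flush the bit
 * writer. */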
2671 static void write_slice_end(MpegEncContext *s){
2672     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2673         if(s->partitioned_frame){
2674             ff_mpeg4_merge_partitions(s);
2675         }
2676
2677         ff_mpeg4_stuffing(&s->pb);
2678     } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2679                s->out_format == FMT_MJPEG) {
2680         ff_mjpeg_encode_stuffing(s);
2681     } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2682         ff_speedhq_end_slice(s);
2683     }
2684
2685     flush_put_bits(&s->pb);
2686
2687     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2688         s->misc_bits+= get_bits_diff(s);
2689 }
2690
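/* Append one 12-byte AV_PKT_DATA_H263_MB_INFO record for the current
 * macroblock: bit offset, qscale, GOB number, macroblock address and the
 * predicted motion vector (the second MV pair is not implemented). */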
2691 static void write_mb_info(MpegEncContext *s)
2692 {
2693     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2694     int offset = put_bits_count(&s->pb);
2695     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2696     int gobn = s->mb_y / s->gob_index;
2697     int pred_x, pred_y;
2698     if (CONFIG_H263_ENCODER)
2699         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2700     bytestream_put_le32(&ptr, offset);
2701     bytestream_put_byte(&ptr, s->qscale);
2702     bytestream_put_byte(&ptr, gobn);
2703     bytestream_put_le16(&ptr, mba);
2704     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2705     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2706     /* 4MV not implemented */
2707     bytestream_put_byte(&ptr, 0); /* hmv2 */
2708     bytestream_put_byte(&ptr, 0); /* vmv2 */
2709 }
2710
2711 static void update_mb_info(MpegEncContext *s, int startcode)
2712 {
2713     if (!s->mb_info)
2714         return;
2715     if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2716         s->mb_info_size += 12;
2717         s->prev_mb_info = s->last_mb_info;
2718     }
2719     if (startcode) {
2720         s->prev_mb_info = put_bytes_count(&s->pb, 0);
2721         /* This might have incremented mb_info_size above, and we return without
2722          * actually writing any info into that slot yet. But in that case this
2723          * function will be called again right after the start code has been
2724          * written, and the MB info will actually be written then. */
2725         return;
2726     }
2727
2728     s->last_mb_info = put_bytes_count(&s->pb, 0);
2729     if (!s->mb_info_size)
2730         s->mb_info_size += 12;
2731     write_mb_info(s);
2732 }
2733
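/* Grow the shared byte buffer when fewer than `threshold` bytes are left in
 * the bit writer, rebasing the PutBitContext and the lastgob / vbv_delay
 * pointers onto the new allocation. Only possible when a single slice
 * context writes into avctx->internal->byte_buffer. */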
2734 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2735 {
2736     if (put_bytes_left(&s->pb, 0) < threshold
2737         && s->slice_context_count == 1
2738         && s->pb.buf == s->avctx->internal->byte_buffer) {
2739         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2740         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2741
2742         uint8_t *new_buffer = NULL;
2743         int new_buffer_size = 0;
2744
2745         if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2746             av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2747             return AVERROR(ENOMEM);
2748         }
2749
2750         emms_c();
2751
2752         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2753                               s->avctx->internal->byte_buffer_size + size_increase);
2754         if (!new_buffer)
2755             return AVERROR(ENOMEM);
2756
2757         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2758         av_free(s->avctx->internal->byte_buffer);
2759         s->avctx->internal->byte_buffer      = new_buffer;
2760         s->avctx->internal->byte_buffer_size = new_buffer_size;
2761         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2762         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2763         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2764     }
2765     if (put_bytes_left(&s->pb, 0) < threshold)
2766         return AVERROR(EINVAL);
2767     return 0;
2768 }
2769
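/* Encode one slice of macroblock rows (start_mb_y..end_mb_y) in a worker
 * thread: initialize the per-slice DC predictors, GOB/partition state and
 * the scratch bit writers used for candidate macroblock modes, then code
 * each macroblock in turn. */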
2770 static int encode_thread(AVCodecContext *c, void *arg){
2771     MpegEncContext *s= *(void**)arg;
2772     int mb_x, mb_y, mb_y_order;
2773     int chr_h= 16>>s->chroma_y_shift;
2774     int i, j;
2775     MpegEncContext best_s = { 0 }, backup_s;
2776     uint8_t bit_buf[2][MAX_MB_BYTES];
2777     uint8_t bit_buf2[2][MAX_MB_BYTES];
2778     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2779     PutBitContext pb[2], pb2[2], tex_pb[2];
2780
2781     for(i=0; i<2; i++){
2782         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2783         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2784         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2785     }
2786
2787     s->last_bits= put_bits_count(&s->pb);
2788     s->mv_bits=0;
2789     s->misc_bits=0;
2790     s->i_tex_bits=0;
2791     s->p_tex_bits=0;
2792     s->i_count=0;
2793     s->f_count=0;
2794     s->b_count=0;
2795     s->skip_count=0;
2796
2797     for(i=0; i<3; i++){
2798         /* init last dc values */
2799         /* note: quant matrix value (8) is implied here */
2800         s->last_dc[i] = 128 << s->intra_dc_precision;
2801
2802         s->current_picture.encoding_error[i] = 0;
2803     }
2804     if(s->codec_id==AV_CODEC_ID_AMV){
2805         s->last_dc[0] = 128*8/13;
2806         s->last_dc[1] = 128*8/14;
2807         s->last_dc[2] = 128*8/14;
2808     }
2809     s->mb_skip_run = 0;
2810     memset(s->last_mv, 0, sizeof(s->last_mv));
2811
2812     s->last_mv_dir = 0;
2813
2814     switch(s->codec_id){
2815     case AV_CODEC_ID_H263:
2816     case AV_CODEC_ID_H263P:
2817     case AV_CODEC_ID_FLV1:
2818         if (CONFIG_H263_ENCODER)
2819             s->gob_index = H263_GOB_HEIGHT(s->height);
2820         break;
2821     case AV_CODEC_ID_MPEG4:
2822         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2823             ff_mpeg4_init_partitions(s);
2824         break;
2825     }
2826
2827     s->resync_mb_x=0;
2828     s->resync_mb_y=0;
2829     s->first_slice_line = 1;
2830     s->ptr_lastgob = s->pb.buf;
2831     for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2832         if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2833             int first_in_slice;
2834             mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2835             if (first_in_slice && mb_y_order != s->start_mb_y)
2836                 ff_speedhq_end_slice(s);
2837             s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2838         } else {
2839             mb_y = mb_y_order;
2840         }
2841         s->mb_x=0;
2842         s->mb_y= mb_y;
2843
2844         ff_set_qscale(s, s->qscale);
2845         ff_init_block_index(s);
2846
2847         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2848             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2849             int mb_type= s->mb_type[xy];
2850 //            int d;
2851             int dmin= INT_MAX;
2852             int dir;
2853             int size_increase =  s->avctx->internal->byte_buffer_size/4
2854                                + s->mb_width*MAX_MB_BYTES;
2855
2856             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2857             if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2858                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2859                 return -1;
2860             }
2861             if(s->data_partitioning){
2862                 if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2863                     put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2864                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2865                     return -1;
2866                 }
2867             }
2868
2869             s->mb_x = mb_x;
2870             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2871             ff_update_block_index(s);
2872
2873             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2874                 ff_h261_reorder_mb_index(s);
2875                 xy= s->mb_y*s->mb_stride + s->mb_x;
2876                 mb_type= s->mb_type[xy];
2877             }
2878
2879             /* write GOB / video packet header */
2880             if(s->rtp_mode){
2881                 int current_packet_size, is_gob_start;
2882
2883                 current_packet_size = put_bytes_count(&s->pb, 1)
2884                                       - (s->ptr_lastgob - s->pb.buf);
2885
2886                 is_gob_start = s->rtp_payload_size &&
2887                                current_packet_size >= s->rtp_payload_size &&
2888                                mb_y + mb_x > 0;
2889
2890                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2891
2892                 switch(s->codec_id){
2893                 case AV_CODEC_ID_H263:
2894                 case AV_CODEC_ID_H263P:
2895                     if(!s->h263_slice_structured)
2896                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2897                     break;
2898                 case AV_CODEC_ID_MPEG2VIDEO:
2899                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
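                    /* fall through: the skip-run check below applies to MPEG-2 as well */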
2900                 case AV_CODEC_ID_MPEG1VIDEO:
2901                     if(s->mb_skip_run) is_gob_start=0;
2902                     break;
2903                 case AV_CODEC_ID_MJPEG:
2904                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2905                     break;
2906                 }
2907
2908                 if(is_gob_start){
2909                     if(s->start_mb_y != mb_y || mb_x!=0){
2910                         write_slice_end(s);
2911
2912                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2913                             ff_mpeg4_init_partitions(s);
2914                         }
2915                     }
2916
2917                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2918                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2919
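                    /* error_rate option: simulate bitstream errors by dropping roughly
                     * error_rate percent of the packets, rewinding to the last GOB start */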
2920                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2921                         int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2922                         int d = 100 / s->error_rate;
2923                         if(r % d == 0){
2924                             current_packet_size=0;
2925                             s->pb.buf_ptr= s->ptr_lastgob;
2926                             av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2927                         }
2928                     }
2929
2930                     update_mb_info(s, 1);
2931
2932                     switch(s->codec_id){
2933                     case AV_CODEC_ID_MPEG4:
2934                         if (CONFIG_MPEG4_ENCODER) {
2935                             ff_mpeg4_encode_video_packet_header(s);
2936                             ff_mpeg4_clean_buffers(s);
2937                         }
2938                     break;
2939                     case AV_CODEC_ID_MPEG1VIDEO:
2940                     case AV_CODEC_ID_MPEG2VIDEO:
2941                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2942                             ff_mpeg1_encode_slice_header(s);
2943                             ff_mpeg1_clean_buffers(s);
2944                         }
2945                     break;
2946                     case AV_CODEC_ID_H263:
2947                     case AV_CODEC_ID_H263P:
2948                         if (CONFIG_H263_ENCODER)
2949                             ff_h263_encode_gob_header(s, mb_y);
2950                     break;
2951                     }
2952
2953                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2954                         int bits= put_bits_count(&s->pb);
2955                         s->misc_bits+= bits - s->last_bits;
2956                         s->last_bits= bits;
2957                     }
2958
2959                     s->ptr_lastgob += current_packet_size;
2960                     s->first_slice_line=1;
2961                     s->resync_mb_x=mb_x;
2962                     s->resync_mb_y=mb_y;
2963                 }
2964             }
2965
2966             if(  (s->resync_mb_x   == s->mb_x)
2967                && s->resync_mb_y+1 == s->mb_y){
2968                 s->first_slice_line=0;
2969             }
2970
2971             s->mb_skipped=0;
2972             s->dquant=0; //only for QP_RD
2973
2974             update_mb_info(s, 0);
2975
2976             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2977                 int next_block=0;
2978                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2979
2980                 copy_context_before_encode(&backup_s, s, -1);
2981                 backup_s.pb= s->pb;
2982                 best_s.data_partitioning= s->data_partitioning;
2983                 best_s.partitioned_frame= s->partitioned_frame;
2984                 if(s->data_partitioning){
2985                     backup_s.pb2= s->pb2;
2986                     backup_s.tex_pb= s->tex_pb;
2987                 }
2988
2989                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2990                     s->mv_dir = MV_DIR_FORWARD;
2991                     s->mv_type = MV_TYPE_16X16;
2992                     s->mb_intra= 0;
2993                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2994                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2996                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2997                 }
2998                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2999                     s->mv_dir = MV_DIR_FORWARD;
3000                     s->mv_type = MV_TYPE_FIELD;
3001                     s->mb_intra= 0;
3002                     for(i=0; i<2; i++){
3003                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3004                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3005                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3006                     }
3007                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3008                                  &dmin, &next_block, 0, 0);
3009                 }
3010                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3011                     s->mv_dir = MV_DIR_FORWARD;
3012                     s->mv_type = MV_TYPE_16X16;
3013                     s->mb_intra= 0;
3014                     s->mv[0][0][0] = 0;
3015                     s->mv[0][0][1] = 0;
3016                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3017                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3018                 }
3019                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3020                     s->mv_dir = MV_DIR_FORWARD;
3021                     s->mv_type = MV_TYPE_8X8;
3022                     s->mb_intra= 0;
3023                     for(i=0; i<4; i++){
3024                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3025                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3026                     }
3027                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3028                                  &dmin, &next_block, 0, 0);
3029                 }
3030                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3031                     s->mv_dir = MV_DIR_FORWARD;
3032                     s->mv_type = MV_TYPE_16X16;
3033                     s->mb_intra= 0;
3034                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3035                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3036                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3037                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3038                 }
3039                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3040                     s->mv_dir = MV_DIR_BACKWARD;
3041                     s->mv_type = MV_TYPE_16X16;
3042                     s->mb_intra= 0;
3043                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3044                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3045                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3046                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3047                 }
3048                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3049                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3050                     s->mv_type = MV_TYPE_16X16;
3051                     s->mb_intra= 0;
3052                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3053                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3054                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3055                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3056                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3057                                  &dmin, &next_block, 0, 0);
3058                 }
3059                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3060                     s->mv_dir = MV_DIR_FORWARD;
3061                     s->mv_type = MV_TYPE_FIELD;
3062                     s->mb_intra= 0;
3063                     for(i=0; i<2; i++){
3064                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3065                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3066                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3067                     }
3068                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3069                                  &dmin, &next_block, 0, 0);
3070                 }
3071                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3072                     s->mv_dir = MV_DIR_BACKWARD;
3073                     s->mv_type = MV_TYPE_FIELD;
3074                     s->mb_intra= 0;
3075                     for(i=0; i<2; i++){
3076                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3077                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3078                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3079                     }
3080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3081                                  &dmin, &next_block, 0, 0);
3082                 }
3083                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3084                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3085                     s->mv_type = MV_TYPE_FIELD;
3086                     s->mb_intra= 0;
3087                     for(dir=0; dir<2; dir++){
3088                         for(i=0; i<2; i++){
3089                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3090                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3091                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3092                         }
3093                     }
3094                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3095                                  &dmin, &next_block, 0, 0);
3096                 }
3097                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3098                     s->mv_dir = 0;
3099                     s->mv_type = MV_TYPE_16X16;
3100                     s->mb_intra= 1;
3101                     s->mv[0][0][0] = 0;
3102                     s->mv[0][0][1] = 0;
3103                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3104                                  &dmin, &next_block, 0, 0);
3105                     if(s->h263_pred || s->h263_aic){
3106                         if(best_s.mb_intra)
3107                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3108                         else
3109                             ff_clean_intra_table_entries(s); //old mode?
3110                     }
3111                 }
3112
3113                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3114                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3115                         const int last_qp= backup_s.qscale;
3116                         int qpi, qp, dc[6];
3117                         int16_t ac[6][16];
3118                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3119                         static const int dquant_tab[4]={-1,1,-2,2};
3120                         int storecoefs = s->mb_intra && s->dc_val[0];
3121
3122                         av_assert2(backup_s.dquant == 0);
3123
3124                         //FIXME intra
3125                         s->mv_dir= best_s.mv_dir;
3126                         s->mv_type = MV_TYPE_16X16;
3127                         s->mb_intra= best_s.mb_intra;
3128                         s->mv[0][0][0] = best_s.mv[0][0][0];
3129                         s->mv[0][0][1] = best_s.mv[0][0][1];
3130                         s->mv[1][0][0] = best_s.mv[1][0][0];
3131                         s->mv[1][0][1] = best_s.mv[1][0][1];
3132
3133                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3134                         for(; qpi<4; qpi++){
3135                             int dquant= dquant_tab[qpi];
3136                             qp= last_qp + dquant;
3137                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3138                                 continue;
3139                             backup_s.dquant= dquant;
3140                             if(storecoefs){
3141                                 for(i=0; i<6; i++){
3142                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3143                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3144                                 }
3145                             }
3146
3147                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3148                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3149                             if(best_s.qscale != qp){
3150                                 if(storecoefs){
3151                                     for(i=0; i<6; i++){
3152                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3153                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3154                                     }
3155                                 }
3156                             }
3157                         }
3158                     }
3159                 }
3160                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3161                     int mx= s->b_direct_mv_table[xy][0];
3162                     int my= s->b_direct_mv_table[xy][1];
3163
3164                     backup_s.dquant = 0;
3165                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3166                     s->mb_intra= 0;
3167                     ff_mpeg4_set_direct_mv(s, mx, my);
3168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3169                                  &dmin, &next_block, mx, my);
3170                 }
3171                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3172                     backup_s.dquant = 0;
3173                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3174                     s->mb_intra= 0;
3175                     ff_mpeg4_set_direct_mv(s, 0, 0);
3176                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3177                                  &dmin, &next_block, 0, 0);
3178                 }
3179                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3180                     int coded=0;
3181                     for(i=0; i<6; i++)
3182                         coded |= s->block_last_index[i];
3183                     if(coded){
3184                         int mx,my;
3185                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3186                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3187                             mx=my=0; //FIXME find the one we actually used
3188                             ff_mpeg4_set_direct_mv(s, mx, my);
3189                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3190                             mx= s->mv[1][0][0];
3191                             my= s->mv[1][0][1];
3192                         }else{
3193                             mx= s->mv[0][0][0];
3194                             my= s->mv[0][0][1];
3195                         }
3196
3197                         s->mv_dir= best_s.mv_dir;
3198                         s->mv_type = best_s.mv_type;
3199                         s->mb_intra= 0;
3200 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3201                         s->mv[0][0][1] = best_s.mv[0][0][1];
3202                         s->mv[1][0][0] = best_s.mv[1][0][0];
3203                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3204                         backup_s.dquant= 0;
3205                         s->skipdct=1;
3206                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3207                                         &dmin, &next_block, mx, my);
3208                         s->skipdct=0;
3209                     }
3210                 }
3211
3212                 s->current_picture.qscale_table[xy] = best_s.qscale;
3213
3214                 copy_context_after_encode(s, &best_s, -1);
3215
3216                 pb_bits_count= put_bits_count(&s->pb);
3217                 flush_put_bits(&s->pb);
3218                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3219                 s->pb= backup_s.pb;
3220
3221                 if(s->data_partitioning){
3222                     pb2_bits_count= put_bits_count(&s->pb2);
3223                     flush_put_bits(&s->pb2);
3224                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3225                     s->pb2= backup_s.pb2;
3226
3227                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3228                     flush_put_bits(&s->tex_pb);
3229                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3230                     s->tex_pb= backup_s.tex_pb;
3231                 }
3232                 s->last_bits= put_bits_count(&s->pb);
3233
3234                 if (CONFIG_H263_ENCODER &&
3235                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3236                     ff_h263_update_motion_val(s);
3237
3238                 if(next_block==0){ //FIXME 16 vs linesize16
3239                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3240                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3241                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3242                 }
3243
3244                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3245                     ff_mpv_reconstruct_mb(s, s->block);
3246             } else {
3247                 int motion_x = 0, motion_y = 0;
3248                 s->mv_type=MV_TYPE_16X16;
3249                 // only one MB-Type possible
3250
3251                 switch(mb_type){
3252                 case CANDIDATE_MB_TYPE_INTRA:
3253                     s->mv_dir = 0;
3254                     s->mb_intra= 1;
3255                     motion_x= s->mv[0][0][0] = 0;
3256                     motion_y= s->mv[0][0][1] = 0;
3257                     break;
3258                 case CANDIDATE_MB_TYPE_INTER:
3259                     s->mv_dir = MV_DIR_FORWARD;
3260                     s->mb_intra= 0;
3261                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3262                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3263                     break;
3264                 case CANDIDATE_MB_TYPE_INTER_I:
3265                     s->mv_dir = MV_DIR_FORWARD;
3266                     s->mv_type = MV_TYPE_FIELD;
3267                     s->mb_intra= 0;
3268                     for(i=0; i<2; i++){
3269                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3270                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3271                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3272                     }
3273                     break;
3274                 case CANDIDATE_MB_TYPE_INTER4V:
3275                     s->mv_dir = MV_DIR_FORWARD;
3276                     s->mv_type = MV_TYPE_8X8;
3277                     s->mb_intra= 0;
3278                     for(i=0; i<4; i++){
3279                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3280                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3281                     }
3282                     break;
3283                 case CANDIDATE_MB_TYPE_DIRECT:
3284                     if (CONFIG_MPEG4_ENCODER) {
3285                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3286                         s->mb_intra= 0;
3287                         motion_x=s->b_direct_mv_table[xy][0];
3288                         motion_y=s->b_direct_mv_table[xy][1];
3289                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3290                     }
3291                     break;
3292                 case CANDIDATE_MB_TYPE_DIRECT0:
3293                     if (CONFIG_MPEG4_ENCODER) {
3294                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3295                         s->mb_intra= 0;
3296                         ff_mpeg4_set_direct_mv(s, 0, 0);
3297                     }
3298                     break;
3299                 case CANDIDATE_MB_TYPE_BIDIR:
3300                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3301                     s->mb_intra= 0;
3302                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3303                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3304                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3305                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3306                     break;
3307                 case CANDIDATE_MB_TYPE_BACKWARD:
3308                     s->mv_dir = MV_DIR_BACKWARD;
3309                     s->mb_intra= 0;
3310                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3311                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3312                     break;
3313                 case CANDIDATE_MB_TYPE_FORWARD:
3314                     s->mv_dir = MV_DIR_FORWARD;
3315                     s->mb_intra= 0;
3316                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3317                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3318                     break;
3319                 case CANDIDATE_MB_TYPE_FORWARD_I:
3320                     s->mv_dir = MV_DIR_FORWARD;
3321                     s->mv_type = MV_TYPE_FIELD;
3322                     s->mb_intra= 0;
3323                     for(i=0; i<2; i++){
3324                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3325                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3326                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3327                     }
3328                     break;
3329                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3330                     s->mv_dir = MV_DIR_BACKWARD;
3331                     s->mv_type = MV_TYPE_FIELD;
3332                     s->mb_intra= 0;
3333                     for(i=0; i<2; i++){
3334                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3335                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3336                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3337                     }
3338                     break;
3339                 case CANDIDATE_MB_TYPE_BIDIR_I:
3340                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3341                     s->mv_type = MV_TYPE_FIELD;
3342                     s->mb_intra= 0;
3343                     for(dir=0; dir<2; dir++){
3344                         for(i=0; i<2; i++){
3345                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3346                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3347                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3348                         }
3349                     }
3350                     break;
3351                 default:
3352                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3353                 }
3354
3355                 encode_mb(s, motion_x, motion_y);
3356
3357                 // RAL: Update last macroblock type
3358                 s->last_mv_dir = s->mv_dir;
3359
3360                 if (CONFIG_H263_ENCODER &&
3361                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3362                     ff_h263_update_motion_val(s);
3363
3364                 ff_mpv_reconstruct_mb(s, s->block);
3365             }
3366
3367             /* clean the MV table in I-, P- and S-frames; direct mode in B-frames reads it */
3368             if(s->mb_intra /* && I,P,S_TYPE */){
3369                 s->p_mv_table[xy][0]=0;
3370                 s->p_mv_table[xy][1]=0;
3371             }
3372
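            /* accumulate per-plane squared error for the PSNR / encoding_error stats */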
3373             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3374                 int w= 16;
3375                 int h= 16;
3376
3377                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3378                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3379
3380                 s->current_picture.encoding_error[0] += sse(
3381                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3382                     s->dest[0], w, h, s->linesize);
3383                 s->current_picture.encoding_error[1] += sse(
3384                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3385                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3386                 s->current_picture.encoding_error[2] += sse(
3387                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3388                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3389             }
3390             if(s->loop_filter){
3391                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3392                     ff_h263_loop_filter(s);
3393             }
3394             ff_dlog(s->avctx, "MB %d %d bits\n",
3395                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3396         }
3397     }
3398
3399     // not pretty, but the extension header must be written before flushing, so it has to be here
3400     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3401         ff_msmpeg4_encode_ext_header(s);
3402
3403     write_slice_end(s);
3404
3405     return 0;
3406 }
3407
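/* Merge per-thread statistics from a slice context back into the main
 * context: MERGE(field) adds src->field onto dst->field and zeroes it in src. */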
3408 #define MERGE(field) dst->field += src->field; src->field=0
3409 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3410     MERGE(me.scene_change_score);
3411     MERGE(me.mc_mb_var_sum_temp);
3412     MERGE(me.mb_var_sum_temp);
3413 }
3414
3415 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3416     int i;
3417
3418     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3419     MERGE(dct_count[1]);
3420     MERGE(mv_bits);
3421     MERGE(i_tex_bits);
3422     MERGE(p_tex_bits);
3423     MERGE(i_count);
3424     MERGE(f_count);
3425     MERGE(b_count);
3426     MERGE(skip_count);
3427     MERGE(misc_bits);
3428     MERGE(er.error_count);
3429     MERGE(padding_bug_score);
3430     MERGE(current_picture.encoding_error[0]);
3431     MERGE(current_picture.encoding_error[1]);
3432     MERGE(current_picture.encoding_error[2]);
3433
3434     if (dst->noise_reduction){
3435         for(i=0; i<64; i++){
3436             MERGE(dct_error_sum[0][i]);
3437             MERGE(dct_error_sum[1][i]);
3438         }
3439     }
3440
3441     av_assert1(put_bits_count(&src->pb) % 8 ==0);
3442     av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3443     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3444     flush_put_bits(&dst->pb);
3445 }
3446
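/* Choose the quantizer / lambda for the current picture: either the explicitly
 * scheduled next_lambda or the value suggested by the rate controller; with
 * adaptive quantization the per-MB qscale table is cleaned up for the codec in
 * use. Returns 0 on success, -1 if rate control fails. */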
3447 static int estimate_qp(MpegEncContext *s, int dry_run){
3448     if (s->next_lambda){
3449         s->current_picture_ptr->f->quality =
3450         s->current_picture.f->quality = s->next_lambda;
3451         if(!dry_run) s->next_lambda= 0;
3452     } else if (!s->fixed_qscale) {
3453         int quality = ff_rate_estimate_qscale(s, dry_run);
3454         s->current_picture_ptr->f->quality =
3455         s->current_picture.f->quality = quality;
3456         if (s->current_picture.f->quality < 0)
3457             return -1;
3458     }
3459
3460     if(s->adaptive_quant){
3461         switch(s->codec_id){
3462         case AV_CODEC_ID_MPEG4:
3463             if (CONFIG_MPEG4_ENCODER)
3464                 ff_clean_mpeg4_qscales(s);
3465             break;
3466         case AV_CODEC_ID_H263:
3467         case AV_CODEC_ID_H263P:
3468         case AV_CODEC_ID_FLV1:
3469             if (CONFIG_H263_ENCODER)
3470                 ff_clean_h263_qscales(s);
3471             break;
3472         default:
3473             ff_init_qscale_tab(s);
3474         }
3475
3476         s->lambda= s->lambda_table[0];
3477         //FIXME broken
3478     }else
3479         s->lambda = s->current_picture.f->quality;
3480     update_qscale(s);
3481     return 0;
3482 }
3483
3484 /* must be called before writing the header; derives pp_time/pb_time from the picture PTS */
3485 static void set_frame_distances(MpegEncContext * s){
3486     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3487     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3488
3489     if(s->pict_type==AV_PICTURE_TYPE_B){
3490         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3491         av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3492     }else{
3493         s->pp_time= s->time - s->last_non_b_time;
3494         s->last_non_b_time= s->time;
3495         av_assert1(s->picture_number==0 || s->pp_time > 0);
3496     }
3497 }
3498
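/* Encode one picture: estimate motion (or measure intra complexity for
 * I-frames) across all slice contexts, possibly switch P->I on a scene change,
 * pick f_code/b_code, set up the quantization matrices, write the picture
 * header and run encode_thread() for every slice context, then merge the
 * per-thread results. */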
3499 static int encode_picture(MpegEncContext *s, int picture_number)
3500 {
3501     int i, ret;
3502     int bits;
3503     int context_count = s->slice_context_count;
3504
3505     s->picture_number = picture_number;
3506
3507     /* Reset the average MB variance */
3508     s->me.mb_var_sum_temp    =
3509     s->me.mc_mb_var_sum_temp = 0;
3510
3511     /* we need to initialize some time vars before we can encode B-frames */
3512     // RAL: Condition added for MPEG1VIDEO
3513     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3514         set_frame_distances(s);
3515     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3516         ff_set_mpeg4_time(s);
3517
3518     s->me.scene_change_score=0;
3519
3520 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3521
3522     if(s->pict_type==AV_PICTURE_TYPE_I){
3523         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3524         else                        s->no_rounding=0;
3525     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3526         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3527             s->no_rounding ^= 1;
3528     }
3529
3530     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3531         if (estimate_qp(s,1) < 0)
3532             return -1;
3533         ff_get_2pass_fcode(s);
3534     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3535         if(s->pict_type==AV_PICTURE_TYPE_B)
3536             s->lambda= s->last_lambda_for[s->pict_type];
3537         else
3538             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3539         update_qscale(s);
3540     }
3541
3542     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3543         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3544         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3545         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3546         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3547     }
3548
3549     s->mb_intra=0; //for the rate distortion & bit compare functions
3550     for(i=1; i<context_count; i++){
3551         ret = ff_update_duplicate_context(s->thread_context[i], s);
3552         if (ret < 0)
3553             return ret;
3554     }
3555
3556     if(ff_init_me(s)<0)
3557         return -1;
3558
3559     /* Estimate motion for every MB */
3560     if(s->pict_type != AV_PICTURE_TYPE_I){
3561         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3562         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3563         if (s->pict_type != AV_PICTURE_TYPE_B) {
3564             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3565                 s->me_pre == 2) {
3566                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3567             }
3568         }
3569
3570         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3571     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3572         /* I-Frame */
3573         for(i=0; i<s->mb_stride*s->mb_height; i++)
3574             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3575
3576         if(!s->fixed_qscale){
3577             /* finding spatial complexity for I-frame rate control */
3578             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3579         }
3580     }
3581     for(i=1; i<context_count; i++){
3582         merge_context_after_me(s, s->thread_context[i]);
3583     }
3584     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3585     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3586     emms_c();
3587
3588     if (s->me.scene_change_score > s->scenechange_threshold &&
3589         s->pict_type == AV_PICTURE_TYPE_P) {
3590         s->pict_type= AV_PICTURE_TYPE_I;
3591         for(i=0; i<s->mb_stride*s->mb_height; i++)
3592             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3593         if(s->msmpeg4_version >= 3)
3594             s->no_rounding=1;
3595         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3596                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3597     }
3598
3599     if(!s->umvplus){
3600         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3601             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3602
3603             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3604                 int a,b;
3605                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3606                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3607                 s->f_code= FFMAX3(s->f_code, a, b);
3608             }
3609
3610             ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3611             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3612             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3613                 int j;
3614                 for(i=0; i<2; i++){
3615                     for(j=0; j<2; j++)
3616                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3617                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3618                 }
3619             }
3620         }
3621
3622         if(s->pict_type==AV_PICTURE_TYPE_B){
3623             int a, b;
3624
3625             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3626             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3627             s->f_code = FFMAX(a, b);
3628
3629             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3630             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3631             s->b_code = FFMAX(a, b);
3632
3633             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3634             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3635             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3636             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3637             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3638                 int dir, j;
3639                 for(dir=0; dir<2; dir++){
3640                     for(i=0; i<2; i++){
3641                         for(j=0; j<2; j++){
3642                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3643                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3644                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3645                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3646                         }
3647                     }
3648                 }
3649             }
3650         }
3651     }
3652
3653     if (estimate_qp(s, 0) < 0)
3654         return -1;
3655
3656     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3657         s->pict_type == AV_PICTURE_TYPE_I &&
3658         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3659         s->qscale= 3; //reduce clipping problems
3660
3661     if (s->out_format == FMT_MJPEG) {
3662         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3663         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3664
3665         if (s->avctx->intra_matrix) {
3666             chroma_matrix =
3667             luma_matrix = s->avctx->intra_matrix;
3668         }
3669         if (s->avctx->chroma_intra_matrix)
3670             chroma_matrix = s->avctx->chroma_intra_matrix;
3671
3672         /* for mjpeg, we do include qscale in the matrix */
3673         for(i=1;i<64;i++){
3674             int j = s->idsp.idct_permutation[i];
3675
3676             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3677             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3678         }
3679         s->y_dc_scale_table=
3680         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3681         s->chroma_intra_matrix[0] =
3682         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3683         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3684                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3685         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3686                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3687         s->qscale= 8;
3688     }
3689     if(s->codec_id == AV_CODEC_ID_AMV){
3690         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3691         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3692         for(i=1;i<64;i++){
3693             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3694
3695             s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3696             s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3697         }
3698         s->y_dc_scale_table= y;
3699         s->c_dc_scale_table= c;
3700         s->intra_matrix[0] = 13;
3701         s->chroma_intra_matrix[0] = 14;
3702         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3703                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3704         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3705                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3706         s->qscale= 8;
3707     }
3708
3709     if (s->out_format == FMT_SPEEDHQ) {
3710         s->y_dc_scale_table=
3711         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3712     }
3713
3714     //FIXME var duplication
3715     s->current_picture_ptr->f->key_frame =
3716     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3717     s->current_picture_ptr->f->pict_type =
3718     s->current_picture.f->pict_type = s->pict_type;
3719
3720     if (s->current_picture.f->key_frame)
3721         s->picture_in_gop_number=0;
3722
3723     s->mb_x = s->mb_y = 0;
3724     s->last_bits= put_bits_count(&s->pb);
3725     switch(s->out_format) {
3726 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3727     case FMT_MJPEG:
3728         /* s->huffman == HUFFMAN_TABLE_OPTIMAL can only be true for MJPEG. */
3729         if (!CONFIG_MJPEG_ENCODER || s->huffman != HUFFMAN_TABLE_OPTIMAL)
3730             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3731                                            s->pred, s->intra_matrix, s->chroma_intra_matrix);
3732         break;
3733 #endif
3734     case FMT_SPEEDHQ:
3735         if (CONFIG_SPEEDHQ_ENCODER)
3736             ff_speedhq_encode_picture_header(s);
3737         break;
3738     case FMT_H261:
3739         if (CONFIG_H261_ENCODER)
3740             ff_h261_encode_picture_header(s, picture_number);
3741         break;
3742     case FMT_H263:
3743         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3744             ff_wmv2_encode_picture_header(s, picture_number);
3745         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3746             ff_msmpeg4_encode_picture_header(s, picture_number);
3747         else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3748             ret = ff_mpeg4_encode_picture_header(s, picture_number);
3749             if (ret < 0)
3750                 return ret;
3751         } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3752             ret = ff_rv10_encode_picture_header(s, picture_number);
3753             if (ret < 0)
3754                 return ret;
3755         }
3756         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3757             ff_rv20_encode_picture_header(s, picture_number);
3758         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3759             ff_flv_encode_picture_header(s, picture_number);
3760         else if (CONFIG_H263_ENCODER)
3761             ff_h263_encode_picture_header(s, picture_number);
3762         break;
3763     case FMT_MPEG1:
3764         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3765             ff_mpeg1_encode_picture_header(s, picture_number);
3766         break;
3767     default:
3768         av_assert0(0);
3769     }
3770     bits= put_bits_count(&s->pb);
3771     s->header_bits= bits - s->last_bits;
3772
3773     for(i=1; i<context_count; i++){
3774         update_duplicate_context_after_me(s->thread_context[i], s);
3775     }
3776     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3777     for(i=1; i<context_count; i++){
3778         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3779             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3780         merge_context_after_encode(s, s->thread_context[i]);
3781     }
3782     emms_c();
3783     return 0;
3784 }
3785
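/* Noise-reduction postprocessing of the DCT coefficients: shrink every
 * coefficient towards zero by the running per-position offset while
 * accumulating the absolute levels in dct_error_sum, from which those
 * offsets are derived. */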
3786 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3787     const int intra= s->mb_intra;
3788     int i;
3789
3790     s->dct_count[intra]++;
3791
3792     for(i=0; i<64; i++){
3793         int level= block[i];
3794
3795         if(level){
3796             if(level>0){
3797                 s->dct_error_sum[intra][i] += level;
3798                 level -= s->dct_offset[intra][i];
3799                 if(level<0) level=0;
3800             }else{
3801                 s->dct_error_sum[intra][i] -= level;
3802                 level += s->dct_offset[intra][i];
3803                 if(level>0) level=0;
3804             }
3805             block[i]= level;
3806         }
3807     }
3808 }
3809
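/* Trellis (rate-distortion optimal) quantization of one block: after the
 * forward DCT, the candidate levels for each scan position are evaluated and
 * only the cheapest rate+distortion paths ("survivors") are kept; finally the
 * best position for the last coded coefficient is chosen. Returns the index
 * of the last nonzero coefficient and sets *overflow if a level exceeds
 * s->max_qcoeff. */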
3810 static int dct_quantize_trellis_c(MpegEncContext *s,
3811                                   int16_t *block, int n,
3812                                   int qscale, int *overflow){
3813     const int *qmat;
3814     const uint16_t *matrix;
3815     const uint8_t *scantable;
3816     const uint8_t *perm_scantable;
3817     int max=0;
3818     unsigned int threshold1, threshold2;
3819     int bias=0;
3820     int run_tab[65];
3821     int level_tab[65];
3822     int score_tab[65];
3823     int survivor[65];
3824     int survivor_count;
3825     int last_run=0;
3826     int last_level=0;
3827     int last_score= 0;
3828     int last_i;
3829     int coeff[2][64];
3830     int coeff_count[64];
3831     int qmul, qadd, start_i, last_non_zero, i, dc;
3832     const int esc_length= s->ac_esc_length;
3833     uint8_t * length;
3834     uint8_t * last_length;
3835     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3836     int mpeg2_qscale;
3837
3838     s->fdsp.fdct(block);
3839
3840     if(s->dct_error_sum)
3841         s->denoise_dct(s, block);
3842     qmul= qscale*16;
3843     qadd= ((qscale-1)|1)*8;
3844
3845     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3846     else                 mpeg2_qscale = qscale << 1;
3847
3848     if (s->mb_intra) {
3849         int q;
3850         scantable= s->intra_scantable.scantable;
3851         perm_scantable= s->intra_scantable.permutated;
3852         if (!s->h263_aic) {
3853             if (n < 4)
3854                 q = s->y_dc_scale;
3855             else
3856                 q = s->c_dc_scale;
3857             q = q << 3;
3858         } else{
3859             /* For AIC we skip quant/dequant of INTRADC */
3860             q = 1 << 3;
3861             qadd=0;
3862         }
3863
3864         /* note: block[0] is assumed to be positive */
3865         block[0] = (block[0] + (q >> 1)) / q;
3866         start_i = 1;
3867         last_non_zero = 0;
3868         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3869         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3870         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3871             bias= 1<<(QMAT_SHIFT-1);
3872
3873         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3874             length     = s->intra_chroma_ac_vlc_length;
3875             last_length= s->intra_chroma_ac_vlc_last_length;
3876         } else {
3877             length     = s->intra_ac_vlc_length;
3878             last_length= s->intra_ac_vlc_last_length;
3879         }
3880     } else {
3881         scantable= s->inter_scantable.scantable;
3882         perm_scantable= s->inter_scantable.permutated;
3883         start_i = 0;
3884         last_non_zero = -1;
3885         qmat = s->q_inter_matrix[qscale];
3886         matrix = s->inter_matrix;
3887         length     = s->inter_ac_vlc_length;
3888         last_length= s->inter_ac_vlc_last_length;
3889     }
3890     last_i= start_i;
3891
3892     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3893     threshold2= (threshold1<<1);
3894
3895     for(i=63; i>=start_i; i--) {
3896         const int j = scantable[i];
3897         int level = block[j] * qmat[j];
3898
3899         if(((unsigned)(level+threshold1))>threshold2){
3900             last_non_zero = i;
3901             break;
3902         }
3903     }
3904
3905     for(i=start_i; i<=last_non_zero; i++) {
3906         const int j = scantable[i];
3907         int level = block[j] * qmat[j];
3908
3909 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3910 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3911         if(((unsigned)(level+threshold1))>threshold2){
3912             if(level>0){
3913                 level= (bias + level)>>QMAT_SHIFT;
3914                 coeff[0][i]= level;
3915                 coeff[1][i]= level-1;
3916 //                coeff[2][k]= level-2;
3917             }else{
3918                 level= (bias - level)>>QMAT_SHIFT;
3919                 coeff[0][i]= -level;
3920                 coeff[1][i]= -level+1;
3921 //                coeff[2][k]= -level+2;
3922             }
3923             coeff_count[i]= FFMIN(level, 2);
3924             av_assert2(coeff_count[i]);
3925             max |=level;
3926         }else{
3927             coeff[0][i]= (level>>31)|1;
3928             coeff_count[i]= 1;
3929         }
3930     }
3931
3932     *overflow= s->max_qcoeff < max; //overflow might have happened
3933
3934     if(last_non_zero < start_i){
3935         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3936         return last_non_zero;
3937     }
3938
3939     score_tab[start_i]= 0;
3940     survivor[0]= start_i;
3941     survivor_count= 1;
3942
3943     for(i=start_i; i<=last_non_zero; i++){
3944         int level_index, j, zero_distortion;
3945         int dct_coeff= FFABS(block[ scantable[i] ]);
3946         int best_score=256*256*256*120;
3947
3948         if (s->fdsp.fdct == ff_fdct_ifast)
3949             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3950         zero_distortion= dct_coeff*dct_coeff;
3951
3952         for(level_index=0; level_index < coeff_count[i]; level_index++){
3953             int distortion;
3954             int level= coeff[level_index][i];
3955             const int alevel= FFABS(level);
3956             int unquant_coeff;
3957
3958             av_assert2(level);
3959
3960             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3961                 unquant_coeff= alevel*qmul + qadd;
3962             } else if(s->out_format == FMT_MJPEG) {
3963                 j = s->idsp.idct_permutation[scantable[i]];
3964                 unquant_coeff = alevel * matrix[j] * 8;
3965             }else{ // MPEG-1
3966                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3967                 if(s->mb_intra){
3968                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3969                         unquant_coeff =   (unquant_coeff - 1) | 1;
3970                 }else{
3971                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3972                         unquant_coeff =   (unquant_coeff - 1) | 1;
3973                 }
3974                 unquant_coeff<<= 3;
3975             }
3976
3977             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3978             level+=64;
3979             if((level&(~127)) == 0){
3980                 for(j=survivor_count-1; j>=0; j--){
3981                     int run= i - survivor[j];
3982                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3983                     score += score_tab[i-run];
3984
3985                     if(score < best_score){
3986                         best_score= score;
3987                         run_tab[i+1]= run;
3988                         level_tab[i+1]= level-64;
3989                     }
3990                 }
3991
3992                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3993                     for(j=survivor_count-1; j>=0; j--){
3994                         int run= i - survivor[j];
3995                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3996                         score += score_tab[i-run];
3997                         if(score < last_score){
3998                             last_score= score;
3999                             last_run= run;
4000                             last_level= level-64;
4001                             last_i= i+1;
4002                         }
4003                     }
4004                 }
4005             }else{
4006                 distortion += esc_length*lambda;
4007                 for(j=survivor_count-1; j>=0; j--){
4008                     int run= i - survivor[j];
4009                     int score= distortion + score_tab[i-run];
4010
4011                     if(score < best_score){
4012                         best_score= score;
4013                         run_tab[i+1]= run;
4014                         level_tab[i+1]= level-64;
4015                     }
4016                 }
4017
4018                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4019                     for(j=survivor_count-1; j>=0; j--){
4020                         int run= i - survivor[j];
4021                         int score= distortion + score_tab[i-run];
4022                         if(score < last_score){
4023                             last_score= score;
4024                             last_run= run;
4025                             last_level= level-64;
4026                             last_i= i+1;
4027                         }
4028                     }
4029                 }
4030             }
4031         }
4032
4033         score_tab[i+1]= best_score;
4034
4035         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4036         if(last_non_zero <= 27){
4037             for(; survivor_count; survivor_count--){
4038                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4039                     break;
4040             }
4041         }else{
4042             for(; survivor_count; survivor_count--){
4043                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4044                     break;
4045             }
4046         }
4047
4048         survivor[ survivor_count++ ]= i+1;
4049     }
4050
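    /* For the non-H.263/H.261 formats the end-of-block cost does not depend
     * on where the block ends (it is approximated as 2 bits via lambda * 2),
     * so simply pick the cheapest stopping point from score_tab[];
     * H.263/H.261 use a separate "last coefficient" VLC table and were
     * handled inside the loop above. */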
4051     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4052         last_score= 256*256*256*120;
4053         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4054             int score= score_tab[i];
4055             if (i)
4056                 score += lambda * 2; // FIXME more exact?
4057
4058             if(score < last_score){
4059                 last_score= score;
4060                 last_i= i;
4061                 last_level= level_tab[i];
4062                 last_run= run_tab[i];
4063             }
4064         }
4065     }
4066
4067     s->coded_score[n] = last_score;
4068
4069     dc= FFABS(block[0]);
4070     last_non_zero= last_i - 1;
4071     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4072
4073     if(last_non_zero < start_i)
4074         return last_non_zero;
4075
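    /* Only coefficient 0 survived (inter blocks, start_i == 0): re-decide its
     * level by weighing distortion + rate against dropping the block
     * entirely; returning -1 signals an empty block to the caller. */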
4076     if(last_non_zero == 0 && start_i == 0){
4077         int best_level= 0;
4078         int best_score= dc * dc;
4079
4080         for(i=0; i<coeff_count[0]; i++){
4081             int level= coeff[i][0];
4082             int alevel= FFABS(level);
4083             int unquant_coeff, score, distortion;
4084
4085             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4086                     unquant_coeff= (alevel*qmul + qadd)>>3;
4087             } else{ // MPEG-1
4088                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4089                     unquant_coeff =   (unquant_coeff - 1) | 1;
4090             }
4091             unquant_coeff = (unquant_coeff + 4) >> 3;
4092             unquant_coeff<<= 3 + 3;
4093
4094             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4095             level+=64;
4096             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4097             else                    score= distortion + esc_length*lambda;
4098
4099             if(score < best_score){
4100                 best_score= score;
4101                 best_level= level - 64;
4102             }
4103         }
4104         block[0]= best_level;
4105         s->coded_score[n] = best_score - dc*dc;
4106         if(best_level == 0) return -1;
4107         else                return last_non_zero;
4108     }
4109
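    /* Backtrack from the chosen endpoint through run_tab[]/level_tab[] and
     * store the selected levels into the block in IDCT-permuted order. */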
4110     i= last_i;
4111     av_assert2(last_level);
4112
4113     block[ perm_scantable[last_non_zero] ]= last_level;
4114     i -= last_run + 1;
4115
4116     for(; i>start_i; i -= run_tab[i] + 1){
4117         block[ perm_scantable[i-1] ]= level_tab[i];
4118     }
4119
4120     return last_non_zero;
4121 }
4122
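/* basis[] holds the (permuted) 8x8 DCT basis functions scaled by
 * 1 << BASIS_SHIFT; dct_quantize_refine() uses them with try_8x8basis() /
 * add_8x8basis() to measure how a +/-1 change of a single quantized
 * coefficient alters the spatial-domain reconstruction error. */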
4123 static int16_t basis[64][64];
4124
4125 static void build_basis(uint8_t *perm){
4126     int i, j, x, y;
4127     emms_c();
4128     for(i=0; i<8; i++){
4129         for(j=0; j<8; j++){
4130             for(y=0; y<8; y++){
4131                 for(x=0; x<8; x++){
4132                     double s= 0.25*(1<<BASIS_SHIFT);
4133                     int index= 8*i + j;
4134                     int perm_index= perm[index];
4135                     if(i==0) s*= sqrt(0.5);
4136                     if(j==0) s*= sqrt(0.5);
4137                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4138                 }
4139             }
4140         }
4141     }
4142 }
4143
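/* Quantizer noise shaping: starting from the output of the regular quantizer,
 * greedily apply the single +/-1 coefficient change that most reduces
 * weighted reconstruction error + lambda * rate, and repeat until no change
 * helps any more; used when s->quantizer_noise_shaping is set. */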
4144 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4145                         int16_t *block, int16_t *weight, int16_t *orig,
4146                         int n, int qscale){
4147     int16_t rem[64];
4148     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4149     const uint8_t *scantable;
4150     const uint8_t *perm_scantable;
4151 //    unsigned int threshold1, threshold2;
4152 //    int bias=0;
4153     int run_tab[65];
4154     int prev_run=0;
4155     int prev_level=0;
4156     int qmul, qadd, start_i, last_non_zero, i, dc;
4157     uint8_t * length;
4158     uint8_t * last_length;
4159     int lambda;
4160     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4161
4162     if(basis[0][0] == 0)
4163         build_basis(s->idsp.idct_permutation);
4164
4165     qmul= qscale*2;
4166     qadd= (qscale-1)|1;
4167     if (s->mb_intra) {
4168         scantable= s->intra_scantable.scantable;
4169         perm_scantable= s->intra_scantable.permutated;
4170         if (!s->h263_aic) {
4171             if (n < 4)
4172                 q = s->y_dc_scale;
4173             else
4174                 q = s->c_dc_scale;
4175         } else{
4176             /* For AIC we skip quant/dequant of INTRADC */
4177             q = 1;
4178             qadd=0;
4179         }
4180         q <<= RECON_SHIFT-3;
4181         /* note: block[0] is assumed to be positive */
4182         dc= block[0]*q;
4183 //        block[0] = (block[0] + (q >> 1)) / q;
4184         start_i = 1;
4185 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4186 //            bias= 1<<(QMAT_SHIFT-1);
4187         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4188             length     = s->intra_chroma_ac_vlc_length;
4189             last_length= s->intra_chroma_ac_vlc_last_length;
4190         } else {
4191             length     = s->intra_ac_vlc_length;
4192             last_length= s->intra_ac_vlc_last_length;
4193         }
4194     } else {
4195         scantable= s->inter_scantable.scantable;
4196         perm_scantable= s->inter_scantable.permutated;
4197         dc= 0;
4198         start_i = 0;
4199         length     = s->inter_ac_vlc_length;
4200         last_length= s->inter_ac_vlc_last_length;
4201     }
4202     last_non_zero = s->block_last_index[n];
4203
4204     dc += (1<<(RECON_SHIFT-1));
4205     for(i=0; i<64; i++){
4206         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4207     }
4208
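    /* Map the per-coefficient noise-shaping weights into the 16..63 range and
     * scale lambda by their summed squares so that the rate term below is
     * commensurate with the weighted distortion returned by try_8x8basis(). */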
4209     sum=0;
4210     for(i=0; i<64; i++){
4211         int one= 36;
4212         int qns=4;
4213         int w;
4214
4215         w= FFABS(weight[i]) + qns*one;
4216         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4217
4218         weight[i] = w;
4219 //        w=weight[i] = (63*qns + (w/2)) / w;
4220
4221         av_assert2(w>0);
4222         av_assert2(w<(1<<6));
4223         sum += w*w;
4224     }
4225     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4226
4227     run=0;
4228     rle_index=0;
4229     for(i=start_i; i<=last_non_zero; i++){
4230         int j= perm_scantable[i];
4231         const int level= block[j];
4232         int coeff;
4233
4234         if(level){
4235             if(level<0) coeff= qmul*level - qadd;
4236             else        coeff= qmul*level + qadd;
4237             run_tab[rle_index++]=run;
4238             run=0;
4239
4240             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4241         }else{
4242             run++;
4243         }
4244     }
4245
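    /* Greedy refinement loop: each pass evaluates a +/-1 change of every
     * coefficient (rate delta from the VLC length tables, distortion delta
     * from try_8x8basis() on the residual rem[]), applies only the single
     * best improvement and updates rem[], then tries again; the loop stops
     * once nothing improves the score. */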
4246     for(;;){
4247         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4248         int best_coeff=0;
4249         int best_change=0;
4250         int run2, best_unquant_change=0, analyze_gradient;
4251         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4252
4253         if(analyze_gradient){
4254             for(i=0; i<64; i++){
4255                 int w= weight[i];
4256
4257                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4258             }
4259             s->fdsp.fdct(d1);
4260         }
4261
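        /* Intra blocks (start_i == 1): the DC coefficient has its own step
         * size q, so +/-1 changes of block[0] are tried separately here. */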
4262         if(start_i){
4263             const int level= block[0];
4264             int change, old_coeff;
4265
4266             av_assert2(s->mb_intra);
4267
4268             old_coeff= q*level;
4269
4270             for(change=-1; change<=1; change+=2){
4271                 int new_level= level + change;
4272                 int score, new_coeff;
4273
4274                 new_coeff= q*new_level;
4275                 if(new_coeff >= 2048 || new_coeff < 0)
4276                     continue;
4277
4278                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4279                                                   new_coeff - old_coeff);
4280                 if(score<best_score){
4281                     best_score= score;
4282                     best_coeff= 0;
4283                     best_change= change;
4284                     best_unquant_change= new_coeff - old_coeff;
4285                 }
4286             }
4287         }
4288
4289         run=0;
4290         rle_index=0;
4291         run2= run_tab[rle_index++];
4292         prev_level=0;
4293         prev_run=0;
4294
4295         for(i=start_i; i<64; i++){
4296             int j= perm_scantable[i];
4297             const int level= block[j];
4298             int change, old_coeff;
4299
4300             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4301                 break;
4302
4303             if(level){
4304                 if(level<0) old_coeff= qmul*level - qadd;
4305                 else        old_coeff= qmul*level + qadd;
4306                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4307             }else{
4308                 old_coeff=0;
4309                 run2--;
4310                 av_assert2(run2>=0 || i >= last_non_zero );
4311             }
4312
4313             for(change=-1; change<=1; change+=2){
4314                 int new_level= level + change;
4315                 int score, new_coeff, unquant_change;
4316
4317                 score=0;
4318                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4319                    continue;
4320
4321                 if(new_level){
4322                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4323                     else            new_coeff= qmul*new_level + qadd;
4324                     if(new_coeff >= 2048 || new_coeff <= -2048)
4325                         continue;
4326                     //FIXME check for overflow
4327
4328                     if(level){
4329                         if(level < 63 && level > -63){
4330                             if(i < last_non_zero)
4331                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4332                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4333                             else
4334                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4335                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4336                         }
4337                     }else{
4338                         av_assert2(FFABS(new_level)==1);
4339
4340                         if(analyze_gradient){
4341                             int g= d1[ scantable[i] ];
4342                             if(g && (g^new_level) >= 0)
4343                                 continue;
4344                         }
4345
4346                         if(i < last_non_zero){
4347                             int next_i= i + run2 + 1;
4348                             int next_level= block[ perm_scantable[next_i] ] + 64;
4349
4350                             if(next_level&(~127))
4351                                 next_level= 0;
4352
4353                             if(next_i < last_non_zero)
4354                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4355                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4356                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4357                             else
4358                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4359                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4360                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4361                         }else{
4362                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4363                             if(prev_level){
4364                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4365                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4366                             }
4367                         }
4368                     }
4369                 }else{
4370                     new_coeff=0;
4371                     av_assert2(FFABS(level)==1);
4372
4373                     if(i < last_non_zero){
4374                         int next_i= i + run2 + 1;
4375                         int next_level= block[ perm_scantable[next_i] ] + 64;
4376
4377                         if(next_level&(~127))
4378                             next_level= 0;
4379
4380                         if(next_i < last_non_zero)
4381                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4382                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4383                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4384                         else
4385                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4386                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4387                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4388                     }else{
4389                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4390                         if(prev_level){
4391                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4392                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4393                         }
4394                     }
4395                 }
4396
4397                 score *= lambda;
4398
4399                 unquant_change= new_coeff - old_coeff;
4400                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4401
4402                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4403                                                    unquant_change);
4404                 if(score<best_score){
4405                     best_score= score;
4406                     best_coeff= i;
4407                     best_change= change;
4408                     best_unquant_change= unquant_change;
4409                 }
4410             }
4411             if(level){
4412                 prev_level= level + 64;
4413                 if(prev_level&(~127))
4414                     prev_level= 0;
4415                 prev_run= run;
4416                 run=0;
4417             }else{
4418                 run++;
4419             }
4420         }
4421
4422         if(best_change){
4423             int j= perm_scantable[ best_coeff ];
4424
4425             block[j] += best_change;
4426
4427             if(best_coeff > last_non_zero){
4428                 last_non_zero= best_coeff;
4429                 av_assert2(block[j]);
4430             }else{
4431                 for(; last_non_zero>=start_i; last_non_zero--){
4432                     if(block[perm_scantable[last_non_zero]])
4433                         break;
4434                 }
4435             }
4436
4437             run=0;
4438             rle_index=0;
4439             for(i=start_i; i<=last_non_zero; i++){
4440                 int j= perm_scantable[i];
4441                 const int level= block[j];
4442
4443                 if(level){
4444                     run_tab[rle_index++]=run;
4445                     run=0;
4446                 }else{
4447                     run++;
4448                 }
4449             }
4450
4451             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4452         }else{
4453             break;
4454         }
4455     }
4456
4457     return last_non_zero;
4458 }
4459
4460 /**
4461  * Permute an 8x8 block according to permutation.
4462  * @param block the block which will be permuted according to
4463  *              the given permutation vector
4464  * @param permutation the permutation vector
4465  * @param last the last non-zero coefficient in scantable order, used to
4466  *             speed the permutation up
4467  * @param scantable the used scantable, this is only used to speed the
4468  *                  permutation up, the block is not (inverse) permuted
4469  *                  to scantable order!
4470  */
4471 void ff_block_permute(int16_t *block, uint8_t *permutation,
4472                       const uint8_t *scantable, int last)
4473 {
4474     int i;
4475     int16_t temp[64];
4476
4477     if (last <= 0)
4478         return;
4479     //FIXME it is ok but not clean and might fail for some permutations
4480     // if (permutation[1] == 1)
4481     // return;
4482
4483     for (i = 0; i <= last; i++) {
4484         const int j = scantable[i];
4485         temp[j] = block[j];
4486         block[j] = 0;
4487     }
4488
4489     for (i = 0; i <= last; i++) {
4490         const int j = scantable[i];
4491         const int perm_j = permutation[j];
4492         block[perm_j] = temp[j];
4493     }
4494 }
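
/* Illustrative usage (it mirrors the call in ff_dct_quantize_c() below):
 * after quantization, the nonzero coefficients are moved into the order
 * expected by the selected IDCT:
 *
 *     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
 *         ff_block_permute(block, s->idsp.idct_permutation,
 *                          scantable, last_non_zero);
 */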
4495
4496 int ff_dct_quantize_c(MpegEncContext *s,
4497                         int16_t *block, int n,
4498                         int qscale, int *overflow)
4499 {
4500     int i, j, level, last_non_zero, q, start_i;
4501     const int *qmat;
4502     const uint8_t *scantable;
4503     int bias;
4504     int max=0;
4505     unsigned int threshold1, threshold2;
4506
4507     s->fdsp.fdct(block);
4508
4509     if(s->dct_error_sum)
4510         s->denoise_dct(s, block);
4511
4512     if (s->mb_intra) {
4513         scantable= s->intra_scantable.scantable;
4514         if (!s->h263_aic) {
4515             if (n < 4)
4516                 q = s->y_dc_scale;
4517             else
4518                 q = s->c_dc_scale;
4519             q = q << 3;
4520         } else
4521             /* For AIC we skip quant/dequant of INTRADC */
4522             q = 1 << 3;
4523
4524         /* note: block[0] is assumed to be positive */
4525         block[0] = (block[0] + (q >> 1)) / q;
4526         start_i = 1;
4527         last_non_zero = 0;
4528         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4529         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4530     } else {
4531         scantable= s->inter_scantable.scantable;
4532         start_i = 0;
4533         last_non_zero = -1;
4534         qmat = s->q_inter_matrix[qscale];
4535         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4536     }
4537     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4538     threshold2= (threshold1<<1);
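    /* ((unsigned)(level + threshold1)) > threshold2 is a branch-free test for
     * |level| > threshold1: values in [-threshold1, threshold1] map into
     * [0, threshold2] after the offset, while anything more negative wraps to
     * a large unsigned value.  Coefficients failing the test quantize to 0,
     * so the backwards scan below uses it to find last_non_zero while zeroing
     * the trailing coefficients. */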
4539     for(i=63;i>=start_i;i--) {
4540         j = scantable[i];
4541         level = block[j] * qmat[j];
4542
4543         if(((unsigned)(level+threshold1))>threshold2){
4544             last_non_zero = i;
4545             break;
4546         }else{
4547             block[j]=0;
4548         }
4549     }
4550     for(i=start_i; i<=last_non_zero; i++) {
4551         j = scantable[i];
4552         level = block[j] * qmat[j];
4553
4554 //        if(   bias+level >= (1<<QMAT_SHIFT)
4555 //           || bias-level >= (1<<QMAT_SHIFT)){
4556         if(((unsigned)(level+threshold1))>threshold2){
4557             if(level>0){
4558                 level= (bias + level)>>QMAT_SHIFT;
4559                 block[j]= level;
4560             }else{
4561                 level= (bias - level)>>QMAT_SHIFT;
4562                 block[j]= -level;
4563             }
4564             max |=level;
4565         }else{
4566             block[j]=0;
4567         }
4568     }
4569     *overflow= s->max_qcoeff < max; //overflow might have happened
4570
4571     /* We need this permutation so that the coefficients match the IDCT's input order; only the nonzero elements are permuted. */
4572     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4573         ff_block_permute(block, s->idsp.idct_permutation,
4574                       scantable, last_non_zero);
4575
4576     return last_non_zero;
4577 }
4578
4579 #define OFFSET(x) offsetof(MpegEncContext, x)
4580 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4581 static const AVOption h263_options[] = {
4582     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4583     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4584     FF_MPV_COMMON_OPTS
4585 #if FF_API_MPEGVIDEO_OPTS
4586     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4587     FF_MPV_DEPRECATED_A53_CC_OPT
4588     FF_MPV_DEPRECATED_MATRIX_OPT
4589     FF_MPV_DEPRECATED_BFRAME_OPTS
4590 #endif
4591     { NULL },
4592 };
4593
4594 static const AVClass h263_class = {
4595     .class_name = "H.263 encoder",
4596     .item_name  = av_default_item_name,
4597     .option     = h263_options,
4598     .version    = LIBAVUTIL_VERSION_INT,
4599 };
4600
4601 const AVCodec ff_h263_encoder = {
4602     .name           = "h263",
4603     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4604     .type           = AVMEDIA_TYPE_VIDEO,
4605     .id             = AV_CODEC_ID_H263,
4606     .priv_data_size = sizeof(MpegEncContext),
4607     .init           = ff_mpv_encode_init,
4608     .encode2        = ff_mpv_encode_picture,
4609     .close          = ff_mpv_encode_end,
4610     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4611     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4612     .priv_class     = &h263_class,
4613 };
4614
4615 static const AVOption h263p_options[] = {
4616     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4617     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4618     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4619     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4620     FF_MPV_COMMON_OPTS
4621 #if FF_API_MPEGVIDEO_OPTS
4622     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4623     FF_MPV_DEPRECATED_A53_CC_OPT
4624     FF_MPV_DEPRECATED_MATRIX_OPT
4625     FF_MPV_DEPRECATED_BFRAME_OPTS
4626 #endif
4627     { NULL },
4628 };
4629 static const AVClass h263p_class = {
4630     .class_name = "H.263p encoder",
4631     .item_name  = av_default_item_name,
4632     .option     = h263p_options,
4633     .version    = LIBAVUTIL_VERSION_INT,
4634 };
4635
4636 const AVCodec ff_h263p_encoder = {
4637     .name           = "h263p",
4638     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4639     .type           = AVMEDIA_TYPE_VIDEO,
4640     .id             = AV_CODEC_ID_H263P,
4641     .priv_data_size = sizeof(MpegEncContext),
4642     .init           = ff_mpv_encode_init,
4643     .encode2        = ff_mpv_encode_picture,
4644     .close          = ff_mpv_encode_end,
4645     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4646     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4647     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4648     .priv_class     = &h263p_class,
4649 };
4650
4651 static const AVClass msmpeg4v2_class = {
4652     .class_name = "msmpeg4v2 encoder",
4653     .item_name  = av_default_item_name,
4654     .option     = ff_mpv_generic_options,
4655     .version    = LIBAVUTIL_VERSION_INT,
4656 };
4657
4658 const AVCodec ff_msmpeg4v2_encoder = {
4659     .name           = "msmpeg4v2",
4660     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4661     .type           = AVMEDIA_TYPE_VIDEO,
4662     .id             = AV_CODEC_ID_MSMPEG4V2,
4663     .priv_data_size = sizeof(MpegEncContext),
4664     .init           = ff_mpv_encode_init,
4665     .encode2        = ff_mpv_encode_picture,
4666     .close          = ff_mpv_encode_end,
4667     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4668     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4669     .priv_class     = &msmpeg4v2_class,
4670 };
4671
4672 static const AVClass msmpeg4v3_class = {
4673     .class_name = "msmpeg4v3 encoder",
4674     .item_name  = av_default_item_name,
4675     .option     = ff_mpv_generic_options,
4676     .version    = LIBAVUTIL_VERSION_INT,
4677 };
4678
4679 const AVCodec ff_msmpeg4v3_encoder = {
4680     .name           = "msmpeg4",
4681     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4682     .type           = AVMEDIA_TYPE_VIDEO,
4683     .id             = AV_CODEC_ID_MSMPEG4V3,
4684     .priv_data_size = sizeof(MpegEncContext),
4685     .init           = ff_mpv_encode_init,
4686     .encode2        = ff_mpv_encode_picture,
4687     .close          = ff_mpv_encode_end,
4688     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4689     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4690     .priv_class     = &msmpeg4v3_class,
4691 };
4692
4693 static const AVClass wmv1_class = {
4694     .class_name = "wmv1 encoder",
4695     .item_name  = av_default_item_name,
4696     .option     = ff_mpv_generic_options,
4697     .version    = LIBAVUTIL_VERSION_INT,
4698 };
4699
4700 const AVCodec ff_wmv1_encoder = {
4701     .name           = "wmv1",
4702     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4703     .type           = AVMEDIA_TYPE_VIDEO,
4704     .id             = AV_CODEC_ID_WMV1,
4705     .priv_data_size = sizeof(MpegEncContext),
4706     .init           = ff_mpv_encode_init,
4707     .encode2        = ff_mpv_encode_picture,
4708     .close          = ff_mpv_encode_end,
4709     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4710     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4711     .priv_class     = &wmv1_class,
4712 };