git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "mjpegenc_common.h"
  47 #include "mathops.h"
  48 #include "mpegutils.h"
  49 #include "mjpegenc.h"
  50 #include "msmpeg4.h"
  51 #include "pixblockdsp.h"
  52 #include "qpeldsp.h"
  53 #include "faandct.h"
  54 #include "thread.h"
  55 #include "aandcttab.h"
  56 #include "flv.h"
  57 #include "mpeg4video.h"
  58 #include "internal.h"
  59 #include "bytestream.h"
  60 #include "wmv2.h"
  61 #include <limits.h>
  62 #include "sp5x.h"
  63
/* Number of fractional bits in the quantizer bias values: the default biases
 * in this file are written as shifts relative to it, e.g.
 * 3 << (QUANT_BIAS_SHIFT - 3) for a bias of 3/8. */
  64 #define QUANT_BIAS_SHIFT 8
  65
/* Fixed-point precision of the reciprocal tables built by ff_convert_matrix():
 * qmat16[][0][] entries are (1 << QMAT_SHIFT_MMX) / den and
 * qmat[][] entries are (1 << QMAT_SHIFT) / den. */
  66 #define QMAT_SHIFT_MMX 16
  67 #define QMAT_SHIFT 21
  68
/* Forward declarations of file-local helpers defined later in this file. */
  69 static int encode_picture(MpegEncContext *s, int picture_number);
  70 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  71 static int sse_mb(MpegEncContext *s);
  72 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  73 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  74
/* Default tables installed by mpv_encode_defaults() as s->me.mv_penalty and
 * s->fcode_tab. Being static they start zero-initialized; mpv_encode_defaults()
 * sets the 32 default_fcode_tab entries around index MAX_MV to 1. */
  75 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  76 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  77
/* Option table made of the entries expanded from FF_MPV_COMMON_OPTS, followed
 * by the terminating NULL entry. */
  78 const AVOption ff_mpv_generic_options[] = {
  79     FF_MPV_COMMON_OPTS
  80     { NULL },
  81 };
  82
  83 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  84                        uint16_t (*qmat16)[2][64],
  85                        const uint16_t *quant_matrix,
  86                        int bias, int qmin, int qmax, int intra)
  87 {
  88     FDCTDSPContext *fdsp = &s->fdsp;
  89     int qscale;
  90     int shift = 0;
  91
  92     for (qscale = qmin; qscale <= qmax; qscale++) {
  93         int i;
  94         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  95 #if CONFIG_FAANDCT
  96             fdsp->fdct == ff_faandct            ||
  97 #endif /* CONFIG_FAANDCT */
  98             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  99             for (i = 0; i < 64; i++) {
 100                 const int j = s->idsp.idct_permutation[i];
 101                 int64_t den = (int64_t) qscale * quant_matrix[j];
 102                 /* 16 <= qscale * quant_matrix[i] <= 7905
 103                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 104                  *             19952 <=              x  <= 249205026
 105                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 106                  *           3444240 >= (1 << 36) / (x) >= 275 */
 107
 108                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 109             }
 110         } else if (fdsp->fdct == ff_fdct_ifast) {
 111             for (i = 0; i < 64; i++) {
 112                 const int j = s->idsp.idct_permutation[i];
 113                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 114                 /* 16 <= qscale * quant_matrix[i] <= 7905
 115                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 116                  *             19952 <=              x  <= 249205026
 117                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 118                  *           3444240 >= (1 << 36) / (x) >= 275 */
 119
 120                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 121             }
 122         } else {
 123             for (i = 0; i < 64; i++) {
 124                 const int j = s->idsp.idct_permutation[i];
 125                 int64_t den = (int64_t) qscale * quant_matrix[j];
 126                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 127                  * Assume x = qscale * quant_matrix[i]
 128                  * So             16 <=              x  <= 7905
 129                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 130                  * so          32768 >= (1 << 19) / (x) >= 67 */
 131                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 132                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 133                 //                    (qscale * quant_matrix[i]);
 134                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 135
 136                 if (qmat16[qscale][0][i] == 0 ||
 137                     qmat16[qscale][0][i] == 128 * 256)
 138                     qmat16[qscale][0][i] = 128 * 256 - 1;
 139                 qmat16[qscale][1][i] =
 140                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 141                                 qmat16[qscale][0][i]);
 142             }
 143         }
 144
 145         for (i = intra; i < 64; i++) {
 146             int64_t max = 8191;
 147             if (fdsp->fdct == ff_fdct_ifast) {
 148                 max = (8191LL * ff_aanscales[i]) >> 14;
 149             }
 150             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 151                 shift++;
 152             }
 153         }
 154     }
 155     if (shift) {
 156         av_log(NULL, AV_LOG_INFO,
 157                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 158                QMAT_SHIFT - shift);
 159     }
 160 }
 161
 162 static inline void update_qscale(MpegEncContext *s)
 163 {
 164     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 165                 (FF_LAMBDA_SHIFT + 7);
 166     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 167
 168     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 169                  FF_LAMBDA_SHIFT;
 170 }
 171
 172 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 173 {
 174     int i;
 175
 176     if (matrix) {
 177         put_bits(pb, 1, 1);
 178         for (i = 0; i < 64; i++) {
 179             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 180         }
 181     } else
 182         put_bits(pb, 1, 0);
 183 }
 184
 185 /**
 186  * init s->current_picture.qscale_table from s->lambda_table
 187  */
 188 void ff_init_qscale_tab(MpegEncContext *s)
 189 {
 190     int8_t * const qscale_table = s->current_picture.qscale_table;
 191     int i;
 192
 193     for (i = 0; i < s->mb_num; i++) {
 194         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 195         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 196         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 197                                                   s->avctx->qmax);
 198     }
 199 }
 200
 201 static void update_duplicate_context_after_me(MpegEncContext *dst,
 202                                               MpegEncContext *src)
 203 {
 204 #define COPY(a) dst->a= src->a
 205     COPY(pict_type);
 206     COPY(current_picture);
 207     COPY(f_code);
 208     COPY(b_code);
 209     COPY(qscale);
 210     COPY(lambda);
 211     COPY(lambda2);
 212     COPY(picture_in_gop_number);
 213     COPY(gop_picture_number);
 214     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 215     COPY(progressive_frame);    // FIXME don't set in encode_header
 216     COPY(partitioned_frame);    // FIXME don't set in encode_header
 217 #undef COPY
 218 }
 219
 220 /**
 221  * Set the given MpegEncContext to defaults for encoding.
 222  * the changed fields will not depend upon the prior state of the MpegEncContext.
 223  */
 224 static void mpv_encode_defaults(MpegEncContext *s)
 225 {
 226     int i;
 227     ff_mpv_common_defaults(s);
 228
 229     for (i = -16; i < 16; i++) {
 230         default_fcode_tab[i + MAX_MV] = 1;
 231     }
 232     s->me.mv_penalty = default_mv_penalty;
 233     s->fcode_tab     = default_fcode_tab;
 234
 235     s->input_picture_number  = 0;
 236     s->picture_in_gop_number = 0;
 237 }
 238
 239 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 240     if (ARCH_X86)
 241         ff_dct_encode_init_x86(s);
 242
 243     if (CONFIG_H263_ENCODER)
 244         ff_h263dsp_init(&s->h263dsp);
 245     if (!s->dct_quantize)
 246         s->dct_quantize = ff_dct_quantize_c;
 247     if (!s->denoise_dct)
 248         s->denoise_dct  = denoise_dct_c;
 249     s->fast_dct_quantize = s->dct_quantize;
 250     if (s->avctx->trellis)
 251         s->dct_quantize  = dct_quantize_trellis_c;
 252
 253     return 0;
 254 }
 255
 256 /* init video encoder */
 257 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 258 {
 259     MpegEncContext *s = avctx->priv_data;
 260     int i, ret, format_supported;
 261
 262     mpv_encode_defaults(s);
 263
 264     switch (avctx->codec_id) {
 265     case AV_CODEC_ID_MPEG2VIDEO:
 266         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 267             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 268             av_log(avctx, AV_LOG_ERROR,
 269                    "only YUV420 and YUV422 are supported\n");
 270             return -1;
 271         }
 272         break;
 273     case AV_CODEC_ID_MJPEG:
 274     case AV_CODEC_ID_AMV:
 275         format_supported = 0;
 276         /* JPEG color space */
 277         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 278             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 279             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 280             (avctx->color_range == AVCOL_RANGE_JPEG &&
 281              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 282               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 283               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 284             format_supported = 1;
 285         /* MPEG color space */
 286         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 287                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 288                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 289                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 290             format_supported = 1;
 291
 292         if (!format_supported) {
 293             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 294             return -1;
 295         }
 296         break;
 297     default:
 298         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 299             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 300             return -1;
 301         }
 302     }
 303
 304     switch (avctx->pix_fmt) {
 305     case AV_PIX_FMT_YUVJ444P:
 306     case AV_PIX_FMT_YUV444P:
 307         s->chroma_format = CHROMA_444;
 308         break;
 309     case AV_PIX_FMT_YUVJ422P:
 310     case AV_PIX_FMT_YUV422P:
 311         s->chroma_format = CHROMA_422;
 312         break;
 313     case AV_PIX_FMT_YUVJ420P:
 314     case AV_PIX_FMT_YUV420P:
 315     default:
 316         s->chroma_format = CHROMA_420;
 317         break;
 318     }
 319
 320     s->bit_rate = avctx->bit_rate;
 321     s->width    = avctx->width;
 322     s->height   = avctx->height;
 323     if (avctx->gop_size > 600 &&
 324         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 325         av_log(avctx, AV_LOG_WARNING,
 326                "keyframe interval too large!, reducing it from %d to %d\n",
 327                avctx->gop_size, 600);
 328         avctx->gop_size = 600;
 329     }
 330     s->gop_size     = avctx->gop_size;
 331     s->avctx        = avctx;
 332     if (avctx->max_b_frames > MAX_B_FRAMES) {
 333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 334                "is %d.\n", MAX_B_FRAMES);
 335         avctx->max_b_frames = MAX_B_FRAMES;
 336     }
 337     s->max_b_frames = avctx->max_b_frames;
 338     s->codec_id     = avctx->codec->id;
 339     s->strict_std_compliance = avctx->strict_std_compliance;
 340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 341     s->mpeg_quant         = avctx->mpeg_quant;
 342     s->rtp_mode           = !!avctx->rtp_payload_size;
 343     s->intra_dc_precision = avctx->intra_dc_precision;
 344
 345     // workaround some differences between how applications specify dc precision
 346     if (s->intra_dc_precision < 0) {
 347         s->intra_dc_precision += 8;
 348     } else if (s->intra_dc_precision >= 8)
 349         s->intra_dc_precision -= 8;
 350
 351     if (s->intra_dc_precision < 0) {
 352         av_log(avctx, AV_LOG_ERROR,
 353                 "intra dc precision must be positive, note some applications use"
 354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 355         return AVERROR(EINVAL);
 356     }
 357
 358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 360         return AVERROR(EINVAL);
 361     }
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376 #if FF_API_MPV_OPT
 377     FF_DISABLE_DEPRECATION_WARNINGS
 378     if (avctx->border_masking != 0.0)
 379         s->border_masking = avctx->border_masking;
 380     FF_ENABLE_DEPRECATION_WARNINGS
 381 #endif
 382
 383     s->adaptive_quant = (s->avctx->lumi_masking ||
 384                          s->avctx->dark_masking ||
 385                          s->avctx->temporal_cplx_masking ||
 386                          s->avctx->spatial_cplx_masking  ||
 387                          s->avctx->p_masking      ||
 388                          s->border_masking ||
 389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 390                         !s->fixed_qscale;
 391
 392     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
 393
 394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 395         switch(avctx->codec_id) {
 396         case AV_CODEC_ID_MPEG1VIDEO:
 397         case AV_CODEC_ID_MPEG2VIDEO:
 398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 399             break;
 400         case AV_CODEC_ID_MPEG4:
 401         case AV_CODEC_ID_MSMPEG4V1:
 402         case AV_CODEC_ID_MSMPEG4V2:
 403         case AV_CODEC_ID_MSMPEG4V3:
 404             if       (avctx->rc_max_rate >= 15000000) {
 405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 406             } else if(avctx->rc_max_rate >=  2000000) {
 407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 408             } else if(avctx->rc_max_rate >=   384000) {
 409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 410             } else
 411                 avctx->rc_buffer_size = 40;
 412             avctx->rc_buffer_size *= 16384;
 413             break;
 414         }
 415         if (avctx->rc_buffer_size) {
 416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 417         }
 418     }
 419
 420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 422         return -1;
 423     }
 424
 425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 426         av_log(avctx, AV_LOG_INFO,
 427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 428     }
 429
 430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 432         return -1;
 433     }
 434
 435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 437         return -1;
 438     }
 439
 440     if (avctx->rc_max_rate &&
 441         avctx->rc_max_rate == avctx->bit_rate &&
 442         avctx->rc_max_rate != avctx->rc_min_rate) {
 443         av_log(avctx, AV_LOG_INFO,
 444                "impossible bitrate constraints, this will fail\n");
 445     }
 446
 447     if (avctx->rc_buffer_size &&
 448         avctx->bit_rate * (int64_t)avctx->time_base.num >
 449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 451         return -1;
 452     }
 453
 454     if (!s->fixed_qscale &&
 455         avctx->bit_rate * av_q2d(avctx->time_base) >
 456             avctx->bit_rate_tolerance) {
 457         av_log(avctx, AV_LOG_WARNING,
 458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 460     }
 461
 462     if (s->avctx->rc_max_rate &&
 463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 466         90000LL * (avctx->rc_buffer_size - 1) >
 467             s->avctx->rc_max_rate * 0xFFFFLL) {
 468         av_log(avctx, AV_LOG_INFO,
 469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 470                "specified vbv buffer is too large for the given bitrate!\n");
 471     }
 472
 473     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 475         s->codec_id != AV_CODEC_ID_FLV1) {
 476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 477         return -1;
 478     }
 479
 480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 481         av_log(avctx, AV_LOG_ERROR,
 482                "OBMC is only supported with simple mb decision\n");
 483         return -1;
 484     }
 485
 486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if (s->max_b_frames                    &&
 492         s->codec_id != AV_CODEC_ID_MPEG4      &&
 493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 496         return -1;
 497     }
 498     if (s->max_b_frames < 0) {
 499         av_log(avctx, AV_LOG_ERROR,
 500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 501         return -1;
 502     }
 503
 504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 505          s->codec_id == AV_CODEC_ID_H263  ||
 506          s->codec_id == AV_CODEC_ID_H263P) &&
 507         (avctx->sample_aspect_ratio.num > 255 ||
 508          avctx->sample_aspect_ratio.den > 255)) {
 509         av_log(avctx, AV_LOG_WARNING,
 510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 514     }
 515
 516     if ((s->codec_id == AV_CODEC_ID_H263  ||
 517          s->codec_id == AV_CODEC_ID_H263P) &&
 518         (avctx->width  > 2048 ||
 519          avctx->height > 1152 )) {
 520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 521         return -1;
 522     }
 523     if ((s->codec_id == AV_CODEC_ID_H263  ||
 524          s->codec_id == AV_CODEC_ID_H263P) &&
 525         ((avctx->width &3) ||
 526          (avctx->height&3) )) {
 527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 528         return -1;
 529     }
 530
 531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 532         (avctx->width  > 4095 ||
 533          avctx->height > 4095 )) {
 534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 535         return -1;
 536     }
 537
 538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 539         (avctx->width  > 16383 ||
 540          avctx->height > 16383 )) {
 541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 542         return -1;
 543     }
 544
 545     if (s->codec_id == AV_CODEC_ID_RV10 &&
 546         (avctx->width &15 ||
 547          avctx->height&15 )) {
 548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 549         return AVERROR(EINVAL);
 550     }
 551
 552     if (s->codec_id == AV_CODEC_ID_RV20 &&
 553         (avctx->width &3 ||
 554          avctx->height&3 )) {
 555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 556         return AVERROR(EINVAL);
 557     }
 558
 559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 560          s->codec_id == AV_CODEC_ID_WMV2) &&
 561          avctx->width & 1) {
 562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 563          return -1;
 564     }
 565
 566     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 569         return -1;
 570     }
 571
 572     // FIXME mpeg2 uses that too
 573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 575         av_log(avctx, AV_LOG_ERROR,
 576                "mpeg2 style quantization not supported by codec\n");
 577         return -1;
 578     }
 579
 580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 582         return -1;
 583     }
 584
 585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 588         return -1;
 589     }
 590
 591     if (s->avctx->scenechange_threshold < 1000000000 &&
 592         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "closed gop with scene change detection are not supported yet, "
 595                "set threshold to 1000000000\n");
 596         return -1;
 597     }
 598
 599     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
 600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                   "low delay forcing is only available for mpeg2\n");
 603             return -1;
 604         }
 605         if (s->max_b_frames != 0) {
 606             av_log(avctx, AV_LOG_ERROR,
 607                    "b frames cannot be used with low delay\n");
 608             return -1;
 609         }
 610     }
 611
 612     if (s->q_scale_type == 1) {
 613         if (avctx->qmax > 12) {
 614             av_log(avctx, AV_LOG_ERROR,
 615                    "non linear quant only supports qmax <= 12 currently\n");
 616             return -1;
 617         }
 618     }
 619
 620     if (s->avctx->thread_count > 1         &&
 621         s->codec_id != AV_CODEC_ID_MPEG4      &&
 622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 624         s->codec_id != AV_CODEC_ID_MJPEG      &&
 625         (s->codec_id != AV_CODEC_ID_H263P)) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "multi threaded encoding not supported by codec\n");
 628         return -1;
 629     }
 630
 631     if (s->avctx->thread_count < 1) {
 632         av_log(avctx, AV_LOG_ERROR,
 633                "automatic thread number detection not supported by codec, "
 634                "patch welcome\n");
 635         return -1;
 636     }
 637
 638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 639         s->rtp_mode = 1;
 640
 641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 642         s->h263_slice_structured = 1;
 643
 644     if (!avctx->time_base.den || !avctx->time_base.num) {
 645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 646         return -1;
 647     }
 648
 649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 650         av_log(avctx, AV_LOG_INFO,
 651                "notice: b_frame_strategy only affects the first pass\n");
 652         avctx->b_frame_strategy = 0;
 653     }
 654
 655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 656     if (i > 1) {
 657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 658         avctx->time_base.den /= i;
 659         avctx->time_base.num /= i;
 660         //return -1;
 661     }
 662
 663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 664         // (a + x * 3 / 8) / x
 665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 666         s->inter_quant_bias = 0;
 667     } else {
 668         s->intra_quant_bias = 0;
 669         // (a - x / 4) / x
 670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 671     }
 672
 673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 679         s->intra_quant_bias = avctx->intra_quant_bias;
 680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->inter_quant_bias = avctx->inter_quant_bias;
 682
 683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 684
 685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 686         s->avctx->time_base.den > (1 << 16) - 1) {
 687         av_log(avctx, AV_LOG_ERROR,
 688                "timebase %d/%d not supported by MPEG 4 standard, "
 689                "the maximum admitted value for the timebase denominator "
 690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 691                (1 << 16) - 1);
 692         return -1;
 693     }
 694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 695
 696     switch (avctx->codec->id) {
 697     case AV_CODEC_ID_MPEG1VIDEO:
 698         s->out_format = FMT_MPEG1;
 699         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MPEG2VIDEO:
 703         s->out_format = FMT_MPEG1;
 704         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 706         s->rtp_mode   = 1;
 707         break;
 708     case AV_CODEC_ID_MJPEG:
 709     case AV_CODEC_ID_AMV:
 710         s->out_format = FMT_MJPEG;
 711         s->intra_only = 1; /* force intra only for jpeg */
 712         if (!CONFIG_MJPEG_ENCODER ||
 713             ff_mjpeg_encode_init(s) < 0)
 714             return -1;
 715         avctx->delay = 0;
 716         s->low_delay = 1;
 717         break;
 718     case AV_CODEC_ID_H261:
 719         if (!CONFIG_H261_ENCODER)
 720             return -1;
 721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 722             av_log(avctx, AV_LOG_ERROR,
 723                    "The specified picture size of %dx%d is not valid for the "
 724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 725                     s->width, s->height);
 726             return -1;
 727         }
 728         s->out_format = FMT_H261;
 729         avctx->delay  = 0;
 730         s->low_delay  = 1;
 731         s->rtp_mode   = 0; /* Sliced encoding not supported */
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 893             return ret;
 894     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 895         && s->out_format == FMT_MPEG1)
 896         ff_mpeg1_encode_init(s);
 897
 898     /* init q matrix */
 899     for (i = 0; i < 64; i++) {
 900         int j = s->idsp.idct_permutation[i];
 901         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 902             s->mpeg_quant) {
 903             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 904             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 905         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 906             s->intra_matrix[j] =
 907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 908         } else {
 909             /* mpeg1/2 */
 910             s->chroma_intra_matrix[j] =
 911             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 912             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 913         }
 914         if (s->avctx->intra_matrix)
 915             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 916         if (s->avctx->inter_matrix)
 917             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 918     }
 919
 920     /* precompute matrix */
 921     /* for mjpeg, we do include qscale in the matrix */
 922     if (s->out_format != FMT_MJPEG) {
 923         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 924                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 925                           31, 1);
 926         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 927                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 928                           31, 0);
 929     }
 930
 931     if (ff_rate_control_init(s) < 0)
 932         return -1;
 933
 934 #if FF_API_ERROR_RATE
 935     FF_DISABLE_DEPRECATION_WARNINGS
 936     if (avctx->error_rate)
 937         s->error_rate = avctx->error_rate;
 938     FF_ENABLE_DEPRECATION_WARNINGS;
 939 #endif
 940
 941 #if FF_API_NORMALIZE_AQP
 942     FF_DISABLE_DEPRECATION_WARNINGS
 943     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 944         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 945     FF_ENABLE_DEPRECATION_WARNINGS;
 946 #endif
 947
 948 #if FF_API_MV0
 949     FF_DISABLE_DEPRECATION_WARNINGS
 950     if (avctx->flags & CODEC_FLAG_MV0)
 951         s->mpv_flags |= FF_MPV_FLAG_MV0;
 952     FF_ENABLE_DEPRECATION_WARNINGS
 953 #endif
 954
 955 #if FF_API_MPV_OPT
 956     FF_DISABLE_DEPRECATION_WARNINGS
 957     if (avctx->rc_qsquish != 0.0)
 958         s->rc_qsquish = avctx->rc_qsquish;
 959     if (avctx->rc_qmod_amp != 0.0)
 960         s->rc_qmod_amp = avctx->rc_qmod_amp;
 961     if (avctx->rc_qmod_freq)
 962         s->rc_qmod_freq = avctx->rc_qmod_freq;
 963     if (avctx->rc_buffer_aggressivity != 1.0)
 964         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 965     if (avctx->rc_initial_cplx != 0.0)
 966         s->rc_initial_cplx = avctx->rc_initial_cplx;
 967     if (avctx->lmin)
 968         s->lmin = avctx->lmin;
 969     if (avctx->lmax)
 970         s->lmax = avctx->lmax;
 971
 972     if (avctx->rc_eq) {
 973         av_freep(&s->rc_eq);
 974         s->rc_eq = av_strdup(avctx->rc_eq);
 975         if (!s->rc_eq)
 976             return AVERROR(ENOMEM);
 977     }
 978     FF_ENABLE_DEPRECATION_WARNINGS
 979 #endif
 980
 981     if (avctx->b_frame_strategy == 2) {
 982         for (i = 0; i < s->max_b_frames + 2; i++) {
 983             s->tmp_frames[i] = av_frame_alloc();
 984             if (!s->tmp_frames[i])
 985                 return AVERROR(ENOMEM);
 986
 987             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 988             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 989             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 990
 991             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 992             if (ret < 0)
 993                 return ret;
 994         }
 995     }
 996
 997     return 0;
 998 fail:
 999     ff_mpv_encode_end(avctx);
1000     return AVERROR_UNKNOWN;
1001 }
1002
1003 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1004 {
1005     MpegEncContext *s = avctx->priv_data;
1006     int i;
1007
1008     ff_rate_control_uninit(s);
1009
1010     ff_mpv_common_end(s);
1011     if (CONFIG_MJPEG_ENCODER &&
1012         s->out_format == FMT_MJPEG)
1013         ff_mjpeg_encode_close(s);
1014
1015     av_freep(&avctx->extradata);
1016
1017     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1018         av_frame_free(&s->tmp_frames[i]);
1019
1020     ff_free_picture_tables(&s->new_picture);
1021     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1022
1023     av_freep(&s->avctx->stats_out);
1024     av_freep(&s->ac_stats);
1025
1026     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1027     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1028     s->q_chroma_intra_matrix=   NULL;
1029     s->q_chroma_intra_matrix16= NULL;
1030     av_freep(&s->q_intra_matrix);
1031     av_freep(&s->q_inter_matrix);
1032     av_freep(&s->q_intra_matrix16);
1033     av_freep(&s->q_inter_matrix16);
1034     av_freep(&s->input_picture);
1035     av_freep(&s->reordered_input_picture);
1036     av_freep(&s->dct_offset);
1037
1038     return 0;
1039 }
1040
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block; 16 rows of 16 bytes are read
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            /* uint8_t promotes to int, so the difference may be negative */
            const int diff = row[x] - ref;

            acc += diff < 0 ? -diff : diff;
        }
    }

    return acc;
}
1054
1055 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1056                            uint8_t *ref, int stride)
1057 {
1058     int x, y, w, h;
1059     int acc = 0;
1060
1061     w = s->width  & ~15;
1062     h = s->height & ~15;
1063
1064     for (y = 0; y < h; y += 16) {
1065         for (x = 0; x < w; x += 16) {
1066             int offset = x + y * stride;
1067             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1068                                       stride, 16);
1069             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1070             int sae  = get_sae(src + offset, mean, stride);
1071
1072             acc += sae + 500 < sad;
1073         }
1074     }
1075     return acc;
1076 }
1077
1078 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1079 {
1080     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1081                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1082                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1083                             &s->linesize, &s->uvlinesize);
1084 }
1085
/**
 * Queue one user-supplied frame (or a NULL entry) in s->input_picture.
 *
 * For a non-NULL pic_arg the function validates or guesses the pts, picks an
 * unused Picture slot, and either references the caller's buffers directly
 * or copies the three planes into an internally allocated picture. The
 * queue is then shifted down by one entry and the new picture (NULL when
 * pic_arg is NULL) is stored at index encoding_delay.
 *
 * @param s       encoder context
 * @param pic_arg frame to queue, or NULL
 * @return 0 on success, AVERROR(EINVAL) when the pts is not strictly greater
 *         than the previous user-specified pts, or the negative value
 *         returned by a failing helper
 */
static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
{
    Picture *pic = NULL;
    int64_t pts;
    int i, display_picture_number = 0, ret;
    /* queue slot for the new picture: max_b_frames when B-frames are
     * enabled, otherwise 0 in low-delay mode and 1 if not */
    const int encoding_delay = s->max_b_frames ? s->max_b_frames :
                                                 (s->low_delay ? 0 : 1);
    /* 1: reference the caller's buffers; 0: copy into our own picture */
    int direct = 1;

    if (pic_arg) {
        pts = pic_arg->pts;
        display_picture_number = s->input_picture_number++;

        if (pts != AV_NOPTS_VALUE) {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                int64_t last = s->user_specified_pts;

                /* timestamps must be strictly increasing */
                if (pts <= last) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
                           pts, last);
                    return AVERROR(EINVAL);
                }

                /* remember the pts distance between the first two frames */
                if (!s->low_delay && display_picture_number == 1)
                    s->dts_delta = pts - last;
            }
            s->user_specified_pts = pts;
        } else {
            /* no pts supplied: continue from the last known one, or fall
             * back to the display picture number */
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                s->user_specified_pts =
                pts = s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO,
                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
                       pts);
            } else {
                pts = display_picture_number;
            }
        }
    }

    if (pic_arg) {
        /* direct use requires a refcounted frame whose line sizes match the
         * encoder's, dimensions that are multiples of 16, and data/linesize
         * aligned to STRIDE_ALIGN */
        if (!pic_arg->buf[0] ||
            pic_arg->linesize[0] != s->linesize ||
            pic_arg->linesize[1] != s->uvlinesize ||
            pic_arg->linesize[2] != s->uvlinesize)
            direct = 0;
        if ((s->width & 15) || (s->height & 15))
            direct = 0;
        if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
            direct = 0;
        if (s->linesize & (STRIDE_ALIGN-1))
            direct = 0;

        ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
                pic_arg->linesize[1], s->linesize, s->uvlinesize);

        i = ff_find_unused_picture(s->avctx, s->picture, direct);
        if (i < 0)
            return i;

        pic = &s->picture[i];
        pic->reference = 3;

        if (direct) {
            if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
                return ret;
        }
        ret = alloc_picture(s, pic, direct);
        if (ret < 0)
            return ret;

        if (!direct) {
            if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
                pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
                pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
                // the caller's planes already sit at the in-place offset of
                // our buffers: nothing to copy
            } else {
                int h_chroma_shift, v_chroma_shift;
                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                                 &h_chroma_shift,
                                                 &v_chroma_shift);

                /* copy the three planes, chroma planes at subsampled size */
                for (i = 0; i < 3; i++) {
                    int src_stride = pic_arg->linesize[i];
                    int dst_stride = i ? s->uvlinesize : s->linesize;
                    int h_shift = i ? h_chroma_shift : 0;
                    int v_shift = i ? v_chroma_shift : 0;
                    int w = s->width  >> h_shift;
                    int h = s->height >> v_shift;
                    uint8_t *src = pic_arg->data[i];
                    uint8_t *dst = pic->f->data[i];
                    int vpad = 16;

                    /* MPEG-2 with a non-progressive sequence uses 32 lines
                     * of bottom padding when the height is more than 16
                     * lines short of a multiple of 32 */
                    if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
                        && !s->progressive_sequence
                        && FFALIGN(s->height, 32) - s->height > 16)
                        vpad = 32;

                    if (!s->avctx->rc_buffer_size)
                        dst += INPLACE_OFFSET;

                    if (src_stride == dst_stride)
                        memcpy(dst, src, src_stride * h);
                    else {
                        /* differing strides: copy row by row */
                        int h2 = h;
                        uint8_t *dst2 = dst;
                        while (h2--) {
                            memcpy(dst2, src, w);
                            dst2 += dst_stride;
                            src += src_stride;
                        }
                    }
                    /* dimensions are not block-aligned: draw edge padding */
                    if ((s->width & 15) || (s->height & (vpad-1))) {
                        s->mpvencdsp.draw_edges(dst, dst_stride,
                                                w, h,
                                                16 >> h_shift,
                                                vpad >> v_shift,
                                                EDGE_BOTTOM);
                    }
                }
            }
        }
        ret = av_frame_copy_props(pic->f, pic_arg);
        if (ret < 0)
            return ret;

        pic->f->display_picture_number = display_picture_number;
        pic->f->pts = pts; // we set this here to avoid modifying pic_arg
    }

    /* shift buffer entries */
    for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
        s->input_picture[i - 1] = s->input_picture[i];

    /* pic is NULL here when no frame was supplied */
    s->input_picture[encoding_delay] = (Picture*) pic;

    return 0;
}
1225
1226 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1227 {
1228     int x, y, plane;
1229     int score = 0;
1230     int64_t score64 = 0;
1231
1232     for (plane = 0; plane < 3; plane++) {
1233         const int stride = p->f->linesize[plane];
1234         const int bw = plane ? 1 : 2;
1235         for (y = 0; y < s->mb_height * bw; y++) {
1236             for (x = 0; x < s->mb_width * bw; x++) {
1237                 int off = p->shared ? 0 : 16;
1238                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1239                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1240                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1241
1242                 switch (FFABS(s->avctx->frame_skip_exp)) {
1243                 case 0: score    =  FFMAX(score, v);          break;
1244                 case 1: score   += FFABS(v);                  break;
1245                 case 2: score64 += v * (int64_t)v;                       break;
1246                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1247                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1248                 }
1249             }
1250         }
1251     }
1252     emms_c();
1253
1254     if (score)
1255         score64 = score;
1256     if (s->avctx->frame_skip_exp < 0)
1257         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1258                       -1.0/s->avctx->frame_skip_exp);
1259
1260     if (score64 < s->avctx->frame_skip_threshold)
1261         return 1;
1262     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1263         return 1;
1264     return 0;
1265 }
1266
1267 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1268 {
1269     AVPacket pkt = { 0 };
1270     int ret, got_output;
1271
1272     av_init_packet(&pkt);
1273     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1274     if (ret < 0)
1275         return ret;
1276
1277     ret = pkt.size;
1278     av_free_packet(&pkt);
1279     return ret;
1280 }
1281
1282 static int estimate_best_b_count(MpegEncContext *s)
1283 {
1284     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1285     AVCodecContext *c = avcodec_alloc_context3(NULL);
1286     const int scale = s->avctx->brd_scale;
1287     int i, j, out_size, p_lambda, b_lambda, lambda2;
1288     int64_t best_rd  = INT64_MAX;
1289     int best_b_count = -1;
1290
1291     if (!c)
1292         return AVERROR(ENOMEM);
1293     av_assert0(scale >= 0 && scale <= 3);
1294
1295     //emms_c();
1296     //s->next_picture_ptr->quality;
1297     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1298     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1299     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1300     if (!b_lambda) // FIXME we should do this somewhere else
1301         b_lambda = p_lambda;
1302     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1303                FF_LAMBDA_SHIFT;
1304
1305     c->width        = s->width  >> scale;
1306     c->height       = s->height >> scale;
1307     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1308     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1309     c->mb_decision  = s->avctx->mb_decision;
1310     c->me_cmp       = s->avctx->me_cmp;
1311     c->mb_cmp       = s->avctx->mb_cmp;
1312     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1313     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1314     c->time_base    = s->avctx->time_base;
1315     c->max_b_frames = s->max_b_frames;
1316
1317     if (avcodec_open2(c, codec, NULL) < 0)
1318         return -1;
1319
1320     for (i = 0; i < s->max_b_frames + 2; i++) {
1321         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1322                                                 s->next_picture_ptr;
1323         uint8_t *data[4];
1324
1325         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1326             pre_input = *pre_input_ptr;
1327             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1328
1329             if (!pre_input.shared && i) {
1330                 data[0] += INPLACE_OFFSET;
1331                 data[1] += INPLACE_OFFSET;
1332                 data[2] += INPLACE_OFFSET;
1333             }
1334
1335             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1336                                        s->tmp_frames[i]->linesize[0],
1337                                        data[0],
1338                                        pre_input.f->linesize[0],
1339                                        c->width, c->height);
1340             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1341                                        s->tmp_frames[i]->linesize[1],
1342                                        data[1],
1343                                        pre_input.f->linesize[1],
1344                                        c->width >> 1, c->height >> 1);
1345             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1346                                        s->tmp_frames[i]->linesize[2],
1347                                        data[2],
1348                                        pre_input.f->linesize[2],
1349                                        c->width >> 1, c->height >> 1);
1350         }
1351     }
1352
1353     for (j = 0; j < s->max_b_frames + 1; j++) {
1354         int64_t rd = 0;
1355
1356         if (!s->input_picture[j])
1357             break;
1358
1359         c->error[0] = c->error[1] = c->error[2] = 0;
1360
1361         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1362         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1363
1364         out_size = encode_frame(c, s->tmp_frames[0]);
1365
1366         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1367
1368         for (i = 0; i < s->max_b_frames + 1; i++) {
1369             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1370
1371             s->tmp_frames[i + 1]->pict_type = is_p ?
1372                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1373             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1374
1375             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1376
1377             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1378         }
1379
1380         /* get the delayed frames */
1381         while (out_size) {
1382             out_size = encode_frame(c, NULL);
1383             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1384         }
1385
1386         rd += c->error[0] + c->error[1] + c->error[2];
1387
1388         if (rd < best_rd) {
1389             best_rd = rd;
1390             best_b_count = j;
1391         }
1392     }
1393
1394     avcodec_close(c);
1395     av_freep(&c);
1396
1397     return best_b_count;
1398 }
1399
1400 static int select_input_picture(MpegEncContext *s)
1401 {
1402     int i, ret;
1403
1404     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1405         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1406     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1407
1408     /* set next picture type & ordering */
1409     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1410         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1411             if (s->picture_in_gop_number < s->gop_size &&
1412                 s->next_picture_ptr &&
1413                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1414                 // FIXME check that te gop check above is +-1 correct
1415                 av_frame_unref(s->input_picture[0]->f);
1416
1417                 ff_vbv_update(s, 0);
1418
1419                 goto no_output_pic;
1420             }
1421         }
1422
1423         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1424             !s->next_picture_ptr || s->intra_only) {
1425             s->reordered_input_picture[0] = s->input_picture[0];
1426             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1427             s->reordered_input_picture[0]->f->coded_picture_number =
1428                 s->coded_picture_number++;
1429         } else {
1430             int b_frames;
1431
1432             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1433                 for (i = 0; i < s->max_b_frames + 1; i++) {
1434                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1435
1436                     if (pict_num >= s->rc_context.num_entries)
1437                         break;
1438                     if (!s->input_picture[i]) {
1439                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1440                         break;
1441                     }
1442
1443                     s->input_picture[i]->f->pict_type =
1444                         s->rc_context.entry[pict_num].new_pict_type;
1445                 }
1446             }
1447
1448             if (s->avctx->b_frame_strategy == 0) {
1449                 b_frames = s->max_b_frames;
1450                 while (b_frames && !s->input_picture[b_frames])
1451                     b_frames--;
1452             } else if (s->avctx->b_frame_strategy == 1) {
1453                 for (i = 1; i < s->max_b_frames + 1; i++) {
1454                     if (s->input_picture[i] &&
1455                         s->input_picture[i]->b_frame_score == 0) {
1456                         s->input_picture[i]->b_frame_score =
1457                             get_intra_count(s,
1458                                             s->input_picture[i    ]->f->data[0],
1459                                             s->input_picture[i - 1]->f->data[0],
1460                                             s->linesize) + 1;
1461                     }
1462                 }
1463                 for (i = 0; i < s->max_b_frames + 1; i++) {
1464                     if (!s->input_picture[i] ||
1465                         s->input_picture[i]->b_frame_score - 1 >
1466                             s->mb_num / s->avctx->b_sensitivity)
1467                         break;
1468                 }
1469
1470                 b_frames = FFMAX(0, i - 1);
1471
1472                 /* reset scores */
1473                 for (i = 0; i < b_frames + 1; i++) {
1474                     s->input_picture[i]->b_frame_score = 0;
1475                 }
1476             } else if (s->avctx->b_frame_strategy == 2) {
1477                 b_frames = estimate_best_b_count(s);
1478             } else {
1479                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1480                 b_frames = 0;
1481             }
1482
1483             emms_c();
1484
1485             for (i = b_frames - 1; i >= 0; i--) {
1486                 int type = s->input_picture[i]->f->pict_type;
1487                 if (type && type != AV_PICTURE_TYPE_B)
1488                     b_frames = i;
1489             }
1490             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1491                 b_frames == s->max_b_frames) {
1492                 av_log(s->avctx, AV_LOG_ERROR,
1493                        "warning, too many b frames in a row\n");
1494             }
1495
1496             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1497                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1498                     s->gop_size > s->picture_in_gop_number) {
1499                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1500                 } else {
1501                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1502                         b_frames = 0;
1503                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1504                 }
1505             }
1506
1507             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1508                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1509                 b_frames--;
1510
1511             s->reordered_input_picture[0] = s->input_picture[b_frames];
1512             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1513                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1514             s->reordered_input_picture[0]->f->coded_picture_number =
1515                 s->coded_picture_number++;
1516             for (i = 0; i < b_frames; i++) {
1517                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1518                 s->reordered_input_picture[i + 1]->f->pict_type =
1519                     AV_PICTURE_TYPE_B;
1520                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1521                     s->coded_picture_number++;
1522             }
1523         }
1524     }
1525 no_output_pic:
1526     if (s->reordered_input_picture[0]) {
1527         s->reordered_input_picture[0]->reference =
1528            s->reordered_input_picture[0]->f->pict_type !=
1529                AV_PICTURE_TYPE_B ? 3 : 0;
1530
1531         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1532         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1533             return ret;
1534
1535         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1536             // input is a shared pix, so we can't modifiy it -> alloc a new
1537             // one & ensure that the shared one is reuseable
1538
1539             Picture *pic;
1540             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1541             if (i < 0)
1542                 return i;
1543             pic = &s->picture[i];
1544
1545             pic->reference = s->reordered_input_picture[0]->reference;
1546             if (alloc_picture(s, pic, 0) < 0) {
1547                 return -1;
1548             }
1549
1550             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1551             if (ret < 0)
1552                 return ret;
1553
1554             /* mark us unused / free shared pic */
1555             av_frame_unref(s->reordered_input_picture[0]->f);
1556             s->reordered_input_picture[0]->shared = 0;
1557
1558             s->current_picture_ptr = pic;
1559         } else {
1560             // input is not a shared pix -> reuse buffer for current_pix
1561             s->current_picture_ptr = s->reordered_input_picture[0];
1562             for (i = 0; i < 4; i++) {
1563                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1564             }
1565         }
1566         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1567         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1568                                        s->current_picture_ptr)) < 0)
1569             return ret;
1570
1571         s->picture_number = s->new_picture.f->display_picture_number;
1572     } else {
1573         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1574     }
1575     return 0;
1576 }
1577
1578 static void frame_end(MpegEncContext *s)
1579 {
1580     if (s->unrestricted_mv &&
1581         s->current_picture.reference &&
1582         !s->intra_only) {
1583         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1584         int hshift = desc->log2_chroma_w;
1585         int vshift = desc->log2_chroma_h;
1586         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1587                                 s->current_picture.f->linesize[0],
1588                                 s->h_edge_pos, s->v_edge_pos,
1589                                 EDGE_WIDTH, EDGE_WIDTH,
1590                                 EDGE_TOP | EDGE_BOTTOM);
1591         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1592                                 s->current_picture.f->linesize[1],
1593                                 s->h_edge_pos >> hshift,
1594                                 s->v_edge_pos >> vshift,
1595                                 EDGE_WIDTH >> hshift,
1596                                 EDGE_WIDTH >> vshift,
1597                                 EDGE_TOP | EDGE_BOTTOM);
1598         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1599                                 s->current_picture.f->linesize[2],
1600                                 s->h_edge_pos >> hshift,
1601                                 s->v_edge_pos >> vshift,
1602                                 EDGE_WIDTH >> hshift,
1603                                 EDGE_WIDTH >> vshift,
1604                                 EDGE_TOP | EDGE_BOTTOM);
1605     }
1606
1607     emms_c();
1608
1609     s->last_pict_type                 = s->pict_type;
1610     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1611     if (s->pict_type!= AV_PICTURE_TYPE_B)
1612         s->last_non_b_pict_type = s->pict_type;
1613
1614     s->avctx->coded_frame = s->current_picture_ptr->f;
1615
1616 }
1617
1618 static void update_noise_reduction(MpegEncContext *s)
1619 {
1620     int intra, i;
1621
1622     for (intra = 0; intra < 2; intra++) {
1623         if (s->dct_count[intra] > (1 << 16)) {
1624             for (i = 0; i < 64; i++) {
1625                 s->dct_error_sum[intra][i] >>= 1;
1626             }
1627             s->dct_count[intra] >>= 1;
1628         }
1629
1630         for (i = 0; i < 64; i++) {
1631             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1632                                        s->dct_count[intra] +
1633                                        s->dct_error_sum[intra][i] / 2) /
1634                                       (s->dct_error_sum[intra][i] + 1);
1635         }
1636     }
1637 }
1638
1639 static int frame_start(MpegEncContext *s)
1640 {
1641     int ret;
1642
1643     /* mark & release old frames */
1644     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1645         s->last_picture_ptr != s->next_picture_ptr &&
1646         s->last_picture_ptr->f->buf[0]) {
1647         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1648     }
1649
1650     s->current_picture_ptr->f->pict_type = s->pict_type;
1651     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1652
1653     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1654     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1655                                    s->current_picture_ptr)) < 0)
1656         return ret;
1657
1658     if (s->pict_type != AV_PICTURE_TYPE_B) {
1659         s->last_picture_ptr = s->next_picture_ptr;
1660         if (!s->droppable)
1661             s->next_picture_ptr = s->current_picture_ptr;
1662     }
1663
1664     if (s->last_picture_ptr) {
1665         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1666         if (s->last_picture_ptr->f->buf[0] &&
1667             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1668                                        s->last_picture_ptr)) < 0)
1669             return ret;
1670     }
1671     if (s->next_picture_ptr) {
1672         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1673         if (s->next_picture_ptr->f->buf[0] &&
1674             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1675                                        s->next_picture_ptr)) < 0)
1676             return ret;
1677     }
1678
1679     if (s->picture_structure!= PICT_FRAME) {
1680         int i;
1681         for (i = 0; i < 4; i++) {
1682             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1683                 s->current_picture.f->data[i] +=
1684                     s->current_picture.f->linesize[i];
1685             }
1686             s->current_picture.f->linesize[i] *= 2;
1687             s->last_picture.f->linesize[i]    *= 2;
1688             s->next_picture.f->linesize[i]    *= 2;
1689         }
1690     }
1691
1692     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1693         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1694         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1695     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1696         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1697         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1698     } else {
1699         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1700         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1701     }
1702
1703     if (s->dct_error_sum) {
1704         av_assert2(s->avctx->noise_reduction && s->encoding);
1705         update_noise_reduction(s);
1706     }
1707
1708     return 0;
1709 }
1710
1711 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1712                           const AVFrame *pic_arg, int *got_packet)
1713 {
1714     MpegEncContext *s = avctx->priv_data;
1715     int i, stuffing_count, ret;
1716     int context_count = s->slice_context_count;
1717
1718     s->picture_in_gop_number++;
1719
1720     if (load_input_picture(s, pic_arg) < 0)
1721         return -1;
1722
1723     if (select_input_picture(s) < 0) {
1724         return -1;
1725     }
1726
1727     /* output? */
1728     if (s->new_picture.f->data[0]) {
1729         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1730         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1731                                               :
1732                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1733         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1734             return ret;
1735         if (s->mb_info) {
1736             s->mb_info_ptr = av_packet_new_side_data(pkt,
1737                                  AV_PKT_DATA_H263_MB_INFO,
1738                                  s->mb_width*s->mb_height*12);
1739             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1740         }
1741
1742         for (i = 0; i < context_count; i++) {
1743             int start_y = s->thread_context[i]->start_mb_y;
1744             int   end_y = s->thread_context[i]->  end_mb_y;
1745             int h       = s->mb_height;
1746             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1747             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1748
1749             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1750         }
1751
1752         s->pict_type = s->new_picture.f->pict_type;
1753         //emms_c();
1754         ret = frame_start(s);
1755         if (ret < 0)
1756             return ret;
1757 vbv_retry:
1758         ret = encode_picture(s, s->picture_number);
1759         if (growing_buffer) {
1760             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1761             pkt->data = s->pb.buf;
1762             pkt->size = avctx->internal->byte_buffer_size;
1763         }
1764         if (ret < 0)
1765             return -1;
1766
1767         avctx->header_bits = s->header_bits;
1768         avctx->mv_bits     = s->mv_bits;
1769         avctx->misc_bits   = s->misc_bits;
1770         avctx->i_tex_bits  = s->i_tex_bits;
1771         avctx->p_tex_bits  = s->p_tex_bits;
1772         avctx->i_count     = s->i_count;
1773         // FIXME f/b_count in avctx
1774         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1775         avctx->skip_count  = s->skip_count;
1776
1777         frame_end(s);
1778
1779         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1780             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1781
1782         if (avctx->rc_buffer_size) {
1783             RateControlContext *rcc = &s->rc_context;
1784             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1785
1786             if (put_bits_count(&s->pb) > max_size &&
1787                 s->lambda < s->lmax) {
1788                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1789                                        (s->qscale + 1) / s->qscale);
1790                 if (s->adaptive_quant) {
1791                     int i;
1792                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1793                         s->lambda_table[i] =
1794                             FFMAX(s->lambda_table[i] + 1,
1795                                   s->lambda_table[i] * (s->qscale + 1) /
1796                                   s->qscale);
1797                 }
1798                 s->mb_skipped = 0;        // done in frame_start()
1799                 // done in encode_picture() so we must undo it
1800                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1801                     if (s->flipflop_rounding          ||
1802                         s->codec_id == AV_CODEC_ID_H263P ||
1803                         s->codec_id == AV_CODEC_ID_MPEG4)
1804                         s->no_rounding ^= 1;
1805                 }
1806                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1807                     s->time_base       = s->last_time_base;
1808                     s->last_non_b_time = s->time - s->pp_time;
1809                 }
1810                 for (i = 0; i < context_count; i++) {
1811                     PutBitContext *pb = &s->thread_context[i]->pb;
1812                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1813                 }
1814                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1815                 goto vbv_retry;
1816             }
1817
1818             av_assert0(s->avctx->rc_max_rate);
1819         }
1820
1821         if (s->avctx->flags & CODEC_FLAG_PASS1)
1822             ff_write_pass1_stats(s);
1823
1824         for (i = 0; i < 4; i++) {
1825             s->current_picture_ptr->f->error[i] =
1826             s->current_picture.f->error[i] =
1827                 s->current_picture.error[i];
1828             avctx->error[i] += s->current_picture_ptr->f->error[i];
1829         }
1830
1831         if (s->avctx->flags & CODEC_FLAG_PASS1)
1832             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1833                    avctx->i_tex_bits + avctx->p_tex_bits ==
1834                        put_bits_count(&s->pb));
1835         flush_put_bits(&s->pb);
1836         s->frame_bits  = put_bits_count(&s->pb);
1837
1838         stuffing_count = ff_vbv_update(s, s->frame_bits);
1839         s->stuffing_bits = 8*stuffing_count;
1840         if (stuffing_count) {
1841             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1842                     stuffing_count + 50) {
1843                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1844                 return -1;
1845             }
1846
1847             switch (s->codec_id) {
1848             case AV_CODEC_ID_MPEG1VIDEO:
1849             case AV_CODEC_ID_MPEG2VIDEO:
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0);
1852                 }
1853             break;
1854             case AV_CODEC_ID_MPEG4:
1855                 put_bits(&s->pb, 16, 0);
1856                 put_bits(&s->pb, 16, 0x1C3);
1857                 stuffing_count -= 4;
1858                 while (stuffing_count--) {
1859                     put_bits(&s->pb, 8, 0xFF);
1860                 }
1861             break;
1862             default:
1863                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1864             }
1865             flush_put_bits(&s->pb);
1866             s->frame_bits  = put_bits_count(&s->pb);
1867         }
1868
1869         /* update mpeg1/2 vbv_delay for CBR */
1870         if (s->avctx->rc_max_rate                          &&
1871             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1872             s->out_format == FMT_MPEG1                     &&
1873             90000LL * (avctx->rc_buffer_size - 1) <=
1874                 s->avctx->rc_max_rate * 0xFFFFLL) {
1875             int vbv_delay, min_delay;
1876             double inbits  = s->avctx->rc_max_rate *
1877                              av_q2d(s->avctx->time_base);
1878             int    minbits = s->frame_bits - 8 *
1879                              (s->vbv_delay_ptr - s->pb.buf - 1);
1880             double bits    = s->rc_context.buffer_index + minbits - inbits;
1881
1882             if (bits < 0)
1883                 av_log(s->avctx, AV_LOG_ERROR,
1884                        "Internal error, negative bits\n");
1885
1886             assert(s->repeat_first_field == 0);
1887
1888             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1889             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1890                         s->avctx->rc_max_rate;
1891
1892             vbv_delay = FFMAX(vbv_delay, min_delay);
1893
1894             av_assert0(vbv_delay < 0xFFFF);
1895
1896             s->vbv_delay_ptr[0] &= 0xF8;
1897             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1898             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1899             s->vbv_delay_ptr[2] &= 0x07;
1900             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1901             avctx->vbv_delay     = vbv_delay * 300;
1902         }
1903         s->total_bits     += s->frame_bits;
1904         avctx->frame_bits  = s->frame_bits;
1905
1906         pkt->pts = s->current_picture.f->pts;
1907         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1908             if (!s->current_picture.f->coded_picture_number)
1909                 pkt->dts = pkt->pts - s->dts_delta;
1910             else
1911                 pkt->dts = s->reordered_pts;
1912             s->reordered_pts = pkt->pts;
1913         } else
1914             pkt->dts = pkt->pts;
1915         if (s->current_picture.f->key_frame)
1916             pkt->flags |= AV_PKT_FLAG_KEY;
1917         if (s->mb_info)
1918             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1919     } else {
1920         s->frame_bits = 0;
1921     }
1922
1923     /* release non-reference frames */
1924     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1925         if (!s->picture[i].reference)
1926             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1927     }
1928
1929     av_assert1((s->frame_bits & 7) == 0);
1930
1931     pkt->size = s->frame_bits / 8;
1932     *got_packet = !!pkt->size;
1933     return 0;
1934 }
1935
1936 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1937                                                 int n, int threshold)
1938 {
1939     static const char tab[64] = {
1940         3, 2, 2, 1, 1, 1, 1, 1,
1941         1, 1, 1, 1, 1, 1, 1, 1,
1942         1, 1, 1, 1, 1, 1, 1, 1,
1943         0, 0, 0, 0, 0, 0, 0, 0,
1944         0, 0, 0, 0, 0, 0, 0, 0,
1945         0, 0, 0, 0, 0, 0, 0, 0,
1946         0, 0, 0, 0, 0, 0, 0, 0,
1947         0, 0, 0, 0, 0, 0, 0, 0
1948     };
1949     int score = 0;
1950     int run = 0;
1951     int i;
1952     int16_t *block = s->block[n];
1953     const int last_index = s->block_last_index[n];
1954     int skip_dc;
1955
1956     if (threshold < 0) {
1957         skip_dc = 0;
1958         threshold = -threshold;
1959     } else
1960         skip_dc = 1;
1961
1962     /* Are all we could set to zero already zero? */
1963     if (last_index <= skip_dc - 1)
1964         return;
1965
1966     for (i = 0; i <= last_index; i++) {
1967         const int j = s->intra_scantable.permutated[i];
1968         const int level = FFABS(block[j]);
1969         if (level == 1) {
1970             if (skip_dc && i == 0)
1971                 continue;
1972             score += tab[run];
1973             run = 0;
1974         } else if (level > 1) {
1975             return;
1976         } else {
1977             run++;
1978         }
1979     }
1980     if (score >= threshold)
1981         return;
1982     for (i = skip_dc; i <= last_index; i++) {
1983         const int j = s->intra_scantable.permutated[i];
1984         block[j] = 0;
1985     }
1986     if (block[0])
1987         s->block_last_index[n] = 0;
1988     else
1989         s->block_last_index[n] = -1;
1990 }
1991
1992 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1993                                int last_index)
1994 {
1995     int i;
1996     const int maxlevel = s->max_qcoeff;
1997     const int minlevel = s->min_qcoeff;
1998     int overflow = 0;
1999
2000     if (s->mb_intra) {
2001         i = 1; // skip clipping of intra dc
2002     } else
2003         i = 0;
2004
2005     for (; i <= last_index; i++) {
2006         const int j = s->intra_scantable.permutated[i];
2007         int level = block[j];
2008
2009         if (level > maxlevel) {
2010             level = maxlevel;
2011             overflow++;
2012         } else if (level < minlevel) {
2013             level = minlevel;
2014             overflow++;
2015         }
2016
2017         block[j] = level;
2018     }
2019
2020     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2021         av_log(s->avctx, AV_LOG_INFO,
2022                "warning, clipping %d dct coefficients to %d..%d\n",
2023                overflow, minlevel, maxlevel);
2024 }
2025
/**
 * Compute a weight for each pixel of an 8x8 block from its neighbourhood.
 *
 * For every position the pixels of the surrounding 3x3 window (cropped at
 * the borders of the 8x8 block) are accumulated; the weight is
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count, where count is the
 * number of pixels in the cropped window.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels in the cropped window */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    sum += pix;
                    sqr += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2049
2050 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2051                                                 int motion_x, int motion_y,
2052                                                 int mb_block_height,
2053                                                 int mb_block_width,
2054                                                 int mb_block_count)
2055 {
2056     int16_t weight[12][64];
2057     int16_t orig[12][64];
2058     const int mb_x = s->mb_x;
2059     const int mb_y = s->mb_y;
2060     int i;
2061     int skip_dct[12];
2062     int dct_offset = s->linesize * 8; // default for progressive frames
2063     int uv_dct_offset = s->uvlinesize * 8;
2064     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2065     ptrdiff_t wrap_y, wrap_c;
2066
2067     for (i = 0; i < mb_block_count; i++)
2068         skip_dct[i] = s->skipdct;
2069
2070     if (s->adaptive_quant) {
2071         const int last_qp = s->qscale;
2072         const int mb_xy = mb_x + mb_y * s->mb_stride;
2073
2074         s->lambda = s->lambda_table[mb_xy];
2075         update_qscale(s);
2076
2077         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2078             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2079             s->dquant = s->qscale - last_qp;
2080
2081             if (s->out_format == FMT_H263) {
2082                 s->dquant = av_clip(s->dquant, -2, 2);
2083
2084                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2085                     if (!s->mb_intra) {
2086                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2087                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2088                                 s->dquant = 0;
2089                         }
2090                         if (s->mv_type == MV_TYPE_8X8)
2091                             s->dquant = 0;
2092                     }
2093                 }
2094             }
2095         }
2096         ff_set_qscale(s, last_qp + s->dquant);
2097     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2098         ff_set_qscale(s, s->qscale + s->dquant);
2099
2100     wrap_y = s->linesize;
2101     wrap_c = s->uvlinesize;
2102     ptr_y  = s->new_picture.f->data[0] +
2103              (mb_y * 16 * wrap_y)              + mb_x * 16;
2104     ptr_cb = s->new_picture.f->data[1] +
2105              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2106     ptr_cr = s->new_picture.f->data[2] +
2107              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2108
2109     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2110         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2111         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2112         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2113         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2114                                  wrap_y, wrap_y,
2115                                  16, 16, mb_x * 16, mb_y * 16,
2116                                  s->width, s->height);
2117         ptr_y = ebuf;
2118         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2119                                  wrap_c, wrap_c,
2120                                  mb_block_width, mb_block_height,
2121                                  mb_x * mb_block_width, mb_y * mb_block_height,
2122                                  cw, ch);
2123         ptr_cb = ebuf + 16 * wrap_y;
2124         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2125                                  wrap_c, wrap_c,
2126                                  mb_block_width, mb_block_height,
2127                                  mb_x * mb_block_width, mb_y * mb_block_height,
2128                                  cw, ch);
2129         ptr_cr = ebuf + 16 * wrap_y + 16;
2130     }
2131
2132     if (s->mb_intra) {
2133         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2134             int progressive_score, interlaced_score;
2135
2136             s->interlaced_dct = 0;
2137             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2138                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2139                                                      NULL, wrap_y, 8) - 400;
2140
2141             if (progressive_score > 0) {
2142                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2143                                                         NULL, wrap_y * 2, 8) +
2144                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2145                                                         NULL, wrap_y * 2, 8);
2146                 if (progressive_score > interlaced_score) {
2147                     s->interlaced_dct = 1;
2148
2149                     dct_offset = wrap_y;
2150                     uv_dct_offset = wrap_c;
2151                     wrap_y <<= 1;
2152                     if (s->chroma_format == CHROMA_422 ||
2153                         s->chroma_format == CHROMA_444)
2154                         wrap_c <<= 1;
2155                 }
2156             }
2157         }
2158
2159         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2160         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2161         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2162         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2163
2164         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2165             skip_dct[4] = 1;
2166             skip_dct[5] = 1;
2167         } else {
2168             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2169             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2170             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2171                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2172                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2173             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2174                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2175                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2176                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2177                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2178                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2179                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2180             }
2181         }
2182     } else {
2183         op_pixels_func (*op_pix)[4];
2184         qpel_mc_func (*op_qpix)[16];
2185         uint8_t *dest_y, *dest_cb, *dest_cr;
2186
2187         dest_y  = s->dest[0];
2188         dest_cb = s->dest[1];
2189         dest_cr = s->dest[2];
2190
2191         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2192             op_pix  = s->hdsp.put_pixels_tab;
2193             op_qpix = s->qdsp.put_qpel_pixels_tab;
2194         } else {
2195             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2196             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2197         }
2198
2199         if (s->mv_dir & MV_DIR_FORWARD) {
2200             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2201                           s->last_picture.f->data,
2202                           op_pix, op_qpix);
2203             op_pix  = s->hdsp.avg_pixels_tab;
2204             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2205         }
2206         if (s->mv_dir & MV_DIR_BACKWARD) {
2207             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2208                           s->next_picture.f->data,
2209                           op_pix, op_qpix);
2210         }
2211
2212         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2213             int progressive_score, interlaced_score;
2214
2215             s->interlaced_dct = 0;
2216             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2217                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2218                                                      ptr_y + wrap_y * 8,
2219                                                      wrap_y, 8) - 400;
2220
2221             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2222                 progressive_score -= 400;
2223
2224             if (progressive_score > 0) {
2225                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2226                                                         wrap_y * 2, 8) +
2227                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2228                                                         ptr_y + wrap_y,
2229                                                         wrap_y * 2, 8);
2230
2231                 if (progressive_score > interlaced_score) {
2232                     s->interlaced_dct = 1;
2233
2234                     dct_offset = wrap_y;
2235                     uv_dct_offset = wrap_c;
2236                     wrap_y <<= 1;
2237                     if (s->chroma_format == CHROMA_422)
2238                         wrap_c <<= 1;
2239                 }
2240             }
2241         }
2242
2243         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2244         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2245         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2246                             dest_y + dct_offset, wrap_y);
2247         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2248                             dest_y + dct_offset + 8, wrap_y);
2249
2250         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2251             skip_dct[4] = 1;
2252             skip_dct[5] = 1;
2253         } else {
2254             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2255             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2256             if (!s->chroma_y_shift) { /* 422 */
2257                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2258                                     dest_cb + uv_dct_offset, wrap_c);
2259                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2260                                     dest_cr + uv_dct_offset, wrap_c);
2261             }
2262         }
2263         /* pre quantization */
2264         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2265                 2 * s->qscale * s->qscale) {
2266             // FIXME optimize
2267             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[0] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2270                 skip_dct[1] = 1;
2271             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2272                                wrap_y, 8) < 20 * s->qscale)
2273                 skip_dct[2] = 1;
2274             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2275                                wrap_y, 8) < 20 * s->qscale)
2276                 skip_dct[3] = 1;
2277             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2278                 skip_dct[4] = 1;
2279             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2280                 skip_dct[5] = 1;
2281             if (!s->chroma_y_shift) { /* 422 */
2282                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2283                                    dest_cb + uv_dct_offset,
2284                                    wrap_c, 8) < 20 * s->qscale)
2285                     skip_dct[6] = 1;
2286                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2287                                    dest_cr + uv_dct_offset,
2288                                    wrap_c, 8) < 20 * s->qscale)
2289                     skip_dct[7] = 1;
2290             }
2291         }
2292     }
2293
2294     if (s->quantizer_noise_shaping) {
2295         if (!skip_dct[0])
2296             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2297         if (!skip_dct[1])
2298             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2299         if (!skip_dct[2])
2300             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2301         if (!skip_dct[3])
2302             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2303         if (!skip_dct[4])
2304             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2305         if (!skip_dct[5])
2306             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2307         if (!s->chroma_y_shift) { /* 422 */
2308             if (!skip_dct[6])
2309                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2310                                   wrap_c);
2311             if (!skip_dct[7])
2312                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2313                                   wrap_c);
2314         }
2315         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2316     }
2317
2318     /* DCT & quantize */
2319     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2320     {
2321         for (i = 0; i < mb_block_count; i++) {
2322             if (!skip_dct[i]) {
2323                 int overflow;
2324                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2325                 // FIXME we could decide to change to quantizer instead of
2326                 // clipping
2327                 // JS: I don't think that would be a good idea it could lower
2328                 //     quality instead of improve it. Just INTRADC clipping
2329                 //     deserves changes in quantizer
2330                 if (overflow)
2331                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2332             } else
2333                 s->block_last_index[i] = -1;
2334         }
2335         if (s->quantizer_noise_shaping) {
2336             for (i = 0; i < mb_block_count; i++) {
2337                 if (!skip_dct[i]) {
2338                     s->block_last_index[i] =
2339                         dct_quantize_refine(s, s->block[i], weight[i],
2340                                             orig[i], i, s->qscale);
2341                 }
2342             }
2343         }
2344
2345         if (s->luma_elim_threshold && !s->mb_intra)
2346             for (i = 0; i < 4; i++)
2347                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2348         if (s->chroma_elim_threshold && !s->mb_intra)
2349             for (i = 4; i < mb_block_count; i++)
2350                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2351
2352         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2353             for (i = 0; i < mb_block_count; i++) {
2354                 if (s->block_last_index[i] == -1)
2355                     s->coded_score[i] = INT_MAX / 256;
2356             }
2357         }
2358     }
2359
2360     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2361         s->block_last_index[4] =
2362         s->block_last_index[5] = 0;
2363         s->block[4][0] =
2364         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2365         if (!s->chroma_y_shift) { /* 422 / 444 */
2366             for (i=6; i<12; i++) {
2367                 s->block_last_index[i] = 0;
2368                 s->block[i][0] = s->block[4][0];
2369             }
2370         }
2371     }
2372
2373     // non c quantize code returns incorrect block_last_index FIXME
2374     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2375         for (i = 0; i < mb_block_count; i++) {
2376             int j;
2377             if (s->block_last_index[i] > 0) {
2378                 for (j = 63; j > 0; j--) {
2379                     if (s->block[i][s->intra_scantable.permutated[j]])
2380                         break;
2381                 }
2382                 s->block_last_index[i] = j;
2383             }
2384         }
2385     }
2386
2387     /* huffman encode */
2388     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2389     case AV_CODEC_ID_MPEG1VIDEO:
2390     case AV_CODEC_ID_MPEG2VIDEO:
2391         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2392             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2393         break;
2394     case AV_CODEC_ID_MPEG4:
2395         if (CONFIG_MPEG4_ENCODER)
2396             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2397         break;
2398     case AV_CODEC_ID_MSMPEG4V2:
2399     case AV_CODEC_ID_MSMPEG4V3:
2400     case AV_CODEC_ID_WMV1:
2401         if (CONFIG_MSMPEG4_ENCODER)
2402             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2403         break;
2404     case AV_CODEC_ID_WMV2:
2405         if (CONFIG_WMV2_ENCODER)
2406             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2407         break;
2408     case AV_CODEC_ID_H261:
2409         if (CONFIG_H261_ENCODER)
2410             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_H263:
2413     case AV_CODEC_ID_H263P:
2414     case AV_CODEC_ID_FLV1:
2415     case AV_CODEC_ID_RV10:
2416     case AV_CODEC_ID_RV20:
2417         if (CONFIG_H263_ENCODER)
2418             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2419         break;
2420     case AV_CODEC_ID_MJPEG:
2421     case AV_CODEC_ID_AMV:
2422         if (CONFIG_MJPEG_ENCODER)
2423             ff_mjpeg_encode_mb(s, s->block);
2424         break;
2425     default:
2426         av_assert1(0);
2427     }
2428 }
2429
2430 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2431 {
2432     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2433     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2434     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2435 }
2436
2437 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2438     int i;
2439
2440     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2441
2442     /* mpeg1 */
2443     d->mb_skip_run= s->mb_skip_run;
2444     for(i=0; i<3; i++)
2445         d->last_dc[i] = s->last_dc[i];
2446
2447     /* statistics */
2448     d->mv_bits= s->mv_bits;
2449     d->i_tex_bits= s->i_tex_bits;
2450     d->p_tex_bits= s->p_tex_bits;
2451     d->i_count= s->i_count;
2452     d->f_count= s->f_count;
2453     d->b_count= s->b_count;
2454     d->skip_count= s->skip_count;
2455     d->misc_bits= s->misc_bits;
2456     d->last_bits= 0;
2457
2458     d->mb_skipped= 0;
2459     d->qscale= s->qscale;
2460     d->dquant= s->dquant;
2461
2462     d->esc3_level_length= s->esc3_level_length;
2463 }
2464
2465 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2466     int i;
2467
2468     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2469     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2470
2471     /* mpeg1 */
2472     d->mb_skip_run= s->mb_skip_run;
2473     for(i=0; i<3; i++)
2474         d->last_dc[i] = s->last_dc[i];
2475
2476     /* statistics */
2477     d->mv_bits= s->mv_bits;
2478     d->i_tex_bits= s->i_tex_bits;
2479     d->p_tex_bits= s->p_tex_bits;
2480     d->i_count= s->i_count;
2481     d->f_count= s->f_count;
2482     d->b_count= s->b_count;
2483     d->skip_count= s->skip_count;
2484     d->misc_bits= s->misc_bits;
2485
2486     d->mb_intra= s->mb_intra;
2487     d->mb_skipped= s->mb_skipped;
2488     d->mv_type= s->mv_type;
2489     d->mv_dir= s->mv_dir;
2490     d->pb= s->pb;
2491     if(s->data_partitioning){
2492         d->pb2= s->pb2;
2493         d->tex_pb= s->tex_pb;
2494     }
2495     d->block= s->block;
2496     for(i=0; i<8; i++)
2497         d->block_last_index[i]= s->block_last_index[i];
2498     d->interlaced_dct= s->interlaced_dct;
2499     d->qscale= s->qscale;
2500
2501     d->esc3_level_length= s->esc3_level_length;
2502 }
2503
2504 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2505                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2506                            int *dmin, int *next_block, int motion_x, int motion_y)
2507 {
2508     int score;
2509     uint8_t *dest_backup[3];
2510
2511     copy_context_before_encode(s, backup, type);
2512
2513     s->block= s->blocks[*next_block];
2514     s->pb= pb[*next_block];
2515     if(s->data_partitioning){
2516         s->pb2   = pb2   [*next_block];
2517         s->tex_pb= tex_pb[*next_block];
2518     }
2519
2520     if(*next_block){
2521         memcpy(dest_backup, s->dest, sizeof(s->dest));
2522         s->dest[0] = s->sc.rd_scratchpad;
2523         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2524         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2525         av_assert0(s->linesize >= 32); //FIXME
2526     }
2527
2528     encode_mb(s, motion_x, motion_y);
2529
2530     score= put_bits_count(&s->pb);
2531     if(s->data_partitioning){
2532         score+= put_bits_count(&s->pb2);
2533         score+= put_bits_count(&s->tex_pb);
2534     }
2535
2536     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2537         ff_mpv_decode_mb(s, s->block);
2538
2539         score *= s->lambda2;
2540         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2541     }
2542
2543     if(*next_block){
2544         memcpy(s->dest, dest_backup, sizeof(s->dest));
2545     }
2546
2547     if(score<*dmin){
2548         *dmin= score;
2549         *next_block^=1;
2550
2551         copy_context_after_encode(best, s, type);
2552     }
2553 }
2554
2555 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2556     uint32_t *sq = ff_square_tab + 256;
2557     int acc=0;
2558     int x,y;
2559
2560     if(w==16 && h==16)
2561         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2562     else if(w==8 && h==8)
2563         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2564
2565     for(y=0; y<h; y++){
2566         for(x=0; x<w; x++){
2567             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2568         }
2569     }
2570
2571     av_assert2(acc>=0);
2572
2573     return acc;
2574 }
2575
2576 static int sse_mb(MpegEncContext *s){
2577     int w= 16;
2578     int h= 16;
2579
2580     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2581     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2582
2583     if(w==16 && h==16)
2584       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2585         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2586                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2587                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2588       }else{
2589         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2590                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2591                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2592       }
2593     else
2594         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2595                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2596                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2597 }
2598
2599 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2600     MpegEncContext *s= *(void**)arg;
2601
2602
2603     s->me.pre_pass=1;
2604     s->me.dia_size= s->avctx->pre_dia_size;
2605     s->first_slice_line=1;
2606     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2607         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2608             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2609         }
2610         s->first_slice_line=0;
2611     }
2612
2613     s->me.pre_pass=0;
2614
2615     return 0;
2616 }
2617
2618 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2619     MpegEncContext *s= *(void**)arg;
2620
2621     ff_check_alignment();
2622
2623     s->me.dia_size= s->avctx->dia_size;
2624     s->first_slice_line=1;
2625     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2626         s->mb_x=0; //for block init below
2627         ff_init_block_index(s);
2628         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2629             s->block_index[0]+=2;
2630             s->block_index[1]+=2;
2631             s->block_index[2]+=2;
2632             s->block_index[3]+=2;
2633
2634             /* compute motion vector & mb_type and store in context */
2635             if(s->pict_type==AV_PICTURE_TYPE_B)
2636                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2637             else
2638                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2639         }
2640         s->first_slice_line=0;
2641     }
2642     return 0;
2643 }
2644
2645 static int mb_var_thread(AVCodecContext *c, void *arg){
2646     MpegEncContext *s= *(void**)arg;
2647     int mb_x, mb_y;
2648
2649     ff_check_alignment();
2650
2651     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2652         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2653             int xx = mb_x * 16;
2654             int yy = mb_y * 16;
2655             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2656             int varc;
2657             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2658
2659             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2660                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2661
2662             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2663             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2664             s->me.mb_var_sum_temp    += varc;
2665         }
2666     }
2667     return 0;
2668 }
2669
2670 static void write_slice_end(MpegEncContext *s){
2671     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2672         if(s->partitioned_frame){
2673             ff_mpeg4_merge_partitions(s);
2674         }
2675
2676         ff_mpeg4_stuffing(&s->pb);
2677     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2678         ff_mjpeg_encode_stuffing(s);
2679     }
2680
2681     avpriv_align_put_bits(&s->pb);
2682     flush_put_bits(&s->pb);
2683
2684     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2685         s->misc_bits+= get_bits_diff(s);
2686 }
2687
2688 static void write_mb_info(MpegEncContext *s)
2689 {
2690     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2691     int offset = put_bits_count(&s->pb);
2692     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2693     int gobn = s->mb_y / s->gob_index;
2694     int pred_x, pred_y;
2695     if (CONFIG_H263_ENCODER)
2696         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2697     bytestream_put_le32(&ptr, offset);
2698     bytestream_put_byte(&ptr, s->qscale);
2699     bytestream_put_byte(&ptr, gobn);
2700     bytestream_put_le16(&ptr, mba);
2701     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2702     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2703     /* 4MV not implemented */
2704     bytestream_put_byte(&ptr, 0); /* hmv2 */
2705     bytestream_put_byte(&ptr, 0); /* vmv2 */
2706 }
2707
2708 static void update_mb_info(MpegEncContext *s, int startcode)
2709 {
2710     if (!s->mb_info)
2711         return;
2712     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2713         s->mb_info_size += 12;
2714         s->prev_mb_info = s->last_mb_info;
2715     }
2716     if (startcode) {
2717         s->prev_mb_info = put_bits_count(&s->pb)/8;
2718         /* This might have incremented mb_info_size above, and we return without
2719          * actually writing any info into that slot yet. But in that case,
2720          * this will be called again at the start of the after writing the
2721          * start code, actually writing the mb info. */
2722         return;
2723     }
2724
2725     s->last_mb_info = put_bits_count(&s->pb)/8;
2726     if (!s->mb_info_size)
2727         s->mb_info_size += 12;
2728     write_mb_info(s);
2729 }
2730
2731 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2732 {
2733     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2734         && s->slice_context_count == 1
2735         && s->pb.buf == s->avctx->internal->byte_buffer) {
2736         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2737         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2738
2739         uint8_t *new_buffer = NULL;
2740         int new_buffer_size = 0;
2741
2742         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2743                               s->avctx->internal->byte_buffer_size + size_increase);
2744         if (!new_buffer)
2745             return AVERROR(ENOMEM);
2746
2747         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2748         av_free(s->avctx->internal->byte_buffer);
2749         s->avctx->internal->byte_buffer      = new_buffer;
2750         s->avctx->internal->byte_buffer_size = new_buffer_size;
2751         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2752         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2753         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2754     }
2755     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2756         return AVERROR(EINVAL);
2757     return 0;
2758 }
2759
2760 static int encode_thread(AVCodecContext *c, void *arg){
2761     MpegEncContext *s= *(void**)arg;
2762     int mb_x, mb_y, pdif = 0;
2763     int chr_h= 16>>s->chroma_y_shift;
2764     int i, j;
2765     MpegEncContext best_s = { 0 }, backup_s;
2766     uint8_t bit_buf[2][MAX_MB_BYTES];
2767     uint8_t bit_buf2[2][MAX_MB_BYTES];
2768     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2769     PutBitContext pb[2], pb2[2], tex_pb[2];
2770
2771     ff_check_alignment();
2772
2773     for(i=0; i<2; i++){
2774         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2775         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2776         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2777     }
2778
2779     s->last_bits= put_bits_count(&s->pb);
2780     s->mv_bits=0;
2781     s->misc_bits=0;
2782     s->i_tex_bits=0;
2783     s->p_tex_bits=0;
2784     s->i_count=0;
2785     s->f_count=0;
2786     s->b_count=0;
2787     s->skip_count=0;
2788
2789     for(i=0; i<3; i++){
2790         /* init last dc values */
2791         /* note: quant matrix value (8) is implied here */
2792         s->last_dc[i] = 128 << s->intra_dc_precision;
2793
2794         s->current_picture.error[i] = 0;
2795     }
2796     if(s->codec_id==AV_CODEC_ID_AMV){
2797         s->last_dc[0] = 128*8/13;
2798         s->last_dc[1] = 128*8/14;
2799         s->last_dc[2] = 128*8/14;
2800     }
2801     s->mb_skip_run = 0;
2802     memset(s->last_mv, 0, sizeof(s->last_mv));
2803
2804     s->last_mv_dir = 0;
2805
2806     switch(s->codec_id){
2807     case AV_CODEC_ID_H263:
2808     case AV_CODEC_ID_H263P:
2809     case AV_CODEC_ID_FLV1:
2810         if (CONFIG_H263_ENCODER)
2811             s->gob_index = H263_GOB_HEIGHT(s->height);
2812         break;
2813     case AV_CODEC_ID_MPEG4:
2814         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2815             ff_mpeg4_init_partitions(s);
2816         break;
2817     }
2818
2819     s->resync_mb_x=0;
2820     s->resync_mb_y=0;
2821     s->first_slice_line = 1;
2822     s->ptr_lastgob = s->pb.buf;
2823     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2824         s->mb_x=0;
2825         s->mb_y= mb_y;
2826
2827         ff_set_qscale(s, s->qscale);
2828         ff_init_block_index(s);
2829
2830         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2831             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2832             int mb_type= s->mb_type[xy];
2833 //            int d;
2834             int dmin= INT_MAX;
2835             int dir;
2836             int size_increase =  s->avctx->internal->byte_buffer_size/4
2837                                + s->mb_width*MAX_MB_BYTES;
2838
2839             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2840             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2841                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2842                 return -1;
2843             }
2844             if(s->data_partitioning){
2845                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2846                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2847                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2848                     return -1;
2849                 }
2850             }
2851
2852             s->mb_x = mb_x;
2853             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2854             ff_update_block_index(s);
2855
2856             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2857                 ff_h261_reorder_mb_index(s);
2858                 xy= s->mb_y*s->mb_stride + s->mb_x;
2859                 mb_type= s->mb_type[xy];
2860             }
2861
2862             /* write gob / video packet header  */
2863             if(s->rtp_mode){
2864                 int current_packet_size, is_gob_start;
2865
2866                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2867
2868                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2869
2870                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2871
2872                 switch(s->codec_id){
2873                 case AV_CODEC_ID_H263:
2874                 case AV_CODEC_ID_H263P:
2875                     if(!s->h263_slice_structured)
2876                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2877                     break;
2878                 case AV_CODEC_ID_MPEG2VIDEO:
2879                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2880                 case AV_CODEC_ID_MPEG1VIDEO:
2881                     if(s->mb_skip_run) is_gob_start=0;
2882                     break;
2883                 case AV_CODEC_ID_MJPEG:
2884                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2885                     break;
2886                 }
2887
2888                 if(is_gob_start){
2889                     if(s->start_mb_y != mb_y || mb_x!=0){
2890                         write_slice_end(s);
2891
2892                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2893                             ff_mpeg4_init_partitions(s);
2894                         }
2895                     }
2896
2897                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2898                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2899
2900                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2901                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2902                         int d = 100 / s->error_rate;
2903                         if(r % d == 0){
2904                             current_packet_size=0;
2905                             s->pb.buf_ptr= s->ptr_lastgob;
2906                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2907                         }
2908                     }
2909
2910                     if (s->avctx->rtp_callback){
2911                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2912                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2913                     }
2914                     update_mb_info(s, 1);
2915
2916                     switch(s->codec_id){
2917                     case AV_CODEC_ID_MPEG4:
2918                         if (CONFIG_MPEG4_ENCODER) {
2919                             ff_mpeg4_encode_video_packet_header(s);
2920                             ff_mpeg4_clean_buffers(s);
2921                         }
2922                     break;
2923                     case AV_CODEC_ID_MPEG1VIDEO:
2924                     case AV_CODEC_ID_MPEG2VIDEO:
2925                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2926                             ff_mpeg1_encode_slice_header(s);
2927                             ff_mpeg1_clean_buffers(s);
2928                         }
2929                     break;
2930                     case AV_CODEC_ID_H263:
2931                     case AV_CODEC_ID_H263P:
2932                         if (CONFIG_H263_ENCODER)
2933                             ff_h263_encode_gob_header(s, mb_y);
2934                     break;
2935                     }
2936
2937                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2938                         int bits= put_bits_count(&s->pb);
2939                         s->misc_bits+= bits - s->last_bits;
2940                         s->last_bits= bits;
2941                     }
2942
2943                     s->ptr_lastgob += current_packet_size;
2944                     s->first_slice_line=1;
2945                     s->resync_mb_x=mb_x;
2946                     s->resync_mb_y=mb_y;
2947                 }
2948             }
2949
2950             if(  (s->resync_mb_x   == s->mb_x)
2951                && s->resync_mb_y+1 == s->mb_y){
2952                 s->first_slice_line=0;
2953             }
2954
2955             s->mb_skipped=0;
2956             s->dquant=0; //only for QP_RD
2957
2958             update_mb_info(s, 0);
2959
2960             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2961                 int next_block=0;
2962                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2963
2964                 copy_context_before_encode(&backup_s, s, -1);
2965                 backup_s.pb= s->pb;
2966                 best_s.data_partitioning= s->data_partitioning;
2967                 best_s.partitioned_frame= s->partitioned_frame;
2968                 if(s->data_partitioning){
2969                     backup_s.pb2= s->pb2;
2970                     backup_s.tex_pb= s->tex_pb;
2971                 }
2972
2973                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2974                     s->mv_dir = MV_DIR_FORWARD;
2975                     s->mv_type = MV_TYPE_16X16;
2976                     s->mb_intra= 0;
2977                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2978                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2979                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2980                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2981                 }
2982                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2983                     s->mv_dir = MV_DIR_FORWARD;
2984                     s->mv_type = MV_TYPE_FIELD;
2985                     s->mb_intra= 0;
2986                     for(i=0; i<2; i++){
2987                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2988                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2989                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2990                     }
2991                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2992                                  &dmin, &next_block, 0, 0);
2993                 }
2994                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2995                     s->mv_dir = MV_DIR_FORWARD;
2996                     s->mv_type = MV_TYPE_16X16;
2997                     s->mb_intra= 0;
2998                     s->mv[0][0][0] = 0;
2999                     s->mv[0][0][1] = 0;
3000                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3001                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3002                 }
3003                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3004                     s->mv_dir = MV_DIR_FORWARD;
3005                     s->mv_type = MV_TYPE_8X8;
3006                     s->mb_intra= 0;
3007                     for(i=0; i<4; i++){
3008                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3009                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3010                     }
3011                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3012                                  &dmin, &next_block, 0, 0);
3013                 }
3014                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3015                     s->mv_dir = MV_DIR_FORWARD;
3016                     s->mv_type = MV_TYPE_16X16;
3017                     s->mb_intra= 0;
3018                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3019                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3021                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3022                 }
3023                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3024                     s->mv_dir = MV_DIR_BACKWARD;
3025                     s->mv_type = MV_TYPE_16X16;
3026                     s->mb_intra= 0;
3027                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3028                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3029                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3030                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3031                 }
3032                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3033                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3034                     s->mv_type = MV_TYPE_16X16;
3035                     s->mb_intra= 0;
3036                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3037                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3038                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3039                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3040                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3041                                  &dmin, &next_block, 0, 0);
3042                 }
3043                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3044                     s->mv_dir = MV_DIR_FORWARD;
3045                     s->mv_type = MV_TYPE_FIELD;
3046                     s->mb_intra= 0;
3047                     for(i=0; i<2; i++){
3048                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3049                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3050                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3051                     }
3052                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3053                                  &dmin, &next_block, 0, 0);
3054                 }
3055                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3056                     s->mv_dir = MV_DIR_BACKWARD;
3057                     s->mv_type = MV_TYPE_FIELD;
3058                     s->mb_intra= 0;
3059                     for(i=0; i<2; i++){
3060                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3061                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3062                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3063                     }
3064                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3065                                  &dmin, &next_block, 0, 0);
3066                 }
3067                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3068                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3069                     s->mv_type = MV_TYPE_FIELD;
3070                     s->mb_intra= 0;
3071                     for(dir=0; dir<2; dir++){
3072                         for(i=0; i<2; i++){
3073                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3074                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3075                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3076                         }
3077                     }
3078                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3079                                  &dmin, &next_block, 0, 0);
3080                 }
3081                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3082                     s->mv_dir = 0;
3083                     s->mv_type = MV_TYPE_16X16;
3084                     s->mb_intra= 1;
3085                     s->mv[0][0][0] = 0;
3086                     s->mv[0][0][1] = 0;
3087                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3088                                  &dmin, &next_block, 0, 0);
3089                     if(s->h263_pred || s->h263_aic){
3090                         if(best_s.mb_intra)
3091                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3092                         else
3093                             ff_clean_intra_table_entries(s); //old mode?
3094                     }
3095                 }
3096
3097                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3098                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3099                         const int last_qp= backup_s.qscale;
3100                         int qpi, qp, dc[6];
3101                         int16_t ac[6][16];
3102                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3103                         static const int dquant_tab[4]={-1,1,-2,2};
3104                         int storecoefs = s->mb_intra && s->dc_val[0];
3105
3106                         av_assert2(backup_s.dquant == 0);
3107
3108                         //FIXME intra
3109                         s->mv_dir= best_s.mv_dir;
3110                         s->mv_type = MV_TYPE_16X16;
3111                         s->mb_intra= best_s.mb_intra;
3112                         s->mv[0][0][0] = best_s.mv[0][0][0];
3113                         s->mv[0][0][1] = best_s.mv[0][0][1];
3114                         s->mv[1][0][0] = best_s.mv[1][0][0];
3115                         s->mv[1][0][1] = best_s.mv[1][0][1];
3116
3117                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3118                         for(; qpi<4; qpi++){
3119                             int dquant= dquant_tab[qpi];
3120                             qp= last_qp + dquant;
3121                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3122                                 continue;
3123                             backup_s.dquant= dquant;
3124                             if(storecoefs){
3125                                 for(i=0; i<6; i++){
3126                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3127                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3128                                 }
3129                             }
3130
3131                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3132                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3133                             if(best_s.qscale != qp){
3134                                 if(storecoefs){
3135                                     for(i=0; i<6; i++){
3136                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3137                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3138                                     }
3139                                 }
3140                             }
3141                         }
3142                     }
3143                 }
3144                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3145                     int mx= s->b_direct_mv_table[xy][0];
3146                     int my= s->b_direct_mv_table[xy][1];
3147
3148                     backup_s.dquant = 0;
3149                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3150                     s->mb_intra= 0;
3151                     ff_mpeg4_set_direct_mv(s, mx, my);
3152                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3153                                  &dmin, &next_block, mx, my);
3154                 }
3155                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3156                     backup_s.dquant = 0;
3157                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3158                     s->mb_intra= 0;
3159                     ff_mpeg4_set_direct_mv(s, 0, 0);
3160                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3161                                  &dmin, &next_block, 0, 0);
3162                 }
3163                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3164                     int coded=0;
3165                     for(i=0; i<6; i++)
3166                         coded |= s->block_last_index[i];
3167                     if(coded){
3168                         int mx,my;
3169                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3170                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3171                             mx=my=0; //FIXME find the one we actually used
3172                             ff_mpeg4_set_direct_mv(s, mx, my);
3173                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3174                             mx= s->mv[1][0][0];
3175                             my= s->mv[1][0][1];
3176                         }else{
3177                             mx= s->mv[0][0][0];
3178                             my= s->mv[0][0][1];
3179                         }
3180
3181                         s->mv_dir= best_s.mv_dir;
3182                         s->mv_type = best_s.mv_type;
3183                         s->mb_intra= 0;
3184 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3185                         s->mv[0][0][1] = best_s.mv[0][0][1];
3186                         s->mv[1][0][0] = best_s.mv[1][0][0];
3187                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3188                         backup_s.dquant= 0;
3189                         s->skipdct=1;
3190                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3191                                         &dmin, &next_block, mx, my);
3192                         s->skipdct=0;
3193                     }
3194                 }
3195
3196                 s->current_picture.qscale_table[xy] = best_s.qscale;
3197
3198                 copy_context_after_encode(s, &best_s, -1);
3199
3200                 pb_bits_count= put_bits_count(&s->pb);
3201                 flush_put_bits(&s->pb);
3202                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3203                 s->pb= backup_s.pb;
3204
3205                 if(s->data_partitioning){
3206                     pb2_bits_count= put_bits_count(&s->pb2);
3207                     flush_put_bits(&s->pb2);
3208                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3209                     s->pb2= backup_s.pb2;
3210
3211                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3212                     flush_put_bits(&s->tex_pb);
3213                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3214                     s->tex_pb= backup_s.tex_pb;
3215                 }
3216                 s->last_bits= put_bits_count(&s->pb);
3217
3218                 if (CONFIG_H263_ENCODER &&
3219                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3220                     ff_h263_update_motion_val(s);
3221
3222                 if(next_block==0){ //FIXME 16 vs linesize16
3223                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3224                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3225                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3226                 }
3227
3228                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3229                     ff_mpv_decode_mb(s, s->block);
3230             } else {
3231                 int motion_x = 0, motion_y = 0;
3232                 s->mv_type=MV_TYPE_16X16;
3233                 // only one MB-Type possible
3234
3235                 switch(mb_type){
3236                 case CANDIDATE_MB_TYPE_INTRA:
3237                     s->mv_dir = 0;
3238                     s->mb_intra= 1;
3239                     motion_x= s->mv[0][0][0] = 0;
3240                     motion_y= s->mv[0][0][1] = 0;
3241                     break;
3242                 case CANDIDATE_MB_TYPE_INTER:
3243                     s->mv_dir = MV_DIR_FORWARD;
3244                     s->mb_intra= 0;
3245                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3246                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3247                     break;
3248                 case CANDIDATE_MB_TYPE_INTER_I:
3249                     s->mv_dir = MV_DIR_FORWARD;
3250                     s->mv_type = MV_TYPE_FIELD;
3251                     s->mb_intra= 0;
3252                     for(i=0; i<2; i++){
3253                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3254                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3255                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3256                     }
3257                     break;
3258                 case CANDIDATE_MB_TYPE_INTER4V:
3259                     s->mv_dir = MV_DIR_FORWARD;
3260                     s->mv_type = MV_TYPE_8X8;
3261                     s->mb_intra= 0;
3262                     for(i=0; i<4; i++){
3263                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3264                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3265                     }
3266                     break;
3267                 case CANDIDATE_MB_TYPE_DIRECT:
3268                     if (CONFIG_MPEG4_ENCODER) {
3269                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3270                         s->mb_intra= 0;
3271                         motion_x=s->b_direct_mv_table[xy][0];
3272                         motion_y=s->b_direct_mv_table[xy][1];
3273                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3274                     }
3275                     break;
3276                 case CANDIDATE_MB_TYPE_DIRECT0:
3277                     if (CONFIG_MPEG4_ENCODER) {
3278                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3279                         s->mb_intra= 0;
3280                         ff_mpeg4_set_direct_mv(s, 0, 0);
3281                     }
3282                     break;
3283                 case CANDIDATE_MB_TYPE_BIDIR:
3284                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3285                     s->mb_intra= 0;
3286                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3287                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3288                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3289                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3290                     break;
3291                 case CANDIDATE_MB_TYPE_BACKWARD:
3292                     s->mv_dir = MV_DIR_BACKWARD;
3293                     s->mb_intra= 0;
3294                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3295                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3296                     break;
3297                 case CANDIDATE_MB_TYPE_FORWARD:
3298                     s->mv_dir = MV_DIR_FORWARD;
3299                     s->mb_intra= 0;
3300                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3301                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3302                     break;
3303                 case CANDIDATE_MB_TYPE_FORWARD_I:
3304                     s->mv_dir = MV_DIR_FORWARD;
3305                     s->mv_type = MV_TYPE_FIELD;
3306                     s->mb_intra= 0;
3307                     for(i=0; i<2; i++){
3308                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3309                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3310                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3311                     }
3312                     break;
3313                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3314                     s->mv_dir = MV_DIR_BACKWARD;
3315                     s->mv_type = MV_TYPE_FIELD;
3316                     s->mb_intra= 0;
3317                     for(i=0; i<2; i++){
3318                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3319                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3320                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3321                     }
3322                     break;
3323                 case CANDIDATE_MB_TYPE_BIDIR_I:
3324                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3325                     s->mv_type = MV_TYPE_FIELD;
3326                     s->mb_intra= 0;
3327                     for(dir=0; dir<2; dir++){
3328                         for(i=0; i<2; i++){
3329                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3330                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3331                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3332                         }
3333                     }
3334                     break;
3335                 default:
3336                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3337                 }
3338
3339                 encode_mb(s, motion_x, motion_y);
3340
3341                 // RAL: Update last macroblock type
3342                 s->last_mv_dir = s->mv_dir;
3343
3344                 if (CONFIG_H263_ENCODER &&
3345                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3346                     ff_h263_update_motion_val(s);
3347
3348                 ff_mpv_decode_mb(s, s->block);
3349             }
3350
3351             /* clean the MV table in IPS frames for direct mode in B frames */
3352             if(s->mb_intra /* && I,P,S_TYPE */){
3353                 s->p_mv_table[xy][0]=0;
3354                 s->p_mv_table[xy][1]=0;
3355             }
3356
3357             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3358                 int w= 16;
3359                 int h= 16;
3360
3361                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3362                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3363
3364                 s->current_picture.error[0] += sse(
3365                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3366                     s->dest[0], w, h, s->linesize);
3367                 s->current_picture.error[1] += sse(
3368                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3369                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3370                 s->current_picture.error[2] += sse(
3371                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3372                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3373             }
3374             if(s->loop_filter){
3375                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3376                     ff_h263_loop_filter(s);
3377             }
3378             ff_dlog(s->avctx, "MB %d %d bits\n",
3379                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3380         }
3381     }
3382
3383     //not beautiful here but we must write it before flushing so it has to be here
3384     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3385         ff_msmpeg4_encode_ext_header(s);
3386
3387     write_slice_end(s);
3388
3389     /* Send the last GOB if RTP */
3390     if (s->avctx->rtp_callback) {
3391         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3392         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3393         /* Call the RTP callback to send the last GOB */
3394         emms_c();
3395         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3396     }
3397
3398     return 0;
3399 }
3400
3401 #define MERGE(field) dst->field += src->field; src->field=0
3402 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3403     MERGE(me.scene_change_score);
3404     MERGE(me.mc_mb_var_sum_temp);
3405     MERGE(me.mb_var_sum_temp);
3406 }
3407
3408 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3409     int i;
3410
3411     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3412     MERGE(dct_count[1]);
3413     MERGE(mv_bits);
3414     MERGE(i_tex_bits);
3415     MERGE(p_tex_bits);
3416     MERGE(i_count);
3417     MERGE(f_count);
3418     MERGE(b_count);
3419     MERGE(skip_count);
3420     MERGE(misc_bits);
3421     MERGE(er.error_count);
3422     MERGE(padding_bug_score);
3423     MERGE(current_picture.error[0]);
3424     MERGE(current_picture.error[1]);
3425     MERGE(current_picture.error[2]);
3426
3427     if(dst->avctx->noise_reduction){
3428         for(i=0; i<64; i++){
3429             MERGE(dct_error_sum[0][i]);
3430             MERGE(dct_error_sum[1][i]);
3431         }
3432     }
3433
3434     assert(put_bits_count(&src->pb) % 8 ==0);
3435     assert(put_bits_count(&dst->pb) % 8 ==0);
3436     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3437     flush_put_bits(&dst->pb);
3438 }
3439
3440 static int estimate_qp(MpegEncContext *s, int dry_run){
3441     if (s->next_lambda){
3442         s->current_picture_ptr->f->quality =
3443         s->current_picture.f->quality = s->next_lambda;
3444         if(!dry_run) s->next_lambda= 0;
3445     } else if (!s->fixed_qscale) {
3446         s->current_picture_ptr->f->quality =
3447         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3448         if (s->current_picture.f->quality < 0)
3449             return -1;
3450     }
3451
3452     if(s->adaptive_quant){
3453         switch(s->codec_id){
3454         case AV_CODEC_ID_MPEG4:
3455             if (CONFIG_MPEG4_ENCODER)
3456                 ff_clean_mpeg4_qscales(s);
3457             break;
3458         case AV_CODEC_ID_H263:
3459         case AV_CODEC_ID_H263P:
3460         case AV_CODEC_ID_FLV1:
3461             if (CONFIG_H263_ENCODER)
3462                 ff_clean_h263_qscales(s);
3463             break;
3464         default:
3465             ff_init_qscale_tab(s);
3466         }
3467
3468         s->lambda= s->lambda_table[0];
3469         //FIXME broken
3470     }else
3471         s->lambda = s->current_picture.f->quality;
3472     update_qscale(s);
3473     return 0;
3474 }
3475
3476 /* must be called before writing the header */
3477 static void set_frame_distances(MpegEncContext * s){
3478     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3479     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3480
3481     if(s->pict_type==AV_PICTURE_TYPE_B){
3482         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3483         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3484     }else{
3485         s->pp_time= s->time - s->last_non_b_time;
3486         s->last_non_b_time= s->time;
3487         assert(s->picture_number==0 || s->pp_time > 0);
3488     }
3489 }
3490
3491 static int encode_picture(MpegEncContext *s, int picture_number)
3492 {
3493     int i, ret;
3494     int bits;
3495     int context_count = s->slice_context_count;
3496
3497     s->picture_number = picture_number;
3498
3499     /* Reset the average MB variance */
3500     s->me.mb_var_sum_temp    =
3501     s->me.mc_mb_var_sum_temp = 0;
3502
3503     /* we need to initialize some time vars before we can encode b-frames */
3504     // RAL: Condition added for MPEG1VIDEO
3505     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3506         set_frame_distances(s);
3507     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3508         ff_set_mpeg4_time(s);
3509
3510     s->me.scene_change_score=0;
3511
3512 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3513
3514     if(s->pict_type==AV_PICTURE_TYPE_I){
3515         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3516         else                        s->no_rounding=0;
3517     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3518         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3519             s->no_rounding ^= 1;
3520     }
3521
3522     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3523         if (estimate_qp(s,1) < 0)
3524             return -1;
3525         ff_get_2pass_fcode(s);
3526     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3527         if(s->pict_type==AV_PICTURE_TYPE_B)
3528             s->lambda= s->last_lambda_for[s->pict_type];
3529         else
3530             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3531         update_qscale(s);
3532     }
3533
3534     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3535         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3536         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3537         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3538         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3539     }
3540
3541     s->mb_intra=0; //for the rate distortion & bit compare functions
3542     for(i=1; i<context_count; i++){
3543         ret = ff_update_duplicate_context(s->thread_context[i], s);
3544         if (ret < 0)
3545             return ret;
3546     }
3547
3548     if(ff_init_me(s)<0)
3549         return -1;
3550
3551     /* Estimate motion for every MB */
3552     if(s->pict_type != AV_PICTURE_TYPE_I){
3553         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3554         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3555         if (s->pict_type != AV_PICTURE_TYPE_B) {
3556             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3557                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3558             }
3559         }
3560
3561         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3562     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3563         /* I-Frame */
3564         for(i=0; i<s->mb_stride*s->mb_height; i++)
3565             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3566
3567         if(!s->fixed_qscale){
3568             /* finding spatial complexity for I-frame rate control */
3569             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3570         }
3571     }
3572     for(i=1; i<context_count; i++){
3573         merge_context_after_me(s, s->thread_context[i]);
3574     }
3575     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3576     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3577     emms_c();
3578
3579     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3580         s->pict_type= AV_PICTURE_TYPE_I;
3581         for(i=0; i<s->mb_stride*s->mb_height; i++)
3582             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3583         if(s->msmpeg4_version >= 3)
3584             s->no_rounding=1;
3585         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3586                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3587     }
3588
3589     if(!s->umvplus){
3590         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3591             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3592
3593             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3594                 int a,b;
3595                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3596                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3597                 s->f_code= FFMAX3(s->f_code, a, b);
3598             }
3599
3600             ff_fix_long_p_mvs(s);
3601             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3602             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3603                 int j;
3604                 for(i=0; i<2; i++){
3605                     for(j=0; j<2; j++)
3606                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3607                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3608                 }
3609             }
3610         }
3611
3612         if(s->pict_type==AV_PICTURE_TYPE_B){
3613             int a, b;
3614
3615             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3616             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3617             s->f_code = FFMAX(a, b);
3618
3619             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3620             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3621             s->b_code = FFMAX(a, b);
3622
3623             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3624             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3625             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3626             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3627             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3628                 int dir, j;
3629                 for(dir=0; dir<2; dir++){
3630                     for(i=0; i<2; i++){
3631                         for(j=0; j<2; j++){
3632                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3633                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3634                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3635                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3636                         }
3637                     }
3638                 }
3639             }
3640         }
3641     }
3642
3643     if (estimate_qp(s, 0) < 0)
3644         return -1;
3645
3646     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3647         s->pict_type == AV_PICTURE_TYPE_I &&
3648         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3649         s->qscale= 3; //reduce clipping problems
3650
3651     if (s->out_format == FMT_MJPEG) {
3652         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3653         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3654
3655         if (s->avctx->intra_matrix) {
3656             chroma_matrix =
3657             luma_matrix = s->avctx->intra_matrix;
3658         }
3659         if (s->avctx->chroma_intra_matrix)
3660             chroma_matrix = s->avctx->chroma_intra_matrix;
3661
3662         /* for mjpeg, we do include qscale in the matrix */
3663         for(i=1;i<64;i++){
3664             int j = s->idsp.idct_permutation[i];
3665
3666             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3667             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3668         }
3669         s->y_dc_scale_table=
3670         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3671         s->chroma_intra_matrix[0] =
3672         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3673         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3674                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3675         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3676                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3677         s->qscale= 8;
3678     }
3679     if(s->codec_id == AV_CODEC_ID_AMV){
3680         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3681         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3682         for(i=1;i<64;i++){
3683             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3684
3685             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3686             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3687         }
3688         s->y_dc_scale_table= y;
3689         s->c_dc_scale_table= c;
3690         s->intra_matrix[0] = 13;
3691         s->chroma_intra_matrix[0] = 14;
3692         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3693                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3694         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3695                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3696         s->qscale= 8;
3697     }
3698
3699     //FIXME var duplication
3700     s->current_picture_ptr->f->key_frame =
3701     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3702     s->current_picture_ptr->f->pict_type =
3703     s->current_picture.f->pict_type = s->pict_type;
3704
3705     if (s->current_picture.f->key_frame)
3706         s->picture_in_gop_number=0;
3707
3708     s->mb_x = s->mb_y = 0;
3709     s->last_bits= put_bits_count(&s->pb);
3710     switch(s->out_format) {
3711     case FMT_MJPEG:
3712         if (CONFIG_MJPEG_ENCODER)
3713             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3714                                            s->intra_matrix, s->chroma_intra_matrix);
3715         break;
3716     case FMT_H261:
3717         if (CONFIG_H261_ENCODER)
3718             ff_h261_encode_picture_header(s, picture_number);
3719         break;
3720     case FMT_H263:
3721         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3722             ff_wmv2_encode_picture_header(s, picture_number);
3723         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3724             ff_msmpeg4_encode_picture_header(s, picture_number);
3725         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3726             ff_mpeg4_encode_picture_header(s, picture_number);
3727         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3728             ret = ff_rv10_encode_picture_header(s, picture_number);
3729             if (ret < 0)
3730                 return ret;
3731         }
3732         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3733             ff_rv20_encode_picture_header(s, picture_number);
3734         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3735             ff_flv_encode_picture_header(s, picture_number);
3736         else if (CONFIG_H263_ENCODER)
3737             ff_h263_encode_picture_header(s, picture_number);
3738         break;
3739     case FMT_MPEG1:
3740         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3741             ff_mpeg1_encode_picture_header(s, picture_number);
3742         break;
3743     default:
3744         av_assert0(0);
3745     }
3746     bits= put_bits_count(&s->pb);
3747     s->header_bits= bits - s->last_bits;
3748
3749     for(i=1; i<context_count; i++){
3750         update_duplicate_context_after_me(s->thread_context[i], s);
3751     }
3752     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3753     for(i=1; i<context_count; i++){
3754         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3755             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3756         merge_context_after_encode(s, s->thread_context[i]);
3757     }
3758     emms_c();
3759     return 0;
3760 }
3761
3762 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3763     const int intra= s->mb_intra;
3764     int i;
3765
3766     s->dct_count[intra]++;
3767
3768     for(i=0; i<64; i++){
3769         int level= block[i];
3770
3771         if(level){
3772             if(level>0){
3773                 s->dct_error_sum[intra][i] += level;
3774                 level -= s->dct_offset[intra][i];
3775                 if(level<0) level=0;
3776             }else{
3777                 s->dct_error_sum[intra][i] -= level;
3778                 level += s->dct_offset[intra][i];
3779                 if(level>0) level=0;
3780             }
3781             block[i]= level;
3782         }
3783     }
3784 }
3785
/**
 * Transform and quantize one 8x8 block, picking the quantized levels with a
 * rate-distortion ("trellis") search instead of plain rounding.
 *
 * The block is transformed in place by s->fdsp.fdct() (and optionally passed
 * through s->denoise_dct()). For every coefficient up to the last one that
 * does not round to zero, up to two candidate levels are generated; a dynamic
 * program over (run, level) pairs then minimises
 * distortion + lambda * VLC bit length. The chosen levels are written back to
 * block[] at the positions given by s->intra_scantable.permutated, and
 * s->coded_score[n] receives the score of the chosen solution.
 *
 * @param s        encoder context (DCT, quant matrices, VLC length tables,
 *                 lambda2, output format)
 * @param block    64 values; overwritten with the quantized coefficients
 * @param n        block number; n < 4 selects the luma DC scale / intra
 *                 matrices, otherwise the chroma ones
 * @param qscale   quantizer scale, also used to index the q_*_matrix tables
 * @param overflow set to nonzero when a candidate level magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last nonzero coefficient that was kept; a value
 *         below the first coded index (0 for intra blocks, -1 otherwise)
 *         means no coefficient from that index on is coded
 */
3786 static int dct_quantize_trellis_c(MpegEncContext *s,
3787                                   int16_t *block, int n,
3788                                   int qscale, int *overflow){
3789     const int *qmat;
3790     const uint16_t *matrix;
3791     const uint8_t *scantable= s->intra_scantable.scantable;
3792     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3793     int max=0;
3794     unsigned int threshold1, threshold2;
3795     int bias=0;
         /* Trellis state, indexed by "position after scan index i", hence 65
          * entries: run_tab[k]/level_tab[k] describe the coefficient at scan
          * index k-1 on the best path ending there, score_tab[k] is the score
          * of that path, survivor[] lists the k values still worth extending. */
3796     int run_tab[65];
3797     int level_tab[65];
3798     int score_tab[65];
3799     int survivor[65];
3800     int survivor_count;
         /* Best "this is the final coefficient" choice found so far. */
3801     int last_run=0;
3802     int last_level=0;
3803     int last_score= 0;
3804     int last_i;
         /* Candidate levels per scan index and how many of them are valid. */
3805     int coeff[2][64];
3806     int coeff_count[64];
3807     int qmul, qadd, start_i, last_non_zero, i, dc;
3808     const int esc_length= s->ac_esc_length;
3809     uint8_t * length;
3810     uint8_t * last_length;
3811     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3812
3813     s->fdsp.fdct(block);
3814
3815     if(s->dct_error_sum)
3816         s->denoise_dct(s, block);
         /* Dequantization constants for the H.263/H.261 distortion estimate
          * (8 times the qscale*2 / (qscale-1)|1 pair used by
          * dct_quantize_refine() in this file). */
3817     qmul= qscale*16;
3818     qadd= ((qscale-1)|1)*8;
3819
3820     if (s->mb_intra) {
3821         int q;
3822         if (!s->h263_aic) {
3823             if (n < 4)
3824                 q = s->y_dc_scale;
3825             else
3826                 q = s->c_dc_scale;
3827             q = q << 3;
3828         } else{
3829             /* For AIC we skip quant/dequant of INTRADC */
3830             q = 1 << 3;
3831             qadd=0;
3832         }
3833
3834         /* note: block[0] is assumed to be positive */
             /* The intra DC coefficient is quantized directly with rounding
              * and is excluded from the trellis (start_i = 1). */
3835         block[0] = (block[0] + (q >> 1)) / q;
3836         start_i = 1;
3837         last_non_zero = 0;
3838         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3839         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3840         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3841             bias= 1<<(QMAT_SHIFT-1);
3842
             /* Chroma blocks use dedicated VLC length tables when present. */
3843         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3844             length     = s->intra_chroma_ac_vlc_length;
3845             last_length= s->intra_chroma_ac_vlc_last_length;
3846         } else {
3847             length     = s->intra_ac_vlc_length;
3848             last_length= s->intra_ac_vlc_last_length;
3849         }
3850     } else {
3851         start_i = 0;
3852         last_non_zero = -1;
3853         qmat = s->q_inter_matrix[qscale];
3854         matrix = s->inter_matrix;
3855         length     = s->inter_ac_vlc_length;
3856         last_length= s->inter_ac_vlc_last_length;
3857     }
3858     last_i= start_i;
3859
         /* With these thresholds, (unsigned)(level + threshold1) > threshold2
          * is a single-compare test for level > threshold1 or
          * level < -threshold1. */
3860     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3861     threshold2= (threshold1<<1);
3862
         /* Scan backwards for the last coefficient that passes the threshold. */
3863     for(i=63; i>=start_i; i--) {
3864         const int j = scantable[i];
3865         int level = block[j] * qmat[j];
3866
3867         if(((unsigned)(level+threshold1))>threshold2){
3868             last_non_zero = i;
3869             break;
3870         }
3871     }
3872
         /* Build the candidate levels for every scan index up to last_non_zero. */
3873     for(i=start_i; i<=last_non_zero; i++) {
3874         const int j = scantable[i];
3875         int level = block[j] * qmat[j];
3876
3877 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3878 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3879         if(((unsigned)(level+threshold1))>threshold2){
                 /* Candidate 0 is the biased/rounded level, candidate 1 is one
                  * step closer to zero. After this branch 'level' holds the
                  * magnitude, so a magnitude of 1 yields a single candidate. */
3880             if(level>0){
3881                 level= (bias + level)>>QMAT_SHIFT;
3882                 coeff[0][i]= level;
3883                 coeff[1][i]= level-1;
3884 //                coeff[2][k]= level-2;
3885             }else{
3886                 level= (bias - level)>>QMAT_SHIFT;
3887                 coeff[0][i]= -level;
3888                 coeff[1][i]= -level+1;
3889 //                coeff[2][k]= -level+2;
3890             }
3891             coeff_count[i]= FFMIN(level, 2);
3892             av_assert2(coeff_count[i]);
                 /* Bitwise OR of all magnitudes, compared against
                  * s->max_qcoeff below. */
3893             max |=level;
3894         }else{
                 /* Below threshold: the only candidate is +/-1 carrying the
                  * sign of 'level' (-1 for negative input, assuming an
                  * arithmetic right shift of a 32-bit int, else +1). */
3895             coeff[0][i]= (level>>31)|1;
3896             coeff_count[i]= 1;
3897         }
3898     }
3899
3900     *overflow= s->max_qcoeff < max; //overflow might have happened
3901
         /* Nothing passed the threshold: clear everything from start_i on. */
3902     if(last_non_zero < start_i){
3903         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3904         return last_non_zero;
3905     }
3906
3907     score_tab[start_i]= 0;
3908     survivor[0]= start_i;
3909     survivor_count= 1;
3910
         /* Forward pass of the dynamic program: for each scan index try every
          * candidate level combined with every surviving predecessor. */
3911     for(i=start_i; i<=last_non_zero; i++){
3912         int level_index, j, zero_distortion;
3913         int dct_coeff= FFABS(block[ scantable[i] ]);
3914         int best_score=256*256*256*120;
3915
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales before
              * it is compared with dequantized values. */
3916         if (s->fdsp.fdct == ff_fdct_ifast)
3917             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3918         zero_distortion= dct_coeff*dct_coeff;
3919
3920         for(level_index=0; level_index < coeff_count[i]; level_index++){
3921             int distortion;
3922             int level= coeff[level_index][i];
3923             const int alevel= FFABS(level);
3924             int unquant_coeff;
3925
3926             av_assert2(level);
3927
                 /* Dequantize the candidate the way the selected output format
                  * does, in the same scale as dct_coeff. */
3928             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3929                 unquant_coeff= alevel*qmul + qadd;
3930             } else if(s->out_format == FMT_MJPEG) {
3931                 j = s->idsp.idct_permutation[scantable[i]];
3932                 unquant_coeff = alevel * matrix[j] * 8;
3933             }else{ //MPEG1
3934                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3935                 if(s->mb_intra){
3936                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3937                         unquant_coeff =   (unquant_coeff - 1) | 1;
3938                 }else{
3939                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3940                         unquant_coeff =   (unquant_coeff - 1) | 1;
3941                 }
3942                 unquant_coeff<<= 3;
3943             }
3944
                 /* Squared error of this candidate relative to the error of
                  * coding the coefficient as zero. */
3945             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* After the +64 offset, levels in [-64, 63] index the VLC
                  * length tables; anything else is costed as an escape. */
3946             level+=64;
3947             if((level&(~127)) == 0){
3948                 for(j=survivor_count-1; j>=0; j--){
3949                     int run= i - survivor[j];
3950                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3951                     score += score_tab[i-run];
3952
3953                     if(score < best_score){
3954                         best_score= score;
3955                         run_tab[i+1]= run;
3956                         level_tab[i+1]= level-64;
3957                     }
3958                 }
3959
                     /* For these formats the final coefficient uses its own
                      * length table (last_length), so the best end point is
                      * tracked during the forward pass. */
3960                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3961                     for(j=survivor_count-1; j>=0; j--){
3962                         int run= i - survivor[j];
3963                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3964                         score += score_tab[i-run];
3965                         if(score < last_score){
3966                             last_score= score;
3967                             last_run= run;
3968                             last_level= level-64;
3969                             last_i= i+1;
3970                         }
3971                     }
3972                 }
3973             }else{
                     /* Escape-coded level: fixed bit cost independent of run. */
3974                 distortion += esc_length*lambda;
3975                 for(j=survivor_count-1; j>=0; j--){
3976                     int run= i - survivor[j];
3977                     int score= distortion + score_tab[i-run];
3978
3979                     if(score < best_score){
3980                         best_score= score;
3981                         run_tab[i+1]= run;
3982                         level_tab[i+1]= level-64;
3983                     }
3984                 }
3985
3986                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3987                   for(j=survivor_count-1; j>=0; j--){
3988                         int run= i - survivor[j];
3989                         int score= distortion + score_tab[i-run];
3990                         if(score < last_score){
3991                             last_score= score;
3992                             last_run= run;
3993                             last_level= level-64;
3994                             last_i= i+1;
3995                         }
3996                     }
3997                 }
3998             }
3999         }
4000
4001         score_tab[i+1]= best_score;
4002
             /* Prune: drop trailing survivors whose score exceeds the new best
              * (with a slack of lambda for longer blocks), then add i+1. */
4003         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
4004         if(last_non_zero <= 27){
4005             for(; survivor_count; survivor_count--){
4006                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4007                     break;
4008             }
4009         }else{
4010             for(; survivor_count; survivor_count--){
4011                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4012                     break;
4013             }
4014         }
4015
4016         survivor[ survivor_count++ ]= i+1;
4017     }
4018
         /* Formats other than H.263/H.261: choose the end position afterwards,
          * charging 2*lambda for any end position other than 0. */
4019     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4020         last_score= 256*256*256*120;
4021         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4022             int score= score_tab[i];
4023             if(i) score += lambda*2; //FIXME exacter?
4024
4025             if(score < last_score){
4026                 last_score= score;
4027                 last_i= i;
4028                 last_level= level_tab[i];
4029                 last_run= run_tab[i];
4030             }
4031         }
4032     }
4033
4034     s->coded_score[n] = last_score;
4035
         /* Remember |block[0]| before the coefficients are cleared; it is
          * needed by the single-coefficient special case below. */
4036     dc= FFABS(block[0]);
4037     last_non_zero= last_i - 1;
4038     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4039
4040     if(last_non_zero < start_i)
4041         return last_non_zero;
4042
         /* Only coefficient 0 of a non-intra block is left: redo its decision
          * on its own, with "drop it" (score dc*dc) as the starting best. */
4043     if(last_non_zero == 0 && start_i == 0){
4044         int best_level= 0;
4045         int best_score= dc * dc;
4046
4047         for(i=0; i<coeff_count[0]; i++){
4048             int level= coeff[i][0];
4049             int alevel= FFABS(level);
4050             int unquant_coeff, score, distortion;
4051
4052             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4053                     unquant_coeff= (alevel*qmul + qadd)>>3;
4054             }else{ //MPEG1
4055                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4056                     unquant_coeff =   (unquant_coeff - 1) | 1;
4057             }
4058             unquant_coeff = (unquant_coeff + 4) >> 3;
4059             unquant_coeff<<= 3 + 3;
4060
4061             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4062             level+=64;
4063             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4064             else                    score= distortion + esc_length*lambda;
4065
4066             if(score < best_score){
4067                 best_score= score;
4068                 best_level= level - 64;
4069             }
4070         }
4071         block[0]= best_level;
4072         s->coded_score[n] = best_score - dc*dc;
4073         if(best_level == 0) return -1;
4074         else                return last_non_zero;
4075     }
4076
         /* Backtrack: store the final coefficient, then follow the
          * run_tab/level_tab chain towards start_i writing each level. */
4077     i= last_i;
4078     av_assert2(last_level);
4079
4080     block[ perm_scantable[last_non_zero] ]= last_level;
4081     i -= last_run + 1;
4082
4083     for(; i>start_i; i -= run_tab[i] + 1){
4084         block[ perm_scantable[i-1] ]= level_tab[i];
4085     }
4086
4087     return last_non_zero;
4088 }
4089
/* Uncomment to compile in the START_TIMER/STOP_TIMER probes and the change
 * counters guarded by REFINE_STATS in dct_quantize_refine(). */
4090 //#define REFINE_STATS 1
/*
 * Table filled by build_basis(): row perm[8*i + j] holds the 64 samples
 * (stored at index 8*x + y) of the 2-D DCT basis function for the frequency
 * pair (i, j), scaled by 1 << BASIS_SHIFT and rounded to an integer.
 * dct_quantize_refine() treats basis[0][0] == 0 as "table not built yet".
 */
4091 static int16_t basis[64][64];
4092
4093 static void build_basis(uint8_t *perm){
4094     int i, j, x, y;
4095     emms_c();
4096     for(i=0; i<8; i++){
4097         for(j=0; j<8; j++){
4098             for(y=0; y<8; y++){
4099                 for(x=0; x<8; x++){
4100                     double s= 0.25*(1<<BASIS_SHIFT);
4101                     int index= 8*i + j;
4102                     int perm_index= perm[index];
4103                     if(i==0) s*= sqrt(0.5);
4104                     if(j==0) s*= sqrt(0.5);
4105                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4106                 }
4107             }
4108         }
4109     }
4110 }
4111
/**
 * Iteratively refine the quantized levels of one block (quantizer noise
 * shaping).
 *
 * Starting from the levels already in block[], each pass evaluates changing
 * every candidate coefficient by +1 or -1, scoring the change as the value
 * returned by s->mpvencdsp.try_8x8basis() plus lambda times the change in
 * VLC bit length. The single best change is applied and the search repeats
 * until no change scores below the unchanged block.
 *
 * @param s      encoder context (DSP functions, VLC length tables, lambda2,
 *               quantizer_noise_shaping, block_last_index)
 * @param block  64 quantized coefficients, modified in place
 * @param weight 64 per-sample weights; overwritten with a remapped weight
 * @param orig   64 reference samples, only read
 * @param n      block number; n < 4 selects the luma DC scale, n > 3 the
 *               chroma intra VLC tables when available
 * @param qscale quantizer scale used to dequantize levels
 * @return scan index of the last nonzero coefficient after refinement
 */
4112 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4113                         int16_t *block, int16_t *weight, int16_t *orig,
4114                         int n, int qscale){
         /* rem[] is the working residual handed to add_8x8basis/try_8x8basis;
          * d1[] receives the transformed weighted residual (gradient). */
4115     int16_t rem[64];
4116     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4117     const uint8_t *scantable= s->intra_scantable.scantable;
4118     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4119 //    unsigned int threshold1, threshold2;
4120 //    int bias=0;
         /* Zero-run length preceding each nonzero coefficient, in scan order. */
4121     int run_tab[65];
4122     int prev_run=0;
4123     int prev_level=0;
4124     int qmul, qadd, start_i, last_non_zero, i, dc;
4125     uint8_t * length;
4126     uint8_t * last_length;
4127     int lambda;
4128     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4129 #ifdef REFINE_STATS
4130 static int count=0;
4131 static int after_last=0;
4132 static int to_zero=0;
4133 static int from_zero=0;
4134 static int raise=0;
4135 static int lower=0;
4136 static int messed_sign=0;
4137 #endif
4138
         /* Lazy one-time construction of the basis table.
          * NOTE(review): the check and the fill of the file-scope table are
          * not synchronized here - confirm this is safe for concurrent use. */
4139     if(basis[0][0] == 0)
4140         build_basis(s->idsp.idct_permutation);
4141
         /* A level is dequantized below as qmul*level +/- qadd. */
4142     qmul= qscale*2;
4143     qadd= (qscale-1)|1;
4144     if (s->mb_intra) {
4145         if (!s->h263_aic) {
4146             if (n < 4)
4147                 q = s->y_dc_scale;
4148             else
4149                 q = s->c_dc_scale;
4150         } else{
4151             /* For AIC we skip quant/dequant of INTRADC */
4152             q = 1;
4153             qadd=0;
4154         }
4155         q <<= RECON_SHIFT-3;
4156         /* note: block[0] is assumed to be positive */
4157         dc= block[0]*q;
4158 //        block[0] = (block[0] + (q >> 1)) / q;
4159         start_i = 1;
4160 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4161 //            bias= 1<<(QMAT_SHIFT-1);
4162         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4163             length     = s->intra_chroma_ac_vlc_length;
4164             last_length= s->intra_chroma_ac_vlc_last_length;
4165         } else {
4166             length     = s->intra_ac_vlc_length;
4167             last_length= s->intra_ac_vlc_last_length;
4168         }
4169     } else {
4170         dc= 0;
4171         start_i = 0;
4172         length     = s->inter_ac_vlc_length;
4173         last_length= s->inter_ac_vlc_last_length;
4174     }
4175     last_non_zero = s->block_last_index[n];
4176
4177 #ifdef REFINE_STATS
4178 {START_TIMER
4179 #endif
         /* Seed the residual with (scaled DC + rounding term) minus the
          * original samples in RECON_SHIFT fixed point. */
4180     dc += (1<<(RECON_SHIFT-1));
4181     for(i=0; i<64; i++){
4182         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4183     }
4184 #ifdef REFINE_STATS
4185 STOP_TIMER("memset rem[]")}
4186 #endif
         /* Remap each weight to a bounded value (smaller for larger |weight|)
          * and accumulate the sum of squares used to scale lambda. */
4187     sum=0;
4188     for(i=0; i<64; i++){
4189         int one= 36;
4190         int qns=4;
4191         int w;
4192
4193         w= FFABS(weight[i]) + qns*one;
4194         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4195
4196         weight[i] = w;
4197 //        w=weight[i] = (63*qns + (w/2)) / w;
4198
4199         av_assert2(w>0);
4200         av_assert2(w<(1<<6));
4201         sum += w*w;
4202     }
4203     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4204 #ifdef REFINE_STATS
4205 {START_TIMER
4206 #endif
         /* Record the zero runs and feed every nonzero coefficient's
          * dequantized value into rem[] via add_8x8basis (presumably this
          * leaves rem[] = reconstruction - original; depends on that helper). */
4207     run=0;
4208     rle_index=0;
4209     for(i=start_i; i<=last_non_zero; i++){
4210         int j= perm_scantable[i];
4211         const int level= block[j];
4212         int coeff;
4213
4214         if(level){
4215             if(level<0) coeff= qmul*level - qadd;
4216             else        coeff= qmul*level + qadd;
4217             run_tab[rle_index++]=run;
4218             run=0;
4219
4220             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4221         }else{
4222             run++;
4223         }
4224     }
4225 #ifdef REFINE_STATS
4226 if(last_non_zero>0){
4227 STOP_TIMER("init rem[]")
4228 }
4229 }
4230
4231 {START_TIMER
4232 #endif
         /* One iteration = find and apply the single best +/-1 change;
          * the loop ends when no change beats the unchanged block. */
4233     for(;;){
             /* Baseline: score of rem[] with a zero change applied. */
4234         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4235         int best_coeff=0;
4236         int best_change=0;
4237         int run2, best_unquant_change=0, analyze_gradient;
4238 #ifdef REFINE_STATS
4239 {START_TIMER
4240 #endif
4241         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4242
             /* d1[] = fdct of the weighted residual; used further down to skip
              * zero -> +/-1 changes whose sign matches the gradient sign. */
4243         if(analyze_gradient){
4244 #ifdef REFINE_STATS
4245 {START_TIMER
4246 #endif
4247             for(i=0; i<64; i++){
4248                 int w= weight[i];
4249
4250                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4251             }
4252 #ifdef REFINE_STATS
4253 STOP_TIMER("rem*w*w")}
4254 {START_TIMER
4255 #endif
4256             s->fdsp.fdct(d1);
4257 #ifdef REFINE_STATS
4258 STOP_TIMER("dct")}
4259 #endif
4260         }
4261
             /* Intra blocks: try the DC level +/-1 (no bit-cost term here). */
4262         if(start_i){
4263             const int level= block[0];
4264             int change, old_coeff;
4265
4266             av_assert2(s->mb_intra);
4267
4268             old_coeff= q*level;
4269
4270             for(change=-1; change<=1; change+=2){
4271                 int new_level= level + change;
4272                 int score, new_coeff;
4273
4274                 new_coeff= q*new_level;
4275                 if(new_coeff >= 2048 || new_coeff < 0)
4276                     continue;
4277
4278                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4279                                                   new_coeff - old_coeff);
4280                 if(score<best_score){
4281                     best_score= score;
4282                     best_coeff= 0;
4283                     best_change= change;
4284                     best_unquant_change= new_coeff - old_coeff;
4285                 }
4286             }
4287         }
4288
             /* run  = zeros seen since the previous nonzero coefficient,
              * run2 = zeros remaining before the next nonzero coefficient. */
4289         run=0;
4290         rle_index=0;
4291         run2= run_tab[rle_index++];
4292         prev_level=0;
4293         prev_run=0;
4294
4295         for(i=start_i; i<64; i++){
4296             int j= perm_scantable[i];
4297             const int level= block[j];
4298             int change, old_coeff;
4299
                 /* Below noise-shaping level 3, look at most one position past
                  * the current last nonzero coefficient. */
4300             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4301                 break;
4302
4303             if(level){
4304                 if(level<0) old_coeff= qmul*level - qadd;
4305                 else        old_coeff= qmul*level + qadd;
4306                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4307             }else{
4308                 old_coeff=0;
4309                 run2--;
4310                 av_assert2(run2>=0 || i >= last_non_zero );
4311             }
4312
4313             for(change=-1; change<=1; change+=2){
4314                 int new_level= level + change;
4315                 int score, new_coeff, unquant_change;
4316
                     /* 'score' first accumulates the bit-length delta of the
                      * change, is then scaled by lambda, and finally gets the
                      * try_8x8basis() result added. */
4317                 score=0;
                     /* Below noise-shaping level 2, never grow a magnitude. */
4318                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4319                    continue;
4320
4321                 if(new_level){
4322                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4323                     else            new_coeff= qmul*new_level + qadd;
4324                     if(new_coeff >= 2048 || new_coeff <= -2048)
4325                         continue;
4326                     //FIXME check for overflow
4327
4328                     if(level){
                             /* Nonzero -> different nonzero: same run, only the
                              * level's code length changes. */
4329                         if(level < 63 && level > -63){
4330                             if(i < last_non_zero)
4331                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4332                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4333                             else
4334                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4335                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4336                         }
4337                     }else{
                             /* Zero -> +/-1: a new coefficient is inserted. */
4338                         av_assert2(FFABS(new_level)==1);
4339
4340                         if(analyze_gradient){
4341                             int g= d1[ scantable[i] ];
                                 /* Skip when g is nonzero and has the same
                                  * sign as new_level. */
4342                             if(g && (g^new_level) >= 0)
4343                                 continue;
4344                         }
4345
4346                         if(i < last_non_zero){
                                 /* Inserting splits the run before the next
                                  * nonzero coefficient into run and run2. */
4347                             int next_i= i + run2 + 1;
4348                             int next_level= block[ perm_scantable[next_i] ] + 64;
4349
4350                             if(next_level&(~127))
4351                                 next_level= 0;
4352
4353                             if(next_i < last_non_zero)
4354                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4355                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4356                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4357                             else
4358                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4359                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4360                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4361                         }else{
                                 /* New coefficient becomes the last one; the
                                  * previous last is re-costed as non-last. */
4362                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4363                             if(prev_level){
4364                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4365                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4366                             }
4367                         }
4368                     }
4369                 }else{
                         /* +/-1 -> zero: the coefficient is removed and its run
                          * merges with the following one. */
4370                     new_coeff=0;
4371                     av_assert2(FFABS(level)==1);
4372
4373                     if(i < last_non_zero){
4374                         int next_i= i + run2 + 1;
4375                         int next_level= block[ perm_scantable[next_i] ] + 64;
4376
4377                         if(next_level&(~127))
4378                             next_level= 0;
4379
4380                         if(next_i < last_non_zero)
4381                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4382                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4383                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4384                         else
4385                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4386                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4387                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4388                     }else{
4389                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4390                         if(prev_level){
4391                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4392                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4393                         }
4394                     }
4395                 }
4396
4397                 score *= lambda;
4398
4399                 unquant_change= new_coeff - old_coeff;
4400                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4401
4402                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4403                                                    unquant_change);
4404                 if(score<best_score){
4405                     best_score= score;
4406                     best_coeff= i;
4407                     best_change= change;
4408                     best_unquant_change= unquant_change;
4409                 }
4410             }
                 /* Track the previous nonzero coefficient (level offset by 64,
                  * forced to 0 when outside the table range) and the run. */
4411             if(level){
4412                 prev_level= level + 64;
4413                 if(prev_level&(~127))
4414                     prev_level= 0;
4415                 prev_run= run;
4416                 run=0;
4417             }else{
4418                 run++;
4419             }
4420         }
4421 #ifdef REFINE_STATS
4422 STOP_TIMER("iterative step")}
4423 #endif
4424
             /* Apply the winning change, if any, then refresh last_non_zero,
              * run_tab[] and rem[] for the next iteration. */
4425         if(best_change){
4426             int j= perm_scantable[ best_coeff ];
4427
4428             block[j] += best_change;
4429
4430             if(best_coeff > last_non_zero){
4431                 last_non_zero= best_coeff;
4432                 av_assert2(block[j]);
4433 #ifdef REFINE_STATS
4434 after_last++;
4435 #endif
4436             }else{
4437 #ifdef REFINE_STATS
4438 if(block[j]){
4439     if(block[j] - best_change){
4440         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4441             raise++;
4442         }else{
4443             lower++;
4444         }
4445     }else{
4446         from_zero++;
4447     }
4448 }else{
4449     to_zero++;
4450 }
4451 #endif
                     /* The change may have zeroed the last coefficient:
                      * walk back to the new last nonzero one. */
4452                 for(; last_non_zero>=start_i; last_non_zero--){
4453                     if(block[perm_scantable[last_non_zero]])
4454                         break;
4455                 }
4456             }
4457 #ifdef REFINE_STATS
4458 count++;
4459 if(256*256*256*64 % count == 0){
4460     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4461 }
4462 #endif
4463             run=0;
4464             rle_index=0;
4465             for(i=start_i; i<=last_non_zero; i++){
4466                 int j= perm_scantable[i];
4467                 const int level= block[j];
4468
4469                  if(level){
4470                      run_tab[rle_index++]=run;
4471                      run=0;
4472                  }else{
4473                      run++;
4474                  }
4475             }
4476
4477             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4478         }else{
4479             break;
4480         }
4481     }
4482 #ifdef REFINE_STATS
4483 if(last_non_zero>0){
4484 STOP_TIMER("iterative search")
4485 }
4486 }
4487 #endif
4488
4489     return last_non_zero;
4490 }
4491
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Every coefficient at a position j that appears among the first last + 1
 * entries of the scantable is moved to position permutation[j]; the source
 * positions are cleared first, so positions that receive no coefficient end
 * up zero. Positions not listed in that scantable prefix are not read.
 *
 * @param block       the block which will be permuted according to
 *                    the given permutation vector, modified in place
 * @param permutation the permutation vector (read-only)
 * @param scantable   the scantable in use; it is only used to limit the work
 *                    to the coefficients that can be nonzero, the block is
 *                    not (inverse) permuted to scantable order!
 * @param last        the last nonzero coefficient in scantable order, used
 *                    to speed the permutation up; nothing is done when
 *                    last <= 0
 */
static void block_permute(int16_t *block, const uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t temp[64];
    int i;

    if (last <= 0)
        return;

    /* No shortcut for an identity permutation is taken here: testing only
     * permutation[1] == 1 was considered, but might fail for some
     * permutations. */

    /* Pass 1: save the listed coefficients and clear their old positions. */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        temp[j]  = block[j];
        block[j] = 0;
    }

    /* Pass 2: store each saved coefficient at its permuted position. */
    for (i = 0; i <= last; i++) {
        const int j      = scantable[i];
        const int perm_j = permutation[j];
        block[perm_j] = temp[j];
    }
}
4527
4528 int ff_dct_quantize_c(MpegEncContext *s,
4529                         int16_t *block, int n,
4530                         int qscale, int *overflow)
4531 {
4532     int i, j, level, last_non_zero, q, start_i;
4533     const int *qmat;
4534     const uint8_t *scantable= s->intra_scantable.scantable;
4535     int bias;
4536     int max=0;
4537     unsigned int threshold1, threshold2;
4538
4539     s->fdsp.fdct(block);
4540
4541     if(s->dct_error_sum)
4542         s->denoise_dct(s, block);
4543
4544     if (s->mb_intra) {
4545         if (!s->h263_aic) {
4546             if (n < 4)
4547                 q = s->y_dc_scale;
4548             else
4549                 q = s->c_dc_scale;
4550             q = q << 3;
4551         } else
4552             /* For AIC we skip quant/dequant of INTRADC */
4553             q = 1 << 3;
4554
4555         /* note: block[0] is assumed to be positive */
4556         block[0] = (block[0] + (q >> 1)) / q;
4557         start_i = 1;
4558         last_non_zero = 0;
4559         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4560         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4561     } else {
4562         start_i = 0;
4563         last_non_zero = -1;
4564         qmat = s->q_inter_matrix[qscale];
4565         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4566     }
4567     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4568     threshold2= (threshold1<<1);
4569     for(i=63;i>=start_i;i--) {
4570         j = scantable[i];
4571         level = block[j] * qmat[j];
4572
4573         if(((unsigned)(level+threshold1))>threshold2){
4574             last_non_zero = i;
4575             break;
4576         }else{
4577             block[j]=0;
4578         }
4579     }
4580     for(i=start_i; i<=last_non_zero; i++) {
4581         j = scantable[i];
4582         level = block[j] * qmat[j];
4583
4584 //        if(   bias+level >= (1<<QMAT_SHIFT)
4585 //           || bias-level >= (1<<QMAT_SHIFT)){
4586         if(((unsigned)(level+threshold1))>threshold2){
4587             if(level>0){
4588                 level= (bias + level)>>QMAT_SHIFT;
4589                 block[j]= level;
4590             }else{
4591                 level= (bias - level)>>QMAT_SHIFT;
4592                 block[j]= -level;
4593             }
4594             max |=level;
4595         }else{
4596             block[j]=0;
4597         }
4598     }
4599     *overflow= s->max_qcoeff < max; //overflow might have happened
4600
4601     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4602     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4603         block_permute(block, s->idsp.idct_permutation,
4604                       scantable, last_non_zero);
4605
4606     return last_non_zero;
4607 }
4608
/* Byte offset of field x inside MpegEncContext (used by the option tables). */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the options in this file. The expansion is parenthesized so
 * that it stays a single operand next to higher-precedence operators. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4611 static const AVOption h263_options[] = {
4612     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4613     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4614     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4615     FF_MPV_COMMON_OPTS
4616     { NULL },
4617 };
4618
4619 static const AVClass h263_class = {
4620     .class_name = "H.263 encoder",
4621     .item_name  = av_default_item_name,
4622     .option     = h263_options,
4623     .version    = LIBAVUTIL_VERSION_INT,
4624 };
4625
4626 AVCodec ff_h263_encoder = {
4627     .name           = "h263",
4628     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4629     .type           = AVMEDIA_TYPE_VIDEO,
4630     .id             = AV_CODEC_ID_H263,
4631     .priv_data_size = sizeof(MpegEncContext),
4632     .init           = ff_mpv_encode_init,
4633     .encode2        = ff_mpv_encode_picture,
4634     .close          = ff_mpv_encode_end,
4635     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4636     .priv_class     = &h263_class,
4637 };
4638
4639 static const AVOption h263p_options[] = {
4640     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4641     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4642     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4643     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4644     FF_MPV_COMMON_OPTS
4645     { NULL },
4646 };
4647 static const AVClass h263p_class = {
4648     .class_name = "H.263p encoder",
4649     .item_name  = av_default_item_name,
4650     .option     = h263p_options,
4651     .version    = LIBAVUTIL_VERSION_INT,
4652 };
4653
4654 AVCodec ff_h263p_encoder = {
4655     .name           = "h263p",
4656     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4657     .type           = AVMEDIA_TYPE_VIDEO,
4658     .id             = AV_CODEC_ID_H263P,
4659     .priv_data_size = sizeof(MpegEncContext),
4660     .init           = ff_mpv_encode_init,
4661     .encode2        = ff_mpv_encode_picture,
4662     .close          = ff_mpv_encode_end,
4663     .capabilities   = CODEC_CAP_SLICE_THREADS,
4664     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4665     .priv_class     = &h263p_class,
4666 };
4667
4668 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4669
4670 AVCodec ff_msmpeg4v2_encoder = {
4671     .name           = "msmpeg4v2",
4672     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4673     .type           = AVMEDIA_TYPE_VIDEO,
4674     .id             = AV_CODEC_ID_MSMPEG4V2,
4675     .priv_data_size = sizeof(MpegEncContext),
4676     .init           = ff_mpv_encode_init,
4677     .encode2        = ff_mpv_encode_picture,
4678     .close          = ff_mpv_encode_end,
4679     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4680     .priv_class     = &msmpeg4v2_class,
4681 };
4682
4683 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4684
4685 AVCodec ff_msmpeg4v3_encoder = {
4686     .name           = "msmpeg4",
4687     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4688     .type           = AVMEDIA_TYPE_VIDEO,
4689     .id             = AV_CODEC_ID_MSMPEG4V3,
4690     .priv_data_size = sizeof(MpegEncContext),
4691     .init           = ff_mpv_encode_init,
4692     .encode2        = ff_mpv_encode_picture,
4693     .close          = ff_mpv_encode_end,
4694     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4695     .priv_class     = &msmpeg4v3_class,
4696 };
4697
4698 FF_MPV_GENERIC_CLASS(wmv1)
4699
4700 AVCodec ff_wmv1_encoder = {
4701     .name           = "wmv1",
4702     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4703     .type           = AVMEDIA_TYPE_VIDEO,
4704     .id             = AV_CODEC_ID_WMV1,
4705     .priv_data_size = sizeof(MpegEncContext),
4706     .init           = ff_mpv_encode_init,
4707     .encode2        = ff_mpv_encode_picture,
4708     .close          = ff_mpv_encode_end,
4709     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4710     .priv_class     = &wmv1_class,
4711 };