git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64 #include "sp5x.h"
  65
  66 #define QUANT_BIAS_SHIFT 8
  67
  68 #define QMAT_SHIFT_MMX 16
  69 #define QMAT_SHIFT 21
  70
  71 static int encode_picture(MpegEncContext *s, int picture_number);
  72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  73 static int sse_mb(MpegEncContext *s);
  74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  76
  77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  79
  80 const AVOption ff_mpv_generic_options[] = {
  81     FF_MPV_COMMON_OPTS
  82     { NULL },
  83 };
  84
  85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  86                        uint16_t (*qmat16)[2][64],
  87                        const uint16_t *quant_matrix,
  88                        int bias, int qmin, int qmax, int intra)
  89 {
  90     FDCTDSPContext *fdsp = &s->fdsp;
  91     int qscale;
  92     int shift = 0;
  93
  94     for (qscale = qmin; qscale <= qmax; qscale++) {
  95         int i;
  96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  97 #if CONFIG_FAANDCT
  98             fdsp->fdct == ff_faandct            ||
  99 #endif /* CONFIG_FAANDCT */
 100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 101             for (i = 0; i < 64; i++) {
 102                 const int j = s->idsp.idct_permutation[i];
 103                 int64_t den = (int64_t) qscale * quant_matrix[j];
 104                 /* 16 <= qscale * quant_matrix[i] <= 7905
 105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 106                  *             19952 <=              x  <= 249205026
 107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 108                  *           3444240 >= (1 << 36) / (x) >= 275 */
 109
 110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 111             }
 112         } else if (fdsp->fdct == ff_fdct_ifast) {
 113             for (i = 0; i < 64; i++) {
 114                 const int j = s->idsp.idct_permutation[i];
 115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 116                 /* 16 <= qscale * quant_matrix[i] <= 7905
 117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 118                  *             19952 <=              x  <= 249205026
 119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 120                  *           3444240 >= (1 << 36) / (x) >= 275 */
 121
 122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 123             }
 124         } else {
 125             for (i = 0; i < 64; i++) {
 126                 const int j = s->idsp.idct_permutation[i];
 127                 int64_t den = (int64_t) qscale * quant_matrix[j];
 128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 129                  * Assume x = qscale * quant_matrix[i]
 130                  * So             16 <=              x  <= 7905
 131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 132                  * so          32768 >= (1 << 19) / (x) >= 67 */
 133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 135                 //                    (qscale * quant_matrix[i]);
 136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 137
 138                 if (qmat16[qscale][0][i] == 0 ||
 139                     qmat16[qscale][0][i] == 128 * 256)
 140                     qmat16[qscale][0][i] = 128 * 256 - 1;
 141                 qmat16[qscale][1][i] =
 142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 143                                 qmat16[qscale][0][i]);
 144             }
 145         }
 146
 147         for (i = intra; i < 64; i++) {
 148             int64_t max = 8191;
 149             if (fdsp->fdct == ff_fdct_ifast) {
 150                 max = (8191LL * ff_aanscales[i]) >> 14;
 151             }
 152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 153                 shift++;
 154             }
 155         }
 156     }
 157     if (shift) {
 158         av_log(NULL, AV_LOG_INFO,
 159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 160                QMAT_SHIFT - shift);
 161     }
 162 }
 163
 164 static inline void update_qscale(MpegEncContext *s)
 165 {
 166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 167                 (FF_LAMBDA_SHIFT + 7);
 168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 169
 170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 171                  FF_LAMBDA_SHIFT;
 172 }
 173
 174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 175 {
 176     int i;
 177
 178     if (matrix) {
 179         put_bits(pb, 1, 1);
 180         for (i = 0; i < 64; i++) {
 181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 182         }
 183     } else
 184         put_bits(pb, 1, 0);
 185 }
 186
 187 /**
 188  * init s->current_picture.qscale_table from s->lambda_table
 189  */
 190 void ff_init_qscale_tab(MpegEncContext *s)
 191 {
 192     int8_t * const qscale_table = s->current_picture.qscale_table;
 193     int i;
 194
 195     for (i = 0; i < s->mb_num; i++) {
 196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 199                                                   s->avctx->qmax);
 200     }
 201 }
 202
 203 static void update_duplicate_context_after_me(MpegEncContext *dst,
 204                                               MpegEncContext *src)
 205 {
 206 #define COPY(a) dst->a= src->a
 207     COPY(pict_type);
 208     COPY(current_picture);
 209     COPY(f_code);
 210     COPY(b_code);
 211     COPY(qscale);
 212     COPY(lambda);
 213     COPY(lambda2);
 214     COPY(picture_in_gop_number);
 215     COPY(gop_picture_number);
 216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 217     COPY(progressive_frame);    // FIXME don't set in encode_header
 218     COPY(partitioned_frame);    // FIXME don't set in encode_header
 219 #undef COPY
 220 }
 221
 222 /**
 223  * Set the given MpegEncContext to defaults for encoding.
 224  * the changed fields will not depend upon the prior state of the MpegEncContext.
 225  */
 226 static void mpv_encode_defaults(MpegEncContext *s)
 227 {
 228     int i;
 229     ff_mpv_common_defaults(s);
 230
 231     for (i = -16; i < 16; i++) {
 232         default_fcode_tab[i + MAX_MV] = 1;
 233     }
 234     s->me.mv_penalty = default_mv_penalty;
 235     s->fcode_tab     = default_fcode_tab;
 236
 237     s->input_picture_number  = 0;
 238     s->picture_in_gop_number = 0;
 239 }
 240
 241 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 242     if (ARCH_X86)
 243         ff_dct_encode_init_x86(s);
 244
 245     if (CONFIG_H263_ENCODER)
 246         ff_h263dsp_init(&s->h263dsp);
 247     if (!s->dct_quantize)
 248         s->dct_quantize = ff_dct_quantize_c;
 249     if (!s->denoise_dct)
 250         s->denoise_dct  = denoise_dct_c;
 251     s->fast_dct_quantize = s->dct_quantize;
 252     if (s->avctx->trellis)
 253         s->dct_quantize  = dct_quantize_trellis_c;
 254
 255     return 0;
 256 }
 257
 258 /* init video encoder */
 259 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 260 {
 261     MpegEncContext *s = avctx->priv_data;
 262     int i, ret, format_supported;
 263
 264     mpv_encode_defaults(s);
 265
 266     switch (avctx->codec_id) {
 267     case AV_CODEC_ID_MPEG2VIDEO:
 268         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 269             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 270             av_log(avctx, AV_LOG_ERROR,
 271                    "only YUV420 and YUV422 are supported\n");
 272             return -1;
 273         }
 274         break;
 275     case AV_CODEC_ID_MJPEG:
 276     case AV_CODEC_ID_AMV:
 277         format_supported = 0;
 278         /* JPEG color space */
 279         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 280             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 281             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 282             (avctx->color_range == AVCOL_RANGE_JPEG &&
 283              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 284               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 285               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 286             format_supported = 1;
 287         /* MPEG color space */
 288         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 289                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 290                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 291                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 292             format_supported = 1;
 293
 294         if (!format_supported) {
 295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 296             return -1;
 297         }
 298         break;
 299     default:
 300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 302             return -1;
 303         }
 304     }
 305
 306     switch (avctx->pix_fmt) {
 307     case AV_PIX_FMT_YUVJ444P:
 308     case AV_PIX_FMT_YUV444P:
 309         s->chroma_format = CHROMA_444;
 310         break;
 311     case AV_PIX_FMT_YUVJ422P:
 312     case AV_PIX_FMT_YUV422P:
 313         s->chroma_format = CHROMA_422;
 314         break;
 315     case AV_PIX_FMT_YUVJ420P:
 316     case AV_PIX_FMT_YUV420P:
 317     default:
 318         s->chroma_format = CHROMA_420;
 319         break;
 320     }
 321
 322     s->bit_rate = avctx->bit_rate;
 323     s->width    = avctx->width;
 324     s->height   = avctx->height;
 325     if (avctx->gop_size > 600 &&
 326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 327         av_log(avctx, AV_LOG_WARNING,
 328                "keyframe interval too large!, reducing it from %d to %d\n",
 329                avctx->gop_size, 600);
 330         avctx->gop_size = 600;
 331     }
 332     s->gop_size     = avctx->gop_size;
 333     s->avctx        = avctx;
 334     if (avctx->max_b_frames > MAX_B_FRAMES) {
 335         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 336                "is %d.\n", MAX_B_FRAMES);
 337         avctx->max_b_frames = MAX_B_FRAMES;
 338     }
 339     s->max_b_frames = avctx->max_b_frames;
 340     s->codec_id     = avctx->codec->id;
 341     s->strict_std_compliance = avctx->strict_std_compliance;
 342     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 343     s->mpeg_quant         = avctx->mpeg_quant;
 344     s->rtp_mode           = !!avctx->rtp_payload_size;
 345     s->intra_dc_precision = avctx->intra_dc_precision;
 346
 347     // workaround some differences between how applications specify dc precision
 348     if (s->intra_dc_precision < 0) {
 349         s->intra_dc_precision += 8;
 350     } else if (s->intra_dc_precision >= 8)
 351         s->intra_dc_precision -= 8;
 352
 353     if (s->intra_dc_precision < 0) {
 354         av_log(avctx, AV_LOG_ERROR,
 355                 "intra dc precision must be positive, note some applications use"
 356                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 357         return AVERROR(EINVAL);
 358     }
 359
 360     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 361         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 362         return AVERROR(EINVAL);
 363     }
 364     s->user_specified_pts = AV_NOPTS_VALUE;
 365
 366     if (s->gop_size <= 1) {
 367         s->intra_only = 1;
 368         s->gop_size   = 12;
 369     } else {
 370         s->intra_only = 0;
 371     }
 372
 373     s->me_method = avctx->me_method;
 374
 375     /* Fixed QSCALE */
 376     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 377
 378 #if FF_API_MPV_OPT
 379     FF_DISABLE_DEPRECATION_WARNINGS
 380     if (avctx->border_masking != 0.0)
 381         s->border_masking = avctx->border_masking;
 382     FF_ENABLE_DEPRECATION_WARNINGS
 383 #endif
 384
 385     s->adaptive_quant = (s->avctx->lumi_masking ||
 386                          s->avctx->dark_masking ||
 387                          s->avctx->temporal_cplx_masking ||
 388                          s->avctx->spatial_cplx_masking  ||
 389                          s->avctx->p_masking      ||
 390                          s->border_masking ||
 391                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 392                         !s->fixed_qscale;
 393
 394     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
 395
 396     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 397         switch(avctx->codec_id) {
 398         case AV_CODEC_ID_MPEG1VIDEO:
 399         case AV_CODEC_ID_MPEG2VIDEO:
 400             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 401             break;
 402         case AV_CODEC_ID_MPEG4:
 403         case AV_CODEC_ID_MSMPEG4V1:
 404         case AV_CODEC_ID_MSMPEG4V2:
 405         case AV_CODEC_ID_MSMPEG4V3:
 406             if       (avctx->rc_max_rate >= 15000000) {
 407                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 408             } else if(avctx->rc_max_rate >=  2000000) {
 409                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 410             } else if(avctx->rc_max_rate >=   384000) {
 411                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 412             } else
 413                 avctx->rc_buffer_size = 40;
 414             avctx->rc_buffer_size *= 16384;
 415             break;
 416         }
 417         if (avctx->rc_buffer_size) {
 418             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 419         }
 420     }
 421
 422     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 423         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 424         return -1;
 425     }
 426
 427     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 428         av_log(avctx, AV_LOG_INFO,
 429                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 430     }
 431
 432     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 433         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 434         return -1;
 435     }
 436
 437     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 438         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 439         return -1;
 440     }
 441
 442     if (avctx->rc_max_rate &&
 443         avctx->rc_max_rate == avctx->bit_rate &&
 444         avctx->rc_max_rate != avctx->rc_min_rate) {
 445         av_log(avctx, AV_LOG_INFO,
 446                "impossible bitrate constraints, this will fail\n");
 447     }
 448
 449     if (avctx->rc_buffer_size &&
 450         avctx->bit_rate * (int64_t)avctx->time_base.num >
 451             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 452         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 453         return -1;
 454     }
 455
 456     if (!s->fixed_qscale &&
 457         avctx->bit_rate * av_q2d(avctx->time_base) >
 458             avctx->bit_rate_tolerance) {
 459         av_log(avctx, AV_LOG_WARNING,
 460                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 461         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 462     }
 463
 464     if (s->avctx->rc_max_rate &&
 465         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 466         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 467          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 468         90000LL * (avctx->rc_buffer_size - 1) >
 469             s->avctx->rc_max_rate * 0xFFFFLL) {
 470         av_log(avctx, AV_LOG_INFO,
 471                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 472                "specified vbv buffer is too large for the given bitrate!\n");
 473     }
 474
 475     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 476         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 477         s->codec_id != AV_CODEC_ID_FLV1) {
 478         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 479         return -1;
 480     }
 481
 482     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 483         av_log(avctx, AV_LOG_ERROR,
 484                "OBMC is only supported with simple mb decision\n");
 485         return -1;
 486     }
 487
 488     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 489         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 490         return -1;
 491     }
 492
 493     if (s->max_b_frames                    &&
 494         s->codec_id != AV_CODEC_ID_MPEG4      &&
 495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 497         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 498         return -1;
 499     }
 500     if (s->max_b_frames < 0) {
 501         av_log(avctx, AV_LOG_ERROR,
 502                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 503         return -1;
 504     }
 505
 506     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 507          s->codec_id == AV_CODEC_ID_H263  ||
 508          s->codec_id == AV_CODEC_ID_H263P) &&
 509         (avctx->sample_aspect_ratio.num > 255 ||
 510          avctx->sample_aspect_ratio.den > 255)) {
 511         av_log(avctx, AV_LOG_WARNING,
 512                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 513                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 514         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 515                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 516     }
 517
 518     if ((s->codec_id == AV_CODEC_ID_H263  ||
 519          s->codec_id == AV_CODEC_ID_H263P) &&
 520         (avctx->width  > 2048 ||
 521          avctx->height > 1152 )) {
 522         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 523         return -1;
 524     }
 525     if ((s->codec_id == AV_CODEC_ID_H263  ||
 526          s->codec_id == AV_CODEC_ID_H263P) &&
 527         ((avctx->width &3) ||
 528          (avctx->height&3) )) {
 529         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 530         return -1;
 531     }
 532
 533     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 534         (avctx->width  > 4095 ||
 535          avctx->height > 4095 )) {
 536         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 537         return -1;
 538     }
 539
 540     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 541         (avctx->width  > 16383 ||
 542          avctx->height > 16383 )) {
 543         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 544         return -1;
 545     }
 546
 547     if (s->codec_id == AV_CODEC_ID_RV10 &&
 548         (avctx->width &15 ||
 549          avctx->height&15 )) {
 550         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 551         return AVERROR(EINVAL);
 552     }
 553
 554     if (s->codec_id == AV_CODEC_ID_RV20 &&
 555         (avctx->width &3 ||
 556          avctx->height&3 )) {
 557         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 558         return AVERROR(EINVAL);
 559     }
 560
 561     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 562          s->codec_id == AV_CODEC_ID_WMV2) &&
 563          avctx->width & 1) {
 564          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 565          return -1;
 566     }
 567
 568     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 569         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 570         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 571         return -1;
 572     }
 573
 574     // FIXME mpeg2 uses that too
 575     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 576                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 577         av_log(avctx, AV_LOG_ERROR,
 578                "mpeg2 style quantization not supported by codec\n");
 579         return -1;
 580     }
 581
 582     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 583         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 584         return -1;
 585     }
 586
 587     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 588         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 589         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 590         return -1;
 591     }
 592
 593     if (s->avctx->scenechange_threshold < 1000000000 &&
 594         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
 595         av_log(avctx, AV_LOG_ERROR,
 596                "closed gop with scene change detection are not supported yet, "
 597                "set threshold to 1000000000\n");
 598         return -1;
 599     }
 600
 601     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
 602         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 603             av_log(avctx, AV_LOG_ERROR,
 604                   "low delay forcing is only available for mpeg2\n");
 605             return -1;
 606         }
 607         if (s->max_b_frames != 0) {
 608             av_log(avctx, AV_LOG_ERROR,
 609                    "b frames cannot be used with low delay\n");
 610             return -1;
 611         }
 612     }
 613
 614     if (s->q_scale_type == 1) {
 615         if (avctx->qmax > 12) {
 616             av_log(avctx, AV_LOG_ERROR,
 617                    "non linear quant only supports qmax <= 12 currently\n");
 618             return -1;
 619         }
 620     }
 621
 622     if (s->avctx->thread_count > 1         &&
 623         s->codec_id != AV_CODEC_ID_MPEG4      &&
 624         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 625         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 626         s->codec_id != AV_CODEC_ID_MJPEG      &&
 627         (s->codec_id != AV_CODEC_ID_H263P)) {
 628         av_log(avctx, AV_LOG_ERROR,
 629                "multi threaded encoding not supported by codec\n");
 630         return -1;
 631     }
 632
 633     if (s->avctx->thread_count < 1) {
 634         av_log(avctx, AV_LOG_ERROR,
 635                "automatic thread number detection not supported by codec, "
 636                "patch welcome\n");
 637         return -1;
 638     }
 639
 640     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 641         s->rtp_mode = 1;
 642
 643     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 644         s->h263_slice_structured = 1;
 645
 646     if (!avctx->time_base.den || !avctx->time_base.num) {
 647         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 648         return -1;
 649     }
 650
 651     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 652         av_log(avctx, AV_LOG_INFO,
 653                "notice: b_frame_strategy only affects the first pass\n");
 654         avctx->b_frame_strategy = 0;
 655     }
 656
 657     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 658     if (i > 1) {
 659         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 660         avctx->time_base.den /= i;
 661         avctx->time_base.num /= i;
 662         //return -1;
 663     }
 664
 665     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 666         // (a + x * 3 / 8) / x
 667         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 668         s->inter_quant_bias = 0;
 669     } else {
 670         s->intra_quant_bias = 0;
 671         // (a - x / 4) / x
 672         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 673     }
 674
 675     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 676         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 677         return AVERROR(EINVAL);
 678     }
 679
 680 #if FF_API_QUANT_BIAS
 681 FF_DISABLE_DEPRECATION_WARNINGS
 682     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
 683         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 684         s->intra_quant_bias = avctx->intra_quant_bias;
 685     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
 686         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 687         s->inter_quant_bias = avctx->inter_quant_bias;
 688 FF_ENABLE_DEPRECATION_WARNINGS
 689 #endif
 690
 691     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 692
 693     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 694         s->avctx->time_base.den > (1 << 16) - 1) {
 695         av_log(avctx, AV_LOG_ERROR,
 696                "timebase %d/%d not supported by MPEG 4 standard, "
 697                "the maximum admitted value for the timebase denominator "
 698                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 699                (1 << 16) - 1);
 700         return -1;
 701     }
 702     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 703
 704     switch (avctx->codec->id) {
 705     case AV_CODEC_ID_MPEG1VIDEO:
 706         s->out_format = FMT_MPEG1;
 707         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 708         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 709         break;
 710     case AV_CODEC_ID_MPEG2VIDEO:
 711         s->out_format = FMT_MPEG1;
 712         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 713         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 714         s->rtp_mode   = 1;
 715         break;
 716     case AV_CODEC_ID_MJPEG:
 717     case AV_CODEC_ID_AMV:
 718         s->out_format = FMT_MJPEG;
 719         s->intra_only = 1; /* force intra only for jpeg */
 720         if (!CONFIG_MJPEG_ENCODER ||
 721             ff_mjpeg_encode_init(s) < 0)
 722             return -1;
 723         avctx->delay = 0;
 724         s->low_delay = 1;
 725         break;
 726     case AV_CODEC_ID_H261:
 727         if (!CONFIG_H261_ENCODER)
 728             return -1;
 729         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 730             av_log(avctx, AV_LOG_ERROR,
 731                    "The specified picture size of %dx%d is not valid for the "
 732                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 733                     s->width, s->height);
 734             return -1;
 735         }
 736         s->out_format = FMT_H261;
 737         avctx->delay  = 0;
 738         s->low_delay  = 1;
 739         s->rtp_mode   = 0; /* Sliced encoding not supported */
 740         break;
 741     case AV_CODEC_ID_H263:
 742         if (!CONFIG_H263_ENCODER)
 743             return -1;
 744         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 745                              s->width, s->height) == 8) {
 746             av_log(avctx, AV_LOG_ERROR,
 747                    "The specified picture size of %dx%d is not valid for "
 748                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 749                    "352x288, 704x576, and 1408x1152. "
 750                    "Try H.263+.\n", s->width, s->height);
 751             return -1;
 752         }
 753         s->out_format = FMT_H263;
 754         avctx->delay  = 0;
 755         s->low_delay  = 1;
 756         break;
 757     case AV_CODEC_ID_H263P:
 758         s->out_format = FMT_H263;
 759         s->h263_plus  = 1;
 760         /* Fx */
 761         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 762         s->modified_quant  = s->h263_aic;
 763         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 764         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 765
 766         /* /Fx */
 767         /* These are just to be sure */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_FLV1:
 772         s->out_format      = FMT_H263;
 773         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 774         s->unrestricted_mv = 1;
 775         s->rtp_mode  = 0; /* don't allow GOB */
 776         avctx->delay = 0;
 777         s->low_delay = 1;
 778         break;
 779     case AV_CODEC_ID_RV10:
 780         s->out_format = FMT_H263;
 781         avctx->delay  = 0;
 782         s->low_delay  = 1;
 783         break;
 784     case AV_CODEC_ID_RV20:
 785         s->out_format      = FMT_H263;
 786         avctx->delay       = 0;
 787         s->low_delay       = 1;
 788         s->modified_quant  = 1;
 789         s->h263_aic        = 1;
 790         s->h263_plus       = 1;
 791         s->loop_filter     = 1;
 792         s->unrestricted_mv = 0;
 793         break;
 794     case AV_CODEC_ID_MPEG4:
 795         s->out_format      = FMT_H263;
 796         s->h263_pred       = 1;
 797         s->unrestricted_mv = 1;
 798         s->low_delay       = s->max_b_frames ? 0 : 1;
 799         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V2:
 802         s->out_format      = FMT_H263;
 803         s->h263_pred       = 1;
 804         s->unrestricted_mv = 1;
 805         s->msmpeg4_version = 2;
 806         avctx->delay       = 0;
 807         s->low_delay       = 1;
 808         break;
 809     case AV_CODEC_ID_MSMPEG4V3:
 810         s->out_format        = FMT_H263;
 811         s->h263_pred         = 1;
 812         s->unrestricted_mv   = 1;
 813         s->msmpeg4_version   = 3;
 814         s->flipflop_rounding = 1;
 815         avctx->delay         = 0;
 816         s->low_delay         = 1;
 817         break;
 818     case AV_CODEC_ID_WMV1:
 819         s->out_format        = FMT_H263;
 820         s->h263_pred         = 1;
 821         s->unrestricted_mv   = 1;
 822         s->msmpeg4_version   = 4;
 823         s->flipflop_rounding = 1;
 824         avctx->delay         = 0;
 825         s->low_delay         = 1;
 826         break;
 827     case AV_CODEC_ID_WMV2:
 828         s->out_format        = FMT_H263;
 829         s->h263_pred         = 1;
 830         s->unrestricted_mv   = 1;
 831         s->msmpeg4_version   = 5;
 832         s->flipflop_rounding = 1;
 833         avctx->delay         = 0;
 834         s->low_delay         = 1;
 835         break;
 836     default:
 837         return -1;
 838     }
 839
 840     avctx->has_b_frames = !s->low_delay;
 841
 842     s->encoding = 1;
 843
 844     s->progressive_frame    =
 845     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 846                                                 CODEC_FLAG_INTERLACED_ME) ||
 847                                 s->alternate_scan);
 848
 849     /* init */
 850     ff_mpv_idct_init(s);
 851     if (ff_mpv_common_init(s) < 0)
 852         return -1;
 853
 854     ff_fdctdsp_init(&s->fdsp, avctx);
 855     ff_me_cmp_init(&s->mecc, avctx);
 856     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 857     ff_pixblockdsp_init(&s->pdsp, avctx);
 858     ff_qpeldsp_init(&s->qdsp);
 859
 860     s->avctx->coded_frame = s->current_picture.f;
 861
 862     if (s->msmpeg4_version) {
 863         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 864                           2 * 2 * (MAX_LEVEL + 1) *
 865                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 866     }
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 868
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 870     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 871     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 872     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 873     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 874     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 875     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 876                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 877     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 878                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 879
 880     if (s->avctx->noise_reduction) {
 881         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 882                           2 * 64 * sizeof(uint16_t), fail);
 883     }
 884
 885     ff_dct_encode_init(s);
 886
 887     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 888         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 889
 890     s->quant_precision = 5;
 891
 892     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 893     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 894
 895     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 896         ff_h261_encode_init(s);
 897     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 898         ff_h263_encode_init(s);
 899     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 900         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 901             return ret;
 902     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 903         && s->out_format == FMT_MPEG1)
 904         ff_mpeg1_encode_init(s);
 905
 906     /* init q matrix */
 907     for (i = 0; i < 64; i++) {
 908         int j = s->idsp.idct_permutation[i];
 909         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 910             s->mpeg_quant) {
 911             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 912             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 913         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 914             s->intra_matrix[j] =
 915             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 916         } else {
 917             /* mpeg1/2 */
 918             s->chroma_intra_matrix[j] =
 919             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 920             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 921         }
 922         if (s->avctx->intra_matrix)
 923             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 924         if (s->avctx->inter_matrix)
 925             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 926     }
 927
 928     /* precompute matrix */
 929     /* for mjpeg, we do include qscale in the matrix */
 930     if (s->out_format != FMT_MJPEG) {
 931         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 932                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 933                           31, 1);
 934         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 935                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 936                           31, 0);
 937     }
 938
 939     if (ff_rate_control_init(s) < 0)
 940         return -1;
 941
 942 #if FF_API_ERROR_RATE
 943     FF_DISABLE_DEPRECATION_WARNINGS
 944     if (avctx->error_rate)
 945         s->error_rate = avctx->error_rate;
 946     FF_ENABLE_DEPRECATION_WARNINGS;
 947 #endif
 948
 949 #if FF_API_NORMALIZE_AQP
 950     FF_DISABLE_DEPRECATION_WARNINGS
 951     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 952         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 953     FF_ENABLE_DEPRECATION_WARNINGS;
 954 #endif
 955
 956 #if FF_API_MV0
 957     FF_DISABLE_DEPRECATION_WARNINGS
 958     if (avctx->flags & CODEC_FLAG_MV0)
 959         s->mpv_flags |= FF_MPV_FLAG_MV0;
 960     FF_ENABLE_DEPRECATION_WARNINGS
 961 #endif
 962
 963 #if FF_API_MPV_OPT
 964     FF_DISABLE_DEPRECATION_WARNINGS
 965     if (avctx->rc_qsquish != 0.0)
 966         s->rc_qsquish = avctx->rc_qsquish;
 967     if (avctx->rc_qmod_amp != 0.0)
 968         s->rc_qmod_amp = avctx->rc_qmod_amp;
 969     if (avctx->rc_qmod_freq)
 970         s->rc_qmod_freq = avctx->rc_qmod_freq;
 971     if (avctx->rc_buffer_aggressivity != 1.0)
 972         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 973     if (avctx->rc_initial_cplx != 0.0)
 974         s->rc_initial_cplx = avctx->rc_initial_cplx;
 975     if (avctx->lmin)
 976         s->lmin = avctx->lmin;
 977     if (avctx->lmax)
 978         s->lmax = avctx->lmax;
 979
 980     if (avctx->rc_eq) {
 981         av_freep(&s->rc_eq);
 982         s->rc_eq = av_strdup(avctx->rc_eq);
 983         if (!s->rc_eq)
 984             return AVERROR(ENOMEM);
 985     }
 986     FF_ENABLE_DEPRECATION_WARNINGS
 987 #endif
 988
 989     if (avctx->b_frame_strategy == 2) {
 990         for (i = 0; i < s->max_b_frames + 2; i++) {
 991             s->tmp_frames[i] = av_frame_alloc();
 992             if (!s->tmp_frames[i])
 993                 return AVERROR(ENOMEM);
 994
 995             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 996             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 997             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 998
 999             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1000             if (ret < 0)
1001                 return ret;
1002         }
1003     }
1004
1005     return 0;
1006 fail:
1007     ff_mpv_encode_end(avctx);
1008     return AVERROR_UNKNOWN;
1009 }
1010
1011 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1012 {
1013     MpegEncContext *s = avctx->priv_data;
1014     int i;
1015
1016     ff_rate_control_uninit(s);
1017
1018     ff_mpv_common_end(s);
1019     if (CONFIG_MJPEG_ENCODER &&
1020         s->out_format == FMT_MJPEG)
1021         ff_mjpeg_encode_close(s);
1022
1023     av_freep(&avctx->extradata);
1024
1025     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1026         av_frame_free(&s->tmp_frames[i]);
1027
1028     ff_free_picture_tables(&s->new_picture);
1029     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1030
1031     av_freep(&s->avctx->stats_out);
1032     av_freep(&s->ac_stats);
1033
1034     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1035     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1036     s->q_chroma_intra_matrix=   NULL;
1037     s->q_chroma_intra_matrix16= NULL;
1038     av_freep(&s->q_intra_matrix);
1039     av_freep(&s->q_inter_matrix);
1040     av_freep(&s->q_intra_matrix16);
1041     av_freep(&s->q_inter_matrix16);
1042     av_freep(&s->input_picture);
1043     av_freep(&s->reordered_input_picture);
1044     av_freep(&s->dct_offset);
1045
1046     return 0;
1047 }
1048
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride offset, in bytes, between the starts of consecutive rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff < 0 ? -diff : diff;
        }
    }

    return total;
}
1062
1063 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1064                            uint8_t *ref, int stride)
1065 {
1066     int x, y, w, h;
1067     int acc = 0;
1068
1069     w = s->width  & ~15;
1070     h = s->height & ~15;
1071
1072     for (y = 0; y < h; y += 16) {
1073         for (x = 0; x < w; x += 16) {
1074             int offset = x + y * stride;
1075             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1076                                       stride, 16);
1077             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1078             int sae  = get_sae(src + offset, mean, stride);
1079
1080             acc += sae + 500 < sad;
1081         }
1082     }
1083     return acc;
1084 }
1085
1086 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1087 {
1088     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1089                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1090                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1091                             &s->linesize, &s->uvlinesize);
1092 }
1093
1094 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1095 {
1096     Picture *pic = NULL;
1097     int64_t pts;
1098     int i, display_picture_number = 0, ret;
1099     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1100                                                  (s->low_delay ? 0 : 1);
1101     int direct = 1;
1102
1103     if (pic_arg) {
1104         pts = pic_arg->pts;
1105         display_picture_number = s->input_picture_number++;
1106
1107         if (pts != AV_NOPTS_VALUE) {
1108             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1109                 int64_t last = s->user_specified_pts;
1110
1111                 if (pts <= last) {
1112                     av_log(s->avctx, AV_LOG_ERROR,
1113                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1114                            pts, last);
1115                     return AVERROR(EINVAL);
1116                 }
1117
1118                 if (!s->low_delay && display_picture_number == 1)
1119                     s->dts_delta = pts - last;
1120             }
1121             s->user_specified_pts = pts;
1122         } else {
1123             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1124                 s->user_specified_pts =
1125                 pts = s->user_specified_pts + 1;
1126                 av_log(s->avctx, AV_LOG_INFO,
1127                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1128                        pts);
1129             } else {
1130                 pts = display_picture_number;
1131             }
1132         }
1133     }
1134
1135     if (pic_arg) {
1136         if (!pic_arg->buf[0] ||
1137             pic_arg->linesize[0] != s->linesize ||
1138             pic_arg->linesize[1] != s->uvlinesize ||
1139             pic_arg->linesize[2] != s->uvlinesize)
1140             direct = 0;
1141         if ((s->width & 15) || (s->height & 15))
1142             direct = 0;
1143         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1144             direct = 0;
1145         if (s->linesize & (STRIDE_ALIGN-1))
1146             direct = 0;
1147
1148         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1149                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1150
1151         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1152         if (i < 0)
1153             return i;
1154
1155         pic = &s->picture[i];
1156         pic->reference = 3;
1157
1158         if (direct) {
1159             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1160                 return ret;
1161         }
1162         ret = alloc_picture(s, pic, direct);
1163         if (ret < 0)
1164             return ret;
1165
1166         if (!direct) {
1167             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1168                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1169                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1170                 // empty
1171             } else {
1172                 int h_chroma_shift, v_chroma_shift;
1173                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1174                                                  &h_chroma_shift,
1175                                                  &v_chroma_shift);
1176
1177                 for (i = 0; i < 3; i++) {
1178                     int src_stride = pic_arg->linesize[i];
1179                     int dst_stride = i ? s->uvlinesize : s->linesize;
1180                     int h_shift = i ? h_chroma_shift : 0;
1181                     int v_shift = i ? v_chroma_shift : 0;
1182                     int w = s->width  >> h_shift;
1183                     int h = s->height >> v_shift;
1184                     uint8_t *src = pic_arg->data[i];
1185                     uint8_t *dst = pic->f->data[i];
1186                     int vpad = 16;
1187
1188                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1189                         && !s->progressive_sequence
1190                         && FFALIGN(s->height, 32) - s->height > 16)
1191                         vpad = 32;
1192
1193                     if (!s->avctx->rc_buffer_size)
1194                         dst += INPLACE_OFFSET;
1195
1196                     if (src_stride == dst_stride)
1197                         memcpy(dst, src, src_stride * h);
1198                     else {
1199                         int h2 = h;
1200                         uint8_t *dst2 = dst;
1201                         while (h2--) {
1202                             memcpy(dst2, src, w);
1203                             dst2 += dst_stride;
1204                             src += src_stride;
1205                         }
1206                     }
1207                     if ((s->width & 15) || (s->height & (vpad-1))) {
1208                         s->mpvencdsp.draw_edges(dst, dst_stride,
1209                                                 w, h,
1210                                                 16 >> h_shift,
1211                                                 vpad >> v_shift,
1212                                                 EDGE_BOTTOM);
1213                     }
1214                 }
1215             }
1216         }
1217         ret = av_frame_copy_props(pic->f, pic_arg);
1218         if (ret < 0)
1219             return ret;
1220
1221         pic->f->display_picture_number = display_picture_number;
1222         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1223     }
1224
1225     /* shift buffer entries */
1226     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1227         s->input_picture[i - 1] = s->input_picture[i];
1228
1229     s->input_picture[encoding_delay] = (Picture*) pic;
1230
1231     return 0;
1232 }
1233
1234 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1235 {
1236     int x, y, plane;
1237     int score = 0;
1238     int64_t score64 = 0;
1239
1240     for (plane = 0; plane < 3; plane++) {
1241         const int stride = p->f->linesize[plane];
1242         const int bw = plane ? 1 : 2;
1243         for (y = 0; y < s->mb_height * bw; y++) {
1244             for (x = 0; x < s->mb_width * bw; x++) {
1245                 int off = p->shared ? 0 : 16;
1246                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1247                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1248                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1249
1250                 switch (FFABS(s->avctx->frame_skip_exp)) {
1251                 case 0: score    =  FFMAX(score, v);          break;
1252                 case 1: score   += FFABS(v);                  break;
1253                 case 2: score64 += v * (int64_t)v;                       break;
1254                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1255                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1256                 }
1257             }
1258         }
1259     }
1260     emms_c();
1261
1262     if (score)
1263         score64 = score;
1264     if (s->avctx->frame_skip_exp < 0)
1265         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1266                       -1.0/s->avctx->frame_skip_exp);
1267
1268     if (score64 < s->avctx->frame_skip_threshold)
1269         return 1;
1270     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1271         return 1;
1272     return 0;
1273 }
1274
1275 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1276 {
1277     AVPacket pkt = { 0 };
1278     int ret, got_output;
1279
1280     av_init_packet(&pkt);
1281     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1282     if (ret < 0)
1283         return ret;
1284
1285     ret = pkt.size;
1286     av_free_packet(&pkt);
1287     return ret;
1288 }
1289
1290 static int estimate_best_b_count(MpegEncContext *s)
1291 {
1292     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1293     AVCodecContext *c = avcodec_alloc_context3(NULL);
1294     const int scale = s->avctx->brd_scale;
1295     int i, j, out_size, p_lambda, b_lambda, lambda2;
1296     int64_t best_rd  = INT64_MAX;
1297     int best_b_count = -1;
1298
1299     if (!c)
1300         return AVERROR(ENOMEM);
1301     av_assert0(scale >= 0 && scale <= 3);
1302
1303     //emms_c();
1304     //s->next_picture_ptr->quality;
1305     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1306     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1307     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1308     if (!b_lambda) // FIXME we should do this somewhere else
1309         b_lambda = p_lambda;
1310     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1311                FF_LAMBDA_SHIFT;
1312
1313     c->width        = s->width  >> scale;
1314     c->height       = s->height >> scale;
1315     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1316     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1317     c->mb_decision  = s->avctx->mb_decision;
1318     c->me_cmp       = s->avctx->me_cmp;
1319     c->mb_cmp       = s->avctx->mb_cmp;
1320     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1321     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1322     c->time_base    = s->avctx->time_base;
1323     c->max_b_frames = s->max_b_frames;
1324
1325     if (avcodec_open2(c, codec, NULL) < 0)
1326         return -1;
1327
1328     for (i = 0; i < s->max_b_frames + 2; i++) {
1329         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1330                                                 s->next_picture_ptr;
1331         uint8_t *data[4];
1332
1333         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1334             pre_input = *pre_input_ptr;
1335             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1336
1337             if (!pre_input.shared && i) {
1338                 data[0] += INPLACE_OFFSET;
1339                 data[1] += INPLACE_OFFSET;
1340                 data[2] += INPLACE_OFFSET;
1341             }
1342
1343             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1344                                        s->tmp_frames[i]->linesize[0],
1345                                        data[0],
1346                                        pre_input.f->linesize[0],
1347                                        c->width, c->height);
1348             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1349                                        s->tmp_frames[i]->linesize[1],
1350                                        data[1],
1351                                        pre_input.f->linesize[1],
1352                                        c->width >> 1, c->height >> 1);
1353             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1354                                        s->tmp_frames[i]->linesize[2],
1355                                        data[2],
1356                                        pre_input.f->linesize[2],
1357                                        c->width >> 1, c->height >> 1);
1358         }
1359     }
1360
1361     for (j = 0; j < s->max_b_frames + 1; j++) {
1362         int64_t rd = 0;
1363
1364         if (!s->input_picture[j])
1365             break;
1366
1367         c->error[0] = c->error[1] = c->error[2] = 0;
1368
1369         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1370         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1371
1372         out_size = encode_frame(c, s->tmp_frames[0]);
1373
1374         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1375
1376         for (i = 0; i < s->max_b_frames + 1; i++) {
1377             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1378
1379             s->tmp_frames[i + 1]->pict_type = is_p ?
1380                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1381             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1382
1383             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1384
1385             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1386         }
1387
1388         /* get the delayed frames */
1389         while (out_size) {
1390             out_size = encode_frame(c, NULL);
1391             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1392         }
1393
1394         rd += c->error[0] + c->error[1] + c->error[2];
1395
1396         if (rd < best_rd) {
1397             best_rd = rd;
1398             best_b_count = j;
1399         }
1400     }
1401
1402     avcodec_close(c);
1403     av_freep(&c);
1404
1405     return best_b_count;
1406 }
1407
1408 static int select_input_picture(MpegEncContext *s)
1409 {
1410     int i, ret;
1411
1412     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1413         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1414     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1415
1416     /* set next picture type & ordering */
1417     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1418         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1419             if (s->picture_in_gop_number < s->gop_size &&
1420                 s->next_picture_ptr &&
1421                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1422                 // FIXME check that te gop check above is +-1 correct
1423                 av_frame_unref(s->input_picture[0]->f);
1424
1425                 ff_vbv_update(s, 0);
1426
1427                 goto no_output_pic;
1428             }
1429         }
1430
1431         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1432             !s->next_picture_ptr || s->intra_only) {
1433             s->reordered_input_picture[0] = s->input_picture[0];
1434             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1435             s->reordered_input_picture[0]->f->coded_picture_number =
1436                 s->coded_picture_number++;
1437         } else {
1438             int b_frames;
1439
1440             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1441                 for (i = 0; i < s->max_b_frames + 1; i++) {
1442                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1443
1444                     if (pict_num >= s->rc_context.num_entries)
1445                         break;
1446                     if (!s->input_picture[i]) {
1447                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1448                         break;
1449                     }
1450
1451                     s->input_picture[i]->f->pict_type =
1452                         s->rc_context.entry[pict_num].new_pict_type;
1453                 }
1454             }
1455
1456             if (s->avctx->b_frame_strategy == 0) {
1457                 b_frames = s->max_b_frames;
1458                 while (b_frames && !s->input_picture[b_frames])
1459                     b_frames--;
1460             } else if (s->avctx->b_frame_strategy == 1) {
1461                 for (i = 1; i < s->max_b_frames + 1; i++) {
1462                     if (s->input_picture[i] &&
1463                         s->input_picture[i]->b_frame_score == 0) {
1464                         s->input_picture[i]->b_frame_score =
1465                             get_intra_count(s,
1466                                             s->input_picture[i    ]->f->data[0],
1467                                             s->input_picture[i - 1]->f->data[0],
1468                                             s->linesize) + 1;
1469                     }
1470                 }
1471                 for (i = 0; i < s->max_b_frames + 1; i++) {
1472                     if (!s->input_picture[i] ||
1473                         s->input_picture[i]->b_frame_score - 1 >
1474                             s->mb_num / s->avctx->b_sensitivity)
1475                         break;
1476                 }
1477
1478                 b_frames = FFMAX(0, i - 1);
1479
1480                 /* reset scores */
1481                 for (i = 0; i < b_frames + 1; i++) {
1482                     s->input_picture[i]->b_frame_score = 0;
1483                 }
1484             } else if (s->avctx->b_frame_strategy == 2) {
1485                 b_frames = estimate_best_b_count(s);
1486             } else {
1487                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1488                 b_frames = 0;
1489             }
1490
1491             emms_c();
1492
1493             for (i = b_frames - 1; i >= 0; i--) {
1494                 int type = s->input_picture[i]->f->pict_type;
1495                 if (type && type != AV_PICTURE_TYPE_B)
1496                     b_frames = i;
1497             }
1498             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1499                 b_frames == s->max_b_frames) {
1500                 av_log(s->avctx, AV_LOG_ERROR,
1501                        "warning, too many b frames in a row\n");
1502             }
1503
1504             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1505                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1506                     s->gop_size > s->picture_in_gop_number) {
1507                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1508                 } else {
1509                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1510                         b_frames = 0;
1511                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1512                 }
1513             }
1514
1515             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1516                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1517                 b_frames--;
1518
1519             s->reordered_input_picture[0] = s->input_picture[b_frames];
1520             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1521                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1522             s->reordered_input_picture[0]->f->coded_picture_number =
1523                 s->coded_picture_number++;
1524             for (i = 0; i < b_frames; i++) {
1525                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1526                 s->reordered_input_picture[i + 1]->f->pict_type =
1527                     AV_PICTURE_TYPE_B;
1528                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1529                     s->coded_picture_number++;
1530             }
1531         }
1532     }
1533 no_output_pic:
1534     if (s->reordered_input_picture[0]) {
1535         s->reordered_input_picture[0]->reference =
1536            s->reordered_input_picture[0]->f->pict_type !=
1537                AV_PICTURE_TYPE_B ? 3 : 0;
1538
1539         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1540         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1541             return ret;
1542
1543         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1544             // input is a shared pix, so we can't modifiy it -> alloc a new
1545             // one & ensure that the shared one is reuseable
1546
1547             Picture *pic;
1548             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1549             if (i < 0)
1550                 return i;
1551             pic = &s->picture[i];
1552
1553             pic->reference = s->reordered_input_picture[0]->reference;
1554             if (alloc_picture(s, pic, 0) < 0) {
1555                 return -1;
1556             }
1557
1558             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1559             if (ret < 0)
1560                 return ret;
1561
1562             /* mark us unused / free shared pic */
1563             av_frame_unref(s->reordered_input_picture[0]->f);
1564             s->reordered_input_picture[0]->shared = 0;
1565
1566             s->current_picture_ptr = pic;
1567         } else {
1568             // input is not a shared pix -> reuse buffer for current_pix
1569             s->current_picture_ptr = s->reordered_input_picture[0];
1570             for (i = 0; i < 4; i++) {
1571                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1572             }
1573         }
1574         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1575         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1576                                        s->current_picture_ptr)) < 0)
1577             return ret;
1578
1579         s->picture_number = s->new_picture.f->display_picture_number;
1580     } else {
1581         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1582     }
1583     return 0;
1584 }
1585
1586 static void frame_end(MpegEncContext *s)
1587 {
1588     if (s->unrestricted_mv &&
1589         s->current_picture.reference &&
1590         !s->intra_only) {
1591         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1592         int hshift = desc->log2_chroma_w;
1593         int vshift = desc->log2_chroma_h;
1594         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1595                                 s->current_picture.f->linesize[0],
1596                                 s->h_edge_pos, s->v_edge_pos,
1597                                 EDGE_WIDTH, EDGE_WIDTH,
1598                                 EDGE_TOP | EDGE_BOTTOM);
1599         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1600                                 s->current_picture.f->linesize[1],
1601                                 s->h_edge_pos >> hshift,
1602                                 s->v_edge_pos >> vshift,
1603                                 EDGE_WIDTH >> hshift,
1604                                 EDGE_WIDTH >> vshift,
1605                                 EDGE_TOP | EDGE_BOTTOM);
1606         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1607                                 s->current_picture.f->linesize[2],
1608                                 s->h_edge_pos >> hshift,
1609                                 s->v_edge_pos >> vshift,
1610                                 EDGE_WIDTH >> hshift,
1611                                 EDGE_WIDTH >> vshift,
1612                                 EDGE_TOP | EDGE_BOTTOM);
1613     }
1614
1615     emms_c();
1616
1617     s->last_pict_type                 = s->pict_type;
1618     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1619     if (s->pict_type!= AV_PICTURE_TYPE_B)
1620         s->last_non_b_pict_type = s->pict_type;
1621
1622     s->avctx->coded_frame = s->current_picture_ptr->f;
1623
1624 }
1625
1626 static void update_noise_reduction(MpegEncContext *s)
1627 {
1628     int intra, i;
1629
1630     for (intra = 0; intra < 2; intra++) {
1631         if (s->dct_count[intra] > (1 << 16)) {
1632             for (i = 0; i < 64; i++) {
1633                 s->dct_error_sum[intra][i] >>= 1;
1634             }
1635             s->dct_count[intra] >>= 1;
1636         }
1637
1638         for (i = 0; i < 64; i++) {
1639             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1640                                        s->dct_count[intra] +
1641                                        s->dct_error_sum[intra][i] / 2) /
1642                                       (s->dct_error_sum[intra][i] + 1);
1643         }
1644     }
1645 }
1646
1647 static int frame_start(MpegEncContext *s)
1648 {
1649     int ret;
1650
1651     /* mark & release old frames */
1652     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1653         s->last_picture_ptr != s->next_picture_ptr &&
1654         s->last_picture_ptr->f->buf[0]) {
1655         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1656     }
1657
1658     s->current_picture_ptr->f->pict_type = s->pict_type;
1659     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1660
1661     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1662     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1663                                    s->current_picture_ptr)) < 0)
1664         return ret;
1665
1666     if (s->pict_type != AV_PICTURE_TYPE_B) {
1667         s->last_picture_ptr = s->next_picture_ptr;
1668         if (!s->droppable)
1669             s->next_picture_ptr = s->current_picture_ptr;
1670     }
1671
1672     if (s->last_picture_ptr) {
1673         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1674         if (s->last_picture_ptr->f->buf[0] &&
1675             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1676                                        s->last_picture_ptr)) < 0)
1677             return ret;
1678     }
1679     if (s->next_picture_ptr) {
1680         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1681         if (s->next_picture_ptr->f->buf[0] &&
1682             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1683                                        s->next_picture_ptr)) < 0)
1684             return ret;
1685     }
1686
1687     if (s->picture_structure!= PICT_FRAME) {
1688         int i;
1689         for (i = 0; i < 4; i++) {
1690             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1691                 s->current_picture.f->data[i] +=
1692                     s->current_picture.f->linesize[i];
1693             }
1694             s->current_picture.f->linesize[i] *= 2;
1695             s->last_picture.f->linesize[i]    *= 2;
1696             s->next_picture.f->linesize[i]    *= 2;
1697         }
1698     }
1699
1700     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1701         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1702         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1703     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1704         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1705         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1706     } else {
1707         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1708         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1709     }
1710
1711     if (s->dct_error_sum) {
1712         av_assert2(s->avctx->noise_reduction && s->encoding);
1713         update_noise_reduction(s);
1714     }
1715
1716     return 0;
1717 }
1718
1719 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1720                           const AVFrame *pic_arg, int *got_packet)
1721 {
1722     MpegEncContext *s = avctx->priv_data;
1723     int i, stuffing_count, ret;
1724     int context_count = s->slice_context_count;
1725
1726     s->picture_in_gop_number++;
1727
1728     if (load_input_picture(s, pic_arg) < 0)
1729         return -1;
1730
1731     if (select_input_picture(s) < 0) {
1732         return -1;
1733     }
1734
1735     /* output? */
1736     if (s->new_picture.f->data[0]) {
1737         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1738         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1739                                               :
1740                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1741         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1742             return ret;
1743         if (s->mb_info) {
1744             s->mb_info_ptr = av_packet_new_side_data(pkt,
1745                                  AV_PKT_DATA_H263_MB_INFO,
1746                                  s->mb_width*s->mb_height*12);
1747             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1748         }
1749
1750         for (i = 0; i < context_count; i++) {
1751             int start_y = s->thread_context[i]->start_mb_y;
1752             int   end_y = s->thread_context[i]->  end_mb_y;
1753             int h       = s->mb_height;
1754             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1755             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1756
1757             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1758         }
1759
1760         s->pict_type = s->new_picture.f->pict_type;
1761         //emms_c();
1762         ret = frame_start(s);
1763         if (ret < 0)
1764             return ret;
1765 vbv_retry:
1766         ret = encode_picture(s, s->picture_number);
1767         if (growing_buffer) {
1768             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1769             pkt->data = s->pb.buf;
1770             pkt->size = avctx->internal->byte_buffer_size;
1771         }
1772         if (ret < 0)
1773             return -1;
1774
1775         avctx->header_bits = s->header_bits;
1776         avctx->mv_bits     = s->mv_bits;
1777         avctx->misc_bits   = s->misc_bits;
1778         avctx->i_tex_bits  = s->i_tex_bits;
1779         avctx->p_tex_bits  = s->p_tex_bits;
1780         avctx->i_count     = s->i_count;
1781         // FIXME f/b_count in avctx
1782         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1783         avctx->skip_count  = s->skip_count;
1784
1785         frame_end(s);
1786
1787         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1788             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1789
1790         if (avctx->rc_buffer_size) {
1791             RateControlContext *rcc = &s->rc_context;
1792             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1793
1794             if (put_bits_count(&s->pb) > max_size &&
1795                 s->lambda < s->lmax) {
1796                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1797                                        (s->qscale + 1) / s->qscale);
1798                 if (s->adaptive_quant) {
1799                     int i;
1800                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1801                         s->lambda_table[i] =
1802                             FFMAX(s->lambda_table[i] + 1,
1803                                   s->lambda_table[i] * (s->qscale + 1) /
1804                                   s->qscale);
1805                 }
1806                 s->mb_skipped = 0;        // done in frame_start()
1807                 // done in encode_picture() so we must undo it
1808                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1809                     if (s->flipflop_rounding          ||
1810                         s->codec_id == AV_CODEC_ID_H263P ||
1811                         s->codec_id == AV_CODEC_ID_MPEG4)
1812                         s->no_rounding ^= 1;
1813                 }
1814                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1815                     s->time_base       = s->last_time_base;
1816                     s->last_non_b_time = s->time - s->pp_time;
1817                 }
1818                 for (i = 0; i < context_count; i++) {
1819                     PutBitContext *pb = &s->thread_context[i]->pb;
1820                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1821                 }
1822                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1823                 goto vbv_retry;
1824             }
1825
1826             av_assert0(s->avctx->rc_max_rate);
1827         }
1828
1829         if (s->avctx->flags & CODEC_FLAG_PASS1)
1830             ff_write_pass1_stats(s);
1831
1832         for (i = 0; i < 4; i++) {
1833             s->current_picture_ptr->f->error[i] =
1834             s->current_picture.f->error[i] =
1835                 s->current_picture.error[i];
1836             avctx->error[i] += s->current_picture_ptr->f->error[i];
1837         }
1838
1839         if (s->avctx->flags & CODEC_FLAG_PASS1)
1840             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1841                    avctx->i_tex_bits + avctx->p_tex_bits ==
1842                        put_bits_count(&s->pb));
1843         flush_put_bits(&s->pb);
1844         s->frame_bits  = put_bits_count(&s->pb);
1845
1846         stuffing_count = ff_vbv_update(s, s->frame_bits);
1847         s->stuffing_bits = 8*stuffing_count;
1848         if (stuffing_count) {
1849             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1850                     stuffing_count + 50) {
1851                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1852                 return -1;
1853             }
1854
1855             switch (s->codec_id) {
1856             case AV_CODEC_ID_MPEG1VIDEO:
1857             case AV_CODEC_ID_MPEG2VIDEO:
1858                 while (stuffing_count--) {
1859                     put_bits(&s->pb, 8, 0);
1860                 }
1861             break;
1862             case AV_CODEC_ID_MPEG4:
1863                 put_bits(&s->pb, 16, 0);
1864                 put_bits(&s->pb, 16, 0x1C3);
1865                 stuffing_count -= 4;
1866                 while (stuffing_count--) {
1867                     put_bits(&s->pb, 8, 0xFF);
1868                 }
1869             break;
1870             default:
1871                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1872             }
1873             flush_put_bits(&s->pb);
1874             s->frame_bits  = put_bits_count(&s->pb);
1875         }
1876
1877         /* update mpeg1/2 vbv_delay for CBR */
1878         if (s->avctx->rc_max_rate                          &&
1879             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1880             s->out_format == FMT_MPEG1                     &&
1881             90000LL * (avctx->rc_buffer_size - 1) <=
1882                 s->avctx->rc_max_rate * 0xFFFFLL) {
1883             int vbv_delay, min_delay;
1884             double inbits  = s->avctx->rc_max_rate *
1885                              av_q2d(s->avctx->time_base);
1886             int    minbits = s->frame_bits - 8 *
1887                              (s->vbv_delay_ptr - s->pb.buf - 1);
1888             double bits    = s->rc_context.buffer_index + minbits - inbits;
1889
1890             if (bits < 0)
1891                 av_log(s->avctx, AV_LOG_ERROR,
1892                        "Internal error, negative bits\n");
1893
1894             assert(s->repeat_first_field == 0);
1895
1896             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1897             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1898                         s->avctx->rc_max_rate;
1899
1900             vbv_delay = FFMAX(vbv_delay, min_delay);
1901
1902             av_assert0(vbv_delay < 0xFFFF);
1903
1904             s->vbv_delay_ptr[0] &= 0xF8;
1905             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1906             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1907             s->vbv_delay_ptr[2] &= 0x07;
1908             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1909             avctx->vbv_delay     = vbv_delay * 300;
1910         }
1911         s->total_bits     += s->frame_bits;
1912         avctx->frame_bits  = s->frame_bits;
1913
1914         pkt->pts = s->current_picture.f->pts;
1915         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1916             if (!s->current_picture.f->coded_picture_number)
1917                 pkt->dts = pkt->pts - s->dts_delta;
1918             else
1919                 pkt->dts = s->reordered_pts;
1920             s->reordered_pts = pkt->pts;
1921         } else
1922             pkt->dts = pkt->pts;
1923         if (s->current_picture.f->key_frame)
1924             pkt->flags |= AV_PKT_FLAG_KEY;
1925         if (s->mb_info)
1926             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1927     } else {
1928         s->frame_bits = 0;
1929     }
1930
1931     /* release non-reference frames */
1932     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1933         if (!s->picture[i].reference)
1934             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1935     }
1936
1937     av_assert1((s->frame_bits & 7) == 0);
1938
1939     pkt->size = s->frame_bits / 8;
1940     *got_packet = !!pkt->size;
1941     return 0;
1942 }
1943
1944 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1945                                                 int n, int threshold)
1946 {
1947     static const char tab[64] = {
1948         3, 2, 2, 1, 1, 1, 1, 1,
1949         1, 1, 1, 1, 1, 1, 1, 1,
1950         1, 1, 1, 1, 1, 1, 1, 1,
1951         0, 0, 0, 0, 0, 0, 0, 0,
1952         0, 0, 0, 0, 0, 0, 0, 0,
1953         0, 0, 0, 0, 0, 0, 0, 0,
1954         0, 0, 0, 0, 0, 0, 0, 0,
1955         0, 0, 0, 0, 0, 0, 0, 0
1956     };
1957     int score = 0;
1958     int run = 0;
1959     int i;
1960     int16_t *block = s->block[n];
1961     const int last_index = s->block_last_index[n];
1962     int skip_dc;
1963
1964     if (threshold < 0) {
1965         skip_dc = 0;
1966         threshold = -threshold;
1967     } else
1968         skip_dc = 1;
1969
1970     /* Are all we could set to zero already zero? */
1971     if (last_index <= skip_dc - 1)
1972         return;
1973
1974     for (i = 0; i <= last_index; i++) {
1975         const int j = s->intra_scantable.permutated[i];
1976         const int level = FFABS(block[j]);
1977         if (level == 1) {
1978             if (skip_dc && i == 0)
1979                 continue;
1980             score += tab[run];
1981             run = 0;
1982         } else if (level > 1) {
1983             return;
1984         } else {
1985             run++;
1986         }
1987     }
1988     if (score >= threshold)
1989         return;
1990     for (i = skip_dc; i <= last_index; i++) {
1991         const int j = s->intra_scantable.permutated[i];
1992         block[j] = 0;
1993     }
1994     if (block[0])
1995         s->block_last_index[n] = 0;
1996     else
1997         s->block_last_index[n] = -1;
1998 }
1999
2000 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2001                                int last_index)
2002 {
2003     int i;
2004     const int maxlevel = s->max_qcoeff;
2005     const int minlevel = s->min_qcoeff;
2006     int overflow = 0;
2007
2008     if (s->mb_intra) {
2009         i = 1; // skip clipping of intra dc
2010     } else
2011         i = 0;
2012
2013     for (; i <= last_index; i++) {
2014         const int j = s->intra_scantable.permutated[i];
2015         int level = block[j];
2016
2017         if (level > maxlevel) {
2018             level = maxlevel;
2019             overflow++;
2020         } else if (level < minlevel) {
2021             level = minlevel;
2022             overflow++;
2023         }
2024
2025         block[j] = level;
2026     }
2027
2028     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2029         av_log(s->avctx, AV_LOG_INFO,
2030                "warning, clipping %d dct coefficients to %d..%d\n",
2031                overflow, minlevel, maxlevel);
2032 }
2033
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel variation.
 *
 * For every pixel the sum and sum of squares over its neighbourhood (the
 * 3x3 window clipped to the block) are gathered, and the weight is
 * 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row (x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between vertically adjacent pixels in ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;
    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int x  = idx & 7;
        const int y  = idx >> 3;
        const int x0 = FFMAX(x - 1, 0), x1 = FFMIN(8, x + 2);
        const int y0 = FFMAX(y - 1, 0), y1 = FFMIN(8, y + 2);
        int sum   = 0;
        int sqr   = 0;
        int count = 0;
        int nx, ny;

        for (ny = y0; ny < y1; ny++) {
            for (nx = x0; nx < x1; nx++) {
                const int v = ptr[nx + ny * stride];

                sum += v;
                sqr += v * v;
                count++;
            }
        }

        weight[x + 8 * y] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
    }
}
2057
2058 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2059                                                 int motion_x, int motion_y,
2060                                                 int mb_block_height,
2061                                                 int mb_block_width,
2062                                                 int mb_block_count)
2063 {
2064     int16_t weight[12][64];
2065     int16_t orig[12][64];
2066     const int mb_x = s->mb_x;
2067     const int mb_y = s->mb_y;
2068     int i;
2069     int skip_dct[12];
2070     int dct_offset = s->linesize * 8; // default for progressive frames
2071     int uv_dct_offset = s->uvlinesize * 8;
2072     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2073     ptrdiff_t wrap_y, wrap_c;
2074
2075     for (i = 0; i < mb_block_count; i++)
2076         skip_dct[i] = s->skipdct;
2077
2078     if (s->adaptive_quant) {
2079         const int last_qp = s->qscale;
2080         const int mb_xy = mb_x + mb_y * s->mb_stride;
2081
2082         s->lambda = s->lambda_table[mb_xy];
2083         update_qscale(s);
2084
2085         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2086             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2087             s->dquant = s->qscale - last_qp;
2088
2089             if (s->out_format == FMT_H263) {
2090                 s->dquant = av_clip(s->dquant, -2, 2);
2091
2092                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2093                     if (!s->mb_intra) {
2094                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2095                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2096                                 s->dquant = 0;
2097                         }
2098                         if (s->mv_type == MV_TYPE_8X8)
2099                             s->dquant = 0;
2100                     }
2101                 }
2102             }
2103         }
2104         ff_set_qscale(s, last_qp + s->dquant);
2105     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2106         ff_set_qscale(s, s->qscale + s->dquant);
2107
2108     wrap_y = s->linesize;
2109     wrap_c = s->uvlinesize;
2110     ptr_y  = s->new_picture.f->data[0] +
2111              (mb_y * 16 * wrap_y)              + mb_x * 16;
2112     ptr_cb = s->new_picture.f->data[1] +
2113              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2114     ptr_cr = s->new_picture.f->data[2] +
2115              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2116
2117     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2118         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2119         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2120         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2121         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2122                                  wrap_y, wrap_y,
2123                                  16, 16, mb_x * 16, mb_y * 16,
2124                                  s->width, s->height);
2125         ptr_y = ebuf;
2126         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2127                                  wrap_c, wrap_c,
2128                                  mb_block_width, mb_block_height,
2129                                  mb_x * mb_block_width, mb_y * mb_block_height,
2130                                  cw, ch);
2131         ptr_cb = ebuf + 16 * wrap_y;
2132         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2133                                  wrap_c, wrap_c,
2134                                  mb_block_width, mb_block_height,
2135                                  mb_x * mb_block_width, mb_y * mb_block_height,
2136                                  cw, ch);
2137         ptr_cr = ebuf + 16 * wrap_y + 16;
2138     }
2139
2140     if (s->mb_intra) {
2141         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2142             int progressive_score, interlaced_score;
2143
2144             s->interlaced_dct = 0;
2145             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2146                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2147                                                      NULL, wrap_y, 8) - 400;
2148
2149             if (progressive_score > 0) {
2150                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2151                                                         NULL, wrap_y * 2, 8) +
2152                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2153                                                         NULL, wrap_y * 2, 8);
2154                 if (progressive_score > interlaced_score) {
2155                     s->interlaced_dct = 1;
2156
2157                     dct_offset = wrap_y;
2158                     uv_dct_offset = wrap_c;
2159                     wrap_y <<= 1;
2160                     if (s->chroma_format == CHROMA_422 ||
2161                         s->chroma_format == CHROMA_444)
2162                         wrap_c <<= 1;
2163                 }
2164             }
2165         }
2166
2167         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2168         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2169         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2170         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2171
2172         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2173             skip_dct[4] = 1;
2174             skip_dct[5] = 1;
2175         } else {
2176             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2177             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2178             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2179                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2180                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2181             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2182                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2183                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2184                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2185                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2186                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2187                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2188             }
2189         }
2190     } else {
2191         op_pixels_func (*op_pix)[4];
2192         qpel_mc_func (*op_qpix)[16];
2193         uint8_t *dest_y, *dest_cb, *dest_cr;
2194
2195         dest_y  = s->dest[0];
2196         dest_cb = s->dest[1];
2197         dest_cr = s->dest[2];
2198
2199         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2200             op_pix  = s->hdsp.put_pixels_tab;
2201             op_qpix = s->qdsp.put_qpel_pixels_tab;
2202         } else {
2203             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2204             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2205         }
2206
2207         if (s->mv_dir & MV_DIR_FORWARD) {
2208             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2209                           s->last_picture.f->data,
2210                           op_pix, op_qpix);
2211             op_pix  = s->hdsp.avg_pixels_tab;
2212             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2213         }
2214         if (s->mv_dir & MV_DIR_BACKWARD) {
2215             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2216                           s->next_picture.f->data,
2217                           op_pix, op_qpix);
2218         }
2219
2220         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2221             int progressive_score, interlaced_score;
2222
2223             s->interlaced_dct = 0;
2224             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2225                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2226                                                      ptr_y + wrap_y * 8,
2227                                                      wrap_y, 8) - 400;
2228
2229             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2230                 progressive_score -= 400;
2231
2232             if (progressive_score > 0) {
2233                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2234                                                         wrap_y * 2, 8) +
2235                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2236                                                         ptr_y + wrap_y,
2237                                                         wrap_y * 2, 8);
2238
2239                 if (progressive_score > interlaced_score) {
2240                     s->interlaced_dct = 1;
2241
2242                     dct_offset = wrap_y;
2243                     uv_dct_offset = wrap_c;
2244                     wrap_y <<= 1;
2245                     if (s->chroma_format == CHROMA_422)
2246                         wrap_c <<= 1;
2247                 }
2248             }
2249         }
2250
2251         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2252         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2253         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2254                             dest_y + dct_offset, wrap_y);
2255         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2256                             dest_y + dct_offset + 8, wrap_y);
2257
2258         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2259             skip_dct[4] = 1;
2260             skip_dct[5] = 1;
2261         } else {
2262             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2263             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2264             if (!s->chroma_y_shift) { /* 422 */
2265                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2266                                     dest_cb + uv_dct_offset, wrap_c);
2267                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2268                                     dest_cr + uv_dct_offset, wrap_c);
2269             }
2270         }
2271         /* pre quantization */
2272         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2273                 2 * s->qscale * s->qscale) {
2274             // FIXME optimize
2275             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2276                 skip_dct[0] = 1;
2277             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2278                 skip_dct[1] = 1;
2279             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2280                                wrap_y, 8) < 20 * s->qscale)
2281                 skip_dct[2] = 1;
2282             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2283                                wrap_y, 8) < 20 * s->qscale)
2284                 skip_dct[3] = 1;
2285             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2286                 skip_dct[4] = 1;
2287             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2288                 skip_dct[5] = 1;
2289             if (!s->chroma_y_shift) { /* 422 */
2290                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2291                                    dest_cb + uv_dct_offset,
2292                                    wrap_c, 8) < 20 * s->qscale)
2293                     skip_dct[6] = 1;
2294                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2295                                    dest_cr + uv_dct_offset,
2296                                    wrap_c, 8) < 20 * s->qscale)
2297                     skip_dct[7] = 1;
2298             }
2299         }
2300     }
2301
2302     if (s->quantizer_noise_shaping) {
2303         if (!skip_dct[0])
2304             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2305         if (!skip_dct[1])
2306             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2307         if (!skip_dct[2])
2308             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2309         if (!skip_dct[3])
2310             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2311         if (!skip_dct[4])
2312             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2313         if (!skip_dct[5])
2314             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2315         if (!s->chroma_y_shift) { /* 422 */
2316             if (!skip_dct[6])
2317                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2318                                   wrap_c);
2319             if (!skip_dct[7])
2320                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2321                                   wrap_c);
2322         }
2323         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2324     }
2325
2326     /* DCT & quantize */
2327     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2328     {
2329         for (i = 0; i < mb_block_count; i++) {
2330             if (!skip_dct[i]) {
2331                 int overflow;
2332                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2333                 // FIXME we could decide to change to quantizer instead of
2334                 // clipping
2335                 // JS: I don't think that would be a good idea it could lower
2336                 //     quality instead of improve it. Just INTRADC clipping
2337                 //     deserves changes in quantizer
2338                 if (overflow)
2339                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2340             } else
2341                 s->block_last_index[i] = -1;
2342         }
2343         if (s->quantizer_noise_shaping) {
2344             for (i = 0; i < mb_block_count; i++) {
2345                 if (!skip_dct[i]) {
2346                     s->block_last_index[i] =
2347                         dct_quantize_refine(s, s->block[i], weight[i],
2348                                             orig[i], i, s->qscale);
2349                 }
2350             }
2351         }
2352
2353         if (s->luma_elim_threshold && !s->mb_intra)
2354             for (i = 0; i < 4; i++)
2355                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2356         if (s->chroma_elim_threshold && !s->mb_intra)
2357             for (i = 4; i < mb_block_count; i++)
2358                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2359
2360         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2361             for (i = 0; i < mb_block_count; i++) {
2362                 if (s->block_last_index[i] == -1)
2363                     s->coded_score[i] = INT_MAX / 256;
2364             }
2365         }
2366     }
2367
2368     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2369         s->block_last_index[4] =
2370         s->block_last_index[5] = 0;
2371         s->block[4][0] =
2372         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2373         if (!s->chroma_y_shift) { /* 422 / 444 */
2374             for (i=6; i<12; i++) {
2375                 s->block_last_index[i] = 0;
2376                 s->block[i][0] = s->block[4][0];
2377             }
2378         }
2379     }
2380
2381     // non c quantize code returns incorrect block_last_index FIXME
2382     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2383         for (i = 0; i < mb_block_count; i++) {
2384             int j;
2385             if (s->block_last_index[i] > 0) {
2386                 for (j = 63; j > 0; j--) {
2387                     if (s->block[i][s->intra_scantable.permutated[j]])
2388                         break;
2389                 }
2390                 s->block_last_index[i] = j;
2391             }
2392         }
2393     }
2394
2395     /* huffman encode */
2396     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2397     case AV_CODEC_ID_MPEG1VIDEO:
2398     case AV_CODEC_ID_MPEG2VIDEO:
2399         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2400             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2401         break;
2402     case AV_CODEC_ID_MPEG4:
2403         if (CONFIG_MPEG4_ENCODER)
2404             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2405         break;
2406     case AV_CODEC_ID_MSMPEG4V2:
2407     case AV_CODEC_ID_MSMPEG4V3:
2408     case AV_CODEC_ID_WMV1:
2409         if (CONFIG_MSMPEG4_ENCODER)
2410             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_WMV2:
2413         if (CONFIG_WMV2_ENCODER)
2414             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2415         break;
2416     case AV_CODEC_ID_H261:
2417         if (CONFIG_H261_ENCODER)
2418             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2419         break;
2420     case AV_CODEC_ID_H263:
2421     case AV_CODEC_ID_H263P:
2422     case AV_CODEC_ID_FLV1:
2423     case AV_CODEC_ID_RV10:
2424     case AV_CODEC_ID_RV20:
2425         if (CONFIG_H263_ENCODER)
2426             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2427         break;
2428     case AV_CODEC_ID_MJPEG:
2429     case AV_CODEC_ID_AMV:
2430         if (CONFIG_MJPEG_ENCODER)
2431             ff_mjpeg_encode_mb(s, s->block);
2432         break;
2433     default:
2434         av_assert1(0);
2435     }
2436 }
2437
2438 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2439 {
2440     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2441     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2442     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2443 }
2444
2445 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2446     int i;
2447
2448     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2449
2450     /* mpeg1 */
2451     d->mb_skip_run= s->mb_skip_run;
2452     for(i=0; i<3; i++)
2453         d->last_dc[i] = s->last_dc[i];
2454
2455     /* statistics */
2456     d->mv_bits= s->mv_bits;
2457     d->i_tex_bits= s->i_tex_bits;
2458     d->p_tex_bits= s->p_tex_bits;
2459     d->i_count= s->i_count;
2460     d->f_count= s->f_count;
2461     d->b_count= s->b_count;
2462     d->skip_count= s->skip_count;
2463     d->misc_bits= s->misc_bits;
2464     d->last_bits= 0;
2465
2466     d->mb_skipped= 0;
2467     d->qscale= s->qscale;
2468     d->dquant= s->dquant;
2469
2470     d->esc3_level_length= s->esc3_level_length;
2471 }
2472
2473 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2474     int i;
2475
2476     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2477     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2478
2479     /* mpeg1 */
2480     d->mb_skip_run= s->mb_skip_run;
2481     for(i=0; i<3; i++)
2482         d->last_dc[i] = s->last_dc[i];
2483
2484     /* statistics */
2485     d->mv_bits= s->mv_bits;
2486     d->i_tex_bits= s->i_tex_bits;
2487     d->p_tex_bits= s->p_tex_bits;
2488     d->i_count= s->i_count;
2489     d->f_count= s->f_count;
2490     d->b_count= s->b_count;
2491     d->skip_count= s->skip_count;
2492     d->misc_bits= s->misc_bits;
2493
2494     d->mb_intra= s->mb_intra;
2495     d->mb_skipped= s->mb_skipped;
2496     d->mv_type= s->mv_type;
2497     d->mv_dir= s->mv_dir;
2498     d->pb= s->pb;
2499     if(s->data_partitioning){
2500         d->pb2= s->pb2;
2501         d->tex_pb= s->tex_pb;
2502     }
2503     d->block= s->block;
2504     for(i=0; i<8; i++)
2505         d->block_last_index[i]= s->block_last_index[i];
2506     d->interlaced_dct= s->interlaced_dct;
2507     d->qscale= s->qscale;
2508
2509     d->esc3_level_length= s->esc3_level_length;
2510 }
2511
2512 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2513                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2514                            int *dmin, int *next_block, int motion_x, int motion_y)
2515 {
2516     int score;
2517     uint8_t *dest_backup[3];
2518
2519     copy_context_before_encode(s, backup, type);
2520
2521     s->block= s->blocks[*next_block];
2522     s->pb= pb[*next_block];
2523     if(s->data_partitioning){
2524         s->pb2   = pb2   [*next_block];
2525         s->tex_pb= tex_pb[*next_block];
2526     }
2527
2528     if(*next_block){
2529         memcpy(dest_backup, s->dest, sizeof(s->dest));
2530         s->dest[0] = s->sc.rd_scratchpad;
2531         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2532         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2533         av_assert0(s->linesize >= 32); //FIXME
2534     }
2535
2536     encode_mb(s, motion_x, motion_y);
2537
2538     score= put_bits_count(&s->pb);
2539     if(s->data_partitioning){
2540         score+= put_bits_count(&s->pb2);
2541         score+= put_bits_count(&s->tex_pb);
2542     }
2543
2544     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2545         ff_mpv_decode_mb(s, s->block);
2546
2547         score *= s->lambda2;
2548         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2549     }
2550
2551     if(*next_block){
2552         memcpy(s->dest, dest_backup, sizeof(s->dest));
2553     }
2554
2555     if(score<*dmin){
2556         *dmin= score;
2557         *next_block^=1;
2558
2559         copy_context_after_encode(best, s, type);
2560     }
2561 }
2562
2563 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2564     uint32_t *sq = ff_square_tab + 256;
2565     int acc=0;
2566     int x,y;
2567
2568     if(w==16 && h==16)
2569         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2570     else if(w==8 && h==8)
2571         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2572
2573     for(y=0; y<h; y++){
2574         for(x=0; x<w; x++){
2575             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2576         }
2577     }
2578
2579     av_assert2(acc>=0);
2580
2581     return acc;
2582 }
2583
2584 static int sse_mb(MpegEncContext *s){
2585     int w= 16;
2586     int h= 16;
2587
2588     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2589     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2590
2591     if(w==16 && h==16)
2592       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2593         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2594                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2595                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2596       }else{
2597         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2598                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2599                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2600       }
2601     else
2602         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2603                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2604                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2605 }
2606
2607 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2608     MpegEncContext *s= *(void**)arg;
2609
2610
2611     s->me.pre_pass=1;
2612     s->me.dia_size= s->avctx->pre_dia_size;
2613     s->first_slice_line=1;
2614     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2615         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2616             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2617         }
2618         s->first_slice_line=0;
2619     }
2620
2621     s->me.pre_pass=0;
2622
2623     return 0;
2624 }
2625
2626 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2627     MpegEncContext *s= *(void**)arg;
2628
2629     ff_check_alignment();
2630
2631     s->me.dia_size= s->avctx->dia_size;
2632     s->first_slice_line=1;
2633     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2634         s->mb_x=0; //for block init below
2635         ff_init_block_index(s);
2636         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2637             s->block_index[0]+=2;
2638             s->block_index[1]+=2;
2639             s->block_index[2]+=2;
2640             s->block_index[3]+=2;
2641
2642             /* compute motion vector & mb_type and store in context */
2643             if(s->pict_type==AV_PICTURE_TYPE_B)
2644                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2645             else
2646                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2647         }
2648         s->first_slice_line=0;
2649     }
2650     return 0;
2651 }
2652
2653 static int mb_var_thread(AVCodecContext *c, void *arg){
2654     MpegEncContext *s= *(void**)arg;
2655     int mb_x, mb_y;
2656
2657     ff_check_alignment();
2658
2659     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2660         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2661             int xx = mb_x * 16;
2662             int yy = mb_y * 16;
2663             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2664             int varc;
2665             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2666
2667             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2668                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2669
2670             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2671             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2672             s->me.mb_var_sum_temp    += varc;
2673         }
2674     }
2675     return 0;
2676 }
2677
2678 static void write_slice_end(MpegEncContext *s){
2679     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2680         if(s->partitioned_frame){
2681             ff_mpeg4_merge_partitions(s);
2682         }
2683
2684         ff_mpeg4_stuffing(&s->pb);
2685     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2686         ff_mjpeg_encode_stuffing(s);
2687     }
2688
2689     avpriv_align_put_bits(&s->pb);
2690     flush_put_bits(&s->pb);
2691
2692     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2693         s->misc_bits+= get_bits_diff(s);
2694 }
2695
2696 static void write_mb_info(MpegEncContext *s)
2697 {
2698     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2699     int offset = put_bits_count(&s->pb);
2700     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2701     int gobn = s->mb_y / s->gob_index;
2702     int pred_x, pred_y;
2703     if (CONFIG_H263_ENCODER)
2704         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2705     bytestream_put_le32(&ptr, offset);
2706     bytestream_put_byte(&ptr, s->qscale);
2707     bytestream_put_byte(&ptr, gobn);
2708     bytestream_put_le16(&ptr, mba);
2709     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2710     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2711     /* 4MV not implemented */
2712     bytestream_put_byte(&ptr, 0); /* hmv2 */
2713     bytestream_put_byte(&ptr, 0); /* vmv2 */
2714 }
2715
2716 static void update_mb_info(MpegEncContext *s, int startcode)
2717 {
2718     if (!s->mb_info)
2719         return;
2720     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2721         s->mb_info_size += 12;
2722         s->prev_mb_info = s->last_mb_info;
2723     }
2724     if (startcode) {
2725         s->prev_mb_info = put_bits_count(&s->pb)/8;
2726         /* This might have incremented mb_info_size above, and we return without
2727          * actually writing any info into that slot yet. But in that case,
2728          * this will be called again at the start of the after writing the
2729          * start code, actually writing the mb info. */
2730         return;
2731     }
2732
2733     s->last_mb_info = put_bits_count(&s->pb)/8;
2734     if (!s->mb_info_size)
2735         s->mb_info_size += 12;
2736     write_mb_info(s);
2737 }
2738
2739 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2740 {
2741     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2742         && s->slice_context_count == 1
2743         && s->pb.buf == s->avctx->internal->byte_buffer) {
2744         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2745         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2746
2747         uint8_t *new_buffer = NULL;
2748         int new_buffer_size = 0;
2749
2750         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2751                               s->avctx->internal->byte_buffer_size + size_increase);
2752         if (!new_buffer)
2753             return AVERROR(ENOMEM);
2754
2755         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2756         av_free(s->avctx->internal->byte_buffer);
2757         s->avctx->internal->byte_buffer      = new_buffer;
2758         s->avctx->internal->byte_buffer_size = new_buffer_size;
2759         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2760         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2761         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2762     }
2763     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2764         return AVERROR(EINVAL);
2765     return 0;
2766 }
2767
2768 static int encode_thread(AVCodecContext *c, void *arg){
2769     MpegEncContext *s= *(void**)arg;
2770     int mb_x, mb_y, pdif = 0;
2771     int chr_h= 16>>s->chroma_y_shift;
2772     int i, j;
2773     MpegEncContext best_s = { 0 }, backup_s;
2774     uint8_t bit_buf[2][MAX_MB_BYTES];
2775     uint8_t bit_buf2[2][MAX_MB_BYTES];
2776     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2777     PutBitContext pb[2], pb2[2], tex_pb[2];
2778
2779     ff_check_alignment();
2780
2781     for(i=0; i<2; i++){
2782         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2783         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2784         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2785     }
2786
2787     s->last_bits= put_bits_count(&s->pb);
2788     s->mv_bits=0;
2789     s->misc_bits=0;
2790     s->i_tex_bits=0;
2791     s->p_tex_bits=0;
2792     s->i_count=0;
2793     s->f_count=0;
2794     s->b_count=0;
2795     s->skip_count=0;
2796
2797     for(i=0; i<3; i++){
2798         /* init last dc values */
2799         /* note: quant matrix value (8) is implied here */
2800         s->last_dc[i] = 128 << s->intra_dc_precision;
2801
2802         s->current_picture.error[i] = 0;
2803     }
2804     if(s->codec_id==AV_CODEC_ID_AMV){
2805         s->last_dc[0] = 128*8/13;
2806         s->last_dc[1] = 128*8/14;
2807         s->last_dc[2] = 128*8/14;
2808     }
2809     s->mb_skip_run = 0;
2810     memset(s->last_mv, 0, sizeof(s->last_mv));
2811
2812     s->last_mv_dir = 0;
2813
2814     switch(s->codec_id){
2815     case AV_CODEC_ID_H263:
2816     case AV_CODEC_ID_H263P:
2817     case AV_CODEC_ID_FLV1:
2818         if (CONFIG_H263_ENCODER)
2819             s->gob_index = H263_GOB_HEIGHT(s->height);
2820         break;
2821     case AV_CODEC_ID_MPEG4:
2822         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2823             ff_mpeg4_init_partitions(s);
2824         break;
2825     }
2826
2827     s->resync_mb_x=0;
2828     s->resync_mb_y=0;
2829     s->first_slice_line = 1;
2830     s->ptr_lastgob = s->pb.buf;
2831     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2832         s->mb_x=0;
2833         s->mb_y= mb_y;
2834
2835         ff_set_qscale(s, s->qscale);
2836         ff_init_block_index(s);
2837
2838         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2839             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2840             int mb_type= s->mb_type[xy];
2841 //            int d;
2842             int dmin= INT_MAX;
2843             int dir;
2844             int size_increase =  s->avctx->internal->byte_buffer_size/4
2845                                + s->mb_width*MAX_MB_BYTES;
2846
2847             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2848             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2849                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2850                 return -1;
2851             }
2852             if(s->data_partitioning){
2853                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2854                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2855                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2856                     return -1;
2857                 }
2858             }
2859
2860             s->mb_x = mb_x;
2861             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2862             ff_update_block_index(s);
2863
2864             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2865                 ff_h261_reorder_mb_index(s);
2866                 xy= s->mb_y*s->mb_stride + s->mb_x;
2867                 mb_type= s->mb_type[xy];
2868             }
2869
2870             /* write gob / video packet header  */
2871             if(s->rtp_mode){
2872                 int current_packet_size, is_gob_start;
2873
2874                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2875
2876                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2877
2878                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2879
2880                 switch(s->codec_id){
2881                 case AV_CODEC_ID_H263:
2882                 case AV_CODEC_ID_H263P:
2883                     if(!s->h263_slice_structured)
2884                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2885                     break;
2886                 case AV_CODEC_ID_MPEG2VIDEO:
2887                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2888                 case AV_CODEC_ID_MPEG1VIDEO:
2889                     if(s->mb_skip_run) is_gob_start=0;
2890                     break;
2891                 case AV_CODEC_ID_MJPEG:
2892                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2893                     break;
2894                 }
2895
2896                 if(is_gob_start){
2897                     if(s->start_mb_y != mb_y || mb_x!=0){
2898                         write_slice_end(s);
2899
2900                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2901                             ff_mpeg4_init_partitions(s);
2902                         }
2903                     }
2904
2905                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2906                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2907
2908                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2909                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2910                         int d = 100 / s->error_rate;
2911                         if(r % d == 0){
2912                             current_packet_size=0;
2913                             s->pb.buf_ptr= s->ptr_lastgob;
2914                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2915                         }
2916                     }
2917
2918                     if (s->avctx->rtp_callback){
2919                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2920                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2921                     }
2922                     update_mb_info(s, 1);
2923
2924                     switch(s->codec_id){
2925                     case AV_CODEC_ID_MPEG4:
2926                         if (CONFIG_MPEG4_ENCODER) {
2927                             ff_mpeg4_encode_video_packet_header(s);
2928                             ff_mpeg4_clean_buffers(s);
2929                         }
2930                     break;
2931                     case AV_CODEC_ID_MPEG1VIDEO:
2932                     case AV_CODEC_ID_MPEG2VIDEO:
2933                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2934                             ff_mpeg1_encode_slice_header(s);
2935                             ff_mpeg1_clean_buffers(s);
2936                         }
2937                     break;
2938                     case AV_CODEC_ID_H263:
2939                     case AV_CODEC_ID_H263P:
2940                         if (CONFIG_H263_ENCODER)
2941                             ff_h263_encode_gob_header(s, mb_y);
2942                     break;
2943                     }
2944
2945                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2946                         int bits= put_bits_count(&s->pb);
2947                         s->misc_bits+= bits - s->last_bits;
2948                         s->last_bits= bits;
2949                     }
2950
2951                     s->ptr_lastgob += current_packet_size;
2952                     s->first_slice_line=1;
2953                     s->resync_mb_x=mb_x;
2954                     s->resync_mb_y=mb_y;
2955                 }
2956             }
2957
2958             if(  (s->resync_mb_x   == s->mb_x)
2959                && s->resync_mb_y+1 == s->mb_y){
2960                 s->first_slice_line=0;
2961             }
2962
2963             s->mb_skipped=0;
2964             s->dquant=0; //only for QP_RD
2965
2966             update_mb_info(s, 0);
2967
2968             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2969                 int next_block=0;
2970                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2971
2972                 copy_context_before_encode(&backup_s, s, -1);
2973                 backup_s.pb= s->pb;
2974                 best_s.data_partitioning= s->data_partitioning;
2975                 best_s.partitioned_frame= s->partitioned_frame;
2976                 if(s->data_partitioning){
2977                     backup_s.pb2= s->pb2;
2978                     backup_s.tex_pb= s->tex_pb;
2979                 }
2980
2981                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2982                     s->mv_dir = MV_DIR_FORWARD;
2983                     s->mv_type = MV_TYPE_16X16;
2984                     s->mb_intra= 0;
2985                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2986                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2987                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2988                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2989                 }
2990                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2991                     s->mv_dir = MV_DIR_FORWARD;
2992                     s->mv_type = MV_TYPE_FIELD;
2993                     s->mb_intra= 0;
2994                     for(i=0; i<2; i++){
2995                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2996                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2997                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2998                     }
2999                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3000                                  &dmin, &next_block, 0, 0);
3001                 }
3002                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3003                     s->mv_dir = MV_DIR_FORWARD;
3004                     s->mv_type = MV_TYPE_16X16;
3005                     s->mb_intra= 0;
3006                     s->mv[0][0][0] = 0;
3007                     s->mv[0][0][1] = 0;
3008                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3009                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3010                 }
3011                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3012                     s->mv_dir = MV_DIR_FORWARD;
3013                     s->mv_type = MV_TYPE_8X8;
3014                     s->mb_intra= 0;
3015                     for(i=0; i<4; i++){
3016                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3017                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3018                     }
3019                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3020                                  &dmin, &next_block, 0, 0);
3021                 }
3022                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3023                     s->mv_dir = MV_DIR_FORWARD;
3024                     s->mv_type = MV_TYPE_16X16;
3025                     s->mb_intra= 0;
3026                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3027                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3028                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3029                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3030                 }
3031                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3032                     s->mv_dir = MV_DIR_BACKWARD;
3033                     s->mv_type = MV_TYPE_16X16;
3034                     s->mb_intra= 0;
3035                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3036                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3037                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3038                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3039                 }
3040                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3041                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3042                     s->mv_type = MV_TYPE_16X16;
3043                     s->mb_intra= 0;
3044                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3045                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3046                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3047                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3048                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3049                                  &dmin, &next_block, 0, 0);
3050                 }
3051                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3052                     s->mv_dir = MV_DIR_FORWARD;
3053                     s->mv_type = MV_TYPE_FIELD;
3054                     s->mb_intra= 0;
3055                     for(i=0; i<2; i++){
3056                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3057                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3058                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3059                     }
3060                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3061                                  &dmin, &next_block, 0, 0);
3062                 }
3063                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3064                     s->mv_dir = MV_DIR_BACKWARD;
3065                     s->mv_type = MV_TYPE_FIELD;
3066                     s->mb_intra= 0;
3067                     for(i=0; i<2; i++){
3068                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3069                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3070                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3071                     }
3072                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3073                                  &dmin, &next_block, 0, 0);
3074                 }
3075                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3076                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3077                     s->mv_type = MV_TYPE_FIELD;
3078                     s->mb_intra= 0;
3079                     for(dir=0; dir<2; dir++){
3080                         for(i=0; i<2; i++){
3081                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3082                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3083                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3084                         }
3085                     }
3086                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3087                                  &dmin, &next_block, 0, 0);
3088                 }
3089                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3090                     s->mv_dir = 0;
3091                     s->mv_type = MV_TYPE_16X16;
3092                     s->mb_intra= 1;
3093                     s->mv[0][0][0] = 0;
3094                     s->mv[0][0][1] = 0;
3095                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3096                                  &dmin, &next_block, 0, 0);
3097                     if(s->h263_pred || s->h263_aic){
3098                         if(best_s.mb_intra)
3099                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3100                         else
3101                             ff_clean_intra_table_entries(s); //old mode?
3102                     }
3103                 }
3104
3105                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3106                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3107                         const int last_qp= backup_s.qscale;
3108                         int qpi, qp, dc[6];
3109                         int16_t ac[6][16];
3110                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3111                         static const int dquant_tab[4]={-1,1,-2,2};
3112                         int storecoefs = s->mb_intra && s->dc_val[0];
3113
3114                         av_assert2(backup_s.dquant == 0);
3115
3116                         //FIXME intra
3117                         s->mv_dir= best_s.mv_dir;
3118                         s->mv_type = MV_TYPE_16X16;
3119                         s->mb_intra= best_s.mb_intra;
3120                         s->mv[0][0][0] = best_s.mv[0][0][0];
3121                         s->mv[0][0][1] = best_s.mv[0][0][1];
3122                         s->mv[1][0][0] = best_s.mv[1][0][0];
3123                         s->mv[1][0][1] = best_s.mv[1][0][1];
3124
3125                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3126                         for(; qpi<4; qpi++){
3127                             int dquant= dquant_tab[qpi];
3128                             qp= last_qp + dquant;
3129                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3130                                 continue;
3131                             backup_s.dquant= dquant;
3132                             if(storecoefs){
3133                                 for(i=0; i<6; i++){
3134                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3135                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3136                                 }
3137                             }
3138
3139                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3140                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3141                             if(best_s.qscale != qp){
3142                                 if(storecoefs){
3143                                     for(i=0; i<6; i++){
3144                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3145                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3146                                     }
3147                                 }
3148                             }
3149                         }
3150                     }
3151                 }
3152                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3153                     int mx= s->b_direct_mv_table[xy][0];
3154                     int my= s->b_direct_mv_table[xy][1];
3155
3156                     backup_s.dquant = 0;
3157                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3158                     s->mb_intra= 0;
3159                     ff_mpeg4_set_direct_mv(s, mx, my);
3160                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3161                                  &dmin, &next_block, mx, my);
3162                 }
3163                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3164                     backup_s.dquant = 0;
3165                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3166                     s->mb_intra= 0;
3167                     ff_mpeg4_set_direct_mv(s, 0, 0);
3168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3169                                  &dmin, &next_block, 0, 0);
3170                 }
3171                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3172                     int coded=0;
3173                     for(i=0; i<6; i++)
3174                         coded |= s->block_last_index[i];
3175                     if(coded){
3176                         int mx,my;
3177                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3178                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3179                             mx=my=0; //FIXME find the one we actually used
3180                             ff_mpeg4_set_direct_mv(s, mx, my);
3181                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3182                             mx= s->mv[1][0][0];
3183                             my= s->mv[1][0][1];
3184                         }else{
3185                             mx= s->mv[0][0][0];
3186                             my= s->mv[0][0][1];
3187                         }
3188
3189                         s->mv_dir= best_s.mv_dir;
3190                         s->mv_type = best_s.mv_type;
3191                         s->mb_intra= 0;
3192 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3193                         s->mv[0][0][1] = best_s.mv[0][0][1];
3194                         s->mv[1][0][0] = best_s.mv[1][0][0];
3195                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3196                         backup_s.dquant= 0;
3197                         s->skipdct=1;
3198                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3199                                         &dmin, &next_block, mx, my);
3200                         s->skipdct=0;
3201                     }
3202                 }
3203
3204                 s->current_picture.qscale_table[xy] = best_s.qscale;
3205
3206                 copy_context_after_encode(s, &best_s, -1);
3207
3208                 pb_bits_count= put_bits_count(&s->pb);
3209                 flush_put_bits(&s->pb);
3210                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3211                 s->pb= backup_s.pb;
3212
3213                 if(s->data_partitioning){
3214                     pb2_bits_count= put_bits_count(&s->pb2);
3215                     flush_put_bits(&s->pb2);
3216                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3217                     s->pb2= backup_s.pb2;
3218
3219                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3220                     flush_put_bits(&s->tex_pb);
3221                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3222                     s->tex_pb= backup_s.tex_pb;
3223                 }
3224                 s->last_bits= put_bits_count(&s->pb);
3225
3226                 if (CONFIG_H263_ENCODER &&
3227                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3228                     ff_h263_update_motion_val(s);
3229
3230                 if(next_block==0){ //FIXME 16 vs linesize16
3231                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3232                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3233                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3234                 }
3235
3236                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3237                     ff_mpv_decode_mb(s, s->block);
3238             } else {
3239                 int motion_x = 0, motion_y = 0;
3240                 s->mv_type=MV_TYPE_16X16;
3241                 // only one MB-Type possible
3242
3243                 switch(mb_type){
3244                 case CANDIDATE_MB_TYPE_INTRA:
3245                     s->mv_dir = 0;
3246                     s->mb_intra= 1;
3247                     motion_x= s->mv[0][0][0] = 0;
3248                     motion_y= s->mv[0][0][1] = 0;
3249                     break;
3250                 case CANDIDATE_MB_TYPE_INTER:
3251                     s->mv_dir = MV_DIR_FORWARD;
3252                     s->mb_intra= 0;
3253                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3254                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3255                     break;
3256                 case CANDIDATE_MB_TYPE_INTER_I:
3257                     s->mv_dir = MV_DIR_FORWARD;
3258                     s->mv_type = MV_TYPE_FIELD;
3259                     s->mb_intra= 0;
3260                     for(i=0; i<2; i++){
3261                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3262                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3263                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3264                     }
3265                     break;
3266                 case CANDIDATE_MB_TYPE_INTER4V:
3267                     s->mv_dir = MV_DIR_FORWARD;
3268                     s->mv_type = MV_TYPE_8X8;
3269                     s->mb_intra= 0;
3270                     for(i=0; i<4; i++){
3271                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3272                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3273                     }
3274                     break;
3275                 case CANDIDATE_MB_TYPE_DIRECT:
3276                     if (CONFIG_MPEG4_ENCODER) {
3277                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3278                         s->mb_intra= 0;
3279                         motion_x=s->b_direct_mv_table[xy][0];
3280                         motion_y=s->b_direct_mv_table[xy][1];
3281                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3282                     }
3283                     break;
3284                 case CANDIDATE_MB_TYPE_DIRECT0:
3285                     if (CONFIG_MPEG4_ENCODER) {
3286                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3287                         s->mb_intra= 0;
3288                         ff_mpeg4_set_direct_mv(s, 0, 0);
3289                     }
3290                     break;
3291                 case CANDIDATE_MB_TYPE_BIDIR:
3292                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3293                     s->mb_intra= 0;
3294                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3295                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3296                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3297                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3298                     break;
3299                 case CANDIDATE_MB_TYPE_BACKWARD:
3300                     s->mv_dir = MV_DIR_BACKWARD;
3301                     s->mb_intra= 0;
3302                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3303                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3304                     break;
3305                 case CANDIDATE_MB_TYPE_FORWARD:
3306                     s->mv_dir = MV_DIR_FORWARD;
3307                     s->mb_intra= 0;
3308                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3309                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3310                     break;
3311                 case CANDIDATE_MB_TYPE_FORWARD_I:
3312                     s->mv_dir = MV_DIR_FORWARD;
3313                     s->mv_type = MV_TYPE_FIELD;
3314                     s->mb_intra= 0;
3315                     for(i=0; i<2; i++){
3316                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3317                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3318                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3319                     }
3320                     break;
3321                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3322                     s->mv_dir = MV_DIR_BACKWARD;
3323                     s->mv_type = MV_TYPE_FIELD;
3324                     s->mb_intra= 0;
3325                     for(i=0; i<2; i++){
3326                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3327                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3328                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3329                     }
3330                     break;
3331                 case CANDIDATE_MB_TYPE_BIDIR_I:
3332                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3333                     s->mv_type = MV_TYPE_FIELD;
3334                     s->mb_intra= 0;
3335                     for(dir=0; dir<2; dir++){
3336                         for(i=0; i<2; i++){
3337                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3338                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3339                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3340                         }
3341                     }
3342                     break;
3343                 default:
3344                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3345                 }
3346
3347                 encode_mb(s, motion_x, motion_y);
3348
3349                 // RAL: Update last macroblock type
3350                 s->last_mv_dir = s->mv_dir;
3351
3352                 if (CONFIG_H263_ENCODER &&
3353                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3354                     ff_h263_update_motion_val(s);
3355
3356                 ff_mpv_decode_mb(s, s->block);
3357             }
3358
3359             /* clean the MV table in IPS frames for direct mode in B frames */
3360             if(s->mb_intra /* && I,P,S_TYPE */){
3361                 s->p_mv_table[xy][0]=0;
3362                 s->p_mv_table[xy][1]=0;
3363             }
3364
3365             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3366                 int w= 16;
3367                 int h= 16;
3368
3369                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3370                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3371
3372                 s->current_picture.error[0] += sse(
3373                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3374                     s->dest[0], w, h, s->linesize);
3375                 s->current_picture.error[1] += sse(
3376                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3377                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3378                 s->current_picture.error[2] += sse(
3379                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3380                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3381             }
3382             if(s->loop_filter){
3383                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3384                     ff_h263_loop_filter(s);
3385             }
3386             ff_dlog(s->avctx, "MB %d %d bits\n",
3387                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3388         }
3389     }
3390
3391     //not beautiful here but we must write it before flushing so it has to be here
3392     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3393         ff_msmpeg4_encode_ext_header(s);
3394
3395     write_slice_end(s);
3396
3397     /* Send the last GOB if RTP */
3398     if (s->avctx->rtp_callback) {
3399         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3400         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3401         /* Call the RTP callback to send the last GOB */
3402         emms_c();
3403         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3404     }
3405
3406     return 0;
3407 }
3408
3409 #define MERGE(field) dst->field += src->field; src->field=0
3410 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3411     MERGE(me.scene_change_score);
3412     MERGE(me.mc_mb_var_sum_temp);
3413     MERGE(me.mb_var_sum_temp);
3414 }
3415
3416 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3417     int i;
3418
3419     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3420     MERGE(dct_count[1]);
3421     MERGE(mv_bits);
3422     MERGE(i_tex_bits);
3423     MERGE(p_tex_bits);
3424     MERGE(i_count);
3425     MERGE(f_count);
3426     MERGE(b_count);
3427     MERGE(skip_count);
3428     MERGE(misc_bits);
3429     MERGE(er.error_count);
3430     MERGE(padding_bug_score);
3431     MERGE(current_picture.error[0]);
3432     MERGE(current_picture.error[1]);
3433     MERGE(current_picture.error[2]);
3434
3435     if(dst->avctx->noise_reduction){
3436         for(i=0; i<64; i++){
3437             MERGE(dct_error_sum[0][i]);
3438             MERGE(dct_error_sum[1][i]);
3439         }
3440     }
3441
3442     assert(put_bits_count(&src->pb) % 8 ==0);
3443     assert(put_bits_count(&dst->pb) % 8 ==0);
3444     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3445     flush_put_bits(&dst->pb);
3446 }
3447
3448 static int estimate_qp(MpegEncContext *s, int dry_run){
3449     if (s->next_lambda){
3450         s->current_picture_ptr->f->quality =
3451         s->current_picture.f->quality = s->next_lambda;
3452         if(!dry_run) s->next_lambda= 0;
3453     } else if (!s->fixed_qscale) {
3454         s->current_picture_ptr->f->quality =
3455         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3456         if (s->current_picture.f->quality < 0)
3457             return -1;
3458     }
3459
3460     if(s->adaptive_quant){
3461         switch(s->codec_id){
3462         case AV_CODEC_ID_MPEG4:
3463             if (CONFIG_MPEG4_ENCODER)
3464                 ff_clean_mpeg4_qscales(s);
3465             break;
3466         case AV_CODEC_ID_H263:
3467         case AV_CODEC_ID_H263P:
3468         case AV_CODEC_ID_FLV1:
3469             if (CONFIG_H263_ENCODER)
3470                 ff_clean_h263_qscales(s);
3471             break;
3472         default:
3473             ff_init_qscale_tab(s);
3474         }
3475
3476         s->lambda= s->lambda_table[0];
3477         //FIXME broken
3478     }else
3479         s->lambda = s->current_picture.f->quality;
3480     update_qscale(s);
3481     return 0;
3482 }
3483
3484 /* must be called before writing the header */
3485 static void set_frame_distances(MpegEncContext * s){
3486     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3487     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3488
3489     if(s->pict_type==AV_PICTURE_TYPE_B){
3490         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3491         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3492     }else{
3493         s->pp_time= s->time - s->last_non_b_time;
3494         s->last_non_b_time= s->time;
3495         assert(s->picture_number==0 || s->pp_time > 0);
3496     }
3497 }
3498
3499 static int encode_picture(MpegEncContext *s, int picture_number)
3500 {
3501     int i, ret;
3502     int bits;
3503     int context_count = s->slice_context_count;
3504
3505     s->picture_number = picture_number;
3506
3507     /* Reset the average MB variance */
3508     s->me.mb_var_sum_temp    =
3509     s->me.mc_mb_var_sum_temp = 0;
3510
3511     /* we need to initialize some time vars before we can encode b-frames */
3512     // RAL: Condition added for MPEG1VIDEO
3513     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3514         set_frame_distances(s);
3515     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3516         ff_set_mpeg4_time(s);
3517
3518     s->me.scene_change_score=0;
3519
3520 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3521
3522     if(s->pict_type==AV_PICTURE_TYPE_I){
3523         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3524         else                        s->no_rounding=0;
3525     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3526         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3527             s->no_rounding ^= 1;
3528     }
3529
3530     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3531         if (estimate_qp(s,1) < 0)
3532             return -1;
3533         ff_get_2pass_fcode(s);
3534     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3535         if(s->pict_type==AV_PICTURE_TYPE_B)
3536             s->lambda= s->last_lambda_for[s->pict_type];
3537         else
3538             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3539         update_qscale(s);
3540     }
3541
3542     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3543         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3544         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3545         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3546         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3547     }
3548
3549     s->mb_intra=0; //for the rate distortion & bit compare functions
3550     for(i=1; i<context_count; i++){
3551         ret = ff_update_duplicate_context(s->thread_context[i], s);
3552         if (ret < 0)
3553             return ret;
3554     }
3555
3556     if(ff_init_me(s)<0)
3557         return -1;
3558
3559     /* Estimate motion for every MB */
3560     if(s->pict_type != AV_PICTURE_TYPE_I){
3561         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3562         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3563         if (s->pict_type != AV_PICTURE_TYPE_B) {
3564             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3565                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3566             }
3567         }
3568
3569         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3570     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3571         /* I-Frame */
3572         for(i=0; i<s->mb_stride*s->mb_height; i++)
3573             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3574
3575         if(!s->fixed_qscale){
3576             /* finding spatial complexity for I-frame rate control */
3577             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3578         }
3579     }
3580     for(i=1; i<context_count; i++){
3581         merge_context_after_me(s, s->thread_context[i]);
3582     }
3583     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3584     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3585     emms_c();
3586
3587     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3588         s->pict_type= AV_PICTURE_TYPE_I;
3589         for(i=0; i<s->mb_stride*s->mb_height; i++)
3590             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3591         if(s->msmpeg4_version >= 3)
3592             s->no_rounding=1;
3593         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3594                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3595     }
3596
3597     if(!s->umvplus){
3598         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3599             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3600
3601             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3602                 int a,b;
3603                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3604                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3605                 s->f_code= FFMAX3(s->f_code, a, b);
3606             }
3607
3608             ff_fix_long_p_mvs(s);
3609             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3610             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3611                 int j;
3612                 for(i=0; i<2; i++){
3613                     for(j=0; j<2; j++)
3614                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3615                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3616                 }
3617             }
3618         }
3619
3620         if(s->pict_type==AV_PICTURE_TYPE_B){
3621             int a, b;
3622
3623             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3624             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3625             s->f_code = FFMAX(a, b);
3626
3627             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3628             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3629             s->b_code = FFMAX(a, b);
3630
3631             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3632             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3633             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3634             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3635             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3636                 int dir, j;
3637                 for(dir=0; dir<2; dir++){
3638                     for(i=0; i<2; i++){
3639                         for(j=0; j<2; j++){
3640                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3641                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3642                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3643                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3644                         }
3645                     }
3646                 }
3647             }
3648         }
3649     }
3650
3651     if (estimate_qp(s, 0) < 0)
3652         return -1;
3653
3654     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3655         s->pict_type == AV_PICTURE_TYPE_I &&
3656         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3657         s->qscale= 3; //reduce clipping problems
3658
3659     if (s->out_format == FMT_MJPEG) {
3660         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3661         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3662
3663         if (s->avctx->intra_matrix) {
3664             chroma_matrix =
3665             luma_matrix = s->avctx->intra_matrix;
3666         }
3667         if (s->avctx->chroma_intra_matrix)
3668             chroma_matrix = s->avctx->chroma_intra_matrix;
3669
3670         /* for mjpeg, we do include qscale in the matrix */
3671         for(i=1;i<64;i++){
3672             int j = s->idsp.idct_permutation[i];
3673
3674             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3675             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3676         }
3677         s->y_dc_scale_table=
3678         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3679         s->chroma_intra_matrix[0] =
3680         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3681         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3682                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3683         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3684                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3685         s->qscale= 8;
3686     }
3687     if(s->codec_id == AV_CODEC_ID_AMV){
3688         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3689         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3690         for(i=1;i<64;i++){
3691             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3692
3693             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3694             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3695         }
3696         s->y_dc_scale_table= y;
3697         s->c_dc_scale_table= c;
3698         s->intra_matrix[0] = 13;
3699         s->chroma_intra_matrix[0] = 14;
3700         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3701                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3702         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3703                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3704         s->qscale= 8;
3705     }
3706
3707     //FIXME var duplication
3708     s->current_picture_ptr->f->key_frame =
3709     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3710     s->current_picture_ptr->f->pict_type =
3711     s->current_picture.f->pict_type = s->pict_type;
3712
3713     if (s->current_picture.f->key_frame)
3714         s->picture_in_gop_number=0;
3715
3716     s->mb_x = s->mb_y = 0;
3717     s->last_bits= put_bits_count(&s->pb);
3718     switch(s->out_format) {
3719     case FMT_MJPEG:
3720         if (CONFIG_MJPEG_ENCODER)
3721             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3722                                            s->intra_matrix, s->chroma_intra_matrix);
3723         break;
3724     case FMT_H261:
3725         if (CONFIG_H261_ENCODER)
3726             ff_h261_encode_picture_header(s, picture_number);
3727         break;
3728     case FMT_H263:
3729         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3730             ff_wmv2_encode_picture_header(s, picture_number);
3731         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3732             ff_msmpeg4_encode_picture_header(s, picture_number);
3733         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3734             ff_mpeg4_encode_picture_header(s, picture_number);
3735         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3736             ret = ff_rv10_encode_picture_header(s, picture_number);
3737             if (ret < 0)
3738                 return ret;
3739         }
3740         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3741             ff_rv20_encode_picture_header(s, picture_number);
3742         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3743             ff_flv_encode_picture_header(s, picture_number);
3744         else if (CONFIG_H263_ENCODER)
3745             ff_h263_encode_picture_header(s, picture_number);
3746         break;
3747     case FMT_MPEG1:
3748         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3749             ff_mpeg1_encode_picture_header(s, picture_number);
3750         break;
3751     default:
3752         av_assert0(0);
3753     }
3754     bits= put_bits_count(&s->pb);
3755     s->header_bits= bits - s->last_bits;
3756
3757     for(i=1; i<context_count; i++){
3758         update_duplicate_context_after_me(s->thread_context[i], s);
3759     }
3760     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3761     for(i=1; i<context_count; i++){
3762         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3763             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3764         merge_context_after_encode(s, s->thread_context[i]);
3765     }
3766     emms_c();
3767     return 0;
3768 }
3769
3770 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3771     const int intra= s->mb_intra;
3772     int i;
3773
3774     s->dct_count[intra]++;
3775
3776     for(i=0; i<64; i++){
3777         int level= block[i];
3778
3779         if(level){
3780             if(level>0){
3781                 s->dct_error_sum[intra][i] += level;
3782                 level -= s->dct_offset[intra][i];
3783                 if(level<0) level=0;
3784             }else{
3785                 s->dct_error_sum[intra][i] -= level;
3786                 level += s->dct_offset[intra][i];
3787                 if(level>0) level=0;
3788             }
3789             block[i]= level;
3790         }
3791     }
3792 }
3793
3794 static int dct_quantize_trellis_c(MpegEncContext *s,
3795                                   int16_t *block, int n,
3796                                   int qscale, int *overflow){
3797     const int *qmat;
3798     const uint16_t *matrix;
3799     const uint8_t *scantable= s->intra_scantable.scantable;
3800     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3801     int max=0;
3802     unsigned int threshold1, threshold2;
3803     int bias=0;
3804     int run_tab[65];
3805     int level_tab[65];
3806     int score_tab[65];
3807     int survivor[65];
3808     int survivor_count;
3809     int last_run=0;
3810     int last_level=0;
3811     int last_score= 0;
3812     int last_i;
3813     int coeff[2][64];
3814     int coeff_count[64];
3815     int qmul, qadd, start_i, last_non_zero, i, dc;
3816     const int esc_length= s->ac_esc_length;
3817     uint8_t * length;
3818     uint8_t * last_length;
3819     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3820
3821     s->fdsp.fdct(block);
3822
3823     if(s->dct_error_sum)
3824         s->denoise_dct(s, block);
3825     qmul= qscale*16;
3826     qadd= ((qscale-1)|1)*8;
3827
3828     if (s->mb_intra) {
3829         int q;
3830         if (!s->h263_aic) {
3831             if (n < 4)
3832                 q = s->y_dc_scale;
3833             else
3834                 q = s->c_dc_scale;
3835             q = q << 3;
3836         } else{
3837             /* For AIC we skip quant/dequant of INTRADC */
3838             q = 1 << 3;
3839             qadd=0;
3840         }
3841
3842         /* note: block[0] is assumed to be positive */
3843         block[0] = (block[0] + (q >> 1)) / q;
3844         start_i = 1;
3845         last_non_zero = 0;
3846         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3847         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3848         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3849             bias= 1<<(QMAT_SHIFT-1);
3850
3851         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3852             length     = s->intra_chroma_ac_vlc_length;
3853             last_length= s->intra_chroma_ac_vlc_last_length;
3854         } else {
3855             length     = s->intra_ac_vlc_length;
3856             last_length= s->intra_ac_vlc_last_length;
3857         }
3858     } else {
3859         start_i = 0;
3860         last_non_zero = -1;
3861         qmat = s->q_inter_matrix[qscale];
3862         matrix = s->inter_matrix;
3863         length     = s->inter_ac_vlc_length;
3864         last_length= s->inter_ac_vlc_last_length;
3865     }
3866     last_i= start_i;
3867
3868     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3869     threshold2= (threshold1<<1);
3870
3871     for(i=63; i>=start_i; i--) {
3872         const int j = scantable[i];
3873         int level = block[j] * qmat[j];
3874
3875         if(((unsigned)(level+threshold1))>threshold2){
3876             last_non_zero = i;
3877             break;
3878         }
3879     }
3880
3881     for(i=start_i; i<=last_non_zero; i++) {
3882         const int j = scantable[i];
3883         int level = block[j] * qmat[j];
3884
3885 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3886 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3887         if(((unsigned)(level+threshold1))>threshold2){
3888             if(level>0){
3889                 level= (bias + level)>>QMAT_SHIFT;
3890                 coeff[0][i]= level;
3891                 coeff[1][i]= level-1;
3892 //                coeff[2][k]= level-2;
3893             }else{
3894                 level= (bias - level)>>QMAT_SHIFT;
3895                 coeff[0][i]= -level;
3896                 coeff[1][i]= -level+1;
3897 //                coeff[2][k]= -level+2;
3898             }
3899             coeff_count[i]= FFMIN(level, 2);
3900             av_assert2(coeff_count[i]);
3901             max |=level;
3902         }else{
3903             coeff[0][i]= (level>>31)|1;
3904             coeff_count[i]= 1;
3905         }
3906     }
3907
3908     *overflow= s->max_qcoeff < max; //overflow might have happened
3909
3910     if(last_non_zero < start_i){
3911         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3912         return last_non_zero;
3913     }
3914
3915     score_tab[start_i]= 0;
3916     survivor[0]= start_i;
3917     survivor_count= 1;
3918
3919     for(i=start_i; i<=last_non_zero; i++){
3920         int level_index, j, zero_distortion;
3921         int dct_coeff= FFABS(block[ scantable[i] ]);
3922         int best_score=256*256*256*120;
3923
3924         if (s->fdsp.fdct == ff_fdct_ifast)
3925             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3926         zero_distortion= dct_coeff*dct_coeff;
3927
3928         for(level_index=0; level_index < coeff_count[i]; level_index++){
3929             int distortion;
3930             int level= coeff[level_index][i];
3931             const int alevel= FFABS(level);
3932             int unquant_coeff;
3933
3934             av_assert2(level);
3935
3936             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3937                 unquant_coeff= alevel*qmul + qadd;
3938             } else if(s->out_format == FMT_MJPEG) {
3939                 j = s->idsp.idct_permutation[scantable[i]];
3940                 unquant_coeff = alevel * matrix[j] * 8;
3941             }else{ //MPEG1
3942                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3943                 if(s->mb_intra){
3944                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3945                         unquant_coeff =   (unquant_coeff - 1) | 1;
3946                 }else{
3947                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3948                         unquant_coeff =   (unquant_coeff - 1) | 1;
3949                 }
3950                 unquant_coeff<<= 3;
3951             }
3952
3953             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3954             level+=64;
3955             if((level&(~127)) == 0){
3956                 for(j=survivor_count-1; j>=0; j--){
3957                     int run= i - survivor[j];
3958                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3959                     score += score_tab[i-run];
3960
3961                     if(score < best_score){
3962                         best_score= score;
3963                         run_tab[i+1]= run;
3964                         level_tab[i+1]= level-64;
3965                     }
3966                 }
3967
3968                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3969                     for(j=survivor_count-1; j>=0; j--){
3970                         int run= i - survivor[j];
3971                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3972                         score += score_tab[i-run];
3973                         if(score < last_score){
3974                             last_score= score;
3975                             last_run= run;
3976                             last_level= level-64;
3977                             last_i= i+1;
3978                         }
3979                     }
3980                 }
3981             }else{
3982                 distortion += esc_length*lambda;
3983                 for(j=survivor_count-1; j>=0; j--){
3984                     int run= i - survivor[j];
3985                     int score= distortion + score_tab[i-run];
3986
3987                     if(score < best_score){
3988                         best_score= score;
3989                         run_tab[i+1]= run;
3990                         level_tab[i+1]= level-64;
3991                     }
3992                 }
3993
3994                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3995                   for(j=survivor_count-1; j>=0; j--){
3996                         int run= i - survivor[j];
3997                         int score= distortion + score_tab[i-run];
3998                         if(score < last_score){
3999                             last_score= score;
4000                             last_run= run;
4001                             last_level= level-64;
4002                             last_i= i+1;
4003                         }
4004                     }
4005                 }
4006             }
4007         }
4008
4009         score_tab[i+1]= best_score;
4010
4011         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
4012         if(last_non_zero <= 27){
4013             for(; survivor_count; survivor_count--){
4014                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4015                     break;
4016             }
4017         }else{
4018             for(; survivor_count; survivor_count--){
4019                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4020                     break;
4021             }
4022         }
4023
4024         survivor[ survivor_count++ ]= i+1;
4025     }
4026
4027     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4028         last_score= 256*256*256*120;
4029         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4030             int score= score_tab[i];
4031             if(i) score += lambda*2; //FIXME exacter?
4032
4033             if(score < last_score){
4034                 last_score= score;
4035                 last_i= i;
4036                 last_level= level_tab[i];
4037                 last_run= run_tab[i];
4038             }
4039         }
4040     }
4041
4042     s->coded_score[n] = last_score;
4043
4044     dc= FFABS(block[0]);
4045     last_non_zero= last_i - 1;
4046     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4047
4048     if(last_non_zero < start_i)
4049         return last_non_zero;
4050
4051     if(last_non_zero == 0 && start_i == 0){
4052         int best_level= 0;
4053         int best_score= dc * dc;
4054
4055         for(i=0; i<coeff_count[0]; i++){
4056             int level= coeff[i][0];
4057             int alevel= FFABS(level);
4058             int unquant_coeff, score, distortion;
4059
4060             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4061                     unquant_coeff= (alevel*qmul + qadd)>>3;
4062             }else{ //MPEG1
4063                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4064                     unquant_coeff =   (unquant_coeff - 1) | 1;
4065             }
4066             unquant_coeff = (unquant_coeff + 4) >> 3;
4067             unquant_coeff<<= 3 + 3;
4068
4069             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4070             level+=64;
4071             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4072             else                    score= distortion + esc_length*lambda;
4073
4074             if(score < best_score){
4075                 best_score= score;
4076                 best_level= level - 64;
4077             }
4078         }
4079         block[0]= best_level;
4080         s->coded_score[n] = best_score - dc*dc;
4081         if(best_level == 0) return -1;
4082         else                return last_non_zero;
4083     }
4084
4085     i= last_i;
4086     av_assert2(last_level);
4087
4088     block[ perm_scantable[last_non_zero] ]= last_level;
4089     i -= last_run + 1;
4090
4091     for(; i>start_i; i -= run_tab[i] + 1){
4092         block[ perm_scantable[i-1] ]= level_tab[i];
4093     }
4094
4095     return last_non_zero;
4096 }
4097
4098 //#define REFINE_STATS 1
4099 static int16_t basis[64][64];
4100
4101 static void build_basis(uint8_t *perm){
4102     int i, j, x, y;
4103     emms_c();
4104     for(i=0; i<8; i++){
4105         for(j=0; j<8; j++){
4106             for(y=0; y<8; y++){
4107                 for(x=0; x<8; x++){
4108                     double s= 0.25*(1<<BASIS_SHIFT);
4109                     int index= 8*i + j;
4110                     int perm_index= perm[index];
4111                     if(i==0) s*= sqrt(0.5);
4112                     if(j==0) s*= sqrt(0.5);
4113                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4114                 }
4115             }
4116         }
4117     }
4118 }
4119
4120 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4121                         int16_t *block, int16_t *weight, int16_t *orig,
4122                         int n, int qscale){
4123     int16_t rem[64];
4124     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4125     const uint8_t *scantable= s->intra_scantable.scantable;
4126     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4127 //    unsigned int threshold1, threshold2;
4128 //    int bias=0;
4129     int run_tab[65];
4130     int prev_run=0;
4131     int prev_level=0;
4132     int qmul, qadd, start_i, last_non_zero, i, dc;
4133     uint8_t * length;
4134     uint8_t * last_length;
4135     int lambda;
4136     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4137 #ifdef REFINE_STATS
4138 static int count=0;
4139 static int after_last=0;
4140 static int to_zero=0;
4141 static int from_zero=0;
4142 static int raise=0;
4143 static int lower=0;
4144 static int messed_sign=0;
4145 #endif
4146
4147     if(basis[0][0] == 0)
4148         build_basis(s->idsp.idct_permutation);
4149
4150     qmul= qscale*2;
4151     qadd= (qscale-1)|1;
4152     if (s->mb_intra) {
4153         if (!s->h263_aic) {
4154             if (n < 4)
4155                 q = s->y_dc_scale;
4156             else
4157                 q = s->c_dc_scale;
4158         } else{
4159             /* For AIC we skip quant/dequant of INTRADC */
4160             q = 1;
4161             qadd=0;
4162         }
4163         q <<= RECON_SHIFT-3;
4164         /* note: block[0] is assumed to be positive */
4165         dc= block[0]*q;
4166 //        block[0] = (block[0] + (q >> 1)) / q;
4167         start_i = 1;
4168 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4169 //            bias= 1<<(QMAT_SHIFT-1);
4170         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4171             length     = s->intra_chroma_ac_vlc_length;
4172             last_length= s->intra_chroma_ac_vlc_last_length;
4173         } else {
4174             length     = s->intra_ac_vlc_length;
4175             last_length= s->intra_ac_vlc_last_length;
4176         }
4177     } else {
4178         dc= 0;
4179         start_i = 0;
4180         length     = s->inter_ac_vlc_length;
4181         last_length= s->inter_ac_vlc_last_length;
4182     }
4183     last_non_zero = s->block_last_index[n];
4184
4185 #ifdef REFINE_STATS
4186 {START_TIMER
4187 #endif
4188     dc += (1<<(RECON_SHIFT-1));
4189     for(i=0; i<64; i++){
4190         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4191     }
4192 #ifdef REFINE_STATS
4193 STOP_TIMER("memset rem[]")}
4194 #endif
4195     sum=0;
4196     for(i=0; i<64; i++){
4197         int one= 36;
4198         int qns=4;
4199         int w;
4200
4201         w= FFABS(weight[i]) + qns*one;
4202         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4203
4204         weight[i] = w;
4205 //        w=weight[i] = (63*qns + (w/2)) / w;
4206
4207         av_assert2(w>0);
4208         av_assert2(w<(1<<6));
4209         sum += w*w;
4210     }
4211     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4212 #ifdef REFINE_STATS
4213 {START_TIMER
4214 #endif
4215     run=0;
4216     rle_index=0;
4217     for(i=start_i; i<=last_non_zero; i++){
4218         int j= perm_scantable[i];
4219         const int level= block[j];
4220         int coeff;
4221
4222         if(level){
4223             if(level<0) coeff= qmul*level - qadd;
4224             else        coeff= qmul*level + qadd;
4225             run_tab[rle_index++]=run;
4226             run=0;
4227
4228             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4229         }else{
4230             run++;
4231         }
4232     }
4233 #ifdef REFINE_STATS
4234 if(last_non_zero>0){
4235 STOP_TIMER("init rem[]")
4236 }
4237 }
4238
4239 {START_TIMER
4240 #endif
4241     for(;;){
4242         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4243         int best_coeff=0;
4244         int best_change=0;
4245         int run2, best_unquant_change=0, analyze_gradient;
4246 #ifdef REFINE_STATS
4247 {START_TIMER
4248 #endif
4249         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4250
4251         if(analyze_gradient){
4252 #ifdef REFINE_STATS
4253 {START_TIMER
4254 #endif
4255             for(i=0; i<64; i++){
4256                 int w= weight[i];
4257
4258                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4259             }
4260 #ifdef REFINE_STATS
4261 STOP_TIMER("rem*w*w")}
4262 {START_TIMER
4263 #endif
4264             s->fdsp.fdct(d1);
4265 #ifdef REFINE_STATS
4266 STOP_TIMER("dct")}
4267 #endif
4268         }
4269
4270         if(start_i){
4271             const int level= block[0];
4272             int change, old_coeff;
4273
4274             av_assert2(s->mb_intra);
4275
4276             old_coeff= q*level;
4277
4278             for(change=-1; change<=1; change+=2){
4279                 int new_level= level + change;
4280                 int score, new_coeff;
4281
4282                 new_coeff= q*new_level;
4283                 if(new_coeff >= 2048 || new_coeff < 0)
4284                     continue;
4285
4286                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4287                                                   new_coeff - old_coeff);
4288                 if(score<best_score){
4289                     best_score= score;
4290                     best_coeff= 0;
4291                     best_change= change;
4292                     best_unquant_change= new_coeff - old_coeff;
4293                 }
4294             }
4295         }
4296
4297         run=0;
4298         rle_index=0;
4299         run2= run_tab[rle_index++];
4300         prev_level=0;
4301         prev_run=0;
4302
4303         for(i=start_i; i<64; i++){
4304             int j= perm_scantable[i];
4305             const int level= block[j];
4306             int change, old_coeff;
4307
4308             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4309                 break;
4310
4311             if(level){
4312                 if(level<0) old_coeff= qmul*level - qadd;
4313                 else        old_coeff= qmul*level + qadd;
4314                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4315             }else{
4316                 old_coeff=0;
4317                 run2--;
4318                 av_assert2(run2>=0 || i >= last_non_zero );
4319             }
4320
4321             for(change=-1; change<=1; change+=2){
4322                 int new_level= level + change;
4323                 int score, new_coeff, unquant_change;
4324
4325                 score=0;
4326                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4327                    continue;
4328
4329                 if(new_level){
4330                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4331                     else            new_coeff= qmul*new_level + qadd;
4332                     if(new_coeff >= 2048 || new_coeff <= -2048)
4333                         continue;
4334                     //FIXME check for overflow
4335
4336                     if(level){
4337                         if(level < 63 && level > -63){
4338                             if(i < last_non_zero)
4339                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4340                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4341                             else
4342                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4343                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4344                         }
4345                     }else{
4346                         av_assert2(FFABS(new_level)==1);
4347
4348                         if(analyze_gradient){
4349                             int g= d1[ scantable[i] ];
4350                             if(g && (g^new_level) >= 0)
4351                                 continue;
4352                         }
4353
4354                         if(i < last_non_zero){
4355                             int next_i= i + run2 + 1;
4356                             int next_level= block[ perm_scantable[next_i] ] + 64;
4357
4358                             if(next_level&(~127))
4359                                 next_level= 0;
4360
4361                             if(next_i < last_non_zero)
4362                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4363                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4364                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4365                             else
4366                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4367                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4368                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4369                         }else{
4370                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4371                             if(prev_level){
4372                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4373                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4374                             }
4375                         }
4376                     }
4377                 }else{
4378                     new_coeff=0;
4379                     av_assert2(FFABS(level)==1);
4380
4381                     if(i < last_non_zero){
4382                         int next_i= i + run2 + 1;
4383                         int next_level= block[ perm_scantable[next_i] ] + 64;
4384
4385                         if(next_level&(~127))
4386                             next_level= 0;
4387
4388                         if(next_i < last_non_zero)
4389                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4390                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4391                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4392                         else
4393                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4394                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4395                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4396                     }else{
4397                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4398                         if(prev_level){
4399                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4400                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4401                         }
4402                     }
4403                 }
4404
4405                 score *= lambda;
4406
4407                 unquant_change= new_coeff - old_coeff;
4408                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4409
4410                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4411                                                    unquant_change);
4412                 if(score<best_score){
4413                     best_score= score;
4414                     best_coeff= i;
4415                     best_change= change;
4416                     best_unquant_change= unquant_change;
4417                 }
4418             }
4419             if(level){
4420                 prev_level= level + 64;
4421                 if(prev_level&(~127))
4422                     prev_level= 0;
4423                 prev_run= run;
4424                 run=0;
4425             }else{
4426                 run++;
4427             }
4428         }
4429 #ifdef REFINE_STATS
4430 STOP_TIMER("iterative step")}
4431 #endif
4432
4433         if(best_change){
4434             int j= perm_scantable[ best_coeff ];
4435
4436             block[j] += best_change;
4437
4438             if(best_coeff > last_non_zero){
4439                 last_non_zero= best_coeff;
4440                 av_assert2(block[j]);
4441 #ifdef REFINE_STATS
4442 after_last++;
4443 #endif
4444             }else{
4445 #ifdef REFINE_STATS
4446 if(block[j]){
4447     if(block[j] - best_change){
4448         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4449             raise++;
4450         }else{
4451             lower++;
4452         }
4453     }else{
4454         from_zero++;
4455     }
4456 }else{
4457     to_zero++;
4458 }
4459 #endif
4460                 for(; last_non_zero>=start_i; last_non_zero--){
4461                     if(block[perm_scantable[last_non_zero]])
4462                         break;
4463                 }
4464             }
4465 #ifdef REFINE_STATS
4466 count++;
4467 if(256*256*256*64 % count == 0){
4468     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4469 }
4470 #endif
4471             run=0;
4472             rle_index=0;
4473             for(i=start_i; i<=last_non_zero; i++){
4474                 int j= perm_scantable[i];
4475                 const int level= block[j];
4476
4477                  if(level){
4478                      run_tab[rle_index++]=run;
4479                      run=0;
4480                  }else{
4481                      run++;
4482                  }
4483             }
4484
4485             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4486         }else{
4487             break;
4488         }
4489     }
4490 #ifdef REFINE_STATS
4491 if(last_non_zero>0){
4492 STOP_TIMER("iterative search")
4493 }
4494 }
4495 #endif
4496
4497     return last_non_zero;
4498 }
4499
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Only the coefficients at scantable[0] .. scantable[last] are moved; all
 * other entries of the block are left untouched.  Nothing is done when
 * last is zero or negative.
 *
 * @param block       the block which will be permuted according to
 *                    the given permutation vector
 * @param permutation the permutation vector
 * @param scantable   the used scantable, this is only used to speed the
 *                    permutation up, the block is not (inverse) permutated
 *                    to scantable order!
 * @param last        the last non zero coefficient in scantable order, used
 *                    to speed the permutation up
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last <= 0)
        return;
    // FIXME: skipping the work when permutation[1] == 1 is ok but not clean
    // and might fail for some permutations, so it is not done here.

    /* pass 1: move the affected coefficients aside and clear their slots */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos++];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* pass 2: store each saved coefficient at its permuted position */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos++];

        block[permutation[src]] = saved[src];
    }
}
4535
4536 int ff_dct_quantize_c(MpegEncContext *s,
4537                         int16_t *block, int n,
4538                         int qscale, int *overflow)
4539 {
4540     int i, j, level, last_non_zero, q, start_i;
4541     const int *qmat;
4542     const uint8_t *scantable= s->intra_scantable.scantable;
4543     int bias;
4544     int max=0;
4545     unsigned int threshold1, threshold2;
4546
4547     s->fdsp.fdct(block);
4548
4549     if(s->dct_error_sum)
4550         s->denoise_dct(s, block);
4551
4552     if (s->mb_intra) {
4553         if (!s->h263_aic) {
4554             if (n < 4)
4555                 q = s->y_dc_scale;
4556             else
4557                 q = s->c_dc_scale;
4558             q = q << 3;
4559         } else
4560             /* For AIC we skip quant/dequant of INTRADC */
4561             q = 1 << 3;
4562
4563         /* note: block[0] is assumed to be positive */
4564         block[0] = (block[0] + (q >> 1)) / q;
4565         start_i = 1;
4566         last_non_zero = 0;
4567         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4568         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4569     } else {
4570         start_i = 0;
4571         last_non_zero = -1;
4572         qmat = s->q_inter_matrix[qscale];
4573         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4574     }
4575     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4576     threshold2= (threshold1<<1);
4577     for(i=63;i>=start_i;i--) {
4578         j = scantable[i];
4579         level = block[j] * qmat[j];
4580
4581         if(((unsigned)(level+threshold1))>threshold2){
4582             last_non_zero = i;
4583             break;
4584         }else{
4585             block[j]=0;
4586         }
4587     }
4588     for(i=start_i; i<=last_non_zero; i++) {
4589         j = scantable[i];
4590         level = block[j] * qmat[j];
4591
4592 //        if(   bias+level >= (1<<QMAT_SHIFT)
4593 //           || bias-level >= (1<<QMAT_SHIFT)){
4594         if(((unsigned)(level+threshold1))>threshold2){
4595             if(level>0){
4596                 level= (bias + level)>>QMAT_SHIFT;
4597                 block[j]= level;
4598             }else{
4599                 level= (bias - level)>>QMAT_SHIFT;
4600                 block[j]= -level;
4601             }
4602             max |=level;
4603         }else{
4604             block[j]=0;
4605         }
4606     }
4607     *overflow= s->max_qcoeff < max; //overflow might have happened
4608
4609     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4610     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4611         block_permute(block, s->idsp.idct_permutation,
4612                       scantable, last_non_zero);
4613
4614     return last_non_zero;
4615 }
4616
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the option tables below: video + encoding parameter.
 * The expansion is parenthesized so that VE behaves as a single operand
 * in any surrounding expression (e.g. "VE & mask" or "~VE"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4619 static const AVOption h263_options[] = {
4620     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4621     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4622     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4623     FF_MPV_COMMON_OPTS
4624     { NULL },
4625 };
4626
4627 static const AVClass h263_class = {
4628     .class_name = "H.263 encoder",
4629     .item_name  = av_default_item_name,
4630     .option     = h263_options,
4631     .version    = LIBAVUTIL_VERSION_INT,
4632 };
4633
4634 AVCodec ff_h263_encoder = {
4635     .name           = "h263",
4636     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4637     .type           = AVMEDIA_TYPE_VIDEO,
4638     .id             = AV_CODEC_ID_H263,
4639     .priv_data_size = sizeof(MpegEncContext),
4640     .init           = ff_mpv_encode_init,
4641     .encode2        = ff_mpv_encode_picture,
4642     .close          = ff_mpv_encode_end,
4643     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4644     .priv_class     = &h263_class,
4645 };
4646
4647 static const AVOption h263p_options[] = {
4648     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4649     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4650     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4651     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4652     FF_MPV_COMMON_OPTS
4653     { NULL },
4654 };
4655 static const AVClass h263p_class = {
4656     .class_name = "H.263p encoder",
4657     .item_name  = av_default_item_name,
4658     .option     = h263p_options,
4659     .version    = LIBAVUTIL_VERSION_INT,
4660 };
4661
4662 AVCodec ff_h263p_encoder = {
4663     .name           = "h263p",
4664     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4665     .type           = AVMEDIA_TYPE_VIDEO,
4666     .id             = AV_CODEC_ID_H263P,
4667     .priv_data_size = sizeof(MpegEncContext),
4668     .init           = ff_mpv_encode_init,
4669     .encode2        = ff_mpv_encode_picture,
4670     .close          = ff_mpv_encode_end,
4671     .capabilities   = CODEC_CAP_SLICE_THREADS,
4672     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4673     .priv_class     = &h263p_class,
4674 };
4675
4676 static const AVClass msmpeg4v2_class = {
4677     .class_name = "msmpeg4v2 encoder",
4678     .item_name  = av_default_item_name,
4679     .option     = ff_mpv_generic_options,
4680     .version    = LIBAVUTIL_VERSION_INT,
4681 };
4682
4683 AVCodec ff_msmpeg4v2_encoder = {
4684     .name           = "msmpeg4v2",
4685     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4686     .type           = AVMEDIA_TYPE_VIDEO,
4687     .id             = AV_CODEC_ID_MSMPEG4V2,
4688     .priv_data_size = sizeof(MpegEncContext),
4689     .init           = ff_mpv_encode_init,
4690     .encode2        = ff_mpv_encode_picture,
4691     .close          = ff_mpv_encode_end,
4692     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4693     .priv_class     = &msmpeg4v2_class,
4694 };
4695
4696 static const AVClass msmpeg4v3_class = {
4697     .class_name = "msmpeg4v3 encoder",
4698     .item_name  = av_default_item_name,
4699     .option     = ff_mpv_generic_options,
4700     .version    = LIBAVUTIL_VERSION_INT,
4701 };
4702
4703 AVCodec ff_msmpeg4v3_encoder = {
4704     .name           = "msmpeg4",
4705     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4706     .type           = AVMEDIA_TYPE_VIDEO,
4707     .id             = AV_CODEC_ID_MSMPEG4V3,
4708     .priv_data_size = sizeof(MpegEncContext),
4709     .init           = ff_mpv_encode_init,
4710     .encode2        = ff_mpv_encode_picture,
4711     .close          = ff_mpv_encode_end,
4712     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4713     .priv_class     = &msmpeg4v3_class,
4714 };
4715
4716 static const AVClass wmv1_class = {
4717     .class_name = "wmv1 encoder",
4718     .item_name  = av_default_item_name,
4719     .option     = ff_mpv_generic_options,
4720     .version    = LIBAVUTIL_VERSION_INT,
4721 };
4722
4723 AVCodec ff_wmv1_encoder = {
4724     .name           = "wmv1",
4725     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4726     .type           = AVMEDIA_TYPE_VIDEO,
4727     .id             = AV_CODEC_ID_WMV1,
4728     .priv_data_size = sizeof(MpegEncContext),
4729     .init           = ff_mpv_encode_init,
4730     .encode2        = ff_mpv_encode_picture,
4731     .close          = ff_mpv_encode_end,
4732     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4733     .priv_class     = &wmv1_class,
4734 };