git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64 #include "sp5x.h"
  65
  66 #define QUANT_BIAS_SHIFT 8
  67
  68 #define QMAT_SHIFT_MMX 16
  69 #define QMAT_SHIFT 21
  70
  71 static int encode_picture(MpegEncContext *s, int picture_number);
  72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  73 static int sse_mb(MpegEncContext *s);
  74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  76
  77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  79
  80 const AVOption ff_mpv_generic_options[] = {
  81     FF_MPV_COMMON_OPTS
  82     { NULL },
  83 };
  84
  85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  86                        uint16_t (*qmat16)[2][64],
  87                        const uint16_t *quant_matrix,
  88                        int bias, int qmin, int qmax, int intra)
  89 {
  90     FDCTDSPContext *fdsp = &s->fdsp;
  91     int qscale;
  92     int shift = 0;
  93
  94     for (qscale = qmin; qscale <= qmax; qscale++) {
  95         int i;
  96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  97 #if CONFIG_FAANDCT
  98             fdsp->fdct == ff_faandct            ||
  99 #endif /* CONFIG_FAANDCT */
 100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 101             for (i = 0; i < 64; i++) {
 102                 const int j = s->idsp.idct_permutation[i];
 103                 int64_t den = (int64_t) qscale * quant_matrix[j];
 104                 /* 16 <= qscale * quant_matrix[i] <= 7905
 105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 106                  *             19952 <=              x  <= 249205026
 107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 108                  *           3444240 >= (1 << 36) / (x) >= 275 */
 109
 110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 111             }
 112         } else if (fdsp->fdct == ff_fdct_ifast) {
 113             for (i = 0; i < 64; i++) {
 114                 const int j = s->idsp.idct_permutation[i];
 115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 116                 /* 16 <= qscale * quant_matrix[i] <= 7905
 117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 118                  *             19952 <=              x  <= 249205026
 119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 120                  *           3444240 >= (1 << 36) / (x) >= 275 */
 121
 122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 123             }
 124         } else {
 125             for (i = 0; i < 64; i++) {
 126                 const int j = s->idsp.idct_permutation[i];
 127                 int64_t den = (int64_t) qscale * quant_matrix[j];
 128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 129                  * Assume x = qscale * quant_matrix[i]
 130                  * So             16 <=              x  <= 7905
 131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 132                  * so          32768 >= (1 << 19) / (x) >= 67 */
 133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 135                 //                    (qscale * quant_matrix[i]);
 136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 137
 138                 if (qmat16[qscale][0][i] == 0 ||
 139                     qmat16[qscale][0][i] == 128 * 256)
 140                     qmat16[qscale][0][i] = 128 * 256 - 1;
 141                 qmat16[qscale][1][i] =
 142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 143                                 qmat16[qscale][0][i]);
 144             }
 145         }
 146
 147         for (i = intra; i < 64; i++) {
 148             int64_t max = 8191;
 149             if (fdsp->fdct == ff_fdct_ifast) {
 150                 max = (8191LL * ff_aanscales[i]) >> 14;
 151             }
 152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 153                 shift++;
 154             }
 155         }
 156     }
 157     if (shift) {
 158         av_log(NULL, AV_LOG_INFO,
 159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 160                QMAT_SHIFT - shift);
 161     }
 162 }
 163
 164 static inline void update_qscale(MpegEncContext *s)
 165 {
 166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 167                 (FF_LAMBDA_SHIFT + 7);
 168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 169
 170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 171                  FF_LAMBDA_SHIFT;
 172 }
 173
 174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 175 {
 176     int i;
 177
 178     if (matrix) {
 179         put_bits(pb, 1, 1);
 180         for (i = 0; i < 64; i++) {
 181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 182         }
 183     } else
 184         put_bits(pb, 1, 0);
 185 }
 186
 187 /**
 188  * init s->current_picture.qscale_table from s->lambda_table
 189  */
 190 void ff_init_qscale_tab(MpegEncContext *s)
 191 {
 192     int8_t * const qscale_table = s->current_picture.qscale_table;
 193     int i;
 194
 195     for (i = 0; i < s->mb_num; i++) {
 196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 199                                                   s->avctx->qmax);
 200     }
 201 }
 202
 203 static void update_duplicate_context_after_me(MpegEncContext *dst,
 204                                               MpegEncContext *src)
 205 {
 206 #define COPY(a) dst->a= src->a
 207     COPY(pict_type);
 208     COPY(current_picture);
 209     COPY(f_code);
 210     COPY(b_code);
 211     COPY(qscale);
 212     COPY(lambda);
 213     COPY(lambda2);
 214     COPY(picture_in_gop_number);
 215     COPY(gop_picture_number);
 216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 217     COPY(progressive_frame);    // FIXME don't set in encode_header
 218     COPY(partitioned_frame);    // FIXME don't set in encode_header
 219 #undef COPY
 220 }
 221
 222 /**
 223  * Set the given MpegEncContext to defaults for encoding.
 224  * the changed fields will not depend upon the prior state of the MpegEncContext.
 225  */
 226 static void mpv_encode_defaults(MpegEncContext *s)
 227 {
 228     int i;
 229     ff_mpv_common_defaults(s);
 230
 231     for (i = -16; i < 16; i++) {
 232         default_fcode_tab[i + MAX_MV] = 1;
 233     }
 234     s->me.mv_penalty = default_mv_penalty;
 235     s->fcode_tab     = default_fcode_tab;
 236
 237     s->input_picture_number  = 0;
 238     s->picture_in_gop_number = 0;
 239 }
 240
 241 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 242     if (ARCH_X86)
 243         ff_dct_encode_init_x86(s);
 244
 245     if (CONFIG_H263_ENCODER)
 246         ff_h263dsp_init(&s->h263dsp);
 247     if (!s->dct_quantize)
 248         s->dct_quantize = ff_dct_quantize_c;
 249     if (!s->denoise_dct)
 250         s->denoise_dct  = denoise_dct_c;
 251     s->fast_dct_quantize = s->dct_quantize;
 252     if (s->avctx->trellis)
 253         s->dct_quantize  = dct_quantize_trellis_c;
 254
 255     return 0;
 256 }
 257
 258 /* init video encoder */
 259 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 260 {
 261     MpegEncContext *s = avctx->priv_data;
 262     int i, ret, format_supported;
 263
 264     mpv_encode_defaults(s);
 265
 266     switch (avctx->codec_id) {
 267     case AV_CODEC_ID_MPEG2VIDEO:
 268         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 269             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 270             av_log(avctx, AV_LOG_ERROR,
 271                    "only YUV420 and YUV422 are supported\n");
 272             return -1;
 273         }
 274         break;
 275     case AV_CODEC_ID_MJPEG:
 276     case AV_CODEC_ID_AMV:
 277         format_supported = 0;
 278         /* JPEG color space */
 279         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 280             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 281             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 282             (avctx->color_range == AVCOL_RANGE_JPEG &&
 283              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 284               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 285               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 286             format_supported = 1;
 287         /* MPEG color space */
 288         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 289                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 290                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 291                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 292             format_supported = 1;
 293
 294         if (!format_supported) {
 295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 296             return -1;
 297         }
 298         break;
 299     default:
 300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 302             return -1;
 303         }
 304     }
 305
 306     switch (avctx->pix_fmt) {
 307     case AV_PIX_FMT_YUVJ444P:
 308     case AV_PIX_FMT_YUV444P:
 309         s->chroma_format = CHROMA_444;
 310         break;
 311     case AV_PIX_FMT_YUVJ422P:
 312     case AV_PIX_FMT_YUV422P:
 313         s->chroma_format = CHROMA_422;
 314         break;
 315     case AV_PIX_FMT_YUVJ420P:
 316     case AV_PIX_FMT_YUV420P:
 317     default:
 318         s->chroma_format = CHROMA_420;
 319         break;
 320     }
 321
 322     s->bit_rate = avctx->bit_rate;
 323     s->width    = avctx->width;
 324     s->height   = avctx->height;
 325     if (avctx->gop_size > 600 &&
 326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 327         av_log(avctx, AV_LOG_WARNING,
 328                "keyframe interval too large!, reducing it from %d to %d\n",
 329                avctx->gop_size, 600);
 330         avctx->gop_size = 600;
 331     }
 332     s->gop_size     = avctx->gop_size;
 333     s->avctx        = avctx;
 334     if (avctx->max_b_frames > MAX_B_FRAMES) {
 335         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 336                "is %d.\n", MAX_B_FRAMES);
 337         avctx->max_b_frames = MAX_B_FRAMES;
 338     }
 339     s->max_b_frames = avctx->max_b_frames;
 340     s->codec_id     = avctx->codec->id;
 341     s->strict_std_compliance = avctx->strict_std_compliance;
 342     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 343     s->mpeg_quant         = avctx->mpeg_quant;
 344     s->rtp_mode           = !!avctx->rtp_payload_size;
 345     s->intra_dc_precision = avctx->intra_dc_precision;
 346
 347     // workaround some differences between how applications specify dc precision
 348     if (s->intra_dc_precision < 0) {
 349         s->intra_dc_precision += 8;
 350     } else if (s->intra_dc_precision >= 8)
 351         s->intra_dc_precision -= 8;
 352
 353     if (s->intra_dc_precision < 0) {
 354         av_log(avctx, AV_LOG_ERROR,
 355                 "intra dc precision must be positive, note some applications use"
 356                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 357         return AVERROR(EINVAL);
 358     }
 359
 360     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 361         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 362         return AVERROR(EINVAL);
 363     }
 364     s->user_specified_pts = AV_NOPTS_VALUE;
 365
 366     if (s->gop_size <= 1) {
 367         s->intra_only = 1;
 368         s->gop_size   = 12;
 369     } else {
 370         s->intra_only = 0;
 371     }
 372
 373     s->me_method = avctx->me_method;
 374
 375     /* Fixed QSCALE */
 376     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 377
 378 #if FF_API_MPV_OPT
 379     FF_DISABLE_DEPRECATION_WARNINGS
 380     if (avctx->border_masking != 0.0)
 381         s->border_masking = avctx->border_masking;
 382     FF_ENABLE_DEPRECATION_WARNINGS
 383 #endif
 384
 385     s->adaptive_quant = (s->avctx->lumi_masking ||
 386                          s->avctx->dark_masking ||
 387                          s->avctx->temporal_cplx_masking ||
 388                          s->avctx->spatial_cplx_masking  ||
 389                          s->avctx->p_masking      ||
 390                          s->border_masking ||
 391                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 392                         !s->fixed_qscale;
 393
 394     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
 395
 396     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 397         switch(avctx->codec_id) {
 398         case AV_CODEC_ID_MPEG1VIDEO:
 399         case AV_CODEC_ID_MPEG2VIDEO:
 400             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 401             break;
 402         case AV_CODEC_ID_MPEG4:
 403         case AV_CODEC_ID_MSMPEG4V1:
 404         case AV_CODEC_ID_MSMPEG4V2:
 405         case AV_CODEC_ID_MSMPEG4V3:
 406             if       (avctx->rc_max_rate >= 15000000) {
 407                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 408             } else if(avctx->rc_max_rate >=  2000000) {
 409                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 410             } else if(avctx->rc_max_rate >=   384000) {
 411                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 412             } else
 413                 avctx->rc_buffer_size = 40;
 414             avctx->rc_buffer_size *= 16384;
 415             break;
 416         }
 417         if (avctx->rc_buffer_size) {
 418             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 419         }
 420     }
 421
 422     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 423         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 424         return -1;
 425     }
 426
 427     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 428         av_log(avctx, AV_LOG_INFO,
 429                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 430     }
 431
 432     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 433         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 434         return -1;
 435     }
 436
 437     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 438         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 439         return -1;
 440     }
 441
 442     if (avctx->rc_max_rate &&
 443         avctx->rc_max_rate == avctx->bit_rate &&
 444         avctx->rc_max_rate != avctx->rc_min_rate) {
 445         av_log(avctx, AV_LOG_INFO,
 446                "impossible bitrate constraints, this will fail\n");
 447     }
 448
 449     if (avctx->rc_buffer_size &&
 450         avctx->bit_rate * (int64_t)avctx->time_base.num >
 451             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 452         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 453         return -1;
 454     }
 455
 456     if (!s->fixed_qscale &&
 457         avctx->bit_rate * av_q2d(avctx->time_base) >
 458             avctx->bit_rate_tolerance) {
 459         av_log(avctx, AV_LOG_WARNING,
 460                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 461         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 462     }
 463
 464     if (s->avctx->rc_max_rate &&
 465         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 466         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 467          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 468         90000LL * (avctx->rc_buffer_size - 1) >
 469             s->avctx->rc_max_rate * 0xFFFFLL) {
 470         av_log(avctx, AV_LOG_INFO,
 471                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 472                "specified vbv buffer is too large for the given bitrate!\n");
 473     }
 474
 475     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 476         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 477         s->codec_id != AV_CODEC_ID_FLV1) {
 478         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 479         return -1;
 480     }
 481
 482     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 483         av_log(avctx, AV_LOG_ERROR,
 484                "OBMC is only supported with simple mb decision\n");
 485         return -1;
 486     }
 487
 488     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 489         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 490         return -1;
 491     }
 492
 493     if (s->max_b_frames                    &&
 494         s->codec_id != AV_CODEC_ID_MPEG4      &&
 495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 497         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 498         return -1;
 499     }
 500     if (s->max_b_frames < 0) {
 501         av_log(avctx, AV_LOG_ERROR,
 502                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 503         return -1;
 504     }
 505
 506     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 507          s->codec_id == AV_CODEC_ID_H263  ||
 508          s->codec_id == AV_CODEC_ID_H263P) &&
 509         (avctx->sample_aspect_ratio.num > 255 ||
 510          avctx->sample_aspect_ratio.den > 255)) {
 511         av_log(avctx, AV_LOG_WARNING,
 512                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 513                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 514         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 515                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 516     }
 517
 518     if ((s->codec_id == AV_CODEC_ID_H263  ||
 519          s->codec_id == AV_CODEC_ID_H263P) &&
 520         (avctx->width  > 2048 ||
 521          avctx->height > 1152 )) {
 522         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 523         return -1;
 524     }
 525     if ((s->codec_id == AV_CODEC_ID_H263  ||
 526          s->codec_id == AV_CODEC_ID_H263P) &&
 527         ((avctx->width &3) ||
 528          (avctx->height&3) )) {
 529         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 530         return -1;
 531     }
 532
 533     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 534         (avctx->width  > 4095 ||
 535          avctx->height > 4095 )) {
 536         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 537         return -1;
 538     }
 539
 540     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 541         (avctx->width  > 16383 ||
 542          avctx->height > 16383 )) {
 543         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 544         return -1;
 545     }
 546
 547     if (s->codec_id == AV_CODEC_ID_RV10 &&
 548         (avctx->width &15 ||
 549          avctx->height&15 )) {
 550         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 551         return AVERROR(EINVAL);
 552     }
 553
 554     if (s->codec_id == AV_CODEC_ID_RV20 &&
 555         (avctx->width &3 ||
 556          avctx->height&3 )) {
 557         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 558         return AVERROR(EINVAL);
 559     }
 560
 561     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 562          s->codec_id == AV_CODEC_ID_WMV2) &&
 563          avctx->width & 1) {
 564          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 565          return -1;
 566     }
 567
 568     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 569         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 570         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 571         return -1;
 572     }
 573
 574     // FIXME mpeg2 uses that too
 575     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 576                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 577         av_log(avctx, AV_LOG_ERROR,
 578                "mpeg2 style quantization not supported by codec\n");
 579         return -1;
 580     }
 581
 582     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 583         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 584         return -1;
 585     }
 586
 587     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 588         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 589         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 590         return -1;
 591     }
 592
 593     if (s->avctx->scenechange_threshold < 1000000000 &&
 594         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
 595         av_log(avctx, AV_LOG_ERROR,
 596                "closed gop with scene change detection are not supported yet, "
 597                "set threshold to 1000000000\n");
 598         return -1;
 599     }
 600
 601     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
 602         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 603             av_log(avctx, AV_LOG_ERROR,
 604                   "low delay forcing is only available for mpeg2\n");
 605             return -1;
 606         }
 607         if (s->max_b_frames != 0) {
 608             av_log(avctx, AV_LOG_ERROR,
 609                    "b frames cannot be used with low delay\n");
 610             return -1;
 611         }
 612     }
 613
 614     if (s->q_scale_type == 1) {
 615         if (avctx->qmax > 12) {
 616             av_log(avctx, AV_LOG_ERROR,
 617                    "non linear quant only supports qmax <= 12 currently\n");
 618             return -1;
 619         }
 620     }
 621
 622     if (s->avctx->thread_count > 1         &&
 623         s->codec_id != AV_CODEC_ID_MPEG4      &&
 624         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 625         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 626         s->codec_id != AV_CODEC_ID_MJPEG      &&
 627         (s->codec_id != AV_CODEC_ID_H263P)) {
 628         av_log(avctx, AV_LOG_ERROR,
 629                "multi threaded encoding not supported by codec\n");
 630         return -1;
 631     }
 632
 633     if (s->avctx->thread_count < 1) {
 634         av_log(avctx, AV_LOG_ERROR,
 635                "automatic thread number detection not supported by codec, "
 636                "patch welcome\n");
 637         return -1;
 638     }
 639
 640     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 641         s->rtp_mode = 1;
 642
 643     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 644         s->h263_slice_structured = 1;
 645
 646     if (!avctx->time_base.den || !avctx->time_base.num) {
 647         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 648         return -1;
 649     }
 650
 651     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 652         av_log(avctx, AV_LOG_INFO,
 653                "notice: b_frame_strategy only affects the first pass\n");
 654         avctx->b_frame_strategy = 0;
 655     }
 656
 657     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 658     if (i > 1) {
 659         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 660         avctx->time_base.den /= i;
 661         avctx->time_base.num /= i;
 662         //return -1;
 663     }
 664
 665     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 666         // (a + x * 3 / 8) / x
 667         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 668         s->inter_quant_bias = 0;
 669     } else {
 670         s->intra_quant_bias = 0;
 671         // (a - x / 4) / x
 672         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 673     }
 674
 675     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 676         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 677         return AVERROR(EINVAL);
 678     }
 679
 680 #if FF_API_QUANT_BIAS
 681 FF_DISABLE_DEPRECATION_WARNINGS
 682     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
 683         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 684         s->intra_quant_bias = avctx->intra_quant_bias;
 685     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
 686         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 687         s->inter_quant_bias = avctx->inter_quant_bias;
 688 FF_ENABLE_DEPRECATION_WARNINGS
 689 #endif
 690
 691     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 692
 693     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 694         s->avctx->time_base.den > (1 << 16) - 1) {
 695         av_log(avctx, AV_LOG_ERROR,
 696                "timebase %d/%d not supported by MPEG 4 standard, "
 697                "the maximum admitted value for the timebase denominator "
 698                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 699                (1 << 16) - 1);
 700         return -1;
 701     }
 702     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 703
 704     switch (avctx->codec->id) {
 705     case AV_CODEC_ID_MPEG1VIDEO:
 706         s->out_format = FMT_MPEG1;
 707         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 708         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 709         break;
 710     case AV_CODEC_ID_MPEG2VIDEO:
 711         s->out_format = FMT_MPEG1;
 712         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 713         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 714         s->rtp_mode   = 1;
 715         break;
 716     case AV_CODEC_ID_MJPEG:
 717     case AV_CODEC_ID_AMV:
 718         s->out_format = FMT_MJPEG;
 719         s->intra_only = 1; /* force intra only for jpeg */
 720         if (!CONFIG_MJPEG_ENCODER ||
 721             ff_mjpeg_encode_init(s) < 0)
 722             return -1;
 723         avctx->delay = 0;
 724         s->low_delay = 1;
 725         break;
 726     case AV_CODEC_ID_H261:
 727         if (!CONFIG_H261_ENCODER)
 728             return -1;
 729         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 730             av_log(avctx, AV_LOG_ERROR,
 731                    "The specified picture size of %dx%d is not valid for the "
 732                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 733                     s->width, s->height);
 734             return -1;
 735         }
 736         s->out_format = FMT_H261;
 737         avctx->delay  = 0;
 738         s->low_delay  = 1;
 739         s->rtp_mode   = 0; /* Sliced encoding not supported */
 740         break;
 741     case AV_CODEC_ID_H263:
 742         if (!CONFIG_H263_ENCODER)
 743             return -1;
 744         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 745                              s->width, s->height) == 8) {
 746             av_log(avctx, AV_LOG_ERROR,
 747                    "The specified picture size of %dx%d is not valid for "
 748                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 749                    "352x288, 704x576, and 1408x1152. "
 750                    "Try H.263+.\n", s->width, s->height);
 751             return -1;
 752         }
 753         s->out_format = FMT_H263;
 754         avctx->delay  = 0;
 755         s->low_delay  = 1;
 756         break;
 757     case AV_CODEC_ID_H263P:
 758         s->out_format = FMT_H263;
 759         s->h263_plus  = 1;
 760         /* Fx */
 761         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 762         s->modified_quant  = s->h263_aic;
 763         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 764         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 765
 766         /* /Fx */
 767         /* These are just to be sure */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_FLV1:
 772         s->out_format      = FMT_H263;
 773         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 774         s->unrestricted_mv = 1;
 775         s->rtp_mode  = 0; /* don't allow GOB */
 776         avctx->delay = 0;
 777         s->low_delay = 1;
 778         break;
 779     case AV_CODEC_ID_RV10:
 780         s->out_format = FMT_H263;
 781         avctx->delay  = 0;
 782         s->low_delay  = 1;
 783         break;
 784     case AV_CODEC_ID_RV20:
 785         s->out_format      = FMT_H263;
 786         avctx->delay       = 0;
 787         s->low_delay       = 1;
 788         s->modified_quant  = 1;
 789         s->h263_aic        = 1;
 790         s->h263_plus       = 1;
 791         s->loop_filter     = 1;
 792         s->unrestricted_mv = 0;
 793         break;
 794     case AV_CODEC_ID_MPEG4:
 795         s->out_format      = FMT_H263;
 796         s->h263_pred       = 1;
 797         s->unrestricted_mv = 1;
 798         s->low_delay       = s->max_b_frames ? 0 : 1;
 799         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V2:
 802         s->out_format      = FMT_H263;
 803         s->h263_pred       = 1;
 804         s->unrestricted_mv = 1;
 805         s->msmpeg4_version = 2;
 806         avctx->delay       = 0;
 807         s->low_delay       = 1;
 808         break;
 809     case AV_CODEC_ID_MSMPEG4V3:
 810         s->out_format        = FMT_H263;
 811         s->h263_pred         = 1;
 812         s->unrestricted_mv   = 1;
 813         s->msmpeg4_version   = 3;
 814         s->flipflop_rounding = 1;
 815         avctx->delay         = 0;
 816         s->low_delay         = 1;
 817         break;
 818     case AV_CODEC_ID_WMV1:
 819         s->out_format        = FMT_H263;
 820         s->h263_pred         = 1;
 821         s->unrestricted_mv   = 1;
 822         s->msmpeg4_version   = 4;
 823         s->flipflop_rounding = 1;
 824         avctx->delay         = 0;
 825         s->low_delay         = 1;
 826         break;
 827     case AV_CODEC_ID_WMV2:
 828         s->out_format        = FMT_H263;
 829         s->h263_pred         = 1;
 830         s->unrestricted_mv   = 1;
 831         s->msmpeg4_version   = 5;
 832         s->flipflop_rounding = 1;
 833         avctx->delay         = 0;
 834         s->low_delay         = 1;
 835         break;
 836     default:
 837         return -1;
 838     }
 839
 840     avctx->has_b_frames = !s->low_delay;
 841
 842     s->encoding = 1;
 843
 844     s->progressive_frame    =
 845     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 846                                                 CODEC_FLAG_INTERLACED_ME) ||
 847                                 s->alternate_scan);
 848
 849     /* init */
 850     ff_mpv_idct_init(s);
 851     if (ff_mpv_common_init(s) < 0)
 852         return -1;
 853
 854     ff_fdctdsp_init(&s->fdsp, avctx);
 855     ff_me_cmp_init(&s->mecc, avctx);
 856     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 857     ff_pixblockdsp_init(&s->pdsp, avctx);
 858     ff_qpeldsp_init(&s->qdsp);
 859
 860     if (s->msmpeg4_version) {
 861         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 862                           2 * 2 * (MAX_LEVEL + 1) *
 863                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 864     }
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 866
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 868     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 870     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 871     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 872     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 873     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 874                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 875     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 876                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 877
 878     if (s->avctx->noise_reduction) {
 879         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 880                           2 * 64 * sizeof(uint16_t), fail);
 881     }
 882
 883     ff_dct_encode_init(s);
 884
 885     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 886         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 887
 888     s->quant_precision = 5;
 889
 890     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 891     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 892
 893     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 894         ff_h261_encode_init(s);
 895     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 896         ff_h263_encode_init(s);
 897     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 898         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 899             return ret;
 900     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 901         && s->out_format == FMT_MPEG1)
 902         ff_mpeg1_encode_init(s);
 903
 904     /* init q matrix */
 905     for (i = 0; i < 64; i++) {
 906         int j = s->idsp.idct_permutation[i];
 907         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 908             s->mpeg_quant) {
 909             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 910             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 911         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 912             s->intra_matrix[j] =
 913             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 914         } else {
 915             /* mpeg1/2 */
 916             s->chroma_intra_matrix[j] =
 917             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 918             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 919         }
 920         if (s->avctx->intra_matrix)
 921             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 922         if (s->avctx->inter_matrix)
 923             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 924     }
 925
 926     /* precompute matrix */
 927     /* for mjpeg, we do include qscale in the matrix */
 928     if (s->out_format != FMT_MJPEG) {
 929         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 930                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 931                           31, 1);
 932         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 933                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 934                           31, 0);
 935     }
 936
 937     if (ff_rate_control_init(s) < 0)
 938         return -1;
 939
 940 #if FF_API_ERROR_RATE
 941     FF_DISABLE_DEPRECATION_WARNINGS
 942     if (avctx->error_rate)
 943         s->error_rate = avctx->error_rate;
 944     FF_ENABLE_DEPRECATION_WARNINGS;
 945 #endif
 946
 947 #if FF_API_NORMALIZE_AQP
 948     FF_DISABLE_DEPRECATION_WARNINGS
 949     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 950         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 951     FF_ENABLE_DEPRECATION_WARNINGS;
 952 #endif
 953
 954 #if FF_API_MV0
 955     FF_DISABLE_DEPRECATION_WARNINGS
 956     if (avctx->flags & CODEC_FLAG_MV0)
 957         s->mpv_flags |= FF_MPV_FLAG_MV0;
 958     FF_ENABLE_DEPRECATION_WARNINGS
 959 #endif
 960
 961 #if FF_API_MPV_OPT
 962     FF_DISABLE_DEPRECATION_WARNINGS
 963     if (avctx->rc_qsquish != 0.0)
 964         s->rc_qsquish = avctx->rc_qsquish;
 965     if (avctx->rc_qmod_amp != 0.0)
 966         s->rc_qmod_amp = avctx->rc_qmod_amp;
 967     if (avctx->rc_qmod_freq)
 968         s->rc_qmod_freq = avctx->rc_qmod_freq;
 969     if (avctx->rc_buffer_aggressivity != 1.0)
 970         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 971     if (avctx->rc_initial_cplx != 0.0)
 972         s->rc_initial_cplx = avctx->rc_initial_cplx;
 973     if (avctx->lmin)
 974         s->lmin = avctx->lmin;
 975     if (avctx->lmax)
 976         s->lmax = avctx->lmax;
 977
 978     if (avctx->rc_eq) {
 979         av_freep(&s->rc_eq);
 980         s->rc_eq = av_strdup(avctx->rc_eq);
 981         if (!s->rc_eq)
 982             return AVERROR(ENOMEM);
 983     }
 984     FF_ENABLE_DEPRECATION_WARNINGS
 985 #endif
 986
 987     if (avctx->b_frame_strategy == 2) {
 988         for (i = 0; i < s->max_b_frames + 2; i++) {
 989             s->tmp_frames[i] = av_frame_alloc();
 990             if (!s->tmp_frames[i])
 991                 return AVERROR(ENOMEM);
 992
 993             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 994             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 995             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 996
 997             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 998             if (ret < 0)
 999                 return ret;
1000         }
1001     }
1002
1003     return 0;
1004 fail:
1005     ff_mpv_encode_end(avctx);
1006     return AVERROR_UNKNOWN;
1007 }
1008
1009 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1010 {
1011     MpegEncContext *s = avctx->priv_data;
1012     int i;
1013
1014     ff_rate_control_uninit(s);
1015
1016     ff_mpv_common_end(s);
1017     if (CONFIG_MJPEG_ENCODER &&
1018         s->out_format == FMT_MJPEG)
1019         ff_mjpeg_encode_close(s);
1020
1021     av_freep(&avctx->extradata);
1022
1023     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1024         av_frame_free(&s->tmp_frames[i]);
1025
1026     ff_free_picture_tables(&s->new_picture);
1027     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1028
1029     av_freep(&s->avctx->stats_out);
1030     av_freep(&s->ac_stats);
1031
1032     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1033     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1034     s->q_chroma_intra_matrix=   NULL;
1035     s->q_chroma_intra_matrix16= NULL;
1036     av_freep(&s->q_intra_matrix);
1037     av_freep(&s->q_inter_matrix);
1038     av_freep(&s->q_intra_matrix16);
1039     av_freep(&s->q_inter_matrix16);
1040     av_freep(&s->input_picture);
1041     av_freep(&s->reordered_input_picture);
1042     av_freep(&s->dct_offset);
1043
1044     return 0;
1045 }
1046
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between vertically adjacent samples
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1060
1061 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1062                            uint8_t *ref, int stride)
1063 {
1064     int x, y, w, h;
1065     int acc = 0;
1066
1067     w = s->width  & ~15;
1068     h = s->height & ~15;
1069
1070     for (y = 0; y < h; y += 16) {
1071         for (x = 0; x < w; x += 16) {
1072             int offset = x + y * stride;
1073             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1074                                       stride, 16);
1075             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1076             int sae  = get_sae(src + offset, mean, stride);
1077
1078             acc += sae + 500 < sad;
1079         }
1080     }
1081     return acc;
1082 }
1083
1084 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1085 {
1086     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1087                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1088                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1089                             &s->linesize, &s->uvlinesize);
1090 }
1091
1092 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1093 {
1094     Picture *pic = NULL;
1095     int64_t pts;
1096     int i, display_picture_number = 0, ret;
1097     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1098                                                  (s->low_delay ? 0 : 1);
1099     int direct = 1;
1100
1101     if (pic_arg) {
1102         pts = pic_arg->pts;
1103         display_picture_number = s->input_picture_number++;
1104
1105         if (pts != AV_NOPTS_VALUE) {
1106             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1107                 int64_t last = s->user_specified_pts;
1108
1109                 if (pts <= last) {
1110                     av_log(s->avctx, AV_LOG_ERROR,
1111                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1112                            pts, last);
1113                     return AVERROR(EINVAL);
1114                 }
1115
1116                 if (!s->low_delay && display_picture_number == 1)
1117                     s->dts_delta = pts - last;
1118             }
1119             s->user_specified_pts = pts;
1120         } else {
1121             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1122                 s->user_specified_pts =
1123                 pts = s->user_specified_pts + 1;
1124                 av_log(s->avctx, AV_LOG_INFO,
1125                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1126                        pts);
1127             } else {
1128                 pts = display_picture_number;
1129             }
1130         }
1131     }
1132
1133     if (pic_arg) {
1134         if (!pic_arg->buf[0] ||
1135             pic_arg->linesize[0] != s->linesize ||
1136             pic_arg->linesize[1] != s->uvlinesize ||
1137             pic_arg->linesize[2] != s->uvlinesize)
1138             direct = 0;
1139         if ((s->width & 15) || (s->height & 15))
1140             direct = 0;
1141         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1142             direct = 0;
1143         if (s->linesize & (STRIDE_ALIGN-1))
1144             direct = 0;
1145
1146         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1147                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1148
1149         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1150         if (i < 0)
1151             return i;
1152
1153         pic = &s->picture[i];
1154         pic->reference = 3;
1155
1156         if (direct) {
1157             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1158                 return ret;
1159         }
1160         ret = alloc_picture(s, pic, direct);
1161         if (ret < 0)
1162             return ret;
1163
1164         if (!direct) {
1165             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1166                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1167                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1168                 // empty
1169             } else {
1170                 int h_chroma_shift, v_chroma_shift;
1171                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1172                                                  &h_chroma_shift,
1173                                                  &v_chroma_shift);
1174
1175                 for (i = 0; i < 3; i++) {
1176                     int src_stride = pic_arg->linesize[i];
1177                     int dst_stride = i ? s->uvlinesize : s->linesize;
1178                     int h_shift = i ? h_chroma_shift : 0;
1179                     int v_shift = i ? v_chroma_shift : 0;
1180                     int w = s->width  >> h_shift;
1181                     int h = s->height >> v_shift;
1182                     uint8_t *src = pic_arg->data[i];
1183                     uint8_t *dst = pic->f->data[i];
1184                     int vpad = 16;
1185
1186                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1187                         && !s->progressive_sequence
1188                         && FFALIGN(s->height, 32) - s->height > 16)
1189                         vpad = 32;
1190
1191                     if (!s->avctx->rc_buffer_size)
1192                         dst += INPLACE_OFFSET;
1193
1194                     if (src_stride == dst_stride)
1195                         memcpy(dst, src, src_stride * h);
1196                     else {
1197                         int h2 = h;
1198                         uint8_t *dst2 = dst;
1199                         while (h2--) {
1200                             memcpy(dst2, src, w);
1201                             dst2 += dst_stride;
1202                             src += src_stride;
1203                         }
1204                     }
1205                     if ((s->width & 15) || (s->height & (vpad-1))) {
1206                         s->mpvencdsp.draw_edges(dst, dst_stride,
1207                                                 w, h,
1208                                                 16 >> h_shift,
1209                                                 vpad >> v_shift,
1210                                                 EDGE_BOTTOM);
1211                     }
1212                 }
1213             }
1214         }
1215         ret = av_frame_copy_props(pic->f, pic_arg);
1216         if (ret < 0)
1217             return ret;
1218
1219         pic->f->display_picture_number = display_picture_number;
1220         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1221     }
1222
1223     /* shift buffer entries */
1224     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1225         s->input_picture[i - 1] = s->input_picture[i];
1226
1227     s->input_picture[encoding_delay] = (Picture*) pic;
1228
1229     return 0;
1230 }
1231
1232 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1233 {
1234     int x, y, plane;
1235     int score = 0;
1236     int64_t score64 = 0;
1237
1238     for (plane = 0; plane < 3; plane++) {
1239         const int stride = p->f->linesize[plane];
1240         const int bw = plane ? 1 : 2;
1241         for (y = 0; y < s->mb_height * bw; y++) {
1242             for (x = 0; x < s->mb_width * bw; x++) {
1243                 int off = p->shared ? 0 : 16;
1244                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1245                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1246                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1247
1248                 switch (FFABS(s->avctx->frame_skip_exp)) {
1249                 case 0: score    =  FFMAX(score, v);          break;
1250                 case 1: score   += FFABS(v);                  break;
1251                 case 2: score64 += v * (int64_t)v;                       break;
1252                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1253                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1254                 }
1255             }
1256         }
1257     }
1258     emms_c();
1259
1260     if (score)
1261         score64 = score;
1262     if (s->avctx->frame_skip_exp < 0)
1263         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1264                       -1.0/s->avctx->frame_skip_exp);
1265
1266     if (score64 < s->avctx->frame_skip_threshold)
1267         return 1;
1268     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1269         return 1;
1270     return 0;
1271 }
1272
1273 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1274 {
1275     AVPacket pkt = { 0 };
1276     int ret, got_output;
1277
1278     av_init_packet(&pkt);
1279     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1280     if (ret < 0)
1281         return ret;
1282
1283     ret = pkt.size;
1284     av_free_packet(&pkt);
1285     return ret;
1286 }
1287
1288 static int estimate_best_b_count(MpegEncContext *s)
1289 {
1290     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1291     AVCodecContext *c = avcodec_alloc_context3(NULL);
1292     const int scale = s->avctx->brd_scale;
1293     int i, j, out_size, p_lambda, b_lambda, lambda2;
1294     int64_t best_rd  = INT64_MAX;
1295     int best_b_count = -1;
1296
1297     if (!c)
1298         return AVERROR(ENOMEM);
1299     av_assert0(scale >= 0 && scale <= 3);
1300
1301     //emms_c();
1302     //s->next_picture_ptr->quality;
1303     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1304     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1305     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1306     if (!b_lambda) // FIXME we should do this somewhere else
1307         b_lambda = p_lambda;
1308     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1309                FF_LAMBDA_SHIFT;
1310
1311     c->width        = s->width  >> scale;
1312     c->height       = s->height >> scale;
1313     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1314     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1315     c->mb_decision  = s->avctx->mb_decision;
1316     c->me_cmp       = s->avctx->me_cmp;
1317     c->mb_cmp       = s->avctx->mb_cmp;
1318     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1319     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1320     c->time_base    = s->avctx->time_base;
1321     c->max_b_frames = s->max_b_frames;
1322
1323     if (avcodec_open2(c, codec, NULL) < 0)
1324         return -1;
1325
1326     for (i = 0; i < s->max_b_frames + 2; i++) {
1327         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1328                                                 s->next_picture_ptr;
1329         uint8_t *data[4];
1330
1331         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1332             pre_input = *pre_input_ptr;
1333             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1334
1335             if (!pre_input.shared && i) {
1336                 data[0] += INPLACE_OFFSET;
1337                 data[1] += INPLACE_OFFSET;
1338                 data[2] += INPLACE_OFFSET;
1339             }
1340
1341             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1342                                        s->tmp_frames[i]->linesize[0],
1343                                        data[0],
1344                                        pre_input.f->linesize[0],
1345                                        c->width, c->height);
1346             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1347                                        s->tmp_frames[i]->linesize[1],
1348                                        data[1],
1349                                        pre_input.f->linesize[1],
1350                                        c->width >> 1, c->height >> 1);
1351             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1352                                        s->tmp_frames[i]->linesize[2],
1353                                        data[2],
1354                                        pre_input.f->linesize[2],
1355                                        c->width >> 1, c->height >> 1);
1356         }
1357     }
1358
1359     for (j = 0; j < s->max_b_frames + 1; j++) {
1360         int64_t rd = 0;
1361
1362         if (!s->input_picture[j])
1363             break;
1364
1365         c->error[0] = c->error[1] = c->error[2] = 0;
1366
1367         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1368         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1369
1370         out_size = encode_frame(c, s->tmp_frames[0]);
1371
1372         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1373
1374         for (i = 0; i < s->max_b_frames + 1; i++) {
1375             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1376
1377             s->tmp_frames[i + 1]->pict_type = is_p ?
1378                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1379             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1380
1381             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1382
1383             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1384         }
1385
1386         /* get the delayed frames */
1387         while (out_size) {
1388             out_size = encode_frame(c, NULL);
1389             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1390         }
1391
1392         rd += c->error[0] + c->error[1] + c->error[2];
1393
1394         if (rd < best_rd) {
1395             best_rd = rd;
1396             best_b_count = j;
1397         }
1398     }
1399
1400     avcodec_close(c);
1401     av_freep(&c);
1402
1403     return best_b_count;
1404 }
1405
1406 static int select_input_picture(MpegEncContext *s)
1407 {
1408     int i, ret;
1409
1410     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1411         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1412     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1413
1414     /* set next picture type & ordering */
1415     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1416         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1417             if (s->picture_in_gop_number < s->gop_size &&
1418                 s->next_picture_ptr &&
1419                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1420                 // FIXME check that te gop check above is +-1 correct
1421                 av_frame_unref(s->input_picture[0]->f);
1422
1423                 ff_vbv_update(s, 0);
1424
1425                 goto no_output_pic;
1426             }
1427         }
1428
1429         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1430             !s->next_picture_ptr || s->intra_only) {
1431             s->reordered_input_picture[0] = s->input_picture[0];
1432             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1433             s->reordered_input_picture[0]->f->coded_picture_number =
1434                 s->coded_picture_number++;
1435         } else {
1436             int b_frames;
1437
1438             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1439                 for (i = 0; i < s->max_b_frames + 1; i++) {
1440                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1441
1442                     if (pict_num >= s->rc_context.num_entries)
1443                         break;
1444                     if (!s->input_picture[i]) {
1445                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1446                         break;
1447                     }
1448
1449                     s->input_picture[i]->f->pict_type =
1450                         s->rc_context.entry[pict_num].new_pict_type;
1451                 }
1452             }
1453
1454             if (s->avctx->b_frame_strategy == 0) {
1455                 b_frames = s->max_b_frames;
1456                 while (b_frames && !s->input_picture[b_frames])
1457                     b_frames--;
1458             } else if (s->avctx->b_frame_strategy == 1) {
1459                 for (i = 1; i < s->max_b_frames + 1; i++) {
1460                     if (s->input_picture[i] &&
1461                         s->input_picture[i]->b_frame_score == 0) {
1462                         s->input_picture[i]->b_frame_score =
1463                             get_intra_count(s,
1464                                             s->input_picture[i    ]->f->data[0],
1465                                             s->input_picture[i - 1]->f->data[0],
1466                                             s->linesize) + 1;
1467                     }
1468                 }
1469                 for (i = 0; i < s->max_b_frames + 1; i++) {
1470                     if (!s->input_picture[i] ||
1471                         s->input_picture[i]->b_frame_score - 1 >
1472                             s->mb_num / s->avctx->b_sensitivity)
1473                         break;
1474                 }
1475
1476                 b_frames = FFMAX(0, i - 1);
1477
1478                 /* reset scores */
1479                 for (i = 0; i < b_frames + 1; i++) {
1480                     s->input_picture[i]->b_frame_score = 0;
1481                 }
1482             } else if (s->avctx->b_frame_strategy == 2) {
1483                 b_frames = estimate_best_b_count(s);
1484             } else {
1485                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1486                 b_frames = 0;
1487             }
1488
1489             emms_c();
1490
1491             for (i = b_frames - 1; i >= 0; i--) {
1492                 int type = s->input_picture[i]->f->pict_type;
1493                 if (type && type != AV_PICTURE_TYPE_B)
1494                     b_frames = i;
1495             }
1496             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1497                 b_frames == s->max_b_frames) {
1498                 av_log(s->avctx, AV_LOG_ERROR,
1499                        "warning, too many b frames in a row\n");
1500             }
1501
1502             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1503                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1504                     s->gop_size > s->picture_in_gop_number) {
1505                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1506                 } else {
1507                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1508                         b_frames = 0;
1509                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1510                 }
1511             }
1512
1513             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1514                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1515                 b_frames--;
1516
1517             s->reordered_input_picture[0] = s->input_picture[b_frames];
1518             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1519                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1520             s->reordered_input_picture[0]->f->coded_picture_number =
1521                 s->coded_picture_number++;
1522             for (i = 0; i < b_frames; i++) {
1523                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1524                 s->reordered_input_picture[i + 1]->f->pict_type =
1525                     AV_PICTURE_TYPE_B;
1526                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1527                     s->coded_picture_number++;
1528             }
1529         }
1530     }
1531 no_output_pic:
1532     if (s->reordered_input_picture[0]) {
1533         s->reordered_input_picture[0]->reference =
1534            s->reordered_input_picture[0]->f->pict_type !=
1535                AV_PICTURE_TYPE_B ? 3 : 0;
1536
1537         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1538         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1539             return ret;
1540
1541         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1542             // input is a shared pix, so we can't modifiy it -> alloc a new
1543             // one & ensure that the shared one is reuseable
1544
1545             Picture *pic;
1546             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1547             if (i < 0)
1548                 return i;
1549             pic = &s->picture[i];
1550
1551             pic->reference = s->reordered_input_picture[0]->reference;
1552             if (alloc_picture(s, pic, 0) < 0) {
1553                 return -1;
1554             }
1555
1556             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1557             if (ret < 0)
1558                 return ret;
1559
1560             /* mark us unused / free shared pic */
1561             av_frame_unref(s->reordered_input_picture[0]->f);
1562             s->reordered_input_picture[0]->shared = 0;
1563
1564             s->current_picture_ptr = pic;
1565         } else {
1566             // input is not a shared pix -> reuse buffer for current_pix
1567             s->current_picture_ptr = s->reordered_input_picture[0];
1568             for (i = 0; i < 4; i++) {
1569                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1570             }
1571         }
1572         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1573         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1574                                        s->current_picture_ptr)) < 0)
1575             return ret;
1576
1577         s->picture_number = s->new_picture.f->display_picture_number;
1578     } else {
1579         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1580     }
1581     return 0;
1582 }
1583
1584 static void frame_end(MpegEncContext *s)
1585 {
1586     if (s->unrestricted_mv &&
1587         s->current_picture.reference &&
1588         !s->intra_only) {
1589         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1590         int hshift = desc->log2_chroma_w;
1591         int vshift = desc->log2_chroma_h;
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1593                                 s->current_picture.f->linesize[0],
1594                                 s->h_edge_pos, s->v_edge_pos,
1595                                 EDGE_WIDTH, EDGE_WIDTH,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1598                                 s->current_picture.f->linesize[1],
1599                                 s->h_edge_pos >> hshift,
1600                                 s->v_edge_pos >> vshift,
1601                                 EDGE_WIDTH >> hshift,
1602                                 EDGE_WIDTH >> vshift,
1603                                 EDGE_TOP | EDGE_BOTTOM);
1604         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1605                                 s->current_picture.f->linesize[2],
1606                                 s->h_edge_pos >> hshift,
1607                                 s->v_edge_pos >> vshift,
1608                                 EDGE_WIDTH >> hshift,
1609                                 EDGE_WIDTH >> vshift,
1610                                 EDGE_TOP | EDGE_BOTTOM);
1611     }
1612
1613     emms_c();
1614
1615     s->last_pict_type                 = s->pict_type;
1616     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1617     if (s->pict_type!= AV_PICTURE_TYPE_B)
1618         s->last_non_b_pict_type = s->pict_type;
1619
1620     s->avctx->coded_frame = s->current_picture_ptr->f;
1621
1622 }
1623
1624 static void update_noise_reduction(MpegEncContext *s)
1625 {
1626     int intra, i;
1627
1628     for (intra = 0; intra < 2; intra++) {
1629         if (s->dct_count[intra] > (1 << 16)) {
1630             for (i = 0; i < 64; i++) {
1631                 s->dct_error_sum[intra][i] >>= 1;
1632             }
1633             s->dct_count[intra] >>= 1;
1634         }
1635
1636         for (i = 0; i < 64; i++) {
1637             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1638                                        s->dct_count[intra] +
1639                                        s->dct_error_sum[intra][i] / 2) /
1640                                       (s->dct_error_sum[intra][i] + 1);
1641         }
1642     }
1643 }
1644
1645 static int frame_start(MpegEncContext *s)
1646 {
1647     int ret;
1648
1649     /* mark & release old frames */
1650     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1651         s->last_picture_ptr != s->next_picture_ptr &&
1652         s->last_picture_ptr->f->buf[0]) {
1653         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1654     }
1655
1656     s->current_picture_ptr->f->pict_type = s->pict_type;
1657     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1658
1659     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1660     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1661                                    s->current_picture_ptr)) < 0)
1662         return ret;
1663
1664     if (s->pict_type != AV_PICTURE_TYPE_B) {
1665         s->last_picture_ptr = s->next_picture_ptr;
1666         if (!s->droppable)
1667             s->next_picture_ptr = s->current_picture_ptr;
1668     }
1669
1670     if (s->last_picture_ptr) {
1671         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1672         if (s->last_picture_ptr->f->buf[0] &&
1673             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1674                                        s->last_picture_ptr)) < 0)
1675             return ret;
1676     }
1677     if (s->next_picture_ptr) {
1678         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1679         if (s->next_picture_ptr->f->buf[0] &&
1680             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1681                                        s->next_picture_ptr)) < 0)
1682             return ret;
1683     }
1684
1685     if (s->picture_structure!= PICT_FRAME) {
1686         int i;
1687         for (i = 0; i < 4; i++) {
1688             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1689                 s->current_picture.f->data[i] +=
1690                     s->current_picture.f->linesize[i];
1691             }
1692             s->current_picture.f->linesize[i] *= 2;
1693             s->last_picture.f->linesize[i]    *= 2;
1694             s->next_picture.f->linesize[i]    *= 2;
1695         }
1696     }
1697
1698     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1699         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1700         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1701     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1702         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1703         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1704     } else {
1705         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1706         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1707     }
1708
1709     if (s->dct_error_sum) {
1710         av_assert2(s->avctx->noise_reduction && s->encoding);
1711         update_noise_reduction(s);
1712     }
1713
1714     return 0;
1715 }
1716
1717 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1718                           const AVFrame *pic_arg, int *got_packet)
1719 {
1720     MpegEncContext *s = avctx->priv_data;
1721     int i, stuffing_count, ret;
1722     int context_count = s->slice_context_count;
1723
1724     s->picture_in_gop_number++;
1725
1726     if (load_input_picture(s, pic_arg) < 0)
1727         return -1;
1728
1729     if (select_input_picture(s) < 0) {
1730         return -1;
1731     }
1732
1733     /* output? */
1734     if (s->new_picture.f->data[0]) {
1735         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1736         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1737                                               :
1738                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1739         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1740             return ret;
1741         if (s->mb_info) {
1742             s->mb_info_ptr = av_packet_new_side_data(pkt,
1743                                  AV_PKT_DATA_H263_MB_INFO,
1744                                  s->mb_width*s->mb_height*12);
1745             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1746         }
1747
1748         for (i = 0; i < context_count; i++) {
1749             int start_y = s->thread_context[i]->start_mb_y;
1750             int   end_y = s->thread_context[i]->  end_mb_y;
1751             int h       = s->mb_height;
1752             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1753             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1754
1755             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1756         }
1757
1758         s->pict_type = s->new_picture.f->pict_type;
1759         //emms_c();
1760         ret = frame_start(s);
1761         if (ret < 0)
1762             return ret;
1763 vbv_retry:
1764         ret = encode_picture(s, s->picture_number);
1765         if (growing_buffer) {
1766             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1767             pkt->data = s->pb.buf;
1768             pkt->size = avctx->internal->byte_buffer_size;
1769         }
1770         if (ret < 0)
1771             return -1;
1772
1773         avctx->header_bits = s->header_bits;
1774         avctx->mv_bits     = s->mv_bits;
1775         avctx->misc_bits   = s->misc_bits;
1776         avctx->i_tex_bits  = s->i_tex_bits;
1777         avctx->p_tex_bits  = s->p_tex_bits;
1778         avctx->i_count     = s->i_count;
1779         // FIXME f/b_count in avctx
1780         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1781         avctx->skip_count  = s->skip_count;
1782
1783         frame_end(s);
1784
1785         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1786             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1787
1788         if (avctx->rc_buffer_size) {
1789             RateControlContext *rcc = &s->rc_context;
1790             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1791
1792             if (put_bits_count(&s->pb) > max_size &&
1793                 s->lambda < s->lmax) {
1794                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1795                                        (s->qscale + 1) / s->qscale);
1796                 if (s->adaptive_quant) {
1797                     int i;
1798                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1799                         s->lambda_table[i] =
1800                             FFMAX(s->lambda_table[i] + 1,
1801                                   s->lambda_table[i] * (s->qscale + 1) /
1802                                   s->qscale);
1803                 }
1804                 s->mb_skipped = 0;        // done in frame_start()
1805                 // done in encode_picture() so we must undo it
1806                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1807                     if (s->flipflop_rounding          ||
1808                         s->codec_id == AV_CODEC_ID_H263P ||
1809                         s->codec_id == AV_CODEC_ID_MPEG4)
1810                         s->no_rounding ^= 1;
1811                 }
1812                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1813                     s->time_base       = s->last_time_base;
1814                     s->last_non_b_time = s->time - s->pp_time;
1815                 }
1816                 for (i = 0; i < context_count; i++) {
1817                     PutBitContext *pb = &s->thread_context[i]->pb;
1818                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1819                 }
1820                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1821                 goto vbv_retry;
1822             }
1823
1824             av_assert0(s->avctx->rc_max_rate);
1825         }
1826
1827         if (s->avctx->flags & CODEC_FLAG_PASS1)
1828             ff_write_pass1_stats(s);
1829
1830         for (i = 0; i < 4; i++) {
1831             s->current_picture_ptr->f->error[i] =
1832             s->current_picture.f->error[i] =
1833                 s->current_picture.error[i];
1834             avctx->error[i] += s->current_picture_ptr->f->error[i];
1835         }
1836
1837         if (s->avctx->flags & CODEC_FLAG_PASS1)
1838             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1839                    avctx->i_tex_bits + avctx->p_tex_bits ==
1840                        put_bits_count(&s->pb));
1841         flush_put_bits(&s->pb);
1842         s->frame_bits  = put_bits_count(&s->pb);
1843
1844         stuffing_count = ff_vbv_update(s, s->frame_bits);
1845         s->stuffing_bits = 8*stuffing_count;
1846         if (stuffing_count) {
1847             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1848                     stuffing_count + 50) {
1849                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1850                 return -1;
1851             }
1852
1853             switch (s->codec_id) {
1854             case AV_CODEC_ID_MPEG1VIDEO:
1855             case AV_CODEC_ID_MPEG2VIDEO:
1856                 while (stuffing_count--) {
1857                     put_bits(&s->pb, 8, 0);
1858                 }
1859             break;
1860             case AV_CODEC_ID_MPEG4:
1861                 put_bits(&s->pb, 16, 0);
1862                 put_bits(&s->pb, 16, 0x1C3);
1863                 stuffing_count -= 4;
1864                 while (stuffing_count--) {
1865                     put_bits(&s->pb, 8, 0xFF);
1866                 }
1867             break;
1868             default:
1869                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1870             }
1871             flush_put_bits(&s->pb);
1872             s->frame_bits  = put_bits_count(&s->pb);
1873         }
1874
1875         /* update mpeg1/2 vbv_delay for CBR */
1876         if (s->avctx->rc_max_rate                          &&
1877             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1878             s->out_format == FMT_MPEG1                     &&
1879             90000LL * (avctx->rc_buffer_size - 1) <=
1880                 s->avctx->rc_max_rate * 0xFFFFLL) {
1881             int vbv_delay, min_delay;
1882             double inbits  = s->avctx->rc_max_rate *
1883                              av_q2d(s->avctx->time_base);
1884             int    minbits = s->frame_bits - 8 *
1885                              (s->vbv_delay_ptr - s->pb.buf - 1);
1886             double bits    = s->rc_context.buffer_index + minbits - inbits;
1887
1888             if (bits < 0)
1889                 av_log(s->avctx, AV_LOG_ERROR,
1890                        "Internal error, negative bits\n");
1891
1892             assert(s->repeat_first_field == 0);
1893
1894             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1895             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1896                         s->avctx->rc_max_rate;
1897
1898             vbv_delay = FFMAX(vbv_delay, min_delay);
1899
1900             av_assert0(vbv_delay < 0xFFFF);
1901
1902             s->vbv_delay_ptr[0] &= 0xF8;
1903             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1904             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1905             s->vbv_delay_ptr[2] &= 0x07;
1906             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1907             avctx->vbv_delay     = vbv_delay * 300;
1908         }
1909         s->total_bits     += s->frame_bits;
1910         avctx->frame_bits  = s->frame_bits;
1911
1912         pkt->pts = s->current_picture.f->pts;
1913         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1914             if (!s->current_picture.f->coded_picture_number)
1915                 pkt->dts = pkt->pts - s->dts_delta;
1916             else
1917                 pkt->dts = s->reordered_pts;
1918             s->reordered_pts = pkt->pts;
1919         } else
1920             pkt->dts = pkt->pts;
1921         if (s->current_picture.f->key_frame)
1922             pkt->flags |= AV_PKT_FLAG_KEY;
1923         if (s->mb_info)
1924             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1925     } else {
1926         s->frame_bits = 0;
1927     }
1928
1929     /* release non-reference frames */
1930     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1931         if (!s->picture[i].reference)
1932             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1933     }
1934
1935     av_assert1((s->frame_bits & 7) == 0);
1936
1937     pkt->size = s->frame_bits / 8;
1938     *got_packet = !!pkt->size;
1939     return 0;
1940 }
1941
1942 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1943                                                 int n, int threshold)
1944 {
1945     static const char tab[64] = {
1946         3, 2, 2, 1, 1, 1, 1, 1,
1947         1, 1, 1, 1, 1, 1, 1, 1,
1948         1, 1, 1, 1, 1, 1, 1, 1,
1949         0, 0, 0, 0, 0, 0, 0, 0,
1950         0, 0, 0, 0, 0, 0, 0, 0,
1951         0, 0, 0, 0, 0, 0, 0, 0,
1952         0, 0, 0, 0, 0, 0, 0, 0,
1953         0, 0, 0, 0, 0, 0, 0, 0
1954     };
1955     int score = 0;
1956     int run = 0;
1957     int i;
1958     int16_t *block = s->block[n];
1959     const int last_index = s->block_last_index[n];
1960     int skip_dc;
1961
1962     if (threshold < 0) {
1963         skip_dc = 0;
1964         threshold = -threshold;
1965     } else
1966         skip_dc = 1;
1967
1968     /* Are all we could set to zero already zero? */
1969     if (last_index <= skip_dc - 1)
1970         return;
1971
1972     for (i = 0; i <= last_index; i++) {
1973         const int j = s->intra_scantable.permutated[i];
1974         const int level = FFABS(block[j]);
1975         if (level == 1) {
1976             if (skip_dc && i == 0)
1977                 continue;
1978             score += tab[run];
1979             run = 0;
1980         } else if (level > 1) {
1981             return;
1982         } else {
1983             run++;
1984         }
1985     }
1986     if (score >= threshold)
1987         return;
1988     for (i = skip_dc; i <= last_index; i++) {
1989         const int j = s->intra_scantable.permutated[i];
1990         block[j] = 0;
1991     }
1992     if (block[0])
1993         s->block_last_index[n] = 0;
1994     else
1995         s->block_last_index[n] = -1;
1996 }
1997
1998 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1999                                int last_index)
2000 {
2001     int i;
2002     const int maxlevel = s->max_qcoeff;
2003     const int minlevel = s->min_qcoeff;
2004     int overflow = 0;
2005
2006     if (s->mb_intra) {
2007         i = 1; // skip clipping of intra dc
2008     } else
2009         i = 0;
2010
2011     for (; i <= last_index; i++) {
2012         const int j = s->intra_scantable.permutated[i];
2013         int level = block[j];
2014
2015         if (level > maxlevel) {
2016             level = maxlevel;
2017             overflow++;
2018         } else if (level < minlevel) {
2019             level = minlevel;
2020             overflow++;
2021         }
2022
2023         block[j] = level;
2024     }
2025
2026     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2027         av_log(s->avctx, AV_LOG_INFO,
2028                "warning, clipping %d dct coefficients to %d..%d\n",
2029                overflow, minlevel, maxlevel);
2030 }
2031
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel statistics.
 *
 * For every position the pixels of the (up to) 3x3 neighbourhood, clipped to
 * the 8x8 block, are accumulated into a sum and a sum of squares; the weight
 * is 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries indexed as x + 8 * y
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between vertically adjacent pixels in ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2055
2056 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2057                                                 int motion_x, int motion_y,
2058                                                 int mb_block_height,
2059                                                 int mb_block_width,
2060                                                 int mb_block_count)
2061 {
2062     int16_t weight[12][64];
2063     int16_t orig[12][64];
2064     const int mb_x = s->mb_x;
2065     const int mb_y = s->mb_y;
2066     int i;
2067     int skip_dct[12];
2068     int dct_offset = s->linesize * 8; // default for progressive frames
2069     int uv_dct_offset = s->uvlinesize * 8;
2070     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2071     ptrdiff_t wrap_y, wrap_c;
2072
2073     for (i = 0; i < mb_block_count; i++)
2074         skip_dct[i] = s->skipdct;
2075
2076     if (s->adaptive_quant) {
2077         const int last_qp = s->qscale;
2078         const int mb_xy = mb_x + mb_y * s->mb_stride;
2079
2080         s->lambda = s->lambda_table[mb_xy];
2081         update_qscale(s);
2082
2083         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2084             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2085             s->dquant = s->qscale - last_qp;
2086
2087             if (s->out_format == FMT_H263) {
2088                 s->dquant = av_clip(s->dquant, -2, 2);
2089
2090                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2091                     if (!s->mb_intra) {
2092                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2093                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2094                                 s->dquant = 0;
2095                         }
2096                         if (s->mv_type == MV_TYPE_8X8)
2097                             s->dquant = 0;
2098                     }
2099                 }
2100             }
2101         }
2102         ff_set_qscale(s, last_qp + s->dquant);
2103     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2104         ff_set_qscale(s, s->qscale + s->dquant);
2105
2106     wrap_y = s->linesize;
2107     wrap_c = s->uvlinesize;
2108     ptr_y  = s->new_picture.f->data[0] +
2109              (mb_y * 16 * wrap_y)              + mb_x * 16;
2110     ptr_cb = s->new_picture.f->data[1] +
2111              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2112     ptr_cr = s->new_picture.f->data[2] +
2113              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2114
2115     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2116         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2117         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2118         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2119         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2120                                  wrap_y, wrap_y,
2121                                  16, 16, mb_x * 16, mb_y * 16,
2122                                  s->width, s->height);
2123         ptr_y = ebuf;
2124         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2125                                  wrap_c, wrap_c,
2126                                  mb_block_width, mb_block_height,
2127                                  mb_x * mb_block_width, mb_y * mb_block_height,
2128                                  cw, ch);
2129         ptr_cb = ebuf + 16 * wrap_y;
2130         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2131                                  wrap_c, wrap_c,
2132                                  mb_block_width, mb_block_height,
2133                                  mb_x * mb_block_width, mb_y * mb_block_height,
2134                                  cw, ch);
2135         ptr_cr = ebuf + 16 * wrap_y + 16;
2136     }
2137
2138     if (s->mb_intra) {
2139         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2140             int progressive_score, interlaced_score;
2141
2142             s->interlaced_dct = 0;
2143             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2144                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2145                                                      NULL, wrap_y, 8) - 400;
2146
2147             if (progressive_score > 0) {
2148                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2149                                                         NULL, wrap_y * 2, 8) +
2150                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2151                                                         NULL, wrap_y * 2, 8);
2152                 if (progressive_score > interlaced_score) {
2153                     s->interlaced_dct = 1;
2154
2155                     dct_offset = wrap_y;
2156                     uv_dct_offset = wrap_c;
2157                     wrap_y <<= 1;
2158                     if (s->chroma_format == CHROMA_422 ||
2159                         s->chroma_format == CHROMA_444)
2160                         wrap_c <<= 1;
2161                 }
2162             }
2163         }
2164
2165         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2166         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2167         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2168         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2169
2170         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2171             skip_dct[4] = 1;
2172             skip_dct[5] = 1;
2173         } else {
2174             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2175             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2176             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2177                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2178                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2179             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2180                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2181                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2182                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2183                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2184                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2185                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2186             }
2187         }
2188     } else {
2189         op_pixels_func (*op_pix)[4];
2190         qpel_mc_func (*op_qpix)[16];
2191         uint8_t *dest_y, *dest_cb, *dest_cr;
2192
2193         dest_y  = s->dest[0];
2194         dest_cb = s->dest[1];
2195         dest_cr = s->dest[2];
2196
2197         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2198             op_pix  = s->hdsp.put_pixels_tab;
2199             op_qpix = s->qdsp.put_qpel_pixels_tab;
2200         } else {
2201             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2202             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2203         }
2204
2205         if (s->mv_dir & MV_DIR_FORWARD) {
2206             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2207                           s->last_picture.f->data,
2208                           op_pix, op_qpix);
2209             op_pix  = s->hdsp.avg_pixels_tab;
2210             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2211         }
2212         if (s->mv_dir & MV_DIR_BACKWARD) {
2213             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2214                           s->next_picture.f->data,
2215                           op_pix, op_qpix);
2216         }
2217
2218         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2219             int progressive_score, interlaced_score;
2220
2221             s->interlaced_dct = 0;
2222             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2223                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2224                                                      ptr_y + wrap_y * 8,
2225                                                      wrap_y, 8) - 400;
2226
2227             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2228                 progressive_score -= 400;
2229
2230             if (progressive_score > 0) {
2231                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2232                                                         wrap_y * 2, 8) +
2233                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2234                                                         ptr_y + wrap_y,
2235                                                         wrap_y * 2, 8);
2236
2237                 if (progressive_score > interlaced_score) {
2238                     s->interlaced_dct = 1;
2239
2240                     dct_offset = wrap_y;
2241                     uv_dct_offset = wrap_c;
2242                     wrap_y <<= 1;
2243                     if (s->chroma_format == CHROMA_422)
2244                         wrap_c <<= 1;
2245                 }
2246             }
2247         }
2248
2249         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2250         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2251         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2252                             dest_y + dct_offset, wrap_y);
2253         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2254                             dest_y + dct_offset + 8, wrap_y);
2255
2256         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2257             skip_dct[4] = 1;
2258             skip_dct[5] = 1;
2259         } else {
2260             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2261             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2262             if (!s->chroma_y_shift) { /* 422 */
2263                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2264                                     dest_cb + uv_dct_offset, wrap_c);
2265                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2266                                     dest_cr + uv_dct_offset, wrap_c);
2267             }
2268         }
2269         /* pre quantization */
2270         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2271                 2 * s->qscale * s->qscale) {
2272             // FIXME optimize
2273             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2274                 skip_dct[0] = 1;
2275             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2276                 skip_dct[1] = 1;
2277             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2278                                wrap_y, 8) < 20 * s->qscale)
2279                 skip_dct[2] = 1;
2280             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2281                                wrap_y, 8) < 20 * s->qscale)
2282                 skip_dct[3] = 1;
2283             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2284                 skip_dct[4] = 1;
2285             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2286                 skip_dct[5] = 1;
2287             if (!s->chroma_y_shift) { /* 422 */
2288                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2289                                    dest_cb + uv_dct_offset,
2290                                    wrap_c, 8) < 20 * s->qscale)
2291                     skip_dct[6] = 1;
2292                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2293                                    dest_cr + uv_dct_offset,
2294                                    wrap_c, 8) < 20 * s->qscale)
2295                     skip_dct[7] = 1;
2296             }
2297         }
2298     }
2299
2300     if (s->quantizer_noise_shaping) {
2301         if (!skip_dct[0])
2302             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2303         if (!skip_dct[1])
2304             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2305         if (!skip_dct[2])
2306             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2307         if (!skip_dct[3])
2308             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2309         if (!skip_dct[4])
2310             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2311         if (!skip_dct[5])
2312             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2313         if (!s->chroma_y_shift) { /* 422 */
2314             if (!skip_dct[6])
2315                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2316                                   wrap_c);
2317             if (!skip_dct[7])
2318                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2319                                   wrap_c);
2320         }
2321         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2322     }
2323
2324     /* DCT & quantize */
2325     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2326     {
2327         for (i = 0; i < mb_block_count; i++) {
2328             if (!skip_dct[i]) {
2329                 int overflow;
2330                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2331                 // FIXME we could decide to change to quantizer instead of
2332                 // clipping
2333                 // JS: I don't think that would be a good idea it could lower
2334                 //     quality instead of improve it. Just INTRADC clipping
2335                 //     deserves changes in quantizer
2336                 if (overflow)
2337                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2338             } else
2339                 s->block_last_index[i] = -1;
2340         }
2341         if (s->quantizer_noise_shaping) {
2342             for (i = 0; i < mb_block_count; i++) {
2343                 if (!skip_dct[i]) {
2344                     s->block_last_index[i] =
2345                         dct_quantize_refine(s, s->block[i], weight[i],
2346                                             orig[i], i, s->qscale);
2347                 }
2348             }
2349         }
2350
2351         if (s->luma_elim_threshold && !s->mb_intra)
2352             for (i = 0; i < 4; i++)
2353                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2354         if (s->chroma_elim_threshold && !s->mb_intra)
2355             for (i = 4; i < mb_block_count; i++)
2356                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2357
2358         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2359             for (i = 0; i < mb_block_count; i++) {
2360                 if (s->block_last_index[i] == -1)
2361                     s->coded_score[i] = INT_MAX / 256;
2362             }
2363         }
2364     }
2365
2366     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2367         s->block_last_index[4] =
2368         s->block_last_index[5] = 0;
2369         s->block[4][0] =
2370         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2371         if (!s->chroma_y_shift) { /* 422 / 444 */
2372             for (i=6; i<12; i++) {
2373                 s->block_last_index[i] = 0;
2374                 s->block[i][0] = s->block[4][0];
2375             }
2376         }
2377     }
2378
2379     // non c quantize code returns incorrect block_last_index FIXME
2380     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2381         for (i = 0; i < mb_block_count; i++) {
2382             int j;
2383             if (s->block_last_index[i] > 0) {
2384                 for (j = 63; j > 0; j--) {
2385                     if (s->block[i][s->intra_scantable.permutated[j]])
2386                         break;
2387                 }
2388                 s->block_last_index[i] = j;
2389             }
2390         }
2391     }
2392
2393     /* huffman encode */
2394     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2395     case AV_CODEC_ID_MPEG1VIDEO:
2396     case AV_CODEC_ID_MPEG2VIDEO:
2397         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2398             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2399         break;
2400     case AV_CODEC_ID_MPEG4:
2401         if (CONFIG_MPEG4_ENCODER)
2402             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2403         break;
2404     case AV_CODEC_ID_MSMPEG4V2:
2405     case AV_CODEC_ID_MSMPEG4V3:
2406     case AV_CODEC_ID_WMV1:
2407         if (CONFIG_MSMPEG4_ENCODER)
2408             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2409         break;
2410     case AV_CODEC_ID_WMV2:
2411         if (CONFIG_WMV2_ENCODER)
2412             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2413         break;
2414     case AV_CODEC_ID_H261:
2415         if (CONFIG_H261_ENCODER)
2416             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2417         break;
2418     case AV_CODEC_ID_H263:
2419     case AV_CODEC_ID_H263P:
2420     case AV_CODEC_ID_FLV1:
2421     case AV_CODEC_ID_RV10:
2422     case AV_CODEC_ID_RV20:
2423         if (CONFIG_H263_ENCODER)
2424             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2425         break;
2426     case AV_CODEC_ID_MJPEG:
2427     case AV_CODEC_ID_AMV:
2428         if (CONFIG_MJPEG_ENCODER)
2429             ff_mjpeg_encode_mb(s, s->block);
2430         break;
2431     default:
2432         av_assert1(0);
2433     }
2434 }
2435
2436 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2437 {
2438     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2439     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2440     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2441 }
2442
2443 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2444     int i;
2445
2446     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2447
2448     /* mpeg1 */
2449     d->mb_skip_run= s->mb_skip_run;
2450     for(i=0; i<3; i++)
2451         d->last_dc[i] = s->last_dc[i];
2452
2453     /* statistics */
2454     d->mv_bits= s->mv_bits;
2455     d->i_tex_bits= s->i_tex_bits;
2456     d->p_tex_bits= s->p_tex_bits;
2457     d->i_count= s->i_count;
2458     d->f_count= s->f_count;
2459     d->b_count= s->b_count;
2460     d->skip_count= s->skip_count;
2461     d->misc_bits= s->misc_bits;
2462     d->last_bits= 0;
2463
2464     d->mb_skipped= 0;
2465     d->qscale= s->qscale;
2466     d->dquant= s->dquant;
2467
2468     d->esc3_level_length= s->esc3_level_length;
2469 }
2470
2471 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2472     int i;
2473
2474     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2475     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2476
2477     /* mpeg1 */
2478     d->mb_skip_run= s->mb_skip_run;
2479     for(i=0; i<3; i++)
2480         d->last_dc[i] = s->last_dc[i];
2481
2482     /* statistics */
2483     d->mv_bits= s->mv_bits;
2484     d->i_tex_bits= s->i_tex_bits;
2485     d->p_tex_bits= s->p_tex_bits;
2486     d->i_count= s->i_count;
2487     d->f_count= s->f_count;
2488     d->b_count= s->b_count;
2489     d->skip_count= s->skip_count;
2490     d->misc_bits= s->misc_bits;
2491
2492     d->mb_intra= s->mb_intra;
2493     d->mb_skipped= s->mb_skipped;
2494     d->mv_type= s->mv_type;
2495     d->mv_dir= s->mv_dir;
2496     d->pb= s->pb;
2497     if(s->data_partitioning){
2498         d->pb2= s->pb2;
2499         d->tex_pb= s->tex_pb;
2500     }
2501     d->block= s->block;
2502     for(i=0; i<8; i++)
2503         d->block_last_index[i]= s->block_last_index[i];
2504     d->interlaced_dct= s->interlaced_dct;
2505     d->qscale= s->qscale;
2506
2507     d->esc3_level_length= s->esc3_level_length;
2508 }
2509
2510 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2511                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2512                            int *dmin, int *next_block, int motion_x, int motion_y)
2513 {
2514     int score;
2515     uint8_t *dest_backup[3];
2516
2517     copy_context_before_encode(s, backup, type);
2518
2519     s->block= s->blocks[*next_block];
2520     s->pb= pb[*next_block];
2521     if(s->data_partitioning){
2522         s->pb2   = pb2   [*next_block];
2523         s->tex_pb= tex_pb[*next_block];
2524     }
2525
2526     if(*next_block){
2527         memcpy(dest_backup, s->dest, sizeof(s->dest));
2528         s->dest[0] = s->sc.rd_scratchpad;
2529         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2530         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2531         av_assert0(s->linesize >= 32); //FIXME
2532     }
2533
2534     encode_mb(s, motion_x, motion_y);
2535
2536     score= put_bits_count(&s->pb);
2537     if(s->data_partitioning){
2538         score+= put_bits_count(&s->pb2);
2539         score+= put_bits_count(&s->tex_pb);
2540     }
2541
2542     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2543         ff_mpv_decode_mb(s, s->block);
2544
2545         score *= s->lambda2;
2546         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2547     }
2548
2549     if(*next_block){
2550         memcpy(s->dest, dest_backup, sizeof(s->dest));
2551     }
2552
2553     if(score<*dmin){
2554         *dmin= score;
2555         *next_block^=1;
2556
2557         copy_context_after_encode(best, s, type);
2558     }
2559 }
2560
2561 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2562     uint32_t *sq = ff_square_tab + 256;
2563     int acc=0;
2564     int x,y;
2565
2566     if(w==16 && h==16)
2567         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2568     else if(w==8 && h==8)
2569         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2570
2571     for(y=0; y<h; y++){
2572         for(x=0; x<w; x++){
2573             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2574         }
2575     }
2576
2577     av_assert2(acc>=0);
2578
2579     return acc;
2580 }
2581
2582 static int sse_mb(MpegEncContext *s){
2583     int w= 16;
2584     int h= 16;
2585
2586     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2587     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2588
2589     if(w==16 && h==16)
2590       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2591         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2592                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2593                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2594       }else{
2595         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2596                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2597                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2598       }
2599     else
2600         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2601                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2602                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2603 }
2604
2605 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2606     MpegEncContext *s= *(void**)arg;
2607
2608
2609     s->me.pre_pass=1;
2610     s->me.dia_size= s->avctx->pre_dia_size;
2611     s->first_slice_line=1;
2612     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2613         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2614             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2615         }
2616         s->first_slice_line=0;
2617     }
2618
2619     s->me.pre_pass=0;
2620
2621     return 0;
2622 }
2623
2624 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2625     MpegEncContext *s= *(void**)arg;
2626
2627     ff_check_alignment();
2628
2629     s->me.dia_size= s->avctx->dia_size;
2630     s->first_slice_line=1;
2631     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2632         s->mb_x=0; //for block init below
2633         ff_init_block_index(s);
2634         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2635             s->block_index[0]+=2;
2636             s->block_index[1]+=2;
2637             s->block_index[2]+=2;
2638             s->block_index[3]+=2;
2639
2640             /* compute motion vector & mb_type and store in context */
2641             if(s->pict_type==AV_PICTURE_TYPE_B)
2642                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2643             else
2644                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2645         }
2646         s->first_slice_line=0;
2647     }
2648     return 0;
2649 }
2650
2651 static int mb_var_thread(AVCodecContext *c, void *arg){
2652     MpegEncContext *s= *(void**)arg;
2653     int mb_x, mb_y;
2654
2655     ff_check_alignment();
2656
2657     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2658         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2659             int xx = mb_x * 16;
2660             int yy = mb_y * 16;
2661             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2662             int varc;
2663             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2664
2665             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2666                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2667
2668             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2669             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2670             s->me.mb_var_sum_temp    += varc;
2671         }
2672     }
2673     return 0;
2674 }
2675
2676 static void write_slice_end(MpegEncContext *s){
2677     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2678         if(s->partitioned_frame){
2679             ff_mpeg4_merge_partitions(s);
2680         }
2681
2682         ff_mpeg4_stuffing(&s->pb);
2683     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2684         ff_mjpeg_encode_stuffing(s);
2685     }
2686
2687     avpriv_align_put_bits(&s->pb);
2688     flush_put_bits(&s->pb);
2689
2690     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2691         s->misc_bits+= get_bits_diff(s);
2692 }
2693
2694 static void write_mb_info(MpegEncContext *s)
2695 {
2696     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2697     int offset = put_bits_count(&s->pb);
2698     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2699     int gobn = s->mb_y / s->gob_index;
2700     int pred_x, pred_y;
2701     if (CONFIG_H263_ENCODER)
2702         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2703     bytestream_put_le32(&ptr, offset);
2704     bytestream_put_byte(&ptr, s->qscale);
2705     bytestream_put_byte(&ptr, gobn);
2706     bytestream_put_le16(&ptr, mba);
2707     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2708     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2709     /* 4MV not implemented */
2710     bytestream_put_byte(&ptr, 0); /* hmv2 */
2711     bytestream_put_byte(&ptr, 0); /* vmv2 */
2712 }
2713
2714 static void update_mb_info(MpegEncContext *s, int startcode)
2715 {
2716     if (!s->mb_info)
2717         return;
2718     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2719         s->mb_info_size += 12;
2720         s->prev_mb_info = s->last_mb_info;
2721     }
2722     if (startcode) {
2723         s->prev_mb_info = put_bits_count(&s->pb)/8;
2724         /* This might have incremented mb_info_size above, and we return without
2725          * actually writing any info into that slot yet. But in that case,
2726          * this will be called again at the start of the after writing the
2727          * start code, actually writing the mb info. */
2728         return;
2729     }
2730
2731     s->last_mb_info = put_bits_count(&s->pb)/8;
2732     if (!s->mb_info_size)
2733         s->mb_info_size += 12;
2734     write_mb_info(s);
2735 }
2736
2737 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2738 {
2739     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2740         && s->slice_context_count == 1
2741         && s->pb.buf == s->avctx->internal->byte_buffer) {
2742         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2743         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2744
2745         uint8_t *new_buffer = NULL;
2746         int new_buffer_size = 0;
2747
2748         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2749                               s->avctx->internal->byte_buffer_size + size_increase);
2750         if (!new_buffer)
2751             return AVERROR(ENOMEM);
2752
2753         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2754         av_free(s->avctx->internal->byte_buffer);
2755         s->avctx->internal->byte_buffer      = new_buffer;
2756         s->avctx->internal->byte_buffer_size = new_buffer_size;
2757         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2758         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2759         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2760     }
2761     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2762         return AVERROR(EINVAL);
2763     return 0;
2764 }
2765
2766 static int encode_thread(AVCodecContext *c, void *arg){
2767     MpegEncContext *s= *(void**)arg;
2768     int mb_x, mb_y, pdif = 0;
2769     int chr_h= 16>>s->chroma_y_shift;
2770     int i, j;
2771     MpegEncContext best_s = { 0 }, backup_s;
2772     uint8_t bit_buf[2][MAX_MB_BYTES];
2773     uint8_t bit_buf2[2][MAX_MB_BYTES];
2774     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2775     PutBitContext pb[2], pb2[2], tex_pb[2];
2776
2777     ff_check_alignment();
2778
2779     for(i=0; i<2; i++){
2780         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2781         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2782         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2783     }
2784
2785     s->last_bits= put_bits_count(&s->pb);
2786     s->mv_bits=0;
2787     s->misc_bits=0;
2788     s->i_tex_bits=0;
2789     s->p_tex_bits=0;
2790     s->i_count=0;
2791     s->f_count=0;
2792     s->b_count=0;
2793     s->skip_count=0;
2794
2795     for(i=0; i<3; i++){
2796         /* init last dc values */
2797         /* note: quant matrix value (8) is implied here */
2798         s->last_dc[i] = 128 << s->intra_dc_precision;
2799
2800         s->current_picture.error[i] = 0;
2801     }
2802     if(s->codec_id==AV_CODEC_ID_AMV){
2803         s->last_dc[0] = 128*8/13;
2804         s->last_dc[1] = 128*8/14;
2805         s->last_dc[2] = 128*8/14;
2806     }
2807     s->mb_skip_run = 0;
2808     memset(s->last_mv, 0, sizeof(s->last_mv));
2809
2810     s->last_mv_dir = 0;
2811
2812     switch(s->codec_id){
2813     case AV_CODEC_ID_H263:
2814     case AV_CODEC_ID_H263P:
2815     case AV_CODEC_ID_FLV1:
2816         if (CONFIG_H263_ENCODER)
2817             s->gob_index = H263_GOB_HEIGHT(s->height);
2818         break;
2819     case AV_CODEC_ID_MPEG4:
2820         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2821             ff_mpeg4_init_partitions(s);
2822         break;
2823     }
2824
2825     s->resync_mb_x=0;
2826     s->resync_mb_y=0;
2827     s->first_slice_line = 1;
2828     s->ptr_lastgob = s->pb.buf;
2829     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2830         s->mb_x=0;
2831         s->mb_y= mb_y;
2832
2833         ff_set_qscale(s, s->qscale);
2834         ff_init_block_index(s);
2835
2836         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2837             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2838             int mb_type= s->mb_type[xy];
2839 //            int d;
2840             int dmin= INT_MAX;
2841             int dir;
2842             int size_increase =  s->avctx->internal->byte_buffer_size/4
2843                                + s->mb_width*MAX_MB_BYTES;
2844
2845             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2846             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2847                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2848                 return -1;
2849             }
2850             if(s->data_partitioning){
2851                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2852                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2853                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2854                     return -1;
2855                 }
2856             }
2857
2858             s->mb_x = mb_x;
2859             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2860             ff_update_block_index(s);
2861
2862             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2863                 ff_h261_reorder_mb_index(s);
2864                 xy= s->mb_y*s->mb_stride + s->mb_x;
2865                 mb_type= s->mb_type[xy];
2866             }
2867
2868             /* write gob / video packet header  */
2869             if(s->rtp_mode){
2870                 int current_packet_size, is_gob_start;
2871
2872                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2873
2874                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2875
2876                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2877
2878                 switch(s->codec_id){
2879                 case AV_CODEC_ID_H263:
2880                 case AV_CODEC_ID_H263P:
2881                     if(!s->h263_slice_structured)
2882                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2883                     break;
2884                 case AV_CODEC_ID_MPEG2VIDEO:
2885                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2886                 case AV_CODEC_ID_MPEG1VIDEO:
2887                     if(s->mb_skip_run) is_gob_start=0;
2888                     break;
2889                 case AV_CODEC_ID_MJPEG:
2890                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2891                     break;
2892                 }
2893
2894                 if(is_gob_start){
2895                     if(s->start_mb_y != mb_y || mb_x!=0){
2896                         write_slice_end(s);
2897
2898                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2899                             ff_mpeg4_init_partitions(s);
2900                         }
2901                     }
2902
2903                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2904                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2905
2906                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2907                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2908                         int d = 100 / s->error_rate;
2909                         if(r % d == 0){
2910                             current_packet_size=0;
2911                             s->pb.buf_ptr= s->ptr_lastgob;
2912                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2913                         }
2914                     }
2915
2916                     if (s->avctx->rtp_callback){
2917                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2918                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2919                     }
2920                     update_mb_info(s, 1);
2921
2922                     switch(s->codec_id){
2923                     case AV_CODEC_ID_MPEG4:
2924                         if (CONFIG_MPEG4_ENCODER) {
2925                             ff_mpeg4_encode_video_packet_header(s);
2926                             ff_mpeg4_clean_buffers(s);
2927                         }
2928                     break;
2929                     case AV_CODEC_ID_MPEG1VIDEO:
2930                     case AV_CODEC_ID_MPEG2VIDEO:
2931                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2932                             ff_mpeg1_encode_slice_header(s);
2933                             ff_mpeg1_clean_buffers(s);
2934                         }
2935                     break;
2936                     case AV_CODEC_ID_H263:
2937                     case AV_CODEC_ID_H263P:
2938                         if (CONFIG_H263_ENCODER)
2939                             ff_h263_encode_gob_header(s, mb_y);
2940                     break;
2941                     }
2942
2943                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2944                         int bits= put_bits_count(&s->pb);
2945                         s->misc_bits+= bits - s->last_bits;
2946                         s->last_bits= bits;
2947                     }
2948
2949                     s->ptr_lastgob += current_packet_size;
2950                     s->first_slice_line=1;
2951                     s->resync_mb_x=mb_x;
2952                     s->resync_mb_y=mb_y;
2953                 }
2954             }
2955
2956             if(  (s->resync_mb_x   == s->mb_x)
2957                && s->resync_mb_y+1 == s->mb_y){
2958                 s->first_slice_line=0;
2959             }
2960
2961             s->mb_skipped=0;
2962             s->dquant=0; //only for QP_RD
2963
2964             update_mb_info(s, 0);
2965
2966             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2967                 int next_block=0;
2968                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2969
2970                 copy_context_before_encode(&backup_s, s, -1);
2971                 backup_s.pb= s->pb;
2972                 best_s.data_partitioning= s->data_partitioning;
2973                 best_s.partitioned_frame= s->partitioned_frame;
2974                 if(s->data_partitioning){
2975                     backup_s.pb2= s->pb2;
2976                     backup_s.tex_pb= s->tex_pb;
2977                 }
2978
2979                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2980                     s->mv_dir = MV_DIR_FORWARD;
2981                     s->mv_type = MV_TYPE_16X16;
2982                     s->mb_intra= 0;
2983                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2984                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2985                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2986                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2987                 }
2988                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2989                     s->mv_dir = MV_DIR_FORWARD;
2990                     s->mv_type = MV_TYPE_FIELD;
2991                     s->mb_intra= 0;
2992                     for(i=0; i<2; i++){
2993                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2994                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2995                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2996                     }
2997                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2998                                  &dmin, &next_block, 0, 0);
2999                 }
3000                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3001                     s->mv_dir = MV_DIR_FORWARD;
3002                     s->mv_type = MV_TYPE_16X16;
3003                     s->mb_intra= 0;
3004                     s->mv[0][0][0] = 0;
3005                     s->mv[0][0][1] = 0;
3006                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3007                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3008                 }
3009                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3010                     s->mv_dir = MV_DIR_FORWARD;
3011                     s->mv_type = MV_TYPE_8X8;
3012                     s->mb_intra= 0;
3013                     for(i=0; i<4; i++){
3014                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3015                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3016                     }
3017                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3018                                  &dmin, &next_block, 0, 0);
3019                 }
3020                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3021                     s->mv_dir = MV_DIR_FORWARD;
3022                     s->mv_type = MV_TYPE_16X16;
3023                     s->mb_intra= 0;
3024                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3025                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3026                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3027                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3028                 }
3029                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3030                     s->mv_dir = MV_DIR_BACKWARD;
3031                     s->mv_type = MV_TYPE_16X16;
3032                     s->mb_intra= 0;
3033                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3034                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3036                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3037                 }
3038                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3039                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3040                     s->mv_type = MV_TYPE_16X16;
3041                     s->mb_intra= 0;
3042                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3043                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3044                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3045                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3046                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3047                                  &dmin, &next_block, 0, 0);
3048                 }
3049                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3050                     s->mv_dir = MV_DIR_FORWARD;
3051                     s->mv_type = MV_TYPE_FIELD;
3052                     s->mb_intra= 0;
3053                     for(i=0; i<2; i++){
3054                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3055                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3056                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3057                     }
3058                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3059                                  &dmin, &next_block, 0, 0);
3060                 }
3061                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3062                     s->mv_dir = MV_DIR_BACKWARD;
3063                     s->mv_type = MV_TYPE_FIELD;
3064                     s->mb_intra= 0;
3065                     for(i=0; i<2; i++){
3066                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3067                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3068                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3069                     }
3070                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3071                                  &dmin, &next_block, 0, 0);
3072                 }
3073                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3074                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3075                     s->mv_type = MV_TYPE_FIELD;
3076                     s->mb_intra= 0;
3077                     for(dir=0; dir<2; dir++){
3078                         for(i=0; i<2; i++){
3079                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3080                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3081                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3082                         }
3083                     }
3084                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3085                                  &dmin, &next_block, 0, 0);
3086                 }
3087                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3088                     s->mv_dir = 0;
3089                     s->mv_type = MV_TYPE_16X16;
3090                     s->mb_intra= 1;
3091                     s->mv[0][0][0] = 0;
3092                     s->mv[0][0][1] = 0;
3093                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3094                                  &dmin, &next_block, 0, 0);
3095                     if(s->h263_pred || s->h263_aic){
3096                         if(best_s.mb_intra)
3097                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3098                         else
3099                             ff_clean_intra_table_entries(s); //old mode?
3100                     }
3101                 }
3102
3103                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3104                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3105                         const int last_qp= backup_s.qscale;
3106                         int qpi, qp, dc[6];
3107                         int16_t ac[6][16];
3108                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3109                         static const int dquant_tab[4]={-1,1,-2,2};
3110                         int storecoefs = s->mb_intra && s->dc_val[0];
3111
3112                         av_assert2(backup_s.dquant == 0);
3113
3114                         //FIXME intra
3115                         s->mv_dir= best_s.mv_dir;
3116                         s->mv_type = MV_TYPE_16X16;
3117                         s->mb_intra= best_s.mb_intra;
3118                         s->mv[0][0][0] = best_s.mv[0][0][0];
3119                         s->mv[0][0][1] = best_s.mv[0][0][1];
3120                         s->mv[1][0][0] = best_s.mv[1][0][0];
3121                         s->mv[1][0][1] = best_s.mv[1][0][1];
3122
3123                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3124                         for(; qpi<4; qpi++){
3125                             int dquant= dquant_tab[qpi];
3126                             qp= last_qp + dquant;
3127                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3128                                 continue;
3129                             backup_s.dquant= dquant;
3130                             if(storecoefs){
3131                                 for(i=0; i<6; i++){
3132                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3133                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3134                                 }
3135                             }
3136
3137                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3138                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3139                             if(best_s.qscale != qp){
3140                                 if(storecoefs){
3141                                     for(i=0; i<6; i++){
3142                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3143                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3144                                     }
3145                                 }
3146                             }
3147                         }
3148                     }
3149                 }
3150                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3151                     int mx= s->b_direct_mv_table[xy][0];
3152                     int my= s->b_direct_mv_table[xy][1];
3153
3154                     backup_s.dquant = 0;
3155                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3156                     s->mb_intra= 0;
3157                     ff_mpeg4_set_direct_mv(s, mx, my);
3158                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3159                                  &dmin, &next_block, mx, my);
3160                 }
3161                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3162                     backup_s.dquant = 0;
3163                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3164                     s->mb_intra= 0;
3165                     ff_mpeg4_set_direct_mv(s, 0, 0);
3166                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3167                                  &dmin, &next_block, 0, 0);
3168                 }
3169                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3170                     int coded=0;
3171                     for(i=0; i<6; i++)
3172                         coded |= s->block_last_index[i];
3173                     if(coded){
3174                         int mx,my;
3175                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3176                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3177                             mx=my=0; //FIXME find the one we actually used
3178                             ff_mpeg4_set_direct_mv(s, mx, my);
3179                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3180                             mx= s->mv[1][0][0];
3181                             my= s->mv[1][0][1];
3182                         }else{
3183                             mx= s->mv[0][0][0];
3184                             my= s->mv[0][0][1];
3185                         }
3186
3187                         s->mv_dir= best_s.mv_dir;
3188                         s->mv_type = best_s.mv_type;
3189                         s->mb_intra= 0;
3190 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3191                         s->mv[0][0][1] = best_s.mv[0][0][1];
3192                         s->mv[1][0][0] = best_s.mv[1][0][0];
3193                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3194                         backup_s.dquant= 0;
3195                         s->skipdct=1;
3196                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3197                                         &dmin, &next_block, mx, my);
3198                         s->skipdct=0;
3199                     }
3200                 }
3201
3202                 s->current_picture.qscale_table[xy] = best_s.qscale;
3203
3204                 copy_context_after_encode(s, &best_s, -1);
3205
3206                 pb_bits_count= put_bits_count(&s->pb);
3207                 flush_put_bits(&s->pb);
3208                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3209                 s->pb= backup_s.pb;
3210
3211                 if(s->data_partitioning){
3212                     pb2_bits_count= put_bits_count(&s->pb2);
3213                     flush_put_bits(&s->pb2);
3214                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3215                     s->pb2= backup_s.pb2;
3216
3217                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3218                     flush_put_bits(&s->tex_pb);
3219                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3220                     s->tex_pb= backup_s.tex_pb;
3221                 }
3222                 s->last_bits= put_bits_count(&s->pb);
3223
3224                 if (CONFIG_H263_ENCODER &&
3225                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3226                     ff_h263_update_motion_val(s);
3227
3228                 if(next_block==0){ //FIXME 16 vs linesize16
3229                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3230                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3231                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3232                 }
3233
3234                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3235                     ff_mpv_decode_mb(s, s->block);
3236             } else {
3237                 int motion_x = 0, motion_y = 0;
3238                 s->mv_type=MV_TYPE_16X16;
3239                 // only one MB-Type possible
3240
3241                 switch(mb_type){
3242                 case CANDIDATE_MB_TYPE_INTRA:
3243                     s->mv_dir = 0;
3244                     s->mb_intra= 1;
3245                     motion_x= s->mv[0][0][0] = 0;
3246                     motion_y= s->mv[0][0][1] = 0;
3247                     break;
3248                 case CANDIDATE_MB_TYPE_INTER:
3249                     s->mv_dir = MV_DIR_FORWARD;
3250                     s->mb_intra= 0;
3251                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3252                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3253                     break;
3254                 case CANDIDATE_MB_TYPE_INTER_I:
3255                     s->mv_dir = MV_DIR_FORWARD;
3256                     s->mv_type = MV_TYPE_FIELD;
3257                     s->mb_intra= 0;
3258                     for(i=0; i<2; i++){
3259                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3260                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3261                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3262                     }
3263                     break;
3264                 case CANDIDATE_MB_TYPE_INTER4V:
3265                     s->mv_dir = MV_DIR_FORWARD;
3266                     s->mv_type = MV_TYPE_8X8;
3267                     s->mb_intra= 0;
3268                     for(i=0; i<4; i++){
3269                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3270                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3271                     }
3272                     break;
3273                 case CANDIDATE_MB_TYPE_DIRECT:
3274                     if (CONFIG_MPEG4_ENCODER) {
3275                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3276                         s->mb_intra= 0;
3277                         motion_x=s->b_direct_mv_table[xy][0];
3278                         motion_y=s->b_direct_mv_table[xy][1];
3279                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3280                     }
3281                     break;
3282                 case CANDIDATE_MB_TYPE_DIRECT0:
3283                     if (CONFIG_MPEG4_ENCODER) {
3284                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3285                         s->mb_intra= 0;
3286                         ff_mpeg4_set_direct_mv(s, 0, 0);
3287                     }
3288                     break;
3289                 case CANDIDATE_MB_TYPE_BIDIR:
3290                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3291                     s->mb_intra= 0;
3292                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3293                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3294                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3295                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3296                     break;
3297                 case CANDIDATE_MB_TYPE_BACKWARD:
3298                     s->mv_dir = MV_DIR_BACKWARD;
3299                     s->mb_intra= 0;
3300                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3301                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3302                     break;
3303                 case CANDIDATE_MB_TYPE_FORWARD:
3304                     s->mv_dir = MV_DIR_FORWARD;
3305                     s->mb_intra= 0;
3306                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3307                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3308                     break;
3309                 case CANDIDATE_MB_TYPE_FORWARD_I:
3310                     s->mv_dir = MV_DIR_FORWARD;
3311                     s->mv_type = MV_TYPE_FIELD;
3312                     s->mb_intra= 0;
3313                     for(i=0; i<2; i++){
3314                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3315                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3316                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3317                     }
3318                     break;
3319                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3320                     s->mv_dir = MV_DIR_BACKWARD;
3321                     s->mv_type = MV_TYPE_FIELD;
3322                     s->mb_intra= 0;
3323                     for(i=0; i<2; i++){
3324                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3325                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3326                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3327                     }
3328                     break;
3329                 case CANDIDATE_MB_TYPE_BIDIR_I:
3330                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3331                     s->mv_type = MV_TYPE_FIELD;
3332                     s->mb_intra= 0;
3333                     for(dir=0; dir<2; dir++){
3334                         for(i=0; i<2; i++){
3335                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3336                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3337                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3338                         }
3339                     }
3340                     break;
3341                 default:
3342                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3343                 }
3344
3345                 encode_mb(s, motion_x, motion_y);
3346
3347                 // RAL: Update last macroblock type
3348                 s->last_mv_dir = s->mv_dir;
3349
3350                 if (CONFIG_H263_ENCODER &&
3351                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3352                     ff_h263_update_motion_val(s);
3353
3354                 ff_mpv_decode_mb(s, s->block);
3355             }
3356
3357             /* clean the MV table in IPS frames for direct mode in B frames */
3358             if(s->mb_intra /* && I,P,S_TYPE */){
3359                 s->p_mv_table[xy][0]=0;
3360                 s->p_mv_table[xy][1]=0;
3361             }
3362
3363             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3364                 int w= 16;
3365                 int h= 16;
3366
3367                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3368                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3369
3370                 s->current_picture.error[0] += sse(
3371                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3372                     s->dest[0], w, h, s->linesize);
3373                 s->current_picture.error[1] += sse(
3374                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3375                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3376                 s->current_picture.error[2] += sse(
3377                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3378                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3379             }
3380             if(s->loop_filter){
3381                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3382                     ff_h263_loop_filter(s);
3383             }
3384             ff_dlog(s->avctx, "MB %d %d bits\n",
3385                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3386         }
3387     }
3388
3389     //not beautiful here but we must write it before flushing so it has to be here
3390     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3391         ff_msmpeg4_encode_ext_header(s);
3392
3393     write_slice_end(s);
3394
3395     /* Send the last GOB if RTP */
3396     if (s->avctx->rtp_callback) {
3397         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3398         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3399         /* Call the RTP callback to send the last GOB */
3400         emms_c();
3401         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3402     }
3403
3404     return 0;
3405 }
3406
/**
 * Add src->field to dst->field, then reset src->field to zero.
 *
 * Relies on pointers named dst and src being in scope where it is used.
 * The do { } while (0) wrapper keeps both statements together when the macro
 * is the body of an unbraced if/else/loop; use it as a statement: MERGE(x);
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3408 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3409     MERGE(me.scene_change_score);
3410     MERGE(me.mc_mb_var_sum_temp);
3411     MERGE(me.mb_var_sum_temp);
3412 }
3413
3414 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3415     int i;
3416
3417     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3418     MERGE(dct_count[1]);
3419     MERGE(mv_bits);
3420     MERGE(i_tex_bits);
3421     MERGE(p_tex_bits);
3422     MERGE(i_count);
3423     MERGE(f_count);
3424     MERGE(b_count);
3425     MERGE(skip_count);
3426     MERGE(misc_bits);
3427     MERGE(er.error_count);
3428     MERGE(padding_bug_score);
3429     MERGE(current_picture.error[0]);
3430     MERGE(current_picture.error[1]);
3431     MERGE(current_picture.error[2]);
3432
3433     if(dst->avctx->noise_reduction){
3434         for(i=0; i<64; i++){
3435             MERGE(dct_error_sum[0][i]);
3436             MERGE(dct_error_sum[1][i]);
3437         }
3438     }
3439
3440     assert(put_bits_count(&src->pb) % 8 ==0);
3441     assert(put_bits_count(&dst->pb) % 8 ==0);
3442     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3443     flush_put_bits(&dst->pb);
3444 }
3445
3446 static int estimate_qp(MpegEncContext *s, int dry_run){
3447     if (s->next_lambda){
3448         s->current_picture_ptr->f->quality =
3449         s->current_picture.f->quality = s->next_lambda;
3450         if(!dry_run) s->next_lambda= 0;
3451     } else if (!s->fixed_qscale) {
3452         s->current_picture_ptr->f->quality =
3453         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3454         if (s->current_picture.f->quality < 0)
3455             return -1;
3456     }
3457
3458     if(s->adaptive_quant){
3459         switch(s->codec_id){
3460         case AV_CODEC_ID_MPEG4:
3461             if (CONFIG_MPEG4_ENCODER)
3462                 ff_clean_mpeg4_qscales(s);
3463             break;
3464         case AV_CODEC_ID_H263:
3465         case AV_CODEC_ID_H263P:
3466         case AV_CODEC_ID_FLV1:
3467             if (CONFIG_H263_ENCODER)
3468                 ff_clean_h263_qscales(s);
3469             break;
3470         default:
3471             ff_init_qscale_tab(s);
3472         }
3473
3474         s->lambda= s->lambda_table[0];
3475         //FIXME broken
3476     }else
3477         s->lambda = s->current_picture.f->quality;
3478     update_qscale(s);
3479     return 0;
3480 }
3481
3482 /* must be called before writing the header */
3483 static void set_frame_distances(MpegEncContext * s){
3484     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3485     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3486
3487     if(s->pict_type==AV_PICTURE_TYPE_B){
3488         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3489         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3490     }else{
3491         s->pp_time= s->time - s->last_non_b_time;
3492         s->last_non_b_time= s->time;
3493         assert(s->picture_number==0 || s->pp_time > 0);
3494     }
3495 }
3496
3497 static int encode_picture(MpegEncContext *s, int picture_number)
3498 {
3499     int i, ret;
3500     int bits;
3501     int context_count = s->slice_context_count;
3502
3503     s->picture_number = picture_number;
3504
3505     /* Reset the average MB variance */
3506     s->me.mb_var_sum_temp    =
3507     s->me.mc_mb_var_sum_temp = 0;
3508
3509     /* we need to initialize some time vars before we can encode b-frames */
3510     // RAL: Condition added for MPEG1VIDEO
3511     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3512         set_frame_distances(s);
3513     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3514         ff_set_mpeg4_time(s);
3515
3516     s->me.scene_change_score=0;
3517
3518 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3519
3520     if(s->pict_type==AV_PICTURE_TYPE_I){
3521         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3522         else                        s->no_rounding=0;
3523     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3524         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3525             s->no_rounding ^= 1;
3526     }
3527
3528     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3529         if (estimate_qp(s,1) < 0)
3530             return -1;
3531         ff_get_2pass_fcode(s);
3532     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3533         if(s->pict_type==AV_PICTURE_TYPE_B)
3534             s->lambda= s->last_lambda_for[s->pict_type];
3535         else
3536             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3537         update_qscale(s);
3538     }
3539
3540     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3541         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3542         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3543         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3544         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3545     }
3546
3547     s->mb_intra=0; //for the rate distortion & bit compare functions
3548     for(i=1; i<context_count; i++){
3549         ret = ff_update_duplicate_context(s->thread_context[i], s);
3550         if (ret < 0)
3551             return ret;
3552     }
3553
3554     if(ff_init_me(s)<0)
3555         return -1;
3556
3557     /* Estimate motion for every MB */
3558     if(s->pict_type != AV_PICTURE_TYPE_I){
3559         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3560         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3561         if (s->pict_type != AV_PICTURE_TYPE_B) {
3562             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3563                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3564             }
3565         }
3566
3567         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3568     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3569         /* I-Frame */
3570         for(i=0; i<s->mb_stride*s->mb_height; i++)
3571             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3572
3573         if(!s->fixed_qscale){
3574             /* finding spatial complexity for I-frame rate control */
3575             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3576         }
3577     }
3578     for(i=1; i<context_count; i++){
3579         merge_context_after_me(s, s->thread_context[i]);
3580     }
3581     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3582     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3583     emms_c();
3584
3585     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3586         s->pict_type= AV_PICTURE_TYPE_I;
3587         for(i=0; i<s->mb_stride*s->mb_height; i++)
3588             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3589         if(s->msmpeg4_version >= 3)
3590             s->no_rounding=1;
3591         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3592                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3593     }
3594
3595     if(!s->umvplus){
3596         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3597             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3598
3599             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3600                 int a,b;
3601                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3602                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3603                 s->f_code= FFMAX3(s->f_code, a, b);
3604             }
3605
3606             ff_fix_long_p_mvs(s);
3607             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3608             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3609                 int j;
3610                 for(i=0; i<2; i++){
3611                     for(j=0; j<2; j++)
3612                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3613                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3614                 }
3615             }
3616         }
3617
3618         if(s->pict_type==AV_PICTURE_TYPE_B){
3619             int a, b;
3620
3621             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3622             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3623             s->f_code = FFMAX(a, b);
3624
3625             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3626             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3627             s->b_code = FFMAX(a, b);
3628
3629             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3630             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3631             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3632             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3633             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3634                 int dir, j;
3635                 for(dir=0; dir<2; dir++){
3636                     for(i=0; i<2; i++){
3637                         for(j=0; j<2; j++){
3638                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3639                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3640                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3641                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3642                         }
3643                     }
3644                 }
3645             }
3646         }
3647     }
3648
3649     if (estimate_qp(s, 0) < 0)
3650         return -1;
3651
3652     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3653         s->pict_type == AV_PICTURE_TYPE_I &&
3654         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3655         s->qscale= 3; //reduce clipping problems
3656
3657     if (s->out_format == FMT_MJPEG) {
3658         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3659         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3660
3661         if (s->avctx->intra_matrix) {
3662             chroma_matrix =
3663             luma_matrix = s->avctx->intra_matrix;
3664         }
3665         if (s->avctx->chroma_intra_matrix)
3666             chroma_matrix = s->avctx->chroma_intra_matrix;
3667
3668         /* for mjpeg, we do include qscale in the matrix */
3669         for(i=1;i<64;i++){
3670             int j = s->idsp.idct_permutation[i];
3671
3672             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3673             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3674         }
3675         s->y_dc_scale_table=
3676         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3677         s->chroma_intra_matrix[0] =
3678         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3679         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3680                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3681         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3682                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3683         s->qscale= 8;
3684     }
3685     if(s->codec_id == AV_CODEC_ID_AMV){
3686         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3687         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3688         for(i=1;i<64;i++){
3689             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3690
3691             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3692             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3693         }
3694         s->y_dc_scale_table= y;
3695         s->c_dc_scale_table= c;
3696         s->intra_matrix[0] = 13;
3697         s->chroma_intra_matrix[0] = 14;
3698         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3699                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3700         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3701                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3702         s->qscale= 8;
3703     }
3704
3705     //FIXME var duplication
3706     s->current_picture_ptr->f->key_frame =
3707     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3708     s->current_picture_ptr->f->pict_type =
3709     s->current_picture.f->pict_type = s->pict_type;
3710
3711     if (s->current_picture.f->key_frame)
3712         s->picture_in_gop_number=0;
3713
3714     s->mb_x = s->mb_y = 0;
3715     s->last_bits= put_bits_count(&s->pb);
3716     switch(s->out_format) {
3717     case FMT_MJPEG:
3718         if (CONFIG_MJPEG_ENCODER)
3719             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3720                                            s->intra_matrix, s->chroma_intra_matrix);
3721         break;
3722     case FMT_H261:
3723         if (CONFIG_H261_ENCODER)
3724             ff_h261_encode_picture_header(s, picture_number);
3725         break;
3726     case FMT_H263:
3727         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3728             ff_wmv2_encode_picture_header(s, picture_number);
3729         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3730             ff_msmpeg4_encode_picture_header(s, picture_number);
3731         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3732             ff_mpeg4_encode_picture_header(s, picture_number);
3733         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3734             ret = ff_rv10_encode_picture_header(s, picture_number);
3735             if (ret < 0)
3736                 return ret;
3737         }
3738         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3739             ff_rv20_encode_picture_header(s, picture_number);
3740         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3741             ff_flv_encode_picture_header(s, picture_number);
3742         else if (CONFIG_H263_ENCODER)
3743             ff_h263_encode_picture_header(s, picture_number);
3744         break;
3745     case FMT_MPEG1:
3746         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3747             ff_mpeg1_encode_picture_header(s, picture_number);
3748         break;
3749     default:
3750         av_assert0(0);
3751     }
3752     bits= put_bits_count(&s->pb);
3753     s->header_bits= bits - s->last_bits;
3754
3755     for(i=1; i<context_count; i++){
3756         update_duplicate_context_after_me(s->thread_context[i], s);
3757     }
3758     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3759     for(i=1; i<context_count; i++){
3760         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3761             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3762         merge_context_after_encode(s, s->thread_context[i]);
3763     }
3764     emms_c();
3765     return 0;
3766 }
3767
3768 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3769     const int intra= s->mb_intra;
3770     int i;
3771
3772     s->dct_count[intra]++;
3773
3774     for(i=0; i<64; i++){
3775         int level= block[i];
3776
3777         if(level){
3778             if(level>0){
3779                 s->dct_error_sum[intra][i] += level;
3780                 level -= s->dct_offset[intra][i];
3781                 if(level<0) level=0;
3782             }else{
3783                 s->dct_error_sum[intra][i] -= level;
3784                 level += s->dct_offset[intra][i];
3785                 if(level>0) level=0;
3786             }
3787             block[i]= level;
3788         }
3789     }
3790 }
3791
/**
 * Forward-DCT and quantize one 8x8 block, picking the quantized levels with a
 * rate-distortion search instead of plain rounding.
 *
 * Every coefficient up to the last one that passes the threshold test gets
 * one or two candidate levels (the rounded level and, when it is larger than
 * one, the next level toward zero). A dynamic-programming pass over the scan
 * positions scores each candidate as distortion plus lambda times the code
 * length read from the VLC length tables (or the escape length), and the
 * cheapest chain of (run, level) pairs is written back into block[].
 * The score of the chosen solution is stored in s->coded_score[n].
 *
 * @param s        encoder context
 * @param block    in: samples to transform; out: quantized coefficients
 * @param n        block index; for intra blocks n < 4 selects the luma DC
 *                 scale and matrix, larger values the chroma ones
 * @param qscale   quantizer scale, used to index the q_*_matrix tables
 * @param overflow set to 1 if the OR of the candidate magnitudes exceeds
 *                 s->max_qcoeff (an overflow might have happened), else 0
 * @return scan index of the last nonzero coefficient; a value below the
 *         first coded position (0 for intra, -1 for inter) when no
 *         coefficient is kept
 */
3792 static int dct_quantize_trellis_c(MpegEncContext *s,
3793                                   int16_t *block, int n,
3794                                   int qscale, int *overflow){
3795     const int *qmat;
3796     const uint16_t *matrix;
3797     const uint8_t *scantable= s->intra_scantable.scantable;
3798     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3799     int max=0;
3800     unsigned int threshold1, threshold2;
3801     int bias=0;
     // run_tab[k] / level_tab[k]: best (run, level) pair found for scan position k-1
3802     int run_tab[65];
3803     int level_tab[65];
     // score_tab[k]: best score found for everything before scan position k
3804     int score_tab[65];
     // survivor[]: positions still considered as predecessor states of the search
3805     int survivor[65];
3806     int survivor_count;
3807     int last_run=0;
3808     int last_level=0;
3809     int last_score= 0;
3810     int last_i;
     // coeff[c][i]: candidate level c for scan position i, coeff_count[i] of them are valid
3811     int coeff[2][64];
3812     int coeff_count[64];
3813     int qmul, qadd, start_i, last_non_zero, i, dc;
3814     const int esc_length= s->ac_esc_length;
3815     uint8_t * length;
3816     uint8_t * last_length;
3817     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3818
3819     s->fdsp.fdct(block);
3820
3821     if(s->dct_error_sum)
3822         s->denoise_dct(s, block);
     // H.263/H.261 style reconstruction factors, used below as alevel*qmul + qadd
3823     qmul= qscale*16;
3824     qadd= ((qscale-1)|1)*8;
3825
3826     if (s->mb_intra) {
3827         int q;
3828         if (!s->h263_aic) {
3829             if (n < 4)
3830                 q = s->y_dc_scale;
3831             else
3832                 q = s->c_dc_scale;
3833             q = q << 3;
3834         } else{
3835             /* For AIC we skip quant/dequant of INTRADC */
3836             q = 1 << 3;
3837             qadd=0;
3838         }
3839
3840         /* note: block[0] is assumed to be positive */
         /* the intra DC coefficient is quantized here by rounded division and
          * is excluded from the search (start_i = 1) */
3841         block[0] = (block[0] + (q >> 1)) / q;
3842         start_i = 1;
3843         last_non_zero = 0;
3844         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3845         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3846         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3847             bias= 1<<(QMAT_SHIFT-1);
3848
3849         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3850             length     = s->intra_chroma_ac_vlc_length;
3851             last_length= s->intra_chroma_ac_vlc_last_length;
3852         } else {
3853             length     = s->intra_ac_vlc_length;
3854             last_length= s->intra_ac_vlc_last_length;
3855         }
3856     } else {
3857         start_i = 0;
3858         last_non_zero = -1;
3859         qmat = s->q_inter_matrix[qscale];
3860         matrix = s->inter_matrix;
3861         length     = s->inter_ac_vlc_length;
3862         last_length= s->inter_ac_vlc_last_length;
3863     }
3864     last_i= start_i;
3865
     /* (unsigned)(level + threshold1) > threshold2 is a single-compare test for
      * level > threshold1 || level < -threshold1 */
3866     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3867     threshold2= (threshold1<<1);
3868
     /* scan backwards for the last coefficient that passes the threshold */
3869     for(i=63; i>=start_i; i--) {
3870         const int j = scantable[i];
3871         int level = block[j] * qmat[j];
3872
3873         if(((unsigned)(level+threshold1))>threshold2){
3874             last_non_zero = i;
3875             break;
3876         }
3877     }
3878
     /* build the candidate levels for every position up to last_non_zero */
3879     for(i=start_i; i<=last_non_zero; i++) {
3880         const int j = scantable[i];
3881         int level = block[j] * qmat[j];
3882
3883 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3884 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3885         if(((unsigned)(level+threshold1))>threshold2){
3886             if(level>0){
3887                 level= (bias + level)>>QMAT_SHIFT;
3888                 coeff[0][i]= level;
3889                 coeff[1][i]= level-1;
3890 //                coeff[2][k]= level-2;
3891             }else{
3892                 level= (bias - level)>>QMAT_SHIFT;
3893                 coeff[0][i]= -level;
3894                 coeff[1][i]= -level+1;
3895 //                coeff[2][k]= -level+2;
3896             }
             /* level holds the magnitude here; a magnitude of 1 gets a single
              * candidate so that no candidate is ever 0 */
3897             coeff_count[i]= FFMIN(level, 2);
3898             av_assert2(coeff_count[i]);
3899             max |=level;
3900         }else{
             /* below the threshold: the only candidate is -1 for a negative
              * product and +1 otherwise (assuming an arithmetic right shift
              * of a 32-bit int) */
3901             coeff[0][i]= (level>>31)|1;
3902             coeff_count[i]= 1;
3903         }
3904     }
3905
3906     *overflow= s->max_qcoeff < max; //overflow might have happened
3907
     /* nothing passed the threshold: clear the searched part of the block */
3908     if(last_non_zero < start_i){
3909         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3910         return last_non_zero;
3911     }
3912
3913     score_tab[start_i]= 0;
3914     survivor[0]= start_i;
3915     survivor_count= 1;
3916
     /* forward pass: for each position find the cheapest way to code it as the
      * end of a run that starts at one of the surviving positions */
3917     for(i=start_i; i<=last_non_zero; i++){
3918         int level_index, j, zero_distortion;
3919         int dct_coeff= FFABS(block[ scantable[i] ]);
3920         int best_score=256*256*256*120;
3921
3922         if (s->fdsp.fdct == ff_fdct_ifast)
3923             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3924         zero_distortion= dct_coeff*dct_coeff;
3925
3926         for(level_index=0; level_index < coeff_count[i]; level_index++){
3927             int distortion;
3928             int level= coeff[level_index][i];
3929             const int alevel= FFABS(level);
3930             int unquant_coeff;
3931
3932             av_assert2(level);
3933
             /* reconstruct the magnitude this candidate would decode to */
3934             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3935                 unquant_coeff= alevel*qmul + qadd;
3936             } else if(s->out_format == FMT_MJPEG) {
3937                 j = s->idsp.idct_permutation[scantable[i]];
3938                 unquant_coeff = alevel * matrix[j] * 8;
3939             }else{ //MPEG1
3940                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3941                 if(s->mb_intra){
3942                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3943                         unquant_coeff =   (unquant_coeff - 1) | 1;
3944                 }else{
3945                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3946                         unquant_coeff =   (unquant_coeff - 1) | 1;
3947                 }
3948                 unquant_coeff<<= 3;
3949             }
3950
             /* distortion is measured relative to coding this coefficient as zero */
3951             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* after the +64 offset, (level & ~127) == 0 means the original
              * level lies in [-64, 63] and is looked up in the length tables;
              * anything else is charged the escape length */
3952             level+=64;
3953             if((level&(~127)) == 0){
3954                 for(j=survivor_count-1; j>=0; j--){
3955                     int run= i - survivor[j];
3956                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3957                     score += score_tab[i-run];
3958
3959                     if(score < best_score){
3960                         best_score= score;
3961                         run_tab[i+1]= run;
3962                         level_tab[i+1]= level-64;
3963                     }
3964                 }
3965
                 /* for H.263/H.261 also track the best solution in which this
                  * coefficient is the last one, scored with last_length[] */
3966                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3967                     for(j=survivor_count-1; j>=0; j--){
3968                         int run= i - survivor[j];
3969                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3970                         score += score_tab[i-run];
3971                         if(score < last_score){
3972                             last_score= score;
3973                             last_run= run;
3974                             last_level= level-64;
3975                             last_i= i+1;
3976                         }
3977                     }
3978                 }
3979             }else{
3980                 distortion += esc_length*lambda;
3981                 for(j=survivor_count-1; j>=0; j--){
3982                     int run= i - survivor[j];
3983                     int score= distortion + score_tab[i-run];
3984
3985                     if(score < best_score){
3986                         best_score= score;
3987                         run_tab[i+1]= run;
3988                         level_tab[i+1]= level-64;
3989                     }
3990                 }
3991
3992                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3993                   for(j=survivor_count-1; j>=0; j--){
3994                         int run= i - survivor[j];
3995                         int score= distortion + score_tab[i-run];
3996                         if(score < last_score){
3997                             last_score= score;
3998                             last_run= run;
3999                             last_level= level-64;
4000                             last_i= i+1;
4001                         }
4002                     }
4003                 }
4004             }
4005         }
4006
4007         score_tab[i+1]= best_score;
4008
4009         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
         /* drop trailing survivors whose score is worse than the new best score
          * (with a slack of lambda when last_non_zero > 27) */
4010         if(last_non_zero <= 27){
4011             for(; survivor_count; survivor_count--){
4012                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4013                     break;
4014             }
4015         }else{
4016             for(; survivor_count; survivor_count--){
4017                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4018                     break;
4019             }
4020         }
4021
4022         survivor[ survivor_count++ ]= i+1;
4023     }
4024
     /* formats other than H.263/H.261: choose the end position afterwards from
      * score_tab[], adding 2*lambda for every end position other than 0 */
4025     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4026         last_score= 256*256*256*120;
4027         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4028             int score= score_tab[i];
4029             if(i) score += lambda*2; //FIXME exacter?
4030
4031             if(score < last_score){
4032                 last_score= score;
4033                 last_i= i;
4034                 last_level= level_tab[i];
4035                 last_run= run_tab[i];
4036             }
4037         }
4038     }
4039
4040     s->coded_score[n] = last_score;
4041
     /* remember |block[0]| before the block is cleared; it is used by the
      * single-coefficient case below */
4042     dc= FFABS(block[0]);
4043     last_non_zero= last_i - 1;
4044     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4045
4046     if(last_non_zero < start_i)
4047         return last_non_zero;
4048
     /* special case: only position 0 of a non-intra block is left; re-decide its
      * level (including 0) against the saved value dc */
4049     if(last_non_zero == 0 && start_i == 0){
4050         int best_level= 0;
4051         int best_score= dc * dc;
4052
4053         for(i=0; i<coeff_count[0]; i++){
4054             int level= coeff[i][0];
4055             int alevel= FFABS(level);
4056             int unquant_coeff, score, distortion;
4057
4058             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4059                     unquant_coeff= (alevel*qmul + qadd)>>3;
4060             }else{ //MPEG1
4061                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4062                     unquant_coeff =   (unquant_coeff - 1) | 1;
4063             }
4064             unquant_coeff = (unquant_coeff + 4) >> 3;
4065             unquant_coeff<<= 3 + 3;
4066
4067             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4068             level+=64;
4069             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4070             else                    score= distortion + esc_length*lambda;
4071
4072             if(score < best_score){
4073                 best_score= score;
4074                 best_level= level - 64;
4075             }
4076         }
4077         block[0]= best_level;
4078         s->coded_score[n] = best_score - dc*dc;
4079         if(best_level == 0) return -1;
4080         else                return last_non_zero;
4081     }
4082
     /* backtrack: store the last level, then follow run_tab[] toward the start
      * and write each chosen level at its permutated position */
4083     i= last_i;
4084     av_assert2(last_level);
4085
4086     block[ perm_scantable[last_non_zero] ]= last_level;
4087     i -= last_run + 1;
4088
4089     for(; i>start_i; i -= run_tab[i] + 1){
4090         block[ perm_scantable[i-1] ]= level_tab[i];
4091     }
4092
4093     return last_non_zero;
4094 }
4095
4096 //#define REFINE_STATS 1
4097 static int16_t basis[64][64];
4098
4099 static void build_basis(uint8_t *perm){
4100     int i, j, x, y;
4101     emms_c();
4102     for(i=0; i<8; i++){
4103         for(j=0; j<8; j++){
4104             for(y=0; y<8; y++){
4105                 for(x=0; x<8; x++){
4106                     double s= 0.25*(1<<BASIS_SHIFT);
4107                     int index= 8*i + j;
4108                     int perm_index= perm[index];
4109                     if(i==0) s*= sqrt(0.5);
4110                     if(j==0) s*= sqrt(0.5);
4111                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4112                 }
4113             }
4114         }
4115     }
4116 }
4117
4118 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4119                         int16_t *block, int16_t *weight, int16_t *orig,
4120                         int n, int qscale){
4121     int16_t rem[64];
4122     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4123     const uint8_t *scantable= s->intra_scantable.scantable;
4124     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4125 //    unsigned int threshold1, threshold2;
4126 //    int bias=0;
4127     int run_tab[65];
4128     int prev_run=0;
4129     int prev_level=0;
4130     int qmul, qadd, start_i, last_non_zero, i, dc;
4131     uint8_t * length;
4132     uint8_t * last_length;
4133     int lambda;
4134     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4135 #ifdef REFINE_STATS
4136 static int count=0;
4137 static int after_last=0;
4138 static int to_zero=0;
4139 static int from_zero=0;
4140 static int raise=0;
4141 static int lower=0;
4142 static int messed_sign=0;
4143 #endif
4144
4145     if(basis[0][0] == 0)
4146         build_basis(s->idsp.idct_permutation);
4147
4148     qmul= qscale*2;
4149     qadd= (qscale-1)|1;
4150     if (s->mb_intra) {
4151         if (!s->h263_aic) {
4152             if (n < 4)
4153                 q = s->y_dc_scale;
4154             else
4155                 q = s->c_dc_scale;
4156         } else{
4157             /* For AIC we skip quant/dequant of INTRADC */
4158             q = 1;
4159             qadd=0;
4160         }
4161         q <<= RECON_SHIFT-3;
4162         /* note: block[0] is assumed to be positive */
4163         dc= block[0]*q;
4164 //        block[0] = (block[0] + (q >> 1)) / q;
4165         start_i = 1;
4166 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4167 //            bias= 1<<(QMAT_SHIFT-1);
4168         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4169             length     = s->intra_chroma_ac_vlc_length;
4170             last_length= s->intra_chroma_ac_vlc_last_length;
4171         } else {
4172             length     = s->intra_ac_vlc_length;
4173             last_length= s->intra_ac_vlc_last_length;
4174         }
4175     } else {
4176         dc= 0;
4177         start_i = 0;
4178         length     = s->inter_ac_vlc_length;
4179         last_length= s->inter_ac_vlc_last_length;
4180     }
4181     last_non_zero = s->block_last_index[n];
4182
4183 #ifdef REFINE_STATS
4184 {START_TIMER
4185 #endif
4186     dc += (1<<(RECON_SHIFT-1));
4187     for(i=0; i<64; i++){
4188         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4189     }
4190 #ifdef REFINE_STATS
4191 STOP_TIMER("memset rem[]")}
4192 #endif
4193     sum=0;
4194     for(i=0; i<64; i++){
4195         int one= 36;
4196         int qns=4;
4197         int w;
4198
4199         w= FFABS(weight[i]) + qns*one;
4200         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4201
4202         weight[i] = w;
4203 //        w=weight[i] = (63*qns + (w/2)) / w;
4204
4205         av_assert2(w>0);
4206         av_assert2(w<(1<<6));
4207         sum += w*w;
4208     }
4209     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4210 #ifdef REFINE_STATS
4211 {START_TIMER
4212 #endif
4213     run=0;
4214     rle_index=0;
4215     for(i=start_i; i<=last_non_zero; i++){
4216         int j= perm_scantable[i];
4217         const int level= block[j];
4218         int coeff;
4219
4220         if(level){
4221             if(level<0) coeff= qmul*level - qadd;
4222             else        coeff= qmul*level + qadd;
4223             run_tab[rle_index++]=run;
4224             run=0;
4225
4226             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4227         }else{
4228             run++;
4229         }
4230     }
4231 #ifdef REFINE_STATS
4232 if(last_non_zero>0){
4233 STOP_TIMER("init rem[]")
4234 }
4235 }
4236
4237 {START_TIMER
4238 #endif
4239     for(;;){
4240         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4241         int best_coeff=0;
4242         int best_change=0;
4243         int run2, best_unquant_change=0, analyze_gradient;
4244 #ifdef REFINE_STATS
4245 {START_TIMER
4246 #endif
4247         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4248
4249         if(analyze_gradient){
4250 #ifdef REFINE_STATS
4251 {START_TIMER
4252 #endif
4253             for(i=0; i<64; i++){
4254                 int w= weight[i];
4255
4256                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4257             }
4258 #ifdef REFINE_STATS
4259 STOP_TIMER("rem*w*w")}
4260 {START_TIMER
4261 #endif
4262             s->fdsp.fdct(d1);
4263 #ifdef REFINE_STATS
4264 STOP_TIMER("dct")}
4265 #endif
4266         }
4267
4268         if(start_i){
4269             const int level= block[0];
4270             int change, old_coeff;
4271
4272             av_assert2(s->mb_intra);
4273
4274             old_coeff= q*level;
4275
4276             for(change=-1; change<=1; change+=2){
4277                 int new_level= level + change;
4278                 int score, new_coeff;
4279
4280                 new_coeff= q*new_level;
4281                 if(new_coeff >= 2048 || new_coeff < 0)
4282                     continue;
4283
4284                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4285                                                   new_coeff - old_coeff);
4286                 if(score<best_score){
4287                     best_score= score;
4288                     best_coeff= 0;
4289                     best_change= change;
4290                     best_unquant_change= new_coeff - old_coeff;
4291                 }
4292             }
4293         }
4294
4295         run=0;
4296         rle_index=0;
4297         run2= run_tab[rle_index++];
4298         prev_level=0;
4299         prev_run=0;
4300
4301         for(i=start_i; i<64; i++){
4302             int j= perm_scantable[i];
4303             const int level= block[j];
4304             int change, old_coeff;
4305
4306             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4307                 break;
4308
4309             if(level){
4310                 if(level<0) old_coeff= qmul*level - qadd;
4311                 else        old_coeff= qmul*level + qadd;
4312                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4313             }else{
4314                 old_coeff=0;
4315                 run2--;
4316                 av_assert2(run2>=0 || i >= last_non_zero );
4317             }
4318
4319             for(change=-1; change<=1; change+=2){
4320                 int new_level= level + change;
4321                 int score, new_coeff, unquant_change;
4322
4323                 score=0;
4324                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4325                    continue;
4326
4327                 if(new_level){
4328                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4329                     else            new_coeff= qmul*new_level + qadd;
4330                     if(new_coeff >= 2048 || new_coeff <= -2048)
4331                         continue;
4332                     //FIXME check for overflow
4333
4334                     if(level){
4335                         if(level < 63 && level > -63){
4336                             if(i < last_non_zero)
4337                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4338                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4339                             else
4340                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4341                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4342                         }
4343                     }else{
4344                         av_assert2(FFABS(new_level)==1);
4345
4346                         if(analyze_gradient){
4347                             int g= d1[ scantable[i] ];
4348                             if(g && (g^new_level) >= 0)
4349                                 continue;
4350                         }
4351
4352                         if(i < last_non_zero){
4353                             int next_i= i + run2 + 1;
4354                             int next_level= block[ perm_scantable[next_i] ] + 64;
4355
4356                             if(next_level&(~127))
4357                                 next_level= 0;
4358
4359                             if(next_i < last_non_zero)
4360                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4361                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4362                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4363                             else
4364                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4365                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4366                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4367                         }else{
4368                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4369                             if(prev_level){
4370                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4371                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4372                             }
4373                         }
4374                     }
4375                 }else{
4376                     new_coeff=0;
4377                     av_assert2(FFABS(level)==1);
4378
4379                     if(i < last_non_zero){
4380                         int next_i= i + run2 + 1;
4381                         int next_level= block[ perm_scantable[next_i] ] + 64;
4382
4383                         if(next_level&(~127))
4384                             next_level= 0;
4385
4386                         if(next_i < last_non_zero)
4387                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4388                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4389                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4390                         else
4391                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4392                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4393                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4394                     }else{
4395                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4396                         if(prev_level){
4397                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4398                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4399                         }
4400                     }
4401                 }
4402
4403                 score *= lambda;
4404
4405                 unquant_change= new_coeff - old_coeff;
4406                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4407
4408                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4409                                                    unquant_change);
4410                 if(score<best_score){
4411                     best_score= score;
4412                     best_coeff= i;
4413                     best_change= change;
4414                     best_unquant_change= unquant_change;
4415                 }
4416             }
4417             if(level){
4418                 prev_level= level + 64;
4419                 if(prev_level&(~127))
4420                     prev_level= 0;
4421                 prev_run= run;
4422                 run=0;
4423             }else{
4424                 run++;
4425             }
4426         }
4427 #ifdef REFINE_STATS
4428 STOP_TIMER("iterative step")}
4429 #endif
4430
4431         if(best_change){
4432             int j= perm_scantable[ best_coeff ];
4433
4434             block[j] += best_change;
4435
4436             if(best_coeff > last_non_zero){
4437                 last_non_zero= best_coeff;
4438                 av_assert2(block[j]);
4439 #ifdef REFINE_STATS
4440 after_last++;
4441 #endif
4442             }else{
4443 #ifdef REFINE_STATS
4444 if(block[j]){
4445     if(block[j] - best_change){
4446         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4447             raise++;
4448         }else{
4449             lower++;
4450         }
4451     }else{
4452         from_zero++;
4453     }
4454 }else{
4455     to_zero++;
4456 }
4457 #endif
4458                 for(; last_non_zero>=start_i; last_non_zero--){
4459                     if(block[perm_scantable[last_non_zero]])
4460                         break;
4461                 }
4462             }
4463 #ifdef REFINE_STATS
4464 count++;
4465 if(256*256*256*64 % count == 0){
4466     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4467 }
4468 #endif
4469             run=0;
4470             rle_index=0;
4471             for(i=start_i; i<=last_non_zero; i++){
4472                 int j= perm_scantable[i];
4473                 const int level= block[j];
4474
4475                  if(level){
4476                      run_tab[rle_index++]=run;
4477                      run=0;
4478                  }else{
4479                      run++;
4480                  }
4481             }
4482
4483             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4484         }else{
4485             break;
4486         }
4487     }
4488 #ifdef REFINE_STATS
4489 if(last_non_zero>0){
4490 STOP_TIMER("iterative search")
4491 }
4492 }
4493 #endif
4494
4495     return last_non_zero;
4496 }
4497
/**
 * Permute the coefficients of an 8x8 block according to a permutation.
 *
 * Only the positions listed in the first last+1 entries of the scantable
 * are moved; the value at each such position j ends up at permutation[j].
 * Nothing is done when last <= 0.
 *
 * @param block       the block which will be permuted according to the
 *                    given permutation vector
 * @param permutation the permutation vector
 * @param last        the last nonzero coefficient in scantable order, used
 *                    to speed the permutation up
 * @param scantable   the scantable in use; it only limits which positions
 *                    are touched, the block is not (inverse) permuted to
 *                    scantable order
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last <= 0)
        return;

    /* FIXME: an early return for permutation[1] == 1 would be OK but is not
     * clean and might fail for some permutations, so it is not used. */

    /* Pass 1: stash every coefficient that may be nonzero and clear its slot. */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
        pos++;
    }

    /* Pass 2: drop each stashed coefficient at its permuted position. */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        block[permutation[src]] = saved[src];
        pos++;
    }
}
4533
4534 int ff_dct_quantize_c(MpegEncContext *s,
4535                         int16_t *block, int n,
4536                         int qscale, int *overflow)
4537 {
4538     int i, j, level, last_non_zero, q, start_i;
4539     const int *qmat;
4540     const uint8_t *scantable= s->intra_scantable.scantable;
4541     int bias;
4542     int max=0;
4543     unsigned int threshold1, threshold2;
4544
4545     s->fdsp.fdct(block);
4546
4547     if(s->dct_error_sum)
4548         s->denoise_dct(s, block);
4549
4550     if (s->mb_intra) {
4551         if (!s->h263_aic) {
4552             if (n < 4)
4553                 q = s->y_dc_scale;
4554             else
4555                 q = s->c_dc_scale;
4556             q = q << 3;
4557         } else
4558             /* For AIC we skip quant/dequant of INTRADC */
4559             q = 1 << 3;
4560
4561         /* note: block[0] is assumed to be positive */
4562         block[0] = (block[0] + (q >> 1)) / q;
4563         start_i = 1;
4564         last_non_zero = 0;
4565         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4566         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4567     } else {
4568         start_i = 0;
4569         last_non_zero = -1;
4570         qmat = s->q_inter_matrix[qscale];
4571         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4572     }
4573     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4574     threshold2= (threshold1<<1);
4575     for(i=63;i>=start_i;i--) {
4576         j = scantable[i];
4577         level = block[j] * qmat[j];
4578
4579         if(((unsigned)(level+threshold1))>threshold2){
4580             last_non_zero = i;
4581             break;
4582         }else{
4583             block[j]=0;
4584         }
4585     }
4586     for(i=start_i; i<=last_non_zero; i++) {
4587         j = scantable[i];
4588         level = block[j] * qmat[j];
4589
4590 //        if(   bias+level >= (1<<QMAT_SHIFT)
4591 //           || bias-level >= (1<<QMAT_SHIFT)){
4592         if(((unsigned)(level+threshold1))>threshold2){
4593             if(level>0){
4594                 level= (bias + level)>>QMAT_SHIFT;
4595                 block[j]= level;
4596             }else{
4597                 level= (bias - level)>>QMAT_SHIFT;
4598                 block[j]= -level;
4599             }
4600             max |=level;
4601         }else{
4602             block[j]=0;
4603         }
4604     }
4605     *overflow= s->max_qcoeff < max; //overflow might have happened
4606
4607     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4608     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4609         block_permute(block, s->idsp.idct_permutation,
4610                       scantable, last_non_zero);
4611
4612     return last_non_zero;
4613 }
4614
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags carried by every private option below: video + encoding parameter.
 * The replacement list is parenthesized so that VE stays a single operand
 * in any expression it is placed in (e.g. "VE & flag" or "2 * VE"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4617 static const AVOption h263_options[] = {
4618     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4619     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4620     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4621     FF_MPV_COMMON_OPTS
4622     { NULL },
4623 };
4624
4625 static const AVClass h263_class = {
4626     .class_name = "H.263 encoder",
4627     .item_name  = av_default_item_name,
4628     .option     = h263_options,
4629     .version    = LIBAVUTIL_VERSION_INT,
4630 };
4631
4632 AVCodec ff_h263_encoder = {
4633     .name           = "h263",
4634     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4635     .type           = AVMEDIA_TYPE_VIDEO,
4636     .id             = AV_CODEC_ID_H263,
4637     .priv_data_size = sizeof(MpegEncContext),
4638     .init           = ff_mpv_encode_init,
4639     .encode2        = ff_mpv_encode_picture,
4640     .close          = ff_mpv_encode_end,
4641     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4642     .priv_class     = &h263_class,
4643 };
4644
4645 static const AVOption h263p_options[] = {
4646     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4647     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4648     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4649     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4650     FF_MPV_COMMON_OPTS
4651     { NULL },
4652 };
4653 static const AVClass h263p_class = {
4654     .class_name = "H.263p encoder",
4655     .item_name  = av_default_item_name,
4656     .option     = h263p_options,
4657     .version    = LIBAVUTIL_VERSION_INT,
4658 };
4659
4660 AVCodec ff_h263p_encoder = {
4661     .name           = "h263p",
4662     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4663     .type           = AVMEDIA_TYPE_VIDEO,
4664     .id             = AV_CODEC_ID_H263P,
4665     .priv_data_size = sizeof(MpegEncContext),
4666     .init           = ff_mpv_encode_init,
4667     .encode2        = ff_mpv_encode_picture,
4668     .close          = ff_mpv_encode_end,
4669     .capabilities   = CODEC_CAP_SLICE_THREADS,
4670     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4671     .priv_class     = &h263p_class,
4672 };
4673
4674 static const AVClass msmpeg4v2_class = {
4675     .class_name = "msmpeg4v2 encoder",
4676     .item_name  = av_default_item_name,
4677     .option     = ff_mpv_generic_options,
4678     .version    = LIBAVUTIL_VERSION_INT,
4679 };
4680
4681 AVCodec ff_msmpeg4v2_encoder = {
4682     .name           = "msmpeg4v2",
4683     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4684     .type           = AVMEDIA_TYPE_VIDEO,
4685     .id             = AV_CODEC_ID_MSMPEG4V2,
4686     .priv_data_size = sizeof(MpegEncContext),
4687     .init           = ff_mpv_encode_init,
4688     .encode2        = ff_mpv_encode_picture,
4689     .close          = ff_mpv_encode_end,
4690     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4691     .priv_class     = &msmpeg4v2_class,
4692 };
4693
4694 static const AVClass msmpeg4v3_class = {
4695     .class_name = "msmpeg4v3 encoder",
4696     .item_name  = av_default_item_name,
4697     .option     = ff_mpv_generic_options,
4698     .version    = LIBAVUTIL_VERSION_INT,
4699 };
4700
4701 AVCodec ff_msmpeg4v3_encoder = {
4702     .name           = "msmpeg4",
4703     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4704     .type           = AVMEDIA_TYPE_VIDEO,
4705     .id             = AV_CODEC_ID_MSMPEG4V3,
4706     .priv_data_size = sizeof(MpegEncContext),
4707     .init           = ff_mpv_encode_init,
4708     .encode2        = ff_mpv_encode_picture,
4709     .close          = ff_mpv_encode_end,
4710     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4711     .priv_class     = &msmpeg4v3_class,
4712 };
4713
4714 static const AVClass wmv1_class = {
4715     .class_name = "wmv1 encoder",
4716     .item_name  = av_default_item_name,
4717     .option     = ff_mpv_generic_options,
4718     .version    = LIBAVUTIL_VERSION_INT,
4719 };
4720
4721 AVCodec ff_wmv1_encoder = {
4722     .name           = "wmv1",
4723     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4724     .type           = AVMEDIA_TYPE_VIDEO,
4725     .id             = AV_CODEC_ID_WMV1,
4726     .priv_data_size = sizeof(MpegEncContext),
4727     .init           = ff_mpv_encode_init,
4728     .encode2        = ff_mpv_encode_picture,
4729     .close          = ff_mpv_encode_end,
4730     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4731     .priv_class     = &wmv1_class,
4732 };