git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
  62 #define QUANT_BIAS_SHIFT 8 /* number of fractional bits in the fixed-point quantizer bias values */
  63
  64 #define QMAT_SHIFT_MMX 16 /* fixed-point scale (1 << 16) of the 16-bit reciprocals stored in qmat16 */
  65 #define QMAT_SHIFT 21 /* fixed-point scale (1 << 21) of the int reciprocals stored in qmat */
  66
  67 static int encode_picture(MpegEncContext *s, int picture_number);
  68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  69 static int sse_mb(MpegEncContext *s);
  70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  72
  73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1]; /* installed as s->me.mv_penalty by mpv_encode_defaults() */
  74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1]; /* installed as s->fcode_tab by mpv_encode_defaults() */
  75
  76 const AVOption ff_mpv_generic_options[] = { /* entries come from the FF_MPV_COMMON_OPTS macro (defined outside this file view) */
  77     FF_MPV_COMMON_OPTS
  78     { NULL }, /* terminating sentinel entry */
  79 };
  80
  81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  82                        uint16_t (*qmat16)[2][64],
  83                        const uint16_t *quant_matrix,
  84                        int bias, int qmin, int qmax, int intra)
  85 {
  86     FDCTDSPContext *fdsp = &s->fdsp;
  87     int qscale;
  88     int shift = 0;
  89
  90     for (qscale = qmin; qscale <= qmax; qscale++) {
  91         int i;
  92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  93 #if CONFIG_FAANDCT
  94             fdsp->fdct == ff_faandct            ||
  95 #endif /* CONFIG_FAANDCT */
  96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = s->idsp.idct_permutation[i];
  99                 int64_t den = (int64_t) qscale * quant_matrix[j];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 107             }
 108         } else if (fdsp->fdct == ff_fdct_ifast) {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = s->idsp.idct_permutation[i];
 111                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 112                 /* 16 <= qscale * quant_matrix[i] <= 7905
 113                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 114                  *             19952 <=              x  <= 249205026
 115                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 116                  *           3444240 >= (1 << 36) / (x) >= 275 */
 117
 118                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 119             }
 120         } else {
 121             for (i = 0; i < 64; i++) {
 122                 const int j = s->idsp.idct_permutation[i];
 123                 int64_t den = (int64_t) qscale * quant_matrix[j];
 124                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 125                  * Assume x = qscale * quant_matrix[i]
 126                  * So             16 <=              x  <= 7905
 127                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 128                  * so          32768 >= (1 << 19) / (x) >= 67 */
 129                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 131                 //                    (qscale * quant_matrix[i]);
 132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 133
 134                 if (qmat16[qscale][0][i] == 0 ||
 135                     qmat16[qscale][0][i] == 128 * 256)
 136                     qmat16[qscale][0][i] = 128 * 256 - 1;
 137                 qmat16[qscale][1][i] =
 138                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 139                                 qmat16[qscale][0][i]);
 140             }
 141         }
 142
 143         for (i = intra; i < 64; i++) {
 144             int64_t max = 8191;
 145             if (fdsp->fdct == ff_fdct_ifast) {
 146                 max = (8191LL * ff_aanscales[i]) >> 14;
 147             }
 148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 149                 shift++;
 150             }
 151         }
 152     }
 153     if (shift) {
 154         av_log(NULL, AV_LOG_INFO,
 155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 156                QMAT_SHIFT - shift);
 157     }
 158 }
 159
 160 static inline void update_qscale(MpegEncContext *s)
 161 {
 162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 163                 (FF_LAMBDA_SHIFT + 7);
 164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 165
 166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 167                  FF_LAMBDA_SHIFT;
 168 }
 169
 170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 171 {
 172     int i;
 173
 174     if (matrix) {
 175         put_bits(pb, 1, 1);
 176         for (i = 0; i < 64; i++) {
 177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 178         }
 179     } else
 180         put_bits(pb, 1, 0);
 181 }
 182
 183 /**
 184  * init s->current_picture.qscale_table from s->lambda_table
 185  */
 186 void ff_init_qscale_tab(MpegEncContext *s)
 187 {
 188     int8_t * const qscale_table = s->current_picture.qscale_table;
 189     int i;
 190
 191     for (i = 0; i < s->mb_num; i++) {
 192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 195                                                   s->avctx->qmax);
 196     }
 197 }
 198
 199 static void update_duplicate_context_after_me(MpegEncContext *dst,
 200                                               MpegEncContext *src)
 201 {
 202 #define COPY(a) dst->a= src->a
 203     COPY(pict_type);
 204     COPY(current_picture);
 205     COPY(f_code);
 206     COPY(b_code);
 207     COPY(qscale);
 208     COPY(lambda);
 209     COPY(lambda2);
 210     COPY(picture_in_gop_number);
 211     COPY(gop_picture_number);
 212     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 213     COPY(progressive_frame);    // FIXME don't set in encode_header
 214     COPY(partitioned_frame);    // FIXME don't set in encode_header
 215 #undef COPY
 216 }
 217
 218 /**
 219  * Set the given MpegEncContext to defaults for encoding.
 220  * the changed fields will not depend upon the prior state of the MpegEncContext.
 221  */
 222 static void mpv_encode_defaults(MpegEncContext *s)
 223 {
 224     int i;
 225     ff_mpv_common_defaults(s);
 226
 227     for (i = -16; i < 16; i++) {
 228         default_fcode_tab[i + MAX_MV] = 1;
 229     }
 230     s->me.mv_penalty = default_mv_penalty;
 231     s->fcode_tab     = default_fcode_tab;
 232
 233     s->input_picture_number  = 0;
 234     s->picture_in_gop_number = 0;
 235 }
 236
 237 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 238     if (ARCH_X86)
 239         ff_dct_encode_init_x86(s);
 240
 241     if (CONFIG_H263_ENCODER)
 242         ff_h263dsp_init(&s->h263dsp);
 243     if (!s->dct_quantize)
 244         s->dct_quantize = ff_dct_quantize_c;
 245     if (!s->denoise_dct)
 246         s->denoise_dct  = denoise_dct_c;
 247     s->fast_dct_quantize = s->dct_quantize;
 248     if (s->avctx->trellis)
 249         s->dct_quantize  = dct_quantize_trellis_c;
 250
 251     return 0;
 252 }
 253
 254 /* init video encoder */
 255 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 256 {
 257     MpegEncContext *s = avctx->priv_data;
 258     int i, ret, format_supported;
 259
 260     mpv_encode_defaults(s);
 261
 262     switch (avctx->codec_id) {
 263     case AV_CODEC_ID_MPEG2VIDEO:
 264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 266             av_log(avctx, AV_LOG_ERROR,
 267                    "only YUV420 and YUV422 are supported\n");
 268             return -1;
 269         }
 270         break;
 271     case AV_CODEC_ID_MJPEG:
 272     case AV_CODEC_ID_AMV:
 273         format_supported = 0;
 274         /* JPEG color space */
 275         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 276             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 278             (avctx->color_range == AVCOL_RANGE_JPEG &&
 279              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 280               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 282             format_supported = 1;
 283         /* MPEG color space */
 284         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 285                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 286                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 288             format_supported = 1;
 289
 290         if (!format_supported) {
 291             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 292             return -1;
 293         }
 294         break;
 295     default:
 296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 297             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 298             return -1;
 299         }
 300     }
 301
 302     switch (avctx->pix_fmt) {
 303     case AV_PIX_FMT_YUVJ444P:
 304     case AV_PIX_FMT_YUV444P:
 305         s->chroma_format = CHROMA_444;
 306         break;
 307     case AV_PIX_FMT_YUVJ422P:
 308     case AV_PIX_FMT_YUV422P:
 309         s->chroma_format = CHROMA_422;
 310         break;
 311     case AV_PIX_FMT_YUVJ420P:
 312     case AV_PIX_FMT_YUV420P:
 313     default:
 314         s->chroma_format = CHROMA_420;
 315         break;
 316     }
 317
 318     s->bit_rate = avctx->bit_rate;
 319     s->width    = avctx->width;
 320     s->height   = avctx->height;
 321     if (avctx->gop_size > 600 &&
 322         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 323         av_log(avctx, AV_LOG_WARNING,
 324                "keyframe interval too large!, reducing it from %d to %d\n",
 325                avctx->gop_size, 600);
 326         avctx->gop_size = 600;
 327     }
 328     s->gop_size     = avctx->gop_size;
 329     s->avctx        = avctx;
 330     s->flags        = avctx->flags;
 331     s->flags2       = avctx->flags2;
 332     if (avctx->max_b_frames > MAX_B_FRAMES) {
 333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 334                "is %d.\n", MAX_B_FRAMES);
 335         avctx->max_b_frames = MAX_B_FRAMES;
 336     }
 337     s->max_b_frames = avctx->max_b_frames;
 338     s->codec_id     = avctx->codec->id;
 339     s->strict_std_compliance = avctx->strict_std_compliance;
 340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 341     s->mpeg_quant         = avctx->mpeg_quant;
 342     s->rtp_mode           = !!avctx->rtp_payload_size;
 343     s->intra_dc_precision = avctx->intra_dc_precision;
 344
 345     // workaround some differences between how applications specify dc precision
 346     if (s->intra_dc_precision < 0) {
 347         s->intra_dc_precision += 8;
 348     } else if (s->intra_dc_precision >= 8)
 349         s->intra_dc_precision -= 8;
 350
 351     if (s->intra_dc_precision < 0) {
 352         av_log(avctx, AV_LOG_ERROR,
 353                 "intra dc precision must be positive, note some applications use"
 354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 355         return AVERROR(EINVAL);
 356     }
 357
 358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 360         return AVERROR(EINVAL);
 361     }
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376 #if FF_API_MPV_OPT
 377     FF_DISABLE_DEPRECATION_WARNINGS
 378     if (avctx->border_masking != 0.0)
 379         s->border_masking = avctx->border_masking;
 380     FF_ENABLE_DEPRECATION_WARNINGS
 381 #endif
 382
 383     s->adaptive_quant = (s->avctx->lumi_masking ||
 384                          s->avctx->dark_masking ||
 385                          s->avctx->temporal_cplx_masking ||
 386                          s->avctx->spatial_cplx_masking  ||
 387                          s->avctx->p_masking      ||
 388                          s->border_masking ||
 389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 390                         !s->fixed_qscale;
 391
 392     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 393
 394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 395         switch(avctx->codec_id) {
 396         case AV_CODEC_ID_MPEG1VIDEO:
 397         case AV_CODEC_ID_MPEG2VIDEO:
 398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 399             break;
 400         case AV_CODEC_ID_MPEG4:
 401         case AV_CODEC_ID_MSMPEG4V1:
 402         case AV_CODEC_ID_MSMPEG4V2:
 403         case AV_CODEC_ID_MSMPEG4V3:
 404             if       (avctx->rc_max_rate >= 15000000) {
 405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 406             } else if(avctx->rc_max_rate >=  2000000) {
 407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 408             } else if(avctx->rc_max_rate >=   384000) {
 409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 410             } else
 411                 avctx->rc_buffer_size = 40;
 412             avctx->rc_buffer_size *= 16384;
 413             break;
 414         }
 415         if (avctx->rc_buffer_size) {
 416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 417         }
 418     }
 419
 420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 422         return -1;
 423     }
 424
 425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 426         av_log(avctx, AV_LOG_INFO,
 427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 428     }
 429
 430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 432         return -1;
 433     }
 434
 435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 437         return -1;
 438     }
 439
 440     if (avctx->rc_max_rate &&
 441         avctx->rc_max_rate == avctx->bit_rate &&
 442         avctx->rc_max_rate != avctx->rc_min_rate) {
 443         av_log(avctx, AV_LOG_INFO,
 444                "impossible bitrate constraints, this will fail\n");
 445     }
 446
 447     if (avctx->rc_buffer_size &&
 448         avctx->bit_rate * (int64_t)avctx->time_base.num >
 449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 451         return -1;
 452     }
 453
 454     if (!s->fixed_qscale &&
 455         avctx->bit_rate * av_q2d(avctx->time_base) >
 456             avctx->bit_rate_tolerance) {
 457         av_log(avctx, AV_LOG_WARNING,
 458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 460     }
 461
 462     if (s->avctx->rc_max_rate &&
 463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 466         90000LL * (avctx->rc_buffer_size - 1) >
 467             s->avctx->rc_max_rate * 0xFFFFLL) {
 468         av_log(avctx, AV_LOG_INFO,
 469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 470                "specified vbv buffer is too large for the given bitrate!\n");
 471     }
 472
 473     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 475         s->codec_id != AV_CODEC_ID_FLV1) {
 476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 477         return -1;
 478     }
 479
 480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 481         av_log(avctx, AV_LOG_ERROR,
 482                "OBMC is only supported with simple mb decision\n");
 483         return -1;
 484     }
 485
 486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if (s->max_b_frames                    &&
 492         s->codec_id != AV_CODEC_ID_MPEG4      &&
 493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 496         return -1;
 497     }
 498     if (s->max_b_frames < 0) {
 499         av_log(avctx, AV_LOG_ERROR,
 500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 501         return -1;
 502     }
 503
 504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 505          s->codec_id == AV_CODEC_ID_H263  ||
 506          s->codec_id == AV_CODEC_ID_H263P) &&
 507         (avctx->sample_aspect_ratio.num > 255 ||
 508          avctx->sample_aspect_ratio.den > 255)) {
 509         av_log(avctx, AV_LOG_WARNING,
 510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 514     }
 515
 516     if ((s->codec_id == AV_CODEC_ID_H263  ||
 517          s->codec_id == AV_CODEC_ID_H263P) &&
 518         (avctx->width  > 2048 ||
 519          avctx->height > 1152 )) {
 520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 521         return -1;
 522     }
 523     if ((s->codec_id == AV_CODEC_ID_H263  ||
 524          s->codec_id == AV_CODEC_ID_H263P) &&
 525         ((avctx->width &3) ||
 526          (avctx->height&3) )) {
 527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 528         return -1;
 529     }
 530
 531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 532         (avctx->width  > 4095 ||
 533          avctx->height > 4095 )) {
 534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 535         return -1;
 536     }
 537
 538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 539         (avctx->width  > 16383 ||
 540          avctx->height > 16383 )) {
 541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 542         return -1;
 543     }
 544
 545     if (s->codec_id == AV_CODEC_ID_RV10 &&
 546         (avctx->width &15 ||
 547          avctx->height&15 )) {
 548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 549         return AVERROR(EINVAL);
 550     }
 551
 552     if (s->codec_id == AV_CODEC_ID_RV20 &&
 553         (avctx->width &3 ||
 554          avctx->height&3 )) {
 555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 556         return AVERROR(EINVAL);
 557     }
 558
 559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 560          s->codec_id == AV_CODEC_ID_WMV2) &&
 561          avctx->width & 1) {
 562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 563          return -1;
 564     }
 565
 566     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 569         return -1;
 570     }
 571
 572     // FIXME mpeg2 uses that too
 573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 575         av_log(avctx, AV_LOG_ERROR,
 576                "mpeg2 style quantization not supported by codec\n");
 577         return -1;
 578     }
 579
 580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 582         return -1;
 583     }
 584
 585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 588         return -1;
 589     }
 590
 591     if (s->avctx->scenechange_threshold < 1000000000 &&
 592         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "closed gop with scene change detection are not supported yet, "
 595                "set threshold to 1000000000\n");
 596         return -1;
 597     }
 598
 599     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                   "low delay forcing is only available for mpeg2\n");
 603             return -1;
 604         }
 605         if (s->max_b_frames != 0) {
 606             av_log(avctx, AV_LOG_ERROR,
 607                    "b frames cannot be used with low delay\n");
 608             return -1;
 609         }
 610     }
 611
 612     if (s->q_scale_type == 1) {
 613         if (avctx->qmax > 12) {
 614             av_log(avctx, AV_LOG_ERROR,
 615                    "non linear quant only supports qmax <= 12 currently\n");
 616             return -1;
 617         }
 618     }
 619
 620     if (s->avctx->thread_count > 1         &&
 621         s->codec_id != AV_CODEC_ID_MPEG4      &&
 622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 624         s->codec_id != AV_CODEC_ID_MJPEG      &&
 625         (s->codec_id != AV_CODEC_ID_H263P)) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "multi threaded encoding not supported by codec\n");
 628         return -1;
 629     }
 630
 631     if (s->avctx->thread_count < 1) {
 632         av_log(avctx, AV_LOG_ERROR,
 633                "automatic thread number detection not supported by codec, "
 634                "patch welcome\n");
 635         return -1;
 636     }
 637
 638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 639         s->rtp_mode = 1;
 640
 641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 642         s->h263_slice_structured = 1;
 643
 644     if (!avctx->time_base.den || !avctx->time_base.num) {
 645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 646         return -1;
 647     }
 648
 649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 650         av_log(avctx, AV_LOG_INFO,
 651                "notice: b_frame_strategy only affects the first pass\n");
 652         avctx->b_frame_strategy = 0;
 653     }
 654
 655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 656     if (i > 1) {
 657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 658         avctx->time_base.den /= i;
 659         avctx->time_base.num /= i;
 660         //return -1;
 661     }
 662
 663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 664         // (a + x * 3 / 8) / x
 665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 666         s->inter_quant_bias = 0;
 667     } else {
 668         s->intra_quant_bias = 0;
 669         // (a - x / 4) / x
 670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 671     }
 672
 673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 679         s->intra_quant_bias = avctx->intra_quant_bias;
 680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->inter_quant_bias = avctx->inter_quant_bias;
 682
 683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 684
 685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 686         s->avctx->time_base.den > (1 << 16) - 1) {
 687         av_log(avctx, AV_LOG_ERROR,
 688                "timebase %d/%d not supported by MPEG 4 standard, "
 689                "the maximum admitted value for the timebase denominator "
 690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 691                (1 << 16) - 1);
 692         return -1;
 693     }
 694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 695
 696     switch (avctx->codec->id) {
 697     case AV_CODEC_ID_MPEG1VIDEO:
 698         s->out_format = FMT_MPEG1;
 699         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MPEG2VIDEO:
 703         s->out_format = FMT_MPEG1;
 704         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 706         s->rtp_mode   = 1;
 707         break;
 708     case AV_CODEC_ID_MJPEG:
 709     case AV_CODEC_ID_AMV:
 710         s->out_format = FMT_MJPEG;
 711         s->intra_only = 1; /* force intra only for jpeg */
 712         if (!CONFIG_MJPEG_ENCODER ||
 713             ff_mjpeg_encode_init(s) < 0)
 714             return -1;
 715         avctx->delay = 0;
 716         s->low_delay = 1;
 717         break;
 718     case AV_CODEC_ID_H261:
 719         if (!CONFIG_H261_ENCODER)
 720             return -1;
 721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 722             av_log(avctx, AV_LOG_ERROR,
 723                    "The specified picture size of %dx%d is not valid for the "
 724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 725                     s->width, s->height);
 726             return -1;
 727         }
 728         s->out_format = FMT_H261;
 729         avctx->delay  = 0;
 730         s->low_delay  = 1;
 731         s->rtp_mode   = 0; /* Sliced encoding not supported */
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         ff_msmpeg4_encode_init(s);
 893     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 894         && s->out_format == FMT_MPEG1)
 895         ff_mpeg1_encode_init(s);
 896
 897     /* init q matrix */
 898     for (i = 0; i < 64; i++) {
 899         int j = s->idsp.idct_permutation[i];
 900         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 901             s->mpeg_quant) {
 902             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 903             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 904         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 905             s->intra_matrix[j] =
 906             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 907         } else {
 908             /* mpeg1/2 */
 909             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 910             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 911         }
 912         if (s->avctx->intra_matrix)
 913             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 914         if (s->avctx->inter_matrix)
 915             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 916     }
 917
 918     /* precompute matrix */
 919     /* for mjpeg, we do include qscale in the matrix */
 920     if (s->out_format != FMT_MJPEG) {
 921         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 922                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 923                           31, 1);
 924         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 925                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 926                           31, 0);
 927     }
 928
 929     if (ff_rate_control_init(s) < 0)
 930         return -1;
 931
 932 #if FF_API_ERROR_RATE
 933     FF_DISABLE_DEPRECATION_WARNINGS
 934     if (avctx->error_rate)
 935         s->error_rate = avctx->error_rate;
 936     FF_ENABLE_DEPRECATION_WARNINGS;
 937 #endif
 938
 939 #if FF_API_NORMALIZE_AQP
 940     FF_DISABLE_DEPRECATION_WARNINGS
 941     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 942         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 943     FF_ENABLE_DEPRECATION_WARNINGS;
 944 #endif
 945
 946 #if FF_API_MV0
 947     FF_DISABLE_DEPRECATION_WARNINGS
 948     if (avctx->flags & CODEC_FLAG_MV0)
 949         s->mpv_flags |= FF_MPV_FLAG_MV0;
 950     FF_ENABLE_DEPRECATION_WARNINGS
 951 #endif
 952
 953 #if FF_API_MPV_OPT
 954     FF_DISABLE_DEPRECATION_WARNINGS
 955     if (avctx->rc_qsquish != 0.0)
 956         s->rc_qsquish = avctx->rc_qsquish;
 957     if (avctx->rc_qmod_amp != 0.0)
 958         s->rc_qmod_amp = avctx->rc_qmod_amp;
 959     if (avctx->rc_qmod_freq)
 960         s->rc_qmod_freq = avctx->rc_qmod_freq;
 961     if (avctx->rc_buffer_aggressivity != 1.0)
 962         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 963     if (avctx->rc_initial_cplx != 0.0)
 964         s->rc_initial_cplx = avctx->rc_initial_cplx;
 965     if (avctx->lmin)
 966         s->lmin = avctx->lmin;
 967     if (avctx->lmax)
 968         s->lmax = avctx->lmax;
 969
 970     if (avctx->rc_eq) {
 971         av_freep(&s->rc_eq);
 972         s->rc_eq = av_strdup(avctx->rc_eq);
 973         if (!s->rc_eq)
 974             return AVERROR(ENOMEM);
 975     }
 976     FF_ENABLE_DEPRECATION_WARNINGS
 977 #endif
 978
 979     if (avctx->b_frame_strategy == 2) {
 980         for (i = 0; i < s->max_b_frames + 2; i++) {
 981             s->tmp_frames[i] = av_frame_alloc();
 982             if (!s->tmp_frames[i])
 983                 return AVERROR(ENOMEM);
 984
 985             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 986             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 987             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 988
 989             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 990             if (ret < 0)
 991                 return ret;
 992         }
 993     }
 994
 995     return 0;
 996 fail:
 997     ff_mpv_encode_end(avctx);
 998     return AVERROR_UNKNOWN;
 999 }
1000
1001 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1002 {
1003     MpegEncContext *s = avctx->priv_data;
1004     int i;
1005
1006     ff_rate_control_uninit(s);
1007
1008     ff_mpv_common_end(s);
1009     if (CONFIG_MJPEG_ENCODER &&
1010         s->out_format == FMT_MJPEG)
1011         ff_mjpeg_encode_close(s);
1012
1013     av_freep(&avctx->extradata);
1014
1015     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1016         av_frame_free(&s->tmp_frames[i]);
1017
1018     ff_free_picture_tables(&s->new_picture);
1019     ff_mpeg_unref_picture(s, &s->new_picture);
1020
1021     av_freep(&s->avctx->stats_out);
1022     av_freep(&s->ac_stats);
1023
1024     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1025     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1026     s->q_chroma_intra_matrix=   NULL;
1027     s->q_chroma_intra_matrix16= NULL;
1028     av_freep(&s->q_intra_matrix);
1029     av_freep(&s->q_inter_matrix);
1030     av_freep(&s->q_intra_matrix16);
1031     av_freep(&s->q_inter_matrix16);
1032     av_freep(&s->input_picture);
1033     av_freep(&s->reordered_input_picture);
1034     av_freep(&s->dct_offset);
1035
1036     return 0;
1037 }
1038
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride offset between the starts of two consecutive rows of src
 * @return sum of |src[x + y * stride] - ref| over the 16x16 block
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
1052
1053 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1054                            uint8_t *ref, int stride)
1055 {
1056     int x, y, w, h;
1057     int acc = 0;
1058
1059     w = s->width  & ~15;
1060     h = s->height & ~15;
1061
1062     for (y = 0; y < h; y += 16) {
1063         for (x = 0; x < w; x += 16) {
1064             int offset = x + y * stride;
1065             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1066                                       stride, 16);
1067             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1068             int sae  = get_sae(src + offset, mean, stride);
1069
1070             acc += sae + 500 < sad;
1071         }
1072     }
1073     return acc;
1074 }
1075
1076
/**
 * Append one user-supplied frame to the encoder's input queue.
 *
 * When pic_arg is non-NULL its pts is validated (or guessed when missing),
 * an unused internal Picture is obtained, and the frame is either referenced
 * directly or copied into the internal picture. The input queue is then
 * shifted by one entry and the new picture (or NULL when pic_arg is NULL)
 * is stored at index encoding_delay.
 *
 * @param s       encoder context
 * @param pic_arg frame to queue, or NULL to only shift the queue
 * @return 0 on success; AVERROR(EINVAL) for a non-increasing pts, otherwise
 *         the negative value returned by a failing helper or -1
 */
static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
{
    Picture *pic = NULL;
    int64_t pts;
    int i, display_picture_number = 0, ret;
    /* queue slot for the new picture: max_b_frames if set, otherwise
     * 0 in low-delay mode and 1 if not */
    const int encoding_delay = s->max_b_frames ? s->max_b_frames :
                                                 (s->low_delay ? 0 : 1);
    /* 1 while the user's buffer can be referenced without copying */
    int direct = 1;

    if (pic_arg) {
        pts = pic_arg->pts;
        display_picture_number = s->input_picture_number++;

        if (pts != AV_NOPTS_VALUE) {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                int64_t last = s->user_specified_pts;

                /* timestamps must be strictly increasing */
                if (pts <= last) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
                           pts, last);
                    return AVERROR(EINVAL);
                }

                /* remember the pts distance between the first two frames */
                if (!s->low_delay && display_picture_number == 1)
                    s->dts_delta = pts - last;
            }
            s->user_specified_pts = pts;
        } else {
            /* no pts supplied: continue from the last known pts, or fall
             * back to the display picture number */
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                s->user_specified_pts =
                pts = s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO,
                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
                       pts);
            } else {
                pts = display_picture_number;
            }
        }
    }

    if (pic_arg) {
        /* the frame is copied unless it is refcounted, its strides match
         * the encoder's, the dimensions are multiples of 16 and both the
         * data pointer and the line size satisfy STRIDE_ALIGN */
        if (!pic_arg->buf[0] ||
            pic_arg->linesize[0] != s->linesize ||
            pic_arg->linesize[1] != s->uvlinesize ||
            pic_arg->linesize[2] != s->uvlinesize)
            direct = 0;
        if ((s->width & 15) || (s->height & 15))
            direct = 0;
        if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
            direct = 0;
        if (s->linesize & (STRIDE_ALIGN-1))
            direct = 0;

        av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
                pic_arg->linesize[1], s->linesize, s->uvlinesize);

        i = ff_find_unused_picture(s, direct);
        if (i < 0)
            return i;

        pic = &s->picture[i];
        pic->reference = 3;

        if (direct) {
            /* share the caller's buffers */
            if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
                return ret;
            if (ff_alloc_picture(s, pic, 1) < 0) {
                return -1;
            }
        } else {
            if (ff_alloc_picture(s, pic, 0) < 0) {
                return -1;
            }

            /* nothing to copy when the caller's planes already sit at
             * INPLACE_OFFSET inside the internal picture */
            if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
                pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
                pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
                // empty
            } else {
                int h_chroma_shift, v_chroma_shift;
                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                                 &h_chroma_shift,
                                                 &v_chroma_shift);

                /* copy the three planes one by one */
                for (i = 0; i < 3; i++) {
                    int src_stride = pic_arg->linesize[i];
                    int dst_stride = i ? s->uvlinesize : s->linesize;
                    int h_shift = i ? h_chroma_shift : 0;
                    int v_shift = i ? v_chroma_shift : 0;
                    int w = s->width  >> h_shift;
                    int h = s->height >> v_shift;
                    uint8_t *src = pic_arg->data[i];
                    uint8_t *dst = pic->f->data[i];
                    int vpad = 16;

                    /* interlaced MPEG-2 whose height needs more than 16
                     * rows to reach a multiple of 32 gets 32 rows of
                     * bottom padding */
                    if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
                        && !s->progressive_sequence
                        && FFALIGN(s->height, 32) - s->height > 16)
                        vpad = 32;

                    /* the copy is placed at INPLACE_OFFSET unless a rate
                     * control buffer size is configured */
                    if (!s->avctx->rc_buffer_size)
                        dst += INPLACE_OFFSET;

                    if (src_stride == dst_stride)
                        memcpy(dst, src, src_stride * h);
                    else {
                        /* strides differ: copy row by row */
                        int h2 = h;
                        uint8_t *dst2 = dst;
                        while (h2--) {
                            memcpy(dst2, src, w);
                            dst2 += dst_stride;
                            src += src_stride;
                        }
                    }
                    /* draw bottom edges when the width is not a multiple
                     * of 16 or the height is not a multiple of vpad */
                    if ((s->width & 15) || (s->height & (vpad-1))) {
                        s->mpvencdsp.draw_edges(dst, dst_stride,
                                                w, h,
                                                16>>h_shift,
                                                vpad>>v_shift,
                                                EDGE_BOTTOM);
                    }
                }
            }
        }
        ret = av_frame_copy_props(pic->f, pic_arg);
        if (ret < 0)
            return ret;

        pic->f->display_picture_number = display_picture_number;
        pic->f->pts = pts; // we set this here to avoid modifying pic_arg
    }

    /* shift buffer entries */
    for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
        s->input_picture[i - 1] = s->input_picture[i];

    /* pic is NULL here when no frame was supplied */
    s->input_picture[encoding_delay] = (Picture*) pic;

    return 0;
}
1218
1219 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1220 {
1221     int x, y, plane;
1222     int score = 0;
1223     int64_t score64 = 0;
1224
1225     for (plane = 0; plane < 3; plane++) {
1226         const int stride = p->f->linesize[plane];
1227         const int bw = plane ? 1 : 2;
1228         for (y = 0; y < s->mb_height * bw; y++) {
1229             for (x = 0; x < s->mb_width * bw; x++) {
1230                 int off = p->shared ? 0 : 16;
1231                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1232                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1233                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1234
1235                 switch (FFABS(s->avctx->frame_skip_exp)) {
1236                 case 0: score    =  FFMAX(score, v);          break;
1237                 case 1: score   += FFABS(v);                  break;
1238                 case 2: score64 += v * (int64_t)v;                       break;
1239                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1240                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1241                 }
1242             }
1243         }
1244     }
1245     emms_c();
1246
1247     if (score)
1248         score64 = score;
1249     if (s->avctx->frame_skip_exp < 0)
1250         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1251                       -1.0/s->avctx->frame_skip_exp);
1252
1253     if (score64 < s->avctx->frame_skip_threshold)
1254         return 1;
1255     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1256         return 1;
1257     return 0;
1258 }
1259
1260 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1261 {
1262     AVPacket pkt = { 0 };
1263     int ret, got_output;
1264
1265     av_init_packet(&pkt);
1266     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1267     if (ret < 0)
1268         return ret;
1269
1270     ret = pkt.size;
1271     av_free_packet(&pkt);
1272     return ret;
1273 }
1274
1275 static int estimate_best_b_count(MpegEncContext *s)
1276 {
1277     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1278     AVCodecContext *c = avcodec_alloc_context3(NULL);
1279     const int scale = s->avctx->brd_scale;
1280     int i, j, out_size, p_lambda, b_lambda, lambda2;
1281     int64_t best_rd  = INT64_MAX;
1282     int best_b_count = -1;
1283
1284     av_assert0(scale >= 0 && scale <= 3);
1285
1286     //emms_c();
1287     //s->next_picture_ptr->quality;
1288     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1289     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1290     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1291     if (!b_lambda) // FIXME we should do this somewhere else
1292         b_lambda = p_lambda;
1293     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1294                FF_LAMBDA_SHIFT;
1295
1296     c->width        = s->width  >> scale;
1297     c->height       = s->height >> scale;
1298     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1299     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1300     c->mb_decision  = s->avctx->mb_decision;
1301     c->me_cmp       = s->avctx->me_cmp;
1302     c->mb_cmp       = s->avctx->mb_cmp;
1303     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1304     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1305     c->time_base    = s->avctx->time_base;
1306     c->max_b_frames = s->max_b_frames;
1307
1308     if (avcodec_open2(c, codec, NULL) < 0)
1309         return -1;
1310
1311     for (i = 0; i < s->max_b_frames + 2; i++) {
1312         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1313                                                 s->next_picture_ptr;
1314         uint8_t *data[4];
1315
1316         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1317             pre_input = *pre_input_ptr;
1318             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1319
1320             if (!pre_input.shared && i) {
1321                 data[0] += INPLACE_OFFSET;
1322                 data[1] += INPLACE_OFFSET;
1323                 data[2] += INPLACE_OFFSET;
1324             }
1325
1326             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1327                                        s->tmp_frames[i]->linesize[0],
1328                                        data[0],
1329                                        pre_input.f->linesize[0],
1330                                        c->width, c->height);
1331             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1332                                        s->tmp_frames[i]->linesize[1],
1333                                        data[1],
1334                                        pre_input.f->linesize[1],
1335                                        c->width >> 1, c->height >> 1);
1336             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1337                                        s->tmp_frames[i]->linesize[2],
1338                                        data[2],
1339                                        pre_input.f->linesize[2],
1340                                        c->width >> 1, c->height >> 1);
1341         }
1342     }
1343
1344     for (j = 0; j < s->max_b_frames + 1; j++) {
1345         int64_t rd = 0;
1346
1347         if (!s->input_picture[j])
1348             break;
1349
1350         c->error[0] = c->error[1] = c->error[2] = 0;
1351
1352         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1353         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1354
1355         out_size = encode_frame(c, s->tmp_frames[0]);
1356
1357         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1358
1359         for (i = 0; i < s->max_b_frames + 1; i++) {
1360             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1361
1362             s->tmp_frames[i + 1]->pict_type = is_p ?
1363                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1364             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1365
1366             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1367
1368             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1369         }
1370
1371         /* get the delayed frames */
1372         while (out_size) {
1373             out_size = encode_frame(c, NULL);
1374             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1375         }
1376
1377         rd += c->error[0] + c->error[1] + c->error[2];
1378
1379         if (rd < best_rd) {
1380             best_rd = rd;
1381             best_b_count = j;
1382         }
1383     }
1384
1385     avcodec_close(c);
1386     av_freep(&c);
1387
1388     return best_b_count;
1389 }
1390
1391 static int select_input_picture(MpegEncContext *s)
1392 {
1393     int i, ret;
1394
1395     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1396         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1397     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1398
1399     /* set next picture type & ordering */
1400     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1401         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1402             if (s->picture_in_gop_number < s->gop_size &&
1403                 s->next_picture_ptr &&
1404                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1405                 // FIXME check that te gop check above is +-1 correct
1406                 av_frame_unref(s->input_picture[0]->f);
1407
1408                 ff_vbv_update(s, 0);
1409
1410                 goto no_output_pic;
1411             }
1412         }
1413
1414         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1415             !s->next_picture_ptr || s->intra_only) {
1416             s->reordered_input_picture[0] = s->input_picture[0];
1417             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1418             s->reordered_input_picture[0]->f->coded_picture_number =
1419                 s->coded_picture_number++;
1420         } else {
1421             int b_frames;
1422
1423             if (s->flags & CODEC_FLAG_PASS2) {
1424                 for (i = 0; i < s->max_b_frames + 1; i++) {
1425                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1426
1427                     if (pict_num >= s->rc_context.num_entries)
1428                         break;
1429                     if (!s->input_picture[i]) {
1430                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1431                         break;
1432                     }
1433
1434                     s->input_picture[i]->f->pict_type =
1435                         s->rc_context.entry[pict_num].new_pict_type;
1436                 }
1437             }
1438
1439             if (s->avctx->b_frame_strategy == 0) {
1440                 b_frames = s->max_b_frames;
1441                 while (b_frames && !s->input_picture[b_frames])
1442                     b_frames--;
1443             } else if (s->avctx->b_frame_strategy == 1) {
1444                 for (i = 1; i < s->max_b_frames + 1; i++) {
1445                     if (s->input_picture[i] &&
1446                         s->input_picture[i]->b_frame_score == 0) {
1447                         s->input_picture[i]->b_frame_score =
1448                             get_intra_count(s,
1449                                             s->input_picture[i    ]->f->data[0],
1450                                             s->input_picture[i - 1]->f->data[0],
1451                                             s->linesize) + 1;
1452                     }
1453                 }
1454                 for (i = 0; i < s->max_b_frames + 1; i++) {
1455                     if (!s->input_picture[i] ||
1456                         s->input_picture[i]->b_frame_score - 1 >
1457                             s->mb_num / s->avctx->b_sensitivity)
1458                         break;
1459                 }
1460
1461                 b_frames = FFMAX(0, i - 1);
1462
1463                 /* reset scores */
1464                 for (i = 0; i < b_frames + 1; i++) {
1465                     s->input_picture[i]->b_frame_score = 0;
1466                 }
1467             } else if (s->avctx->b_frame_strategy == 2) {
1468                 b_frames = estimate_best_b_count(s);
1469             } else {
1470                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1471                 b_frames = 0;
1472             }
1473
1474             emms_c();
1475
1476             for (i = b_frames - 1; i >= 0; i--) {
1477                 int type = s->input_picture[i]->f->pict_type;
1478                 if (type && type != AV_PICTURE_TYPE_B)
1479                     b_frames = i;
1480             }
1481             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1482                 b_frames == s->max_b_frames) {
1483                 av_log(s->avctx, AV_LOG_ERROR,
1484                        "warning, too many b frames in a row\n");
1485             }
1486
1487             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1488                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1489                     s->gop_size > s->picture_in_gop_number) {
1490                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1491                 } else {
1492                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1493                         b_frames = 0;
1494                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1495                 }
1496             }
1497
1498             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1499                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1500                 b_frames--;
1501
1502             s->reordered_input_picture[0] = s->input_picture[b_frames];
1503             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1504                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1505             s->reordered_input_picture[0]->f->coded_picture_number =
1506                 s->coded_picture_number++;
1507             for (i = 0; i < b_frames; i++) {
1508                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1509                 s->reordered_input_picture[i + 1]->f->pict_type =
1510                     AV_PICTURE_TYPE_B;
1511                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1512                     s->coded_picture_number++;
1513             }
1514         }
1515     }
1516 no_output_pic:
1517     if (s->reordered_input_picture[0]) {
1518         s->reordered_input_picture[0]->reference =
1519            s->reordered_input_picture[0]->f->pict_type !=
1520                AV_PICTURE_TYPE_B ? 3 : 0;
1521
1522         ff_mpeg_unref_picture(s, &s->new_picture);
1523         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1524             return ret;
1525
1526         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1527             // input is a shared pix, so we can't modifiy it -> alloc a new
1528             // one & ensure that the shared one is reuseable
1529
1530             Picture *pic;
1531             int i = ff_find_unused_picture(s, 0);
1532             if (i < 0)
1533                 return i;
1534             pic = &s->picture[i];
1535
1536             pic->reference = s->reordered_input_picture[0]->reference;
1537             if (ff_alloc_picture(s, pic, 0) < 0) {
1538                 return -1;
1539             }
1540
1541             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1542             if (ret < 0)
1543                 return ret;
1544
1545             /* mark us unused / free shared pic */
1546             av_frame_unref(s->reordered_input_picture[0]->f);
1547             s->reordered_input_picture[0]->shared = 0;
1548
1549             s->current_picture_ptr = pic;
1550         } else {
1551             // input is not a shared pix -> reuse buffer for current_pix
1552             s->current_picture_ptr = s->reordered_input_picture[0];
1553             for (i = 0; i < 4; i++) {
1554                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1555             }
1556         }
1557         ff_mpeg_unref_picture(s, &s->current_picture);
1558         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1559                                        s->current_picture_ptr)) < 0)
1560             return ret;
1561
1562         s->picture_number = s->new_picture.f->display_picture_number;
1563     } else {
1564         ff_mpeg_unref_picture(s, &s->new_picture);
1565     }
1566     return 0;
1567 }
1568
1569 static void frame_end(MpegEncContext *s)
1570 {
1571     if (s->unrestricted_mv &&
1572         s->current_picture.reference &&
1573         !s->intra_only) {
1574         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1575         int hshift = desc->log2_chroma_w;
1576         int vshift = desc->log2_chroma_h;
1577         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1578                                 s->current_picture.f->linesize[0],
1579                                 s->h_edge_pos, s->v_edge_pos,
1580                                 EDGE_WIDTH, EDGE_WIDTH,
1581                                 EDGE_TOP | EDGE_BOTTOM);
1582         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1583                                 s->current_picture.f->linesize[1],
1584                                 s->h_edge_pos >> hshift,
1585                                 s->v_edge_pos >> vshift,
1586                                 EDGE_WIDTH >> hshift,
1587                                 EDGE_WIDTH >> vshift,
1588                                 EDGE_TOP | EDGE_BOTTOM);
1589         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1590                                 s->current_picture.f->linesize[2],
1591                                 s->h_edge_pos >> hshift,
1592                                 s->v_edge_pos >> vshift,
1593                                 EDGE_WIDTH >> hshift,
1594                                 EDGE_WIDTH >> vshift,
1595                                 EDGE_TOP | EDGE_BOTTOM);
1596     }
1597
1598     emms_c();
1599
1600     s->last_pict_type                 = s->pict_type;
1601     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1602     if (s->pict_type!= AV_PICTURE_TYPE_B)
1603         s->last_non_b_pict_type = s->pict_type;
1604
1605     s->avctx->coded_frame = s->current_picture_ptr->f;
1606
1607 }
1608
1609 static void update_noise_reduction(MpegEncContext *s)
1610 {
1611     int intra, i;
1612
1613     for (intra = 0; intra < 2; intra++) {
1614         if (s->dct_count[intra] > (1 << 16)) {
1615             for (i = 0; i < 64; i++) {
1616                 s->dct_error_sum[intra][i] >>= 1;
1617             }
1618             s->dct_count[intra] >>= 1;
1619         }
1620
1621         for (i = 0; i < 64; i++) {
1622             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1623                                        s->dct_count[intra] +
1624                                        s->dct_error_sum[intra][i] / 2) /
1625                                       (s->dct_error_sum[intra][i] + 1);
1626         }
1627     }
1628 }
1629
1630 static int frame_start(MpegEncContext *s)
1631 {
1632     int ret;
1633
1634     /* mark & release old frames */
1635     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1636         s->last_picture_ptr != s->next_picture_ptr &&
1637         s->last_picture_ptr->f->buf[0]) {
1638         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1639     }
1640
1641     s->current_picture_ptr->f->pict_type = s->pict_type;
1642     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1643
1644     ff_mpeg_unref_picture(s, &s->current_picture);
1645     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1646                                    s->current_picture_ptr)) < 0)
1647         return ret;
1648
1649     if (s->pict_type != AV_PICTURE_TYPE_B) {
1650         s->last_picture_ptr = s->next_picture_ptr;
1651         if (!s->droppable)
1652             s->next_picture_ptr = s->current_picture_ptr;
1653     }
1654
1655     if (s->last_picture_ptr) {
1656         ff_mpeg_unref_picture(s, &s->last_picture);
1657         if (s->last_picture_ptr->f->buf[0] &&
1658             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1659                                        s->last_picture_ptr)) < 0)
1660             return ret;
1661     }
1662     if (s->next_picture_ptr) {
1663         ff_mpeg_unref_picture(s, &s->next_picture);
1664         if (s->next_picture_ptr->f->buf[0] &&
1665             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1666                                        s->next_picture_ptr)) < 0)
1667             return ret;
1668     }
1669
1670     if (s->picture_structure!= PICT_FRAME) {
1671         int i;
1672         for (i = 0; i < 4; i++) {
1673             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1674                 s->current_picture.f->data[i] +=
1675                     s->current_picture.f->linesize[i];
1676             }
1677             s->current_picture.f->linesize[i] *= 2;
1678             s->last_picture.f->linesize[i]    *= 2;
1679             s->next_picture.f->linesize[i]    *= 2;
1680         }
1681     }
1682
1683     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1684         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1685         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1686     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1687         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1688         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1689     } else {
1690         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1691         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1692     }
1693
1694     if (s->dct_error_sum) {
1695         av_assert2(s->avctx->noise_reduction && s->encoding);
1696         update_noise_reduction(s);
1697     }
1698
1699     return 0;
1700 }
1701
1702 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1703                           const AVFrame *pic_arg, int *got_packet)
1704 {
1705     MpegEncContext *s = avctx->priv_data;
1706     int i, stuffing_count, ret;
1707     int context_count = s->slice_context_count;
1708
1709     s->picture_in_gop_number++;
1710
1711     if (load_input_picture(s, pic_arg) < 0)
1712         return -1;
1713
1714     if (select_input_picture(s) < 0) {
1715         return -1;
1716     }
1717
1718     /* output? */
1719     if (s->new_picture.f->data[0]) {
1720         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1721         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1722                                               :
1723                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1724         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1725             return ret;
1726         if (s->mb_info) {
1727             s->mb_info_ptr = av_packet_new_side_data(pkt,
1728                                  AV_PKT_DATA_H263_MB_INFO,
1729                                  s->mb_width*s->mb_height*12);
1730             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1731         }
1732
1733         for (i = 0; i < context_count; i++) {
1734             int start_y = s->thread_context[i]->start_mb_y;
1735             int   end_y = s->thread_context[i]->  end_mb_y;
1736             int h       = s->mb_height;
1737             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1738             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1739
1740             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1741         }
1742
1743         s->pict_type = s->new_picture.f->pict_type;
1744         //emms_c();
1745         ret = frame_start(s);
1746         if (ret < 0)
1747             return ret;
1748 vbv_retry:
1749         ret = encode_picture(s, s->picture_number);
1750         if (growing_buffer) {
1751             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1752             pkt->data = s->pb.buf;
1753             pkt->size = avctx->internal->byte_buffer_size;
1754         }
1755         if (ret < 0)
1756             return -1;
1757
1758         avctx->header_bits = s->header_bits;
1759         avctx->mv_bits     = s->mv_bits;
1760         avctx->misc_bits   = s->misc_bits;
1761         avctx->i_tex_bits  = s->i_tex_bits;
1762         avctx->p_tex_bits  = s->p_tex_bits;
1763         avctx->i_count     = s->i_count;
1764         // FIXME f/b_count in avctx
1765         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1766         avctx->skip_count  = s->skip_count;
1767
1768         frame_end(s);
1769
1770         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1771             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1772
1773         if (avctx->rc_buffer_size) {
1774             RateControlContext *rcc = &s->rc_context;
1775             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1776
1777             if (put_bits_count(&s->pb) > max_size &&
1778                 s->lambda < s->lmax) {
1779                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1780                                        (s->qscale + 1) / s->qscale);
1781                 if (s->adaptive_quant) {
1782                     int i;
1783                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1784                         s->lambda_table[i] =
1785                             FFMAX(s->lambda_table[i] + 1,
1786                                   s->lambda_table[i] * (s->qscale + 1) /
1787                                   s->qscale);
1788                 }
1789                 s->mb_skipped = 0;        // done in frame_start()
1790                 // done in encode_picture() so we must undo it
1791                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1792                     if (s->flipflop_rounding          ||
1793                         s->codec_id == AV_CODEC_ID_H263P ||
1794                         s->codec_id == AV_CODEC_ID_MPEG4)
1795                         s->no_rounding ^= 1;
1796                 }
1797                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1798                     s->time_base       = s->last_time_base;
1799                     s->last_non_b_time = s->time - s->pp_time;
1800                 }
1801                 for (i = 0; i < context_count; i++) {
1802                     PutBitContext *pb = &s->thread_context[i]->pb;
1803                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1804                 }
1805                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1806                 goto vbv_retry;
1807             }
1808
1809             av_assert0(s->avctx->rc_max_rate);
1810         }
1811
1812         if (s->flags & CODEC_FLAG_PASS1)
1813             ff_write_pass1_stats(s);
1814
1815         for (i = 0; i < 4; i++) {
1816             s->current_picture_ptr->f->error[i] =
1817             s->current_picture.f->error[i] =
1818                 s->current_picture.error[i];
1819             avctx->error[i] += s->current_picture_ptr->f->error[i];
1820         }
1821
1822         if (s->flags & CODEC_FLAG_PASS1)
1823             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1824                    avctx->i_tex_bits + avctx->p_tex_bits ==
1825                        put_bits_count(&s->pb));
1826         flush_put_bits(&s->pb);
1827         s->frame_bits  = put_bits_count(&s->pb);
1828
1829         stuffing_count = ff_vbv_update(s, s->frame_bits);
1830         s->stuffing_bits = 8*stuffing_count;
1831         if (stuffing_count) {
1832             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1833                     stuffing_count + 50) {
1834                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1835                 return -1;
1836             }
1837
1838             switch (s->codec_id) {
1839             case AV_CODEC_ID_MPEG1VIDEO:
1840             case AV_CODEC_ID_MPEG2VIDEO:
1841                 while (stuffing_count--) {
1842                     put_bits(&s->pb, 8, 0);
1843                 }
1844             break;
1845             case AV_CODEC_ID_MPEG4:
1846                 put_bits(&s->pb, 16, 0);
1847                 put_bits(&s->pb, 16, 0x1C3);
1848                 stuffing_count -= 4;
1849                 while (stuffing_count--) {
1850                     put_bits(&s->pb, 8, 0xFF);
1851                 }
1852             break;
1853             default:
1854                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1855             }
1856             flush_put_bits(&s->pb);
1857             s->frame_bits  = put_bits_count(&s->pb);
1858         }
1859
1860         /* update mpeg1/2 vbv_delay for CBR */
1861         if (s->avctx->rc_max_rate                          &&
1862             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1863             s->out_format == FMT_MPEG1                     &&
1864             90000LL * (avctx->rc_buffer_size - 1) <=
1865                 s->avctx->rc_max_rate * 0xFFFFLL) {
1866             int vbv_delay, min_delay;
1867             double inbits  = s->avctx->rc_max_rate *
1868                              av_q2d(s->avctx->time_base);
1869             int    minbits = s->frame_bits - 8 *
1870                              (s->vbv_delay_ptr - s->pb.buf - 1);
1871             double bits    = s->rc_context.buffer_index + minbits - inbits;
1872
1873             if (bits < 0)
1874                 av_log(s->avctx, AV_LOG_ERROR,
1875                        "Internal error, negative bits\n");
1876
1877             assert(s->repeat_first_field == 0);
1878
1879             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1880             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1881                         s->avctx->rc_max_rate;
1882
1883             vbv_delay = FFMAX(vbv_delay, min_delay);
1884
1885             av_assert0(vbv_delay < 0xFFFF);
1886
1887             s->vbv_delay_ptr[0] &= 0xF8;
1888             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1889             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1890             s->vbv_delay_ptr[2] &= 0x07;
1891             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1892             avctx->vbv_delay     = vbv_delay * 300;
1893         }
1894         s->total_bits     += s->frame_bits;
1895         avctx->frame_bits  = s->frame_bits;
1896
1897         pkt->pts = s->current_picture.f->pts;
1898         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1899             if (!s->current_picture.f->coded_picture_number)
1900                 pkt->dts = pkt->pts - s->dts_delta;
1901             else
1902                 pkt->dts = s->reordered_pts;
1903             s->reordered_pts = pkt->pts;
1904         } else
1905             pkt->dts = pkt->pts;
1906         if (s->current_picture.f->key_frame)
1907             pkt->flags |= AV_PKT_FLAG_KEY;
1908         if (s->mb_info)
1909             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1910     } else {
1911         s->frame_bits = 0;
1912     }
1913
1914     /* release non-reference frames */
1915     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1916         if (!s->picture[i].reference)
1917             ff_mpeg_unref_picture(s, &s->picture[i]);
1918     }
1919
1920     av_assert1((s->frame_bits & 7) == 0);
1921
1922     pkt->size = s->frame_bits / 8;
1923     *got_packet = !!pkt->size;
1924     return 0;
1925 }
1926
1927 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1928                                                 int n, int threshold)
1929 {
1930     static const char tab[64] = {
1931         3, 2, 2, 1, 1, 1, 1, 1,
1932         1, 1, 1, 1, 1, 1, 1, 1,
1933         1, 1, 1, 1, 1, 1, 1, 1,
1934         0, 0, 0, 0, 0, 0, 0, 0,
1935         0, 0, 0, 0, 0, 0, 0, 0,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0,
1938         0, 0, 0, 0, 0, 0, 0, 0
1939     };
1940     int score = 0;
1941     int run = 0;
1942     int i;
1943     int16_t *block = s->block[n];
1944     const int last_index = s->block_last_index[n];
1945     int skip_dc;
1946
1947     if (threshold < 0) {
1948         skip_dc = 0;
1949         threshold = -threshold;
1950     } else
1951         skip_dc = 1;
1952
1953     /* Are all we could set to zero already zero? */
1954     if (last_index <= skip_dc - 1)
1955         return;
1956
1957     for (i = 0; i <= last_index; i++) {
1958         const int j = s->intra_scantable.permutated[i];
1959         const int level = FFABS(block[j]);
1960         if (level == 1) {
1961             if (skip_dc && i == 0)
1962                 continue;
1963             score += tab[run];
1964             run = 0;
1965         } else if (level > 1) {
1966             return;
1967         } else {
1968             run++;
1969         }
1970     }
1971     if (score >= threshold)
1972         return;
1973     for (i = skip_dc; i <= last_index; i++) {
1974         const int j = s->intra_scantable.permutated[i];
1975         block[j] = 0;
1976     }
1977     if (block[0])
1978         s->block_last_index[n] = 0;
1979     else
1980         s->block_last_index[n] = -1;
1981 }
1982
1983 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1984                                int last_index)
1985 {
1986     int i;
1987     const int maxlevel = s->max_qcoeff;
1988     const int minlevel = s->min_qcoeff;
1989     int overflow = 0;
1990
1991     if (s->mb_intra) {
1992         i = 1; // skip clipping of intra dc
1993     } else
1994         i = 0;
1995
1996     for (; i <= last_index; i++) {
1997         const int j = s->intra_scantable.permutated[i];
1998         int level = block[j];
1999
2000         if (level > maxlevel) {
2001             level = maxlevel;
2002             overflow++;
2003         } else if (level < minlevel) {
2004             level = minlevel;
2005             overflow++;
2006         }
2007
2008         block[j] = level;
2009     }
2010
2011     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2012         av_log(s->avctx, AV_LOG_INFO,
2013                "warning, clipping %d dct coefficients to %d..%d\n",
2014                overflow, minlevel, maxlevel);
2015 }
2016
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel statistics.
 *
 * For every pixel the sum and the sum of squares over its neighbourhood
 * (the pixel and its direct neighbours, clipped to the 8x8 block) are
 * gathered, and the weight is 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2040
2041 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2042                                                 int motion_x, int motion_y,
2043                                                 int mb_block_height,
2044                                                 int mb_block_width,
2045                                                 int mb_block_count)
2046 {
2047     int16_t weight[12][64];
2048     int16_t orig[12][64];
2049     const int mb_x = s->mb_x;
2050     const int mb_y = s->mb_y;
2051     int i;
2052     int skip_dct[12];
2053     int dct_offset = s->linesize * 8; // default for progressive frames
2054     int uv_dct_offset = s->uvlinesize * 8;
2055     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2056     ptrdiff_t wrap_y, wrap_c;
2057
2058     for (i = 0; i < mb_block_count; i++)
2059         skip_dct[i] = s->skipdct;
2060
2061     if (s->adaptive_quant) {
2062         const int last_qp = s->qscale;
2063         const int mb_xy = mb_x + mb_y * s->mb_stride;
2064
2065         s->lambda = s->lambda_table[mb_xy];
2066         update_qscale(s);
2067
2068         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2069             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2070             s->dquant = s->qscale - last_qp;
2071
2072             if (s->out_format == FMT_H263) {
2073                 s->dquant = av_clip(s->dquant, -2, 2);
2074
2075                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2076                     if (!s->mb_intra) {
2077                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2078                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2079                                 s->dquant = 0;
2080                         }
2081                         if (s->mv_type == MV_TYPE_8X8)
2082                             s->dquant = 0;
2083                     }
2084                 }
2085             }
2086         }
2087         ff_set_qscale(s, last_qp + s->dquant);
2088     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2089         ff_set_qscale(s, s->qscale + s->dquant);
2090
2091     wrap_y = s->linesize;
2092     wrap_c = s->uvlinesize;
2093     ptr_y  = s->new_picture.f->data[0] +
2094              (mb_y * 16 * wrap_y)              + mb_x * 16;
2095     ptr_cb = s->new_picture.f->data[1] +
2096              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2097     ptr_cr = s->new_picture.f->data[2] +
2098              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2099
2100     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2101         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2102         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2103         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2104         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2105                                  wrap_y, wrap_y,
2106                                  16, 16, mb_x * 16, mb_y * 16,
2107                                  s->width, s->height);
2108         ptr_y = ebuf;
2109         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2110                                  wrap_c, wrap_c,
2111                                  mb_block_width, mb_block_height,
2112                                  mb_x * mb_block_width, mb_y * mb_block_height,
2113                                  cw, ch);
2114         ptr_cb = ebuf + 16 * wrap_y;
2115         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2116                                  wrap_c, wrap_c,
2117                                  mb_block_width, mb_block_height,
2118                                  mb_x * mb_block_width, mb_y * mb_block_height,
2119                                  cw, ch);
2120         ptr_cr = ebuf + 16 * wrap_y + 16;
2121     }
2122
2123     if (s->mb_intra) {
2124         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2125             int progressive_score, interlaced_score;
2126
2127             s->interlaced_dct = 0;
2128             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2129                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2130                                                      NULL, wrap_y, 8) - 400;
2131
2132             if (progressive_score > 0) {
2133                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2134                                                         NULL, wrap_y * 2, 8) +
2135                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2136                                                         NULL, wrap_y * 2, 8);
2137                 if (progressive_score > interlaced_score) {
2138                     s->interlaced_dct = 1;
2139
2140                     dct_offset = wrap_y;
2141                     uv_dct_offset = wrap_c;
2142                     wrap_y <<= 1;
2143                     if (s->chroma_format == CHROMA_422 ||
2144                         s->chroma_format == CHROMA_444)
2145                         wrap_c <<= 1;
2146                 }
2147             }
2148         }
2149
2150         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2151         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2152         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2153         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2154
2155         if (s->flags & CODEC_FLAG_GRAY) {
2156             skip_dct[4] = 1;
2157             skip_dct[5] = 1;
2158         } else {
2159             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2160             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2161             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2162                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2163                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2164             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2165                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2166                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2167                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2168                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2169                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2170                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2171             }
2172         }
2173     } else {
2174         op_pixels_func (*op_pix)[4];
2175         qpel_mc_func (*op_qpix)[16];
2176         uint8_t *dest_y, *dest_cb, *dest_cr;
2177
2178         dest_y  = s->dest[0];
2179         dest_cb = s->dest[1];
2180         dest_cr = s->dest[2];
2181
2182         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2183             op_pix  = s->hdsp.put_pixels_tab;
2184             op_qpix = s->qdsp.put_qpel_pixels_tab;
2185         } else {
2186             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2187             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2188         }
2189
2190         if (s->mv_dir & MV_DIR_FORWARD) {
2191             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2192                           s->last_picture.f->data,
2193                           op_pix, op_qpix);
2194             op_pix  = s->hdsp.avg_pixels_tab;
2195             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2196         }
2197         if (s->mv_dir & MV_DIR_BACKWARD) {
2198             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2199                           s->next_picture.f->data,
2200                           op_pix, op_qpix);
2201         }
2202
2203         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2204             int progressive_score, interlaced_score;
2205
2206             s->interlaced_dct = 0;
2207             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2208                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2209                                                      ptr_y + wrap_y * 8,
2210                                                      wrap_y, 8) - 400;
2211
2212             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2213                 progressive_score -= 400;
2214
2215             if (progressive_score > 0) {
2216                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2217                                                         wrap_y * 2, 8) +
2218                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2219                                                         ptr_y + wrap_y,
2220                                                         wrap_y * 2, 8);
2221
2222                 if (progressive_score > interlaced_score) {
2223                     s->interlaced_dct = 1;
2224
2225                     dct_offset = wrap_y;
2226                     uv_dct_offset = wrap_c;
2227                     wrap_y <<= 1;
2228                     if (s->chroma_format == CHROMA_422)
2229                         wrap_c <<= 1;
2230                 }
2231             }
2232         }
2233
2234         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2235         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2236         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2237                             dest_y + dct_offset, wrap_y);
2238         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2239                             dest_y + dct_offset + 8, wrap_y);
2240
2241         if (s->flags & CODEC_FLAG_GRAY) {
2242             skip_dct[4] = 1;
2243             skip_dct[5] = 1;
2244         } else {
2245             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2246             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2247             if (!s->chroma_y_shift) { /* 422 */
2248                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2249                                     dest_cb + uv_dct_offset, wrap_c);
2250                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2251                                     dest_cr + uv_dct_offset, wrap_c);
2252             }
2253         }
2254         /* pre quantization */
2255         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2256                 2 * s->qscale * s->qscale) {
2257             // FIXME optimize
2258             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2259                 skip_dct[0] = 1;
2260             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2261                 skip_dct[1] = 1;
2262             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2263                                wrap_y, 8) < 20 * s->qscale)
2264                 skip_dct[2] = 1;
2265             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2266                                wrap_y, 8) < 20 * s->qscale)
2267                 skip_dct[3] = 1;
2268             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2269                 skip_dct[4] = 1;
2270             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2271                 skip_dct[5] = 1;
2272             if (!s->chroma_y_shift) { /* 422 */
2273                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2274                                    dest_cb + uv_dct_offset,
2275                                    wrap_c, 8) < 20 * s->qscale)
2276                     skip_dct[6] = 1;
2277                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2278                                    dest_cr + uv_dct_offset,
2279                                    wrap_c, 8) < 20 * s->qscale)
2280                     skip_dct[7] = 1;
2281             }
2282         }
2283     }
2284
2285     if (s->quantizer_noise_shaping) {
2286         if (!skip_dct[0])
2287             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2288         if (!skip_dct[1])
2289             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2290         if (!skip_dct[2])
2291             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2292         if (!skip_dct[3])
2293             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2294         if (!skip_dct[4])
2295             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2296         if (!skip_dct[5])
2297             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2298         if (!s->chroma_y_shift) { /* 422 */
2299             if (!skip_dct[6])
2300                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2301                                   wrap_c);
2302             if (!skip_dct[7])
2303                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2304                                   wrap_c);
2305         }
2306         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2307     }
2308
2309     /* DCT & quantize */
2310     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2311     {
2312         for (i = 0; i < mb_block_count; i++) {
2313             if (!skip_dct[i]) {
2314                 int overflow;
2315                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2316                 // FIXME we could decide to change to quantizer instead of
2317                 // clipping
2318                 // JS: I don't think that would be a good idea it could lower
2319                 //     quality instead of improve it. Just INTRADC clipping
2320                 //     deserves changes in quantizer
2321                 if (overflow)
2322                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2323             } else
2324                 s->block_last_index[i] = -1;
2325         }
2326         if (s->quantizer_noise_shaping) {
2327             for (i = 0; i < mb_block_count; i++) {
2328                 if (!skip_dct[i]) {
2329                     s->block_last_index[i] =
2330                         dct_quantize_refine(s, s->block[i], weight[i],
2331                                             orig[i], i, s->qscale);
2332                 }
2333             }
2334         }
2335
2336         if (s->luma_elim_threshold && !s->mb_intra)
2337             for (i = 0; i < 4; i++)
2338                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2339         if (s->chroma_elim_threshold && !s->mb_intra)
2340             for (i = 4; i < mb_block_count; i++)
2341                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2342
2343         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2344             for (i = 0; i < mb_block_count; i++) {
2345                 if (s->block_last_index[i] == -1)
2346                     s->coded_score[i] = INT_MAX / 256;
2347             }
2348         }
2349     }
2350
2351     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2352         s->block_last_index[4] =
2353         s->block_last_index[5] = 0;
2354         s->block[4][0] =
2355         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2356         if (!s->chroma_y_shift) { /* 422 / 444 */
2357             for (i=6; i<12; i++) {
2358                 s->block_last_index[i] = 0;
2359                 s->block[i][0] = s->block[4][0];
2360             }
2361         }
2362     }
2363
2364     // non c quantize code returns incorrect block_last_index FIXME
2365     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2366         for (i = 0; i < mb_block_count; i++) {
2367             int j;
2368             if (s->block_last_index[i] > 0) {
2369                 for (j = 63; j > 0; j--) {
2370                     if (s->block[i][s->intra_scantable.permutated[j]])
2371                         break;
2372                 }
2373                 s->block_last_index[i] = j;
2374             }
2375         }
2376     }
2377
2378     /* huffman encode */
2379     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2380     case AV_CODEC_ID_MPEG1VIDEO:
2381     case AV_CODEC_ID_MPEG2VIDEO:
2382         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2383             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2384         break;
2385     case AV_CODEC_ID_MPEG4:
2386         if (CONFIG_MPEG4_ENCODER)
2387             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2388         break;
2389     case AV_CODEC_ID_MSMPEG4V2:
2390     case AV_CODEC_ID_MSMPEG4V3:
2391     case AV_CODEC_ID_WMV1:
2392         if (CONFIG_MSMPEG4_ENCODER)
2393             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2394         break;
2395     case AV_CODEC_ID_WMV2:
2396         if (CONFIG_WMV2_ENCODER)
2397             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2398         break;
2399     case AV_CODEC_ID_H261:
2400         if (CONFIG_H261_ENCODER)
2401             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2402         break;
2403     case AV_CODEC_ID_H263:
2404     case AV_CODEC_ID_H263P:
2405     case AV_CODEC_ID_FLV1:
2406     case AV_CODEC_ID_RV10:
2407     case AV_CODEC_ID_RV20:
2408         if (CONFIG_H263_ENCODER)
2409             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2410         break;
2411     case AV_CODEC_ID_MJPEG:
2412     case AV_CODEC_ID_AMV:
2413         if (CONFIG_MJPEG_ENCODER)
2414             ff_mjpeg_encode_mb(s, s->block);
2415         break;
2416     default:
2417         av_assert1(0);
2418     }
2419 }
2420
2421 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2422 {
2423     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2424     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2425     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2426 }
2427
2428 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2429     int i;
2430
2431     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2432
2433     /* mpeg1 */
2434     d->mb_skip_run= s->mb_skip_run;
2435     for(i=0; i<3; i++)
2436         d->last_dc[i] = s->last_dc[i];
2437
2438     /* statistics */
2439     d->mv_bits= s->mv_bits;
2440     d->i_tex_bits= s->i_tex_bits;
2441     d->p_tex_bits= s->p_tex_bits;
2442     d->i_count= s->i_count;
2443     d->f_count= s->f_count;
2444     d->b_count= s->b_count;
2445     d->skip_count= s->skip_count;
2446     d->misc_bits= s->misc_bits;
2447     d->last_bits= 0;
2448
2449     d->mb_skipped= 0;
2450     d->qscale= s->qscale;
2451     d->dquant= s->dquant;
2452
2453     d->esc3_level_length= s->esc3_level_length;
2454 }
2455
2456 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2457     int i;
2458
2459     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2460     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2461
2462     /* mpeg1 */
2463     d->mb_skip_run= s->mb_skip_run;
2464     for(i=0; i<3; i++)
2465         d->last_dc[i] = s->last_dc[i];
2466
2467     /* statistics */
2468     d->mv_bits= s->mv_bits;
2469     d->i_tex_bits= s->i_tex_bits;
2470     d->p_tex_bits= s->p_tex_bits;
2471     d->i_count= s->i_count;
2472     d->f_count= s->f_count;
2473     d->b_count= s->b_count;
2474     d->skip_count= s->skip_count;
2475     d->misc_bits= s->misc_bits;
2476
2477     d->mb_intra= s->mb_intra;
2478     d->mb_skipped= s->mb_skipped;
2479     d->mv_type= s->mv_type;
2480     d->mv_dir= s->mv_dir;
2481     d->pb= s->pb;
2482     if(s->data_partitioning){
2483         d->pb2= s->pb2;
2484         d->tex_pb= s->tex_pb;
2485     }
2486     d->block= s->block;
2487     for(i=0; i<8; i++)
2488         d->block_last_index[i]= s->block_last_index[i];
2489     d->interlaced_dct= s->interlaced_dct;
2490     d->qscale= s->qscale;
2491
2492     d->esc3_level_length= s->esc3_level_length;
2493 }
2494
2495 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2496                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2497                            int *dmin, int *next_block, int motion_x, int motion_y)
2498 {
2499     int score;
2500     uint8_t *dest_backup[3];
2501
2502     copy_context_before_encode(s, backup, type);
2503
2504     s->block= s->blocks[*next_block];
2505     s->pb= pb[*next_block];
2506     if(s->data_partitioning){
2507         s->pb2   = pb2   [*next_block];
2508         s->tex_pb= tex_pb[*next_block];
2509     }
2510
2511     if(*next_block){
2512         memcpy(dest_backup, s->dest, sizeof(s->dest));
2513         s->dest[0] = s->rd_scratchpad;
2514         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2515         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2516         av_assert0(s->linesize >= 32); //FIXME
2517     }
2518
2519     encode_mb(s, motion_x, motion_y);
2520
2521     score= put_bits_count(&s->pb);
2522     if(s->data_partitioning){
2523         score+= put_bits_count(&s->pb2);
2524         score+= put_bits_count(&s->tex_pb);
2525     }
2526
2527     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2528         ff_mpv_decode_mb(s, s->block);
2529
2530         score *= s->lambda2;
2531         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2532     }
2533
2534     if(*next_block){
2535         memcpy(s->dest, dest_backup, sizeof(s->dest));
2536     }
2537
2538     if(score<*dmin){
2539         *dmin= score;
2540         *next_block^=1;
2541
2542         copy_context_after_encode(best, s, type);
2543     }
2544 }
2545
2546 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2547     uint32_t *sq = ff_square_tab + 256;
2548     int acc=0;
2549     int x,y;
2550
2551     if(w==16 && h==16)
2552         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2553     else if(w==8 && h==8)
2554         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2555
2556     for(y=0; y<h; y++){
2557         for(x=0; x<w; x++){
2558             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2559         }
2560     }
2561
2562     av_assert2(acc>=0);
2563
2564     return acc;
2565 }
2566
2567 static int sse_mb(MpegEncContext *s){
2568     int w= 16;
2569     int h= 16;
2570
2571     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2572     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2573
2574     if(w==16 && h==16)
2575       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2576         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2577                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2578                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2579       }else{
2580         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2581                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2582                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2583       }
2584     else
2585         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2586                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2587                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2588 }
2589
2590 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2591     MpegEncContext *s= *(void**)arg;
2592
2593
2594     s->me.pre_pass=1;
2595     s->me.dia_size= s->avctx->pre_dia_size;
2596     s->first_slice_line=1;
2597     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2598         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2599             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2600         }
2601         s->first_slice_line=0;
2602     }
2603
2604     s->me.pre_pass=0;
2605
2606     return 0;
2607 }
2608
2609 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2610     MpegEncContext *s= *(void**)arg;
2611
2612     ff_check_alignment();
2613
2614     s->me.dia_size= s->avctx->dia_size;
2615     s->first_slice_line=1;
2616     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2617         s->mb_x=0; //for block init below
2618         ff_init_block_index(s);
2619         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2620             s->block_index[0]+=2;
2621             s->block_index[1]+=2;
2622             s->block_index[2]+=2;
2623             s->block_index[3]+=2;
2624
2625             /* compute motion vector & mb_type and store in context */
2626             if(s->pict_type==AV_PICTURE_TYPE_B)
2627                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2628             else
2629                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2630         }
2631         s->first_slice_line=0;
2632     }
2633     return 0;
2634 }
2635
2636 static int mb_var_thread(AVCodecContext *c, void *arg){
2637     MpegEncContext *s= *(void**)arg;
2638     int mb_x, mb_y;
2639
2640     ff_check_alignment();
2641
2642     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2643         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2644             int xx = mb_x * 16;
2645             int yy = mb_y * 16;
2646             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2647             int varc;
2648             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2649
2650             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2651                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2652
2653             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2654             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2655             s->me.mb_var_sum_temp    += varc;
2656         }
2657     }
2658     return 0;
2659 }
2660
2661 static void write_slice_end(MpegEncContext *s){
2662     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2663         if(s->partitioned_frame){
2664             ff_mpeg4_merge_partitions(s);
2665         }
2666
2667         ff_mpeg4_stuffing(&s->pb);
2668     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2669         ff_mjpeg_encode_stuffing(s);
2670     }
2671
2672     avpriv_align_put_bits(&s->pb);
2673     flush_put_bits(&s->pb);
2674
2675     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2676         s->misc_bits+= get_bits_diff(s);
2677 }
2678
2679 static void write_mb_info(MpegEncContext *s)
2680 {
2681     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2682     int offset = put_bits_count(&s->pb);
2683     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2684     int gobn = s->mb_y / s->gob_index;
2685     int pred_x, pred_y;
2686     if (CONFIG_H263_ENCODER)
2687         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2688     bytestream_put_le32(&ptr, offset);
2689     bytestream_put_byte(&ptr, s->qscale);
2690     bytestream_put_byte(&ptr, gobn);
2691     bytestream_put_le16(&ptr, mba);
2692     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2693     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2694     /* 4MV not implemented */
2695     bytestream_put_byte(&ptr, 0); /* hmv2 */
2696     bytestream_put_byte(&ptr, 0); /* vmv2 */
2697 }
2698
2699 static void update_mb_info(MpegEncContext *s, int startcode)
2700 {
2701     if (!s->mb_info)
2702         return;
2703     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2704         s->mb_info_size += 12;
2705         s->prev_mb_info = s->last_mb_info;
2706     }
2707     if (startcode) {
2708         s->prev_mb_info = put_bits_count(&s->pb)/8;
2709         /* This might have incremented mb_info_size above, and we return without
2710          * actually writing any info into that slot yet. But in that case,
2711          * this will be called again at the start of the after writing the
2712          * start code, actually writing the mb info. */
2713         return;
2714     }
2715
2716     s->last_mb_info = put_bits_count(&s->pb)/8;
2717     if (!s->mb_info_size)
2718         s->mb_info_size += 12;
2719     write_mb_info(s);
2720 }
2721
2722 static int encode_thread(AVCodecContext *c, void *arg){
2723     MpegEncContext *s= *(void**)arg;
2724     int mb_x, mb_y, pdif = 0;
2725     int chr_h= 16>>s->chroma_y_shift;
2726     int i, j;
2727     MpegEncContext best_s, backup_s;
2728     uint8_t bit_buf[2][MAX_MB_BYTES];
2729     uint8_t bit_buf2[2][MAX_MB_BYTES];
2730     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2731     PutBitContext pb[2], pb2[2], tex_pb[2];
2732
2733     ff_check_alignment();
2734
2735     for(i=0; i<2; i++){
2736         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2737         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2738         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2739     }
2740
2741     s->last_bits= put_bits_count(&s->pb);
2742     s->mv_bits=0;
2743     s->misc_bits=0;
2744     s->i_tex_bits=0;
2745     s->p_tex_bits=0;
2746     s->i_count=0;
2747     s->f_count=0;
2748     s->b_count=0;
2749     s->skip_count=0;
2750
2751     for(i=0; i<3; i++){
2752         /* init last dc values */
2753         /* note: quant matrix value (8) is implied here */
2754         s->last_dc[i] = 128 << s->intra_dc_precision;
2755
2756         s->current_picture.error[i] = 0;
2757     }
2758     if(s->codec_id==AV_CODEC_ID_AMV){
2759         s->last_dc[0] = 128*8/13;
2760         s->last_dc[1] = 128*8/14;
2761         s->last_dc[2] = 128*8/14;
2762     }
2763     s->mb_skip_run = 0;
2764     memset(s->last_mv, 0, sizeof(s->last_mv));
2765
2766     s->last_mv_dir = 0;
2767
2768     switch(s->codec_id){
2769     case AV_CODEC_ID_H263:
2770     case AV_CODEC_ID_H263P:
2771     case AV_CODEC_ID_FLV1:
2772         if (CONFIG_H263_ENCODER)
2773             s->gob_index = ff_h263_get_gob_height(s);
2774         break;
2775     case AV_CODEC_ID_MPEG4:
2776         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2777             ff_mpeg4_init_partitions(s);
2778         break;
2779     }
2780
2781     s->resync_mb_x=0;
2782     s->resync_mb_y=0;
2783     s->first_slice_line = 1;
2784     s->ptr_lastgob = s->pb.buf;
2785     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2786         s->mb_x=0;
2787         s->mb_y= mb_y;
2788
2789         ff_set_qscale(s, s->qscale);
2790         ff_init_block_index(s);
2791
2792         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2793             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2794             int mb_type= s->mb_type[xy];
2795 //            int d;
2796             int dmin= INT_MAX;
2797             int dir;
2798
2799             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2800                 && s->slice_context_count == 1
2801                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2802                 int new_size =  s->avctx->internal->byte_buffer_size
2803                               + s->avctx->internal->byte_buffer_size/4
2804                               + s->mb_width*MAX_MB_BYTES;
2805                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2806                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2807
2808                 uint8_t *new_buffer = NULL;
2809                 int new_buffer_size = 0;
2810
2811                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2812                 if (new_buffer) {
2813                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2814                     av_free(s->avctx->internal->byte_buffer);
2815                     s->avctx->internal->byte_buffer      = new_buffer;
2816                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2817                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2818                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2819                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2820                 }
2821             }
2822             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2823                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2824                 return -1;
2825             }
2826             if(s->data_partitioning){
2827                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2828                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2829                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2830                     return -1;
2831                 }
2832             }
2833
2834             s->mb_x = mb_x;
2835             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2836             ff_update_block_index(s);
2837
2838             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2839                 ff_h261_reorder_mb_index(s);
2840                 xy= s->mb_y*s->mb_stride + s->mb_x;
2841                 mb_type= s->mb_type[xy];
2842             }
2843
2844             /* write gob / video packet header  */
2845             if(s->rtp_mode){
2846                 int current_packet_size, is_gob_start;
2847
2848                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2849
2850                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2851
2852                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2853
2854                 switch(s->codec_id){
2855                 case AV_CODEC_ID_H261:
2856                     is_gob_start=0;//FIXME
2857                     break;
2858                 case AV_CODEC_ID_H263:
2859                 case AV_CODEC_ID_H263P:
2860                     if(!s->h263_slice_structured)
2861                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2862                     break;
2863                 case AV_CODEC_ID_MPEG2VIDEO:
2864                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2865                 case AV_CODEC_ID_MPEG1VIDEO:
2866                     if(s->mb_skip_run) is_gob_start=0;
2867                     break;
2868                 case AV_CODEC_ID_MJPEG:
2869                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2870                     break;
2871                 }
2872
2873                 if(is_gob_start){
2874                     if(s->start_mb_y != mb_y || mb_x!=0){
2875                         write_slice_end(s);
2876
2877                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2878                             ff_mpeg4_init_partitions(s);
2879                         }
2880                     }
2881
2882                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2883                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2884
2885                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2886                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2887                         int d = 100 / s->error_rate;
2888                         if(r % d == 0){
2889                             current_packet_size=0;
2890                             s->pb.buf_ptr= s->ptr_lastgob;
2891                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2892                         }
2893                     }
2894
2895                     if (s->avctx->rtp_callback){
2896                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2897                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2898                     }
2899                     update_mb_info(s, 1);
2900
2901                     switch(s->codec_id){
2902                     case AV_CODEC_ID_MPEG4:
2903                         if (CONFIG_MPEG4_ENCODER) {
2904                             ff_mpeg4_encode_video_packet_header(s);
2905                             ff_mpeg4_clean_buffers(s);
2906                         }
2907                     break;
2908                     case AV_CODEC_ID_MPEG1VIDEO:
2909                     case AV_CODEC_ID_MPEG2VIDEO:
2910                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2911                             ff_mpeg1_encode_slice_header(s);
2912                             ff_mpeg1_clean_buffers(s);
2913                         }
2914                     break;
2915                     case AV_CODEC_ID_H263:
2916                     case AV_CODEC_ID_H263P:
2917                         if (CONFIG_H263_ENCODER)
2918                             ff_h263_encode_gob_header(s, mb_y);
2919                     break;
2920                     }
2921
2922                     if(s->flags&CODEC_FLAG_PASS1){
2923                         int bits= put_bits_count(&s->pb);
2924                         s->misc_bits+= bits - s->last_bits;
2925                         s->last_bits= bits;
2926                     }
2927
2928                     s->ptr_lastgob += current_packet_size;
2929                     s->first_slice_line=1;
2930                     s->resync_mb_x=mb_x;
2931                     s->resync_mb_y=mb_y;
2932                 }
2933             }
2934
2935             if(  (s->resync_mb_x   == s->mb_x)
2936                && s->resync_mb_y+1 == s->mb_y){
2937                 s->first_slice_line=0;
2938             }
2939
2940             s->mb_skipped=0;
2941             s->dquant=0; //only for QP_RD
2942
2943             update_mb_info(s, 0);
2944
2945             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2946                 int next_block=0;
2947                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2948
2949                 copy_context_before_encode(&backup_s, s, -1);
2950                 backup_s.pb= s->pb;
2951                 best_s.data_partitioning= s->data_partitioning;
2952                 best_s.partitioned_frame= s->partitioned_frame;
2953                 if(s->data_partitioning){
2954                     backup_s.pb2= s->pb2;
2955                     backup_s.tex_pb= s->tex_pb;
2956                 }
2957
2958                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2959                     s->mv_dir = MV_DIR_FORWARD;
2960                     s->mv_type = MV_TYPE_16X16;
2961                     s->mb_intra= 0;
2962                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2963                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2964                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2965                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2966                 }
2967                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2968                     s->mv_dir = MV_DIR_FORWARD;
2969                     s->mv_type = MV_TYPE_FIELD;
2970                     s->mb_intra= 0;
2971                     for(i=0; i<2; i++){
2972                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2973                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2974                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2975                     }
2976                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2977                                  &dmin, &next_block, 0, 0);
2978                 }
2979                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2980                     s->mv_dir = MV_DIR_FORWARD;
2981                     s->mv_type = MV_TYPE_16X16;
2982                     s->mb_intra= 0;
2983                     s->mv[0][0][0] = 0;
2984                     s->mv[0][0][1] = 0;
2985                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2986                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2987                 }
2988                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2989                     s->mv_dir = MV_DIR_FORWARD;
2990                     s->mv_type = MV_TYPE_8X8;
2991                     s->mb_intra= 0;
2992                     for(i=0; i<4; i++){
2993                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2994                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2995                     }
2996                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2997                                  &dmin, &next_block, 0, 0);
2998                 }
2999                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3000                     s->mv_dir = MV_DIR_FORWARD;
3001                     s->mv_type = MV_TYPE_16X16;
3002                     s->mb_intra= 0;
3003                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3004                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3005                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3006                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3007                 }
3008                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3009                     s->mv_dir = MV_DIR_BACKWARD;
3010                     s->mv_type = MV_TYPE_16X16;
3011                     s->mb_intra= 0;
3012                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3013                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3014                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3015                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3016                 }
3017                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3018                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3019                     s->mv_type = MV_TYPE_16X16;
3020                     s->mb_intra= 0;
3021                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3022                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3023                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3024                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3025                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3026                                  &dmin, &next_block, 0, 0);
3027                 }
3028                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3029                     s->mv_dir = MV_DIR_FORWARD;
3030                     s->mv_type = MV_TYPE_FIELD;
3031                     s->mb_intra= 0;
3032                     for(i=0; i<2; i++){
3033                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3034                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3035                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3036                     }
3037                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3038                                  &dmin, &next_block, 0, 0);
3039                 }
3040                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3041                     s->mv_dir = MV_DIR_BACKWARD;
3042                     s->mv_type = MV_TYPE_FIELD;
3043                     s->mb_intra= 0;
3044                     for(i=0; i<2; i++){
3045                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3046                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3047                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3048                     }
3049                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3050                                  &dmin, &next_block, 0, 0);
3051                 }
3052                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3053                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3054                     s->mv_type = MV_TYPE_FIELD;
3055                     s->mb_intra= 0;
3056                     for(dir=0; dir<2; dir++){
3057                         for(i=0; i<2; i++){
3058                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3059                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3060                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3061                         }
3062                     }
3063                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3064                                  &dmin, &next_block, 0, 0);
3065                 }
3066                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3067                     s->mv_dir = 0;
3068                     s->mv_type = MV_TYPE_16X16;
3069                     s->mb_intra= 1;
3070                     s->mv[0][0][0] = 0;
3071                     s->mv[0][0][1] = 0;
3072                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3073                                  &dmin, &next_block, 0, 0);
3074                     if(s->h263_pred || s->h263_aic){
3075                         if(best_s.mb_intra)
3076                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3077                         else
3078                             ff_clean_intra_table_entries(s); //old mode?
3079                     }
3080                 }
3081
3082                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3083                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3084                         const int last_qp= backup_s.qscale;
3085                         int qpi, qp, dc[6];
3086                         int16_t ac[6][16];
3087                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3088                         static const int dquant_tab[4]={-1,1,-2,2};
3089                         int storecoefs = s->mb_intra && s->dc_val[0];
3090
3091                         av_assert2(backup_s.dquant == 0);
3092
3093                         //FIXME intra
3094                         s->mv_dir= best_s.mv_dir;
3095                         s->mv_type = MV_TYPE_16X16;
3096                         s->mb_intra= best_s.mb_intra;
3097                         s->mv[0][0][0] = best_s.mv[0][0][0];
3098                         s->mv[0][0][1] = best_s.mv[0][0][1];
3099                         s->mv[1][0][0] = best_s.mv[1][0][0];
3100                         s->mv[1][0][1] = best_s.mv[1][0][1];
3101
3102                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3103                         for(; qpi<4; qpi++){
3104                             int dquant= dquant_tab[qpi];
3105                             qp= last_qp + dquant;
3106                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3107                                 continue;
3108                             backup_s.dquant= dquant;
3109                             if(storecoefs){
3110                                 for(i=0; i<6; i++){
3111                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3112                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3113                                 }
3114                             }
3115
3116                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3117                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3118                             if(best_s.qscale != qp){
3119                                 if(storecoefs){
3120                                     for(i=0; i<6; i++){
3121                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3122                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3123                                     }
3124                                 }
3125                             }
3126                         }
3127                     }
3128                 }
3129                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3130                     int mx= s->b_direct_mv_table[xy][0];
3131                     int my= s->b_direct_mv_table[xy][1];
3132
3133                     backup_s.dquant = 0;
3134                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3135                     s->mb_intra= 0;
3136                     ff_mpeg4_set_direct_mv(s, mx, my);
3137                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3138                                  &dmin, &next_block, mx, my);
3139                 }
3140                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3141                     backup_s.dquant = 0;
3142                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3143                     s->mb_intra= 0;
3144                     ff_mpeg4_set_direct_mv(s, 0, 0);
3145                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3146                                  &dmin, &next_block, 0, 0);
3147                 }
3148                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3149                     int coded=0;
3150                     for(i=0; i<6; i++)
3151                         coded |= s->block_last_index[i];
3152                     if(coded){
3153                         int mx,my;
3154                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3155                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3156                             mx=my=0; //FIXME find the one we actually used
3157                             ff_mpeg4_set_direct_mv(s, mx, my);
3158                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3159                             mx= s->mv[1][0][0];
3160                             my= s->mv[1][0][1];
3161                         }else{
3162                             mx= s->mv[0][0][0];
3163                             my= s->mv[0][0][1];
3164                         }
3165
3166                         s->mv_dir= best_s.mv_dir;
3167                         s->mv_type = best_s.mv_type;
3168                         s->mb_intra= 0;
3169 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3170                         s->mv[0][0][1] = best_s.mv[0][0][1];
3171                         s->mv[1][0][0] = best_s.mv[1][0][0];
3172                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3173                         backup_s.dquant= 0;
3174                         s->skipdct=1;
3175                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3176                                         &dmin, &next_block, mx, my);
3177                         s->skipdct=0;
3178                     }
3179                 }
3180
3181                 s->current_picture.qscale_table[xy] = best_s.qscale;
3182
3183                 copy_context_after_encode(s, &best_s, -1);
3184
3185                 pb_bits_count= put_bits_count(&s->pb);
3186                 flush_put_bits(&s->pb);
3187                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3188                 s->pb= backup_s.pb;
3189
3190                 if(s->data_partitioning){
3191                     pb2_bits_count= put_bits_count(&s->pb2);
3192                     flush_put_bits(&s->pb2);
3193                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3194                     s->pb2= backup_s.pb2;
3195
3196                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3197                     flush_put_bits(&s->tex_pb);
3198                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3199                     s->tex_pb= backup_s.tex_pb;
3200                 }
3201                 s->last_bits= put_bits_count(&s->pb);
3202
3203                 if (CONFIG_H263_ENCODER &&
3204                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3205                     ff_h263_update_motion_val(s);
3206
3207                 if(next_block==0){ //FIXME 16 vs linesize16
3208                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3209                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3210                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3211                 }
3212
3213                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3214                     ff_mpv_decode_mb(s, s->block);
3215             } else {
3216                 int motion_x = 0, motion_y = 0;
3217                 s->mv_type=MV_TYPE_16X16;
3218                 // only one MB-Type possible
3219
3220                 switch(mb_type){
3221                 case CANDIDATE_MB_TYPE_INTRA:
3222                     s->mv_dir = 0;
3223                     s->mb_intra= 1;
3224                     motion_x= s->mv[0][0][0] = 0;
3225                     motion_y= s->mv[0][0][1] = 0;
3226                     break;
3227                 case CANDIDATE_MB_TYPE_INTER:
3228                     s->mv_dir = MV_DIR_FORWARD;
3229                     s->mb_intra= 0;
3230                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3231                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3232                     break;
3233                 case CANDIDATE_MB_TYPE_INTER_I:
3234                     s->mv_dir = MV_DIR_FORWARD;
3235                     s->mv_type = MV_TYPE_FIELD;
3236                     s->mb_intra= 0;
3237                     for(i=0; i<2; i++){
3238                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3239                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3240                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3241                     }
3242                     break;
3243                 case CANDIDATE_MB_TYPE_INTER4V:
3244                     s->mv_dir = MV_DIR_FORWARD;
3245                     s->mv_type = MV_TYPE_8X8;
3246                     s->mb_intra= 0;
3247                     for(i=0; i<4; i++){
3248                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3249                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3250                     }
3251                     break;
3252                 case CANDIDATE_MB_TYPE_DIRECT:
3253                     if (CONFIG_MPEG4_ENCODER) {
3254                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3255                         s->mb_intra= 0;
3256                         motion_x=s->b_direct_mv_table[xy][0];
3257                         motion_y=s->b_direct_mv_table[xy][1];
3258                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3259                     }
3260                     break;
3261                 case CANDIDATE_MB_TYPE_DIRECT0:
3262                     if (CONFIG_MPEG4_ENCODER) {
3263                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3264                         s->mb_intra= 0;
3265                         ff_mpeg4_set_direct_mv(s, 0, 0);
3266                     }
3267                     break;
3268                 case CANDIDATE_MB_TYPE_BIDIR:
3269                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3270                     s->mb_intra= 0;
3271                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3272                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3273                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3274                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3275                     break;
3276                 case CANDIDATE_MB_TYPE_BACKWARD:
3277                     s->mv_dir = MV_DIR_BACKWARD;
3278                     s->mb_intra= 0;
3279                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3280                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3281                     break;
3282                 case CANDIDATE_MB_TYPE_FORWARD:
3283                     s->mv_dir = MV_DIR_FORWARD;
3284                     s->mb_intra= 0;
3285                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3286                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3287                     break;
3288                 case CANDIDATE_MB_TYPE_FORWARD_I:
3289                     s->mv_dir = MV_DIR_FORWARD;
3290                     s->mv_type = MV_TYPE_FIELD;
3291                     s->mb_intra= 0;
3292                     for(i=0; i<2; i++){
3293                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3294                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3295                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3296                     }
3297                     break;
3298                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3299                     s->mv_dir = MV_DIR_BACKWARD;
3300                     s->mv_type = MV_TYPE_FIELD;
3301                     s->mb_intra= 0;
3302                     for(i=0; i<2; i++){
3303                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3304                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3305                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3306                     }
3307                     break;
3308                 case CANDIDATE_MB_TYPE_BIDIR_I:
3309                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3310                     s->mv_type = MV_TYPE_FIELD;
3311                     s->mb_intra= 0;
3312                     for(dir=0; dir<2; dir++){
3313                         for(i=0; i<2; i++){
3314                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3315                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3316                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3317                         }
3318                     }
3319                     break;
3320                 default:
3321                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3322                 }
3323
3324                 encode_mb(s, motion_x, motion_y);
3325
3326                 // RAL: Update last macroblock type
3327                 s->last_mv_dir = s->mv_dir;
3328
3329                 if (CONFIG_H263_ENCODER &&
3330                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3331                     ff_h263_update_motion_val(s);
3332
3333                 ff_mpv_decode_mb(s, s->block);
3334             }
3335
3336             /* clean the MV table in IPS frames for direct mode in B frames */
3337             if(s->mb_intra /* && I,P,S_TYPE */){
3338                 s->p_mv_table[xy][0]=0;
3339                 s->p_mv_table[xy][1]=0;
3340             }
3341
3342             if(s->flags&CODEC_FLAG_PSNR){
3343                 int w= 16;
3344                 int h= 16;
3345
3346                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3347                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3348
3349                 s->current_picture.error[0] += sse(
3350                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3351                     s->dest[0], w, h, s->linesize);
3352                 s->current_picture.error[1] += sse(
3353                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3354                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3355                 s->current_picture.error[2] += sse(
3356                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3357                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3358             }
3359             if(s->loop_filter){
3360                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3361                     ff_h263_loop_filter(s);
3362             }
3363             av_dlog(s->avctx, "MB %d %d bits\n",
3364                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3365         }
3366     }
3367
3368     //not beautiful here but we must write it before flushing so it has to be here
3369     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3370         ff_msmpeg4_encode_ext_header(s);
3371
3372     write_slice_end(s);
3373
3374     /* Send the last GOB if RTP */
3375     if (s->avctx->rtp_callback) {
3376         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3377         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3378         /* Call the RTP callback to send the last GOB */
3379         emms_c();
3380         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3381     }
3382
3383     return 0;
3384 }
3385
/**
 * Add src->field to dst->field, then reset src->field to 0.
 *
 * Pointers named dst and src must be in scope where the macro is expanded.
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * "MERGE(x);" stays correct inside an unbraced if/else or loop body.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3387 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3388     MERGE(me.scene_change_score);
3389     MERGE(me.mc_mb_var_sum_temp);
3390     MERGE(me.mb_var_sum_temp);
3391 }
3392
3393 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3394     int i;
3395
3396     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3397     MERGE(dct_count[1]);
3398     MERGE(mv_bits);
3399     MERGE(i_tex_bits);
3400     MERGE(p_tex_bits);
3401     MERGE(i_count);
3402     MERGE(f_count);
3403     MERGE(b_count);
3404     MERGE(skip_count);
3405     MERGE(misc_bits);
3406     MERGE(er.error_count);
3407     MERGE(padding_bug_score);
3408     MERGE(current_picture.error[0]);
3409     MERGE(current_picture.error[1]);
3410     MERGE(current_picture.error[2]);
3411
3412     if(dst->avctx->noise_reduction){
3413         for(i=0; i<64; i++){
3414             MERGE(dct_error_sum[0][i]);
3415             MERGE(dct_error_sum[1][i]);
3416         }
3417     }
3418
3419     assert(put_bits_count(&src->pb) % 8 ==0);
3420     assert(put_bits_count(&dst->pb) % 8 ==0);
3421     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3422     flush_put_bits(&dst->pb);
3423 }
3424
3425 static int estimate_qp(MpegEncContext *s, int dry_run){
3426     if (s->next_lambda){
3427         s->current_picture_ptr->f->quality =
3428         s->current_picture.f->quality = s->next_lambda;
3429         if(!dry_run) s->next_lambda= 0;
3430     } else if (!s->fixed_qscale) {
3431         s->current_picture_ptr->f->quality =
3432         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3433         if (s->current_picture.f->quality < 0)
3434             return -1;
3435     }
3436
3437     if(s->adaptive_quant){
3438         switch(s->codec_id){
3439         case AV_CODEC_ID_MPEG4:
3440             if (CONFIG_MPEG4_ENCODER)
3441                 ff_clean_mpeg4_qscales(s);
3442             break;
3443         case AV_CODEC_ID_H263:
3444         case AV_CODEC_ID_H263P:
3445         case AV_CODEC_ID_FLV1:
3446             if (CONFIG_H263_ENCODER)
3447                 ff_clean_h263_qscales(s);
3448             break;
3449         default:
3450             ff_init_qscale_tab(s);
3451         }
3452
3453         s->lambda= s->lambda_table[0];
3454         //FIXME broken
3455     }else
3456         s->lambda = s->current_picture.f->quality;
3457     update_qscale(s);
3458     return 0;
3459 }
3460
3461 /* must be called before writing the header */
3462 static void set_frame_distances(MpegEncContext * s){
3463     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3464     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3465
3466     if(s->pict_type==AV_PICTURE_TYPE_B){
3467         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3468         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3469     }else{
3470         s->pp_time= s->time - s->last_non_b_time;
3471         s->last_non_b_time= s->time;
3472         assert(s->picture_number==0 || s->pp_time > 0);
3473     }
3474 }
3475
3476 static int encode_picture(MpegEncContext *s, int picture_number)
3477 {
         /*
          * Encode the current picture.
          *
          * In order: set up time variables, rounding and lambda/qscale, run
          * motion estimation over all slice contexts (or mark every MB intra
          * for I pictures), optionally turn a P picture into an I picture on
          * a scene change, choose f_code/b_code and clip long MVs, estimate
          * the final QP, set up the MJPEG/AMV quant matrices, write the
          * format-specific picture header and finally run encode_thread over
          * all slice contexts and merge their results.
          *
          * Returns 0 on success, -1 if QP estimation or ff_init_me() fails,
          * or the negative value returned by ff_update_duplicate_context().
          */
3478     int i, ret;
3479     int bits;
3480     int context_count = s->slice_context_count;
3481
3482     s->picture_number = picture_number;
3483
3484     /* Reset the average MB variance */
3485     s->me.mb_var_sum_temp    =
3486     s->me.mc_mb_var_sum_temp = 0;
3487
3488     /* we need to initialize some time vars before we can encode b-frames */
3489     // RAL: Condition added for MPEG1VIDEO
3490     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3491         set_frame_distances(s);
3492     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3493         ff_set_mpeg4_time(s);
3494
3495     s->me.scene_change_score=0;
3496
3497 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3498
         /* I pictures set no_rounding from the msmpeg4 version; other non-B
          * pictures toggle it for flipflop rounding, H263P and MPEG4 */
3499     if(s->pict_type==AV_PICTURE_TYPE_I){
3500         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3501         else                        s->no_rounding=0;
3502     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3503         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3504             s->no_rounding ^= 1;
3505     }
3506
         /* second pass: dry-run QP estimate (dry_run=1); otherwise, unless a
          * fixed qscale is requested, start from the last lambda used for
          * this kind of picture */
3507     if(s->flags & CODEC_FLAG_PASS2){
3508         if (estimate_qp(s,1) < 0)
3509             return -1;
3510         ff_get_2pass_fcode(s);
3511     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3512         if(s->pict_type==AV_PICTURE_TYPE_B)
3513             s->lambda= s->last_lambda_for[s->pict_type];
3514         else
3515             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3516         update_qscale(s);
3517     }
3518
         /* for everything except AMV and MJPEG the chroma intra quant matrices
          * alias the luma ones; separately allocated ones are freed first */
3519     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3520         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3521         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3522         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3523         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3524     }
3525
3526     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* thread_context[0] is skipped here; the other slice contexts are
          * refreshed from s */
3527     for(i=1; i<context_count; i++){
3528         ret = ff_update_duplicate_context(s->thread_context[i], s);
3529         if (ret < 0)
3530             return ret;
3531     }
3532
3533     if(ff_init_me(s)<0)
3534         return -1;
3535
3536     /* Estimate motion for every MB */
3537     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3538         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3539         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3540         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: always with pre_me==2, otherwise only when pre_me
                  * is set and the last non-B picture was an I picture */
3541             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3542                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3543             }
3544         }
3545
3546         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3547     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3548         /* I-Frame */
3549         for(i=0; i<s->mb_stride*s->mb_height; i++)
3550             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3551
3552         if(!s->fixed_qscale){
3553             /* finding spatial complexity for I-frame rate control */
3554             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3555         }
3556     }
         /* collect the ME statistics of the other slice contexts into s */
3557     for(i=1; i<context_count; i++){
3558         merge_context_after_me(s, s->thread_context[i]);
3559     }
3560     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3561     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3562     emms_c();
3563
         /* scene change on a P picture: re-type it as I and force all MBs intra */
3564     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3565         s->pict_type= AV_PICTURE_TYPE_I;
3566         for(i=0; i<s->mb_stride*s->mb_height; i++)
3567             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3568         if(s->msmpeg4_version >= 3)
3569             s->no_rounding=1;
3570         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3571                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3572     }
3573
         /* without umvplus: pick f_code/b_code from the MV tables and then
          * run ff_fix_long_mvs() on each table with the chosen code */
3574     if(!s->umvplus){
3575         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3576             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3577
3578             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3579                 int a,b;
3580                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3581                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3582                 s->f_code= FFMAX3(s->f_code, a, b);
3583             }
3584
3585             ff_fix_long_p_mvs(s);
3586             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3587             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3588                 int j;
3589                 for(i=0; i<2; i++){
3590                     for(j=0; j<2; j++)
3591                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3592                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3593                 }
3594             }
3595         }
3596
3597         if(s->pict_type==AV_PICTURE_TYPE_B){
3598             int a, b;
3599
                 /* f_code covers the forward tables, b_code the backward ones */
3600             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3601             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3602             s->f_code = FFMAX(a, b);
3603
3604             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3605             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3606             s->b_code = FFMAX(a, b);
3607
3608             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3609             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3610             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3611             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3612             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3613                 int dir, j;
3614                 for(dir=0; dir<2; dir++){
3615                     for(i=0; i<2; i++){
3616                         for(j=0; j<2; j++){
3617                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3618                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3619                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3620                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3621                         }
3622                     }
3623                 }
3624             }
3625         }
3626     }
3627
         /* final (non dry-run) QP estimate for this picture */
3628     if (estimate_qp(s, 0) < 0)
3629         return -1;
3630
3631     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3632         s->qscale= 3; //reduce clipping problems
3633
3634     if (s->out_format == FMT_MJPEG) {
             /* default to the MPEG-1 intra matrix; a user intra_matrix replaces
              * both, a user chroma_intra_matrix replaces chroma only */
3635         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3636         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3637
3638         if (s->avctx->intra_matrix) {
3639             chroma_matrix =
3640             luma_matrix = s->avctx->intra_matrix;
3641         }
3642         if (s->avctx->chroma_intra_matrix)
3643             chroma_matrix = s->avctx->chroma_intra_matrix;
3644
3645         /* for mjpeg, we do include qscale in the matrix */
3646         for(i=1;i<64;i++){
3647             int j = s->idsp.idct_permutation[i];
3648
3649             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3650             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3651         }
3652         s->y_dc_scale_table=
3653         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3654         s->chroma_intra_matrix[0] =
3655         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3656         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3657                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3658         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3659                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrices above */
3660         s->qscale= 8;
3661     }
3662     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 luma, 14 chroma) and quant
              * matrices taken from entries 5*2+0 / 5*2+1 of sp5x_quant_table */
3663         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3664         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3665         for(i=1;i<64;i++){
3666             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3667
3668             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3669             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3670         }
3671         s->y_dc_scale_table= y;
3672         s->c_dc_scale_table= c;
3673         s->intra_matrix[0] = 13;
3674         s->chroma_intra_matrix[0] = 14;
3675         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3676                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3677         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3678                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3679         s->qscale= 8;
3680     }
3681
3682     //FIXME var duplication
3683     s->current_picture_ptr->f->key_frame =
3684     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3685     s->current_picture_ptr->f->pict_type =
3686     s->current_picture.f->pict_type = s->pict_type;
3687
3688     if (s->current_picture.f->key_frame)
3689         s->picture_in_gop_number=0;
3690
3691     s->mb_x = s->mb_y = 0;
         /* remember the bit position so header_bits can be computed below */
3692     s->last_bits= put_bits_count(&s->pb);
         /* write the picture header for the selected output format; each call
          * is guarded by the matching CONFIG_*_ENCODER constant */
3693     switch(s->out_format) {
3694     case FMT_MJPEG:
3695         if (CONFIG_MJPEG_ENCODER)
3696             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3697                                            s->intra_matrix, s->chroma_intra_matrix);
3698         break;
3699     case FMT_H261:
3700         if (CONFIG_H261_ENCODER)
3701             ff_h261_encode_picture_header(s, picture_number);
3702         break;
3703     case FMT_H263:
3704         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3705             ff_wmv2_encode_picture_header(s, picture_number);
3706         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3707             ff_msmpeg4_encode_picture_header(s, picture_number);
3708         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3709             ff_mpeg4_encode_picture_header(s, picture_number);
3710         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3711             ff_rv10_encode_picture_header(s, picture_number);
3712         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3713             ff_rv20_encode_picture_header(s, picture_number);
3714         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3715             ff_flv_encode_picture_header(s, picture_number);
3716         else if (CONFIG_H263_ENCODER)
3717             ff_h263_encode_picture_header(s, picture_number);
3718         break;
3719     case FMT_MPEG1:
3720         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3721             ff_mpeg1_encode_picture_header(s, picture_number);
3722         break;
3723     default:
3724         av_assert0(0);
3725     }
3726     bits= put_bits_count(&s->pb);
3727     s->header_bits= bits - s->last_bits;
3728
         /* refresh the other slice contexts from s, encode all slice contexts
          * with encode_thread, then merge the results back into s */
3729     for(i=1; i<context_count; i++){
3730         update_duplicate_context_after_me(s->thread_context[i], s);
3731     }
3732     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3733     for(i=1; i<context_count; i++){
3734         merge_context_after_encode(s, s->thread_context[i]);
3735     }
3736     emms_c();
3737     return 0;
3738 }
3739
3740 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3741     const int intra= s->mb_intra;
3742     int i;
3743
3744     s->dct_count[intra]++;
3745
3746     for(i=0; i<64; i++){
3747         int level= block[i];
3748
3749         if(level){
3750             if(level>0){
3751                 s->dct_error_sum[intra][i] += level;
3752                 level -= s->dct_offset[intra][i];
3753                 if(level<0) level=0;
3754             }else{
3755                 s->dct_error_sum[intra][i] -= level;
3756                 level += s->dct_offset[intra][i];
3757                 if(level>0) level=0;
3758             }
3759             block[i]= level;
3760         }
3761     }
3762 }
3763
/**
 * Transform and quantize one 8x8 block, choosing the quantized levels with a
 * rate-distortion search over (run, level) pairs.
 *
 * The block is passed through s->fdsp.fdct() (and s->denoise_dct() when
 * s->dct_error_sum is set). For every coefficient up to the last one that
 * lies outside the quantizer dead zone, up to two candidate levels are kept.
 * A dynamic-programming pass then selects, per scan position, the candidate
 * and preceding run that minimise distortion + lambda * code length, using
 * the s->*_ac_vlc_length / s->*_ac_vlc_last_length tables and
 * s->ac_esc_length. The resulting score is stored in s->coded_score[n].
 *
 * @param s        encoder context
 * @param block    64 values; data to transform on entry, quantized levels on
 *                 return (the selected AC levels are written through
 *                 s->intra_scantable.permutated)
 * @param n        block index; n < 4 selects s->y_dc_scale and
 *                 s->q_intra_matrix, otherwise s->c_dc_scale and
 *                 s->q_chroma_intra_matrix (intra blocks only)
 * @param qscale   quantizer scale, also used to index the q_*_matrix tables
 * @param overflow set to nonzero when s->max_qcoeff is smaller than the
 *                 bitwise OR of the candidate magnitudes
 * @return scan index of the last coefficient kept; start_i - 1 (0 for intra
 *         blocks, -1 for inter blocks) when no coefficient from start_i on
 *         is kept
 */
3764 static int dct_quantize_trellis_c(MpegEncContext *s,
3765                                   int16_t *block, int n,
3766                                   int qscale, int *overflow){
3767     const int *qmat;
3768     const uint8_t *scantable= s->intra_scantable.scantable;
3769     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3770     int max=0;
3771     unsigned int threshold1, threshold2;
3772     int bias=0;
         /* Per scan position i+1: run and level of the best path whose most
          * recent coded coefficient is at position i, and that path's score. */
3773     int run_tab[65];
3774     int level_tab[65];
3775     int score_tab[65];
         /* Positions (as indices into score_tab) still considered as the end
          * of the previous coded coefficient. */
3776     int survivor[65];
3777     int survivor_count;
3778     int last_run=0;
3779     int last_level=0;
3780     int last_score= 0;
3781     int last_i;
         /* coeff[k][i]: k-th candidate level for scan position i;
          * coeff_count[i]: number of valid candidates (1 or 2). */
3782     int coeff[2][64];
3783     int coeff_count[64];
3784     int qmul, qadd, start_i, last_non_zero, i, dc;
3785     const int esc_length= s->ac_esc_length;
3786     uint8_t * length;
3787     uint8_t * last_length;
         /* multiplier applied to code lengths when they are added to distortion */
3788     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3789
3790     s->fdsp.fdct(block);
3791
3792     if(s->dct_error_sum)
3793         s->denoise_dct(s, block);
         /* reconstruction constants for the H.263/H.261 distortion estimate below */
3794     qmul= qscale*16;
3795     qadd= ((qscale-1)|1)*8;
3796
3797     if (s->mb_intra) {
3798         int q;
3799         if (!s->h263_aic) {
3800             if (n < 4)
3801                 q = s->y_dc_scale;
3802             else
3803                 q = s->c_dc_scale;
3804             q = q << 3;
3805         } else{
3806             /* For AIC we skip quant/dequant of INTRADC */
3807             q = 1 << 3;
3808             qadd=0;
3809         }
3810
3811         /* note: block[0] is assumed to be positive */
3812         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is quantized above, the search starts at index 1 */
3813         start_i = 1;
3814         last_non_zero = 0;
3815         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3816         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3817             bias= 1<<(QMAT_SHIFT-1);
3818         length     = s->intra_ac_vlc_length;
3819         last_length= s->intra_ac_vlc_last_length;
3820     } else {
3821         start_i = 0;
3822         last_non_zero = -1;
3823         qmat = s->q_inter_matrix[qscale];
3824         length     = s->inter_ac_vlc_length;
3825         last_length= s->inter_ac_vlc_last_length;
3826     }
3827     last_i= start_i;
3828
         /* A product p = block[j]*qmat[j] is treated as "outside the dead zone"
          * when p > threshold1 or p < -threshold1; the unsigned comparison
          * (unsigned)(p + threshold1) > threshold2 tests both bounds at once. */
3829     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3830     threshold2= (threshold1<<1);
3831
         /* find the last scan position whose product is outside the dead zone */
3832     for(i=63; i>=start_i; i--) {
3833         const int j = scantable[i];
3834         int level = block[j] * qmat[j];
3835
3836         if(((unsigned)(level+threshold1))>threshold2){
3837             last_non_zero = i;
3838             break;
3839         }
3840     }
3841
         /* Build the candidate levels for every position up to last_non_zero:
          * outside the dead zone the rounded level and the level one step
          * closer to zero (only the former when its magnitude is 1), inside
          * the dead zone a single candidate (level>>31)|1, i.e. +1 or -1. */
3842     for(i=start_i; i<=last_non_zero; i++) {
3843         const int j = scantable[i];
3844         int level = block[j] * qmat[j];
3845
3846 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3847 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3848         if(((unsigned)(level+threshold1))>threshold2){
3849             if(level>0){
3850                 level= (bias + level)>>QMAT_SHIFT;
3851                 coeff[0][i]= level;
3852                 coeff[1][i]= level-1;
3853 //                coeff[2][k]= level-2;
3854             }else{
3855                 level= (bias - level)>>QMAT_SHIFT;
3856                 coeff[0][i]= -level;
3857                 coeff[1][i]= -level+1;
3858 //                coeff[2][k]= -level+2;
3859             }
3860             coeff_count[i]= FFMIN(level, 2);
3861             av_assert2(coeff_count[i]);
                 /* level holds the magnitude here; OR them for the overflow test */
3862             max |=level;
3863         }else{
3864             coeff[0][i]= (level>>31)|1;
3865             coeff_count[i]= 1;
3866         }
3867     }
3868
3869     *overflow= s->max_qcoeff < max; //overflow might have happened
3870
         /* nothing outside the dead zone: clear everything from start_i on */
3871     if(last_non_zero < start_i){
3872         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3873         return last_non_zero;
3874     }
3875
3876     score_tab[start_i]= 0;
3877     survivor[0]= start_i;
3878     survivor_count= 1;
3879
         /* Dynamic programming over scan positions: for each candidate level
          * at position i and each surviving predecessor, compute the score of
          * coding (run, level) here and keep the best in score_tab[i+1]. */
3880     for(i=start_i; i<=last_non_zero; i++){
3881         int level_index, j, zero_distortion;
3882         int dct_coeff= FFABS(block[ scantable[i] ]);
3883         int best_score=256*256*256*120;
3884
3885         if (s->fdsp.fdct == ff_fdct_ifast)
3886             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
             /* distortions below are expressed relative to zeroing this coefficient */
3887         zero_distortion= dct_coeff*dct_coeff;
3888
3889         for(level_index=0; level_index < coeff_count[i]; level_index++){
3890             int distortion;
3891             int level= coeff[level_index][i];
3892             const int alevel= FFABS(level);
3893             int unquant_coeff;
3894
3895             av_assert2(level);
3896
                 /* reconstructed magnitude of this candidate */
3897             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3898                 unquant_coeff= alevel*qmul + qadd;
3899             }else{ //MPEG1
3900                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3901                 if(s->mb_intra){
3902                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3903                         unquant_coeff =   (unquant_coeff - 1) | 1;
3904                 }else{
3905                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3906                         unquant_coeff =   (unquant_coeff - 1) | 1;
3907                 }
3908                 unquant_coeff<<= 3;
3909             }
3910
3911             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* The length tables are indexed with level+64; levels outside
                  * -64..63 are charged esc_length instead. */
3912             level+=64;
3913             if((level&(~127)) == 0){
3914                 for(j=survivor_count-1; j>=0; j--){
3915                     int run= i - survivor[j];
3916                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3917                     score += score_tab[i-run];
3918
3919                     if(score < best_score){
3920                         best_score= score;
3921                         run_tab[i+1]= run;
3922                         level_tab[i+1]= level-64;
3923                     }
3924                 }
3925
                     /* For H.263/H.261 output, also score this coefficient as the
                      * final one using last_length and track the best ending. */
3926                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3927                     for(j=survivor_count-1; j>=0; j--){
3928                         int run= i - survivor[j];
3929                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3930                         score += score_tab[i-run];
3931                         if(score < last_score){
3932                             last_score= score;
3933                             last_run= run;
3934                             last_level= level-64;
3935                             last_i= i+1;
3936                         }
3937                     }
3938                 }
3939             }else{
3940                 distortion += esc_length*lambda;
3941                 for(j=survivor_count-1; j>=0; j--){
3942                     int run= i - survivor[j];
3943                     int score= distortion + score_tab[i-run];
3944
3945                     if(score < best_score){
3946                         best_score= score;
3947                         run_tab[i+1]= run;
3948                         level_tab[i+1]= level-64;
3949                     }
3950                 }
3951
3952                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3953                   for(j=survivor_count-1; j>=0; j--){
3954                         int run= i - survivor[j];
3955                         int score= distortion + score_tab[i-run];
3956                         if(score < last_score){
3957                             last_score= score;
3958                             last_run= run;
3959                             last_level= level-64;
3960                             last_i= i+1;
3961                         }
3962                     }
3963                 }
3964             }
3965         }
3966
3967         score_tab[i+1]= best_score;
3968
             /* Drop trailing survivors whose score exceeds best_score (with an
              * extra allowance of lambda when last_non_zero > 27). */
3969         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3970         if(last_non_zero <= 27){
3971             for(; survivor_count; survivor_count--){
3972                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3973                     break;
3974             }
3975         }else{
3976             for(; survivor_count; survivor_count--){
3977                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3978                     break;
3979             }
3980         }
3981
3982         survivor[ survivor_count++ ]= i+1;
3983     }
3984
         /* Other output formats: choose the end position after the search;
          * every end position except 0 is charged an additional 2*lambda. */
3985     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3986         last_score= 256*256*256*120;
3987         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3988             int score= score_tab[i];
3989             if(i) score += lambda*2; //FIXME exacter?
3990
3991             if(score < last_score){
3992                 last_score= score;
3993                 last_i= i;
3994                 last_level= level_tab[i];
3995                 last_run= run_tab[i];
3996             }
3997         }
3998     }
3999
4000     s->coded_score[n] = last_score;
4001
         /* remember |block[0]| before the coefficients from start_i on are cleared */
4002     dc= FFABS(block[0]);
4003     last_non_zero= last_i - 1;
4004     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4005
4006     if(last_non_zero < start_i)
4007         return last_non_zero;
4008
         /* Only position 0 is left and it is part of the search (start_i == 0):
          * re-evaluate its candidates against dropping the block (score dc*dc). */
4009     if(last_non_zero == 0 && start_i == 0){
4010         int best_level= 0;
4011         int best_score= dc * dc;
4012
4013         for(i=0; i<coeff_count[0]; i++){
4014             int level= coeff[i][0];
4015             int alevel= FFABS(level);
4016             int unquant_coeff, score, distortion;
4017
4018             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4019                     unquant_coeff= (alevel*qmul + qadd)>>3;
4020             }else{ //MPEG1
4021                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4022                     unquant_coeff =   (unquant_coeff - 1) | 1;
4023             }
4024             unquant_coeff = (unquant_coeff + 4) >> 3;
4025             unquant_coeff<<= 3 + 3;
4026
4027             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4028             level+=64;
4029             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4030             else                    score= distortion + esc_length*lambda;
4031
4032             if(score < best_score){
4033                 best_score= score;
4034                 best_level= level - 64;
4035             }
4036         }
4037         block[0]= best_level;
4038         s->coded_score[n] = best_score - dc*dc;
4039         if(best_level == 0) return -1;
4040         else                return last_non_zero;
4041     }
4042
         /* Backtrack: store the final coefficient, then follow run_tab/level_tab
          * towards start_i, writing each selected level in permuted order. */
4043     i= last_i;
4044     av_assert2(last_level);
4045
4046     block[ perm_scantable[last_non_zero] ]= last_level;
4047     i -= last_run + 1;
4048
4049     for(; i>start_i; i -= run_tab[i] + 1){
4050         block[ perm_scantable[i-1] ]= level_tab[i];
4051     }
4052
4053     return last_non_zero;
4054 }
4055
4056 //#define REFINE_STATS 1
4057 static int16_t basis[64][64];
4058
4059 static void build_basis(uint8_t *perm){
4060     int i, j, x, y;
4061     emms_c();
4062     for(i=0; i<8; i++){
4063         for(j=0; j<8; j++){
4064             for(y=0; y<8; y++){
4065                 for(x=0; x<8; x++){
4066                     double s= 0.25*(1<<BASIS_SHIFT);
4067                     int index= 8*i + j;
4068                     int perm_index= perm[index];
4069                     if(i==0) s*= sqrt(0.5);
4070                     if(j==0) s*= sqrt(0.5);
4071                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4072                 }
4073             }
4074         }
4075     }
4076 }
4077
4078 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4079                         int16_t *block, int16_t *weight, int16_t *orig,
4080                         int n, int qscale){
4081     int16_t rem[64];
4082     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4083     const uint8_t *scantable= s->intra_scantable.scantable;
4084     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4085 //    unsigned int threshold1, threshold2;
4086 //    int bias=0;
4087     int run_tab[65];
4088     int prev_run=0;
4089     int prev_level=0;
4090     int qmul, qadd, start_i, last_non_zero, i, dc;
4091     uint8_t * length;
4092     uint8_t * last_length;
4093     int lambda;
4094     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4095 #ifdef REFINE_STATS
4096 static int count=0;
4097 static int after_last=0;
4098 static int to_zero=0;
4099 static int from_zero=0;
4100 static int raise=0;
4101 static int lower=0;
4102 static int messed_sign=0;
4103 #endif
4104
4105     if(basis[0][0] == 0)
4106         build_basis(s->idsp.idct_permutation);
4107
4108     qmul= qscale*2;
4109     qadd= (qscale-1)|1;
4110     if (s->mb_intra) {
4111         if (!s->h263_aic) {
4112             if (n < 4)
4113                 q = s->y_dc_scale;
4114             else
4115                 q = s->c_dc_scale;
4116         } else{
4117             /* For AIC we skip quant/dequant of INTRADC */
4118             q = 1;
4119             qadd=0;
4120         }
4121         q <<= RECON_SHIFT-3;
4122         /* note: block[0] is assumed to be positive */
4123         dc= block[0]*q;
4124 //        block[0] = (block[0] + (q >> 1)) / q;
4125         start_i = 1;
4126 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4127 //            bias= 1<<(QMAT_SHIFT-1);
4128         length     = s->intra_ac_vlc_length;
4129         last_length= s->intra_ac_vlc_last_length;
4130     } else {
4131         dc= 0;
4132         start_i = 0;
4133         length     = s->inter_ac_vlc_length;
4134         last_length= s->inter_ac_vlc_last_length;
4135     }
4136     last_non_zero = s->block_last_index[n];
4137
4138 #ifdef REFINE_STATS
4139 {START_TIMER
4140 #endif
4141     dc += (1<<(RECON_SHIFT-1));
4142     for(i=0; i<64; i++){
4143         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4144     }
4145 #ifdef REFINE_STATS
4146 STOP_TIMER("memset rem[]")}
4147 #endif
4148     sum=0;
4149     for(i=0; i<64; i++){
4150         int one= 36;
4151         int qns=4;
4152         int w;
4153
4154         w= FFABS(weight[i]) + qns*one;
4155         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4156
4157         weight[i] = w;
4158 //        w=weight[i] = (63*qns + (w/2)) / w;
4159
4160         av_assert2(w>0);
4161         av_assert2(w<(1<<6));
4162         sum += w*w;
4163     }
4164     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4165 #ifdef REFINE_STATS
4166 {START_TIMER
4167 #endif
4168     run=0;
4169     rle_index=0;
4170     for(i=start_i; i<=last_non_zero; i++){
4171         int j= perm_scantable[i];
4172         const int level= block[j];
4173         int coeff;
4174
4175         if(level){
4176             if(level<0) coeff= qmul*level - qadd;
4177             else        coeff= qmul*level + qadd;
4178             run_tab[rle_index++]=run;
4179             run=0;
4180
4181             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4182         }else{
4183             run++;
4184         }
4185     }
4186 #ifdef REFINE_STATS
4187 if(last_non_zero>0){
4188 STOP_TIMER("init rem[]")
4189 }
4190 }
4191
4192 {START_TIMER
4193 #endif
4194     for(;;){
4195         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4196         int best_coeff=0;
4197         int best_change=0;
4198         int run2, best_unquant_change=0, analyze_gradient;
4199 #ifdef REFINE_STATS
4200 {START_TIMER
4201 #endif
4202         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4203
4204         if(analyze_gradient){
4205 #ifdef REFINE_STATS
4206 {START_TIMER
4207 #endif
4208             for(i=0; i<64; i++){
4209                 int w= weight[i];
4210
4211                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4212             }
4213 #ifdef REFINE_STATS
4214 STOP_TIMER("rem*w*w")}
4215 {START_TIMER
4216 #endif
4217             s->fdsp.fdct(d1);
4218 #ifdef REFINE_STATS
4219 STOP_TIMER("dct")}
4220 #endif
4221         }
4222
4223         if(start_i){
4224             const int level= block[0];
4225             int change, old_coeff;
4226
4227             av_assert2(s->mb_intra);
4228
4229             old_coeff= q*level;
4230
4231             for(change=-1; change<=1; change+=2){
4232                 int new_level= level + change;
4233                 int score, new_coeff;
4234
4235                 new_coeff= q*new_level;
4236                 if(new_coeff >= 2048 || new_coeff < 0)
4237                     continue;
4238
4239                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4240                                                   new_coeff - old_coeff);
4241                 if(score<best_score){
4242                     best_score= score;
4243                     best_coeff= 0;
4244                     best_change= change;
4245                     best_unquant_change= new_coeff - old_coeff;
4246                 }
4247             }
4248         }
4249
4250         run=0;
4251         rle_index=0;
4252         run2= run_tab[rle_index++];
4253         prev_level=0;
4254         prev_run=0;
4255
4256         for(i=start_i; i<64; i++){
4257             int j= perm_scantable[i];
4258             const int level= block[j];
4259             int change, old_coeff;
4260
4261             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4262                 break;
4263
4264             if(level){
4265                 if(level<0) old_coeff= qmul*level - qadd;
4266                 else        old_coeff= qmul*level + qadd;
4267                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4268             }else{
4269                 old_coeff=0;
4270                 run2--;
4271                 av_assert2(run2>=0 || i >= last_non_zero );
4272             }
4273
4274             for(change=-1; change<=1; change+=2){
4275                 int new_level= level + change;
4276                 int score, new_coeff, unquant_change;
4277
4278                 score=0;
4279                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4280                    continue;
4281
4282                 if(new_level){
4283                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4284                     else            new_coeff= qmul*new_level + qadd;
4285                     if(new_coeff >= 2048 || new_coeff <= -2048)
4286                         continue;
4287                     //FIXME check for overflow
4288
4289                     if(level){
4290                         if(level < 63 && level > -63){
4291                             if(i < last_non_zero)
4292                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4293                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4294                             else
4295                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4296                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4297                         }
4298                     }else{
4299                         av_assert2(FFABS(new_level)==1);
4300
4301                         if(analyze_gradient){
4302                             int g= d1[ scantable[i] ];
4303                             if(g && (g^new_level) >= 0)
4304                                 continue;
4305                         }
4306
4307                         if(i < last_non_zero){
4308                             int next_i= i + run2 + 1;
4309                             int next_level= block[ perm_scantable[next_i] ] + 64;
4310
4311                             if(next_level&(~127))
4312                                 next_level= 0;
4313
4314                             if(next_i < last_non_zero)
4315                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4316                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4317                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4318                             else
4319                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4320                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4321                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4322                         }else{
4323                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4324                             if(prev_level){
4325                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4326                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4327                             }
4328                         }
4329                     }
4330                 }else{
4331                     new_coeff=0;
4332                     av_assert2(FFABS(level)==1);
4333
4334                     if(i < last_non_zero){
4335                         int next_i= i + run2 + 1;
4336                         int next_level= block[ perm_scantable[next_i] ] + 64;
4337
4338                         if(next_level&(~127))
4339                             next_level= 0;
4340
4341                         if(next_i < last_non_zero)
4342                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4343                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4344                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4345                         else
4346                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4347                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4348                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4349                     }else{
4350                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4351                         if(prev_level){
4352                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4353                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4354                         }
4355                     }
4356                 }
4357
4358                 score *= lambda;
4359
4360                 unquant_change= new_coeff - old_coeff;
4361                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4362
4363                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4364                                                    unquant_change);
4365                 if(score<best_score){
4366                     best_score= score;
4367                     best_coeff= i;
4368                     best_change= change;
4369                     best_unquant_change= unquant_change;
4370                 }
4371             }
4372             if(level){
4373                 prev_level= level + 64;
4374                 if(prev_level&(~127))
4375                     prev_level= 0;
4376                 prev_run= run;
4377                 run=0;
4378             }else{
4379                 run++;
4380             }
4381         }
4382 #ifdef REFINE_STATS
4383 STOP_TIMER("iterative step")}
4384 #endif
4385
4386         if(best_change){
4387             int j= perm_scantable[ best_coeff ];
4388
4389             block[j] += best_change;
4390
4391             if(best_coeff > last_non_zero){
4392                 last_non_zero= best_coeff;
4393                 av_assert2(block[j]);
4394 #ifdef REFINE_STATS
4395 after_last++;
4396 #endif
4397             }else{
4398 #ifdef REFINE_STATS
4399 if(block[j]){
4400     if(block[j] - best_change){
4401         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4402             raise++;
4403         }else{
4404             lower++;
4405         }
4406     }else{
4407         from_zero++;
4408     }
4409 }else{
4410     to_zero++;
4411 }
4412 #endif
4413                 for(; last_non_zero>=start_i; last_non_zero--){
4414                     if(block[perm_scantable[last_non_zero]])
4415                         break;
4416                 }
4417             }
4418 #ifdef REFINE_STATS
4419 count++;
4420 if(256*256*256*64 % count == 0){
4421     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4422 }
4423 #endif
4424             run=0;
4425             rle_index=0;
4426             for(i=start_i; i<=last_non_zero; i++){
4427                 int j= perm_scantable[i];
4428                 const int level= block[j];
4429
4430                  if(level){
4431                      run_tab[rle_index++]=run;
4432                      run=0;
4433                  }else{
4434                      run++;
4435                  }
4436             }
4437
4438             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4439         }else{
4440             break;
4441         }
4442     }
4443 #ifdef REFINE_STATS
4444 if(last_non_zero>0){
4445 STOP_TIMER("iterative search")
4446 }
4447 }
4448 #endif
4449
4450     return last_non_zero;
4451 }
4452
4453 int ff_dct_quantize_c(MpegEncContext *s,
4454                         int16_t *block, int n,
4455                         int qscale, int *overflow)
4456 {
4457     int i, j, level, last_non_zero, q, start_i;
4458     const int *qmat;
4459     const uint8_t *scantable= s->intra_scantable.scantable;
4460     int bias;
4461     int max=0;
4462     unsigned int threshold1, threshold2;
4463
4464     s->fdsp.fdct(block);
4465
4466     if(s->dct_error_sum)
4467         s->denoise_dct(s, block);
4468
4469     if (s->mb_intra) {
4470         if (!s->h263_aic) {
4471             if (n < 4)
4472                 q = s->y_dc_scale;
4473             else
4474                 q = s->c_dc_scale;
4475             q = q << 3;
4476         } else
4477             /* For AIC we skip quant/dequant of INTRADC */
4478             q = 1 << 3;
4479
4480         /* note: block[0] is assumed to be positive */
4481         block[0] = (block[0] + (q >> 1)) / q;
4482         start_i = 1;
4483         last_non_zero = 0;
4484         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4485         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4486     } else {
4487         start_i = 0;
4488         last_non_zero = -1;
4489         qmat = s->q_inter_matrix[qscale];
4490         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4491     }
4492     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4493     threshold2= (threshold1<<1);
4494     for(i=63;i>=start_i;i--) {
4495         j = scantable[i];
4496         level = block[j] * qmat[j];
4497
4498         if(((unsigned)(level+threshold1))>threshold2){
4499             last_non_zero = i;
4500             break;
4501         }else{
4502             block[j]=0;
4503         }
4504     }
4505     for(i=start_i; i<=last_non_zero; i++) {
4506         j = scantable[i];
4507         level = block[j] * qmat[j];
4508
4509 //        if(   bias+level >= (1<<QMAT_SHIFT)
4510 //           || bias-level >= (1<<QMAT_SHIFT)){
4511         if(((unsigned)(level+threshold1))>threshold2){
4512             if(level>0){
4513                 level= (bias + level)>>QMAT_SHIFT;
4514                 block[j]= level;
4515             }else{
4516                 level= (bias - level)>>QMAT_SHIFT;
4517                 block[j]= -level;
4518             }
4519             max |=level;
4520         }else{
4521             block[j]=0;
4522         }
4523     }
4524     *overflow= s->max_qcoeff < max; //overflow might have happened
4525
4526     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4527     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4528         ff_block_permute(block, s->idsp.idct_permutation,
4529                          scantable, last_non_zero);
4530
4531     return last_non_zero;
4532 }
4533
4534 #define OFFSET(x) offsetof(MpegEncContext, x)
4535 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4536 static const AVOption h263_options[] = {
4537     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4538     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4539     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4540     FF_MPV_COMMON_OPTS
4541     { NULL },
4542 };
4543
4544 static const AVClass h263_class = {
4545     .class_name = "H.263 encoder",
4546     .item_name  = av_default_item_name,
4547     .option     = h263_options,
4548     .version    = LIBAVUTIL_VERSION_INT,
4549 };
4550
4551 AVCodec ff_h263_encoder = {
4552     .name           = "h263",
4553     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4554     .type           = AVMEDIA_TYPE_VIDEO,
4555     .id             = AV_CODEC_ID_H263,
4556     .priv_data_size = sizeof(MpegEncContext),
4557     .init           = ff_mpv_encode_init,
4558     .encode2        = ff_mpv_encode_picture,
4559     .close          = ff_mpv_encode_end,
4560     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4561     .priv_class     = &h263_class,
4562 };
4563
4564 static const AVOption h263p_options[] = {
4565     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4566     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4567     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4568     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4569     FF_MPV_COMMON_OPTS
4570     { NULL },
4571 };
4572 static const AVClass h263p_class = {
4573     .class_name = "H.263p encoder",
4574     .item_name  = av_default_item_name,
4575     .option     = h263p_options,
4576     .version    = LIBAVUTIL_VERSION_INT,
4577 };
4578
4579 AVCodec ff_h263p_encoder = {
4580     .name           = "h263p",
4581     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4582     .type           = AVMEDIA_TYPE_VIDEO,
4583     .id             = AV_CODEC_ID_H263P,
4584     .priv_data_size = sizeof(MpegEncContext),
4585     .init           = ff_mpv_encode_init,
4586     .encode2        = ff_mpv_encode_picture,
4587     .close          = ff_mpv_encode_end,
4588     .capabilities   = CODEC_CAP_SLICE_THREADS,
4589     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4590     .priv_class     = &h263p_class,
4591 };
4592
4593 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4594
4595 AVCodec ff_msmpeg4v2_encoder = {
4596     .name           = "msmpeg4v2",
4597     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4598     .type           = AVMEDIA_TYPE_VIDEO,
4599     .id             = AV_CODEC_ID_MSMPEG4V2,
4600     .priv_data_size = sizeof(MpegEncContext),
4601     .init           = ff_mpv_encode_init,
4602     .encode2        = ff_mpv_encode_picture,
4603     .close          = ff_mpv_encode_end,
4604     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4605     .priv_class     = &msmpeg4v2_class,
4606 };
4607
4608 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4609
4610 AVCodec ff_msmpeg4v3_encoder = {
4611     .name           = "msmpeg4",
4612     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4613     .type           = AVMEDIA_TYPE_VIDEO,
4614     .id             = AV_CODEC_ID_MSMPEG4V3,
4615     .priv_data_size = sizeof(MpegEncContext),
4616     .init           = ff_mpv_encode_init,
4617     .encode2        = ff_mpv_encode_picture,
4618     .close          = ff_mpv_encode_end,
4619     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4620     .priv_class     = &msmpeg4v3_class,
4621 };
4622
4623 FF_MPV_GENERIC_CLASS(wmv1)
4624
4625 AVCodec ff_wmv1_encoder = {
4626     .name           = "wmv1",
4627     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4628     .type           = AVMEDIA_TYPE_VIDEO,
4629     .id             = AV_CODEC_ID_WMV1,
4630     .priv_data_size = sizeof(MpegEncContext),
4631     .init           = ff_mpv_encode_init,
4632     .encode2        = ff_mpv_encode_picture,
4633     .close          = ff_mpv_encode_end,
4634     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4635     .priv_class     = &wmv1_class,
4636 };